commit 0c61466771ff205a955f3e5002d2a7f2449ccc78 (tree)
parent f60c24c73cc5c5894fbfb7060a70bc683c4a4ba5
Author: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 11 Mar 2024 17:18:09 -0700
Merge pull request #19174 from squeek502/lazy-resinator
Lazily compile the `zig rc` subcommand and use it during `zig build-exe`, etc
Diffstat:
37 files changed, 14940 insertions(+), 14234 deletions(-)
diff --git a/src/resinator/ani.zig b/lib/compiler/resinator/ani.zig
diff --git a/src/resinator/ast.zig b/lib/compiler/resinator/ast.zig
diff --git a/src/resinator/bmp.zig b/lib/compiler/resinator/bmp.zig
diff --git a/lib/compiler/resinator/cli.zig b/lib/compiler/resinator/cli.zig
@@ -0,0 +1,1507 @@
+const std = @import("std");
+const CodePage = @import("code_pages.zig").CodePage;
+const lang = @import("lang.zig");
+const res = @import("res.zig");
+const Allocator = std.mem.Allocator;
+const lex = @import("lex.zig");
+
+/// The maximum string literal length that `/sl 100` (i.e. 100 percent) selects; other `/sl` percentages are applied as a fraction of this value.
+pub const max_string_literal_length_100_percent = 8192;
+
+/// The help text that `writeUsage` prints after "Usage: " and the command
+/// name: the argument synopsis followed by the lists of supported, no-op,
+/// unsupported, and resinator-specific options.
+pub const usage_string_after_command_name =
+ \\ [options] [--] <INPUT> [<OUTPUT>]
+ \\
+ \\The sequence -- can be used to signify when to stop parsing options.
+ \\This is necessary when the input path begins with a forward slash.
+ \\
+ \\Supported Win32 RC Options:
+ \\ /?, /h Print this help and exit.
+ \\ /v Verbose (print progress messages).
+ \\ /d <name>[=<value>] Define a symbol (during preprocessing).
+ \\ /u <name> Undefine a symbol (during preprocessing).
+ \\ /fo <value> Specify output file path.
+ \\ /l <value> Set default language using hexadecimal id (ex: 409).
+ \\ /ln <value> Set default language using language name (ex: en-us).
+ \\ /i <value> Add an include path.
+ \\ /x Ignore INCLUDE environment variable.
+ \\ /c <value> Set default code page (ex: 65001).
+ \\ /w Warn on invalid code page in .rc (instead of error).
+ \\ /y Suppress warnings for duplicate control IDs.
+ \\ /n Null-terminate all strings in string tables.
+ \\ /sl <value> Specify string literal length limit in percentage (1-100)
+ \\ where 100 corresponds to a limit of 8192. If the /sl
+ \\ option is not specified, the default limit is 4097.
+ \\ /p Only run the preprocessor and output a .rcpp file.
+ \\
+ \\No-op Win32 RC Options:
+ \\ /nologo, /a, /r Options that are recognized but do nothing.
+ \\
+ \\Unsupported Win32 RC Options:
+ \\ /fm, /q, /g, /gn, /g1, /g2 Unsupported MUI-related options.
+ \\ /?c, /hc, /t, /tp:<prefix>, Unsupported LCX/LCE-related options.
+ \\ /tn, /tm, /tc, /tw, /te,
+ \\ /ti, /ta
+ \\ /z Unsupported font-substitution-related option.
+ \\ /s Unsupported HWB-related option.
+ \\
+ \\Custom Options (resinator-specific):
+ \\ /:no-preprocess Do not run the preprocessor.
+ \\ /:debug Output the preprocessed .rc file and the parsed AST.
+ \\ /:auto-includes <value> Set the automatic include path detection behavior.
+ \\ any (default) Use MSVC if available, fall back to MinGW
+ \\ msvc Use MSVC include paths (must be present on the system)
+ \\ gnu Use MinGW include paths
+ \\ none Do not use any autodetected include paths
+ \\ /:depfile <path> Output a file containing a list of all the files that
+ \\ the .rc includes or otherwise depends on.
+ \\ /:depfile-fmt <value> Output format of the depfile, if /:depfile is set.
+ \\ json (default) A top-level JSON array of paths
+ \\ /:mingw-includes <path> Path to a directory containing MinGW include files. If
+ \\ not specified, bundled MinGW include files will be used.
+ \\
+ \\Note: For compatibility reasons, all custom options start with :
+ \\
+;
+
+/// Writes the full usage/help text to `writer`: the literal "Usage: ",
+/// then `command_name`, then `usage_string_after_command_name`.
+/// Any error returned by `writer.writeAll` is propagated.
+pub fn writeUsage(writer: anytype, command_name: []const u8) !void {
+    const pieces = [_][]const u8{
+        "Usage: ",
+        command_name,
+        usage_string_after_command_name,
+    };
+    for (pieces) |piece| {
+        try writer.writeAll(piece);
+    }
+}
+
+/// Accumulates the errors, warnings and notes produced while parsing the
+/// command line so they can be rendered together afterwards.
+///
+/// `deinit` frees every stored message with `allocator`, so the `msg` of each
+/// appended `ErrorDetails` must have been allocated with that same allocator.
+pub const Diagnostics = struct {
+    errors: std.ArrayListUnmanaged(ErrorDetails) = .{},
+    allocator: Allocator,
+
+    /// One diagnostic entry.
+    pub const ErrorDetails = struct {
+        /// Index of the argument this diagnostic was raised for.
+        arg_index: usize,
+        /// Location information within the argument; consumed by the renderer.
+        arg_span: ArgSpan = .{},
+        /// The message text, built up by the code that reports the diagnostic.
+        msg: std.ArrayListUnmanaged(u8) = .{},
+        type: Type = .err,
+        print_args: bool = true,
+
+        pub const Type = enum { err, warning, note };
+        pub const ArgSpan = struct {
+            point_at_next_arg: bool = false,
+            name_offset: usize = 0,
+            prefix_len: usize = 0,
+            value_offset: usize = 0,
+            name_len: usize = 0,
+        };
+    };
+
+    /// Creates an empty diagnostics list backed by `allocator`.
+    pub fn init(allocator: Allocator) Diagnostics {
+        return Diagnostics{ .allocator = allocator };
+    }
+
+    /// Frees every stored message and then the list itself.
+    pub fn deinit(self: *Diagnostics) void {
+        var i: usize = 0;
+        while (i < self.errors.items.len) : (i += 1) {
+            self.errors.items[i].msg.deinit(self.allocator);
+        }
+        self.errors.deinit(self.allocator);
+    }
+
+    /// Stores `error_details` in the list. If the list cannot grow, the error
+    /// is returned and `error_details.msg` is not freed by this function.
+    pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void {
+        return self.errors.append(self.allocator, error_details);
+    }
+
+    /// Renders all diagnostics to stderr while holding the stderr mutex.
+    /// Write failures are ignored.
+    pub fn renderToStdErr(self: *Diagnostics, args: []const []const u8, config: std.io.tty.Config) void {
+        const stderr_mutex = std.debug.getStderrMutex();
+        stderr_mutex.lock();
+        defer stderr_mutex.unlock();
+        self.renderToWriter(args, std.io.getStdErr().writer(), config) catch return;
+    }
+
+    /// Renders each diagnostic, in insertion order, via `renderErrorMessage`.
+    pub fn renderToWriter(self: *Diagnostics, args: []const []const u8, writer: anytype, config: std.io.tty.Config) !void {
+        for (self.errors.items) |details| try renderErrorMessage(writer, config, details, args);
+    }
+
+    /// Returns true if at least one stored diagnostic has type `.err`
+    /// (warnings and notes do not count).
+    pub fn hasError(self: *const Diagnostics) bool {
+        for (self.errors.items) |details| {
+            switch (details.type) {
+                .err => return true,
+                .warning, .note => {},
+            }
+        }
+        return false;
+    }
+};
+
+/// The settings produced by parsing the command line (see `parse`).
+///
+/// `deinit` releases `input_filename`, `output_filename`, every entry of
+/// `extra_include_paths`, all keys and `.define` values of `symbols`,
+/// `depfile_path` and `mingw_includes_dir` through `allocator`, so anything
+/// stored in those fields must have been allocated with that same allocator.
+pub const Options = struct {
+    allocator: Allocator,
+    input_filename: []const u8 = &[_]u8{},
+    output_filename: []const u8 = &[_]u8{},
+    extra_include_paths: std.ArrayListUnmanaged([]const u8) = .{},
+    ignore_include_env_var: bool = false,
+    preprocess: Preprocess = .yes,
+    default_language_id: ?u16 = null,
+    default_code_page: ?CodePage = null,
+    verbose: bool = false,
+    /// Symbols to `#define`/`#undef`, keyed by identifier (see `define` and `undefine`).
+    symbols: std.StringArrayHashMapUnmanaged(SymbolValue) = .{},
+    null_terminate_string_table_strings: bool = false,
+    max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints,
+    silent_duplicate_control_ids: bool = false,
+    warn_instead_of_error_on_invalid_code_page: bool = false,
+    debug: bool = false,
+    print_help_and_exit: bool = false,
+    auto_includes: AutoIncludes = .any,
+    depfile_path: ?[]const u8 = null,
+    depfile_fmt: DepfileFormat = .json,
+    mingw_includes_dir: ?[]const u8 = null,
+
+    pub const AutoIncludes = enum { any, msvc, gnu, none };
+    pub const DepfileFormat = enum { json };
+    pub const Preprocess = enum { no, yes, only };
+    pub const SymbolAction = enum { define, undefine };
+    pub const SymbolValue = union(SymbolAction) {
+        define: []const u8,
+        undefine: void,
+
+        /// Frees the value of a `.define`; `.undefine` owns nothing.
+        pub fn deinit(self: SymbolValue, allocator: Allocator) void {
+            switch (self) {
+                .define => |value| allocator.free(value),
+                .undefine => {},
+            }
+        }
+    };
+
+    /// Records `identifier` as defined to `value`. Both are duplicated, so the
+    /// caller keeps ownership of the slices passed in.
+    ///
+    /// If the identifier has already been undefined, this is a no-op; if it is
+    /// already defined, the new value replaces the old one.
+    ///
+    /// Does not check that identifier contains only valid characters
+    pub fn define(self: *Options, identifier: []const u8, value: []const u8) !void {
+        if (self.symbols.getPtr(identifier)) |val_ptr| {
+            // If the symbol is undefined, then that always takes precedence so
+            // we shouldn't change anything.
+            if (val_ptr.* == .undefine) return;
+            // Otherwise, the new value takes precedence. The copy is made
+            // before the old value is released so that an allocation failure
+            // leaves the existing entry untouched; nothing after the copy can
+            // fail, so no errdefer is needed.
+            const duped_value = try self.allocator.dupe(u8, value);
+            val_ptr.deinit(self.allocator);
+            val_ptr.* = .{ .define = duped_value };
+            return;
+        }
+        const duped_key = try self.allocator.dupe(u8, identifier);
+        errdefer self.allocator.free(duped_key);
+        const duped_value = try self.allocator.dupe(u8, value);
+        errdefer self.allocator.free(duped_value);
+        try self.symbols.put(self.allocator, duped_key, .{ .define = duped_value });
+    }
+
+    /// Records `identifier` as undefined, replacing (and freeing) any value it
+    /// was previously defined to. The identifier is duplicated when a new
+    /// entry has to be created.
+    ///
+    /// Does not check that identifier contains only valid characters
+    pub fn undefine(self: *Options, identifier: []const u8) !void {
+        if (self.symbols.getPtr(identifier)) |action| {
+            action.deinit(self.allocator);
+            action.* = .{ .undefine = {} };
+            return;
+        }
+        const duped_key = try self.allocator.dupe(u8, identifier);
+        errdefer self.allocator.free(duped_key);
+        try self.symbols.put(self.allocator, duped_key, .{ .undefine = {} });
+    }
+
+    /// If the current input filename both:
+    /// - does not have an extension, and
+    /// - does not exist in the cwd
+    /// then this function will append `.rc` to the input filename
+    ///
+    /// Note: This behavior is different from the Win32 compiler.
+    ///       It always appends .RC if the filename does not have
+    ///       a `.` in it and it does not even try the verbatim name
+    ///       in that scenario.
+    ///
+    /// The approach taken here is meant to give us a 'best of both
+    /// worlds' situation where we'll be compatible with most use-cases
+    /// of the .rc extension being omitted from the CLI args, but still
+    /// work fine if the file itself does not have an extension.
+    ///
+    /// The old `input_filename` is freed with `options.allocator` when it is
+    /// replaced. The only error this can return is an allocation failure.
+    pub fn maybeAppendRC(options: *Options, cwd: std.fs.Dir) !void {
+        if (std.fs.path.extension(options.input_filename).len != 0) return;
+        cwd.access(options.input_filename, .{}) catch |err| switch (err) {
+            error.FileNotFound => {
+                const with_extension = try std.mem.concat(
+                    options.allocator,
+                    u8,
+                    &[_][]const u8{ options.input_filename, ".rc" },
+                );
+                options.allocator.free(options.input_filename);
+                options.input_filename = with_extension;
+            },
+            // Any other access failure leaves the filename as given.
+            else => {},
+        };
+    }
+
+    /// Frees everything owned by the options (see the type-level comment).
+    pub fn deinit(self: *Options) void {
+        for (self.extra_include_paths.items) |extra_include_path| {
+            self.allocator.free(extra_include_path);
+        }
+        self.extra_include_paths.deinit(self.allocator);
+        self.allocator.free(self.input_filename);
+        self.allocator.free(self.output_filename);
+        var symbol_it = self.symbols.iterator();
+        while (symbol_it.next()) |entry| {
+            self.allocator.free(entry.key_ptr.*);
+            entry.value_ptr.deinit(self.allocator);
+        }
+        self.symbols.deinit(self.allocator);
+        if (self.depfile_path) |depfile_path| {
+            self.allocator.free(depfile_path);
+        }
+        if (self.mingw_includes_dir) |mingw_includes_dir| {
+            self.allocator.free(mingw_includes_dir);
+        }
+    }
+
+    /// Writes a human-readable summary of the options to `writer`. Boolean
+    /// settings are only mentioned when they are enabled, and the string
+    /// literal limit only when it differs from the lexer default.
+    pub fn dumpVerbose(self: *const Options, writer: anytype) !void {
+        try writer.print("Input filename: {s}\n", .{self.input_filename});
+        try writer.print("Output filename: {s}\n", .{self.output_filename});
+        if (self.extra_include_paths.items.len > 0) {
+            try writer.writeAll(" Extra include paths:\n");
+            for (self.extra_include_paths.items) |extra_include_path| {
+                try writer.print(" \"{s}\"\n", .{extra_include_path});
+            }
+        }
+        if (self.ignore_include_env_var) {
+            try writer.writeAll(" The INCLUDE environment variable will be ignored\n");
+        }
+        if (self.preprocess == .no) {
+            try writer.writeAll(" The preprocessor will not be invoked\n");
+        } else if (self.preprocess == .only) {
+            try writer.writeAll(" Only the preprocessor will be invoked\n");
+        }
+        if (self.symbols.count() > 0) {
+            try writer.writeAll(" Symbols:\n");
+            var it = self.symbols.iterator();
+            while (it.next()) |symbol| {
+                try writer.print(" {s} {s}", .{ switch (symbol.value_ptr.*) {
+                    .define => "#define",
+                    .undefine => "#undef",
+                }, symbol.key_ptr.* });
+                if (symbol.value_ptr.* == .define) {
+                    try writer.print(" {s}", .{symbol.value_ptr.define});
+                }
+                try writer.writeAll("\n");
+            }
+        }
+        if (self.null_terminate_string_table_strings) {
+            try writer.writeAll(" Strings in string tables will be null-terminated\n");
+        }
+        if (self.max_string_literal_codepoints != lex.default_max_string_literal_codepoints) {
+            try writer.print(" Max string literal length: {}\n", .{self.max_string_literal_codepoints});
+        }
+        if (self.silent_duplicate_control_ids) {
+            try writer.writeAll(" Duplicate control IDs will not emit warnings\n");
+        }
+        // This line describes the invalid-code-page setting, so it must be
+        // keyed on that flag and not on `silent_duplicate_control_ids`.
+        if (self.warn_instead_of_error_on_invalid_code_page) {
+            try writer.writeAll(" Invalid code page in .rc will produce a warning (instead of an error)\n");
+        }
+
+        const language_id = self.default_language_id orelse res.Language.default;
+        const language_name = language_name: {
+            if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| {
+                break :language_name @tagName(lang_enum_val);
+            } else |_| {}
+            if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) {
+                break :language_name "LOCALE_CUSTOM_UNSPECIFIED";
+            }
+            break :language_name "<UNKNOWN>";
+        };
+        try writer.print("Default language: {s} (id=0x{x})\n", .{ language_name, language_id });
+
+        const code_page = self.default_code_page orelse .windows1252;
+        try writer.print("Default codepage: {s} (id={})\n", .{ @tagName(code_page), @intFromEnum(code_page) });
+    }
+};
+
+/// A command-line argument that begins with an option prefix
+/// (`--`, `-` or `/`), plus helpers for slicing it and for building
+/// diagnostic spans that point into it.
+pub const Arg = struct {
+    prefix: enum { long, short, slash },
+    /// Offset into `full` at which the option name currently starts. It
+    /// begins just past the prefix and is advanced by `parse` as options
+    /// packed into the same argument are consumed.
+    name_offset: usize,
+    /// The complete argument string, prefix included.
+    full: []const u8,
+
+    /// Classifies `str` by its prefix. Returns null when `str` does not start
+    /// with `--`, `-` or `/` (which includes the empty string).
+    pub fn fromString(str: []const u8) ?@This() {
+        if (str.len == 0) return null;
+        switch (str[0]) {
+            '-' => {
+                const is_long = str.len >= 2 and str[1] == '-';
+                if (is_long) return .{ .prefix = .long, .name_offset = 2, .full = str };
+                return .{ .prefix = .short, .name_offset = 1, .full = str };
+            },
+            '/' => return .{ .prefix = .slash, .name_offset = 1, .full = str },
+            else => return null,
+        }
+    }
+
+    /// The prefix characters themselves: two bytes for `--`, otherwise one.
+    pub fn prefixSlice(self: Arg) []const u8 {
+        const prefix_len: usize = switch (self.prefix) {
+            .long => 2,
+            .short, .slash => 1,
+        };
+        return self.full[0..prefix_len];
+    }
+
+    /// Everything in `full` from `name_offset` onwards.
+    pub fn name(self: Arg) []const u8 {
+        return self.full[self.name_offset..];
+    }
+
+    /// The first `option_len` bytes of `name()`.
+    pub fn optionWithoutPrefix(self: Arg, option_len: usize) []const u8 {
+        const remaining = self.name();
+        return remaining[0..option_len];
+    }
+
+    /// Span used when an option's value is missing: it points at the
+    /// (absent) next argument.
+    pub fn missingSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan {
+        const prefix = self.prefixSlice();
+        return .{
+            .point_at_next_arg = true,
+            .value_offset = 0,
+            .name_offset = self.name_offset,
+            .prefix_len = prefix.len,
+        };
+    }
+
+    /// Same as `optionSpan` with a name length of zero.
+    pub fn optionAndAfterSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan {
+        return self.optionSpan(0);
+    }
+
+    /// Span starting at the current option name and covering `option_len` bytes.
+    pub fn optionSpan(self: Arg, option_len: usize) Diagnostics.ErrorDetails.ArgSpan {
+        const prefix = self.prefixSlice();
+        return .{
+            .name_offset = self.name_offset,
+            .prefix_len = prefix.len,
+            .name_len = option_len,
+        };
+    }
+
+    /// The value belonging to an option, as located by `Arg.value`.
+    pub const Value = struct {
+        slice: []const u8,
+        /// How far the argument index advances past the option and its value:
+        /// 1 when the value is part of the same argument, 2 when the value is
+        /// the following argument.
+        index_increment: u2 = 1,
+
+        /// Span pointing at this value. For a same-argument value the offset
+        /// is the pointer distance between `slice` and `arg.full`, so `slice`
+        /// must lie within `arg.full` in that case.
+        pub fn argSpan(self: Value, arg: Arg) Diagnostics.ErrorDetails.ArgSpan {
+            const prefix_len = arg.prefixSlice().len;
+            if (self.index_increment == 1) {
+                const offset_in_arg = @intFromPtr(self.slice.ptr) - @intFromPtr(arg.full.ptr);
+                return .{
+                    .value_offset = offset_in_arg,
+                    .prefix_len = prefix_len,
+                    .name_offset = arg.name_offset,
+                };
+            } else if (self.index_increment == 2) {
+                return .{
+                    .point_at_next_arg = true,
+                    .prefix_len = prefix_len,
+                    .name_offset = arg.name_offset,
+                };
+            } else unreachable;
+        }
+
+        /// Index of the argument that contains the value, given the index of
+        /// the argument that contains the option.
+        pub fn index(self: Value, arg_index: usize) usize {
+            return if (self.index_increment == 2) arg_index + 1 else arg_index;
+        }
+    };
+
+    /// Finds the value for an option whose name is `option_len` bytes long.
+    /// If anything follows the option name within this argument, that is the
+    /// value; otherwise the value is the next entry of `args` after `index`.
+    /// Returns `error.MissingValue` when neither exists.
+    pub fn value(self: Arg, option_len: usize, index: usize, args: []const []const u8) error{MissingValue}!Value {
+        const after_option = self.full[self.name_offset + option_len ..];
+        if (after_option.len != 0) {
+            return .{ .slice = after_option };
+        }
+        const next_index = index + 1;
+        if (next_index < args.len) {
+            return .{ .slice = args[next_index], .index_increment = 2 };
+        }
+        return error.MissingValue;
+    }
+
+    /// An option together with where it was found and the value it was given.
+    pub const Context = struct {
+        index: usize,
+        arg: Arg,
+        value: Value,
+    };
+};
+
+/// The errors `parse` can return: `error.ParseError` plus allocation failure
+/// (`Allocator.Error`).
+pub const ParseError = error{ParseError} || Allocator.Error;
+
+/// Note: Does not run `Options.maybeAppendRC` automatically. If that behavior is desired,
+/// it must be called separately.
+pub fn parse(allocator: Allocator, args: []const []const u8, diagnostics: *Diagnostics) ParseError!Options {
+ var options = Options{ .allocator = allocator };
+ errdefer options.deinit();
+
+ var output_filename: ?[]const u8 = null;
+ var output_filename_context: Arg.Context = undefined;
+
+ var arg_i: usize = 0;
+ next_arg: while (arg_i < args.len) {
+ var arg = Arg.fromString(args[arg_i]) orelse break;
+ if (arg.name().len == 0) {
+ switch (arg.prefix) {
+ // -- on its own ends arg parsing
+ .long => {
+ arg_i += 1;
+ break;
+ },
+ // - or / on its own is an error
+ else => {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid option: {s}", .{arg.prefixSlice()});
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ continue :next_arg;
+ },
+ }
+ }
+
+ while (arg.name().len > 0) {
+ const arg_name = arg.name();
+ // Note: These cases should be in order from longest to shortest, since
+ // shorter options that are a substring of a longer one could make
+ // the longer option's branch unreachable.
+ if (std.ascii.startsWithIgnoreCase(arg_name, ":no-preprocess")) {
+ options.preprocess = .no;
+ arg.name_offset += ":no-preprocess".len;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, ":mingw-includes")) {
+ const value = arg.value(":mingw-includes".len, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":mingw-includes".len) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ if (options.mingw_includes_dir) |overwritten_path| {
+ allocator.free(overwritten_path);
+ options.mingw_includes_dir = null;
+ }
+ const path = try allocator.dupe(u8, value.slice);
+ errdefer allocator.free(path);
+ options.mingw_includes_dir = path;
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, ":auto-includes")) {
+ const value = arg.value(":auto-includes".len, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":auto-includes".len) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ options.auto_includes = std.meta.stringToEnum(Options.AutoIncludes, value.slice) orelse blk: {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid auto includes setting: {s} ", .{value.slice});
+ try diagnostics.append(err_details);
+ break :blk options.auto_includes;
+ };
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, ":depfile-fmt")) {
+ const value = arg.value(":depfile-fmt".len, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":depfile-fmt".len) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ options.depfile_fmt = std.meta.stringToEnum(Options.DepfileFormat, value.slice) orelse blk: {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid depfile format setting: {s} ", .{value.slice});
+ try diagnostics.append(err_details);
+ break :blk options.depfile_fmt;
+ };
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, ":depfile")) {
+ const value = arg.value(":depfile".len, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":depfile".len) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ if (options.depfile_path) |overwritten_path| {
+ allocator.free(overwritten_path);
+ options.depfile_path = null;
+ }
+ const path = try allocator.dupe(u8, value.slice);
+ errdefer allocator.free(path);
+ options.depfile_path = path;
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "nologo")) {
+ // No-op, we don't display any 'logo' to suppress
+ arg.name_offset += "nologo".len;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, ":debug")) {
+ options.debug = true;
+ arg.name_offset += ":debug".len;
+ }
+ // Unsupported LCX/LCE options that need a value (within the same arg only)
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "tp:")) {
+ const rest = arg.full[arg.name_offset + 3 ..];
+ if (rest.len == 0) {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = .{
+ .name_offset = arg.name_offset,
+ .prefix_len = arg.prefixSlice().len,
+ .value_offset = arg.name_offset + 3,
+ } };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value for {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) });
+ try diagnostics.append(err_details);
+ }
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ continue :next_arg;
+ }
+ // Unsupported LCX/LCE options that need a value
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "tn")) {
+ const value = arg.value(2, arg_i, args) catch no_value: {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ // dummy zero-length slice starting where the value would have been
+ const value_start = arg.name_offset + 2;
+ break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
+ };
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ }
+ // Unsupported MUI options that need a value
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "fm") or
+ std.ascii.startsWithIgnoreCase(arg_name, "gn") or
+ std.ascii.startsWithIgnoreCase(arg_name, "g2"))
+ {
+ const value = arg.value(2, arg_i, args) catch no_value: {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ // dummy zero-length slice starting where the value would have been
+ const value_start = arg.name_offset + 2;
+ break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
+ };
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ }
+ // Unsupported MUI options that do not need a value
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "g1")) {
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg.name_offset += 2;
+ }
+ // Unsupported LCX/LCE options that do not need a value
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "tm") or
+ std.ascii.startsWithIgnoreCase(arg_name, "tc") or
+ std.ascii.startsWithIgnoreCase(arg_name, "tw") or
+ std.ascii.startsWithIgnoreCase(arg_name, "te") or
+ std.ascii.startsWithIgnoreCase(arg_name, "ti") or
+ std.ascii.startsWithIgnoreCase(arg_name, "ta"))
+ {
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg.name_offset += 2;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "fo")) {
+ const value = arg.value(2, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing output path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ output_filename_context = .{ .index = arg_i, .arg = arg, .value = value };
+ output_filename = value.slice;
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "sl")) {
+ const value = arg.value(2, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ const percent_str = value.slice;
+ const percent: u32 = parsePercent(percent_str) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid percent format '{s}'", .{percent_str});
+ try diagnostics.append(err_details);
+ var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i };
+ var note_writer = note_details.msg.writer(allocator);
+ try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)");
+ try diagnostics.append(note_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ };
+ if (percent == 0 or percent > 100) {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("percent out of range: {} (parsed from '{s}')", .{ percent, percent_str });
+ try diagnostics.append(err_details);
+ var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i };
+ var note_writer = note_details.msg.writer(allocator);
+ try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)");
+ try diagnostics.append(note_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ }
+ const percent_float = @as(f32, @floatFromInt(percent)) / 100;
+ options.max_string_literal_codepoints = @intFromFloat(percent_float * max_string_literal_length_100_percent);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "ln")) {
+ const value = arg.value(2, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ const tag = value.slice;
+ options.default_language_id = lang.tagToInt(tag) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid language tag: {s}", .{tag});
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ };
+ if (options.default_language_id.? == lang.LOCALE_CUSTOM_UNSPECIFIED) {
+ var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("language tag '{s}' does not have an assigned ID so it will be resolved to LOCALE_CUSTOM_UNSPECIFIED (id=0x{x})", .{ tag, lang.LOCALE_CUSTOM_UNSPECIFIED });
+ try diagnostics.append(err_details);
+ }
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "l")) {
+ const value = arg.value(1, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing language ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ const num_str = value.slice;
+ options.default_language_id = lang.parseInt(num_str) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid language ID: {s}", .{num_str});
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ };
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "h") or std.mem.startsWith(u8, arg_name, "?")) {
+ options.print_help_and_exit = true;
+ // If there's been an error to this point, then we still want to fail
+ if (diagnostics.hasError()) return error.ParseError;
+ return options;
+ }
+ // 1 char unsupported MUI options that need a value
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "q") or
+ std.ascii.startsWithIgnoreCase(arg_name, "g"))
+ {
+ const value = arg.value(1, arg_i, args) catch no_value: {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ // dummy zero-length slice starting where the value would have been
+ const value_start = arg.name_offset + 1;
+ break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
+ };
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ }
+ // Undocumented (and unsupported) options that need a value
+ // /z has to do something with font substitution
+ // /s has something to do with HWB resources being inserted into the .res
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "z") or
+ std.ascii.startsWithIgnoreCase(arg_name, "s"))
+ {
+ const value = arg.value(1, arg_i, args) catch no_value: {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ // dummy zero-length slice starting where the value would have been
+ const value_start = arg.name_offset + 1;
+ break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
+ };
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ }
+ // 1 char unsupported LCX/LCE options that do not need a value
+ else if (std.ascii.startsWithIgnoreCase(arg_name, "t")) {
+ var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(1) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "c")) {
+ const value = arg.value(1, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing code page ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ const num_str = value.slice;
+ const code_page_id = std.fmt.parseUnsigned(u16, num_str, 10) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid code page ID: {s}", .{num_str});
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ };
+ options.default_code_page = CodePage.getByIdentifierEnsureSupported(code_page_id) catch |err| switch (err) {
+ error.InvalidCodePage => {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid or unknown code page ID: {}", .{code_page_id});
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ },
+ error.UnsupportedCodePage => {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("unsupported code page: {s} (id={})", .{
+ @tagName(CodePage.getByIdentifier(code_page_id) catch unreachable),
+ code_page_id,
+ });
+ try diagnostics.append(err_details);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ },
+ };
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "v")) {
+ options.verbose = true;
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "x")) {
+ options.ignore_include_env_var = true;
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "p")) {
+ options.preprocess = .only;
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "i")) {
+ const value = arg.value(1, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing include path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ const path = value.slice;
+ const duped = try allocator.dupe(u8, path);
+ errdefer allocator.free(duped);
+ try options.extra_include_paths.append(options.allocator, duped);
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "r")) {
+ // From https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line-
+ // "Ignored. Provided for compatibility with existing makefiles."
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "n")) {
+ options.null_terminate_string_table_strings = true;
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "y")) {
+ options.silent_duplicate_control_ids = true;
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "w")) {
+ options.warn_instead_of_error_on_invalid_code_page = true;
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "a")) {
+ // Undocumented option with unknown function
+ // TODO: More investigation to figure out what it does (if anything)
+ var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = arg.optionSpan(1) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("option {s}{s} has no effect (it is undocumented and its function is unknown in the Win32 RC compiler)", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg.name_offset += 1;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "d")) {
+ const value = arg.value(1, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing symbol to define after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ var tokenizer = std.mem.tokenize(u8, value.slice, "=");
+ // guaranteed to exist since an empty value.slice would invoke
+ // the 'missing symbol to define' branch above
+ const symbol = tokenizer.next().?;
+ const symbol_value = tokenizer.next() orelse "1";
+
+ if (isValidIdentifier(symbol)) {
+ try options.define(symbol, symbol_value);
+ } else {
+ var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be defined", .{symbol});
+ try diagnostics.append(err_details);
+ }
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else if (std.ascii.startsWithIgnoreCase(arg_name, "u")) {
+ const value = arg.value(1, arg_i, args) catch {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("missing symbol to undefine after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ break :next_arg;
+ };
+ const symbol = value.slice;
+ if (isValidIdentifier(symbol)) {
+ try options.undefine(symbol);
+ } else {
+ var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be undefined", .{symbol});
+ try diagnostics.append(err_details);
+ }
+ arg_i += value.index_increment;
+ continue :next_arg;
+ } else {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.print("invalid option: {s}{s}", .{ arg.prefixSlice(), arg.name() });
+ try diagnostics.append(err_details);
+ arg_i += 1;
+ continue :next_arg;
+ }
+ } else {
+ // The while loop exited via its conditional, meaning we are done with
+ // the current arg and can move on to the next
+ arg_i += 1;
+ continue;
+ }
+ }
+
+ const positionals = args[arg_i..];
+
+ if (positionals.len < 1) {
+ var err_details = Diagnostics.ErrorDetails{ .print_args = false, .arg_index = arg_i };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.writeAll("missing input filename");
+ try diagnostics.append(err_details);
+
+ const last_arg = args[args.len - 1];
+ if (arg_i > 0 and last_arg.len > 0 and last_arg[0] == '/' and std.ascii.endsWithIgnoreCase(last_arg, ".rc")) {
+ var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = true, .arg_index = arg_i - 1 };
+ var note_writer = note_details.msg.writer(allocator);
+ try note_writer.writeAll("if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing");
+ try diagnostics.append(note_details);
+ }
+
+ // This is a fatal enough problem to justify an early return, since
+ // things after this rely on the value of the input filename.
+ return error.ParseError;
+ }
+ options.input_filename = try allocator.dupe(u8, positionals[0]);
+
+ if (positionals.len > 1) {
+ if (output_filename != null) {
+ var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i + 1 };
+ var msg_writer = err_details.msg.writer(allocator);
+ try msg_writer.writeAll("output filename already specified");
+ try diagnostics.append(err_details);
+ var note_details = Diagnostics.ErrorDetails{
+ .type = .note,
+ .arg_index = output_filename_context.value.index(output_filename_context.index),
+ .arg_span = output_filename_context.value.argSpan(output_filename_context.arg),
+ };
+ var note_writer = note_details.msg.writer(allocator);
+ try note_writer.writeAll("output filename previously specified here");
+ try diagnostics.append(note_details);
+ } else {
+ output_filename = positionals[1];
+ }
+ }
+ if (output_filename == null) {
+ var buf = std.ArrayList(u8).init(allocator);
+ errdefer buf.deinit();
+
+ if (std.fs.path.dirname(options.input_filename)) |dirname| {
+ var end_pos = dirname.len;
+ // We want to ensure that we write a path separator at the end, so if the dirname
+ // doesn't end with a path sep then include the char after the dirname
+ // which must be a path sep.
+ if (!std.fs.path.isSep(dirname[dirname.len - 1])) end_pos += 1;
+ try buf.appendSlice(options.input_filename[0..end_pos]);
+ }
+ try buf.appendSlice(std.fs.path.stem(options.input_filename));
+ if (options.preprocess == .only) {
+ try buf.appendSlice(".rcpp");
+ } else {
+ try buf.appendSlice(".res");
+ }
+
+ options.output_filename = try buf.toOwnedSlice();
+ } else {
+ options.output_filename = try allocator.dupe(u8, output_filename.?);
+ }
+
+ if (diagnostics.hasError()) {
+ return error.ParseError;
+ }
+
+ return options;
+}
+
+/// Returns true if `str` is a valid C identifier for use in a #define/#undef macro.
+///
+/// A valid identifier is non-empty, consists only of ASCII letters, ASCII digits
+/// and underscores, and does not begin with a digit.
+pub fn isValidIdentifier(str: []const u8) bool {
+    // An empty name is not an identifier; without this check the loop below
+    // would never run and the function would report it as valid.
+    if (str.len == 0) return false;
+    for (str, 0..) |c, i| switch (c) {
+        // Digits are allowed anywhere except the first position.
+        '0'...'9' => if (i == 0) return false,
+        'a'...'z', 'A'...'Z', '_' => {},
+        else => return false,
+    };
+    return true;
+}
+
+/// This function is specific to how the Win32 RC command line interprets
+/// max string literal length percent.
+/// - Digits are parsed as decimal (radix 10)
+/// - Wraps on overflow of u32
+/// - Stops parsing at the first char that is not a decimal digit
+/// - Errors if the first char (after the optional prefix) is not a digit
+/// - `-` (negative) prefix is allowed; the result is negated with wrapping,
+///   and a lone `-` yields 0
+/// - Errors on empty input
+pub fn parsePercent(str: []const u8) error{InvalidFormat}!u32 {
+    // There is no first char to inspect in an empty string, so treat it as
+    // malformed instead of indexing out of bounds.
+    if (str.len == 0) return error.InvalidFormat;
+
+    const radix: u8 = 10;
+    const negative = str[0] == '-';
+    const digits = if (negative) str[1..] else str;
+
+    var result: u32 = 0;
+    for (digits, 0..) |c, i| {
+        // charToDigit rejects anything that is not a digit of the radix
+        const digit = std.fmt.charToDigit(c, radix) catch {
+            // First digit must be valid
+            if (i == 0) return error.InvalidFormat;
+            // On any later invalid digit, just stop parsing but don't fail
+            break;
+        };
+        result *%= radix;
+        result +%= digit;
+    }
+
+    return if (negative) 0 -% result else result;
+}
+
+test parsePercent {
+    // Inputs that parse successfully, paired with the (possibly wrapped) result.
+    const valid_cases = [_]struct { input: []const u8, expected: u32 }{
+        .{ .input = "16", .expected = 16 },
+        .{ .input = "0x1A", .expected = 0 },
+        .{ .input = "1zzzz", .expected = 0x1 },
+        .{ .input = "-1", .expected = 0xffffffff },
+        .{ .input = "-16", .expected = 0xfffffff0 },
+        .{ .input = "4294967297", .expected = 1 },
+    };
+    for (valid_cases) |case| {
+        try std.testing.expectEqual(case.expected, try parsePercent(case.input));
+    }
+
+    // Inputs whose first char (after an optional `-`) is not a decimal digit.
+    const invalid_inputs = [_][]const u8{ "--1", "ha", "¹", "~1" };
+    for (invalid_inputs) |input| {
+        try std.testing.expectError(error.InvalidFormat, parsePercent(input));
+    }
+}
+
+/// Renders one CLI diagnostic to `writer`, colorized according to `config`.
+///
+/// The first line is always `<cli>: ` followed by the severity label and the
+/// message. If `err_details.print_args` is false, a blank line follows and
+/// nothing else is written. Otherwise the argument at `err_details.arg_index`
+/// (plus the following argument when the span points at the next arg) is
+/// echoed, followed by a line of `~`/`^` markers placed according to
+/// `err_details.arg_span`.
+pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, err_details: Diagnostics.ErrorDetails, args: []const []const u8) !void {
+    const span = err_details.arg_span;
+
+    // Header line: "<cli>: <severity>: <message>"
+    try config.setColor(writer, .dim);
+    try writer.writeAll("<cli>");
+    try config.setColor(writer, .reset);
+    try config.setColor(writer, .bold);
+    try writer.writeAll(": ");
+    const severity_label: []const u8 = switch (err_details.type) {
+        .err => "error: ",
+        .warning => "warning: ",
+        .note => "note: ",
+    };
+    switch (err_details.type) {
+        .err => try config.setColor(writer, .red),
+        .warning => try config.setColor(writer, .yellow),
+        .note => try config.setColor(writer, .cyan),
+    }
+    try writer.writeAll(severity_label);
+    try config.setColor(writer, .reset);
+    try config.setColor(writer, .bold);
+    try writer.writeAll(err_details.msg.items);
+    try writer.writeByte('\n');
+    try config.setColor(writer, .reset);
+
+    if (!err_details.print_args) {
+        try writer.writeByte('\n');
+        return;
+    }
+
+    const leader = " ... ";
+    try config.setColor(writer, .dim);
+    try writer.writeAll(leader);
+    try config.setColor(writer, .reset);
+
+    // Split the offending argument into prefix / skipped options / name / rest
+    const full_arg = args[err_details.arg_index];
+    const prefix_part = full_arg[0..span.prefix_len];
+    const skipped_part = full_arg[span.prefix_len..span.name_offset];
+    const name_part = name: {
+        const from_name = full_arg[span.name_offset..];
+        // A name_len of zero means the name extends to the end of the arg
+        break :name if (span.name_len > 0) from_name[0..span.name_len] else from_name;
+    };
+    const trailing_part = full_arg[span.name_offset + name_part.len ..];
+
+    try writer.writeAll(prefix_part);
+    if (skipped_part.len > 0) {
+        try config.setColor(writer, .dim);
+        try writer.writeAll(skipped_part);
+        try config.setColor(writer, .reset);
+    }
+    try writer.writeAll(name_part);
+    if (trailing_part.len > 0) {
+        try config.setColor(writer, .dim);
+        try writer.writeAll(trailing_part);
+        try config.setColor(writer, .reset);
+    }
+
+    // Echo the following argument too when the diagnostic points at it
+    const next_arg_len: usize = if (span.point_at_next_arg and err_details.arg_index + 1 < args.len) len: {
+        const following_arg = args[err_details.arg_index + 1];
+        try writer.writeByte(' ');
+        try writer.writeAll(following_arg);
+        break :len following_arg.len;
+    } else 0;
+
+    const last_shown_arg_index = if (span.point_at_next_arg) err_details.arg_index + 1 else err_details.arg_index;
+    if (last_shown_arg_index + 1 < args.len) {
+        // special case for when pointing to a missing value within the same arg
+        // as the name
+        if (span.value_offset >= full_arg.len) try writer.writeByte(' ');
+        try config.setColor(writer, .dim);
+        try writer.writeAll(" ...");
+        try config.setColor(writer, .reset);
+    }
+    try writer.writeByte('\n');
+
+    // Marker line, aligned underneath the echoed argument(s)
+    try config.setColor(writer, .green);
+    try writer.writeByteNTimes(' ', leader.len);
+    if (span.prefix_len == full_arg.len) {
+        // Special case for when the option is *only* a prefix (e.g. invalid option: -)
+        try writer.writeByteNTimes('^', span.prefix_len);
+    } else {
+        try writer.writeByteNTimes('~', span.prefix_len);
+        try writer.writeByteNTimes(' ', span.name_offset - span.prefix_len);
+        if (span.value_offset > 0) {
+            // Point at a value located inside the same argument as the name
+            try writer.writeByteNTimes('~', span.value_offset - span.name_offset);
+            try writer.writeByte('^');
+            if (span.value_offset < full_arg.len) {
+                try writer.writeByteNTimes('~', full_arg.len - span.value_offset - 1);
+            }
+        } else if (span.point_at_next_arg) {
+            // Point at the start of the following argument
+            try writer.writeByteNTimes('~', full_arg.len - span.name_offset + 1);
+            try writer.writeByte('^');
+            if (next_arg_len > 0) {
+                try writer.writeByteNTimes('~', next_arg_len - 1);
+            }
+        } else {
+            // Point at the option name itself
+            try writer.writeByte('^');
+            try writer.writeByteNTimes('~', name_part.len - 1);
+        }
+    }
+    try writer.writeByte('\n');
+    try config.setColor(writer, .reset);
+}
+
+/// Parses `args`, expecting success with no diagnostic output at all.
+/// The caller is responsible for calling `deinit` on the returned options.
+fn testParse(args: []const []const u8) !Options {
+    const maybe_options = try testParseOutput(args, "");
+    return maybe_options.?;
+}
+
+/// Parses `args`, expecting success while the rendered diagnostics match
+/// `expected_output` exactly.
+/// The caller is responsible for calling `deinit` on the returned options.
+fn testParseWarning(args: []const []const u8, expected_output: []const u8) !Options {
+    const maybe_options = try testParseOutput(args, expected_output);
+    return maybe_options.?;
+}
+
+/// Parses `args`, expecting a parse error whose rendered diagnostics match
+/// `expected_output` exactly. If parsing unexpectedly succeeds, the options
+/// are printed and freed, and `error.TestExpectedError` is returned.
+fn testParseError(args: []const []const u8, expected_output: []const u8) !void {
+    var maybe_options = try testParseOutput(args, expected_output);
+    if (maybe_options) |*options| {
+        std.debug.print("expected error, got options: {}\n", .{options.*});
+        options.deinit();
+        return error.TestExpectedError;
+    }
+}
+
+/// Runs `parse` on `args` with a fresh `Diagnostics`, renders every collected
+/// diagnostic without color, and asserts that the rendered text equals
+/// `expected_output`.
+///
+/// Returns the parsed options (the caller must `deinit` them), or null when
+/// `parse` failed with `error.ParseError`. Any other error is propagated.
+fn testParseOutput(args: []const []const u8, expected_output: []const u8) !?Options {
+    var diagnostics = Diagnostics.init(std.testing.allocator);
+    defer diagnostics.deinit();
+
+    var rendered = std.ArrayList(u8).init(std.testing.allocator);
+    defer rendered.deinit();
+
+    var maybe_options: ?Options = parse(std.testing.allocator, args, &diagnostics) catch |err| switch (err) {
+        error.ParseError => null,
+        else => |e| return e,
+    };
+    // Don't leak successfully parsed options if the comparison below fails.
+    errdefer if (maybe_options) |*options| options.deinit();
+
+    try diagnostics.renderToWriter(args, rendered.writer(), .no_color);
+    try std.testing.expectEqualStrings(expected_output, rendered.items);
+    return maybe_options;
+}
+
+// Checks the rendered diagnostics for malformed option arguments. None of the
+// cases provide an input file, so each one also ends with the
+// "missing input filename" error.
+test "parse errors: basic" {
+ // A prefix with no option name after it
+ try testParseError(&.{"/"},
+ \\<cli>: error: invalid option: /
+ \\ ... /
+ \\ ^
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ // /ln given without a language tag
+ try testParseError(&.{"/ln"},
+ \\<cli>: error: missing language tag after /ln option
+ \\ ... /ln
+ \\ ~~~~^
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ // Same, but with ln following another option (v) in a single argument
+ try testParseError(&.{"-vln"},
+ \\<cli>: error: missing language tag after -ln option
+ \\ ... -vln
+ \\ ~ ~~~^
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ // Unknown option names with each of the /, - and -- prefixes
+ try testParseError(&.{"/_not-an-option"},
+ \\<cli>: error: invalid option: /_not-an-option
+ \\ ... /_not-an-option
+ \\ ~^~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ try testParseError(&.{"-_not-an-option"},
+ \\<cli>: error: invalid option: -_not-an-option
+ \\ ... -_not-an-option
+ \\ ~^~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ try testParseError(&.{"--_not-an-option"},
+ \\<cli>: error: invalid option: --_not-an-option
+ \\ ... --_not-an-option
+ \\ ~~^~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ // Unknown option names that follow a valid option (v) in the same argument;
+ // the message omits the already-handled v
+ try testParseError(&.{"/v_not-an-option"},
+ \\<cli>: error: invalid option: /_not-an-option
+ \\ ... /v_not-an-option
+ \\ ~ ^~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ try testParseError(&.{"-v_not-an-option"},
+ \\<cli>: error: invalid option: -_not-an-option
+ \\ ... -v_not-an-option
+ \\ ~ ^~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ try testParseError(&.{"--v_not-an-option"},
+ \\<cli>: error: invalid option: --_not-an-option
+ \\ ... --v_not-an-option
+ \\ ~~ ^~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\
+ );
+ // An absolute path starting with / is parsed as the /s option; a note
+ // suggests using -- to exclude it from option parsing
+ try testParseError(&.{"/some/absolute/path/parsed/as/an/option.rc"},
+ \\<cli>: error: the /s option is unsupported
+ \\ ... /some/absolute/path/parsed/as/an/option.rc
+ \\ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ \\<cli>: error: missing input filename
+ \\
+ \\<cli>: note: if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing
+ \\ ... /some/absolute/path/parsed/as/an/option.rc
+ \\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ \\
+ );
+}
+
+// Checks the "invalid language tag" error for /ln, both when the tag is given
+// as a separate argument and when it is attached directly to the option.
+test "parse errors: /ln" {
+ // Tag in the following argument
+ try testParseError(&.{ "/ln", "invalid", "foo.rc" },
+ \\<cli>: error: invalid language tag: invalid
+ \\ ... /ln invalid ...
+ \\ ~~~~^~~~~~~
+ \\
+ );
+ // Tag in the same argument as the option name
+ try testParseError(&.{ "/lninvalid", "foo.rc" },
+ \\<cli>: error: invalid language tag: invalid
+ \\ ... /lninvalid ...
+ \\ ~~~^~~~~~~
+ \\
+ );
+}
+
+test "parse: options" {
+    const testing = std.testing;
+
+    // A single flag
+    {
+        var opts = try testParse(&.{ "/v", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(true, opts.verbose);
+        try testing.expectEqualStrings("foo.rc", opts.input_filename);
+        try testing.expectEqualStrings("foo.res", opts.output_filename);
+    }
+    // Two flags combined in one argument
+    {
+        var opts = try testParse(&.{ "/vx", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(true, opts.verbose);
+        try testing.expectEqual(true, opts.ignore_include_env_var);
+        try testing.expectEqualStrings("foo.rc", opts.input_filename);
+        try testing.expectEqualStrings("foo.res", opts.output_filename);
+    }
+    // The same two flags in the opposite order
+    {
+        var opts = try testParse(&.{ "/xv", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(true, opts.verbose);
+        try testing.expectEqual(true, opts.ignore_include_env_var);
+        try testing.expectEqualStrings("foo.rc", opts.input_filename);
+        try testing.expectEqualStrings("foo.res", opts.output_filename);
+    }
+    // Flags followed by an upper-case FO option with its value attached
+    {
+        var opts = try testParse(&.{ "/xvFObar.res", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(true, opts.verbose);
+        try testing.expectEqual(true, opts.ignore_include_env_var);
+        try testing.expectEqualStrings("foo.rc", opts.input_filename);
+        try testing.expectEqualStrings("bar.res", opts.output_filename);
+    }
+}
+
+test "parse: define and undefine" {
+    const testing = std.testing;
+
+    // A define without an explicit value gets the value "1"
+    {
+        var opts = try testParse(&.{ "/dfoo", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqualStrings("1", opts.symbols.get("foo").?.define);
+    }
+    // A later define of the same symbol replaces the earlier value
+    {
+        var opts = try testParse(&.{ "/dfoo=bar", "/dfoo=baz", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqualStrings("baz", opts.symbols.get("foo").?.define);
+    }
+    // A plain undefine
+    {
+        var opts = try testParse(&.{ "/ufoo", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(Options.SymbolAction.undefine, opts.symbols.get("foo").?);
+    }
+    // Once undefined, future defines are ignored
+    {
+        var opts = try testParse(&.{ "/ufoo", "/dfoo", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(Options.SymbolAction.undefine, opts.symbols.get("foo").?);
+    }
+    // Undefined always takes precedence
+    {
+        var opts = try testParse(&.{ "/dfoo", "/ufoo", "/dfoo", "foo.rc" });
+        defer opts.deinit();
+
+        try testing.expectEqual(Options.SymbolAction.undefine, opts.symbols.get("foo").?);
+    }
+    // Warn + ignore invalid identifiers
+    {
+        var opts = try testParseWarning(
+            &.{ "/dfoo bar", "/u", "0leadingdigit", "foo.rc" },
+            \\<cli>: warning: symbol "foo bar" is not a valid identifier and therefore cannot be defined
+            \\ ... /dfoo bar ...
+            \\ ~~^~~~~~~
+            \\<cli>: warning: symbol "0leadingdigit" is not a valid identifier and therefore cannot be undefined
+            \\ ... /u 0leadingdigit ...
+            \\ ~~~^~~~~~~~~~~~~
+            \\
+            ,
+        );
+        defer opts.deinit();
+
+        try testing.expectEqual(@as(usize, 0), opts.symbols.count());
+    }
+}
+
+// Checks the /sl (string literal length limit) option: the error output for
+// bad values and the resulting `max_string_literal_codepoints` for good ones.
+test "parse: /sl" {
+ // 0 parses as a number but is rejected as out of range
+ try testParseError(&.{ "/sl", "0", "foo.rc" },
+ \\<cli>: error: percent out of range: 0 (parsed from '0')
+ \\ ... /sl 0 ...
+ \\ ~~~~^
+ \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive)
+ \\
+ \\
+ );
+ // A value that does not start with a digit is a format error
+ try testParseError(&.{ "/sl", "abcd", "foo.rc" },
+ \\<cli>: error: invalid percent format 'abcd'
+ \\ ... /sl abcd ...
+ \\ ~~~~^~~~
+ \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive)
+ \\
+ \\
+ );
+ // Without /sl, the lexer's default limit is used
+ {
+ var options = try testParse(&.{"foo.rc"});
+ defer options.deinit();
+
+ try std.testing.expectEqual(@as(u15, lex.default_max_string_literal_codepoints), options.max_string_literal_codepoints);
+ }
+ // 100 percent maps to max_string_literal_length_100_percent
+ {
+ var options = try testParse(&.{ "/sl100", "foo.rc" });
+ defer options.deinit();
+
+ try std.testing.expectEqual(@as(u15, max_string_literal_length_100_percent), options.max_string_literal_codepoints);
+ }
+ // Upper-case option name with a - prefix; 33 percent is expected to give 2703
+ {
+ var options = try testParse(&.{ "-SL33", "foo.rc" });
+ defer options.deinit();
+
+ try std.testing.expectEqual(@as(u15, 2703), options.max_string_literal_codepoints);
+ }
+ // 15 percent is expected to give 1228
+ {
+ var options = try testParse(&.{ "/sl15", "foo.rc" });
+ defer options.deinit();
+
+ try std.testing.expectEqual(@as(u15, 1228), options.max_string_literal_codepoints);
+ }
+}
+
+// Checks that /q, /g1, -G2, /fm and /g are each reported as unsupported.
+// An input file (foo.rc) is provided, so these five errors are the only
+// expected output.
+test "parse: unsupported MUI-related options" {
+ try testParseError(&.{ "/q", "blah", "/g1", "-G2", "blah", "/fm", "blah", "/g", "blah", "foo.rc" },
+ \\<cli>: error: the /q option is unsupported
+ \\ ... /q ...
+ \\ ~^
+ \\<cli>: error: the /g1 option is unsupported
+ \\ ... /g1 ...
+ \\ ~^~
+ \\<cli>: error: the -G2 option is unsupported
+ \\ ... -G2 ...
+ \\ ~^~
+ \\<cli>: error: the /fm option is unsupported
+ \\ ... /fm ...
+ \\ ~^~
+ \\<cli>: error: the /g option is unsupported
+ \\ ... /g ...
+ \\ ~^
+ \\
+ );
+}
+
+// Checks that the /t family of options (/t, /tp:, /tm, /tc, /tw, /te, /ti,
+// /ta, /tn) are each reported as unsupported. A bare "/tp:" additionally
+// reports a missing value, and "-TEti" reports two options from a single
+// argument.
+test "parse: unsupported LCX/LCE-related options" {
+ try testParseError(&.{ "/t", "/tp:", "/tp:blah", "/tm", "/tc", "/tw", "-TEti", "/ta", "/tn", "blah", "foo.rc" },
+ \\<cli>: error: the /t option is unsupported
+ \\ ... /t ...
+ \\ ~^
+ \\<cli>: error: missing value for /tp: option
+ \\ ... /tp: ...
+ \\ ~~~~^
+ \\<cli>: error: the /tp: option is unsupported
+ \\ ... /tp: ...
+ \\ ~^~~
+ \\<cli>: error: the /tp: option is unsupported
+ \\ ... /tp:blah ...
+ \\ ~^~~~~~~
+ \\<cli>: error: the /tm option is unsupported
+ \\ ... /tm ...
+ \\ ~^~
+ \\<cli>: error: the /tc option is unsupported
+ \\ ... /tc ...
+ \\ ~^~
+ \\<cli>: error: the /tw option is unsupported
+ \\ ... /tw ...
+ \\ ~^~
+ \\<cli>: error: the -TE option is unsupported
+ \\ ... -TEti ...
+ \\ ~^~
+ \\<cli>: error: the -ti option is unsupported
+ \\ ... -TEti ...
+ \\ ~ ^~
+ \\<cli>: error: the /ta option is unsupported
+ \\ ... /ta ...
+ \\ ~^~
+ \\<cli>: error: the /tn option is unsupported
+ \\ ... /tn ...
+ \\ ~^~
+ \\
+ );
+}
+
+test "maybeAppendRC" {
+    const testing = std.testing;
+
+    var tmp_dir = testing.tmpDir(.{});
+    defer tmp_dir.cleanup();
+
+    var opts = try testParse(&.{"foo"});
+    defer opts.deinit();
+    try testing.expectEqualStrings("foo", opts.input_filename);
+
+    // While a file with the verbatim name exists, it is found as-is and
+    // .rc must not be appended.
+    (try tmp_dir.dir.createFile("foo", .{})).close();
+    try opts.maybeAppendRC(tmp_dir.dir);
+    try testing.expectEqualStrings("foo", opts.input_filename);
+
+    // After deleting the file, the verbatim name is no longer found. The
+    // input filename has no extension, so .rc should now get appended.
+    try tmp_dir.dir.deleteFile("foo");
+    try opts.maybeAppendRC(tmp_dir.dir);
+    try testing.expectEqualStrings("foo.rc", opts.input_filename);
+}
diff --git a/lib/compiler/resinator/code_pages.zig b/lib/compiler/resinator/code_pages.zig
@@ -0,0 +1,500 @@
+const std = @import("std");
+const windows1252 = @import("windows1252.zig");
+
+// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing
+// than they are to the contents of this file.
+//
+// ‰ representations for context:
+// Win-1252 89
+// UTF-8 E2 80 B0
+// UTF-16 20 30
+//
+// With code page 65001:
+// ‰ RCDATA { "‰" L"‰" }
+// File encoded as Windows-1252:
+// ‰ => <U+FFFD REPLACEMENT CHARACTER> as u16
+// "‰" => 0x3F ('?')
+// L"‰" => <U+FFFD REPLACEMENT CHARACTER> as u16
+// File encoded as UTF-8:
+// ‰ => <U+2030 ‰> as u16
+// "‰" => 0x89 ('‰' encoded as Windows-1252)
+// L"‰" => <U+2030 ‰> as u16
+//
+// With code page 1252:
+// ‰ RCDATA { "‰" L"‰" }
+// File encoded as Windows-1252:
+// ‰ => <U+2030 ‰> as u16
+// "‰" => 0x89 ('‰' encoded as Windows-1252)
+// L"‰" => <U+2030 ‰> as u16
+// File encoded as UTF-8:
+// ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16
+// ^ first byte of utf8 representation
+// ^ second byte of UTF-8 representation (0x80), but interpreted as
+// Windows-1252 ('€') and then converted to UTF-16 (<U+20AC>)
+// ^ third byte of utf8 representation
+// "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation)
+// L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation)
+//
+// With code page 1252:
+// <0x90> RCDATA { "<0x90>" L"<0x90>" }
+// File encoded as Windows-1252:
+// <0x90> => 0x90 as u16
+// "<0x90>" => 0x90
+// L"<0x90>" => 0x90 as u16
+// File encoded as UTF-8:
+// <0x90> => 0xC2 as u16, 0x90 as u16
+// "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <U+0090>)
+// L"<0x90>" => 0xC2 as u16, 0x90 as u16
+//
+// Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>):
+// "Âa" L"Âa" "\xC2ad" L"\xC2AD"
+// With code page 1252:
+// C2 61 C2 00 61 00 C2 61 64 AD C2
+// Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD
+// \xC2~`
+// With code page 65001:
+// 3F 61 FD FF 61 00 C2 61 64 AD C2
+// ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD
+// `. `. `~\xC2
+// `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continuation byte after it).
+// `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the
+// `. invalid sequence so only the <0xC2> gets converted to <U+FFFD>.
+// `~Same as ^ but converted to '?' instead.
+//
+// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>):
+// "ð€a" L"ð€a"
+// With code page 1252:
+// F0 80 61 F0 00 AC 20 61 00
+// ð^ €^ a^ ð~~~^ €~~~^ a~~~^
+// With code page 65001:
+// 3F 61 FD FF 61 00
+// ^. a^ ^~~~. a~~~^
+// `. `.
+// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so
+// `. both bytes are considered an invalid sequence and get converted to '<U+FFFD>'
+// `~Same as ^ but converted to '?' instead.
+
+/// Windows code page identifiers. The integer value of each tag is the numeric
+/// identifier of the code page.
+///
+/// Only `windows1252` and `utf8` can be decoded by `codepointAt`; every other
+/// tag is a recognized identifier that is not supported (see `isSupported`).
+///
+/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
+pub const CodePage = enum(u16) {
+    // supported
+    windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows)
+    utf8 = 65001, // utf-8 Unicode (UTF-8)
+
+    // unsupported but valid
+    ibm037 = 37, // IBM037 IBM EBCDIC US-Canada
+    ibm437 = 437, // IBM437 OEM United States
+    ibm500 = 500, // IBM500 IBM EBCDIC International
+    asmo708 = 708, // ASMO-708 Arabic (ASMO 708)
+    asmo449plus = 709, // Arabic (ASMO-449+, BCON V4)
+    transparent_arabic = 710, // Arabic - Transparent Arabic
+    dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS)
+    ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS)
+    ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS)
+    ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS)
+    ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS)
+    ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian)
+    ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS)
+    ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol
+    ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS)
+    ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS)
+    dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS)
+    ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS)
+    ibm864 = 864, // IBM864 OEM Arabic; Arabic (864)
+    ibm865 = 865, // IBM865 OEM Nordic; Nordic (DOS)
+    cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS)
+    ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS)
+    ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
+    windows874 = 874, // windows-874 Thai (Windows)
+    cp875 = 875, // cp875 IBM EBCDIC Greek Modern
+    shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS)
+    gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
+    ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code)
+    big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
+    ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5)
+    ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System
+    ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
+    ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
+    ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
+    ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
+    ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
+    ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
+    ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
+    ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
+    ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
+    ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
+    utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
+    utf16_fffe = 1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications
+    windows1250 = 1250, // windows-1250 ANSI Central European; Central European (Windows)
+    windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows)
+    windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows)
+    windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows)
+    windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows)
+    windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows)
+    windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows)
+    windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows)
+    johab = 1361, // Johab Korean (Johab)
+    macintosh = 10000, // macintosh MAC Roman; Western European (Mac)
+    x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac)
+    x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
+    x_mac_korean = 10003, // x-mac-korean Korean (Mac)
+    x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac)
+    x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac)
+    x_mac_greek = 10006, // x-mac-greek Greek (Mac)
+    x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac)
+    x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
+    x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac)
+    x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac)
+    x_mac_thai = 10021, // x-mac-thai Thai (Mac)
+    x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac)
+    x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac)
+    x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac)
+    x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac)
+    utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
+    utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
+    x_chinese_cns = 20000, // x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
+    x_cp20001 = 20001, // x-cp20001 TCA Taiwan
+    x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten)
+    x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan
+    x_cp20004 = 20004, // x-cp20004 TeleText Taiwan
+    x_cp20005 = 20005, // x-cp20005 Wang Taiwan
+    x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
+    x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit)
+    x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit)
+    x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit)
+    us_ascii = 20127, // us-ascii US-ASCII (7-bit)
+    x_cp20261 = 20261, // x-cp20261 T.61
+    x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent
+    ibm273 = 20273, // IBM273 IBM EBCDIC Germany
+    ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway
+    ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden
+    ibm280 = 20280, // IBM280 IBM EBCDIC Italy
+    ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain
+    ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom
+    ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended
+    ibm297 = 20297, // IBM297 IBM EBCDIC France
+    ibm420 = 20420, // IBM420 IBM EBCDIC Arabic
+    ibm423 = 20423, // IBM423 IBM EBCDIC Greek
+    ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew
+    x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended
+    ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai
+    koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R)
+    ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic
+    ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian
+    ibm905 = 20905, // IBM905 IBM EBCDIC Turkish
+    ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
+    euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990)
+    x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
+    x_cp20949 = 20949, // x-cp20949 Korean Wansung
+    cp1025 = 21025, // cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian
+    // = 21027, // (deprecated)
+    koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U)
+    iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO)
+    iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO)
+    iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3
+    iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic
+    iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic
+    iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic
+    iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek
+    iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
+    iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish
+    iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian
+    iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9
+    x_europa = 29001, // x-Europa Europa 3
+    is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
+    iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
+    cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
+    iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
+    iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean
+    x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
+    iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese
+    ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended
+    ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese
+    ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean
+    ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese
+    ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese
+    ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese
+    ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and Japanese
+    euc_jp = 51932, // euc-jp EUC Japanese
+    euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC)
+    euc_kr = 51949, // euc-kr EUC Korean
+    euc_chinesetrad = 51950, // EUC Traditional Chinese
+    hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
+    gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
+    x_iscii_de = 57002, // x-iscii-de ISCII Devanagari
+    x_iscii_be = 57003, // x-iscii-be ISCII Bangla
+    x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil
+    x_iscii_te = 57005, // x-iscii-te ISCII Telugu
+    x_iscii_as = 57006, // x-iscii-as ISCII Assamese
+    x_iscii_or = 57007, // x-iscii-or ISCII Odia
+    x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada
+    x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam
+    x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati
+    x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi
+    utf7 = 65000, // utf-7 Unicode (UTF-7)
+
+    /// Decodes the codepoint that starts at `index` within `bytes`, interpreting
+    /// the bytes according to `code_page`.
+    ///
+    /// Returns null when `index` is at or past the end of `bytes`. The index
+    /// check happens before the code page is inspected.
+    ///
+    /// `code_page` must be a supported code page (see `isSupported`); any other
+    /// value reaches `unreachable`.
+    pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint {
+        if (index >= bytes.len) return null;
+        return switch (code_page) {
+            // Every byte value has a representation, so a single byte is
+            // always converted into exactly one codepoint.
+            .windows1252 => Codepoint{
+                .value = windows1252.toCodepoint(bytes[index]),
+                .byte_len = 1,
+            },
+            .utf8 => Utf8.WellFormedDecoder.decode(bytes[index..]),
+            else => unreachable,
+        };
+    }
+
+    /// Returns true only for the code pages that `codepointAt` can decode.
+    pub fn isSupported(code_page: CodePage) bool {
+        return code_page == .windows1252 or code_page == .utf8;
+    }
+
+    /// Maps a numeric code page identifier to its `CodePage` tag, or returns
+    /// `error.InvalidCodePage` if no tag has that value.
+    pub fn getByIdentifier(identifier: u16) !CodePage {
+        // A linear scan over the (comptime-unrolled) list of tags. There is
+        // probably a more efficient way to do this (e.g. ComptimeHashMap?), but
+        // this function likely won't be called much.
+        inline for (@typeInfo(CodePage).Enum.fields) |tag| {
+            if (tag.value == identifier) return @field(CodePage, tag.name);
+        }
+        return error.InvalidCodePage;
+    }
+
+    /// Like `getByIdentifier`, but additionally returns
+    /// `error.UnsupportedCodePage` when the identifier names a code page for
+    /// which `isSupported` is false.
+    pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage {
+        const code_page = try getByIdentifier(identifier);
+        if (!isSupported(code_page)) return error.UnsupportedCodePage;
+        return code_page;
+    }
+};
+
+pub const Utf8 = struct {
+    /// Decoder that rejects ill-formed UTF-8 sequences, following section D92 of
+    /// Chapter 3 of the Unicode standard (Table 3-7 specifically).
+    ///
+    /// Note: This does not match "U+FFFD Substitution of Maximal Subparts", but instead
+    /// matches the behavior of the Windows RC compiler.
+    pub const WellFormedDecoder = struct {
+        /// Like std.unicode.utf8ByteSequenceLength, but:
+        /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
+        /// - Returns an optional value instead of an error union
+        ///
+        /// Bytes in the continuation range (80-BF) also yield null.
+        pub fn sequenceLength(first_byte: u8) ?u3 {
+            if (first_byte <= 0x7F) return 1;
+            if (first_byte >= 0xC2 and first_byte <= 0xDF) return 2;
+            if (first_byte >= 0xE0 and first_byte <= 0xEF) return 3;
+            if (first_byte >= 0xF0 and first_byte <= 0xF4) return 4;
+            return null;
+        }
+
+        /// Returns true if `byte` is in the generic continuation byte range 80-BF.
+        fn isContinuationByte(byte: u8) bool {
+            return byte >= 0x80 and byte <= 0xBF;
+        }
+
+        /// Decodes one codepoint from the start of `bytes`, which must not be empty.
+        ///
+        /// On success, `byte_len` is the length of the decoded sequence. On failure,
+        /// `value` is `Codepoint.invalid` and `byte_len` is the number of bytes that
+        /// make up the invalid sequence (always at least 1).
+        pub fn decode(bytes: []const u8) Codepoint {
+            std.debug.assert(bytes.len > 0);
+            const lead = bytes[0];
+            const needed = sequenceLength(lead) orelse
+                return .{ .value = Codepoint.invalid, .byte_len = 1 };
+            if (needed == 1) return .{ .value = lead, .byte_len = 1 };
+
+            // The input may end before the sequence is complete, so only look at
+            // the bytes that are actually there.
+            const available: usize = @min(bytes.len, needed);
+            var value: u21 = lead & 0b00011111;
+            for (bytes[1..available], 1..) |byte, position| {
+                // See Table 3-7 of D92 in Chapter 3 of the Unicode Standard:
+                // only the second byte has ranges that depend on the first byte.
+                const well_formed = switch (position) {
+                    1 => switch (lead) {
+                        0xE0 => byte >= 0xA0 and byte <= 0xBF,
+                        0xED => byte >= 0x80 and byte <= 0x9F,
+                        0xF0 => byte >= 0x90 and byte <= 0xBF,
+                        0xF4 => byte >= 0x80 and byte <= 0x8F,
+                        else => isContinuationByte(byte),
+                    },
+                    else => isContinuationByte(byte),
+                };
+
+                if (!well_formed) {
+                    // The offending byte only becomes part of the invalid sequence
+                    // if it is in the range of a continuation byte. Any other value
+                    // is left for the next decode call.
+                    const consumed = if (isContinuationByte(byte)) position + 1 else position;
+                    return .{ .value = Codepoint.invalid, .byte_len = consumed };
+                }
+
+                value = (value << 6) | (byte & 0b00111111);
+            }
+
+            // Truncated sequence: everything that was available is invalid.
+            if (available != needed) {
+                return .{ .value = Codepoint.invalid, .byte_len = available };
+            }
+            return .{ .value = value, .byte_len = needed };
+        }
+    };
+};
+
+test "Utf8.WellFormedDecoder" {
+    // <0x80> is not allowed directly after <0xF0>, but it is in the range of a
+    // continuation byte, so both bytes are reported as one invalid sequence.
+    const result = Utf8.WellFormedDecoder.decode("\xF0\x80");
+    try std.testing.expectEqual(Codepoint.invalid, result.value);
+    try std.testing.expectEqual(@as(usize, 2), result.byte_len);
+}
+
+test "codepointAt invalid utf8" {
+    const Step = struct { index: usize, byte_len: usize };
+    const Case = struct {
+        bytes: []const u8,
+        /// Decoding as UTF-8 at each `index` must produce an invalid codepoint
+        /// that spans `byte_len` bytes.
+        steps: []const Step,
+        /// Index that must be reported as out of range, if the case checks one.
+        end_index: ?usize,
+    };
+
+    const cases = [_]Case{
+        .{
+            .bytes = "\xf0\xf0\x80\x80\x80",
+            .steps = &.{
+                .{ .index = 0, .byte_len = 1 },
+                .{ .index = 1, .byte_len = 2 },
+                .{ .index = 3, .byte_len = 1 },
+                .{ .index = 4, .byte_len = 1 },
+            },
+            .end_index = 5,
+        },
+        .{
+            .bytes = "\xE1\xA0\xC0",
+            .steps = &.{
+                .{ .index = 0, .byte_len = 2 },
+                .{ .index = 2, .byte_len = 1 },
+            },
+            .end_index = 3,
+        },
+        .{
+            .bytes = "\xD2",
+            .steps = &.{
+                .{ .index = 0, .byte_len = 1 },
+            },
+            .end_index = 1,
+        },
+        .{
+            .bytes = "\xE1\xA0",
+            .steps = &.{
+                .{ .index = 0, .byte_len = 2 },
+            },
+            .end_index = 2,
+        },
+        .{
+            .bytes = "\xC5\xFF",
+            .steps = &.{
+                .{ .index = 0, .byte_len = 1 },
+                .{ .index = 1, .byte_len = 1 },
+            },
+            .end_index = 2,
+        },
+        .{
+            // encoded high surrogate
+            .bytes = "\xED\xA0\xBD",
+            .steps = &.{
+                .{ .index = 0, .byte_len = 2 },
+                .{ .index = 2, .byte_len = 1 },
+            },
+            .end_index = null,
+        },
+    };
+
+    for (cases) |case| {
+        for (case.steps) |step| {
+            try std.testing.expectEqual(Codepoint{
+                .value = Codepoint.invalid,
+                .byte_len = step.byte_len,
+            }, CodePage.utf8.codepointAt(step.index, case.bytes).?);
+        }
+        if (case.end_index) |end_index| {
+            try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(end_index, case.bytes));
+        }
+    }
+}
+
+test "codepointAt utf8 encoded" {
+    const expectEqual = std.testing.expectEqual;
+    const out_of_range: ?Codepoint = null;
+    const utf8_encoded = "²";
+
+    // with code page utf8: both bytes decode to a single codepoint
+    const as_utf8 = CodePage.utf8.codepointAt(0, utf8_encoded).?;
+    try expectEqual(Codepoint{ .value = '²', .byte_len = 2 }, as_utf8);
+    try expectEqual(out_of_range, CodePage.utf8.codepointAt(2, utf8_encoded));
+
+    // with code page windows1252: each byte is its own codepoint
+    const first = CodePage.windows1252.codepointAt(0, utf8_encoded).?;
+    try expectEqual(Codepoint{ .value = '\xC2', .byte_len = 1 }, first);
+    const second = CodePage.windows1252.codepointAt(1, utf8_encoded).?;
+    try expectEqual(Codepoint{ .value = '\xB2', .byte_len = 1 }, second);
+    try expectEqual(out_of_range, CodePage.windows1252.codepointAt(2, utf8_encoded));
+}
+
+test "codepointAt windows1252 encoded" {
+    // A single byte that is valid Windows-1252 but is not a valid first byte
+    // of a UTF-8 sequence.
+    const windows1252_encoded = "\xB2";
+
+    // with code page utf8
+    try std.testing.expectEqual(Codepoint{
+        .value = Codepoint.invalid,
+        .byte_len = 1,
+    }, CodePage.utf8.codepointAt(0, windows1252_encoded).?);
+    // The input is one byte long, so index 1 is the first out-of-range index.
+    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(1, windows1252_encoded));
+
+    // with code page windows1252
+    try std.testing.expectEqual(Codepoint{
+        .value = '\xB2',
+        .byte_len = 1,
+    }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?);
+    try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded));
+}
+
+/// A decoded codepoint together with the number of bytes it was decoded from.
+pub const Codepoint = struct {
+    /// Value stored in `value` when the bytes could not be decoded. It is the
+    /// largest value a u21 can hold.
+    pub const invalid: u21 = std.math.maxInt(u21);
+
+    /// The decoded codepoint, or `invalid`.
+    value: u21,
+    /// How many bytes of the input this codepoint (or invalid sequence) spans.
+    byte_len: usize,
+};
diff --git a/lib/compiler/resinator/comments.zig b/lib/compiler/resinator/comments.zig
@@ -0,0 +1,358 @@
+//! Expects to run after a C preprocessor step that preserves comments.
+//!
+//! `rc` has a peculiar quirk where something like `blah/**/blah` will be
+//! transformed into `blahblah` during parsing. However, `clang -E` will
+//! transform it into `blah blah`, so in order to match `rc`, we need
+//! to remove comments ourselves after the preprocessor runs.
+//! Note: Multiline comments that actually span more than one line do
+//! get translated to a space character by `rc`.
+//!
+//! Removing comments before lexing also allows the lexer to not have to
+//! deal with comments which would complicate its implementation (this is something
+//! of a tradeoff, as removing comments in a separate pass means that we'll
+//! need to iterate the source twice instead of once, but having to deal with
+//! comments when lexing would be a pain).
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
+const SourceMappings = @import("source_mapping.zig").SourceMappings;
+const LineHandler = @import("lex.zig").LineHandler;
+const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair;
+
+/// Copies `source` into `buf` with `//` line comments and `/* */` comments
+/// removed, and returns the written part of `buf`.
+///
+/// - Comment markers inside single- or double-quoted strings are copied
+///   verbatim. A string is considered finished at its closing quote or at the
+///   next `\r`/`\n`, whichever comes first.
+/// - The line ending that terminates a line comment is kept.
+/// - Within a `/* */` comment, `\n` is kept; `\r` is handled by
+///   `handleMultilineCarriageReturn`, which is also the only place
+///   `source_mappings` is used and the only source of errors.
+/// - A `/` that is not followed by `/` or `*` is ordinary text, including
+///   when it is the very last byte of `source`.
+///
+/// `buf` must be at least as long as `source`
+/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
+pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) ![]u8 {
+    std.debug.assert(buf.len >= source.len);
+    var result = UncheckedSliceWriter{ .slice = buf };
+    const State = enum {
+        start,
+        forward_slash,
+        line_comment,
+        multiline_comment,
+        multiline_comment_end,
+        single_quoted,
+        single_quoted_escape,
+        double_quoted,
+        double_quoted_escape,
+    };
+    var state: State = .start;
+    var index: usize = 0;
+    // Index of a `/` that has been seen but not yet written, because it is
+    // not yet known whether it starts a comment.
+    var pending_start: ?usize = null;
+    var line_handler = LineHandler{ .buffer = source };
+    while (index < source.len) : (index += 1) {
+        const c = source[index];
+        // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely
+        // cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed
+        // in the lexer, but comments are stripped before getting to the lexer.
+        switch (state) {
+            .start => switch (c) {
+                '/' => {
+                    state = .forward_slash;
+                    pending_start = index;
+                },
+                '\r', '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    result.write(c);
+                },
+                else => {
+                    switch (c) {
+                        '"' => state = .double_quoted,
+                        '\'' => state = .single_quoted,
+                        else => {},
+                    }
+                    result.write(c);
+                },
+            },
+            .forward_slash => switch (c) {
+                '/' => state = .line_comment,
+                '*' => {
+                    state = .multiline_comment;
+                },
+                else => {
+                    // Not a comment after all: emit the pending `/` together
+                    // with the current byte.
+                    _ = line_handler.maybeIncrementLineNumber(index);
+                    result.writeSlice(source[pending_start.? .. index + 1]);
+                    pending_start = null;
+                    state = .start;
+                },
+            },
+            .line_comment => switch (c) {
+                '\r', '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    result.write(c);
+                    state = .start;
+                },
+                else => {},
+            },
+            .multiline_comment => switch (c) {
+                '\r' => try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
+                '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    result.write(c);
+                },
+                '*' => state = .multiline_comment_end,
+                else => {},
+            },
+            .multiline_comment_end => switch (c) {
+                '\r' => {
+                    try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
+                    // We only want to treat this as a newline if it's part of a CRLF pair. If it's
+                    // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
+                    // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
+                    if (formsLineEndingPair(source, '\r', index + 1)) {
+                        state = .multiline_comment;
+                    }
+                },
+                '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    result.write(c);
+                    state = .multiline_comment;
+                },
+                '/' => {
+                    state = .start;
+                },
+                else => {
+                    state = .multiline_comment;
+                },
+            },
+            .single_quoted => switch (c) {
+                '\r', '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    state = .start;
+                    result.write(c);
+                },
+                '\\' => {
+                    state = .single_quoted_escape;
+                    result.write(c);
+                },
+                '\'' => {
+                    state = .start;
+                    result.write(c);
+                },
+                else => {
+                    result.write(c);
+                },
+            },
+            .single_quoted_escape => switch (c) {
+                '\r', '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    state = .start;
+                    result.write(c);
+                },
+                else => {
+                    state = .single_quoted;
+                    result.write(c);
+                },
+            },
+            .double_quoted => switch (c) {
+                '\r', '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    state = .start;
+                    result.write(c);
+                },
+                '\\' => {
+                    state = .double_quoted_escape;
+                    result.write(c);
+                },
+                '"' => {
+                    state = .start;
+                    result.write(c);
+                },
+                else => {
+                    result.write(c);
+                },
+            },
+            .double_quoted_escape => switch (c) {
+                '\r', '\n' => {
+                    _ = line_handler.incrementLineNumber(index);
+                    state = .start;
+                    result.write(c);
+                },
+                else => {
+                    state = .double_quoted;
+                    result.write(c);
+                },
+            },
+        }
+    }
+    // If the input ends right after a `/`, that byte is still pending: it was
+    // held back in case it started a comment. It did not, so it is ordinary
+    // text and must not be dropped from the output.
+    if (state == .forward_slash) {
+        result.writeSlice(source[pending_start.?..]);
+    }
+    return result.getWritten();
+}
+
+/// Handles a `\r` found at `index` while inside a multiline comment.
+///
+/// The line number of `line_handler` is always advanced first. Then:
+/// - if the `\r` is part of a CRLF/LFCR pair, it is written to `result`
+/// - otherwise (bare `\r`) nothing is written, the line number is moved back
+///   by one, and the corresponding line in `source_mappings` (if any) is
+///   collapsed. An error from that collapse is returned to the caller.
+inline fn handleMultilineCarriageReturn(
+    source: []const u8,
+    line_handler: *LineHandler,
+    index: usize,
+    result: *UncheckedSliceWriter,
+    source_mappings: ?*SourceMappings,
+) !void {
+    // Both checks have to happen before the line number is incremented below.
+    // The backward check detects LFCR (this function is only ever called on a
+    // CR), but it cannot detect CRLF since it only looks at the line ending
+    // before the CR. The forward check covers CRLF, and is only consulted when
+    // the CR is not already part of an LFCR pair.
+    const part_of_pair = line_handler.currentIndexFormsLineEndingPair(index) or
+        formsLineEndingPair(source, '\r', index + 1);
+
+    // Note: Bare \r within a multiline comment should *not* be treated as a line ending for the
+    // purposes of removing comments, but *should* be treated as a line ending for the
+    // purposes of line counting/source mapping
+    _ = line_handler.incrementLineNumber(index);
+
+    if (part_of_pair) {
+        result.write('\r');
+        return;
+    }
+
+    // Bare \r: the line gets collapsed, so step the line number back to make
+    // the next collapse act on the first of the collapsed line numbers. The
+    // source mapping is collapsed too so that we still know which line came
+    // from where.
+    line_handler.line_number -= 1;
+    const mappings = source_mappings orelse return;
+    try mappings.collapse(line_handler.line_number, 1);
+}
+
+/// Allocating variant of `removeComments`: the result is written into a newly
+/// allocated buffer that is then resized to the length of the result.
+/// The caller owns the returned slice and must free it with `allocator`.
+pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
+    // The output can never be longer than the input, so source.len is enough.
+    const scratch = try allocator.alloc(u8, source.len);
+    errdefer allocator.free(scratch);
+    const written_len = (try removeComments(source, scratch, source_mappings)).len;
+    return allocator.realloc(scratch, written_len);
+}
+
+/// Test helper: removes the comments from `source` (without source mappings)
+/// and checks that the result is exactly `expected`.
+fn testRemoveComments(expected: []const u8, source: []const u8) !void {
+    const gpa = std.testing.allocator;
+    const actual = try removeCommentsAlloc(gpa, source, null);
+    defer gpa.free(actual);
+
+    try std.testing.expectEqualStrings(expected, actual);
+}
+
+test "basic" {
+    // Input that consists of nothing but a comment becomes empty.
+    const comment_only = [_][]const u8{ "// comment", "/* comment */" };
+    for (comment_only) |source| {
+        try testRemoveComments("", source);
+    }
+}
+
+test "mixed" {
+    // A comment after or in the middle of other text is removed without
+    // leaving anything in its place.
+    const sources = [_][]const u8{ "hello// comment", "hel/* comment */lo" };
+    for (sources) |source| {
+        try testRemoveComments("hello", source);
+    }
+}
+
+test "within a string" {
+    // escaped " is \"
+    // Comment markers inside a quoted string are not comments, so the text is
+    // expected to come out unchanged.
+    const unchanged =
+        \\blah"//som\"/*ething*/"BLAH
+    ;
+    try testRemoveComments(unchanged, unchanged);
+}
+
+test "line comments retain newlines" {
+    const three_comment_lines =
+        \\// comment
+        \\// comment
+        \\// comment
+    ;
+    // Only the line endings between the comments are left over.
+    const three_empty_lines =
+        \\
+        \\
+        \\
+    ;
+    try testRemoveComments(three_empty_lines, three_comment_lines);
+
+    try testRemoveComments("\r\n", "//comment\r\n");
+}
+
+test "unfinished multiline comment" {
+    // The comment is never closed; the newline inside of it is still kept.
+    const source =
+        \\unfinished/*
+        \\
+    ;
+    const expected =
+        \\unfinished
+        \\
+    ;
+    try testRemoveComments(expected, source);
+}
+
+test "crazy" {
+    // A string that ends in an escaped quote, directly followed by a comment.
+    {
+        const source =
+            \\blah"/*som*/\""/*ething*/BLAH
+        ;
+        const expected =
+            \\blah"/*som*/\""BLAH
+        ;
+        try testRemoveComments(expected, source);
+    }
+
+    // A string that is cut short by the end of its line, so the following
+    // lines are treated as line comments even though the quote was never closed.
+    {
+        const source =
+            \\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END
+            \\// comment
+            \\//"blah blah" RCDATA {}
+            \\hello
+            \\"
+        ;
+        const expected =
+            \\blah"/*som*/"BLAH RCDATA "BEGIN END
+            \\
+            \\
+            \\hello
+            \\"
+        ;
+        try testRemoveComments(expected, source);
+    }
+}
+
+test "multiline comment with newlines" {
+    // bare \r is not treated as a newline
+    try testRemoveComments("blahblah", "blah/*some\rthing*/blah");
+
+    // \n and \r\n inside of a comment are kept
+    {
+        const source =
+            \\blah/*some
+            \\thing*/blah
+        ;
+        const expected =
+            \\blah
+            \\blah
+        ;
+        try testRemoveComments(expected, source);
+    }
+    try testRemoveComments(
+        "blah\r\nblah",
+        "blah/*some\r\nthing*/blah",
+    );
+
+    // handle *<not /> correctly
+    {
+        const source =
+            \\blah/*some
+            \\thing*
+            \\/bl*ah*/
+        ;
+        const expected =
+            \\blah
+            \\
+            \\
+        ;
+        try testRemoveComments(expected, source);
+    }
+}
+
+test "comments appended to a line" {
+    // The space between `blah` and the comment is not part of the comment, so
+    // it stays in the output. The expected strings are written with escapes so
+    // that this trailing space is visible and cannot be stripped by accident.
+    try testRemoveComments(
+        "blah \nblah",
+        \\blah // line comment
+        \\blah
+    );
+    try testRemoveComments(
+        "blah \r\nblah",
+        "blah // line comment\r\nblah",
+    );
+}
+
+test "remove comments with mappings" {
+    const allocator = std.testing.allocator;
+    // Both \r are bare (not part of a CRLF/LFCR pair) and inside the comment.
+    var mut_source = "blah/*\rcommented line*\r/blah".*;
+    var mappings = SourceMappings{};
+    // Registered before the first fallible call so that nothing is leaked if
+    // one of the setup steps below fails.
+    defer mappings.deinit(allocator);
+    _ = try mappings.files.put(allocator, "test.rc");
+    try mappings.set(1, 1, 0);
+    try mappings.set(2, 2, 0);
+    try mappings.set(3, 3, 0);
+
+    const result = try removeComments(&mut_source, &mut_source, &mappings);
+
+    try std.testing.expectEqualStrings("blahblah", result);
+    // The three input lines are collapsed into one line that spans all of them.
+    try std.testing.expectEqual(@as(usize, 1), mappings.end_line);
+    try std.testing.expectEqual(@as(usize, 3), mappings.getCorrespondingSpan(1).?.end_line);
+}
+
+test "in place" {
+    // `source` and `buf` are the same memory here.
+    var mut_source = "blah /* comment */ blah".*;
+    const result = try removeComments(&mut_source, &mut_source, null);
+    // Only the comment itself is removed: the space before it and the space
+    // after it both remain, so there are two spaces between the words.
+    try std.testing.expectEqualStrings("blah" ++ "  " ++ "blah", result);
+}
diff --git a/lib/compiler/resinator/compile.zig b/lib/compiler/resinator/compile.zig
@@ -0,0 +1,3427 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const Node = @import("ast.zig").Node;
+const lex = @import("lex.zig");
+const Parser = @import("parse.zig").Parser;
+const Resource = @import("rc.zig").Resource;
+const Token = @import("lex.zig").Token;
+const literals = @import("literals.zig");
+const Number = literals.Number;
+const SourceBytes = literals.SourceBytes;
+const Diagnostics = @import("errors.zig").Diagnostics;
+const ErrorDetails = @import("errors.zig").ErrorDetails;
+const MemoryFlags = @import("res.zig").MemoryFlags;
+const rc = @import("rc.zig");
+const res = @import("res.zig");
+const ico = @import("ico.zig");
+const ani = @import("ani.zig");
+const bmp = @import("bmp.zig");
+const WORD = std.os.windows.WORD;
+const DWORD = std.os.windows.DWORD;
+const utils = @import("utils.zig");
+const NameOrOrdinal = res.NameOrOrdinal;
+const CodePage = @import("code_pages.zig").CodePage;
+const CodePageLookup = @import("ast.zig").CodePageLookup;
+const SourceMappings = @import("source_mapping.zig").SourceMappings;
+const windows1252 = @import("windows1252.zig");
+const lang = @import("lang.zig");
+const code_pages = @import("code_pages.zig");
+const errors = @import("errors.zig");
+const native_endian = builtin.cpu.arch.endian();
+
+ /// Options accepted by `compile`.
+ pub const CompileOptions = struct {
+     /// Directory that search paths are opened relative to. `compile` re-opens it
+     /// (rather than taking ownership) so the handle passed here is not closed.
+     cwd: std.fs.Dir,
+     /// Destination for diagnostics produced while parsing and compiling.
+     diagnostics: *Diagnostics,
+     /// Optional source mappings. They are passed to the lexer and, when present,
+     /// the directory of the root file they record is added as the first search directory.
+     source_mappings: ?*SourceMappings = null,
+     /// List of paths (absolute or relative to `cwd`) for every file that the resources within the .rc file depend on.
+     /// Items within the list will be allocated using the allocator of the ArrayList and must be
+     /// freed by the caller.
+     /// TODO: Maybe a dedicated struct for this purpose so that it's a bit nicer to work with.
+     dependencies_list: ?*std.ArrayList([]const u8) = null,
+     /// Code page handed to the lexer as its default.
+     default_code_page: CodePage = .windows1252,
+     /// When true, the INCLUDE environment variable is not consulted for search paths.
+     ignore_include_env_var: bool = false,
+     /// Additional search paths; opened relative to `cwd` and searched after `cwd` itself.
+     extra_include_paths: []const []const u8 = &.{},
+     /// This is just an API convenience to allow separately passing 'system' (i.e. those
+     /// that would normally be gotten from the INCLUDE env var) include paths. This is mostly
+     /// intended for use when setting `ignore_include_env_var = true`. When `ignore_include_env_var`
+     /// is false, `system_include_paths` will be searched before the paths in the INCLUDE env var.
+     system_include_paths: []const []const u8 = &.{},
+     /// When non-null, used as the initial language of the compiler state.
+     default_language_id: ?u16 = null,
+     // TODO: Implement verbose output
+     verbose: bool = false,
+     /// Forwarded unchanged to the `Compiler` field of the same name.
+     null_terminate_string_table_strings: bool = false,
+     /// Note: This is a u15 to ensure that the maximum number of UTF-16 code units
+     /// plus a null-terminator can always fit into a u16.
+     max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints,
+     /// Forwarded unchanged to the `Compiler` field of the same name.
+     silent_duplicate_control_ids: bool = false,
+     /// Forwarded to the parser options.
+     warn_instead_of_error_on_invalid_code_page: bool = false,
+ };
+
+ /// Compiles the .rc `source` and writes the resulting .res data to `writer`.
+ ///
+ /// The source is lexed and parsed first (problems are reported through
+ /// `options.diagnostics`). The directories used to resolve external resource
+ /// files are then opened in this order:
+ ///   1. the directory of the root file recorded in `options.source_mappings` (if any),
+ ///   2. `options.cwd`,
+ ///   3. `options.extra_include_paths`,
+ ///   4. `options.system_include_paths`,
+ ///   5. the paths in the INCLUDE environment variable (unless `options.ignore_include_env_var`).
+ /// Include paths (3-5) that cannot be opened are skipped silently.
+ ///
+ /// Returns `error.CompileError` (after appending a diagnostic) if `options.cwd`
+ /// cannot be re-opened. All directories opened here are closed before returning.
+ pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, options: CompileOptions) !void {
+     var lexer = lex.Lexer.init(source, .{
+         .default_code_page = options.default_code_page,
+         .source_mappings = options.source_mappings,
+         .max_string_literal_codepoints = options.max_string_literal_codepoints,
+     });
+     var parser = Parser.init(&lexer, .{
+         .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
+     });
+     var tree = try parser.parse(allocator, options.diagnostics);
+     defer tree.deinit();
+
+     var search_dirs = std.ArrayList(SearchDir).init(allocator);
+     defer {
+         for (search_dirs.items) |*search_dir| {
+             search_dir.deinit(allocator);
+         }
+         search_dirs.deinit();
+     }
+
+     // Ownership note for every append below: until `append` succeeds, the opened
+     // directory and the duplicated path are owned by the local scope and are
+     // released by the scoped `errdefer`s. After a successful append they are
+     // owned by `search_dirs` and released by the `defer` above.
+
+     if (options.source_mappings) |source_mappings| {
+         const root_path = source_mappings.files.get(source_mappings.root_filename_offset);
+         // If dirname returns null, then the root path will be the same as
+         // the cwd so we don't need to add it as a distinct search path.
+         if (std.fs.path.dirname(root_path)) |root_dir_path| {
+             var root_dir = try options.cwd.openDir(root_dir_path, .{});
+             errdefer root_dir.close();
+             const owned_path = try allocator.dupe(u8, root_dir_path);
+             errdefer allocator.free(owned_path);
+             try search_dirs.append(.{ .dir = root_dir, .path = owned_path });
+         }
+     }
+     {
+         // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed)
+         var cwd_dir = options.cwd.openDir(".", .{}) catch |err| {
+             try options.diagnostics.append(.{
+                 .err = .failed_to_open_cwd,
+                 .token = .{
+                     .id = .invalid,
+                     .start = 0,
+                     .end = 0,
+                     .line_number = 1,
+                 },
+                 .print_source_line = false,
+                 .extra = .{ .file_open_error = .{
+                     .err = ErrorDetails.FileOpenError.enumFromError(err),
+                     .filename_string_index = undefined,
+                 } },
+             });
+             return error.CompileError;
+         };
+         // Scoped to this block so that it cannot fire (and double-close the
+         // handle) once `search_dirs` owns the directory.
+         errdefer cwd_dir.close();
+         try search_dirs.append(.{ .dir = cwd_dir, .path = null });
+     }
+     for (options.extra_include_paths) |extra_include_path| {
+         var dir = openSearchPathDir(options.cwd, extra_include_path) catch {
+             // TODO: maybe a warning that the search path is skipped?
+             continue;
+         };
+         errdefer dir.close();
+         const owned_path = try allocator.dupe(u8, extra_include_path);
+         errdefer allocator.free(owned_path);
+         try search_dirs.append(.{ .dir = dir, .path = owned_path });
+     }
+     for (options.system_include_paths) |system_include_path| {
+         var dir = openSearchPathDir(options.cwd, system_include_path) catch {
+             // TODO: maybe a warning that the search path is skipped?
+             continue;
+         };
+         errdefer dir.close();
+         const owned_path = try allocator.dupe(u8, system_include_path);
+         errdefer allocator.free(owned_path);
+         try search_dirs.append(.{ .dir = dir, .path = owned_path });
+     }
+     if (!options.ignore_include_env_var) {
+         // A missing or undecodable INCLUDE variable is treated as empty, but
+         // running out of memory is a real failure and must not be hidden.
+         const INCLUDE: []const u8 = std.process.getEnvVarOwned(allocator, "INCLUDE") catch |err| switch (err) {
+             error.OutOfMemory => |e| return e,
+             else => "",
+         };
+         // Freeing the zero-length fallback is a no-op.
+         defer allocator.free(INCLUDE);
+
+         // The only precedent here is llvm-rc which also uses the platform-specific
+         // delimiter. There's no precedent set by `rc.exe` since it's Windows-only.
+         const delimiter = switch (builtin.os.tag) {
+             .windows => ';',
+             else => ':',
+         };
+         var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
+         while (it.next()) |search_path| {
+             var dir = openSearchPathDir(options.cwd, search_path) catch continue;
+             errdefer dir.close();
+             const owned_path = try allocator.dupe(u8, search_path);
+             errdefer allocator.free(owned_path);
+             try search_dirs.append(.{ .dir = dir, .path = owned_path });
+         }
+     }
+
+     var arena_allocator = std.heap.ArenaAllocator.init(allocator);
+     defer arena_allocator.deinit();
+     const arena = arena_allocator.allocator();
+
+     var compiler = Compiler{
+         .source = source,
+         .arena = arena,
+         .allocator = allocator,
+         .cwd = options.cwd,
+         .diagnostics = options.diagnostics,
+         .dependencies_list = options.dependencies_list,
+         .input_code_pages = &tree.input_code_pages,
+         .output_code_pages = &tree.output_code_pages,
+         // This is only safe because we know search_dirs won't be modified past this point
+         .search_dirs = search_dirs.items,
+         .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
+         .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
+     };
+     if (options.default_language_id) |default_language_id| {
+         compiler.state.language = res.Language.fromInt(default_language_id);
+     }
+
+     try compiler.writeRoot(tree.root(), writer);
+ }
+
+pub const Compiler = struct {
+ source: []const u8,
+ arena: Allocator,
+ allocator: Allocator,
+ cwd: std.fs.Dir,
+ state: State = .{},
+ diagnostics: *Diagnostics,
+ dependencies_list: ?*std.ArrayList([]const u8),
+ input_code_pages: *const CodePageLookup,
+ output_code_pages: *const CodePageLookup,
+ search_dirs: []SearchDir,
+ null_terminate_string_table_strings: bool,
+ silent_duplicate_control_ids: bool,
+
+ /// Mutable state that is carried across the nodes of a single compilation.
+ pub const State = struct {
+     /// Ordinal used as the resource name of the next icon/cursor image that is
+     /// written; incremented after each image.
+     icon_id: u16 = 1,
+     /// String tables collected so far; `writeRoot` writes them out after all
+     /// other nodes have been written.
+     string_tables: StringTablesByLanguage = .{},
+     /// Current language; `compile` overrides the default when a default
+     /// language id is provided in its options.
+     language: res.Language = .{},
+     /// Fonts collected so far; `writeRoot` writes the FONTDIR after the root body.
+     font_dir: FontDir = .{},
+     /// Current version value, copied into resource headers built from this state.
+     version: u32 = 0,
+     /// Current characteristics value, copied into resource headers built from this state.
+     characteristics: u32 = 0,
+ };
+
+ /// Writes the full .res output for `root` to `writer`.
+ ///
+ /// Output order: the leading empty resource, every node of the root body in
+ /// source order, the FONTDIR, and finally all accumulated STRINGTABLE blocks.
+ pub fn writeRoot(self: *Compiler, root: *Node.Root, writer: anytype) !void {
+     try writeEmptyResource(writer);
+     for (root.body) |child_node| try self.writeNode(child_node, writer);
+
+     // now write the FONTDIR (if it has anything in it)
+     try self.state.font_dir.writeResData(self, writer);
+     const has_fonts = self.state.font_dir.fonts.items.len != 0;
+     if (has_fonts) {
+         // Our FONTDIR may differ from the one the Win32 RC compiler writes:
+         // it sometimes emits a non-zero-length device name/face name, whereas
+         // we *always* write both as zero-length.
+         //
+         // This has no practical consequence, since for various reasons the
+         // format of the FONTDIR cannot be relied on and is seemingly not used
+         // by anything anymore. A diagnostic is still emitted so that it is
+         // possible to know that our .RES is intentionally not byte-for-byte
+         // identical with the rc.exe output.
+         //
+         // The hint type makes the diagnostic detectable in code without it
+         // being printed, since the end-user doesn't need to care.
+         try self.addErrorDetails(.{
+             .err = .result_contains_fontdir,
+             .type = .hint,
+             .token = undefined,
+         });
+     }
+
+     // once everything else has been written out, the finalized STRINGTABLE
+     // resources can be written
+     var tables_it = self.state.string_tables.tables.iterator();
+     while (tables_it.next()) |table_entry| {
+         const table_key = table_entry.key_ptr.*;
+         var blocks_it = table_entry.value_ptr.blocks.iterator();
+         while (blocks_it.next()) |block_entry| {
+             try block_entry.value_ptr.writeResData(self, table_key, block_entry.key_ptr.*, writer);
+         }
+     }
+ }
+
+ /// Dispatches a node from the root body to the handler for its node type.
+ ///
+ /// Node types that only ever occur nested inside another node are handled by
+ /// their parent's writer and are `unreachable` here.
+ pub fn writeNode(self: *Compiler, node: *Node, writer: anytype) !void {
+     switch (node.id) {
+         // Handlers that receive the writer.
+         .resource_external => try self.writeResourceExternal(@fieldParentPtr(Node.ResourceExternal, "base", node), writer),
+         .resource_raw_data => try self.writeResourceRawData(@fieldParentPtr(Node.ResourceRawData, "base", node), writer),
+         .accelerators => try self.writeAccelerators(@fieldParentPtr(Node.Accelerators, "base", node), writer),
+         .dialog => try self.writeDialog(@fieldParentPtr(Node.Dialog, "base", node), writer),
+         .toolbar => try self.writeToolbar(@fieldParentPtr(Node.Toolbar, "base", node), writer),
+         .menu => try self.writeMenu(@fieldParentPtr(Node.Menu, "base", node), writer),
+         .version_info => try self.writeVersionInfo(@fieldParentPtr(Node.VersionInfo, "base", node), writer),
+
+         // Handlers that are not given the writer.
+         .string_table => try self.writeStringTable(@fieldParentPtr(Node.StringTable, "base", node)),
+         .language_statement => self.writeLanguageStatement(@fieldParentPtr(Node.LanguageStatement, "base", node)),
+         .simple_statement => self.writeTopLevelSimpleStatement(@fieldParentPtr(Node.SimpleStatement, "base", node)),
+
+         // no-op, currently only used for dangling literals at EOF
+         .invalid => {},
+
+         // writeRoot should be called directly instead
+         .root => unreachable,
+
+         // Context dependent; handled by the parent node's writer
+         // (e.g. accelerator by writeAccelerators, string_table_string by writeStringTable).
+         .literal,
+         .binary_expression,
+         .grouped_expression,
+         .not_expression,
+         .accelerator,
+         .control_statement,
+         .menu_item,
+         .menu_item_separator,
+         .menu_item_ex,
+         .popup,
+         .popup_ex,
+         .version_statement,
+         .block,
+         .block_value,
+         .block_value_value,
+         .string_table_string,
+         .font_statement,
+         => unreachable,
+     }
+ }
+
+ /// Evaluates a filename expression to a UTF-8 encoded filename.
+ ///
+ /// For binary expressions only the right-hand side is evaluated, and grouped
+ /// expressions evaluate their inner expression. The returned slice is
+ /// allocated with `self.allocator` and owned by the caller.
+ pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 {
+     const literal_node = switch (expression_node.id) {
+         .literal => expression_node.cast(.literal).?,
+         .binary_expression => return self.evaluateFilenameExpression(expression_node.cast(.binary_expression).?.right),
+         .grouped_expression => return self.evaluateFilenameExpression(expression_node.cast(.grouped_expression).?.expression),
+         else => unreachable,
+     };
+     const token = literal_node.token;
+     // Substituted for anything that cannot be represented as UTF-8.
+     const replacement_character = "�";
+
+     switch (token.id) {
+         .literal, .number => {
+             // Unquoted: decode the raw token bytes with the code page that
+             // applies to the token and re-encode each codepoint as UTF-8.
+             const raw = token.slice(self.source);
+             const code_page = self.input_code_pages.getForToken(token);
+             var utf8 = try std.ArrayList(u8).initCapacity(self.allocator, raw.len);
+             errdefer utf8.deinit();
+
+             var offset: usize = 0;
+             while (code_page.codepointAt(offset, raw)) |codepoint| {
+                 if (codepoint.value == code_pages.Codepoint.invalid) {
+                     try utf8.appendSlice(replacement_character);
+                 } else {
+                     // Anything that is not returned as an invalid codepoint must be encodable as UTF-8.
+                     var encoded: [4]u8 = undefined;
+                     const encoded_len = std.unicode.utf8Encode(codepoint.value, &encoded) catch unreachable;
+                     try utf8.appendSlice(encoded[0..encoded_len]);
+                 }
+                 offset += codepoint.byte_len;
+             }
+
+             return utf8.toOwnedSlice();
+         },
+         .quoted_ascii_string, .quoted_wide_string => {
+             const bytes = SourceBytes{
+                 .slice = token.slice(self.source),
+                 .code_page = self.input_code_pages.getForToken(token),
+             };
+
+             var utf8 = std.ArrayList(u8).init(self.allocator);
+             errdefer utf8.deinit();
+
+             // Filenames are sort-of parsed as if they were wide strings, but the max escape width of
+             // hex/octal escapes is still determined by the L prefix. Since we want to end up with
+             // UTF-8, we can parse either string type directly to UTF-8.
+             var string_parser = literals.IterativeStringParser.init(bytes, .{
+                 .start_column = token.calculateColumn(self.source, 8, null),
+                 .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
+             });
+
+             while (try string_parser.nextUnchecked()) |parsed| {
+                 if (parsed.codepoint == code_pages.Codepoint.invalid) {
+                     try utf8.appendSlice(replacement_character);
+                     continue;
+                 }
+                 var encoded: [4]u8 = undefined;
+                 // If the codepoint cannot be encoded, we fall back to �
+                 const encoded_len = std.unicode.utf8Encode(parsed.codepoint, &encoded) catch {
+                     try utf8.appendSlice(replacement_character);
+                     continue;
+                 };
+                 try utf8.appendSlice(encoded[0..encoded_len]);
+             }
+
+             return utf8.toOwnedSlice();
+         },
+         else => unreachable, // no other token types should be in a filename literal node
+     }
+ }
+
+ /// https://learn.microsoft.com/en-us/windows/win32/menurc/searching-for-files
+ ///
+ /// Opens `path` and returns the file; the caller is responsible for closing it.
+ /// If `self.dependencies_list` is set, the path of the opened file is appended
+ /// to it (allocated with the list's allocator).
+ ///
+ /// Absolute paths are opened directly. Relative paths are searched for, in this order:
+ ///    Directory of the 'root' .rc file (if different from CWD)
+ ///    CWD
+ ///    extra_include_paths (resolved relative to CWD)
+ ///    system_include_paths (resolved relative to CWD)
+ ///    INCLUDE environment var paths (only if ignore_include_env_var is false; resolved relative to CWD)
+ ///
+ /// Note: The CWD being searched *in addition to* the directory of the 'root' .rc file
+ ///       is also how the Win32 RC compiler preprocessor searches for includes, but that
+ ///       differs from how the clang preprocessor searches for includes.
+ ///
+ /// Note: This will always return the first matching file that can be opened.
+ ///       This matches the Win32 RC compiler, which will fail with an error if the first
+ ///       matching file is invalid. That is, it does not do the `cmd` PATH searching
+ ///       thing of continuing to look for matching files until it finds a valid
+ ///       one if a matching file is invalid.
+ ///
+ /// If no search directory yields the file, the error from the first failed
+ /// attempt is returned (or `error.FileNotFound` if there were no attempts).
+ fn searchForFile(self: *Compiler, path: []const u8) !std.fs.File {
+     // If the path is absolute, then it is not resolved relative to any search
+     // paths, so there's no point in checking them.
+     //
+     // This behavior was determined/confirmed with the following test:
+     // - A `test.rc` file with the contents `1 RCDATA "/test.bin"`
+     // - A `test.bin` file at `C:\test.bin`
+     // - A `test.bin` file at `inc\test.bin` relative to the .rc file
+     // - Invoking `rc` with `rc /i inc test.rc`
+     //
+     // This results in a .res file with the contents of `C:\test.bin`, not
+     // the contents of `inc\test.bin`. Further, if `C:\test.bin` is deleted,
+     // then it starts failing to find `/test.bin`, meaning that it does not resolve
+     // `/test.bin` relative to include paths and instead only treats it as
+     // an absolute path.
+     if (std.fs.path.isAbsolute(path)) {
+         const file = try utils.openFileNotDir(std.fs.cwd(), path, .{});
+         errdefer file.close();
+
+         if (self.dependencies_list) |dependencies_list| {
+             const duped_path = try dependencies_list.allocator.dupe(u8, path);
+             errdefer dependencies_list.allocator.free(duped_path);
+             try dependencies_list.append(duped_path);
+         }
+
+         // Return here: falling through to the search directories would leak
+         // this handle and could record the dependency a second time.
+         return file;
+     }
+
+     var first_error: ?std.fs.File.OpenError = null;
+     for (self.search_dirs) |search_dir| {
+         if (utils.openFileNotDir(search_dir.dir, path, .{})) |file| {
+             errdefer file.close();
+
+             if (self.dependencies_list) |dependencies_list| {
+                 const searched_file_path = try std.fs.path.join(dependencies_list.allocator, &.{
+                     search_dir.path orelse "", path,
+                 });
+                 errdefer dependencies_list.allocator.free(searched_file_path);
+                 try dependencies_list.append(searched_file_path);
+             }
+
+             return file;
+         } else |err| if (first_error == null) {
+             // Remember only the first failure; that is what gets reported
+             // if none of the remaining directories contain the file.
+             first_error = err;
+         }
+     }
+     return first_error orelse error.FileNotFound;
+ }
+
+ /// Parses the string literal `token` of a DLGINCLUDE resource into bytes.
+ ///
+ /// The returned slice is allocated with `self.allocator` and owned by the caller.
+ pub fn parseDlgIncludeString(self: *Compiler, token: Token) ![]u8 {
+     // For the purposes of parsing, the L prefix (if any) is stripped, since
+     // escaped integers should be limited to their ascii string range.
+     //
+     // Whether or not there was an L prefix is still remembered, though,
+     // since there's more weirdness to come.
+     var bytes = self.sourceBytesForToken(token);
+     const first_byte = bytes.slice[0];
+     const was_wide_string = first_byte == 'L' or first_byte == 'l';
+     if (was_wide_string) bytes.slice = bytes.slice[1..];
+
+     var buf = try std.ArrayList(u8).initCapacity(self.allocator, bytes.slice.len);
+     errdefer buf.deinit();
+
+     var iterative_parser = literals.IterativeStringParser.init(bytes, .{
+         .start_column = token.calculateColumn(self.source, 8, null),
+         .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
+     });
+
+     // No real idea what's going on here, but this matches the rc.exe behavior
+     while (try iterative_parser.next()) |parsed| {
+         const c = parsed.codepoint;
+         if (was_wide_string and c <= 0xFF) {
+             // Wide strings: values up to 0xFF are written as a single byte,
+             // except that values in 0x80...0x9F without a Windows-1252 best
+             // fit become '?'.
+             const in_0x80_to_0x9f = c >= 0x80 and c <= 0x9F;
+             if (in_0x80_to_0x9f and windows1252.bestFitFromCodepoint(c) == null) {
+                 try buf.append('?');
+             } else {
+                 try buf.append(@intCast(c));
+             }
+         } else if (!was_wide_string and parsed.from_escaped_integer) {
+             // Narrow strings: escaped integers are written as their low byte.
+             try buf.append(@truncate(c));
+         } else if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
+             // Everything else goes through the Windows-1252 best fit mapping...
+             try buf.append(best_fit);
+         } else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
+             // ...with '?' standing in for codepoints that have no best fit
+             try buf.append('?');
+         } else {
+             // (two of them for codepoints at or above 0x10000).
+             try buf.appendSlice("??");
+         }
+     }
+
+     return buf.toOwnedSlice();
+ }
+
+ pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
+ // Init header with data size zero for now, will need to fill it in later
+ var header = try self.resourceHeader(node.id, node.type, .{});
+ defer header.deinit(self.allocator);
+
+ const maybe_predefined_type = header.predefinedResourceType();
+
+ // DLGINCLUDE has special handling that doesn't actually need the file to exist
+ if (maybe_predefined_type != null and maybe_predefined_type.? == .DLGINCLUDE) {
+ const filename_token = node.filename.cast(.literal).?.token;
+ const parsed_filename = try self.parseDlgIncludeString(filename_token);
+ defer self.allocator.free(parsed_filename);
+
+ // NUL within the parsed string acts as a terminator
+ const parsed_filename_terminated = std.mem.sliceTo(parsed_filename, 0);
+
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ header.data_size = @intCast(parsed_filename_terminated.len + 1);
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+ try writer.writeAll(parsed_filename_terminated);
+ try writer.writeByte(0);
+ try writeDataPadding(writer, header.data_size);
+ return;
+ }
+
+ const filename_utf8 = try self.evaluateFilenameExpression(node.filename);
+ defer self.allocator.free(filename_utf8);
+
+ // TODO: More robust checking of the validity of the filename.
+ // This currently only checks for NUL bytes, but it should probably also check for
+ // platform-specific invalid characters like '*', '?', '"', '<', '>', '|' (Windows)
+ // Related: https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193
+ if (std.mem.indexOfScalar(u8, filename_utf8, 0) != null) {
+ return self.addErrorDetailsAndFail(.{
+ .err = .invalid_filename,
+ .token = node.filename.getFirstToken(),
+ .token_span_end = node.filename.getLastToken(),
+ .extra = .{ .number = 0 },
+ });
+ }
+
+ // Allow plain number literals, but complex number expressions are evaluated strangely
+ // and almost certainly lead to things not intended by the user (e.g. '(1+-1)' evaluates
+ // to the filename '-1'), so error if the filename node is a grouped/binary expression.
+ // Note: This is done here instead of during parsing so that we can easily include
+ // the evaluated filename as part of the error messages.
+ if (node.filename.id != .literal) {
+ const filename_string_index = try self.diagnostics.putString(filename_utf8);
+ try self.addErrorDetails(.{
+ .err = .number_expression_as_filename,
+ .token = node.filename.getFirstToken(),
+ .token_span_end = node.filename.getLastToken(),
+ .extra = .{ .number = filename_string_index },
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .number_expression_as_filename,
+ .type = .note,
+ .token = node.filename.getFirstToken(),
+ .token_span_end = node.filename.getLastToken(),
+ .print_source_line = false,
+ .extra = .{ .number = filename_string_index },
+ });
+ }
+ // From here on out, we know that the filename must be comprised of a single token,
+ // so get it here to simplify future usage.
+ const filename_token = node.filename.getFirstToken();
+
+ const file = self.searchForFile(filename_utf8) catch |err| switch (err) {
+ error.OutOfMemory => |e| return e,
+ else => |e| {
+ const filename_string_index = try self.diagnostics.putString(filename_utf8);
+ return self.addErrorDetailsAndFail(.{
+ .err = .file_open_error,
+ .token = filename_token,
+ .extra = .{ .file_open_error = .{
+ .err = ErrorDetails.FileOpenError.enumFromError(e),
+ .filename_string_index = filename_string_index,
+ } },
+ });
+ },
+ };
+ defer file.close();
+
+ if (maybe_predefined_type) |predefined_type| {
+ switch (predefined_type) {
+ .GROUP_ICON, .GROUP_CURSOR => {
+ // Check for animated icon first
+ if (ani.isAnimatedIcon(file.reader())) {
+ // Animated icons are just put into the resource unmodified,
+ // and the resource type changes to ANIICON/ANICURSOR
+
+ const new_predefined_type: res.RT = switch (predefined_type) {
+ .GROUP_ICON => .ANIICON,
+ .GROUP_CURSOR => .ANICURSOR,
+ else => unreachable,
+ };
+ header.type_value.ordinal = @intFromEnum(new_predefined_type);
+ header.memory_flags = MemoryFlags.defaults(new_predefined_type);
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ header.data_size = @intCast(try file.getEndPos());
+
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+ try file.seekTo(0);
+ try writeResourceData(writer, file.reader(), header.data_size);
+ return;
+ }
+
+ // isAnimatedIcon moved the file cursor so reset to the start
+ try file.seekTo(0);
+
+ const icon_dir = ico.read(self.allocator, file.reader(), try file.getEndPos()) catch |err| switch (err) {
+ error.OutOfMemory => |e| return e,
+ else => |e| {
+ return self.iconReadError(
+ e,
+ filename_utf8,
+ filename_token,
+ predefined_type,
+ );
+ },
+ };
+ defer icon_dir.deinit();
+
+ // This limit is inherent to the ico format since number of entries is a u16 field.
+ std.debug.assert(icon_dir.entries.len <= std.math.maxInt(u16));
+
+ // Note: The Win32 RC compiler will compile the resource as whatever type is
+ // in the icon_dir regardless of the type of resource specified in the .rc.
+ // This leads to unusable .res files when the types mismatch, so
+ // we error instead.
+ const res_types_match = switch (predefined_type) {
+ .GROUP_ICON => icon_dir.image_type == .icon,
+ .GROUP_CURSOR => icon_dir.image_type == .cursor,
+ else => unreachable,
+ };
+ if (!res_types_match) {
+ return self.addErrorDetailsAndFail(.{
+ .err = .icon_dir_and_resource_type_mismatch,
+ .token = filename_token,
+ .extra = .{ .resource = switch (predefined_type) {
+ .GROUP_ICON => .icon,
+ .GROUP_CURSOR => .cursor,
+ else => unreachable,
+ } },
+ });
+ }
+
+ // Memory flags affect the RT_ICON and the RT_GROUP_ICON differently
+ var icon_memory_flags = MemoryFlags.defaults(res.RT.ICON);
+ applyToMemoryFlags(&icon_memory_flags, node.common_resource_attributes, self.source);
+ applyToGroupMemoryFlags(&header.memory_flags, node.common_resource_attributes, self.source);
+
+ const first_icon_id = self.state.icon_id;
+ const entry_type = if (predefined_type == .GROUP_ICON) @intFromEnum(res.RT.ICON) else @intFromEnum(res.RT.CURSOR);
+ for (icon_dir.entries, 0..) |*entry, entry_i_usize| {
+ // We know that the entry index must fit within a u16, so
+ // cast it here to simplify usage sites.
+ const entry_i: u16 = @intCast(entry_i_usize);
+ var full_data_size = entry.data_size_in_bytes;
+ if (icon_dir.image_type == .cursor) {
+ full_data_size = std.math.add(u32, full_data_size, 4) catch {
+ return self.addErrorDetailsAndFail(.{
+ .err = .resource_data_size_exceeds_max,
+ .token = node.id,
+ });
+ };
+ }
+
+ const image_header = ResourceHeader{
+ .type_value = .{ .ordinal = entry_type },
+ .name_value = .{ .ordinal = self.state.icon_id },
+ .data_size = full_data_size,
+ .memory_flags = icon_memory_flags,
+ .language = self.state.language,
+ .version = self.state.version,
+ .characteristics = self.state.characteristics,
+ };
+ try image_header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+ // From https://learn.microsoft.com/en-us/windows/win32/menurc/localheader:
+ // > The LOCALHEADER structure is the first data written to the RT_CURSOR
+ // > resource if a RESDIR structure contains information about a cursor.
+ // where LOCALHEADER is `struct { WORD xHotSpot; WORD yHotSpot; }`
+ if (icon_dir.image_type == .cursor) {
+ try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_x, .little);
+ try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_y, .little);
+ }
+
+ try file.seekTo(entry.data_offset_from_start_of_file);
+ var header_bytes = file.reader().readBytesNoEof(16) catch {
+ return self.iconReadError(
+ error.UnexpectedEOF,
+ filename_utf8,
+ filename_token,
+ predefined_type,
+ );
+ };
+
+ const image_format = ico.ImageFormat.detect(&header_bytes);
+ if (!image_format.validate(&header_bytes)) {
+ return self.iconReadError(
+ error.InvalidHeader,
+ filename_utf8,
+ filename_token,
+ predefined_type,
+ );
+ }
+ switch (image_format) {
+ .riff => switch (icon_dir.image_type) {
+ .icon => {
+ // The Win32 RC compiler treats this as an error, but icon dirs
+ // with RIFF encoded icons within them work ~okay (they work
+ // in some places but not others, they may not animate, etc) if they are
+ // allowed to be compiled.
+ try self.addErrorDetails(.{
+ .err = .rc_would_error_on_icon_dir,
+ .type = .warning,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } },
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_error_on_icon_dir,
+ .type = .note,
+ .print_source_line = false,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } },
+ });
+ },
+ .cursor => {
+ // The Win32 RC compiler errors in this case too, but we only error
+ // here because the cursor would fail to be loaded at runtime if we
+ // compiled it.
+ return self.addErrorDetailsAndFail(.{
+ .err = .format_not_supported_in_icon_dir,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .riff, .index = entry_i } },
+ });
+ },
+ },
+ .png => switch (icon_dir.image_type) {
+ .icon => {
+ // PNG always seems to have 1 for color planes no matter what
+ entry.type_specific_data.icon.color_planes = 1;
+ // These seem to be the only values of num_colors that
+ // get treated specially
+ entry.type_specific_data.icon.bits_per_pixel = switch (entry.num_colors) {
+ 2 => 1,
+ 8 => 3,
+ 16 => 4,
+ else => entry.type_specific_data.icon.bits_per_pixel,
+ };
+ },
+ .cursor => {
+ // The Win32 RC compiler treats this as an error, but cursor dirs
+ // with PNG encoded icons within them work fine if they are
+ // allowed to be compiled.
+ try self.addErrorDetails(.{
+ .err = .rc_would_error_on_icon_dir,
+ .type = .warning,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .png, .index = entry_i } },
+ });
+ },
+ },
+ .dib => {
+ const bitmap_header: *ico.BitmapHeader = @ptrCast(@alignCast(&header_bytes));
+ if (native_endian == .big) {
+ std.mem.byteSwapAllFields(ico.BitmapHeader, bitmap_header);
+ }
+ const bitmap_version = ico.BitmapHeader.Version.get(bitmap_header.bcSize);
+
+ // The Win32 RC compiler only allows headers with
+ // `bcSize == sizeof(BITMAPINFOHEADER)`, but it seems unlikely
+ // that there's a good reason for that outside of too-old
+ // bitmap headers.
+ // TODO: Need to test V4 and V5 bitmaps to check they actually work
+ if (bitmap_version == .@"win2.0") {
+ return self.addErrorDetailsAndFail(.{
+ .err = .rc_would_error_on_bitmap_version,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{
+ .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
+ .icon_format = image_format,
+ .index = entry_i,
+ .bitmap_version = bitmap_version,
+ } },
+ });
+ } else if (bitmap_version != .@"nt3.1") {
+ try self.addErrorDetails(.{
+ .err = .rc_would_error_on_bitmap_version,
+ .type = .warning,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{
+ .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
+ .icon_format = image_format,
+ .index = entry_i,
+ .bitmap_version = bitmap_version,
+ } },
+ });
+ }
+
+ switch (icon_dir.image_type) {
+ .icon => {
+ // The values in the icon's BITMAPINFOHEADER always take precedence over
+ // the values in the IconDir, but not in the LOCALHEADER (see above).
+ entry.type_specific_data.icon.color_planes = bitmap_header.bcPlanes;
+ entry.type_specific_data.icon.bits_per_pixel = bitmap_header.bcBitCount;
+ },
+ .cursor => {
+ // Only cursors get the width/height from BITMAPINFOHEADER (icons don't)
+ entry.width = @intCast(bitmap_header.bcWidth);
+ entry.height = @intCast(bitmap_header.bcHeight);
+ entry.type_specific_data.cursor.hotspot_x = bitmap_header.bcPlanes;
+ entry.type_specific_data.cursor.hotspot_y = bitmap_header.bcBitCount;
+ },
+ }
+ },
+ }
+
+ try file.seekTo(entry.data_offset_from_start_of_file);
+ try writeResourceDataNoPadding(writer, file.reader(), entry.data_size_in_bytes);
+ try writeDataPadding(writer, full_data_size);
+
+ if (self.state.icon_id == std.math.maxInt(u16)) {
+ try self.addErrorDetails(.{
+ .err = .max_icon_ids_exhausted,
+ .print_source_line = false,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{
+ .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
+ .icon_format = image_format,
+ .index = entry_i,
+ } },
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .max_icon_ids_exhausted,
+ .type = .note,
+ .token = filename_token,
+ .extra = .{ .icon_dir = .{
+ .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
+ .icon_format = image_format,
+ .index = entry_i,
+ } },
+ });
+ }
+ self.state.icon_id += 1;
+ }
+
+ header.data_size = icon_dir.getResDataSize();
+
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+ try icon_dir.writeResData(writer, first_icon_id);
+ try writeDataPadding(writer, header.data_size);
+ return;
+ },
+ .RCDATA, .HTML, .MANIFEST, .MESSAGETABLE, .DLGINIT, .PLUGPLAY => {
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ },
+ .BITMAP => {
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ const file_size = try file.getEndPos();
+
+ const bitmap_info = bmp.read(file.reader(), file_size) catch |err| {
+ const filename_string_index = try self.diagnostics.putString(filename_utf8);
+ return self.addErrorDetailsAndFail(.{
+ .err = .bmp_read_error,
+ .token = filename_token,
+ .extra = .{ .bmp_read_error = .{
+ .err = ErrorDetails.BitmapReadError.enumFromError(err),
+ .filename_string_index = filename_string_index,
+ } },
+ });
+ };
+
+ if (bitmap_info.getActualPaletteByteLen() > bitmap_info.getExpectedPaletteByteLen()) {
+ const num_ignored_bytes = bitmap_info.getActualPaletteByteLen() - bitmap_info.getExpectedPaletteByteLen();
+ var number_as_bytes: [8]u8 = undefined;
+ std.mem.writeInt(u64, &number_as_bytes, num_ignored_bytes, native_endian);
+ const value_string_index = try self.diagnostics.putString(&number_as_bytes);
+ try self.addErrorDetails(.{
+ .err = .bmp_ignored_palette_bytes,
+ .type = .warning,
+ .token = filename_token,
+ .extra = .{ .number = value_string_index },
+ });
+ } else if (bitmap_info.getActualPaletteByteLen() < bitmap_info.getExpectedPaletteByteLen()) {
+ const num_padding_bytes = bitmap_info.getExpectedPaletteByteLen() - bitmap_info.getActualPaletteByteLen();
+
+ // TODO: Make this configurable (command line option)
+ const max_missing_bytes = 4096;
+ if (num_padding_bytes > max_missing_bytes) {
+ var numbers_as_bytes: [16]u8 = undefined;
+ std.mem.writeInt(u64, numbers_as_bytes[0..8], num_padding_bytes, native_endian);
+ std.mem.writeInt(u64, numbers_as_bytes[8..16], max_missing_bytes, native_endian);
+ const values_string_index = try self.diagnostics.putString(&numbers_as_bytes);
+ try self.addErrorDetails(.{
+ .err = .bmp_too_many_missing_palette_bytes,
+ .token = filename_token,
+ .extra = .{ .number = values_string_index },
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .bmp_too_many_missing_palette_bytes,
+ .type = .note,
+ .print_source_line = false,
+ .token = filename_token,
+ });
+ }
+
+ var number_as_bytes: [8]u8 = undefined;
+ std.mem.writeInt(u64, &number_as_bytes, num_padding_bytes, native_endian);
+ const value_string_index = try self.diagnostics.putString(&number_as_bytes);
+ try self.addErrorDetails(.{
+ .err = .bmp_missing_palette_bytes,
+ .type = .warning,
+ .token = filename_token,
+ .extra = .{ .number = value_string_index },
+ });
+ const pixel_data_len = bitmap_info.getPixelDataLen(file_size);
+ if (pixel_data_len > 0) {
+ const miscompiled_bytes = @min(pixel_data_len, num_padding_bytes);
+ std.mem.writeInt(u64, &number_as_bytes, miscompiled_bytes, native_endian);
+ const miscompiled_bytes_string_index = try self.diagnostics.putString(&number_as_bytes);
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_bmp_palette_padding,
+ .type = .warning,
+ .token = filename_token,
+ .extra = .{ .number = miscompiled_bytes_string_index },
+ });
+ }
+ }
+
+ // TODO: It might be possible that the calculation done in this function
+ // could underflow if the underlying file is modified while reading
+ // it, but need to think about it more to determine if that's a
+ // real possibility
+ const bmp_bytes_to_write: u32 = @intCast(bitmap_info.getExpectedByteLen(file_size));
+
+ header.data_size = bmp_bytes_to_write;
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+ try file.seekTo(bmp.file_header_len);
+ const file_reader = file.reader();
+ try writeResourceDataNoPadding(writer, file_reader, bitmap_info.dib_header_size);
+ if (bitmap_info.getBitmasksByteLen() > 0) {
+ try writeResourceDataNoPadding(writer, file_reader, bitmap_info.getBitmasksByteLen());
+ }
+ if (bitmap_info.getExpectedPaletteByteLen() > 0) {
+ try writeResourceDataNoPadding(writer, file_reader, @intCast(bitmap_info.getActualPaletteByteLen()));
+ // We know that the number of missing palette bytes is <= 4096
+ // (see `bmp_too_many_missing_palette_bytes` error case above)
+ const padding_bytes: usize = @intCast(bitmap_info.getMissingPaletteByteLen());
+ if (padding_bytes > 0) {
+ try writer.writeByteNTimes(0, padding_bytes);
+ }
+ }
+ try file.seekTo(bitmap_info.pixel_data_offset);
+ const pixel_bytes: u32 = @intCast(file_size - bitmap_info.pixel_data_offset);
+ try writeResourceDataNoPadding(writer, file_reader, pixel_bytes);
+ try writeDataPadding(writer, bmp_bytes_to_write);
+ return;
+ },
+ .FONT => {
+ if (self.state.font_dir.ids.get(header.name_value.ordinal) != null) {
+ // Add warning and skip this resource
+ // Note: The Win32 compiler prints this as an error but it doesn't fail the compilation
+ // and the duplicate resource is skipped.
+ try self.addErrorDetails(ErrorDetails{
+ .err = .font_id_already_defined,
+ .token = node.id,
+ .type = .warning,
+ .extra = .{ .number = header.name_value.ordinal },
+ });
+ try self.addErrorDetails(ErrorDetails{
+ .err = .font_id_already_defined,
+ .token = self.state.font_dir.ids.get(header.name_value.ordinal).?,
+ .type = .note,
+ .extra = .{ .number = header.name_value.ordinal },
+ });
+ return;
+ }
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ const file_size = try file.getEndPos();
+ if (file_size > std.math.maxInt(u32)) {
+ return self.addErrorDetailsAndFail(.{
+ .err = .resource_data_size_exceeds_max,
+ .token = node.id,
+ });
+ }
+
+ // We now know that the data size will fit in a u32
+ header.data_size = @intCast(file_size);
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+ var header_slurping_reader = headerSlurpingReader(148, file.reader());
+ try writeResourceData(writer, header_slurping_reader.reader(), header.data_size);
+
+ try self.state.font_dir.add(self.arena, FontDir.Font{
+ .id = header.name_value.ordinal,
+ .header_bytes = header_slurping_reader.slurped_header,
+ }, node.id);
+ return;
+ },
+ .ACCELERATOR,
+ .ANICURSOR,
+ .ANIICON,
+ .CURSOR,
+ .DIALOG,
+ .DLGINCLUDE,
+ .FONTDIR,
+ .ICON,
+ .MENU,
+ .STRING,
+ .TOOLBAR,
+ .VERSION,
+ .VXD,
+ => unreachable,
+ _ => unreachable,
+ }
+ } else {
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ }
+
+ // Fallback to just writing out the entire contents of the file
+ const data_size = try file.getEndPos();
+ if (data_size > std.math.maxInt(u32)) {
+ return self.addErrorDetailsAndFail(.{
+ .err = .resource_data_size_exceeds_max,
+ .token = node.id,
+ });
+ }
+ // We now know that the data size will fit in a u32
+ header.data_size = @intCast(data_size);
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+ try writeResourceData(writer, file.reader(), header.data_size);
+ }
+
+ /// Reports an `icon_read_error` diagnostic at `token` for the file `filename` and returns
+ /// the error produced by `addErrorDetailsAndFail`.
+ ///
+ /// `predefined_type` must be `.GROUP_ICON` or `.GROUP_CURSOR`; any other value is treated
+ /// as unreachable.
+ fn iconReadError(
+     self: *Compiler,
+     err: ico.ReadError,
+     filename: []const u8,
+     token: Token,
+     predefined_type: res.RT,
+ ) error{ CompileError, OutOfMemory } {
+     // The diagnostics keep the filename; the error details only carry the returned index.
+     const stored_filename_index = try self.diagnostics.putString(filename);
+     return self.addErrorDetailsAndFail(.{
+         .err = .icon_read_error,
+         .token = token,
+         .extra = .{
+             .icon_read_error = .{
+                 .err = ErrorDetails.IconReadError.enumFromError(err),
+                 .icon_type = switch (predefined_type) {
+                     .GROUP_CURSOR => .cursor,
+                     .GROUP_ICON => .icon,
+                     else => unreachable,
+                 },
+                 .filename_string_index = stored_filename_index,
+             },
+         },
+     });
+ }
+
+ /// The kinds of value a raw data expression can evaluate to; this is the tag type of `Data`.
+ pub const DataType = enum {
+ number,
+ ascii_string,
+ wide_string,
+ };
+
+ /// A single evaluated raw data value: either a number or a parsed string literal.
+ pub const Data = union(DataType) {
+     number: Number,
+     ascii_string: []const u8,
+     wide_string: [:0]const u16,
+
+     /// Frees the string payload with `allocator`. A `number` owns no memory, so
+     /// nothing is freed for it.
+     pub fn deinit(self: Data, allocator: Allocator) void {
+         switch (self) {
+             .number => {},
+             .ascii_string => |bytes| allocator.free(bytes),
+             .wide_string => |code_units| allocator.free(code_units),
+         }
+     }
+
+     /// Serializes the value to `writer`.
+     /// Numbers are written little-endian, as a DWORD when `is_long` is set and as a WORD
+     /// otherwise. Strings are written as their raw bytes.
+     pub fn write(self: Data, writer: anytype) !void {
+         switch (self) {
+             .number => |number| {
+                 if (number.is_long) {
+                     try writer.writeInt(DWORD, number.value, .little);
+                 } else {
+                     try writer.writeInt(WORD, number.asWord(), .little);
+                 }
+             },
+             .ascii_string => |bytes| try writer.writeAll(bytes),
+             .wide_string => |code_units| try writer.writeAll(std.mem.sliceAsBytes(code_units)),
+         }
+     }
+ };
+
+ /// Recursively evaluates a number literal, binary expression, or grouped expression
+ /// into a `Number`.
+ /// The caller must only pass number expressions: any other node kind is treated as
+ /// unreachable, and a literal is asserted to hold a number token.
+ pub fn evaluateNumberExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) Number {
+     switch (expression_node.id) {
+         .grouped_expression => {
+             // A group evaluates to whatever its inner expression evaluates to.
+             const group = expression_node.cast(.grouped_expression).?;
+             return evaluateNumberExpression(group.expression, source, code_page_lookup);
+         },
+         .binary_expression => {
+             const binary = expression_node.cast(.binary_expression).?;
+             const left_value = evaluateNumberExpression(binary.left, source, code_page_lookup);
+             const right_value = evaluateNumberExpression(binary.right, source, code_page_lookup);
+             // Only the first byte of the operator token is used to select the operation.
+             const operator_byte = binary.operator.slice(source)[0];
+             return left_value.evaluateOperator(operator_byte, right_value);
+         },
+         .literal => {
+             const literal = expression_node.cast(.literal).?;
+             std.debug.assert(literal.token.id == .number);
+             const literal_bytes = SourceBytes{
+                 .slice = literal.token.slice(source),
+                 .code_page = code_page_lookup.getForToken(literal.token),
+             };
+             return literals.parseNumberLiteral(literal_bytes);
+         },
+         else => unreachable,
+     }
+ }
+
+ /// A 32-bit flags value together with the mask accumulated from NOT operands.
+ /// `not_mask` defaults to all ones, meaning no bits are masked out.
+ const FlagsNumber = struct {
+     value: u32,
+     not_mask: u32 = 0xFFFFFFFF,
+
+     /// Combines `lhs` and `rhs` with the operator `operator_char` (`-`, `+`, `|` or `&`).
+     /// Addition and subtraction wrap on overflow. The resulting `not_mask` is the
+     /// intersection of both operands' masks; it is not applied to the value here.
+     pub fn evaluateOperator(lhs: FlagsNumber, operator_char: u8, rhs: FlagsNumber) FlagsNumber {
+         return .{
+             .value = switch (operator_char) {
+                 '&' => lhs.value & rhs.value,
+                 '|' => lhs.value | rhs.value,
+                 '+' => lhs.value +% rhs.value,
+                 '-' => lhs.value -% rhs.value,
+                 else => unreachable, // invalid operator, this would be a lexer/parser bug
+             },
+             .not_mask = lhs.not_mask & rhs.not_mask,
+         };
+     }
+
+     /// Returns `value` with every bit that is cleared in `not_mask` cleared.
+     pub fn applyNotMask(self: FlagsNumber) u32 {
+         return self.not_mask & self.value;
+     }
+ };
+
+ /// Evaluates a flags expression with `default` as the initial value handed to
+ /// `evaluateFlagsExpression`, and returns the resulting 32-bit value.
+ pub fn evaluateFlagsExpressionWithDefault(default: u32, expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) u32 {
+     // The context must be mutable: the evaluator records when the initial value is consumed.
+     var seeded_context: FlagsExpressionContext = .{ .initial_value = default };
+     return evaluateFlagsExpression(expression_node, source, code_page_lookup, &seeded_context).value;
+ }
+
+ /// State threaded through `evaluateFlagsExpression` so that an initial flags value is
+ /// applied at most once, to the first literal or NOT expression that gets evaluated.
+ pub const FlagsExpressionContext = struct {
+ /// OR'd into the first literal, or masked by the first NOT expression, whichever is evaluated first.
+ initial_value: u32 = 0,
+ /// Set to true once `initial_value` has been consumed.
+ initial_value_used: bool = false,
+ };
+
+ /// Assumes that the node is a number expression (which can contain not_expressions)
+ ///
+ /// `context.initial_value` is consumed by the first literal or not_expression that is
+ /// reached (left operands are evaluated before right operands), after which
+ /// `context.initial_value_used` is set so that it is not applied again.
+ pub fn evaluateFlagsExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup, context: *FlagsExpressionContext) FlagsNumber {
+ switch (expression_node.id) {
+ .literal => {
+ const literal_node = expression_node.cast(.literal).?;
+ std.debug.assert(literal_node.token.id == .number);
+ const bytes = SourceBytes{
+ .slice = literal_node.token.slice(source),
+ .code_page = code_page_lookup.getForToken(literal_node.token),
+ };
+ var value = literals.parseNumberLiteral(bytes).value;
+ // If nothing has consumed the initial value yet, OR it into this literal.
+ if (!context.initial_value_used) {
+ context.initial_value_used = true;
+ value |= context.initial_value;
+ }
+ return .{ .value = value };
+ },
+ .binary_expression => {
+ const binary_expression_node = expression_node.cast(.binary_expression).?;
+ // Left is evaluated before right, so the left-most operand is the one that consumes the initial value.
+ const lhs = evaluateFlagsExpression(binary_expression_node.left, source, code_page_lookup, context);
+ const rhs = evaluateFlagsExpression(binary_expression_node.right, source, code_page_lookup, context);
+ const operator_char = binary_expression_node.operator.slice(source)[0];
+ const result = lhs.evaluateOperator(operator_char, rhs);
+ // The combined NOT mask is folded into the value here; the returned number gets the default mask.
+ return .{ .value = result.applyNotMask() };
+ },
+ .grouped_expression => {
+ const grouped_expression_node = expression_node.cast(.grouped_expression).?;
+ return evaluateFlagsExpression(grouped_expression_node.expression, source, code_page_lookup, context);
+ },
+ .not_expression => {
+ const not_expression = expression_node.cast(.not_expression).?;
+ const bytes = SourceBytes{
+ .slice = not_expression.number_token.slice(source),
+ .code_page = code_page_lookup.getForToken(not_expression.number_token),
+ };
+ const not_number = literals.parseNumberLiteral(bytes);
+ // A NOT that is evaluated first clears its bits directly from the initial value.
+ if (!context.initial_value_used) {
+ context.initial_value_used = true;
+ return .{ .value = context.initial_value & ~not_number.value };
+ }
+ // Otherwise it contributes no bits of its own, only a mask that the
+ // binary_expression case applies after combining the operands.
+ return .{ .value = 0, .not_mask = ~not_number.value };
+ },
+ else => unreachable,
+ }
+ }
+
+ /// Evaluates one raw data expression into a `Data` value.
+ /// Number literals and binary/grouped expressions become `.number`; quoted string
+ /// literals are parsed with `self.allocator` into `.ascii_string` or `.wide_string`.
+ /// NOT expressions, other node kinds, and other literal token kinds are treated as unreachable.
+ pub fn evaluateDataExpression(self: *Compiler, expression_node: *Node) !Data {
+     switch (expression_node.id) {
+         .binary_expression, .grouped_expression => return .{
+             .number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages),
+         },
+         // Literals are handled below.
+         .literal => {},
+         .not_expression => unreachable,
+         else => unreachable,
+     }
+
+     const literal = expression_node.cast(.literal).?;
+     switch (literal.token.id) {
+         .number => return .{
+             .number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages),
+         },
+         .quoted_ascii_string => {
+             const start_column = literal.token.calculateColumn(self.source, 8, null);
+             const literal_bytes = SourceBytes{
+                 .slice = literal.token.slice(self.source),
+                 .code_page = self.input_code_pages.getForToken(literal.token),
+             };
+             const ascii = try literals.parseQuotedAsciiString(self.allocator, literal_bytes, .{
+                 .start_column = start_column,
+                 .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal.token },
+                 .output_code_page = self.output_code_pages.getForToken(literal.token),
+             });
+             errdefer self.allocator.free(ascii);
+             return .{ .ascii_string = ascii };
+         },
+         .quoted_wide_string => {
+             const start_column = literal.token.calculateColumn(self.source, 8, null);
+             const literal_bytes = SourceBytes{
+                 .slice = literal.token.slice(self.source),
+                 .code_page = self.input_code_pages.getForToken(literal.token),
+             };
+             const wide = try literals.parseQuotedWideString(self.allocator, literal_bytes, .{
+                 .start_column = start_column,
+                 .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal.token },
+             });
+             errdefer self.allocator.free(wide);
+             return .{ .wide_string = wide };
+         },
+         else => unreachable, // no other token types should be in a data literal node
+     }
+ }
+
+ /// Evaluates every expression in `node.raw_data`, buffers the serialized bytes, and then
+ /// writes the resource header followed by the padded data to `writer`.
+ /// Fails with a `resource_data_size_exceeds_max` diagnostic if the buffered data would
+ /// exceed what a u32 length can describe.
+ pub fn writeResourceRawData(self: *Compiler, node: *Node.ResourceRawData, writer: anytype) !void {
+     var buffered_data = std.ArrayList(u8).init(self.allocator);
+     defer buffered_data.deinit();
+     // The header stores the data length as a u32, so cap the buffer at maxInt(u32)
+     // bytes; that way the real size can always be recorded.
+     var capped = limitedWriter(buffered_data.writer(), std.math.maxInt(u32));
+     const capped_writer = capped.writer();
+
+     for (node.raw_data) |expression| {
+         const value = try self.evaluateDataExpression(expression);
+         defer value.deinit(self.allocator);
+         value.write(capped_writer) catch |err| switch (err) {
+             error.NoSpaceLeft => return self.addErrorDetailsAndFail(.{
+                 .err = .resource_data_size_exceeds_max,
+                 .token = node.id,
+             }),
+             else => |other| return other,
+         };
+     }
+
+     // Cannot fail: the capped writer never lets the buffer grow past maxInt(u32) bytes.
+     const total_len: u32 = @intCast(buffered_data.items.len);
+     try self.writeResourceHeader(writer, node.id, node.type, total_len, node.common_resource_attributes, self.state.language);
+
+     var buffered_stream = std.io.fixedBufferStream(buffered_data.items);
+     try writeResourceData(writer, buffered_stream.reader(), total_len);
+ }
+
+ /// Builds a resource header for `id_token`/`type_token` with the given `language` and
+ /// `data_size`, applies the memory flags from `common_resource_attributes`, and writes
+ /// the header to `writer`. The header is deinitialized before returning.
+ pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void {
+ var header = try self.resourceHeader(id_token, type_token, .{
+ .language = language,
+ .data_size = data_size,
+ });
+ defer header.deinit(self.allocator);
+
+ header.applyMemoryFlags(common_resource_attributes, self.source);
+
+ // Diagnostics raised while writing the header are attributed to the id token.
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = id_token });
+ }
+
+ /// Copies at most `data_size` bytes from `data_reader` to `writer`, pumping them through
+ /// a fixed 4096-byte buffer. No padding is written afterwards.
+ pub fn writeResourceDataNoPadding(writer: anytype, data_reader: anytype, data_size: u32) !void {
+     var bounded_reader = std.io.limitedReader(data_reader, data_size);
+     var pump_buffer = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+     try pump_buffer.pump(bounded_reader.reader(), writer);
+ }
+
+ /// Copies up to `data_size` bytes from `data_reader` to `writer`, then writes the zero
+ /// padding that `writeDataPadding` computes for `data_size`.
+ pub fn writeResourceData(writer: anytype, data_reader: anytype, data_size: u32) !void {
+ try writeResourceDataNoPadding(writer, data_reader, data_size);
+ try writeDataPadding(writer, data_size);
+ }
+
+ /// Writes the 0 to 3 zero bytes needed to pad `data_size` bytes of data up to a multiple of 4.
+ pub fn writeDataPadding(writer: anytype, data_size: u32) !void {
+ try writer.writeByteNTimes(0, numPaddingBytesNeeded(data_size));
+ }
+
+ /// Returns how many bytes (0 to 3) must follow `data_size` bytes of data for the total
+ /// to be a multiple of 4.
+ pub fn numPaddingBytesNeeded(data_size: u32) u2 {
+     // The low two bits of the wrapped negation equal (4 - data_size) mod 4.
+     return @truncate(0 -% data_size);
+ }
+
+ /// Evaluates an accelerator's key expression to a 16-bit key value.
+ /// A number expression is evaluated and narrowed with `asWord`; otherwise the node is
+ /// asserted to be a string literal and is parsed by `res.parseAcceleratorKeyString`,
+ /// with `is_virt` passed through.
+ pub fn evaluateAcceleratorKeyExpression(self: *Compiler, node: *Node, is_virt: bool) !u16 {
+     if (node.isNumberExpression()) {
+         return evaluateNumberExpression(node, self.source, self.input_code_pages).asWord();
+     }
+
+     std.debug.assert(node.isStringLiteral());
+     const string_literal = @fieldParentPtr(Node.Literal, "base", node);
+     const key_bytes = SourceBytes{
+         .slice = string_literal.token.slice(self.source),
+         .code_page = self.input_code_pages.getForToken(string_literal.token),
+     };
+     const start_column = string_literal.token.calculateColumn(self.source, 8, null);
+     return res.parseAcceleratorKeyString(key_bytes, is_virt, .{
+         .start_column = start_column,
+         .diagnostics = .{ .diagnostics = self.diagnostics, .token = string_literal.token },
+     });
+ }
+
+ /// Serializes the accelerators resource `node`: the entries are buffered first, then the
+ /// resource header (with memory flags and optional statements applied) and the padded
+ /// data are written to `writer`.
+ /// Fails with a `resource_data_size_exceeds_max` diagnostic if the data would exceed
+ /// what a u32 length can describe.
+ pub fn writeAccelerators(self: *Compiler, node: *Node.Accelerators, writer: anytype) !void {
+     var table_bytes = std.ArrayList(u8).init(self.allocator);
+     defer table_bytes.deinit();
+
+     // The header stores the data length as a u32, so cap the buffer at maxInt(u32)
+     // bytes; that way the real size can always be recorded.
+     var capped = limitedWriter(table_bytes.writer(), std.math.maxInt(u32));
+     const capped_writer = capped.writer();
+
+     self.writeAcceleratorsData(node, capped_writer) catch |err| switch (err) {
+         error.NoSpaceLeft => return self.addErrorDetailsAndFail(.{
+             .err = .resource_data_size_exceeds_max,
+             .token = node.id,
+         }),
+         else => |other| return other,
+     };
+
+     // Cannot fail: the capped writer never lets the buffer grow past maxInt(u32) bytes.
+     const table_len: u32 = @intCast(table_bytes.items.len);
+     var header = try self.resourceHeader(node.id, node.type, .{
+         .data_size = table_len,
+     });
+     defer header.deinit(self.allocator);
+
+     header.applyMemoryFlags(node.common_resource_attributes, self.source);
+     header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);
+
+     try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+     var table_stream = std.io.fixedBufferStream(table_bytes.items);
+     try writeResourceData(writer, table_stream.reader(), table_len);
+ }
+
+ /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to
+ /// the writer within this function could return error.NoSpaceLeft
+ ///
+ /// Writes one 8-byte entry per accelerator: the modifiers byte, a zero byte, the key and
+ /// the command id as little-endian u16 values, and a trailing zero u16.
+ pub fn writeAcceleratorsData(self: *Compiler, node: *Node.Accelerators, data_writer: anytype) !void {
+     for (node.accelerators, 0..) |accel_node, i| {
+         const accel = @fieldParentPtr(Node.Accelerator, "base", accel_node);
+
+         // Fold every type/option keyword into the modifier flags.
+         var modifiers = res.AcceleratorModifiers{};
+         for (accel.type_and_options) |option_token| {
+             modifiers.apply(rc.AcceleratorTypeAndOptions.map.get(option_token.slice(self.source)).?);
+         }
+
+         // A numeric event is only accepted when ASCII or VIRTKEY was given explicitly.
+         if (accel.event.isNumberExpression() and !modifiers.explicit_ascii_or_virtkey) {
+             return self.addErrorDetailsAndFail(.{
+                 .err = .accelerator_type_required,
+                 .token = accel.event.getFirstToken(),
+                 .token_span_end = accel.event.getLastToken(),
+             });
+         }
+
+         const key = self.evaluateAcceleratorKeyExpression(accel.event, modifiers.isSet(.virtkey)) catch |err| switch (err) {
+             error.OutOfMemory => |oom| return oom,
+             // Any other failure is reported as an invalid key spanning the whole event expression.
+             else => |key_err| return self.addErrorDetailsAndFail(.{
+                 .err = .invalid_accelerator_key,
+                 .token = accel.event.getFirstToken(),
+                 .token_span_end = accel.event.getLastToken(),
+                 .extra = .{ .accelerator_error = .{
+                     .err = ErrorDetails.AcceleratorError.enumFromError(key_err),
+                 } },
+             }),
+         };
+         const command_id = evaluateNumberExpression(accel.idvalue, self.source, self.input_code_pages);
+
+         // The final entry is flagged as the last one.
+         if (i + 1 == node.accelerators.len) {
+             modifiers.markLast();
+         }
+
+         try data_writer.writeByte(modifiers.value);
+         try data_writer.writeByte(0); // padding
+         try data_writer.writeInt(u16, key, .little);
+         try data_writer.writeInt(u16, command_id.asWord(), .little);
+         try data_writer.writeInt(u16, 0, .little); // padding
+     }
+ }
+
+ /// Values collected from a dialog's optional statements by `writeDialog`, starting from
+ /// the defaults given here.
+ const DialogOptionalStatementValues = struct {
+ /// Default style; replaced when a style statement is present.
+ style: u32 = res.WS.SYSMENU | res.WS.BORDER | res.WS.POPUP,
+ /// Replaced when an exstyle statement is present.
+ exstyle: u32 = 0,
+ /// Value of the most recent class statement; `writeDialog` deinitializes it with the compiler's allocator.
+ class: ?NameOrOrdinal = null,
+ /// Value of the most recent menu statement; `writeDialog` deinitializes it with the compiler's allocator.
+ menu: ?NameOrOrdinal = null,
+ /// Set when a font statement is present.
+ font: ?FontStatementValues = null,
+ /// Token of the caption statement's literal, if any.
+ caption: ?Token = null,
+ };
+
+ pub fn writeDialog(self: *Compiler, node: *Node.Dialog, writer: anytype) !void {
+ var data_buffer = std.ArrayList(u8).init(self.allocator);
+ defer data_buffer.deinit();
+ // The header's data length field is a u32 so limit the resource's data size so that
+ // we know we can always specify the real size.
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32));
+ const data_writer = limited_writer.writer();
+
+ const resource = Resource.fromString(.{
+ .slice = node.type.slice(self.source),
+ .code_page = self.input_code_pages.getForToken(node.type),
+ });
+ std.debug.assert(resource == .dialog or resource == .dialogex);
+
+ var optional_statement_values: DialogOptionalStatementValues = .{};
+ defer {
+ if (optional_statement_values.class) |class| {
+ class.deinit(self.allocator);
+ }
+ if (optional_statement_values.menu) |menu| {
+ menu.deinit(self.allocator);
+ }
+ }
+ var skipped_menu_or_classes = std.ArrayList(*Node.SimpleStatement).init(self.allocator);
+ defer skipped_menu_or_classes.deinit();
+ var last_menu: *Node.SimpleStatement = undefined;
+ var last_class: *Node.SimpleStatement = undefined;
+ var last_menu_would_be_forced_ordinal = false;
+ var last_menu_has_digit_as_first_char = false;
+ var last_menu_did_uppercase = false;
+ var last_class_would_be_forced_ordinal = false;
+
+ for (node.optional_statements) |optional_statement| {
+ switch (optional_statement.id) {
+ .simple_statement => {
+ const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", optional_statement);
+ const statement_identifier = simple_statement.identifier;
+ const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
+ switch (statement_type) {
+ .style, .exstyle => {
+ const style = evaluateFlagsExpressionWithDefault(0, simple_statement.value, self.source, self.input_code_pages);
+ if (statement_type == .style) {
+ optional_statement_values.style = style;
+ } else {
+ optional_statement_values.exstyle = style;
+ }
+ },
+ .caption => {
+ std.debug.assert(simple_statement.value.id == .literal);
+ const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
+ optional_statement_values.caption = literal_node.token;
+ },
+ .class => {
+ const is_duplicate = optional_statement_values.class != null;
+ if (is_duplicate) {
+ try skipped_menu_or_classes.append(last_class);
+ }
+ const forced_ordinal = is_duplicate and optional_statement_values.class.? == .ordinal;
+ // In the Win32 RC compiler, if any CLASS values that are interpreted as
+ // an ordinal exist, it affects all future CLASS statements and forces
+ // them to be treated as an ordinal no matter what.
+ if (forced_ordinal) {
+ last_class_would_be_forced_ordinal = true;
+ }
+ // clear out the old one if it exists
+ if (optional_statement_values.class) |prev| {
+ prev.deinit(self.allocator);
+ optional_statement_values.class = null;
+ }
+
+ if (simple_statement.value.isNumberExpression()) {
+ const class_ordinal = evaluateNumberExpression(simple_statement.value, self.source, self.input_code_pages);
+ optional_statement_values.class = NameOrOrdinal{ .ordinal = class_ordinal.asWord() };
+ } else {
+ std.debug.assert(simple_statement.value.isStringLiteral());
+ const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
+ const parsed = try self.parseQuotedStringAsWideString(literal_node.token);
+ optional_statement_values.class = NameOrOrdinal{ .name = parsed };
+ }
+
+ last_class = simple_statement;
+ },
+ .menu => {
+ const is_duplicate = optional_statement_values.menu != null;
+ if (is_duplicate) {
+ try skipped_menu_or_classes.append(last_menu);
+ }
+ const forced_ordinal = is_duplicate and optional_statement_values.menu.? == .ordinal;
+ // In the Win32 RC compiler, if any MENU values that are interpreted as
+ // an ordinal exist, it affects all future MENU statements and forces
+ // them to be treated as an ordinal no matter what.
+ if (forced_ordinal) {
+ last_menu_would_be_forced_ordinal = true;
+ }
+ // clear out the old one if it exists
+ if (optional_statement_values.menu) |prev| {
+ prev.deinit(self.allocator);
+ optional_statement_values.menu = null;
+ }
+
+ std.debug.assert(simple_statement.value.id == .literal);
+ const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
+
+ const token_slice = literal_node.token.slice(self.source);
+ const bytes = SourceBytes{
+ .slice = token_slice,
+ .code_page = self.input_code_pages.getForToken(literal_node.token),
+ };
+ optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes);
+
+ if (optional_statement_values.menu.? == .name) {
+ if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| {
+ try self.addErrorDetails(.{
+ .err = .invalid_digit_character_in_ordinal,
+ .type = .err,
+ .token = literal_node.token,
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .win32_non_ascii_ordinal,
+ .type = .note,
+ .token = literal_node.token,
+ .print_source_line = false,
+ .extra = .{ .number = win32_rc_ordinal.ordinal },
+ });
+ }
+ }
+
+ // Need to keep track of some properties of the value
+ // in order to emit the appropriate warning(s) later on.
+ // See where the warnings are emitted below (outside this loop)
+ // for the full explanation.
+ var did_uppercase = false;
+ var codepoint_i: usize = 0;
+ while (bytes.code_page.codepointAt(codepoint_i, bytes.slice)) |codepoint| : (codepoint_i += codepoint.byte_len) {
+ const c = codepoint.value;
+ switch (c) {
+ 'a'...'z' => {
+ did_uppercase = true;
+ break;
+ },
+ else => {},
+ }
+ }
+ last_menu_did_uppercase = did_uppercase;
+ last_menu_has_digit_as_first_char = std.ascii.isDigit(token_slice[0]);
+ last_menu = simple_statement;
+ },
+ else => {},
+ }
+ },
+ .font_statement => {
+ const font = @fieldParentPtr(Node.FontStatement, "base", optional_statement);
+ if (optional_statement_values.font != null) {
+ optional_statement_values.font.?.node = font;
+ } else {
+ optional_statement_values.font = FontStatementValues{ .node = font };
+ }
+ if (font.weight) |weight| {
+ const value = evaluateNumberExpression(weight, self.source, self.input_code_pages);
+ optional_statement_values.font.?.weight = value.asWord();
+ }
+ if (font.italic) |italic| {
+ const value = evaluateNumberExpression(italic, self.source, self.input_code_pages);
+ optional_statement_values.font.?.italic = value.asWord() != 0;
+ }
+ },
+ else => {},
+ }
+ }
+
+ for (skipped_menu_or_classes.items) |simple_statement| {
+ const statement_identifier = simple_statement.identifier;
+ const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
+ try self.addErrorDetails(.{
+ .err = .duplicate_menu_or_class_skipped,
+ .type = .warning,
+ .token = simple_statement.identifier,
+ .token_span_start = simple_statement.base.getFirstToken(),
+ .token_span_end = simple_statement.base.getLastToken(),
+ .extra = .{ .menu_or_class = switch (statement_type) {
+ .menu => .menu,
+ .class => .class,
+ else => unreachable,
+ } },
+ });
+ }
+ // The Win32 RC compiler miscompiles the value in the following scenario:
+ // Multiple CLASS parameters are specified and any of them are treated as a number, then
+ // the last CLASS is always treated as a number no matter what
+ if (last_class_would_be_forced_ordinal and optional_statement_values.class.? == .name) {
+ const literal_node = @fieldParentPtr(Node.Literal, "base", last_class.value);
+ const ordinal_value = res.ForcedOrdinal.fromUtf16Le(optional_statement_values.class.?.name);
+
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_class,
+ .type = .warning,
+ .token = literal_node.token,
+ .extra = .{ .number = ordinal_value },
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_class,
+ .type = .note,
+ .print_source_line = false,
+ .token = literal_node.token,
+ .extra = .{ .number = ordinal_value },
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
+ .type = .note,
+ .print_source_line = false,
+ .token = literal_node.token,
+ .extra = .{ .menu_or_class = .class },
+ });
+ }
+ // The Win32 RC compiler miscompiles the id in two different scenarios:
+ // 1. The first character of the ID is a digit, in which case it is always treated as a number
+ // no matter what (and therefore does not match how the MENU/MENUEX id is parsed)
+ // 2. Multiple MENU parameters are specified and any of them are treated as a number, then
+ // the last MENU is always treated as a number no matter what
+ if ((last_menu_would_be_forced_ordinal or last_menu_has_digit_as_first_char) and optional_statement_values.menu.? == .name) {
+ const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
+ const token_slice = literal_node.token.slice(self.source);
+ const bytes = SourceBytes{
+ .slice = token_slice,
+ .code_page = self.input_code_pages.getForToken(literal_node.token),
+ };
+ const ordinal_value = res.ForcedOrdinal.fromBytes(bytes);
+
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_menu_id,
+ .type = .warning,
+ .token = literal_node.token,
+ .extra = .{ .number = ordinal_value },
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_menu_id,
+ .type = .note,
+ .print_source_line = false,
+ .token = literal_node.token,
+ .extra = .{ .number = ordinal_value },
+ });
+ if (last_menu_would_be_forced_ordinal) {
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
+ .type = .note,
+ .print_source_line = false,
+ .token = literal_node.token,
+ .extra = .{ .menu_or_class = .menu },
+ });
+ } else {
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_dialog_menu_id_starts_with_digit,
+ .type = .note,
+ .print_source_line = false,
+ .token = literal_node.token,
+ });
+ }
+ }
+ // The MENU id parsing uses the exact same logic as the MENU/MENUEX resource id parsing,
+ // which means that it will convert ASCII characters to uppercase during the 'name' parsing.
+ // This turns out not to matter (`LoadMenu` does a case-insensitive lookup anyway),
+ // but it still makes sense to share the uppercasing logic since the MENU parameter
+ // here is just a reference to a MENU/MENUEX id within the .exe.
+ // So, because this is an intentional but inconsequential-to-the-user difference
+ // between resinator and the Win32 RC compiler, we only emit a hint instead of
+ // a warning.
+ if (last_menu_did_uppercase) {
+ const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
+ try self.addErrorDetails(.{
+ .err = .dialog_menu_id_was_uppercased,
+ .type = .hint,
+ .token = literal_node.token,
+ });
+ }
+
+ const x = evaluateNumberExpression(node.x, self.source, self.input_code_pages);
+ const y = evaluateNumberExpression(node.y, self.source, self.input_code_pages);
+ const width = evaluateNumberExpression(node.width, self.source, self.input_code_pages);
+ const height = evaluateNumberExpression(node.height, self.source, self.input_code_pages);
+
+ // FONT statement requires DS_SETFONT, and if it's not present DS_SETFONT must be unset
+ if (optional_statement_values.font) |_| {
+ optional_statement_values.style |= res.DS.SETFONT;
+ } else {
+ optional_statement_values.style &= ~res.DS.SETFONT;
+ }
+ // CAPTION statement implies WS_CAPTION
+ if (optional_statement_values.caption) |_| {
+ optional_statement_values.style |= res.WS.CAPTION;
+ }
+
+ self.writeDialogHeaderAndStrings(
+ node,
+ data_writer,
+ resource,
+ &optional_statement_values,
+ x,
+ y,
+ width,
+ height,
+ ) catch |err| switch (err) {
+ // Dialog header and menu/class/title strings can never exceed u32 bytes
+ // on their own, so this error is unreachable.
+ error.NoSpaceLeft => unreachable,
+ else => |e| return e,
+ };
+
+ var controls_by_id = std.AutoHashMap(u32, *const Node.ControlStatement).init(self.allocator);
+ // Number of controls are guaranteed by the parser to be within maxInt(u16).
+ try controls_by_id.ensureTotalCapacity(@as(u16, @intCast(node.controls.len)));
+ defer controls_by_id.deinit();
+
+ for (node.controls) |control_node| {
+ const control = @fieldParentPtr(Node.ControlStatement, "base", control_node);
+
+ self.writeDialogControl(
+ control,
+ data_writer,
+ resource,
+ // We know the data_buffer len is limited to u32 max.
+ @intCast(data_buffer.items.len),
+ &controls_by_id,
+ ) catch |err| switch (err) {
+ error.NoSpaceLeft => {
+ try self.addErrorDetails(.{
+ .err = .resource_data_size_exceeds_max,
+ .token = node.id,
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .resource_data_size_exceeds_max,
+ .type = .note,
+ .token = control.type,
+ });
+ },
+ else => |e| return e,
+ };
+ }
+
+ // We know the data_buffer len is limited to u32 max.
+ const data_size: u32 = @intCast(data_buffer.items.len);
+ var header = try self.resourceHeader(node.id, node.type, .{
+ .data_size = data_size,
+ });
+ defer header.deinit(self.allocator);
+
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+ header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);
+
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+ var data_fbs = std.io.fixedBufferStream(data_buffer.items);
+ try writeResourceData(writer, data_fbs.reader(), data_size);
+ }
+
+ /// Writes the dialog header followed by the menu, class, caption and (when present)
+ /// font data to `data_writer`.
+ ///
+ /// For `.dialogex` resources the extended header layout is used (version, signature,
+ /// help ID, then exstyle before style); any other `resource` value gets the plain
+ /// layout (style before exstyle).
+ /// `x`, `y`, `width` and `height` are each written as a little-endian u16 via `asWord`.
+ /// Errors from `data_writer` and from string parsing are returned to the caller.
+ fn writeDialogHeaderAndStrings(
+     self: *Compiler,
+     node: *Node.Dialog,
+     data_writer: anytype,
+     resource: Resource,
+     optional_statement_values: *const DialogOptionalStatementValues,
+     x: Number,
+     y: Number,
+     width: Number,
+     height: Number,
+ ) !void {
+     const values = optional_statement_values;
+
+     // Header
+     if (resource != .dialogex) {
+         try data_writer.writeInt(u32, values.style, .little);
+         try data_writer.writeInt(u32, values.exstyle, .little);
+     } else {
+         const help_id: u32 = if (node.help_id) |help_id_node|
+             evaluateNumberExpression(help_id_node, self.source, self.input_code_pages).value
+         else
+             0;
+         try data_writer.writeInt(u16, 1, .little); // version number, always 1
+         try data_writer.writeInt(u16, 0xFFFF, .little); // signature, always 0xFFFF
+         try data_writer.writeInt(u32, help_id, .little);
+         try data_writer.writeInt(u32, values.exstyle, .little);
+         try data_writer.writeInt(u32, values.style, .little);
+     }
+
+     // The parser enforces the limit on the number of controls, so the count is
+     // known to fit in a u16.
+     const num_controls: u16 = @intCast(node.controls.len);
+     try data_writer.writeInt(u16, num_controls, .little);
+     for ([_]Number{ x, y, width, height }) |dimension| {
+         try data_writer.writeInt(u16, dimension.asWord(), .little);
+     }
+
+     // Menu (a single zero u16 when absent)
+     if (values.menu) |menu_id| {
+         try menu_id.write(data_writer);
+     } else {
+         try data_writer.writeInt(u16, 0, .little);
+     }
+
+     // Class (a single zero u16 when absent)
+     if (values.class) |class_id| {
+         try class_id.write(data_writer);
+     } else {
+         try data_writer.writeInt(u16, 0, .little);
+     }
+
+     // Caption (a single zero u16 when absent)
+     if (values.caption) |caption_token| {
+         const caption_wide = try self.parseQuotedStringAsWideString(caption_token);
+         defer self.allocator.free(caption_wide);
+         // The slice is extended by one code unit so the terminator is written too.
+         const caption_with_terminator = caption_wide[0 .. caption_wide.len + 1];
+         try data_writer.writeAll(std.mem.sliceAsBytes(caption_with_terminator));
+     } else {
+         try data_writer.writeInt(u16, 0, .little);
+     }
+
+     // Font (nothing at all is written when absent)
+     if (values.font) |font_values| {
+         try self.writeDialogFont(resource, font_values, data_writer);
+     }
+ }
+
+ /// Writes a single dialog control to `data_writer`: padding bytes, style/exstyle
+ /// (plus help ID for DIALOGEX), position and size, control ID, class, text and
+ /// finally the length-prefixed extra data.
+ ///
+ /// `resource` must be `.dialog` or `.dialogex`; any other value hits `unreachable`.
+ /// `bytes_written_so_far` is the number of bytes already written for this resource and
+ /// is only used to compute the padding written before the control.
+ /// The control is recorded in `controls_by_id` using `getOrPutAssumeCapacity`, so the
+ /// caller is expected to have reserved capacity up front. If the ID is already present,
+ /// a warning and a note are emitted unless `self.silent_duplicate_control_ids` is set.
+ /// If the extra data does not fit in maxInt(u16) bytes, error details are added and the
+ /// result of `addErrorDetailsAndFail` is returned.
+ fn writeDialogControl(
+ self: *Compiler,
+ control: *Node.ControlStatement,
+ data_writer: anytype,
+ resource: Resource,
+ bytes_written_so_far: u32,
+ controls_by_id: *std.AutoHashMap(u32, *const Node.ControlStatement),
+ ) !void {
+ const control_type = rc.Control.map.get(control.type.slice(self.source)).?;
+
+ // Each control must be at a 4-byte boundary. However, the Windows RC
+ // compiler will miscompile controls if their extra data ends on an odd offset.
+ // We will avoid the miscompilation and emit a warning.
+ const num_padding = numPaddingBytesNeeded(bytes_written_so_far);
+ if (num_padding == 1 or num_padding == 3) {
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_control_padding,
+ .type = .warning,
+ .token = control.type,
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_control_padding,
+ .type = .note,
+ .print_source_line = false,
+ .token = control.type,
+ });
+ }
+ try data_writer.writeByteNTimes(0, num_padding);
+
+ const style = if (control.style) |style_expression|
+ // Certain styles are implied by the control type
+ evaluateFlagsExpressionWithDefault(res.ControlClass.getImpliedStyle(control_type), style_expression, self.source, self.input_code_pages)
+ else
+ res.ControlClass.getImpliedStyle(control_type);
+
+ // The extended style has no implied bits; it defaults to 0.
+ const exstyle = if (control.exstyle) |exstyle_expression|
+ evaluateFlagsExpressionWithDefault(0, exstyle_expression, self.source, self.input_code_pages)
+ else
+ 0;
+
+ switch (resource) {
+ .dialog => {
+ // Note: Reverse order from DIALOGEX
+ try data_writer.writeInt(u32, style, .little);
+ try data_writer.writeInt(u32, exstyle, .little);
+ },
+ .dialogex => {
+ const help_id: u32 = if (control.help_id) |help_id_expression|
+ evaluateNumberExpression(help_id_expression, self.source, self.input_code_pages).value
+ else
+ 0;
+ try data_writer.writeInt(u32, help_id, .little);
+ // Note: Reverse order from DIALOG
+ try data_writer.writeInt(u32, exstyle, .little);
+ try data_writer.writeInt(u32, style, .little);
+ },
+ else => unreachable,
+ }
+
+ const control_x = evaluateNumberExpression(control.x, self.source, self.input_code_pages);
+ const control_y = evaluateNumberExpression(control.y, self.source, self.input_code_pages);
+ const control_width = evaluateNumberExpression(control.width, self.source, self.input_code_pages);
+ const control_height = evaluateNumberExpression(control.height, self.source, self.input_code_pages);
+
+ try data_writer.writeInt(u16, control_x.asWord(), .little);
+ try data_writer.writeInt(u16, control_y.asWord(), .little);
+ try data_writer.writeInt(u16, control_width.asWord(), .little);
+ try data_writer.writeInt(u16, control_height.asWord(), .little);
+
+ // The control ID is written as a u16 for DIALOG and as a u32 for DIALOGEX.
+ const control_id = evaluateNumberExpression(control.id, self.source, self.input_code_pages);
+ switch (resource) {
+ .dialog => try data_writer.writeInt(u16, control_id.asWord(), .little),
+ .dialogex => try data_writer.writeInt(u32, control_id.value, .little),
+ else => unreachable,
+ }
+
+ // Duplicate detection uses the same value that was just written, widened to u32.
+ const control_id_for_map: u32 = switch (resource) {
+ .dialog => control_id.asWord(),
+ .dialogex => control_id.value,
+ else => unreachable,
+ };
+ const result = controls_by_id.getOrPutAssumeCapacity(control_id_for_map);
+ if (result.found_existing) {
+ if (!self.silent_duplicate_control_ids) {
+ try self.addErrorDetails(.{
+ .err = .control_id_already_defined,
+ .type = .warning,
+ .token = control.id.getFirstToken(),
+ .token_span_end = control.id.getLastToken(),
+ .extra = .{ .number = control_id_for_map },
+ });
+ // The note points at the control that was stored for this ID first.
+ try self.addErrorDetails(.{
+ .err = .control_id_already_defined,
+ .type = .note,
+ .token = result.value_ptr.*.id.getFirstToken(),
+ .token_span_end = result.value_ptr.*.id.getLastToken(),
+ .extra = .{ .number = control_id_for_map },
+ });
+ }
+ } else {
+ result.value_ptr.* = control;
+ }
+
+ // Class: either derived from the control type, or taken from the explicit class
+ // parameter (a number expression, a quoted string, or an unquoted literal).
+ if (res.ControlClass.fromControl(control_type)) |control_class| {
+ const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
+ try ordinal.write(data_writer);
+ } else {
+ const class_node = control.class.?;
+ if (class_node.isNumberExpression()) {
+ const number = evaluateNumberExpression(class_node, self.source, self.input_code_pages);
+ const ordinal = NameOrOrdinal{ .ordinal = number.asWord() };
+ // This is different from how the Windows RC compiles ordinals here,
+ // but I think that's a miscompilation/bug of the Windows implementation.
+ // The Windows behavior is (where LSB = least significant byte):
+ // - If the LSB is 0x00 => 0xFFFF0000
+ // - If the LSB is < 0x80 => 0x000000<LSB>
+ // - If the LSB is >= 0x80 => 0x0000FF<LSB>
+ //
+ // Because of this, we emit a warning about the potential miscompilation
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_control_class_ordinal,
+ .type = .warning,
+ .token = class_node.getFirstToken(),
+ .token_span_end = class_node.getLastToken(),
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_miscompile_control_class_ordinal,
+ .type = .note,
+ .print_source_line = false,
+ .token = class_node.getFirstToken(),
+ .token_span_end = class_node.getLastToken(),
+ });
+ // And then write out the ordinal using a proper NameOrOrdinal encoding.
+ try ordinal.write(data_writer);
+ } else if (class_node.isStringLiteral()) {
+ const literal_node = @fieldParentPtr(Node.Literal, "base", class_node);
+ const parsed = try self.parseQuotedStringAsWideString(literal_node.token);
+ defer self.allocator.free(parsed);
+ // A quoted string that matches a known control class is written as that class's ordinal.
+ if (rc.ControlClass.fromWideString(parsed)) |control_class| {
+ const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
+ try ordinal.write(data_writer);
+ } else {
+ // NUL acts as a terminator
+ // TODO: Maybe warn when parsed_terminated.len != parsed.len, since
+ // it seems unlikely that NUL-termination is something intentional
+ const parsed_terminated = std.mem.sliceTo(parsed, 0);
+ const name = NameOrOrdinal{ .name = parsed_terminated };
+ try name.write(data_writer);
+ }
+ } else {
+ const literal_node = @fieldParentPtr(Node.Literal, "base", class_node);
+ const literal_slice = literal_node.token.slice(self.source);
+ // This succeeding is guaranteed by the parser
+ const control_class = rc.ControlClass.map.get(literal_slice) orelse unreachable;
+ const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
+ try ordinal.write(data_writer);
+ }
+ }
+
+ // Text: a quoted string is written as a name, a number token as an ordinal, and
+ // a missing text as the empty NameOrOrdinal.
+ if (control.text) |text_token| {
+ const bytes = SourceBytes{
+ .slice = text_token.slice(self.source),
+ .code_page = self.input_code_pages.getForToken(text_token),
+ };
+ if (text_token.isStringLiteral()) {
+ const text = try self.parseQuotedStringAsWideString(text_token);
+ defer self.allocator.free(text);
+ const name = NameOrOrdinal{ .name = text };
+ try name.write(data_writer);
+ } else {
+ std.debug.assert(text_token.id == .number);
+ const number = literals.parseNumberLiteral(bytes);
+ const ordinal = NameOrOrdinal{ .ordinal = number.asWord() };
+ try ordinal.write(data_writer);
+ }
+ } else {
+ try NameOrOrdinal.writeEmpty(data_writer);
+ }
+
+ // The extra data is collected in its own buffer first because its byte length
+ // has to be written before the data itself.
+ var extra_data_buf = std.ArrayList(u8).init(self.allocator);
+ defer extra_data_buf.deinit();
+ // The extra data byte length must be able to fit within a u16.
+ var limited_extra_data_writer = limitedWriter(extra_data_buf.writer(), std.math.maxInt(u16));
+ const extra_data_writer = limited_extra_data_writer.writer();
+ for (control.extra_data) |data_expression| {
+ const data = try self.evaluateDataExpression(data_expression);
+ defer data.deinit(self.allocator);
+ data.write(extra_data_writer) catch |err| switch (err) {
+ error.NoSpaceLeft => {
+ try self.addErrorDetails(.{
+ .err = .control_extra_data_size_exceeds_max,
+ .token = control.type,
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .control_extra_data_size_exceeds_max,
+ .type = .note,
+ .token = data_expression.getFirstToken(),
+ .token_span_end = data_expression.getLastToken(),
+ });
+ },
+ else => |e| return e,
+ };
+ }
+ // We know the extra_data_buf size fits within a u16.
+ const extra_data_size: u16 = @intCast(extra_data_buf.items.len);
+ try data_writer.writeInt(u16, extra_data_size, .little);
+ try data_writer.writeAll(extra_data_buf.items);
+ }
+
+ /// Compiles a TOOLBAR resource: builds the data (four header words followed by one
+ /// u16 per button or separator) in a temporary buffer, then writes the resource header
+ /// and the buffered data to `writer`.
+ /// Allocation errors and errors from `writer` are returned to the caller.
+ pub fn writeToolbar(self: *Compiler, node: *Node.Toolbar, writer: anytype) !void {
+     var toolbar_data = std.ArrayList(u8).init(self.allocator);
+     defer toolbar_data.deinit();
+     const data_writer = toolbar_data.writer();
+
+     const width = evaluateNumberExpression(node.button_width, self.source, self.input_code_pages);
+     const height = evaluateNumberExpression(node.button_height, self.source, self.input_code_pages);
+     // Number of buttons is guaranteed by the parser to be within maxInt(u16).
+     const button_count: u16 = @intCast(node.buttons.len);
+
+     // The leading 1 is assumed to be some sort of version.
+     // TODO: Try to find something mentioning this
+     const header_words = [_]u16{ 1, width.asWord(), height.asWord(), button_count };
+     for (header_words) |word| {
+         try data_writer.writeInt(u16, word, .little);
+     }
+
+     for (node.buttons) |entry| {
+         const entry_id: u16 = switch (entry.id) {
+             // This is always SEPARATOR, which is written as a zero id
+             .literal => separator: {
+                 std.debug.assert(entry.cast(.literal).?.token.id == .literal);
+                 break :separator 0;
+             },
+             .simple_statement => button: {
+                 const statement = entry.cast(.simple_statement).?;
+                 const number = evaluateNumberExpression(statement.value, self.source, self.input_code_pages);
+                 break :button number.asWord();
+             },
+             else => unreachable, // This is a bug in the parser
+         };
+         try data_writer.writeInt(u16, entry_id, .little);
+     }
+
+     const data_size: u32 = @intCast(toolbar_data.items.len);
+     var header = try self.resourceHeader(node.id, node.type, .{
+         .data_size = data_size,
+     });
+     defer header.deinit(self.allocator);
+     header.applyMemoryFlags(node.common_resource_attributes, self.source);
+     try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+     var data_stream = std.io.fixedBufferStream(toolbar_data.items);
+     try writeResourceData(writer, data_stream.reader(), data_size);
+ }
+
+ /// Weight and italic carry over from previous FONT statements within a single resource,
+ /// so they need to be parsed ahead-of-time and stored
+ const FontStatementValues = struct {
+ /// Font weight; `writeDialogFont` writes it as a u16 for DIALOGEX resources only.
+ weight: u16 = 0,
+ /// Italic flag; `writeDialogFont` writes it as a single byte for DIALOGEX resources only.
+ italic: bool = false,
+ /// The FONT statement node that supplies the point size, optional char set and typeface.
+ node: *Node.FontStatement,
+ };
+
+ /// Writes the font portion of a dialog header to `writer`: the point size, then (for
+ /// `.dialogex` only) the weight and italic flag, then a char set byte, and finally
+ /// the typeface as UTF-16 including its terminator.
+ ///
+ /// The char set byte is written whenever the FONT statement specifies one; when it
+ /// does not, `.dialogex` gets the value 1 and other resources get no byte at all.
+ /// Errors from `writer` and from string parsing are returned to the caller.
+ pub fn writeDialogFont(self: *Compiler, resource: Resource, values: FontStatementValues, writer: anytype) !void {
+     const font = values.node;
+     const is_extended = resource == .dialogex;
+
+     const point_size = evaluateNumberExpression(font.point_size, self.source, self.input_code_pages);
+     try writer.writeInt(u16, point_size.asWord(), .little);
+
+     if (is_extended) {
+         try writer.writeInt(u16, values.weight, .little);
+         try writer.writeInt(u8, @intFromBool(values.italic), .little);
+     }
+
+     if (font.char_set) |char_set_expression| {
+         const char_set = evaluateNumberExpression(char_set_expression, self.source, self.input_code_pages);
+         // Only the low byte of the evaluated value is kept.
+         const char_set_byte: u8 = @truncate(char_set.value);
+         try writer.writeInt(u8, char_set_byte, .little);
+     } else if (is_extended) {
+         try writer.writeInt(u8, 1, .little); // DEFAULT_CHARSET
+     }
+
+     const typeface_wide = try self.parseQuotedStringAsWideString(font.typeface);
+     defer self.allocator.free(typeface_wide);
+     // The slice is extended by one code unit so the terminator is written too.
+     const typeface_with_terminator = typeface_wide[0 .. typeface_wide.len + 1];
+     try writer.writeAll(std.mem.sliceAsBytes(typeface_with_terminator));
+ }
+
+ /// Compiles a MENU or MENUEX resource: builds the menu data in a temporary buffer
+ /// via `writeMenuData`, then writes the resource header and the buffered data to
+ /// `writer`.
+ ///
+ /// If the menu data would exceed maxInt(u32) bytes, a `resource_data_size_exceeds_max`
+ /// error is recorded and the result of `addErrorDetailsAndFail` is returned.
+ pub fn writeMenu(self: *Compiler, node: *Node.Menu, writer: anytype) !void {
+     var menu_data = std.ArrayList(u8).init(self.allocator);
+     defer menu_data.deinit();
+     // Cap the data at maxInt(u32) bytes so that its real size can always be stored
+     // in the header's u32 data length field.
+     var capped_writer = limitedWriter(menu_data.writer(), std.math.maxInt(u32));
+     const data_writer = capped_writer.writer();
+
+     const resource = Resource.fromString(SourceBytes{
+         .slice = node.type.slice(self.source),
+         .code_page = self.input_code_pages.getForToken(node.type),
+     });
+     std.debug.assert(resource == .menu or resource == .menuex);
+
+     self.writeMenuData(node, data_writer, resource) catch |err| switch (err) {
+         error.NoSpaceLeft => return self.addErrorDetailsAndFail(.{
+             .err = .resource_data_size_exceeds_max,
+             .token = node.id,
+         }),
+         else => |other| return other,
+     };
+
+     // The capped writer above never lets more than maxInt(u32) bytes through, so
+     // this cast cannot fail.
+     const data_size: u32 = @intCast(menu_data.items.len);
+     var header = try self.resourceHeader(node.id, node.type, .{
+         .data_size = data_size,
+     });
+     defer header.deinit(self.allocator);
+     header.applyMemoryFlags(node.common_resource_attributes, self.source);
+     header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);
+     try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+     var data_stream = std.io.fixedBufferStream(menu_data.items);
+     try writeResourceData(writer, data_stream.reader(), data_size);
+ }
+
+ /// Writes the menu header followed by every top-level item (and, through
+ /// `writeMenuItem`, their children) to `data_writer`.
+ ///
+ /// `data_writer` is expected to be a LimitedWriter limited to u32, so any write in
+ /// this function may return error.NoSpaceLeft.
+ pub fn writeMenuData(self: *Compiler, node: *Node.Menu, data_writer: anytype, resource: Resource) !void {
+     // Menu header: version, then cbHeaderSize.
+     // Extra bytes (`rgbExtra`) may follow this header, but they are always
+     // zero-length for us, so nothing is written for them (their length is inferred
+     // from cbHeaderSize).
+     //   MENU   => rgbExtra: [cbHeaderSize]u8
+     //   MENUEX => rgbExtra: [cbHeaderSize-4]u8
+     const is_plain_menu = resource == .menu;
+     const header_words = if (is_plain_menu) [2]u16{ 0, 0 } else [2]u16{ 1, 4 };
+     try data_writer.writeInt(u16, header_words[0], .little); // version
+     try data_writer.writeInt(u16, header_words[1], .little); // cbHeaderSize
+
+     if (resource == .menuex) {
+         const help_id: u32 = if (node.help_id) |help_id_node|
+             evaluateNumberExpression(help_id_node, self.source, self.input_code_pages).value
+         else
+             0;
+         try data_writer.writeInt(u32, help_id, .little);
+     }
+
+     const item_count = node.items.len;
+     for (node.items, 0..) |item, index| {
+         const is_final_item = index + 1 == item_count;
+         try self.writeMenuItem(item, data_writer, is_final_item);
+     }
+ }
+
+ /// Writes one menu item node, and recursively any child items it contains, to `writer`.
+ ///
+ /// Handles `.menu_item_separator`, `.menu_item`, `.popup`, `.menu_item_ex` and
+ /// `.popup_ex` nodes; any other node id is `unreachable`.
+ /// `is_last_of_parent` marks the item as the final entry of its parent's item list
+ /// (`markLast` for MENU items, the `MF.END` bit for MENUEX items).
+ /// Errors from `writer` and from string parsing are returned to the caller.
+ pub fn writeMenuItem(self: *Compiler, node: *Node, writer: anytype, is_last_of_parent: bool) !void {
+     switch (node.id) {
+         .menu_item_separator => {
+             // This is the 'alternate compatibility form' of the separator, see
+             // https://devblogs.microsoft.com/oldnewthing/20080710-00/?p=21673
+             //
+             // The 'correct' way is to set the MF_SEPARATOR flag, but the Win32 RC
+             // compiler still uses this alternate form, so that's what we use too.
+             var flags = res.MenuItemFlags{};
+             if (is_last_of_parent) flags.markLast();
+             try writer.writeInt(u16, flags.value, .little);
+             try writer.writeInt(u16, 0, .little); // id
+             try writer.writeInt(u16, 0, .little); // null-terminated UTF-16 text
+         },
+         .menu_item => {
+             const menu_item = @fieldParentPtr(Node.MenuItem, "base", node);
+             var flags = res.MenuItemFlags{};
+             for (menu_item.option_list) |option_token| {
+                 // This failing would be a bug in the parser
+                 const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable;
+                 flags.apply(option);
+             }
+             if (is_last_of_parent) flags.markLast();
+             try writer.writeInt(u16, flags.value, .little);
+
+             const result = evaluateNumberExpression(menu_item.result, self.source, self.input_code_pages);
+             try writer.writeInt(u16, result.asWord(), .little);
+
+             const text = try self.parseQuotedStringAsWideString(menu_item.text);
+             defer self.allocator.free(text);
+             // The slice is extended by one code unit so the terminator is written too.
+             try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
+         },
+         .popup => {
+             const popup = @fieldParentPtr(Node.Popup, "base", node);
+             var flags = res.MenuItemFlags{ .value = res.MF.POPUP };
+             for (popup.option_list) |option_token| {
+                 // This failing would be a bug in the parser
+                 const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable;
+                 flags.apply(option);
+             }
+             if (is_last_of_parent) flags.markLast();
+             try writer.writeInt(u16, flags.value, .little);
+
+             const text = try self.parseQuotedStringAsWideString(popup.text);
+             defer self.allocator.free(text);
+             // The slice is extended by one code unit so the terminator is written too.
+             try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
+
+             // A popup has no id field; its child items follow the text directly.
+             for (popup.items, 0..) |item, i| {
+                 const is_last = i == popup.items.len - 1;
+                 try self.writeMenuItem(item, writer, is_last);
+             }
+         },
+         inline .menu_item_ex, .popup_ex => |node_type| {
+             const menu_item = @fieldParentPtr(node_type.Type(), "base", node);
+
+             // Type, state and id are each a u32 that defaults to 0 when omitted.
+             if (menu_item.type) |type_expression| {
+                 const value = evaluateNumberExpression(type_expression, self.source, self.input_code_pages);
+                 try writer.writeInt(u32, value.value, .little);
+             } else {
+                 try writer.writeInt(u32, 0, .little);
+             }
+
+             if (menu_item.state) |state| {
+                 const value = evaluateNumberExpression(state, self.source, self.input_code_pages);
+                 try writer.writeInt(u32, value.value, .little);
+             } else {
+                 try writer.writeInt(u32, 0, .little);
+             }
+
+             if (menu_item.id) |id| {
+                 const value = evaluateNumberExpression(id, self.source, self.input_code_pages);
+                 try writer.writeInt(u32, value.value, .little);
+             } else {
+                 try writer.writeInt(u32, 0, .little);
+             }
+
+             var flags: u16 = 0;
+             if (is_last_of_parent) flags |= comptime @as(u16, @intCast(res.MF.END));
+             // This constant doesn't seem to have a named #define, it's different than MF_POPUP
+             if (node_type == .popup_ex) flags |= 0x01;
+             try writer.writeInt(u16, flags, .little);
+
+             const text = try self.parseQuotedStringAsWideString(menu_item.text);
+             defer self.allocator.free(text);
+             // The slice is extended by one code unit so the terminator is written too.
+             try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
+
+             // Only the combination of the flags u16 and the text bytes can cause
+             // non-DWORD alignment, so we can just use the byte length of those
+             // two values to realign to DWORD alignment.
+             const relevant_bytes = 2 + (text.len + 1) * 2;
+             try writeDataPadding(writer, @intCast(relevant_bytes));
+
+             if (node_type == .popup_ex) {
+                 if (menu_item.help_id) |help_id_node| {
+                     const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages);
+                     try writer.writeInt(u32, help_id.value, .little);
+                 } else {
+                     try writer.writeInt(u32, 0, .little);
+                 }
+
+                 for (menu_item.items, 0..) |item, i| {
+                     const is_last = i == menu_item.items.len - 1;
+                     try self.writeMenuItem(item, writer, is_last);
+                 }
+             }
+         },
+         else => unreachable,
+     }
+ }
+
+ /// Compiles a VERSIONINFO resource: builds the root version node (header, key,
+ /// fixed file info and all block statements) in a temporary buffer limited to
+ /// maxInt(u16) bytes, patches the node's size into the first two bytes, then writes
+ /// the resource header and the buffered data to `writer`.
+ ///
+ /// A warning and a note are emitted for every FILEVERSION/PRODUCTVERSION part whose
+ /// evaluated number has `is_long` set. If writing a block statement runs out of
+ /// space, `version_node_size_exceeds_max` error details are added and the result of
+ /// `addErrorDetailsAndFail` is returned.
+ pub fn writeVersionInfo(self: *Compiler, node: *Node.VersionInfo, writer: anytype) !void {
+ var data_buffer = std.ArrayList(u8).init(self.allocator);
+ defer data_buffer.deinit();
+ // The node's length field (which is inclusive of the length of all of its children) is a u16
+ // so limit the node's data size so that we know we can always specify the real size.
+ var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u16));
+ const data_writer = limited_writer.writer();
+
+ try data_writer.writeInt(u16, 0, .little); // placeholder size
+ try data_writer.writeInt(u16, res.FixedFileInfo.byte_len, .little);
+ try data_writer.writeInt(u16, res.VersionNode.type_binary, .little);
+ // The key is written including one extra code unit past its length (the terminator).
+ const key_bytes = std.mem.sliceAsBytes(res.FixedFileInfo.key[0 .. res.FixedFileInfo.key.len + 1]);
+ try data_writer.writeAll(key_bytes);
+ // The number of bytes written up to this point is always the same, since the name
+ // of the node is a constant (FixedFileInfo.key). The total number of bytes
+ // written so far is 38, so we need 2 padding bytes to get back to DWORD alignment
+ try data_writer.writeInt(u16, 0, .little);
+
+ // Start from the defaults and apply each fixed-info statement in source order,
+ // so later statements overwrite earlier ones.
+ var fixed_file_info = res.FixedFileInfo{};
+ for (node.fixed_info) |fixed_info| {
+ switch (fixed_info.id) {
+ .version_statement => {
+ const version_statement = @fieldParentPtr(Node.VersionStatement, "base", fixed_info);
+ const version_type = rc.VersionInfo.map.get(version_statement.type.slice(self.source)).?;
+
+ // Ensure that all parts are cleared for each version, to properly account for
+ // potential duplicate PRODUCTVERSION/FILEVERSION statements
+ switch (version_type) {
+ .file_version => @memset(&fixed_file_info.file_version.parts, 0),
+ .product_version => @memset(&fixed_file_info.product_version.parts, 0),
+ else => unreachable,
+ }
+
+ for (version_statement.parts, 0..) |part, i| {
+ const part_value = evaluateNumberExpression(part, self.source, self.input_code_pages);
+ // The part is still stored below (as a u16 via asWord); this only reports it.
+ if (part_value.is_long) {
+ try self.addErrorDetails(.{
+ .err = .rc_would_error_u16_with_l_suffix,
+ .type = .warning,
+ .token = part.getFirstToken(),
+ .token_span_end = part.getLastToken(),
+ .extra = .{ .statement_with_u16_param = switch (version_type) {
+ .file_version => .fileversion,
+ .product_version => .productversion,
+ else => unreachable,
+ } },
+ });
+ try self.addErrorDetails(.{
+ .err = .rc_would_error_u16_with_l_suffix,
+ .print_source_line = false,
+ .type = .note,
+ .token = part.getFirstToken(),
+ .token_span_end = part.getLastToken(),
+ .extra = .{ .statement_with_u16_param = switch (version_type) {
+ .file_version => .fileversion,
+ .product_version => .productversion,
+ else => unreachable,
+ } },
+ });
+ }
+ switch (version_type) {
+ .file_version => {
+ fixed_file_info.file_version.parts[i] = part_value.asWord();
+ },
+ .product_version => {
+ fixed_file_info.product_version.parts[i] = part_value.asWord();
+ },
+ else => unreachable,
+ }
+ }
+ },
+ .simple_statement => {
+ const statement = @fieldParentPtr(Node.SimpleStatement, "base", fixed_info);
+ const statement_type = rc.VersionInfo.map.get(statement.identifier.slice(self.source)).?;
+ const value = evaluateNumberExpression(statement.value, self.source, self.input_code_pages);
+ switch (statement_type) {
+ .file_flags_mask => fixed_file_info.file_flags_mask = value.value,
+ .file_flags => fixed_file_info.file_flags = value.value,
+ .file_os => fixed_file_info.file_os = value.value,
+ .file_type => fixed_file_info.file_type = value.value,
+ .file_subtype => fixed_file_info.file_subtype = value.value,
+ else => unreachable,
+ }
+ },
+ else => unreachable,
+ }
+ }
+ try fixed_file_info.write(data_writer);
+
+ // Each block statement becomes a child node; `data_buffer` is passed along so the
+ // child can compute padding and patch its own size fields.
+ for (node.block_statements) |statement| {
+ self.writeVersionNode(statement, data_writer, &data_buffer) catch |err| switch (err) {
+ error.NoSpaceLeft => {
+ try self.addErrorDetails(.{
+ .err = .version_node_size_exceeds_max,
+ .token = node.id,
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .version_node_size_exceeds_max,
+ .type = .note,
+ .token = statement.getFirstToken(),
+ .token_span_end = statement.getLastToken(),
+ });
+ },
+ else => |e| return e,
+ };
+ }
+
+ // We know that data_buffer.items.len is within the limits of a u16, since we
+ // limited the writer to maxInt(u16)
+ const data_size: u16 = @intCast(data_buffer.items.len);
+ // And now that we know the full size of this node (including its children), set its size
+ std.mem.writeInt(u16, data_buffer.items[0..2], data_size, .little);
+
+ var header = try self.resourceHeader(node.id, node.versioninfo, .{
+ .data_size = data_size,
+ });
+ defer header.deinit(self.allocator);
+
+ header.applyMemoryFlags(node.common_resource_attributes, self.source);
+
+ try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
+
+ var data_fbs = std.io.fixedBufferStream(data_buffer.items);
+ try writeResourceData(writer, data_fbs.reader(), data_size);
+ }
+
+ /// Writes one version node (a `.block` or `.block_value` node; any other node id is
+ /// `unreachable`) and, for blocks, all of its children to `writer`.
+ ///
+ /// Expects writer to be a LimitedWriter limited to u16, meaning all writes to
+ /// the writer within this function could return error.NoSpaceLeft, and that buf.items.len
+ /// will never be able to exceed maxInt(u16).
+ ///
+ /// `buf` must be the buffer that `writer` appends to: its length is used to compute
+ /// padding, and the size, data size and data type fields are first written as
+ /// placeholders and then patched in place through `buf.items`.
+ pub fn writeVersionNode(self: *Compiler, node: *Node, writer: anytype, buf: *std.ArrayList(u8)) !void {
+     // We can assume that buf.items.len will never be able to exceed the limits of a u16
+     try writeDataPadding(writer, @as(u16, @intCast(buf.items.len)));
+
+     // Offsets (not slices) are remembered here, since `buf` keeps being appended to
+     // before the placeholders are patched.
+     const node_and_children_size_offset = buf.items.len;
+     try writer.writeInt(u16, 0, .little); // placeholder for size
+     const data_size_offset = buf.items.len;
+     try writer.writeInt(u16, 0, .little); // placeholder for data size
+     const data_type_offset = buf.items.len;
+     // Data type is string unless the node contains values that are numbers.
+     try writer.writeInt(u16, res.VersionNode.type_string, .little);
+
+     switch (node.id) {
+         inline .block, .block_value => |node_type| {
+             const block_or_value = @fieldParentPtr(node_type.Type(), "base", node);
+             const parsed_key = try self.parseQuotedStringAsWideString(block_or_value.key);
+             defer self.allocator.free(parsed_key);
+
+             // The key is cut at its first NUL and written with one terminator.
+             const parsed_key_to_first_null = std.mem.sliceTo(parsed_key, 0);
+             try writer.writeAll(std.mem.sliceAsBytes(parsed_key_to_first_null[0 .. parsed_key_to_first_null.len + 1]));
+
+             var has_number_value: bool = false;
+             for (block_or_value.values) |value_value_node_uncasted| {
+                 const value_value_node = value_value_node_uncasted.cast(.block_value_value).?;
+                 if (value_value_node.expression.isNumberExpression()) {
+                     has_number_value = true;
+                     break;
+                 }
+             }
+             // The units used here are dependent on the type. If there are any numbers, then
+             // this is a byte count. If there are only strings, then this is a count of
+             // UTF-16 code units.
+             //
+             // The Win32 RC compiler miscompiles this count in the case of values that
+             // have a mix of numbers and strings. This is detected and a warning is emitted
+             // during parsing, so we can just do the correct thing here.
+             var values_size: usize = 0;
+
+             try writeDataPadding(writer, @intCast(buf.items.len));
+
+             for (block_or_value.values, 0..) |value_value_node_uncasted, i| {
+                 const value_value_node = value_value_node_uncasted.cast(.block_value_value).?;
+                 const value_node = value_value_node.expression;
+                 if (value_node.isNumberExpression()) {
+                     const number = evaluateNumberExpression(value_node, self.source, self.input_code_pages);
+                     // This is used to write u16 or u32 depending on the number's suffix
+                     const data_wrapper = Data{ .number = number };
+                     try data_wrapper.write(writer);
+                     // Numbers use byte count
+                     values_size += if (number.is_long) 4 else 2;
+                 } else {
+                     std.debug.assert(value_node.isStringLiteral());
+                     const literal_node = value_node.cast(.literal).?;
+                     const parsed_value = try self.parseQuotedStringAsWideString(literal_node.token);
+                     defer self.allocator.free(parsed_value);
+
+                     const parsed_to_first_null = std.mem.sliceTo(parsed_value, 0);
+                     try writer.writeAll(std.mem.sliceAsBytes(parsed_to_first_null));
+                     // Strings use UTF-16 code-unit count including the null-terminator, but
+                     // only if there are no number values in the list.
+                     var value_size = parsed_to_first_null.len;
+                     if (has_number_value) value_size *= 2; // 2 bytes per UTF-16 code unit
+                     values_size += value_size;
+                     // The null-terminator is only included if there's a trailing comma
+                     // or this is the last value. If the value evaluates to empty, then
+                     // it never gets a null terminator. If there was an explicit null-terminator
+                     // in the string, we still need to potentially add one since we already
+                     // sliced to the terminator.
+                     const is_last = i == block_or_value.values.len - 1;
+                     const is_empty = parsed_to_first_null.len == 0;
+                     const is_only = block_or_value.values.len == 1;
+                     if ((!is_empty or !is_only) and (is_last or value_value_node.trailing_comma)) {
+                         try writer.writeInt(u16, 0, .little);
+                         values_size += if (has_number_value) 2 else 1;
+                     }
+                 }
+             }
+             // Patch the data size placeholder now that all values have been written.
+             const data_size_slice = buf.items[data_size_offset..];
+             std.mem.writeInt(u16, data_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(values_size)), .little);
+
+             if (has_number_value) {
+                 const data_type_slice = buf.items[data_type_offset..];
+                 std.mem.writeInt(u16, data_type_slice[0..@sizeOf(u16)], res.VersionNode.type_binary, .little);
+             }
+
+             if (node_type == .block) {
+                 const block = block_or_value;
+                 for (block.children) |child| {
+                     try self.writeVersionNode(child, writer, buf);
+                 }
+             }
+         },
+         else => unreachable,
+     }
+
+     // The size covers this node and all of its children, measured from the node's own
+     // size field (the padding written before it is not included).
+     const node_and_children_size = buf.items.len - node_and_children_size_offset;
+     const node_and_children_size_slice = buf.items[node_and_children_size_offset..];
+     std.mem.writeInt(u16, node_and_children_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(node_and_children_size)), .little);
+ }
+
+ /// Registers every string of a STRINGTABLE node in the compiler's string tables.
+ ///
+ /// The language is taken from the node's optional statements when they contain a
+ /// LANGUAGE statement, and from the current compiler state otherwise. Defining an ID
+ /// that is already set for that language reports an error at the new string plus a
+ /// note at the earlier definition, and then fails.
+ pub fn writeStringTable(self: *Compiler, node: *Node.StringTable) !void {
+     const table_language = getLanguageFromOptionalStatements(node.optional_statements, self.source, self.input_code_pages) orelse self.state.language;
+
+     for (node.strings) |base_node| {
+         const entry = @fieldParentPtr(Node.StringTableString, "base", base_node);
+         const id_data = try self.evaluateDataExpression(entry.id);
+         const id = id_data.number.asWord();
+
+         self.state.string_tables.set(
+             self.arena,
+             table_language,
+             id,
+             entry.string,
+             &node.base,
+             self.source,
+             self.input_code_pages,
+             self.state.version,
+             self.state.characteristics,
+         ) catch |err| switch (err) {
+             error.OutOfMemory => return error.OutOfMemory,
+             error.StringAlreadyDefined => {
+                 // Both diagnostics point at string tokens rather than at the ids; pointing
+                 // at the ids would require storing an id token for every string, which
+                 // doesn't seem worth it just for slightly better error messages.
+                 try self.addErrorDetails(ErrorDetails{
+                     .err = .string_already_defined,
+                     .token = entry.string,
+                     .extra = .{ .string_and_language = .{ .id = id, .language = table_language } },
+                 });
+                 // The note points at the string that claimed this id first.
+                 const previous_table = self.state.string_tables.tables.getPtr(table_language).?;
+                 const previous_token = previous_table.get(id).?;
+                 return self.addErrorDetailsAndFail(ErrorDetails{
+                     .err = .string_already_defined,
+                     .type = .note,
+                     .token = previous_token,
+                     .extra = .{ .string_and_language = .{ .id = id, .language = table_language } },
+                 });
+             },
+         };
+     }
+ }
+
+ /// Applies a top-level LANGUAGE statement to the compiler state.
+ /// Expects `node` to be a top-level LANGUAGE statement; both evaluated IDs are
+ /// truncated to the width of the corresponding language field.
+ pub fn writeLanguageStatement(self: *Compiler, node: *Node.LanguageStatement) void {
+     const primary_number = Compiler.evaluateNumberExpression(node.primary_language_id, self.source, self.input_code_pages);
+     const sublanguage_number = Compiler.evaluateNumberExpression(node.sublanguage_id, self.source, self.input_code_pages);
+
+     const current = &self.state.language;
+     current.primary_language_id = @truncate(primary_number.value);
+     current.sublanguage_id = @truncate(sublanguage_number.value);
+ }
+
+ /// Applies a top-level VERSION or CHARACTERISTICS statement to the compiler state.
+ /// Expects `node` to be one of those two statements; anything else is unreachable.
+ pub fn writeTopLevelSimpleStatement(self: *Compiler, node: *Node.SimpleStatement) void {
+     const number = Compiler.evaluateNumberExpression(node.value, self.source, self.input_code_pages);
+     const keyword = rc.TopLevelKeywords.map.get(node.identifier.slice(self.source)).?;
+     // Pick the state field the statement targets, then store the evaluated value in it.
+     const target = switch (keyword) {
+         .version => &self.state.version,
+         .characteristics => &self.state.characteristics,
+         else => unreachable,
+     };
+     target.* = number.value;
+ }
+
+ /// Optional overrides accepted by `resourceHeader`.
+ pub const ResourceHeaderOptions = struct {
+     /// Size of the resource data that will follow the header.
+     data_size: DWORD = 0,
+     /// Language to use instead of the compiler's current language; null means
+     /// "use the current compiler state".
+     language: ?res.Language = null,
+ };
+
+ /// Builds a `ResourceHeader` for the resource identified by `id_token` and `type_token`,
+ /// using the compiler's current version/characteristics and either `options.language`
+ /// or the compiler's current language.
+ ///
+ /// If the id or the type looks like an ordinal but contains non-ASCII digit characters,
+ /// an error plus a note carrying the ordinal value computed by
+ /// `NameOrOrdinal.maybeNonAsciiOrdinalFromString` are reported and the call fails.
+ pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) !ResourceHeader {
+     const id_bytes = self.sourceBytesForToken(id_token);
+     const type_bytes = self.sourceBytesForToken(type_token);
+     const language = options.language orelse self.state.language;
+
+     return ResourceHeader.init(
+         self.allocator,
+         id_bytes,
+         type_bytes,
+         options.data_size,
+         language,
+         self.state.version,
+         self.state.characteristics,
+     ) catch |err| {
+         // Both ordinal errors are reported identically; only the offending token differs.
+         const offending: struct { bytes: SourceBytes, token: Token } = switch (err) {
+             error.OutOfMemory => return error.OutOfMemory,
+             error.TypeNonAsciiOrdinal => .{ .bytes = type_bytes, .token = type_token },
+             error.IdNonAsciiOrdinal => .{ .bytes = id_bytes, .token = id_token },
+         };
+         const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(offending.bytes).?;
+         try self.addErrorDetails(.{
+             .err = .invalid_digit_character_in_ordinal,
+             .type = .err,
+             .token = offending.token,
+         });
+         return self.addErrorDetailsAndFail(.{
+             .err = .win32_non_ascii_ordinal,
+             .type = .note,
+             .token = offending.token,
+             .print_source_line = false,
+             .extra = .{ .number = win32_rc_ordinal.ordinal },
+         });
+     };
+ }
+
+ /// The header written in front of each resource's data.
+ pub const ResourceHeader = struct {
+     name_value: NameOrOrdinal,
+     type_value: NameOrOrdinal,
+     language: res.Language,
+     memory_flags: MemoryFlags,
+     data_size: DWORD,
+     version: DWORD,
+     characteristics: DWORD,
+     data_version: DWORD = 0,
+
+     pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal };
+
+     /// Builds a header from the raw id and type bytes.
+     /// A type that maps to a predefined `res.RT` constant is stored as that ordinal;
+     /// otherwise the type, and always the id, go through `NameOrOrdinal.fromString`.
+     /// A value that ends up as a name but is recognised by
+     /// `NameOrOrdinal.maybeNonAsciiOrdinalFromString` is rejected with the matching
+     /// `*NonAsciiOrdinal` error. Release the result with `deinit` using the same allocator.
+     pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader {
+         const type_value = if (res.RT.fromResource(Resource.fromString(type_bytes))) |rt_constant|
+             NameOrOrdinal{ .ordinal = @intFromEnum(rt_constant) }
+         else
+             try NameOrOrdinal.fromString(allocator, type_bytes);
+         errdefer type_value.deinit(allocator);
+         if (type_value == .name and NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes) != null) {
+             return error.TypeNonAsciiOrdinal;
+         }
+
+         const name_value = try NameOrOrdinal.fromString(allocator, id_bytes);
+         errdefer name_value.deinit(allocator);
+         if (name_value == .name and NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes) != null) {
+             return error.IdNonAsciiOrdinal;
+         }
+
+         return ResourceHeader{
+             .name_value = name_value,
+             .type_value = type_value,
+             .data_size = data_size,
+             .memory_flags = MemoryFlags.defaults(type_value.predefinedResourceType()),
+             .language = language,
+             .version = version,
+             .characteristics = characteristics,
+         };
+     }
+
+     /// Releases the name and type values.
+     pub fn deinit(self: ResourceHeader, allocator: Allocator) void {
+         self.name_value.deinit(allocator);
+         self.type_value.deinit(allocator);
+     }
+
+     pub const SizeInfo = struct {
+         /// Total header size in bytes.
+         bytes: u32,
+         /// Number of padding bytes written after the NAME field.
+         padding_after_name: u2,
+     };
+
+     /// Computes the header size: 8 bytes (DataSize + HeaderSize), the encoded name and
+     /// type, the padding reported by `numPaddingBytesNeeded`, and the 16 bytes of
+     /// trailing fixed-size fields. Returns error.Overflow if the total does not fit in a u32.
+     fn calcSize(self: ResourceHeader) error{Overflow}!SizeInfo {
+         const name_len = std.math.cast(u32, self.name_value.byteLen()) orelse return error.Overflow;
+         const type_len = std.math.cast(u32, self.type_value.byteLen()) orelse return error.Overflow;
+
+         var total = try std.math.add(u32, 8, name_len);
+         total = try std.math.add(u32, total, type_len);
+         const padding = numPaddingBytesNeeded(total);
+         total = try std.math.add(u32, total, padding);
+         total = try std.math.add(u32, total, 16);
+         return SizeInfo{ .bytes = total, .padding_after_name = padding };
+     }
+
+     /// Writes the header, asserting that its size cannot overflow. Only for callers
+     /// that fully control the name and type values.
+     pub fn writeAssertNoOverflow(self: ResourceHeader, writer: anytype) !void {
+         const size_info = self.calcSize() catch unreachable;
+         return self.writeSizeInfo(writer, size_info);
+     }
+
+     /// Writes the header; if its size overflows, appends a
+     /// `resource_data_size_exceeds_max` diagnostic for `err_ctx.token` and fails
+     /// with error.CompileError.
+     pub fn write(self: ResourceHeader, writer: anytype, err_ctx: errors.DiagnosticsContext) !void {
+         if (self.calcSize()) |size_info| {
+             return self.writeSizeInfo(writer, size_info);
+         } else |_| {
+             try err_ctx.diagnostics.append(.{
+                 .err = .resource_data_size_exceeds_max,
+                 .token = err_ctx.token,
+             });
+             return error.CompileError;
+         }
+     }
+
+     /// Serializes every header field in little-endian order.
+     fn writeSizeInfo(self: ResourceHeader, writer: anytype, size_info: SizeInfo) !void {
+         // DataSize, then HeaderSize
+         try writer.writeInt(DWORD, self.data_size, .little);
+         try writer.writeInt(DWORD, size_info.bytes, .little);
+         // TYPE comes before NAME, followed by the padding
+         try self.type_value.write(writer);
+         try self.name_value.write(writer);
+         try writer.writeByteNTimes(0, size_info.padding_after_name);
+
+         // DataVersion, MemoryFlags, LanguageId, Version, Characteristics
+         try writer.writeInt(DWORD, self.data_version, .little);
+         try writer.writeInt(WORD, self.memory_flags.value, .little);
+         try writer.writeInt(WORD, self.language.asInt(), .little);
+         try writer.writeInt(DWORD, self.version, .little);
+         try writer.writeInt(DWORD, self.characteristics, .little);
+     }
+
+     /// Returns the predefined resource type of `type_value`, if it has one.
+     pub fn predefinedResourceType(self: ResourceHeader) ?res.RT {
+         return self.type_value.predefinedResourceType();
+     }
+
+     /// Applies the given common resource attribute tokens to `memory_flags`.
+     pub fn applyMemoryFlags(self: *ResourceHeader, tokens: []Token, source: []const u8) void {
+         applyToMemoryFlags(&self.memory_flags, tokens, source);
+     }
+
+     /// Applies LANGUAGE, VERSION and CHARACTERISTICS optional statements to this header.
+     pub fn applyOptionalStatements(self: *ResourceHeader, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void {
+         applyToOptionalStatements(&self.language, &self.version, &self.characteristics, statements, source, code_page_lookup);
+     }
+ };
+
+ /// Applies each common resource attribute token to `flags`, in source order.
+ /// Every token must be a known attribute keyword (the lookup is unwrapped).
+ fn applyToMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void {
+     for (tokens) |token| {
+         flags.set(rc.CommonResourceAttributes.map.get(token.slice(source)).?);
+     }
+ }
+
+ /// RT_GROUP_ICON and RT_GROUP_CURSOR have their own special rules for memory flags
+ fn applyToGroupMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void {
+     // There's probably a cleaner implementation of this, but this will result in the same
+     // flags as the Win32 RC compiler for all 986,410 K-permutations of memory flags
+     // for an ICON resource.
+     //
+     // This was arrived at by iterating over the permutations and creating a
+     // list where each line looks something like this:
+     //   MOVEABLE PRELOAD -> 0x1050 (MOVEABLE|PRELOAD|DISCARDABLE)
+     //
+     // and then noticing the three rules implemented below.
+     const Attribute = rc.CommonResourceAttributes;
+     const original_value = flags.value;
+
+     // Rule 1: any permutation without PRELOAD in it just uses the default flags.
+     var specified = std.enums.EnumSet(Attribute).initEmpty();
+     for (tokens) |token| {
+         specified.insert(Attribute.map.get(token.slice(source)).?);
+     }
+     if (!specified.contains(.preload)) return;
+
+     // Rule 2: if applying only the PRELOAD and LOADONCALL flags (in order) leaves the
+     // flags unchanged, the default flags are used. For example, `PRELOAD LOADONCALL`
+     // results in default flags, but `LOADONCALL PRELOAD` ends with PRELOAD set.
+     for (tokens) |token| {
+         const attribute = Attribute.map.get(token.slice(source)).?;
+         if (attribute == .preload or attribute == .loadoncall) flags.set(attribute);
+     }
+     if (flags.value == original_value) return;
+
+     // Rule 3: if none of DISCARDABLE, SHARED, or PURE is specified, then PRELOAD
+     // implies `flags &= ~SHARED` and LOADONCALL implies `flags |= SHARED`.
+     const infer_shared = !(specified.contains(.discardable) or
+         specified.contains(.shared) or
+         specified.contains(.pure));
+     for (tokens) |token| {
+         flags.setGroup(Attribute.map.get(token.slice(source)).?, infer_shared);
+     }
+ }
+
+ /// Only handles the 'base' optional statements that are shared between resource types.
+ /// Statements are applied in order, so a later LANGUAGE/VERSION/CHARACTERISTICS
+ /// overrides an earlier one. Other node kinds, and simple statements whose identifier
+ /// is not in `rc.OptionalStatements`, are ignored.
+ fn applyToOptionalStatements(language: *res.Language, version: *u32, characteristics: *u32, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void {
+     for (statements) |node| {
+         switch (node.id) {
+             .language_statement => {
+                 language.* = languageFromLanguageStatement(
+                     @fieldParentPtr(Node.LanguageStatement, "base", node),
+                     source,
+                     code_page_lookup,
+                 );
+             },
+             .simple_statement => {
+                 const statement = @fieldParentPtr(Node.SimpleStatement, "base", node);
+                 const kind = rc.OptionalStatements.map.get(statement.identifier.slice(source)) orelse continue;
+                 const number = Compiler.evaluateNumberExpression(statement.value, source, code_page_lookup);
+                 const target = switch (kind) {
+                     .version => version,
+                     .characteristics => characteristics,
+                     // only VERSION and CHARACTERISTICS should be in an optional statements list
+                     else => unreachable,
+                 };
+                 target.* = number.value;
+             },
+             else => {},
+         }
+     }
+ }
+
+ /// Evaluates the primary and sublanguage expressions of a LANGUAGE statement and
+ /// returns them as a `res.Language`, truncating each value to its field's width.
+ pub fn languageFromLanguageStatement(language_statement: *const Node.LanguageStatement, source: []const u8, code_page_lookup: *const CodePageLookup) res.Language {
+     const primary_value = Compiler.evaluateNumberExpression(language_statement.primary_language_id, source, code_page_lookup).value;
+     const sublanguage_value = Compiler.evaluateNumberExpression(language_statement.sublanguage_id, source, code_page_lookup).value;
+     return res.Language{
+         .primary_language_id = @truncate(primary_value),
+         .sublanguage_id = @truncate(sublanguage_value),
+     };
+ }
+
+ /// Returns the language of the first LANGUAGE statement in `statements`,
+ /// or null if the list contains none.
+ pub fn getLanguageFromOptionalStatements(statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) ?res.Language {
+     for (statements) |node| {
+         if (node.id != .language_statement) continue;
+         return languageFromLanguageStatement(
+             @fieldParentPtr(Node.LanguageStatement, "base", node),
+             source,
+             code_page_lookup,
+         );
+     }
+     return null;
+ }
+
+ /// Writes a resource header in which every field is zero (ordinal 0 name and type,
+ /// language 0, no memory flags, no data).
+ pub fn writeEmptyResource(writer: anytype) !void {
+     const zero_ordinal = NameOrOrdinal{ .ordinal = 0 };
+     const empty_header = ResourceHeader{
+         .name_value = zero_ordinal,
+         .type_value = zero_ordinal,
+         .language = .{ .primary_language_id = 0, .sublanguage_id = 0 },
+         .memory_flags = .{ .value = 0 },
+         .data_size = 0,
+         .version = 0,
+         .characteristics = 0,
+     };
+     // Name and type are fixed-size ordinals, so the header size cannot overflow.
+     return empty_header.writeAssertNoOverflow(writer);
+ }
+
+ /// Pairs the source text of `token` with the code page that applies to it.
+ pub fn sourceBytesForToken(self: *Compiler, token: Token) SourceBytes {
+     const token_text = token.slice(self.source);
+     const token_code_page = self.input_code_pages.getForToken(token);
+     return SourceBytes{ .slice = token_text, .code_page = token_code_page };
+ }
+
+ /// Helper that calls `literals.parseQuotedStringAsWideString` with the context of
+ /// this compiler (source bytes, start column and diagnostics for `token`).
+ /// The resulting slice is allocated by `self.allocator`.
+ pub fn parseQuotedStringAsWideString(self: *Compiler, token: Token) ![:0]u16 {
+     const bytes = self.sourceBytesForToken(token);
+     const start_column = token.calculateColumn(self.source, 8, null);
+     return literals.parseQuotedStringAsWideString(self.allocator, bytes, .{
+         .start_column = start_column,
+         .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
+     });
+ }
+
+ /// Appends `details` to the compiler's diagnostics.
+ fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void {
+     return self.diagnostics.append(details);
+ }
+
+ /// Appends `details` to the diagnostics and yields error.CompileError, so callers can
+ /// write `return self.addErrorDetailsAndFail(...)`. Yields error.OutOfMemory instead
+ /// if the diagnostic could not be appended.
+ fn addErrorDetailsAndFail(self: *Compiler, details: ErrorDetails) error{ CompileError, OutOfMemory } {
+     self.addErrorDetails(details) catch |err| return err;
+     return error.CompileError;
+ }
+};
+
+ pub const OpenSearchPathError = std.fs.Dir.OpenError;
+
+ /// Opens `path` relative to `dir` after a crude validity check of the path.
+ fn openSearchPathDir(dir: std.fs.Dir, path: []const u8) OpenSearchPathError!std.fs.Dir {
+     // The path is validated up front to avoid a possible unreachable on invalid paths;
+     // see https://github.com/ziglang/zig/issues/15607 for why this is currently necessary.
+     validateSearchPath(path) catch |err| return err;
+     return dir.openDir(path, .{});
+ }
+
+ /// Very crude attempt at validating a path. This is imperfect
+ /// and AFAIK it is effectively impossible to implement perfect path
+ /// validation, since it ultimately depends on the underlying filesystem.
+ /// Note that this function won't be necessary if/when
+ /// https://github.com/ziglang/zig/issues/15607
+ /// is accepted/implemented.
+ fn validateSearchPath(path: []const u8) error{BadPathName}!void {
+     if (builtin.os.tag != .windows) {
+         // Everywhere else, only an embedded NUL byte is rejected.
+         if (std.mem.indexOfScalar(u8, path, 0)) |_| return error.BadPathName;
+         return;
+     }
+
+     // Creating the iterator returns error.BadPathName on non-Win32 namespaced paths
+     // (e.g. the NT \??\ prefix, the device \\.\ prefix, etc).
+     // Those path types are something of an unavoidable way to
+     // still hit unreachable during the openDir call.
+     var components = try std.fs.path.componentIterator(path);
+     // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
+     const forbidden_bytes = "\x00<>:\"|?*";
+     while (components.next()) |component| {
+         if (std.mem.indexOfAny(u8, component.name, forbidden_bytes)) |_| return error.BadPathName;
+     }
+ }
+
+ /// An open directory to search, together with the path it was opened from (if any).
+ pub const SearchDir = struct {
+     dir: std.fs.Dir,
+     /// When non-null, freed by `deinit` using the allocator passed to it.
+     path: ?[]const u8,
+
+     /// Closes the directory handle and frees `path` if there is one.
+     pub fn deinit(self: *SearchDir, allocator: Allocator) void {
+         self.dir.close();
+         const owned_path = self.path orelse return;
+         allocator.free(owned_path);
+     }
+ };
+
+ /// Slurps the first `size` bytes read into `slurped_header`
+ /// Every read is forwarded to `child_reader`; `bytes_read` counts all bytes read so
+ /// far (saturating), and header bytes that were never read stay zero.
+ pub fn HeaderSlurpingReader(comptime size: usize, comptime ReaderType: anytype) type {
+     return struct {
+         child_reader: ReaderType,
+         bytes_read: usize = 0,
+         slurped_header: [size]u8 = [_]u8{0x00} ** size,
+
+         const Self = @This();
+
+         pub const Error = ReaderType.Error;
+         pub const Reader = std.io.Reader(*Self, Error, read);
+
+         pub fn read(self: *Self, buf: []u8) Error!usize {
+             const amt = try self.child_reader.read(buf);
+             // Number of header bytes that have not been captured yet.
+             const missing = size -| self.bytes_read;
+             if (missing != 0) {
+                 const take = @min(amt, missing);
+                 @memcpy(self.slurped_header[self.bytes_read..][0..take], buf[0..take]);
+             }
+             self.bytes_read +|= amt;
+             return amt;
+         }
+
+         pub fn reader(self: *Self) Reader {
+             return .{ .context = self };
+         }
+     };
+ }
+
+ /// Wraps `reader` in a `HeaderSlurpingReader` that captures the first `size` bytes read.
+ pub fn headerSlurpingReader(comptime size: usize, reader: anytype) HeaderSlurpingReader(size, @TypeOf(reader)) {
+     return HeaderSlurpingReader(size, @TypeOf(reader)){ .child_reader = reader };
+ }
+
+ /// Sort of like std.io.LimitedReader, but a Writer.
+ /// Returns an error if writing the requested number of bytes
+ /// would ever exceed bytes_left, i.e. it does not always
+ /// write up to the limit and instead will error if the
+ /// limit would be breached if the entire slice was written.
+ pub fn LimitedWriter(comptime WriterType: type) type {
+     return struct {
+         inner_writer: WriterType,
+         /// Remaining budget; reduced by the amount the inner writer actually accepted.
+         bytes_left: u64,
+
+         const Self = @This();
+
+         pub const Error = error{NoSpaceLeft} || WriterType.Error;
+         pub const Writer = std.io.Writer(*Self, Error, write);
+
+         pub fn write(self: *Self, bytes: []const u8) Error!usize {
+             // All-or-nothing: a slice that would cross the limit is rejected outright.
+             if (bytes.len > self.bytes_left) {
+                 return error.NoSpaceLeft;
+             }
+             const written = try self.inner_writer.write(bytes);
+             self.bytes_left -= written;
+             return written;
+         }
+
+         pub fn writer(self: *Self) Writer {
+             return Writer{ .context = self };
+         }
+     };
+ }
+
+ /// Returns an initialised `LimitedWriter`
+ /// `bytes_left` is a `u64` to be able to take 64 bit file offsets
+ pub fn limitedWriter(inner_writer: anytype, bytes_left: u64) LimitedWriter(@TypeOf(inner_writer)) {
+     return LimitedWriter(@TypeOf(inner_writer)){
+         .inner_writer = inner_writer,
+         .bytes_left = bytes_left,
+     };
+ }
+
+ test "limitedWriter basic usage" {
+     var backing: [4]u8 = undefined;
+     var stream = std.io.fixedBufferStream(&backing);
+     var limited = limitedWriter(stream.writer(), 4);
+     const out = limited.writer();
+
+     // A write that fits is passed through.
+     try std.testing.expectEqual(@as(usize, 3), try out.write("123"));
+     try std.testing.expectEqualSlices(u8, "123", backing[0..3]);
+     // A write that would cross the limit is rejected as a whole...
+     try std.testing.expectError(error.NoSpaceLeft, out.write("45"));
+     // ...but the remaining budget can still be used by a smaller write.
+     try std.testing.expectEqual(@as(usize, 1), try out.write("4"));
+     try std.testing.expectEqualSlices(u8, "1234", backing[0..4]);
+     // Once the budget is exhausted, every non-empty write fails.
+     try std.testing.expectError(error.NoSpaceLeft, out.write("5"));
+ }
+
+ /// Collects the FONT resources seen so far so that a FONTDIR resource can be written.
+ pub const FontDir = struct {
+     fonts: std.ArrayListUnmanaged(Font) = .{},
+     /// To keep track of which ids are set and where they were set from
+     ids: std.AutoHashMapUnmanaged(u16, Token) = .{},
+
+     pub const Font = struct {
+         id: u16,
+         header_bytes: [148]u8,
+     };
+
+     /// Frees both the font list and the id map. `allocator` must be the allocator
+     /// that was passed to `add`.
+     pub fn deinit(self: *FontDir, allocator: Allocator) void {
+         self.fonts.deinit(allocator);
+         self.ids.deinit(allocator);
+     }
+
+     /// Records `font` and remembers which token defined its id.
+     /// The id must not have been added before (asserted by `putNoClobber`).
+     pub fn add(self: *FontDir, allocator: Allocator, font: Font, id_token: Token) !void {
+         try self.ids.putNoClobber(allocator, font.id, id_token);
+         // Keep `ids` and `fonts` in sync if the append below fails.
+         errdefer _ = self.ids.remove(font.id);
+         try self.fonts.append(allocator, font);
+     }
+
+     /// Writes the FONTDIR resource (header, data and padding) for all added fonts.
+     /// Writes nothing if no fonts were added.
+     pub fn writeResData(self: *FontDir, compiler: *Compiler, writer: anytype) !void {
+         if (self.fonts.items.len == 0) return;
+
+         // We know the number of fonts is limited to maxInt(u16) because fonts
+         // must have a valid and unique u16 ordinal ID (trying to specify a FONT
+         // with e.g. id 65537 will wrap around to 1 and be ignored if there's already
+         // a font with that ID in the file).
+         const num_fonts: u16 = @intCast(self.fonts.items.len);
+
+         // u16 count + [(u16 id + 150 bytes) for each font]
+         // Note: This works out to a maximum data_size of 9,961,322.
+         // The count is widened first: `comptime_int * u16` is evaluated as u16, which
+         // would overflow as soon as there are more than 431 fonts.
+         const data_size: u32 = 2 + (2 + 150) * @as(u32, num_fonts);
+
+         const header = Compiler.ResourceHeader{
+             .name_value = try NameOrOrdinal.nameFromString(compiler.allocator, .{ .slice = "FONTDIR", .code_page = .windows1252 }),
+             .type_value = NameOrOrdinal{ .ordinal = @intFromEnum(res.RT.FONTDIR) },
+             .memory_flags = res.MemoryFlags.defaults(res.RT.FONTDIR),
+             .language = compiler.state.language,
+             .version = compiler.state.version,
+             .characteristics = compiler.state.characteristics,
+             .data_size = data_size,
+         };
+         defer header.deinit(compiler.allocator);
+
+         try header.writeAssertNoOverflow(writer);
+         try writer.writeInt(u16, num_fonts, .little);
+         for (self.fonts.items) |font| {
+             // The format of the FONTDIR is a strange beast.
+             // Technically, each FONT is seemingly meant to be written as a
+             // FONTDIRENTRY with two trailing NUL-terminated strings corresponding to
+             // the 'device name' and 'face name' of the .FNT file, but:
+             //
+             // 1. When dealing with .FNT files, the Win32 implementation
+             //    gets the device name and face name from the wrong locations,
+             //    so it's basically never going to write the real device/face name
+             //    strings.
+             // 2. When dealing with files 76-140 bytes long, the Win32 implementation
+             //    can just crash (if there are no NUL bytes in the file).
+             // 3. The 32-bit Win32 rc.exe uses a 148 byte size for the portion of
+             //    the FONTDIRENTRY before the NUL-terminated strings, which
+             //    does not match the documented FONTDIRENTRY size that (presumably)
+             //    this format is meant to be using, so anything iterating the
+             //    FONTDIR according to the available documentation will get bogus results.
+             // 4. The FONT resource can be used for non-.FNT types like TTF and OTF,
+             //    in which case emulating the Win32 behavior of unconditionally
+             //    interpreting the bytes as a .FNT and trying to grab device/face names
+             //    from random bytes in the TTF/OTF file can lead to weird behavior
+             //    and errors in the Win32 implementation (for example, the device/face
+             //    name fields are offsets into the file where the NUL-terminated
+             //    string is located, but the Win32 implementation actually treats
+             //    them as signed so if they are negative then the Win32 implementation
+             //    will error; this happening for TTF fonts would just be a bug
+             //    since the TTF could otherwise be valid)
+             // 5. The FONTDIR resource doesn't actually seem to be used at all by
+             //    anything that I've found, and instead in Windows 3.0 and newer
+             //    it seems like the FONT resources are always just iterated/accessed
+             //    directly without ever looking at the FONTDIR.
+             //
+             // All of these combined means that we:
+             // - Do not need or want to emulate Win32 behavior here
+             // - For maximum simplicity and compatibility, we just write the first
+             //   148 bytes of the file without any interpretation (padded with
+             //   zeroes to get up to 148 bytes if necessary), and then
+             //   unconditionally write two NUL bytes, meaning that we always
+             //   write 'device name' and 'face name' as if they were 0-length
+             //   strings.
+             //
+             // This gives us byte-for-byte .RES compatibility in the common case while
+             // allowing us to avoid any erroneous errors caused by trying to read
+             // the face/device name from a bogus location. Note that the Win32
+             // implementation never actually writes the real device/face name here
+             // anyway (except in the bizarre case that a .FNT file has the proper
+             // device/face name offsets within a reserved section of the .FNT file)
+             // so there's no feasible way that anything can actually think that the
+             // device name/face name in the FONTDIR is reliable.
+
+             // First, the ID is written, though
+             try writer.writeInt(u16, font.id, .little);
+             try writer.writeAll(&font.header_bytes);
+             try writer.writeByteNTimes(0, 2);
+         }
+         try Compiler.writeDataPadding(writer, data_size);
+     }
+ };
+
+ /// All string tables of a compilation, keyed by language.
+ pub const StringTablesByLanguage = struct {
+     /// String tables for each language are written to the .res file in order depending on
+     /// when the first STRINGTABLE for the language was defined, and all blocks for a given
+     /// language are written contiguously.
+     /// Using an ArrayHashMap here gives us this property for free.
+     tables: std.AutoArrayHashMapUnmanaged(res.Language, StringTable) = .{},
+
+     /// Frees every per-language string table and then the map itself.
+     /// `allocator` must be the allocator that was passed to `set`.
+     pub fn deinit(self: *StringTablesByLanguage, allocator: Allocator) void {
+         // Each StringTable owns its blocks and their string lists, so it must be
+         // released before the map that stores it.
+         for (self.tables.values()) |*table| {
+             table.deinit(allocator);
+         }
+         self.tables.deinit(allocator);
+     }
+
+     /// Sets string `id` in the table for `language`, creating the table on first use.
+     /// Fails with error.StringAlreadyDefined if the id is already set for that language.
+     pub fn set(
+         self: *StringTablesByLanguage,
+         allocator: Allocator,
+         language: res.Language,
+         id: u16,
+         string_token: Token,
+         node: *Node,
+         source: []const u8,
+         code_page_lookup: *const CodePageLookup,
+         version: u32,
+         characteristics: u32,
+     ) StringTable.SetError!void {
+         const entry = try self.tables.getOrPut(allocator, language);
+         if (!entry.found_existing) {
+             entry.value_ptr.* = StringTable{};
+         }
+         return entry.value_ptr.set(allocator, id, string_token, node, source, code_page_lookup, version, characteristics);
+     }
+ };
+
+pub const StringTable = struct {
+ /// Blocks are written to the .res file in order depending on when the first string
+ /// was added to the block (i.e. `STRINGTABLE { 16 "b" 0 "a" }` would then get written
+ /// with block ID 2 (the one with "b") first and block ID 1 (the one with "a") second).
+ /// Using an ArrayHashMap here gives us this property for free.
+ blocks: std.AutoArrayHashMapUnmanaged(u16, Block) = .{},
+
+ pub const Block = struct {
+ strings: std.ArrayListUnmanaged(Token) = .{},
+ set_indexes: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 },
+ memory_flags: MemoryFlags = MemoryFlags.defaults(res.RT.STRING),
+ characteristics: u32,
+ version: u32,
+
+ /// Returns the index to insert the string into the `strings` list.
+ /// Returns null if the string should be appended.
+ /// `index` must not already be set.
+ fn getInsertionIndex(self: *Block, index: u8) ?u8 {
+     std.debug.assert(!self.set_indexes.isSet(index));
+
+     // `strings` is ordered by slot, so the insertion position is the number of
+     // occupied slots below `index`.
+     var occupied_below: u8 = 0;
+     var slot: usize = 0;
+     while (slot < index) : (slot += 1) {
+         if (self.set_indexes.isSet(slot)) occupied_below += 1;
+     }
+
+     // Nothing occupied at or above `index` (this includes the empty block): append.
+     if (occupied_below == self.set_indexes.count()) return null;
+     return occupied_below;
+ }
+
+ /// Returns the index into `strings` of the token stored for slot `string_index`,
+ /// or null if that slot is not set (or is out of range).
+ fn getTokenIndex(self: *Block, string_index: u8) ?u8 {
+     if (string_index >= @TypeOf(self.set_indexes).bit_length) return null;
+     // An unset slot has no token, no matter how many other slots are occupied.
+     if (!self.set_indexes.isSet(string_index)) return null;
+
+     // `strings` is kept ordered by slot, so the token's position is the number of
+     // occupied slots below `string_index`.
+     var token_index: u8 = 0;
+     var slot: usize = 0;
+     while (slot < string_index) : (slot += 1) {
+         if (self.set_indexes.isSet(slot)) token_index += 1;
+     }
+     return token_index;
+ }
+
+ /// Debug helper: prints each set slot with its position in `strings` and its token.
+ fn dump(self: *Block) void {
+     var set_bits = self.set_indexes.iterator(.{});
+     var position: usize = 0;
+     while (set_bits.next()) |bit_index| : (position += 1) {
+         std.debug.print("{}: [{}] {any}\n", .{ bit_index, position, self.strings.items[position] });
+     }
+ }
+
+ /// Applies the memory flags and the VERSION/CHARACTERISTICS optional statements of
+ /// `string_table` to this block.
+ pub fn applyAttributes(self: *Block, string_table: *Node.StringTable, source: []const u8, code_page_lookup: *const CodePageLookup) void {
+     Compiler.applyToMemoryFlags(&self.memory_flags, string_table.common_resource_attributes, source);
+
+     // The shared helper also writes a language; a block has no language field,
+     // so that result goes into a throwaway variable.
+     var ignored_language: res.Language = undefined;
+     Compiler.applyToOptionalStatements(
+         &ignored_language,
+         &self.version,
+         &self.characteristics,
+         string_table.optional_statements,
+         source,
+         code_page_lookup,
+     );
+ }
+
+ /// Returns the part of `str` before the first pair of consecutive zero elements,
+ /// or all of `str` if there is no such pair. A lone zero is kept.
+ fn trimToDoubleNUL(comptime T: type, str: []const T) []const T {
+     const double_nul = [_]T{ 0, 0 };
+     const end = std.mem.indexOf(T, str, &double_nul) orelse return str;
+     return str[0..end];
+ }
+
+ test "trimToDoubleNUL" {
+     // A single NUL does not terminate the string
+     try std.testing.expectEqualStrings("a\x00b", trimToDoubleNUL(u8, "a\x00b"));
+     // Two NULs in a row do
+     try std.testing.expectEqualStrings("a", trimToDoubleNUL(u8, "a\x00\x00b"));
+ }
+
+ /// Writes this block to `writer` as one RT_STRING resource named by ordinal `block_id`:
+ /// a resource header followed by 16 length-prefixed UTF-16 strings, where slots
+ /// without a string are written with a zero length.
+ pub fn writeResData(self: *Block, compiler: *Compiler, language: res.Language, block_id: u16, writer: anytype) !void {
+     var data_buffer = std.ArrayList(u8).init(compiler.allocator);
+     defer data_buffer.deinit();
+     const data_writer = data_buffer.writer();
+
+     // Position in `strings`; it only advances past slots that hold a string.
+     var next_string: usize = 0;
+     for (0..16) |slot| {
+         if (!self.set_indexes.isSet(slot)) {
+             try data_writer.writeInt(u16, 0, .little);
+             continue;
+         }
+
+         const string_token = self.strings.items[next_string];
+         next_string += 1;
+
+         const token_text = string_token.slice(compiler.source);
+         const start_column = string_token.calculateColumn(compiler.source, 8, null);
+         const token_code_page = compiler.input_code_pages.getForToken(string_token);
+         const utf16_string = try literals.parseQuotedStringAsWideString(
+             compiler.allocator,
+             SourceBytes{ .slice = token_text, .code_page = token_code_page },
+             .{
+                 .start_column = start_column,
+                 .diagnostics = .{ .diagnostics = compiler.diagnostics, .token = string_token },
+             },
+         );
+         defer compiler.allocator.free(utf16_string);
+
+         // Two NUL characters in a row act as a terminator
+         // (Note: This is only the case for STRINGTABLE strings),
+         // and we also want to trim any trailing NUL characters.
+         const trimmed_string = std.mem.trimRight(u16, trimToDoubleNUL(u16, utf16_string), &[_]u16{0});
+
+         // String literals are limited to maxInt(u15) codepoints, so these UTF-16 encoded
+         // strings are limited to maxInt(u15) * 2 = 65,534 code units (since 2 is the
+         // maximum number of UTF-16 code units per codepoint).
+         // This leaves room for exactly one NUL terminator.
+         var string_len_in_utf16_code_units: u16 = @intCast(trimmed_string.len);
+         // If the option is set, then a NUL terminator is added unconditionally.
+         // We already trimmed any trailing NULs, so we know it will be a new addition to the string.
+         const append_nul = compiler.null_terminate_string_table_strings;
+         if (append_nul) string_len_in_utf16_code_units += 1;
+
+         try data_writer.writeInt(u16, string_len_in_utf16_code_units, .little);
+         try data_writer.writeAll(std.mem.sliceAsBytes(trimmed_string));
+         if (append_nul) try data_writer.writeInt(u16, 0, .little);
+     }
+
+     // This intCast will never be able to fail due to the length constraints on string literals.
+     //
+     // - STRINGTABLE resource definitions can only provide one string literal per index.
+     // - STRINGTABLE strings are limited to maxInt(u16) UTF-16 code units (see 'string_len_in_utf16_code_units'
+     //   above), which means that the maximum number of bytes per string literal is
+     //   2 * maxInt(u16) = 131,070 (since there are 2 bytes per UTF-16 code unit).
+     // - Each Block/RT_STRING resource includes exactly 16 strings and each have a 2 byte
+     //   length field, so the maximum number of total bytes in a RT_STRING resource's data is
+     //   16 * (131,070 + 2) = 2,097,152 which is well within the u32 max.
+     //
+     // Note: The string literal maximum length is enforced by the lexer.
+     const data_size: u32 = @intCast(data_buffer.items.len);
+
+     // The only variable parts of the header are name and type, which in this case
+     // we fully control and know are numbers, so they have a fixed size.
+     const header = Compiler.ResourceHeader{
+         .name_value = .{ .ordinal = block_id },
+         .type_value = .{ .ordinal = @intFromEnum(res.RT.STRING) },
+         .memory_flags = self.memory_flags,
+         .language = language,
+         .version = self.version,
+         .characteristics = self.characteristics,
+         .data_size = data_size,
+     };
+     try header.writeAssertNoOverflow(writer);
+
+     var data_fbs = std.io.fixedBufferStream(data_buffer.items);
+     try Compiler.writeResourceData(writer, data_fbs.reader(), data_size);
+ }
+ };
+
+ /// Releases the `strings` list of every block and then the `blocks` map itself,
+ /// passing `allocator` to each `deinit` call.
+ pub fn deinit(self: *StringTable, allocator: Allocator) void {
+     var block_it = self.blocks.iterator();
+     while (block_it.next()) |kv| kv.value_ptr.strings.deinit(allocator);
+     self.blocks.deinit(allocator);
+ }
+
+ /// Errors that `set` can return.
+ const SetError = error{StringAlreadyDefined} || Allocator.Error;
+
+ /// Registers `string_token` as the string with the given `id`.
+ ///
+ /// The id selects a block (`id / 16 + 1`) and a slot within that block (the low 4 bits
+ /// of `id`). If the block does not exist yet, it is created with `version` and
+ /// `characteristics` and `applyAttributes` is called on it with `node` (which must be a
+ /// string table node, since the cast result is unwrapped), `source` and `code_page_lookup`.
+ ///
+ /// Returns `error.StringAlreadyDefined` if the slot is already occupied in an existing block.
+ pub fn set(
+     self: *StringTable,
+     allocator: Allocator,
+     id: u16,
+     string_token: Token,
+     node: *Node,
+     source: []const u8,
+     code_page_lookup: *const CodePageLookup,
+     version: u32,
+     characteristics: u32,
+ ) SetError!void {
+     const block_id = (id / 16) + 1;
+     const string_index: u8 = @intCast(id & 0xF);
+
+     const gop = try self.blocks.getOrPut(allocator, block_id);
+     const block = gop.value_ptr;
+     if (gop.found_existing) {
+         if (block.set_indexes.isSet(string_index)) return error.StringAlreadyDefined;
+     } else {
+         // Freshly inserted entry: its value has to be initialized before any use.
+         block.* = Block{ .version = version, .characteristics = characteristics };
+         block.applyAttributes(node.cast(.string_table).?, source, code_page_lookup);
+     }
+
+     // A null insertion index means the token goes at the end of the list.
+     if (block.getInsertionIndex(string_index)) |insertion_index| {
+         try block.strings.insert(allocator, insertion_index, string_token);
+     } else {
+         try block.strings.append(allocator, string_token);
+     }
+     // Only mark the slot as occupied once the token has actually been stored.
+     block.set_indexes.set(string_index);
+ }
+
+ /// Looks up the token stored for string `id`, using the same block/slot split as `set`
+ /// (`id / 16 + 1` for the block, the low 4 bits of `id` for the slot).
+ /// Returns null if the block does not exist or `getTokenIndex` finds nothing for the slot.
+ pub fn get(self: *StringTable, id: u16) ?Token {
+     const block_id = (id / 16) + 1;
+     const string_index: u8 = @intCast(id & 0xF);
+
+     if (self.blocks.getPtr(block_id)) |block| {
+         if (block.getTokenIndex(string_index)) |token_index| {
+             return block.strings.items[token_index];
+         }
+     }
+     return null;
+ }
+
+ /// Debugging aid: for every block, prints its id with `std.debug.print` and then
+ /// calls the block's own `dump`.
+ pub fn dump(self: *StringTable) !void {
+     // The blocks live in the `blocks` map, so that is what gets iterated
+     // (the same way `deinit` walks them).
+     var it = self.blocks.iterator();
+     while (it.next()) |entry| {
+         std.debug.print("block: {}\n", .{entry.key_ptr.*});
+         entry.value_ptr.dump();
+     }
+ }
+};
+
+ test "StringTable" {
+     const Dummy = struct {
+         // Placeholder token whose position fields all equal `id`, so that tokens
+         // created for different ids compare as different.
+         fn token(id: usize) Token {
+             return .{
+                 .id = .invalid,
+                 .start = id,
+                 .end = id,
+                 .line_number = id,
+             };
+         }
+     };
+     const allocator = std.testing.allocator;
+     var string_table = StringTable{};
+     defer string_table.deinit(allocator);
+
+     var code_page_lookup = CodePageLookup.init(allocator, .windows1252);
+     defer code_page_lookup.deinit();
+
+     var dummy_node = Node.StringTable{
+         .type = Dummy.token(0),
+         .common_resource_attributes = &.{},
+         .optional_statements = &.{},
+         .begin_token = Dummy.token(0),
+         .strings = &.{},
+         .end_token = Dummy.token(0),
+     };
+
+     // ids 0-99, shuffled with a fixed seed so the insertion order is deterministic
+     var ids: [100]u16 = undefined;
+     for (&ids, 0..) |*slot, index| slot.* = @intCast(index);
+     var prng = std.rand.DefaultPrng.init(0);
+     const random = prng.random();
+     random.shuffle(u16, &ids);
+
+     // insert every id in the shuffled order
+     for (ids) |shuffled_id| {
+         try string_table.set(allocator, shuffled_id, Dummy.token(shuffled_id), &dummy_node.base, "", &code_page_lookup, 0, 0);
+     }
+
+     // every id must now be rejected as a duplicate and must map back to its own token
+     for (0..100) |index| {
+         const id: u16 = @intCast(index);
+         const expected = Dummy.token(id);
+         try std.testing.expectError(error.StringAlreadyDefined, string_table.set(allocator, id, expected, &dummy_node.base, "", &code_page_lookup, 0, 0));
+         try std.testing.expectEqual(expected, string_table.get(id).?);
+     }
+
+     // an id that was never set must not be found
+     try std.testing.expectEqual(@as(?Token, null), string_table.get(100));
+ }
diff --git a/lib/compiler/resinator/errors.zig b/lib/compiler/resinator/errors.zig
@@ -0,0 +1,1076 @@
+const std = @import("std");
+const Token = @import("lex.zig").Token;
+const SourceMappings = @import("source_mapping.zig").SourceMappings;
+const utils = @import("utils.zig");
+const rc = @import("rc.zig");
+const res = @import("res.zig");
+const ico = @import("ico.zig");
+const bmp = @import("bmp.zig");
+const parse = @import("parse.zig");
+const lang = @import("lang.zig");
+const CodePage = @import("code_pages.zig").CodePage;
+const builtin = @import("builtin");
+const native_endian = builtin.cpu.arch.endian();
+
+ /// Collects the diagnostics (`ErrorDetails`) produced while processing a file, together
+ /// with the owned strings that some diagnostics refer to by index.
+ pub const Diagnostics = struct {
+     errors: std.ArrayListUnmanaged(ErrorDetails) = .{},
+     /// Append-only, cannot handle removing strings.
+     /// Expects to own all strings within the list.
+     strings: std.ArrayListUnmanaged([]const u8) = .{},
+     /// Used for both lists and for the duplicated strings stored in `strings`.
+     allocator: std.mem.Allocator,
+
+     /// Creates an empty set of diagnostics. Nothing is allocated until something is added.
+     pub fn init(allocator: std.mem.Allocator) Diagnostics {
+         return .{
+             .allocator = allocator,
+         };
+     }
+
+     /// Frees the error list, every string in `strings`, and the string list itself.
+     pub fn deinit(self: *Diagnostics) void {
+         self.errors.deinit(self.allocator);
+         for (self.strings.items) |str| {
+             self.allocator.free(str);
+         }
+         self.strings.deinit(self.allocator);
+     }
+
+     /// Appends `error_details` to the list of diagnostics.
+     pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void {
+         try self.errors.append(self.allocator, error_details);
+     }
+
+     /// Unsigned integer type as wide as the narrowest of the `FilenameStringIndex` types,
+     /// so that any index of this type fits in all of them.
+     const SmallestStringIndexType = std.meta.Int(.unsigned, @min(
+         @bitSizeOf(ErrorDetails.FileOpenError.FilenameStringIndex),
+         @min(
+             @bitSizeOf(ErrorDetails.IconReadError.FilenameStringIndex),
+             @bitSizeOf(ErrorDetails.BitmapReadError.FilenameStringIndex),
+         ),
+     ));
+
+     /// Returns the index of the added string as the SmallestStringIndexType
+     /// in order to avoid needing to `@intCast` it at callsites of putString.
+     /// Instead, this function will error if the index would ever exceed the
+     /// smallest FilenameStringIndex of an ErrorDetails type.
+     ///
+     /// `str` is duplicated; the copy is owned by `self` and freed in `deinit`.
+     pub fn putString(self: *Diagnostics, str: []const u8) !SmallestStringIndexType {
+         if (self.strings.items.len >= std.math.maxInt(SmallestStringIndexType)) {
+             return error.OutOfMemory; // ran out of string indexes
+         }
+         const dupe = try self.allocator.dupe(u8, str);
+         // `strings` only takes ownership once the append succeeds, so the copy
+         // must be released here if the append fails.
+         errdefer self.allocator.free(dupe);
+         const index = self.strings.items.len;
+         try self.strings.append(self.allocator, dupe);
+         return @intCast(index);
+     }
+
+     /// Renders every diagnostic to stderr in order while holding the stderr mutex.
+     /// Rendering is best-effort: it stops silently at the first diagnostic that fails to render.
+     pub fn renderToStdErr(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, tty_config: std.io.tty.Config, source_mappings: ?SourceMappings) void {
+         std.debug.getStderrMutex().lock();
+         defer std.debug.getStderrMutex().unlock();
+         const stderr = std.io.getStdErr().writer();
+         for (self.errors.items) |err_details| {
+             renderErrorMessage(self.allocator, stderr, tty_config, cwd, err_details, source, self.strings.items, source_mappings) catch return;
+         }
+     }
+
+     /// Same as `renderToStdErr`, with the TTY configuration detected from stderr.
+     pub fn renderToStdErrDetectTTY(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, source_mappings: ?SourceMappings) void {
+         const tty_config = std.io.tty.detectConfig(std.io.getStdErr());
+         return self.renderToStdErr(cwd, source, tty_config, source_mappings);
+     }
+
+     /// Returns true if any collected diagnostic has `err` as its error.
+     pub fn contains(self: *const Diagnostics, err: ErrorDetails.Error) bool {
+         for (self.errors.items) |details| {
+             if (details.err == err) return true;
+         }
+         return false;
+     }
+
+     /// Returns true if any collected diagnostic has one of `errors` as its error.
+     pub fn containsAny(self: *const Diagnostics, errors: []const ErrorDetails.Error) bool {
+         for (self.errors.items) |details| {
+             for (errors) |err| {
+                 if (details.err == err) return true;
+             }
+         }
+         return false;
+     }
+ };
+
+ /// Contains enough context to append errors/warnings/notes etc
+ pub const DiagnosticsContext = struct {
+ /// Where appended errors/warnings/notes go.
+ diagnostics: *Diagnostics,
+ /// NOTE(review): presumably the token that appended diagnostics get attached to;
+ /// confirm against the call sites.
+ token: Token,
+ };
+
+pub const ErrorDetails = struct {
+ /// Which diagnostic this is; `render` switches on it to pick the message.
+ err: Error,
+ /// The token the diagnostic is about. Several `render` branches include its text in the message.
+ token: Token,
+ /// If non-null, should be before `token`. If null, `token` is assumed to be the start.
+ token_span_start: ?Token = null,
+ /// If non-null, should be after `token`. If null, `token` is assumed to be the end.
+ token_span_end: ?Token = null,
+ /// Kind of diagnostic (error, warning, note or hint). Many `render` branches word
+ /// the message differently depending on it.
+ type: Type = .err,
+ /// NOTE(review): presumably controls whether the source line is shown when the
+ /// diagnostic is printed; the code that reads it is not in this part of the file.
+ print_source_line: bool = true,
+ /// Extra data for the diagnostic. This union is untagged, so the active field is
+ /// implied by `err`: the doc comments on the members of `Error` say which field
+ /// (if any) is populated. Defaults to `none`.
+ extra: union {
+ none: void,
+ expected: Token.Id,
+ number: u32,
+ expected_types: ExpectedTypes,
+ resource: rc.Resource,
+ string_and_language: StringAndLanguage,
+ file_open_error: FileOpenError,
+ icon_read_error: IconReadError,
+ icon_dir: IconDirContext,
+ bmp_read_error: BitmapReadError,
+ accelerator_error: AcceleratorError,
+ statement_with_u16_param: StatementWithU16Param,
+ menu_or_class: enum { class, menu },
+ } = .{ .none = {} },
+
+ /// The kind of a diagnostic. `render` uses it to choose between alternative
+ /// wordings of the same `Error`, and writes nothing for `hint` in those branches.
+ pub const Type = enum {
+ /// Fatal error, stops compilation
+ err,
+ /// Warning that does not affect compilation result
+ warning,
+ /// A note that typically provides further context for a warning/error
+ note,
+ /// An invisible diagnostic that is not printed to stderr but can
+ /// provide information useful when comparing the behavior of different
+ /// implementations. For example, a hint is emitted when a FONTDIR resource
+ /// was included in the .RES file which is significant because rc.exe
+ /// does something different than us, but ultimately it's not important
+ /// enough to be a warning/note.
+ hint,
+ };
+
+ comptime {
+     // Compile-time guard: no payload of the `extra` union may be wider than 32 bits.
+     const Extra = std.meta.fieldInfo(ErrorDetails, .extra).type;
+     for (std.meta.fields(Extra)) |extra_field| {
+         std.debug.assert(@bitSizeOf(extra_field.type) <= 32);
+     }
+ }
+
+ /// Payload of the `statement_with_u16_param` field of `extra` (populated for
+ /// `rc_would_error_u16_with_l_suffix`): names the statement the diagnostic refers to.
+ /// Explicitly backed by a `u32`.
+ pub const StatementWithU16Param = enum(u32) {
+ fileversion,
+ productversion,
+ language,
+ };
+
+ /// Payload of the `string_and_language` field of `extra` (populated for
+ /// `string_already_defined`), packed into 32 bits.
+ pub const StringAndLanguage = packed struct(u32) {
+ /// The string id that `render` reports as already defined.
+ id: u16,
+ /// The language that `render` reports the string as already defined for.
+ language: res.Language,
+ };
+
+ /// Payload of the `file_open_error` field of `extra`, packed into 32 bits: which
+ /// `std.fs.File.OpenError` occurred plus an index into the diagnostics' string list
+ /// for the file name.
+ pub const FileOpenError = packed struct(u32) {
+     /// Enum with one tag per error in `std.fs.File.OpenError`.
+     pub const FileOpenErrorEnum = std.meta.FieldEnum(std.fs.File.OpenError);
+     /// Unsigned integer occupying whatever bits `FileOpenErrorEnum` leaves free.
+     pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(FileOpenErrorEnum));
+
+     err: FileOpenErrorEnum,
+     filename_string_index: FilenameStringIndex,
+
+     /// Converts an error value into the enum tag that has the same name.
+     pub fn enumFromError(err: std.fs.File.OpenError) FileOpenErrorEnum {
+         return switch (err) {
+             // `inline else` makes the error comptime-known, so the tag can be looked up by name.
+             inline else => |known_err| @field(FileOpenErrorEnum, @errorName(known_err)),
+         };
+     }
+ };
+
+ /// Payload of the `icon_read_error` field of `extra`, packed into 32 bits: which
+ /// `ico.ReadError` occurred, whether the file was a cursor or an icon, and an index
+ /// into the diagnostics' string list for the file name.
+ pub const IconReadError = packed struct(u32) {
+     /// Enum with one tag per error in `ico.ReadError`.
+     pub const IconReadErrorEnum = std.meta.FieldEnum(ico.ReadError);
+     /// Unsigned integer occupying the bits left over after `err` and the 1-bit `icon_type`.
+     pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(IconReadErrorEnum) - 1);
+
+     err: IconReadErrorEnum,
+     icon_type: enum(u1) { cursor, icon },
+     filename_string_index: FilenameStringIndex,
+
+     /// Converts an error value into the enum tag that has the same name.
+     pub fn enumFromError(err: ico.ReadError) IconReadErrorEnum {
+         return switch (err) {
+             // `inline else` makes the error comptime-known, so the tag can be looked up by name.
+             inline else => |known_err| @field(IconReadErrorEnum, @errorName(known_err)),
+         };
+     }
+ };
+
+ /// Payload of the `icon_dir` field of `extra`, packed into 32 bits. `render` uses it to
+ /// report the index, the icon/cursor type, the image format and the bitmap version of
+ /// an entry.
+ pub const IconDirContext = packed struct(u32) {
+     /// Filler that brings the struct up to exactly 32 bits: 1 bit of `icon_type` and
+     /// 16 bits of `index` leave 15 bits to share between `icon_format`,
+     /// `bitmap_version` and this padding.
+     pub const Padding = std.meta.Int(.unsigned, 15 - @bitSizeOf(ico.BitmapHeader.Version) - @bitSizeOf(ico.ImageFormat));
+
+     icon_type: enum(u1) { cursor, icon },
+     icon_format: ico.ImageFormat,
+     index: u16,
+     bitmap_version: ico.BitmapHeader.Version = .unknown,
+     _: Padding = 0,
+ };
+
+ /// Payload of the `bmp_read_error` field of `extra`, packed into 32 bits: which
+ /// `bmp.ReadError` occurred plus an index into the diagnostics' string list for the
+ /// file name.
+ pub const BitmapReadError = packed struct(u32) {
+     /// Enum with one tag per error in `bmp.ReadError`.
+     pub const BitmapReadErrorEnum = std.meta.FieldEnum(bmp.ReadError);
+     /// Unsigned integer occupying whatever bits `BitmapReadErrorEnum` leaves free.
+     pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(BitmapReadErrorEnum));
+
+     err: BitmapReadErrorEnum,
+     filename_string_index: FilenameStringIndex,
+
+     /// Converts an error value into the enum tag that has the same name.
+     pub fn enumFromError(err: bmp.ReadError) BitmapReadErrorEnum {
+         return switch (err) {
+             // `inline else` makes the error comptime-known, so the tag can be looked up by name.
+             inline else => |known_err| @field(BitmapReadErrorEnum, @errorName(known_err)),
+         };
+     }
+ };
+
+ /// A DIB header version plus a filename string index, packed into 32 bits.
+ /// NOTE(review): none of the fields of the `extra` union has this type, so check
+ /// whether this struct is still used anywhere.
+ pub const BitmapUnsupportedDIB = packed struct(u32) {
+ dib_version: ico.BitmapHeader.Version,
+ filename_string_index: FilenameStringIndex,
+
+ /// Unsigned integer occupying whatever bits `ico.BitmapHeader.Version` leaves free.
+ pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(ico.BitmapHeader.Version));
+ };
+
+ /// Payload of the `accelerator_error` field of `extra` (populated for
+ /// `invalid_accelerator_key`): which `res.ParseAcceleratorKeyStringError` occurred,
+ /// padded out to 32 bits.
+ pub const AcceleratorError = packed struct(u32) {
+     /// Enum with one tag per error in `res.ParseAcceleratorKeyStringError`.
+     pub const AcceleratorErrorEnum = std.meta.FieldEnum(res.ParseAcceleratorKeyStringError);
+     /// Unsigned integer occupying whatever bits `AcceleratorErrorEnum` leaves free.
+     pub const Padding = std.meta.Int(.unsigned, 32 - @bitSizeOf(AcceleratorErrorEnum));
+
+     err: AcceleratorErrorEnum,
+     _: Padding = 0,
+
+     /// Converts an error value into the enum tag that has the same name.
+     pub fn enumFromError(err: res.ParseAcceleratorKeyStringError) AcceleratorErrorEnum {
+         return switch (err) {
+             // `inline else` makes the error comptime-known, so the tag can be looked up by name.
+             inline else => |known_err| @field(AcceleratorErrorEnum, @errorName(known_err)),
+         };
+     }
+ };
+
+ /// Set of flags, packed into 32 bits, naming the kinds of thing that would have been
+ /// accepted. `render` prints it for `expected_something_else`.
+ pub const ExpectedTypes = packed struct(u32) {
+     number: bool = false,
+     number_expression: bool = false,
+     string_literal: bool = false,
+     accelerator_type_or_option: bool = false,
+     control_class: bool = false,
+     literal: bool = false,
+     // Note: This being 0 instead of undefined is arbitrary and something of a workaround,
+     // see https://github.com/ziglang/zig/issues/15395
+     _: u26 = 0,
+
+     /// Maps the name of each flag field to the phrase written for it.
+     pub const strings = std.ComptimeStringMap([]const u8, .{
+         .{ "number", "number" },
+         .{ "number_expression", "number expression" },
+         .{ "string_literal", "quoted string literal" },
+         .{ "accelerator_type_or_option", "accelerator type or option [ASCII, VIRTKEY, etc]" },
+         .{ "control_class", "control class [BUTTON, EDIT, etc]" },
+         .{ "literal", "unquoted literal" },
+     });
+
+     /// Writes the phrases of all set flags, in field order, as an English list:
+     /// "a" for one flag, "a or b" for two, and "a, b, or c" for three or more.
+     /// Writes nothing if no flag is set.
+     pub fn writeCommaSeparated(self: ExpectedTypes, writer: anytype) !void {
+         const info = @typeInfo(ExpectedTypes).Struct;
+         const Backing = info.backing_integer.?;
+         // Every field except the trailing padding field is a flag, so the flags
+         // occupy the lowest `flag_count` bits of the backing integer.
+         const flag_count = info.fields.len - 1;
+         const padding_bit_count = @bitSizeOf(ExpectedTypes) - flag_count;
+         const flag_mask = std.math.maxInt(Backing) >> padding_bit_count;
+         const flag_bits = @as(Backing, @bitCast(self)) & flag_mask;
+         const total = @popCount(flag_bits);
+
+         var written: usize = 0;
+         inline for (info.fields) |field_info| {
+             if (field_info.type != bool) continue;
+             if (written == total) return;
+             if (@field(self, field_info.name)) {
+                 try writer.writeAll(strings.get(field_info.name).?);
+                 written += 1;
+                 // Separators are only needed while more items follow.
+                 if (written != total) {
+                     if (total > 2) try writer.writeAll(", ") else try writer.writeByte(' ');
+                     // Exactly one item left: it is introduced with "or".
+                     if (written == total - 1) try writer.writeAll("or ");
+                 }
+             }
+         }
+     }
+ };
+
+ /// Identifies a diagnostic. `render` switches on this value to produce the message.
+ /// Where a member needs extra data, its doc comment says which field of the `extra`
+ /// union is populated.
+ pub const Error = enum {
+ // Lexer
+ unfinished_string_literal,
+ /// `number` is populated and contains the maximum string literal length
+ string_literal_too_long,
+ invalid_number_with_exponent,
+ invalid_digit_character_in_number_literal,
+ illegal_byte,
+ illegal_byte_outside_string_literals,
+ illegal_codepoint_outside_string_literals,
+ illegal_byte_order_mark,
+ illegal_private_use_character,
+ found_c_style_escaped_quote,
+ code_page_pragma_missing_left_paren,
+ code_page_pragma_missing_right_paren,
+ code_page_pragma_invalid_code_page,
+ code_page_pragma_not_integer,
+ code_page_pragma_overflow,
+ code_page_pragma_unsupported_code_page,
+
+ // Parser
+ unfinished_raw_data_block,
+ unfinished_string_table_block,
+ /// `expected` is populated.
+ expected_token,
+ /// `expected_types` is populated
+ expected_something_else,
+ /// `resource` is populated
+ resource_type_cant_use_raw_data,
+ /// `resource` is populated
+ id_must_be_ordinal,
+ /// `resource` is populated
+ name_or_id_not_allowed,
+ string_resource_as_numeric_type,
+ ascii_character_not_equivalent_to_virtual_key_code,
+ empty_menu_not_allowed,
+ rc_would_miscompile_version_value_padding,
+ rc_would_miscompile_version_value_byte_count,
+ code_page_pragma_in_included_file,
+ /// `resource` is populated
+ nested_resource_level_exceeds_max,
+ /// `resource` is populated
+ too_many_dialog_controls_or_toolbar_buttons,
+ nested_expression_level_exceeds_max,
+ close_paren_expression,
+ unary_plus_expression,
+ rc_could_miscompile_control_params,
+
+ // Compiler
+ /// `string_and_language` is populated
+ string_already_defined,
+ /// `number` is populated and contains the font id
+ font_id_already_defined,
+ /// `file_open_error` is populated
+ file_open_error,
+ /// `accelerator_error` is populated
+ invalid_accelerator_key,
+ accelerator_type_required,
+ rc_would_miscompile_control_padding,
+ rc_would_miscompile_control_class_ordinal,
+ /// `icon_dir` is populated
+ rc_would_error_on_icon_dir,
+ /// `icon_dir` is populated
+ format_not_supported_in_icon_dir,
+ /// `resource` is populated and contains the expected type
+ icon_dir_and_resource_type_mismatch,
+ /// `icon_read_error` is populated
+ icon_read_error,
+ /// `icon_dir` is populated
+ rc_would_error_on_bitmap_version,
+ /// `icon_dir` is populated
+ max_icon_ids_exhausted,
+ /// `bmp_read_error` is populated
+ bmp_read_error,
+ /// `number` is populated and contains a string index for which the string contains
+ /// the bytes of a `u64` (native endian). The `u64` contains the number of ignored bytes.
+ bmp_ignored_palette_bytes,
+ /// `number` is populated and contains a string index for which the string contains
+ /// the bytes of a `u64` (native endian). The `u64` contains the number of missing bytes.
+ bmp_missing_palette_bytes,
+ /// `number` is populated and contains a string index for which the string contains
+ /// the bytes of a `u64` (native endian). The `u64` contains the number of miscompiled bytes.
+ rc_would_miscompile_bmp_palette_padding,
+ /// `number` is populated and contains a string index for which the string contains
+ /// the bytes of two `u64`s (native endian). The first contains the number of missing
+ /// palette bytes and the second contains the max number of missing palette bytes.
+ /// If type is `.note`, then `extra` is `none`.
+ bmp_too_many_missing_palette_bytes,
+ resource_header_size_exceeds_max,
+ resource_data_size_exceeds_max,
+ control_extra_data_size_exceeds_max,
+ version_node_size_exceeds_max,
+ fontdir_size_exceeds_max,
+ /// `number` is populated and contains a string index for the filename
+ number_expression_as_filename,
+ /// `number` is populated and contains the control ID that is a duplicate
+ control_id_already_defined,
+ /// `number` is populated and contains the disallowed codepoint
+ invalid_filename,
+ /// `statement_with_u16_param` is populated
+ rc_would_error_u16_with_l_suffix,
+ result_contains_fontdir,
+ /// `number` is populated and contains the ordinal value that the id would be miscompiled to
+ rc_would_miscompile_dialog_menu_id,
+ /// `number` is populated and contains the ordinal value that the value would be miscompiled to
+ rc_would_miscompile_dialog_class,
+ /// `menu_or_class` is populated and contains the type of the parameter statement
+ rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
+ rc_would_miscompile_dialog_menu_id_starts_with_digit,
+ dialog_menu_id_was_uppercased,
+ /// `menu_or_class` is populated and contains the type of the parameter statement
+ duplicate_menu_or_class_skipped,
+ invalid_digit_character_in_ordinal,
+
+ // Literals
+ /// `number` is populated
+ rc_would_miscompile_codepoint_byte_swap,
+ /// `number` is populated
+ rc_would_miscompile_codepoint_skip,
+ tab_converted_to_spaces,
+
+ // General (used in various places)
+ /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
+ win32_non_ascii_ordinal,
+
+ // Initialization
+ /// `file_open_error` is populated, but `filename_string_index` is not
+ failed_to_open_cwd,
+ };
+
+ pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
+ switch (self.err) {
+ .unfinished_string_literal => {
+ return writer.print("unfinished string literal at '{s}', expected closing '\"'", .{self.token.nameForErrorDisplay(source)});
+ },
+ .string_literal_too_long => {
+ return writer.print("string literal too long (max is currently {} characters)", .{self.extra.number});
+ },
+ .invalid_number_with_exponent => {
+ return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)});
+ },
+ .invalid_digit_character_in_number_literal => switch (self.type) {
+ .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"),
+ .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"),
+ .hint => return,
+ },
+ .illegal_byte => {
+ return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
+ },
+ .illegal_byte_outside_string_literals => {
+ return writer.print("character '{s}' is not allowed outside of string literals", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
+ },
+ .illegal_codepoint_outside_string_literals => {
+ // This is somewhat hacky, but we know that:
+ // - This error is only possible with codepoints outside of the Windows-1252 character range
+ // - So, the only supported code page that could generate this error is UTF-8
+ // Therefore, we just assume the token bytes are UTF-8 and decode them to get the illegal
+ // codepoint.
+ //
+ // FIXME: Support other code pages if they become relevant
+ const bytes = self.token.slice(source);
+ const codepoint = std.unicode.utf8Decode(bytes) catch unreachable;
+ return writer.print("codepoint <U+{X:0>4}> is not allowed outside of string literals", .{codepoint});
+ },
+ .illegal_byte_order_mark => {
+ return writer.writeAll("byte order mark <U+FEFF> is not allowed");
+ },
+ .illegal_private_use_character => {
+ return writer.writeAll("private use character <U+E000> is not allowed");
+ },
+ .found_c_style_escaped_quote => {
+ return writer.writeAll("escaping quotes with \\\" is not allowed (use \"\" instead)");
+ },
+ .code_page_pragma_missing_left_paren => {
+ return writer.writeAll("expected left parenthesis after 'code_page' in #pragma code_page");
+ },
+ .code_page_pragma_missing_right_paren => {
+ return writer.writeAll("expected right parenthesis after '<number>' in #pragma code_page");
+ },
+ .code_page_pragma_invalid_code_page => {
+ return writer.writeAll("invalid or unknown code page in #pragma code_page");
+ },
+ .code_page_pragma_not_integer => {
+ return writer.writeAll("code page is not a valid integer in #pragma code_page");
+ },
+ .code_page_pragma_overflow => {
+ return writer.writeAll("code page too large in #pragma code_page");
+ },
+ .code_page_pragma_unsupported_code_page => {
+ // We know that the token slice is a well-formed #pragma code_page(N), so
+ // we can skip to the first ( and then get the number that follows
+ const token_slice = self.token.slice(source);
+ var number_start = std.mem.indexOfScalar(u8, token_slice, '(').? + 1;
+ while (std.ascii.isWhitespace(token_slice[number_start])) {
+ number_start += 1;
+ }
+ var number_slice = token_slice[number_start..number_start];
+ while (std.ascii.isDigit(token_slice[number_start + number_slice.len])) {
+ number_slice.len += 1;
+ }
+ const number = std.fmt.parseUnsigned(u16, number_slice, 10) catch unreachable;
+ const code_page = CodePage.getByIdentifier(number) catch unreachable;
+ // TODO: Improve or maybe add a note making it more clear that the code page
+ // is valid and that the code page is unsupported purely due to a limitation
+ // in this compiler.
+ return writer.print("unsupported code page '{s} (id={})' in #pragma code_page", .{ @tagName(code_page), number });
+ },
+ .unfinished_raw_data_block => {
+ return writer.print("unfinished raw data block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)});
+ },
+ .unfinished_string_table_block => {
+ return writer.print("unfinished STRINGTABLE block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)});
+ },
+ .expected_token => {
+ return writer.print("expected '{s}', got '{s}'", .{ self.extra.expected.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) });
+ },
+ .expected_something_else => {
+ try writer.writeAll("expected ");
+ try self.extra.expected_types.writeCommaSeparated(writer);
+ return writer.print("; got '{s}'", .{self.token.nameForErrorDisplay(source)});
+ },
+ .resource_type_cant_use_raw_data => switch (self.type) {
+ .err, .warning => try writer.print("expected '<filename>', found '{s}' (resource type '{s}' can't use raw data)", .{ self.token.nameForErrorDisplay(source), self.extra.resource.nameForErrorDisplay() }),
+ .note => try writer.print("if '{s}' is intended to be a filename, it must be specified as a quoted string literal", .{self.token.nameForErrorDisplay(source)}),
+ .hint => return,
+ },
+ .id_must_be_ordinal => {
+ try writer.print("id of resource type '{s}' must be an ordinal (u16), got '{s}'", .{ self.extra.resource.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) });
+ },
+ .name_or_id_not_allowed => {
+ try writer.print("name or id is not allowed for resource type '{s}'", .{self.extra.resource.nameForErrorDisplay()});
+ },
+ .string_resource_as_numeric_type => switch (self.type) {
+ .err, .warning => try writer.writeAll("the number 6 (RT_STRING) cannot be used as a resource type"),
+ .note => try writer.writeAll("using RT_STRING directly likely results in an invalid .res file, use a STRINGTABLE instead"),
+ .hint => return,
+ },
+ .ascii_character_not_equivalent_to_virtual_key_code => {
+ // TODO: Better wording? This is what the Win32 RC compiler emits.
+ // This occurs when VIRTKEY and a control code is specified ("^c", etc)
+ try writer.writeAll("ASCII character not equivalent to virtual key code");
+ },
+ .empty_menu_not_allowed => {
+ try writer.print("empty menu of type '{s}' not allowed", .{self.token.nameForErrorDisplay(source)});
+ },
+ .rc_would_miscompile_version_value_padding => switch (self.type) {
+ .err, .warning => return writer.print("the padding before this quoted string value would be miscompiled by the Win32 RC compiler", .{}),
+ .note => return writer.print("to avoid the potential miscompilation, consider adding a comma between the key and the quoted string", .{}),
+ .hint => return,
+ },
+ .rc_would_miscompile_version_value_byte_count => switch (self.type) {
+ .err, .warning => return writer.print("the byte count of this value would be miscompiled by the Win32 RC compiler", .{}),
+ .note => return writer.print("to avoid the potential miscompilation, do not mix numbers and strings within a value", .{}),
+ .hint => return,
+ },
+ .code_page_pragma_in_included_file => {
+ try writer.print("#pragma code_page is not supported in an included resource file", .{});
+ },
+ .nested_resource_level_exceeds_max => switch (self.type) {
+ .err, .warning => {
+ const max = switch (self.extra.resource) {
+ .versioninfo => parse.max_nested_version_level,
+ .menu, .menuex => parse.max_nested_menu_level,
+ else => unreachable,
+ };
+ return writer.print("{s} contains too many nested children (max is {})", .{ self.extra.resource.nameForErrorDisplay(), max });
+ },
+ .note => return writer.print("max {s} nesting level exceeded here", .{self.extra.resource.nameForErrorDisplay()}),
+ .hint => return,
+ },
+ .too_many_dialog_controls_or_toolbar_buttons => switch (self.type) {
+ .err, .warning => return writer.print("{s} contains too many {s} (max is {})", .{ self.extra.resource.nameForErrorDisplay(), switch (self.extra.resource) {
+ .toolbar => "buttons",
+ else => "controls",
+ }, std.math.maxInt(u16) }),
+ .note => return writer.print("maximum number of {s} exceeded here", .{switch (self.extra.resource) {
+ .toolbar => "buttons",
+ else => "controls",
+ }}),
+ .hint => return,
+ },
+ .nested_expression_level_exceeds_max => switch (self.type) {
+ .err, .warning => return writer.print("expression contains too many syntax levels (max is {})", .{parse.max_nested_expression_level}),
+ .note => return writer.print("maximum expression level exceeded here", .{}),
+ .hint => return,
+ },
+ .close_paren_expression => {
+ try writer.writeAll("the Win32 RC compiler would accept ')' as a valid expression, but it would be skipped over and potentially lead to unexpected outcomes");
+ },
+ .unary_plus_expression => {
+ try writer.writeAll("the Win32 RC compiler may accept '+' as a unary operator here, but it is not supported in this implementation; consider omitting the unary +");
+ },
+ .rc_could_miscompile_control_params => switch (self.type) {
+ .err, .warning => return writer.print("this token could be erroneously skipped over by the Win32 RC compiler", .{}),
+ .note => return writer.print("to avoid the potential miscompilation, consider adding a comma after the style parameter", .{}),
+ .hint => return,
+ },
+ .string_already_defined => switch (self.type) {
+ .err, .warning => {
+ const language_id = self.extra.string_and_language.language.asInt();
+ const language_name = language_name: {
+ if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| {
+ break :language_name @tagName(lang_enum_val);
+ } else |_| {}
+ if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) {
+ break :language_name "LOCALE_CUSTOM_UNSPECIFIED";
+ }
+ break :language_name "<UNKNOWN>";
+ };
+ return writer.print("string with id {d} (0x{X}) already defined for language {s} (0x{X})", .{ self.extra.string_and_language.id, self.extra.string_and_language.id, language_name, language_id });
+ },
+ .note => return writer.print("previous definition of string with id {d} (0x{X}) here", .{ self.extra.string_and_language.id, self.extra.string_and_language.id }),
+ .hint => return,
+ },
+ .font_id_already_defined => switch (self.type) {
+ .err => return writer.print("font with id {d} already defined", .{self.extra.number}),
+ .warning => return writer.print("skipped duplicate font with id {d}", .{self.extra.number}),
+ .note => return writer.print("previous definition of font with id {d} here", .{self.extra.number}),
+ .hint => return,
+ },
+ .file_open_error => {
+ try writer.print("unable to open file '{s}': {s}", .{ strings[self.extra.file_open_error.filename_string_index], @tagName(self.extra.file_open_error.err) });
+ },
+ .invalid_accelerator_key => {
+ try writer.print("invalid accelerator key '{s}': {s}", .{ self.token.nameForErrorDisplay(source), @tagName(self.extra.accelerator_error.err) });
+ },
+ .accelerator_type_required => {
+ try writer.print("accelerator type [ASCII or VIRTKEY] required when key is an integer", .{});
+ },
+ .rc_would_miscompile_control_padding => switch (self.type) {
+ .err, .warning => return writer.print("the padding before this control would be miscompiled by the Win32 RC compiler (it would insert 2 extra bytes of padding)", .{}),
+ .note => return writer.print("to avoid the potential miscompilation, consider removing any 'control data' blocks from the controls in this dialog", .{}),
+ .hint => return,
+ },
+ .rc_would_miscompile_control_class_ordinal => switch (self.type) {
+ .err, .warning => return writer.print("the control class of this CONTROL would be miscompiled by the Win32 RC compiler", .{}),
+ .note => return writer.print("to avoid the potential miscompilation, consider specifying the control class using a string (BUTTON, EDIT, etc) instead of a number", .{}),
+ .hint => return,
+ },
+ .rc_would_error_on_icon_dir => switch (self.type) {
+ .err, .warning => return writer.print("the resource at index {} of this {s} has the format '{s}'; this would be an error in the Win32 RC compiler", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type), @tagName(self.extra.icon_dir.icon_format) }),
+ .note => {
+ // The only note supported is one specific to exactly this combination
+ if (!(self.extra.icon_dir.icon_type == .icon and self.extra.icon_dir.icon_format == .riff)) unreachable;
+ try writer.print("animated RIFF icons within resource groups may not be well supported, consider using an animated icon file (.ani) instead", .{});
+ },
+ .hint => return,
+ },
+ .format_not_supported_in_icon_dir => {
+ try writer.print("resource with format '{s}' (at index {}) is not allowed in {s} resource groups", .{ @tagName(self.extra.icon_dir.icon_format), self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) });
+ },
+ .icon_dir_and_resource_type_mismatch => {
+ const unexpected_type: rc.Resource = if (self.extra.resource == .icon) .cursor else .icon;
+ // TODO: Better wording
+ try writer.print("resource type '{s}' does not match type '{s}' specified in the file", .{ self.extra.resource.nameForErrorDisplay(), unexpected_type.nameForErrorDisplay() });
+ },
+ .icon_read_error => {
+ try writer.print("unable to read {s} file '{s}': {s}", .{ @tagName(self.extra.icon_read_error.icon_type), strings[self.extra.icon_read_error.filename_string_index], @tagName(self.extra.icon_read_error.err) });
+ },
+ .rc_would_error_on_bitmap_version => switch (self.type) {
+ .err => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this version is no longer allowed and should be upgraded to '{s}'", .{
+ self.extra.icon_dir.index,
+ @tagName(self.extra.icon_dir.icon_type),
+ self.extra.icon_dir.bitmap_version.nameForErrorDisplay(),
+ ico.BitmapHeader.Version.@"nt3.1".nameForErrorDisplay(),
+ }),
+ .warning => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this would be an error in the Win32 RC compiler", .{
+ self.extra.icon_dir.index,
+ @tagName(self.extra.icon_dir.icon_type),
+ self.extra.icon_dir.bitmap_version.nameForErrorDisplay(),
+ }),
+ .note => unreachable,
+ .hint => return,
+ },
+ .max_icon_ids_exhausted => switch (self.type) {
+ .err, .warning => try writer.print("maximum global icon/cursor ids exhausted (max is {})", .{std.math.maxInt(u16) - 1}),
+ .note => try writer.print("maximum icon/cursor id exceeded at index {} of this {s}", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }),
+ .hint => return,
+ },
+ .bmp_read_error => {
+ try writer.print("invalid bitmap file '{s}': {s}", .{ strings[self.extra.bmp_read_error.filename_string_index], @tagName(self.extra.bmp_read_error.err) });
+ },
+ .bmp_ignored_palette_bytes => {
+ const bytes = strings[self.extra.number];
+ const ignored_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
+ try writer.print("bitmap has {d} extra bytes preceding the pixel data which will be ignored", .{ignored_bytes});
+ },
+ .bmp_missing_palette_bytes => {
+ const bytes = strings[self.extra.number];
+ const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
+ try writer.print("bitmap has {d} missing color palette bytes which will be padded with zeroes", .{missing_bytes});
+ },
+ .rc_would_miscompile_bmp_palette_padding => {
+ const bytes = strings[self.extra.number];
+ const miscompiled_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
+ try writer.print("the missing color palette bytes would be miscompiled by the Win32 RC compiler (the added padding bytes would include {d} bytes of the pixel data)", .{miscompiled_bytes});
+ },
+ .bmp_too_many_missing_palette_bytes => switch (self.type) {
+ .err, .warning => {
+ const bytes = strings[self.extra.number];
+ const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
+ const max_missing_bytes = std.mem.readInt(u64, bytes[8..16], native_endian);
+ try writer.print("bitmap has {} missing color palette bytes which exceeds the maximum of {}", .{ missing_bytes, max_missing_bytes });
+ },
+ // TODO: command line option
+ .note => try writer.writeAll("the maximum number of missing color palette bytes is configurable via <<TODO command line option>>"),
+ .hint => return,
+ },
+ .resource_header_size_exceeds_max => {
+ try writer.print("resource's header length exceeds maximum of {} bytes", .{std.math.maxInt(u32)});
+ },
+ .resource_data_size_exceeds_max => switch (self.type) {
+ .err, .warning => return writer.print("resource's data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}),
+ .note => return writer.print("maximum data length exceeded here", .{}),
+ .hint => return,
+ },
+ .control_extra_data_size_exceeds_max => switch (self.type) {
+ .err, .warning => try writer.print("control data length exceeds maximum of {} bytes", .{std.math.maxInt(u16)}),
+ .note => return writer.print("maximum control data length exceeded here", .{}),
+ .hint => return,
+ },
+ .version_node_size_exceeds_max => switch (self.type) {
+ .err, .warning => return writer.print("version node tree size exceeds maximum of {} bytes", .{std.math.maxInt(u16)}),
+ .note => return writer.print("maximum tree size exceeded while writing this child", .{}),
+ .hint => return,
+ },
+ .fontdir_size_exceeds_max => switch (self.type) {
+ .err, .warning => return writer.print("FONTDIR data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}),
+ .note => return writer.writeAll("this is likely due to the size of the combined lengths of the device/face names of all FONT resources"),
+ .hint => return,
+ },
+ .number_expression_as_filename => switch (self.type) {
+ .err, .warning => return writer.writeAll("filename cannot be specified using a number expression, consider using a quoted string instead"),
+ .note => return writer.print("the Win32 RC compiler would evaluate this number expression as the filename '{s}'", .{strings[self.extra.number]}),
+ .hint => return,
+ },
+ .control_id_already_defined => switch (self.type) {
+ .err, .warning => return writer.print("control with id {d} already defined for this dialog", .{self.extra.number}),
+ .note => return writer.print("previous definition of control with id {d} here", .{self.extra.number}),
+ .hint => return,
+ },
+ .invalid_filename => {
+ const disallowed_codepoint = self.extra.number;
+ if (disallowed_codepoint < 128 and std.ascii.isPrint(@intCast(disallowed_codepoint))) {
+ try writer.print("evaluated filename contains a disallowed character: '{c}'", .{@as(u8, @intCast(disallowed_codepoint))});
+ } else {
+ try writer.print("evaluated filename contains a disallowed codepoint: <U+{X:0>4}>", .{disallowed_codepoint});
+ }
+ },
+ .rc_would_error_u16_with_l_suffix => switch (self.type) {
+ .err, .warning => return writer.print("this {s} parameter would be an error in the Win32 RC compiler", .{@tagName(self.extra.statement_with_u16_param)}),
+ .note => return writer.writeAll("to avoid the error, remove any L suffixes from numbers within the parameter"),
+ .hint => return,
+ },
+ .result_contains_fontdir => return,
+ .rc_would_miscompile_dialog_menu_id => switch (self.type) {
+ .err, .warning => return writer.print("the id of this menu would be miscompiled by the Win32 RC compiler", .{}),
+ .note => return writer.print("the Win32 RC compiler would evaluate the id as the ordinal/number value {d}", .{self.extra.number}),
+ .hint => return,
+ },
+ .rc_would_miscompile_dialog_class => switch (self.type) {
+ .err, .warning => return writer.print("this class would be miscompiled by the Win32 RC compiler", .{}),
+ .note => return writer.print("the Win32 RC compiler would evaluate it as the ordinal/number value {d}", .{self.extra.number}),
+ .hint => return,
+ },
+ .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal => switch (self.type) {
+ .err, .warning => return,
+ .note => return writer.print("to avoid the potential miscompilation, only specify one {s} per dialog resource", .{@tagName(self.extra.menu_or_class)}),
+ .hint => return,
+ },
+ .rc_would_miscompile_dialog_menu_id_starts_with_digit => switch (self.type) {
+ .err, .warning => return,
+ .note => return writer.writeAll("to avoid the potential miscompilation, the first character of the id should not be a digit"),
+ .hint => return,
+ },
+ .dialog_menu_id_was_uppercased => return,
+ .duplicate_menu_or_class_skipped => {
+ return writer.print("this {s} was ignored; when multiple {s} statements are specified, only the last takes precedence", .{
+ @tagName(self.extra.menu_or_class),
+ @tagName(self.extra.menu_or_class),
+ });
+ },
+ .invalid_digit_character_in_ordinal => {
+ return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values");
+ },
+ .rc_would_miscompile_codepoint_byte_swap => switch (self.type) {
+ .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}),
+ .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}),
+ .hint => return,
+ },
+ .rc_would_miscompile_codepoint_skip => switch (self.type) {
+ .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the codepoint would be missing from the compiled resource)", .{self.extra.number}),
+ .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}),
+ .hint => return,
+ },
+ .tab_converted_to_spaces => switch (self.type) {
+ .err, .warning => return writer.writeAll("the tab character(s) in this string will be converted into a variable number of spaces (determined by the column of the tab character in the .rc file)"),
+ .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"),
+ .hint => return,
+ },
+ .win32_non_ascii_ordinal => switch (self.type) {
+ .err, .warning => unreachable,
+ .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
+ .hint => return,
+ },
+ .failed_to_open_cwd => {
+ try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)});
+ },
+ }
+ }
+
+ /// Byte-based geometry of the marker line (`~~~^~~~`) that is drawn
+ /// underneath a source line when an error is rendered.
+ pub const VisualTokenInfo = struct {
+ /// Number of `~` characters drawn immediately before the `^`.
+ before_len: usize,
+ /// Offset of the `^` from the start of the source line.
+ point_offset: usize,
+ /// Number of `~` characters drawn immediately after the `^`.
+ after_len: usize,
+ };
+
+    /// Computes where the `^` marker and its surrounding `~` squiggles should be
+    /// placed for this error, relative to the source line spanning
+    /// `source_line_start..source_line_end`.
+    ///
+    /// All values are byte offsets/lengths. A perfect solution would be grapheme
+    /// cluster aware; as it stands, multibyte codepoints within the relevant span
+    /// inflate the offsets (and grapheme clusters inflate them even more). This is
+    /// mitigated slightly for errors known to point at a single visual character.
+    pub fn visualTokenInfo(self: ErrorDetails, source_line_start: usize, source_line_end: usize) VisualTokenInfo {
+        const caret_offset = self.token.start - source_line_start;
+
+        switch (self.err) {
+            // These may occupy more than 1 byte depending on the encoding, but they
+            // always refer to exactly one visual character/grapheme, so no squiggles.
+            .illegal_byte,
+            .illegal_byte_outside_string_literals,
+            .illegal_codepoint_outside_string_literals,
+            .illegal_byte_order_mark,
+            .illegal_private_use_character,
+            => return .{
+                .before_len = 0,
+                .point_offset = caret_offset,
+                .after_len = 0,
+            },
+            else => {},
+        }
+
+        // The highlighted span defaults to the token itself, and is clamped to the
+        // bounds of the line being displayed.
+        const span_first = if (self.token_span_start) |first_token| first_token.start else self.token.start;
+        const span_last = if (self.token_span_end) |last_token| last_token.end else self.token.end;
+        const clamped_first = @max(source_line_start, span_first);
+        const clamped_last = @min(source_line_end, span_last);
+
+        return .{
+            .before_len = self.token.start - clamped_first,
+            .point_offset = caret_offset,
+            // The end may not be past the token start when pointing to EOF
+            .after_len = if (clamped_last <= self.token.start) 0 else clamped_last - self.token.start - 1,
+        };
+    }
+};
+
+/// Writes the bold `<file>:<line>:<column>: ` prefix that starts a diagnostic line.
+/// When the originating file is not known, a dimmed `<after preprocessor>`
+/// placeholder is written in place of the file name.
+fn writeErrorLocationPrefix(writer: anytype, tty_config: std.io.tty.Config, maybe_file: ?[]const u8, line: anytype, column: anytype) !void {
+    try tty_config.setColor(writer, .bold);
+    if (maybe_file) |file| {
+        try writer.writeAll(file);
+    } else {
+        try tty_config.setColor(writer, .dim);
+        try writer.writeAll("<after preprocessor>");
+        try tty_config.setColor(writer, .reset);
+        try tty_config.setColor(writer, .bold);
+    }
+    try writer.print(":{d}:{d}: ", .{ line, column });
+}
+
+/// Renders a single error/warning/note to `writer`:
+/// - a `file:line:column: <severity>: <message>` header,
+/// - the offending source line with a `~~~^~~~` marker line underneath it
+///   (only when `err_details.print_source_line` is set),
+/// - and, when `source_mappings` can trace the line back to a file, an optional
+///   note showing the line(s) the source line originated from.
+///
+/// Hints are never rendered. `allocator` is only used for temporary buffers.
+pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_config: std.io.tty.Config, cwd: std.fs.Dir, err_details: ErrorDetails, source: []const u8, strings: []const []const u8, source_mappings: ?SourceMappings) !void {
+    if (err_details.type == .hint) return;
+
+    const line_start = err_details.token.getLineStartForErrorDisplay(source);
+    // 1-based column, with tab stops counted as 1 column wide for display purposes
+    const column = err_details.token.calculateColumn(source, 1, line_start) + 1;
+
+    const origin_span: ?SourceMappings.CorrespondingSpan = if (source_mappings) |mappings|
+        mappings.getCorrespondingSpan(err_details.token.line_number)
+    else
+        null;
+    const origin_file: ?[]const u8 = file: {
+        const mappings = source_mappings orelse break :file null;
+        const span = origin_span orelse break :file null;
+        break :file mappings.files.get(span.filename_offset);
+    };
+    const err_line = if (origin_span) |span| span.start_line else err_details.token.line_number;
+
+    // Header: location, severity, message
+    try writeErrorLocationPrefix(writer, tty_config, origin_file, err_line, column);
+    const severity: struct { color: std.io.tty.Color, label: []const u8 } = switch (err_details.type) {
+        .err => .{ .color = .red, .label = "error: " },
+        .warning => .{ .color = .yellow, .label = "warning: " },
+        .note => .{ .color = .cyan, .label = "note: " },
+        .hint => unreachable,
+    };
+    try tty_config.setColor(writer, severity.color);
+    try writer.writeAll(severity.label);
+    try tty_config.setColor(writer, .reset);
+    try tty_config.setColor(writer, .bold);
+    try err_details.render(writer, source, strings);
+    try writer.writeByte('\n');
+    try tty_config.setColor(writer, .reset);
+
+    if (!err_details.print_source_line) {
+        try writer.writeByte('\n');
+        return;
+    }
+
+    const source_line = err_details.token.getLineForErrorDisplay(source, line_start);
+    const marker = err_details.visualTokenInfo(line_start, line_start + source_line.len);
+
+    // The displayed form of the line is kept around because it is also needed to
+    // decide whether the 'line originated from' note is worth printing
+    var displayed_line = try std.ArrayList(u8).initCapacity(allocator, source_line.len);
+    defer displayed_line.deinit();
+    try writeSourceSlice(displayed_line.writer(), source_line);
+
+    // TODO: General handling of long lines, not tied to this specific error
+    const is_long_literal = err_details.err == .string_literal_too_long;
+    if (is_long_literal) {
+        const shown_len = @min(source_line.len, marker.point_offset + 16);
+        try writeSourceSlice(writer, source_line[0..shown_len]);
+        try tty_config.setColor(writer, .dim);
+        try writer.writeAll("<...truncated...>");
+        try tty_config.setColor(writer, .reset);
+    } else {
+        try writer.writeAll(displayed_line.items);
+    }
+    try writer.writeByte('\n');
+
+    // Marker line
+    try tty_config.setColor(writer, .green);
+    try writer.writeByteNTimes(' ', marker.point_offset - marker.before_len);
+    try writer.writeByteNTimes('~', marker.before_len);
+    try writer.writeByte('^');
+    const trailing_squiggles: usize = if (is_long_literal) @min(marker.after_len, 15) else marker.after_len;
+    if (trailing_squiggles > 0) {
+        try writer.writeByteNTimes('~', trailing_squiggles);
+    }
+    try writer.writeByte('\n');
+    try tty_config.setColor(writer, .reset);
+
+    // Everything below is the 'line originated from' note, which needs both
+    // the span and the file it belongs to.
+    const span = origin_span orelse return;
+    const file = origin_file orelse return;
+
+    var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, displayed_line.items, span, file);
+    defer corresponding_lines.deinit(allocator);
+
+    if (!corresponding_lines.worth_printing_note) return;
+
+    try writeErrorLocationPrefix(writer, tty_config, file, err_line, column);
+    try tty_config.setColor(writer, .cyan);
+    try writer.writeAll("note: ");
+    try tty_config.setColor(writer, .reset);
+    try tty_config.setColor(writer, .bold);
+    try writer.writeAll("this line originated from line");
+    if (span.start_line == span.end_line) {
+        try writer.print(" {}", .{span.start_line});
+    } else {
+        try writer.print("s {}-{}", .{ span.start_line, span.end_line });
+    }
+    try writer.print(" of file '{s}'\n", .{file});
+    try tty_config.setColor(writer, .reset);
+
+    if (!corresponding_lines.worth_printing_lines) return;
+
+    // If the lines could not be read, `lines` holds the reason instead, which
+    // is printed dimmed behind a red bar.
+    const is_error_message = corresponding_lines.lines_is_error_message;
+    if (is_error_message) {
+        try tty_config.setColor(writer, .red);
+        try writer.writeAll(" | ");
+        try tty_config.setColor(writer, .reset);
+        try tty_config.setColor(writer, .dim);
+    }
+    try writer.writeAll(corresponding_lines.lines.items);
+    if (is_error_message) {
+        try tty_config.setColor(writer, .reset);
+    }
+    try writer.writeAll("\n\n");
+}
+
+/// The line(s) of the original file that a (preprocessed) source line originated
+/// from, along with the decisions about how much of the 'line originated from'
+/// note should be printed.
+const CorrespondingLines = struct {
+    /// False when the note should be skipped entirely.
+    worth_printing_note: bool = true,
+    /// False when the note should be printed without the line(s) themselves.
+    worth_printing_lines: bool = true,
+    /// Either the line(s) read from the originating file or, when
+    /// `lines_is_error_message` is set, the reason they could not be read.
+    lines: std.ArrayListUnmanaged(u8) = .{},
+    lines_is_error_message: bool = false,
+
+    /// Reads lines `corresponding_span.start_line` through `corresponding_span.end_line`
+    /// of `corresponding_file` (opened relative to `cwd`) and compares them against
+    /// `lines_for_comparison` to determine whether the note is worth printing.
+    ///
+    /// Failing to open the file or to find the lines is not an error: the failure
+    /// is stored as a printable message in `lines` instead. Other read errors and
+    /// allocation failures are returned.
+    ///
+    /// The caller must call `deinit` on the returned value with the same allocator.
+    pub fn init(allocator: std.mem.Allocator, cwd: std.fs.Dir, err_details: ErrorDetails, lines_for_comparison: []const u8, corresponding_span: SourceMappings.CorrespondingSpan, corresponding_file: []const u8) !CorrespondingLines {
+        var corresponding_lines = CorrespondingLines{};
+        // Anything already written to `lines` would otherwise leak on an error return
+        errdefer corresponding_lines.deinit(allocator);
+
+        if (err_details.err == .string_literal_too_long) {
+            if (err_details.token.line_number == corresponding_span.start_line) {
+                // We don't do line comparison for this error, so the line number is
+                // the only signal available: skip the note when it is unchanged
+                corresponding_lines.worth_printing_note = false;
+            } else {
+                // Don't print the originating line for this error, we know it's really long
+                corresponding_lines.worth_printing_lines = false;
+            }
+            return corresponding_lines;
+        }
+
+        const file = utils.openFileNotDir(cwd, corresponding_file, .{}) catch |err| {
+            try corresponding_lines.setReadFailure(allocator, err);
+            return corresponding_lines;
+        };
+        defer file.close();
+
+        var buffered_reader = std.io.bufferedReader(file.reader());
+        const writer = corresponding_lines.lines.writer(allocator);
+        writeLinesFromStream(writer, buffered_reader.reader(), corresponding_span.start_line, corresponding_span.end_line) catch |err| switch (err) {
+            error.LinesNotFound => {
+                try corresponding_lines.setReadFailure(allocator, err);
+                return corresponding_lines;
+            },
+            else => |e| return e,
+        };
+
+        // If the lines are the same as they were before preprocessing, skip printing the note entirely
+        if (std.mem.eql(u8, lines_for_comparison, corresponding_lines.lines.items)) {
+            corresponding_lines.worth_printing_note = false;
+        }
+        return corresponding_lines;
+    }
+
+    /// Replaces the contents of `lines` with a message explaining that the
+    /// line(s) could not be read because of `err`.
+    fn setReadFailure(self: *CorrespondingLines, allocator: std.mem.Allocator, err: anyerror) !void {
+        self.lines.clearRetainingCapacity();
+        try self.lines.writer(allocator).print("unable to print line(s) from file: {s}", .{@errorName(err)});
+        self.lines_is_error_message = true;
+    }
+
+    /// Frees the memory held by `lines`.
+    pub fn deinit(self: *CorrespondingLines, allocator: std.mem.Allocator) void {
+        self.lines.deinit(allocator);
+    }
+};
+
+/// Writes every byte of `slice` to `writer` in its error-display form
+/// (see `writeSourceByte`).
+fn writeSourceSlice(writer: anytype, slice: []const u8) !void {
+    var index: usize = 0;
+    while (index < slice.len) : (index += 1) {
+        try writeSourceByte(writer, slice[index]);
+    }
+}
+
+/// Writes the error-display form of a single source byte to `writer`:
+/// - `\r` is written as nothing,
+/// - tab, vertical tab and form feed are written as a single space,
+/// - all other control characters (except `\n`) and DEL are written as `�`,
+/// - everything else is written as-is.
+inline fn writeSourceByte(writer: anytype, byte: u8) !void {
+    switch (byte) {
+        // \r is seemingly ignored by the RC compiler, so skipping it when printing
+        // source lines could help avoid confusing output (e.g. RC\rDATA if printed
+        // verbatim would show up in the console as DATA, but the compiler reads it
+        // as RCDATA)
+        //
+        // NOTE: This is irrelevant when using the clang preprocessor, because
+        //       unpaired \r characters get converted to \n, but may become relevant
+        //       if another preprocessor is used instead.
+        0x0D => return,
+        // \t, vertical tab, form feed
+        0x09, 0x0B, 0x0C => return writer.writeByte(' '),
+        0x00...0x08, 0x0E...0x1F, 0x7F => return writer.writeAll("�"),
+        else => return writer.writeByte(byte),
+    }
+}
+
+/// Writes lines `start_line` through `end_line` (1-based, inclusive) of `input`
+/// to `writer` in their error-display form (see `writeSourceByte`).
+///
+/// `\n`, `\r`, `\r\n` and `\n\r` are each treated as one line ending. Line endings
+/// between the written lines are always written as a single `\n`, and no line
+/// ending is written after the last line.
+///
+/// Returns `error.LinesNotFound` if `input` ends before `end_line` is reached.
+pub fn writeLinesFromStream(writer: anytype, input: anytype, start_line: usize, end_line: usize) !void {
+    var line_num: usize = 1;
+    var last_byte: u8 = 0;
+    while (try readByteOrEof(input)) |byte| {
+        switch (byte) {
+            '\n', '\r' => {
+                if (utils.isLineEndingPair(last_byte, byte)) {
+                    // This is the second half of a line ending that has already
+                    // been handled. Reset last_byte to a non-line ending so that
+                    // consecutive CRLF pairs don't get treated as one
+                    // long line ending 'pair'
+                    last_byte = 0;
+                    continue;
+                }
+                if (line_num == end_line) return;
+                // Always emit \n rather than the byte itself: writeSourceByte drops \r
+                // and the second byte of a pair is skipped above, so lines separated by
+                // \r or \r\n would otherwise be written with nothing between them.
+                if (line_num >= start_line) try writer.writeByte('\n');
+                line_num += 1;
+            },
+            else => {
+                if (line_num >= start_line) try writeSourceByte(writer, byte);
+            },
+        }
+        last_byte = byte;
+    }
+    if (line_num != end_line) {
+        return error.LinesNotFound;
+    }
+}
+
+/// Reads the next byte from `reader`, returning `null` instead of
+/// `error.EndOfStream` once the stream is exhausted. Any other error is returned.
+pub fn readByteOrEof(reader: anytype) !?u8 {
+    const byte = reader.readByte() catch |err| switch (err) {
+        error.EndOfStream => return null,
+        else => |other| return other,
+    };
+    return byte;
+}
diff --git a/src/resinator/ico.zig b/lib/compiler/resinator/ico.zig
diff --git a/lib/compiler/resinator/lang.zig b/lib/compiler/resinator/lang.zig
@@ -0,0 +1,877 @@
+const std = @import("std");
+
+/// This function is specific to how the Win32 RC command line interprets
+/// language IDs specified as integers.
+/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed
+/// - Wraps on overflow of u16
+/// - Stops parsing on any invalid hexadecimal digits
+/// - Errors if a digit is not the first char
+/// - `-` (negative) prefix is allowed
+///
+/// Returns `error.InvalidLanguageId` for an empty string, or when the first
+/// character after the optional `-` and `0x` prefixes is not a hexadecimal digit.
+pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
+    // An empty string has no first digit; it must be rejected before indexing into it
+    if (str.len == 0) return error.InvalidLanguageId;
+
+    const radix: u8 = 16;
+    const is_negative = str[0] == '-';
+    var digits = if (is_negative) str[1..] else str;
+
+    // The prefix is only skipped when something follows it, so that e.g. "0x"
+    // is parsed as the digit 0 followed by an invalid digit.
+    if (digits.len > 2 and digits[0] == '0' and digits[1] == 'x') {
+        digits = digits[2..];
+    }
+
+    // Note: if nothing follows a `-` prefix, the loop body never runs and the
+    // result is 0.
+    var result: u16 = 0;
+    for (digits, 0..) |c, i| {
+        const digit = std.fmt.charToDigit(c, radix) catch {
+            // First digit must be valid
+            if (i == 0) return error.InvalidLanguageId;
+            // On any later invalid digit, just stop parsing but don't fail
+            break;
+        };
+        result = result *% radix +% digit;
+    }
+
+    return if (is_negative) 0 -% result else result;
+}
+
+test parseInt {
+    // Inputs that parse successfully, paired with the resulting ID
+    const valid_cases = [_]struct { input: []const u8, expected: u16 }{
+        .{ .input = "16", .expected = 0x16 },
+        .{ .input = "0x1A", .expected = 0x1a },
+        .{ .input = "0x1Azzzz", .expected = 0x1a },
+        .{ .input = "-1", .expected = 0xffff },
+        .{ .input = "-0x16", .expected = 0xffea },
+        .{ .input = "0o100", .expected = 0x0 },
+        .{ .input = "10001", .expected = 0x1 },
+    };
+    for (valid_cases) |case| {
+        try std.testing.expectEqual(case.expected, try parseInt(case.input));
+    }
+
+    // Inputs whose first digit is not a valid hexadecimal digit
+    const invalid_inputs = [_][]const u8{ "--1", "0xha", "¹", "~1" };
+    for (invalid_inputs) |input| {
+        try std.testing.expectError(error.InvalidLanguageId, parseInt(input));
+    }
+}
+
+/// Converts a language tag to its numeric ID the way the Win32 RC command line
+/// does: invalid tags are rejected with `error.InvalidLanguageTag`, while tags
+/// that are valid enough but have no specific assigned ID are converted to
+/// LOCALE_CUSTOM_UNSPECIFIED.
+pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
+    const id = (try tagToId(tag)) orelse return LOCALE_CUSTOM_UNSPECIFIED;
+    return @intFromEnum(id);
+}
+
+/// Looks up the `LanguageId` assigned to `tag`. Returns `null` if the tag is
+/// valid but has no assigned ID, and `error.InvalidLanguageTag` if the tag
+/// cannot be parsed. The lookup ignores case and treats `-` and `_` as equivalent.
+pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
+    const parsed = try parse(tag);
+    // No tag with an assigned ID has multiple suffixes, so there is
+    // currently nothing to look up in that case.
+    if (parsed.multiple_suffixes) return null;
+
+    const max_known_len = comptime max: {
+        var longest: usize = 0;
+        for (@typeInfo(LanguageId).Enum.fields) |field| {
+            longest = @max(longest, field.name.len);
+        }
+        break :max longest;
+    };
+    // Likewise, a tag longer than the longest tag with an assigned ID
+    // cannot possibly match.
+    if (tag.len > max_known_len) return null;
+
+    // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, the suffix (and
+    // its separator) is omitted, but only if the tag contains a valid alternate
+    // sort order.
+    const lookup_tag = if (parsed.isSuffixValidSortOrder())
+        tag[0 .. tag.len - (parsed.suffix.?.len + 1)]
+    else
+        tag;
+
+    var storage: [max_known_len]u8 = undefined;
+    const key = normalizeTag(lookup_tag, &storage);
+
+    if (std.meta.stringToEnum(LanguageId, key)) |id| return id;
+    // Special case for a tag that has been mapped to the same ID twice.
+    if (std.mem.eql(u8, "ff_latn_ng", key)) return LanguageId.ff_ng;
+    return null;
+}
+
+test tagToId {
+    const cases = [_]struct { tag: []const u8, id: LanguageId }{
+        .{ .tag = "ar-ae", .id = LanguageId.ar_ae },
+        // Case and separator differences are normalized away
+        .{ .tag = "AR_AE", .id = LanguageId.ar_ae },
+        .{ .tag = "ff-ng", .id = LanguageId.ff_ng },
+        // Special case
+        .{ .tag = "ff-Latn-NG", .id = LanguageId.ff_ng },
+    };
+    for (cases) |case| {
+        try std.testing.expectEqual(case.id, (try tagToId(case.tag)).?);
+    }
+}
+
+test "exhaustive tagToId" {
+    // Every tag with an assigned ID must map to exactly that ID
+    inline for (@typeInfo(LanguageId).Enum.fields) |field| {
+        const maybe_id = tagToId(field.name) catch |err| {
+            std.debug.print("tag: {s}\n", .{field.name});
+            return err;
+        };
+        const id = maybe_id orelse {
+            std.debug.print("tag: {s}, got null\n", .{field.name});
+            return error.TestExpectedEqual;
+        };
+        try std.testing.expectEqual(@field(LanguageId, field.name), id);
+    }
+
+    // Every `<lang>-<country>-<sort>` alternate sort tag must map to the ID
+    // of `<lang>_<country>`
+    var buf: [32]u8 = undefined;
+    inline for (valid_alternate_sorts) |parsed_sort| {
+        const tag = std.fmt.bufPrint(&buf, "{s}-{s}-{s}", .{
+            parsed_sort.language_code,
+            parsed_sort.country_code.?,
+            parsed_sort.suffix.?,
+        }) catch unreachable;
+        const expected = @field(LanguageId, parsed_sort.language_code ++ "_" ++ parsed_sort.country_code.?);
+
+        const maybe_id = tagToId(tag) catch |err| {
+            std.debug.print("tag: {s}\n", .{tag});
+            return err;
+        };
+        const id = maybe_id orelse {
+            std.debug.print("tag: {s}, expected: {}, got null\n", .{ tag, expected });
+            return error.TestExpectedEqual;
+        };
+        try std.testing.expectEqual(expected, id);
+    }
+}
+
+/// Writes a normalized copy of `tag` into `buf` and returns the written part of
+/// `buf`: every `-` becomes `_` and ASCII letters are lowercased.
+/// Asserts that `buf` is at least as long as `tag`.
+fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
+    std.debug.assert(buf.len >= tag.len);
+    const normalized = buf[0..tag.len];
+    for (tag, normalized) |c, *dest| {
+        dest.* = switch (c) {
+            '-' => '_',
+            else => std.ascii.toLower(c),
+        };
+    }
+    return normalized;
+}
+
+/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
+/// "When an LCID is requested for a locale without a
+/// permanent LCID assignment, nor a temporary
+/// assignment as above, the protocol will respond
+/// with LOCALE_CUSTOM_UNSPECIFIED for all such
+/// locales. Because this single value is used for
+/// numerous possible locale names, it is impossible to
+/// round trip this locale, even temporarily.
+/// Applications should discard this value as soon as
+/// possible and never persist it. If the system is
+/// forced to respond to a request for
+/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
+/// the current user locale. This is often incorrect but
+/// may prevent an application or component from
+/// failing. As the meaning of this temporary LCID is
+/// unstable, it should never be used for interchange
+/// or persisted data. This is a 1-to-many relationship
+/// that is very unstable."
+pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;
+
+/// Primary language identifier for English.
+pub const LANG_ENGLISH = 0x09;
+/// Sublanguage identifier for English (United States).
+pub const SUBLANG_ENGLISH_US = 0x01;
+
+/// Combines a primary language identifier and a sublanguage identifier into a
+/// 16-bit language identifier, like the Win32 `MAKELANGID` macro: the primary
+/// language occupies the low 10 bits and the sublanguage the high 6 bits, so
+/// e.g. `MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US)` is `0x0409`.
+///
+/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
+pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
+    return (@as(u16, sublang) << 10) | primary;
+}
+
+/// Parses a language tag into its parts, returning `error.InvalidLanguageTag`
+/// if it is malformed.
+///
+/// Language tag format expressed as a regular expression (rough approximation):
+///
+/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
+///     lang    |     script      |      country    |       suffix
+///
+/// Notes:
+/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
+///   parsing rules (e.g. `a-0` and `a-00000000` are allowed).
+/// - There can also be any number of trailing suffix parts as long as they each
+///   would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
+/// - When doing lookups, trailing suffix parts are taken into account, e.g.
+///   `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
+/// - A suffix is only allowed if:
+///   + Lang code is 1 char long, or
+///   + A country code is present, or
+///   + A script tag is not present and:
+///     - the suffix is numeric-only and has a length of 3, or
+///     - the lang is `qps` and the suffix is `ploca` or `plocm`
+pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
+    var parts = std.mem.splitAny(u8, lang_tag, "-_");
+
+    const language = parts.first();
+    if (language.len < 1 or language.len > 3 or !isAllAlphabetic(language)) {
+        return error.InvalidLanguageTag;
+    }
+    var result = Parsed{ .language_code = language };
+
+    const second = parts.next() orelse {
+        // If there's no part besides a 1-len lang code, then it is malformed
+        if (language.len == 1) return error.InvalidLanguageTag;
+        return result;
+    };
+
+    // The second part could be a script tag, a country code, or a suffix
+    if (language.len == 1) {
+        // The lang code being length 1 behaves strangely, so it is fully special cased.
+        //
+        // This is almost certainly not the 'right' way to do this, but I don't have a method
+        // to determine how exactly these language tags are parsed, and it seems like
+        // suffix parsing rules apply generally (digits allowed, length of 1 to 8).
+        //
+        // However, because we want to be able to lookup `x-iv-mathan` normally without
+        // `multiple_suffixes` being set to true, two-length alphabetic parts need to be
+        // treated as a country code. Everything else can be thrown into the suffix as
+        // long as the normal suffix rules apply.
+        if (second.len == 2 and isAllAlphabetic(second)) {
+            result.country_code = second;
+        } else if (second.len > 0 and second.len <= 8 and isAllAlphanumeric(second)) {
+            result.suffix = second;
+        } else {
+            return error.InvalidLanguageTag;
+        }
+    } else switch (second.len) {
+        2 => {
+            if (!isAllAlphabetic(second)) return error.InvalidLanguageTag;
+            result.country_code = second;
+        },
+        // Only a 3-len numeric suffix is allowed as the second part of a tag
+        3 => {
+            if (!isAllNumeric(second)) return error.InvalidLanguageTag;
+            result.suffix = second;
+        },
+        4 => {
+            if (!isAllAlphabetic(second)) return error.InvalidLanguageTag;
+            result.script_tag = second;
+        },
+        // Special case for qps-ploca and qps-plocm
+        5 => {
+            const is_pseudo_locale = std.ascii.eqlIgnoreCase(language, "qps") and
+                (std.ascii.eqlIgnoreCase(second, "ploca") or
+                std.ascii.eqlIgnoreCase(second, "plocm"));
+            if (!is_pseudo_locale) return error.InvalidLanguageTag;
+            result.suffix = second;
+        },
+        else => return error.InvalidLanguageTag,
+    }
+
+    if (result.script_tag != null) {
+        const third = parts.next() orelse return result;
+        // Only a country code can follow a script tag; a suffix is not allowed
+        // when a country code is not present.
+        if (third.len != 2 or !isAllAlphabetic(third)) return error.InvalidLanguageTag;
+        result.country_code = third;
+    }
+
+    // Any potential script tag/country code has now been parsed, so anything
+    // remaining is a suffix
+    while (parts.next()) |extra| {
+        const is_valid_suffix = extra.len >= 1 and extra.len <= 8 and isAllAlphanumeric(extra);
+        if (!is_valid_suffix) return error.InvalidLanguageTag;
+
+        if (result.suffix != null) {
+            // In theory we could return early here, but each part still needs to
+            // be validated all the way to the end, e.g. `en-us-suffix-a-b-c-!!!`
+            // should be rejected because of the invalid `!!!` suffix part.
+            result.multiple_suffixes = true;
+        } else {
+            result.suffix = extra;
+        }
+    }
+    return result;
+}
+
+/// The parts of a language tag. The slices are whatever was assigned when the
+/// value was constructed (`parse` stores sub-slices of the tag it was given).
+pub const Parsed = struct {
+    language_code: []const u8,
+    script_tag: ?[]const u8 = null,
+    country_code: ?[]const u8 = null,
+    /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc
+    suffix: ?[]const u8 = null,
+    /// There can be any number of suffixes, but their values don't matter; it is
+    /// enough to know whether more than one exists so that e.g.
+    /// `ca-es-valencia-blah` can be seen as different from `ca-es-valencia`.
+    /// Storing this as a bool avoids needing either (a) dynamic allocation or
+    /// (b) a limit to the number of suffixes allowed when parsing.
+    multiple_suffixes: bool = false,
+
+    /// Returns true if this tag consists of exactly a language code, a country
+    /// code and a suffix, and that combination (ignoring case) is one of the
+    /// entries of `valid_alternate_sorts`.
+    pub fn isSuffixValidSortOrder(self: Parsed) bool {
+        if (self.script_tag != null or self.multiple_suffixes) return false;
+        const country = self.country_code orelse return false;
+        const suffix = self.suffix orelse return false;
+
+        for (valid_alternate_sorts) |candidate| {
+            const is_match = std.ascii.eqlIgnoreCase(candidate.language_code, self.language_code) and
+                std.ascii.eqlIgnoreCase(candidate.country_code.?, country) and
+                std.ascii.eqlIgnoreCase(candidate.suffix.?, suffix);
+            if (is_match) return true;
+        }
+        return false;
+    }
+};
+
+/// Language/country/suffix combinations for which the suffix is an alternate sort order.
+/// Consulted by `Parsed.isSuffixValidSortOrder`, which unwraps `country_code` and `suffix`
+/// with `.?`, so every entry here must set both fields.
+///
+/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
+/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table."
+const valid_alternate_sorts = [_]Parsed{
+ // Note: x-IV-mathan is omitted due to how lookups are implemented.
+ // This table is used to make e.g. `de-de_phoneb` get looked up
+ // as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan
+ // instead needs to be looked up with the suffix included because
+ // `x-iv` is not a tag with an assigned ID.
+ .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" },
+ .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" },
+ .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" },
+ .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" },
+ .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" },
+ .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" },
+ .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" },
+ .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" },
+ .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" },
+ .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" },
+ .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" },
+ .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" },
+ .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" },
+};
+
+test "parse" {
+    // Each case checks how `parse` splits a tag into its parts; the expected
+    // slices keep the casing of the input.
+
+    // language only / language + country
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "en",
+    }, try parse("en"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "en",
+        .country_code = "us",
+    }, try parse("en-us"));
+
+    // suffixes, including multiple suffixes
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "en",
+        .suffix = "123",
+    }, try parse("en-123"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "en",
+        .suffix = "123",
+        .multiple_suffixes = true,
+    }, try parse("en-123-blah"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "en",
+        .country_code = "us",
+        .suffix = "123",
+        .multiple_suffixes = true,
+    }, try parse("en-us_123-blah"));
+
+    // script tags
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "eng",
+        .script_tag = "Latn",
+    }, try parse("eng-Latn"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "ff",
+        .script_tag = "Latn",
+        .country_code = "NG",
+    }, try parse("ff-Latn-NG"));
+
+    // special-cased tags
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "qps",
+        .suffix = "Plocm",
+    }, try parse("qps-Plocm"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "qps",
+        .suffix = "ploca",
+    }, try parse("qps-ploca"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "x",
+        .country_code = "IV",
+        .suffix = "mathan",
+    }, try parse("x-IV-mathan"));
+
+    // 1-len language codes
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "a",
+        .suffix = "a",
+    }, try parse("a-a"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "a",
+        .suffix = "000",
+    }, try parse("a-000"));
+    try std.testing.expectEqualDeep(Parsed{
+        .language_code = "a",
+        .suffix = "00000000",
+    }, try parse("a-00000000"));
+
+    // suffix not allowed if script tag is present without country code
+    try std.testing.expectError(error.InvalidLanguageTag, parse("eng-Latn-suffix"));
+    // suffix must be 3 numeric digits if neither script tag nor country code is present
+    try std.testing.expectError(error.InvalidLanguageTag, parse("eng-suffix"));
+    try std.testing.expectError(error.InvalidLanguageTag, parse("en-plocm"));
+    // 1-len lang code is not allowed if it's the only part
+    try std.testing.expectError(error.InvalidLanguageTag, parse("e"));
+}
+
+/// Returns true if every byte in `str` is an ASCII letter.
+/// An empty slice yields true.
+fn isAllAlphabetic(str: []const u8) bool {
+    return for (str) |byte| {
+        if (!std.ascii.isAlphabetic(byte)) break false;
+    } else true;
+}
+
+/// Returns true if every byte in `str` is an ASCII letter or digit.
+/// An empty slice yields true.
+fn isAllAlphanumeric(str: []const u8) bool {
+    return for (str) |byte| {
+        if (!std.ascii.isAlphanumeric(byte)) break false;
+    } else true;
+}
+
+/// Returns true if every byte in `str` is an ASCII digit.
+/// An empty slice yields true.
+fn isAllNumeric(str: []const u8) bool {
+    return for (str) |byte| {
+        if (!std.ascii.isDigit(byte)) break false;
+    } else true;
+}
+
+/// Maps normalized language tags (the enum tag names) to their 16-bit
+/// Language IDs (the enum tag values).
+///
+/// Derived from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
+/// - Protocol Revision: 15.0
+/// - Language / Language ID / Language Tag table in Appendix A
+/// - Removed all rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED)
+/// - Normalized each language tag (lowercased, replaced all `-` with `_`)
+/// - There is one special case where two tags are mapped to the same ID, the following
+/// has been omitted and must be special cased during lookup to map to the ID ff_ng / 0x0467.
+/// ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria
+/// - x_iv_mathan has been added which is not in the table but does appear in the Alternate sorts
+/// table as 0x007F (LANG_INVARIANT).
+pub const LanguageId = enum(u16) {
+ // Language tag = Language ID, // Language, Location (or type)
+ af = 0x0036, // Afrikaans
+ af_za = 0x0436, // Afrikaans, South Africa
+ sq = 0x001C, // Albanian
+ sq_al = 0x041C, // Albanian, Albania
+ gsw = 0x0084, // Alsatian
+ gsw_fr = 0x0484, // Alsatian, France
+ am = 0x005E, // Amharic
+ am_et = 0x045E, // Amharic, Ethiopia
+ ar = 0x0001, // Arabic
+ ar_dz = 0x1401, // Arabic, Algeria
+ ar_bh = 0x3C01, // Arabic, Bahrain
+ ar_eg = 0x0c01, // Arabic, Egypt
+ ar_iq = 0x0801, // Arabic, Iraq
+ ar_jo = 0x2C01, // Arabic, Jordan
+ ar_kw = 0x3401, // Arabic, Kuwait
+ ar_lb = 0x3001, // Arabic, Lebanon
+ ar_ly = 0x1001, // Arabic, Libya
+ ar_ma = 0x1801, // Arabic, Morocco
+ ar_om = 0x2001, // Arabic, Oman
+ ar_qa = 0x4001, // Arabic, Qatar
+ ar_sa = 0x0401, // Arabic, Saudi Arabia
+ ar_sy = 0x2801, // Arabic, Syria
+ ar_tn = 0x1C01, // Arabic, Tunisia
+ ar_ae = 0x3801, // Arabic, U.A.E.
+ ar_ye = 0x2401, // Arabic, Yemen
+ hy = 0x002B, // Armenian
+ hy_am = 0x042B, // Armenian, Armenia
+ as = 0x004D, // Assamese
+ as_in = 0x044D, // Assamese, India
+ az_cyrl = 0x742C, // Azerbaijani (Cyrillic)
+ az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan
+ az = 0x002C, // Azerbaijani (Latin)
+ az_latn = 0x782C, // Azerbaijani (Latin)
+ az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan
+ bn = 0x0045, // Bangla
+ bn_bd = 0x0845, // Bangla, Bangladesh
+ bn_in = 0x0445, // Bangla, India
+ ba = 0x006D, // Bashkir
+ ba_ru = 0x046D, // Bashkir, Russia
+ eu = 0x002D, // Basque
+ eu_es = 0x042D, // Basque, Spain
+ be = 0x0023, // Belarusian
+ be_by = 0x0423, // Belarusian, Belarus
+ bs_cyrl = 0x641A, // Bosnian (Cyrillic)
+ bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina
+ bs_latn = 0x681A, // Bosnian (Latin)
+ bs = 0x781A, // Bosnian (Latin)
+ bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina
+ br = 0x007E, // Breton
+ br_fr = 0x047E, // Breton, France
+ bg = 0x0002, // Bulgarian
+ bg_bg = 0x0402, // Bulgarian, Bulgaria
+ my = 0x0055, // Burmese
+ my_mm = 0x0455, // Burmese, Myanmar
+ ca = 0x0003, // Catalan
+ ca_es = 0x0403, // Catalan, Spain
+ tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco
+ ku = 0x0092, // Central Kurdish
+ ku_arab = 0x7c92, // Central Kurdish
+ ku_arab_iq = 0x0492, // Central Kurdish, Iraq
+ chr = 0x005C, // Cherokee
+ chr_cher = 0x7c5C, // Cherokee
+ chr_cher_us = 0x045C, // Cherokee, United States
+ zh_hans = 0x0004, // Chinese (Simplified)
+ zh = 0x7804, // Chinese (Simplified)
+ zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China
+ zh_sg = 0x1004, // Chinese (Simplified), Singapore
+ zh_hant = 0x7C04, // Chinese (Traditional)
+ zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R.
+ zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R.
+ zh_tw = 0x0404, // Chinese (Traditional), Taiwan
+ co = 0x0083, // Corsican
+ co_fr = 0x0483, // Corsican, France
+ hr = 0x001A, // Croatian
+ hr_hr = 0x041A, // Croatian, Croatia
+ hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina
+ cs = 0x0005, // Czech
+ cs_cz = 0x0405, // Czech, Czech Republic
+ da = 0x0006, // Danish
+ da_dk = 0x0406, // Danish, Denmark
+ prs = 0x008C, // Dari
+ prs_af = 0x048C, // Dari, Afghanistan
+ dv = 0x0065, // Divehi
+ dv_mv = 0x0465, // Divehi, Maldives
+ nl = 0x0013, // Dutch
+ nl_be = 0x0813, // Dutch, Belgium
+ nl_nl = 0x0413, // Dutch, Netherlands
+ dz_bt = 0x0C51, // Dzongkha, Bhutan
+ en = 0x0009, // English
+ en_au = 0x0C09, // English, Australia
+ en_bz = 0x2809, // English, Belize
+ en_ca = 0x1009, // English, Canada
+ en_029 = 0x2409, // English, Caribbean
+ en_hk = 0x3C09, // English, Hong Kong
+ en_in = 0x4009, // English, India
+ en_ie = 0x1809, // English, Ireland
+ en_jm = 0x2009, // English, Jamaica
+ en_my = 0x4409, // English, Malaysia
+ en_nz = 0x1409, // English, New Zealand
+ en_ph = 0x3409, // English, Republic of the Philippines
+ en_sg = 0x4809, // English, Singapore
+ en_za = 0x1C09, // English, South Africa
+ en_tt = 0x2c09, // English, Trinidad and Tobago
+ en_ae = 0x4C09, // English, United Arab Emirates
+ en_gb = 0x0809, // English, United Kingdom
+ en_us = 0x0409, // English, United States
+ en_zw = 0x3009, // English, Zimbabwe
+ et = 0x0025, // Estonian
+ et_ee = 0x0425, // Estonian, Estonia
+ fo = 0x0038, // Faroese
+ fo_fo = 0x0438, // Faroese, Faroe Islands
+ fil = 0x0064, // Filipino
+ fil_ph = 0x0464, // Filipino, Philippines
+ fi = 0x000B, // Finnish
+ fi_fi = 0x040B, // Finnish, Finland
+ fr = 0x000C, // French
+ fr_be = 0x080C, // French, Belgium
+ fr_cm = 0x2c0C, // French, Cameroon
+ fr_ca = 0x0c0C, // French, Canada
+ fr_029 = 0x1C0C, // French, Caribbean
+ fr_cd = 0x240C, // French, Congo, DRC
+ fr_ci = 0x300C, // French, Côte d'Ivoire
+ fr_fr = 0x040C, // French, France
+ fr_ht = 0x3c0C, // French, Haiti
+ fr_lu = 0x140C, // French, Luxembourg
+ fr_ml = 0x340C, // French, Mali
+ fr_ma = 0x380C, // French, Morocco
+ fr_mc = 0x180C, // French, Principality of Monaco
+ fr_re = 0x200C, // French, Reunion
+ fr_sn = 0x280C, // French, Senegal
+ fr_ch = 0x100C, // French, Switzerland
+ fy = 0x0062, // Frisian
+ fy_nl = 0x0462, // Frisian, Netherlands
+ ff = 0x0067, // Fulah
+ ff_latn = 0x7C67, // Fulah (Latin)
+ ff_ng = 0x0467, // Fulah, Nigeria
+ ff_latn_sn = 0x0867, // Fulah, Senegal
+ gl = 0x0056, // Galician
+ gl_es = 0x0456, // Galician, Spain
+ ka = 0x0037, // Georgian
+ ka_ge = 0x0437, // Georgian, Georgia
+ de = 0x0007, // German
+ de_at = 0x0C07, // German, Austria
+ de_de = 0x0407, // German, Germany
+ de_li = 0x1407, // German, Liechtenstein
+ de_lu = 0x1007, // German, Luxembourg
+ de_ch = 0x0807, // German, Switzerland
+ el = 0x0008, // Greek
+ el_gr = 0x0408, // Greek, Greece
+ kl = 0x006F, // Greenlandic
+ kl_gl = 0x046F, // Greenlandic, Greenland
+ gn = 0x0074, // Guarani
+ gn_py = 0x0474, // Guarani, Paraguay
+ gu = 0x0047, // Gujarati
+ gu_in = 0x0447, // Gujarati, India
+ ha = 0x0068, // Hausa (Latin)
+ ha_latn = 0x7C68, // Hausa (Latin)
+ ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria
+ haw = 0x0075, // Hawaiian
+ haw_us = 0x0475, // Hawaiian, United States
+ he = 0x000D, // Hebrew
+ he_il = 0x040D, // Hebrew, Israel
+ hi = 0x0039, // Hindi
+ hi_in = 0x0439, // Hindi, India
+ hu = 0x000E, // Hungarian
+ hu_hu = 0x040E, // Hungarian, Hungary
+ is = 0x000F, // Icelandic
+ is_is = 0x040F, // Icelandic, Iceland
+ ig = 0x0070, // Igbo
+ ig_ng = 0x0470, // Igbo, Nigeria
+ id = 0x0021, // Indonesian
+ id_id = 0x0421, // Indonesian, Indonesia
+ iu = 0x005D, // Inuktitut (Latin)
+ iu_latn = 0x7C5D, // Inuktitut (Latin)
+ iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada
+ iu_cans = 0x785D, // Inuktitut (Syllabics)
+ iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada
+ ga = 0x003C, // Irish
+ ga_ie = 0x083C, // Irish, Ireland
+ it = 0x0010, // Italian
+ it_it = 0x0410, // Italian, Italy
+ it_ch = 0x0810, // Italian, Switzerland
+ ja = 0x0011, // Japanese
+ ja_jp = 0x0411, // Japanese, Japan
+ kn = 0x004B, // Kannada
+ kn_in = 0x044B, // Kannada, India
+ kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria
+ ks = 0x0060, // Kashmiri
+ ks_arab = 0x0460, // Kashmiri, Perso-Arabic
+ ks_deva_in = 0x0860, // Kashmiri (Devanagari), India
+ kk = 0x003F, // Kazakh
+ kk_kz = 0x043F, // Kazakh, Kazakhstan
+ km = 0x0053, // Khmer
+ km_kh = 0x0453, // Khmer, Cambodia
+ quc = 0x0086, // K'iche
+ quc_latn_gt = 0x0486, // K'iche, Guatemala
+ rw = 0x0087, // Kinyarwanda
+ rw_rw = 0x0487, // Kinyarwanda, Rwanda
+ sw = 0x0041, // Kiswahili
+ sw_ke = 0x0441, // Kiswahili, Kenya
+ kok = 0x0057, // Konkani
+ kok_in = 0x0457, // Konkani, India
+ ko = 0x0012, // Korean
+ ko_kr = 0x0412, // Korean, Korea
+ ky = 0x0040, // Kyrgyz
+ ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan
+ lo = 0x0054, // Lao
+ lo_la = 0x0454, // Lao, Lao P.D.R.
+ la_va = 0x0476, // Latin, Vatican City
+ lv = 0x0026, // Latvian
+ lv_lv = 0x0426, // Latvian, Latvia
+ lt = 0x0027, // Lithuanian
+ lt_lt = 0x0427, // Lithuanian, Lithuania
+ dsb = 0x7C2E, // Lower Sorbian
+ dsb_de = 0x082E, // Lower Sorbian, Germany
+ lb = 0x006E, // Luxembourgish
+ lb_lu = 0x046E, // Luxembourgish, Luxembourg
+ mk = 0x002F, // Macedonian
+ mk_mk = 0x042F, // Macedonian, North Macedonia
+ ms = 0x003E, // Malay
+ ms_bn = 0x083E, // Malay, Brunei Darussalam
+ ms_my = 0x043E, // Malay, Malaysia
+ ml = 0x004C, // Malayalam
+ ml_in = 0x044C, // Malayalam, India
+ mt = 0x003A, // Maltese
+ mt_mt = 0x043A, // Maltese, Malta
+ mi = 0x0081, // Maori
+ mi_nz = 0x0481, // Maori, New Zealand
+ arn = 0x007A, // Mapudungun
+ arn_cl = 0x047A, // Mapudungun, Chile
+ mr = 0x004E, // Marathi
+ mr_in = 0x044E, // Marathi, India
+ moh = 0x007C, // Mohawk
+ moh_ca = 0x047C, // Mohawk, Canada
+ mn = 0x0050, // Mongolian (Cyrillic)
+ mn_cyrl = 0x7850, // Mongolian (Cyrillic)
+ mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia
+ mn_mong = 0x7C50, // Mongolian (Traditional Mongolian)
+ mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China
+ mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia
+ ne = 0x0061, // Nepali
+ ne_in = 0x0861, // Nepali, India
+ ne_np = 0x0461, // Nepali, Nepal
+ no = 0x0014, // Norwegian (Bokmal)
+ nb = 0x7C14, // Norwegian (Bokmal)
+ nb_no = 0x0414, // Norwegian (Bokmal), Norway
+ nn = 0x7814, // Norwegian (Nynorsk)
+ nn_no = 0x0814, // Norwegian (Nynorsk), Norway
+ oc = 0x0082, // Occitan
+ oc_fr = 0x0482, // Occitan, France
+ // `or` is a Zig keyword, so this tag name needs the @"..." identifier syntax.
+ @"or" = 0x0048, // Odia
+ or_in = 0x0448, // Odia, India
+ om = 0x0072, // Oromo
+ om_et = 0x0472, // Oromo, Ethiopia
+ ps = 0x0063, // Pashto
+ ps_af = 0x0463, // Pashto, Afghanistan
+ fa = 0x0029, // Persian
+ fa_ir = 0x0429, // Persian, Iran
+ pl = 0x0015, // Polish
+ pl_pl = 0x0415, // Polish, Poland
+ pt = 0x0016, // Portuguese
+ pt_br = 0x0416, // Portuguese, Brazil
+ pt_pt = 0x0816, // Portuguese, Portugal
+ qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing
+ qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing
+ qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales
+ pa = 0x0046, // Punjabi
+ pa_arab = 0x7C46, // Punjabi
+ pa_in = 0x0446, // Punjabi, India
+ pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan
+ quz = 0x006B, // Quechua
+ quz_bo = 0x046B, // Quechua, Bolivia
+ quz_ec = 0x086B, // Quechua, Ecuador
+ quz_pe = 0x0C6B, // Quechua, Peru
+ ro = 0x0018, // Romanian
+ ro_md = 0x0818, // Romanian, Moldova
+ ro_ro = 0x0418, // Romanian, Romania
+ rm = 0x0017, // Romansh
+ rm_ch = 0x0417, // Romansh, Switzerland
+ ru = 0x0019, // Russian
+ ru_md = 0x0819, // Russian, Moldova
+ ru_ru = 0x0419, // Russian, Russia
+ sah = 0x0085, // Sakha
+ sah_ru = 0x0485, // Sakha, Russia
+ smn = 0x703B, // Sami (Inari)
+ smn_fi = 0x243B, // Sami (Inari), Finland
+ smj = 0x7C3B, // Sami (Lule)
+ smj_no = 0x103B, // Sami (Lule), Norway
+ smj_se = 0x143B, // Sami (Lule), Sweden
+ se = 0x003B, // Sami (Northern)
+ se_fi = 0x0C3B, // Sami (Northern), Finland
+ se_no = 0x043B, // Sami (Northern), Norway
+ se_se = 0x083B, // Sami (Northern), Sweden
+ sms = 0x743B, // Sami (Skolt)
+ sms_fi = 0x203B, // Sami (Skolt), Finland
+ sma = 0x783B, // Sami (Southern)
+ sma_no = 0x183B, // Sami (Southern), Norway
+ sma_se = 0x1C3B, // Sami (Southern), Sweden
+ sa = 0x004F, // Sanskrit
+ sa_in = 0x044F, // Sanskrit, India
+ gd = 0x0091, // Scottish Gaelic
+ gd_gb = 0x0491, // Scottish Gaelic, United Kingdom
+ sr_cyrl = 0x6C1A, // Serbian (Cyrillic)
+ sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina
+ sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro
+ sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia
+ sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former)
+ sr_latn = 0x701A, // Serbian (Latin)
+ sr = 0x7C1A, // Serbian (Latin)
+ sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina
+ sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro
+ sr_latn_rs = 0x241A, // Serbian (Latin), Serbia
+ sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former)
+ nso = 0x006C, // Sesotho sa Leboa
+ nso_za = 0x046C, // Sesotho sa Leboa, South Africa
+ tn = 0x0032, // Setswana
+ tn_bw = 0x0832, // Setswana, Botswana
+ tn_za = 0x0432, // Setswana, South Africa
+ sd = 0x0059, // Sindhi
+ sd_arab = 0x7C59, // Sindhi
+ sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan
+ si = 0x005B, // Sinhala
+ si_lk = 0x045B, // Sinhala, Sri Lanka
+ sk = 0x001B, // Slovak
+ sk_sk = 0x041B, // Slovak, Slovakia
+ sl = 0x0024, // Slovenian
+ sl_si = 0x0424, // Slovenian, Slovenia
+ so = 0x0077, // Somali
+ so_so = 0x0477, // Somali, Somalia
+ st = 0x0030, // Sotho
+ st_za = 0x0430, // Sotho, South Africa
+ es = 0x000A, // Spanish
+ es_ar = 0x2C0A, // Spanish, Argentina
+ es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela
+ es_bo = 0x400A, // Spanish, Bolivia
+ es_cl = 0x340A, // Spanish, Chile
+ es_co = 0x240A, // Spanish, Colombia
+ es_cr = 0x140A, // Spanish, Costa Rica
+ es_cu = 0x5c0A, // Spanish, Cuba
+ es_do = 0x1c0A, // Spanish, Dominican Republic
+ es_ec = 0x300A, // Spanish, Ecuador
+ es_sv = 0x440A, // Spanish, El Salvador
+ es_gt = 0x100A, // Spanish, Guatemala
+ es_hn = 0x480A, // Spanish, Honduras
+ es_419 = 0x580A, // Spanish, Latin America
+ es_mx = 0x080A, // Spanish, Mexico
+ es_ni = 0x4C0A, // Spanish, Nicaragua
+ es_pa = 0x180A, // Spanish, Panama
+ es_py = 0x3C0A, // Spanish, Paraguay
+ es_pe = 0x280A, // Spanish, Peru
+ es_pr = 0x500A, // Spanish, Puerto Rico
+ es_es_tradnl = 0x040A, // Spanish, Spain
+ es_es = 0x0c0A, // Spanish, Spain
+ es_us = 0x540A, // Spanish, United States
+ es_uy = 0x380A, // Spanish, Uruguay
+ sv = 0x001D, // Swedish
+ sv_fi = 0x081D, // Swedish, Finland
+ sv_se = 0x041D, // Swedish, Sweden
+ syr = 0x005A, // Syriac
+ syr_sy = 0x045A, // Syriac, Syria
+ tg = 0x0028, // Tajik (Cyrillic)
+ tg_cyrl = 0x7C28, // Tajik (Cyrillic)
+ tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan
+ tzm = 0x005F, // Tamazight (Latin)
+ tzm_latn = 0x7C5F, // Tamazight (Latin)
+ tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria
+ ta = 0x0049, // Tamil
+ ta_in = 0x0449, // Tamil, India
+ ta_lk = 0x0849, // Tamil, Sri Lanka
+ tt = 0x0044, // Tatar
+ tt_ru = 0x0444, // Tatar, Russia
+ te = 0x004A, // Telugu
+ te_in = 0x044A, // Telugu, India
+ th = 0x001E, // Thai
+ th_th = 0x041E, // Thai, Thailand
+ bo = 0x0051, // Tibetan
+ bo_cn = 0x0451, // Tibetan, People's Republic of China
+ ti = 0x0073, // Tigrinya
+ ti_er = 0x0873, // Tigrinya, Eritrea
+ ti_et = 0x0473, // Tigrinya, Ethiopia
+ ts = 0x0031, // Tsonga
+ ts_za = 0x0431, // Tsonga, South Africa
+ tr = 0x001F, // Turkish
+ tr_tr = 0x041F, // Turkish, Turkey
+ tk = 0x0042, // Turkmen
+ tk_tm = 0x0442, // Turkmen, Turkmenistan
+ uk = 0x0022, // Ukrainian
+ uk_ua = 0x0422, // Ukrainian, Ukraine
+ hsb = 0x002E, // Upper Sorbian
+ hsb_de = 0x042E, // Upper Sorbian, Germany
+ ur = 0x0020, // Urdu
+ ur_in = 0x0820, // Urdu, India
+ ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan
+ ug = 0x0080, // Uyghur
+ ug_cn = 0x0480, // Uyghur, People's Republic of China
+ uz_cyrl = 0x7843, // Uzbek (Cyrillic)
+ uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan
+ uz = 0x0043, // Uzbek (Latin)
+ uz_latn = 0x7C43, // Uzbek (Latin)
+ uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan
+ ca_es_valencia = 0x0803, // Valencian, Spain
+ ve = 0x0033, // Venda
+ ve_za = 0x0433, // Venda, South Africa
+ vi = 0x002A, // Vietnamese
+ vi_vn = 0x042A, // Vietnamese, Vietnam
+ cy = 0x0052, // Welsh
+ cy_gb = 0x0452, // Welsh, United Kingdom
+ wo = 0x0088, // Wolof
+ wo_sn = 0x0488, // Wolof, Senegal
+ xh = 0x0034, // Xhosa
+ xh_za = 0x0434, // Xhosa, South Africa
+ ii = 0x0078, // Yi
+ ii_cn = 0x0478, // Yi, People's Republic of China
+ yi_001 = 0x043D, // Yiddish, World
+ yo = 0x006A, // Yoruba
+ yo_ng = 0x046A, // Yoruba, Nigeria
+ zu = 0x0035, // Zulu
+ zu_za = 0x0435, // Zulu, South Africa
+
+ /// Special case
+ x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting"
+};
diff --git a/lib/compiler/resinator/lex.zig b/lib/compiler/resinator/lex.zig
@@ -0,0 +1,1106 @@
+//! Expects to be run after the C preprocessor and after `removeComments`.
+//! This means that the lexer assumes that:
+//! - Splices ('\' at the end of a line) have been handled/collapsed.
+//! - Preprocessor directives and macros have been expanded (any remaining should be skipped with the exception of `#pragma code_page`).
+//! - All comments have been removed.
+
+const std = @import("std");
+const ErrorDetails = @import("errors.zig").ErrorDetails;
+const columnWidth = @import("literals.zig").columnWidth;
+const code_pages = @import("code_pages.zig");
+const CodePage = code_pages.CodePage;
+const SourceMappings = @import("source_mapping.zig").SourceMappings;
+const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit;
+
+const dumpTokensDuringTests = false;
+
+pub const default_max_string_literal_codepoints = 4097;
+
+/// A lexed token, described as a byte range into the source buffer.
+pub const Token = struct {
+    /// What kind of token this is.
+    id: Id,
+    /// Byte offset of the token's first byte in the buffer.
+    start: usize,
+    /// Byte offset one past the token's last byte (exclusive, see `slice`).
+    end: usize,
+    /// Line number the lexer recorded for this token.
+    line_number: usize,
+
+    pub const Id = enum {
+        literal,
+        number,
+        quoted_ascii_string,
+        quoted_wide_string,
+        operator,
+        begin,
+        end,
+        comma,
+        open_paren,
+        close_paren,
+        /// This Id is only used for errors, the Lexer will never return one
+        /// of these from a `next` call.
+        preprocessor_command,
+        invalid,
+        eof,
+
+        /// Returns a fixed, human-readable name for this kind of token.
+        /// Must not be called with `.invalid` (that case is `unreachable`).
+        pub fn nameForErrorDisplay(self: Id) []const u8 {
+            return switch (self) {
+                .invalid => unreachable,
+                .eof => "<eof>",
+                .literal => "<literal>",
+                .number => "<number>",
+                .operator => "<operator>",
+                .quoted_ascii_string => "<quoted ascii string>",
+                .quoted_wide_string => "<quoted wide string>",
+                .preprocessor_command => "<preprocessor command>",
+                .begin => "<'{' or BEGIN>",
+                .end => "<'}' or END>",
+                .open_paren => "(",
+                .close_paren => ")",
+                .comma => ",",
+            };
+        }
+    };
+
+    /// Returns the bytes of `buffer` that this token covers.
+    pub fn slice(self: Token, buffer: []const u8) []const u8 {
+        return buffer[self.start..self.end];
+    }
+
+    /// Returns the token's own text, except for `.eof` tokens which use the
+    /// fixed name from `Id.nameForErrorDisplay`.
+    pub fn nameForErrorDisplay(self: Token, buffer: []const u8) []const u8 {
+        if (self.id == .eof) return self.id.nameForErrorDisplay();
+        return self.slice(buffer);
+    }
+
+    /// Returns 0-based column
+    ///
+    /// The column is accumulated with `columnWidth` over every byte from the
+    /// line start up to (but not including) the token start. If
+    /// `maybe_line_start` is null, `getLineStartForColumnCalc` provides the
+    /// line start.
+    pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize {
+        var pos: usize = maybe_line_start orelse token.getLineStartForColumnCalc(source);
+        var column: usize = 0;
+        while (pos < token.start) {
+            column += columnWidth(column, source[pos], tab_columns);
+            pos += 1;
+        }
+        return column;
+    }
+
+    // TODO: More testing is needed to determine if this can be merged with getLineStartForErrorDisplay
+    // (the TODO in currentIndexFormsLineEndingPair should be taken into account as well)
+    /// Returns the index just after the closest '\n' before the token
+    /// (clamped to the last valid index of `source`), or 0 if there is none.
+    pub fn getLineStartForColumnCalc(token: Token, source: []const u8) usize {
+        const preceding = source[0..token.start];
+        const newline_index = std.mem.lastIndexOfScalar(u8, preceding, '\n') orelse return 0;
+        return @min(source.len - 1, newline_index + 1);
+    }
+
+    /// Like `getLineStartForColumnCalc`, but a '\r' also counts as the end of
+    /// the previous line.
+    pub fn getLineStartForErrorDisplay(token: Token, source: []const u8) usize {
+        const preceding = source[0..token.start];
+        const line_ending_index = std.mem.lastIndexOfAny(u8, preceding, "\r\n") orelse return 0;
+        return @min(source.len - 1, line_ending_index + 1);
+    }
+
+    /// Returns the bytes from the line start up to (but not including) the
+    /// next '\r' or '\n', or up to the end of `source` if there is none. If
+    /// `maybe_line_start` is null, `getLineStartForErrorDisplay` provides the
+    /// line start.
+    pub fn getLineForErrorDisplay(token: Token, source: []const u8, maybe_line_start: ?usize) []const u8 {
+        const line_start = maybe_line_start orelse token.getLineStartForErrorDisplay(source);
+        const remainder = source[line_start..];
+        const line_len = std.mem.indexOfAny(u8, remainder, "\r\n") orelse remainder.len;
+        return remainder[0..line_len];
+    }
+
+    /// Returns true for quoted ASCII and quoted wide string tokens.
+    pub fn isStringLiteral(token: Token) bool {
+        return switch (token.id) {
+            .quoted_ascii_string, .quoted_wide_string => true,
+            else => false,
+        };
+    }
+};
+
+/// Tracks the current line number while walking through `buffer`.
+pub const LineHandler = struct {
+    line_number: usize = 1,
+    buffer: []const u8,
+    /// Index of the most recently counted line ending; reset to null once
+    /// that line ending has been consumed as the first half of a pair.
+    last_line_ending_index: ?usize = null,
+
+    /// Like incrementLineNumber but checks that the current char is a line ending first.
+    /// Returns the new line number if it was incremented, null otherwise.
+    pub fn maybeIncrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
+        return switch (self.buffer[cur_index]) {
+            '\r', '\n' => self.incrementLineNumber(cur_index),
+            else => null,
+        };
+    }
+
+    /// Increments line_number appropriately (handling line ending pairs)
+    /// and returns the new line number if it was incremented, or null otherwise.
+    pub fn incrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
+        if (!self.currentIndexFormsLineEndingPair(cur_index)) {
+            self.line_number += 1;
+            self.last_line_ending_index = cur_index;
+            return self.line_number;
+        }
+        // Second half of a pair: the line was already counted. Forget the
+        // first half so that it can't be paired up a second time.
+        self.last_line_ending_index = null;
+        return null;
+    }
+
+    /// \r\n and \n\r pairs are treated as a single line ending (but not \r\r \n\n)
+    /// expects cur_index and last_line_ending_index (if non-null) to contain line endings
+    ///
+    /// TODO: This is not really how the Win32 RC compiler handles line endings. Instead, it
+    /// seems to drop all carriage returns during preprocessing and then replace all
+    /// remaining line endings with well-formed CRLF pairs (e.g. `<CR>a<CR>b<LF>c` becomes `ab<CR><LF>c`).
+    /// Handling this the same as the Win32 RC compiler would need control over the preprocessor,
+    /// since Clang converts unpaired <CR> into unpaired <LF>.
+    pub fn currentIndexFormsLineEndingPair(self: *const LineHandler, cur_index: usize) bool {
+        const prev_index = self.last_line_ending_index orelse return false;
+
+        // The previous line ending must be directly before cur_index. Since a
+        // previous line ending exists, cur_index must be >= 1; if this
+        // subtraction overflows, it is a bug at the callsite of this function.
+        if (prev_index != cur_index - 1) return false;
+
+        const cur = self.buffer[cur_index];
+        const prev = self.buffer[prev_index];
+
+        // sanity check
+        std.debug.assert(cur == '\r' or cur == '\n');
+        std.debug.assert(prev == '\r' or prev == '\n');
+
+        // only mixed pairs count; \n\n and \r\r are two separate line endings
+        return prev != cur;
+    }
+};
+
+/// Error set returned by the lexing functions (`Lexer.next`, `Lexer.nextNormal`,
+/// `Lexer.nextNormalWithContext`, `Lexer.nextWhitespaceDelimeterOnly`).
+pub const LexError = error{
+ UnfinishedStringLiteral,
+ StringLiteralTooLong,
+ InvalidNumberWithExponent,
+ InvalidDigitCharacterInNumberLiteral,
+ IllegalByte,
+ IllegalByteOutsideStringLiterals,
+ IllegalCodepointOutsideStringLiterals,
+ IllegalByteOrderMark,
+ IllegalPrivateUseCharacter,
+ FoundCStyleEscapedQuote,
+ CodePagePragmaMissingLeftParen,
+ CodePagePragmaMissingRightParen,
+ /// Can be caught and ignored
+ CodePagePragmaInvalidCodePage,
+ CodePagePragmaNotInteger,
+ CodePagePragmaOverflow,
+ CodePagePragmaUnsupportedCodePage,
+ /// Can be caught and ignored
+ CodePagePragmaInIncludedFile,
+};
+
+pub const Lexer = struct {
+ const Self = @This();
+
+ buffer: []const u8,
+ index: usize,
+ line_handler: LineHandler,
+ at_start_of_line: bool = true,
+ error_context_token: ?Token = null,
+ current_code_page: CodePage,
+ default_code_page: CodePage,
+ source_mappings: ?*SourceMappings,
+ max_string_literal_codepoints: u15,
+ /// Needed to determine whether or not the output code page should
+ /// be set in the parser.
+ seen_pragma_code_pages: u2 = 0,
+
+ pub const Error = LexError;
+
+ /// Options accepted by `init`.
+ pub const LexerOptions = struct {
+ /// Used by `init` as the initial value of both `current_code_page` and `default_code_page`.
+ default_code_page: CodePage = .windows1252,
+ /// Stored as-is in the lexer by `init`; may be null.
+ source_mappings: ?*SourceMappings = null,
+ /// Stored as-is in the lexer by `init`; defaults to `default_max_string_literal_codepoints`.
+ max_string_literal_codepoints: u15 = default_max_string_literal_codepoints,
+ };
+
+    /// Creates a lexer positioned at the start of `buffer`.
+    /// `buffer` is stored by reference (not copied), both in the lexer itself
+    /// and in its line handler. The current and default code page both start
+    /// out as `options.default_code_page`.
+    pub fn init(buffer: []const u8, options: LexerOptions) Self {
+        const initial_code_page = options.default_code_page;
+        return .{
+            .buffer = buffer,
+            .index = 0,
+            .line_handler = .{ .buffer = buffer },
+            .current_code_page = initial_code_page,
+            .default_code_page = initial_code_page,
+            .source_mappings = options.source_mappings,
+            .max_string_literal_codepoints = options.max_string_literal_codepoints,
+        };
+    }
+
+    /// Debug helper: prints the token's id, line number, and escaped source
+    /// text via `std.debug.print`.
+    pub fn dump(self: *Self, token: *const Token) void {
+        const token_bytes = token.slice(self.buffer);
+        std.debug.print("{s}:{d}: {s}\n", .{
+            @tagName(token.id),
+            token.line_number,
+            std.fmt.fmtSliceEscapeLower(token_bytes),
+        });
+    }
+
+ /// Selects which lexing function `next` dispatches to.
+ pub const LexMethod = enum {
+ /// Dispatches to `nextWhitespaceDelimeterOnly`.
+ whitespace_delimiter_only,
+ /// Dispatches to `nextNormal`.
+ normal,
+ /// Dispatches to `nextNormalWithContext(.expect_operator)`.
+ normal_expect_operator,
+ };
+
+    /// Lexes and returns the next token using the lexing function selected by
+    /// the comptime-known `method`.
+    pub fn next(self: *Self, comptime method: LexMethod) LexError!Token {
+        return switch (method) {
+            .whitespace_delimiter_only => self.nextWhitespaceDelimeterOnly(),
+            .normal => self.nextNormal(),
+            .normal_expect_operator => self.nextNormalWithContext(.expect_operator),
+        };
+    }
+
+ /// States of the state machine in `nextWhitespaceDelimeterOnly`.
+ const StateWhitespaceDelimiterOnly = enum {
+ /// Skipping whitespace/line endings; no token has been started yet.
+ start,
+ /// Inside a literal; ends at the next whitespace or line ending.
+ literal,
+ /// Inside a line that began with `#`; ends at the next line ending.
+ preprocessor,
+ /// Inside a line that began with `;`; ends at the next line ending.
+ semicolon,
+ };
+
+ /// Lexes the next token in a mode where only whitespace and line endings end a
+ /// token: any other run of codepoints becomes a single `.literal` token.
+ ///
+ /// A `#` seen while `at_start_of_line` is set makes the rest of that line get passed
+ /// to `evaluatePreprocessorCommand`, and a `;` seen while `at_start_of_line` is set
+ /// makes the rest of that line get skipped; neither produces a token.
+ ///
+ /// Returns a token with id `.eof` if the end of the buffer is reached before a
+ /// literal is started. Errors come from `checkForIllegalCodepoint` and
+ /// `evaluatePreprocessorCommand`.
+ pub fn nextWhitespaceDelimeterOnly(self: *Self) LexError!Token {
+ const start_index = self.index;
+ // `.eof` is the default; it is only replaced with `.literal` once a literal is finished.
+ var result = Token{
+ .id = .eof,
+ .start = start_index,
+ .end = undefined,
+ .line_number = self.line_handler.line_number,
+ };
+ var state = StateWhitespaceDelimiterOnly.start;
+
+ // The `else` branch of this loop only runs when `codepointAt` returns null (EOF),
+ // not when the loop is exited via `break`.
+ while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) {
+ const c = codepoint.value;
+ try self.checkForIllegalCodepoint(codepoint, false);
+ switch (state) {
+ .start => switch (c) {
+ // Skipped bytes move the token start (and line number) forward.
+ '\r', '\n' => {
+ result.start = self.index + 1;
+ result.line_number = self.incrementLineNumber();
+ },
+ ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
+ result.start = self.index + 1;
+ },
+ // NBSP only counts as whitespace at the start of a line (but
+ // can be intermixed with other whitespace). Who knows why.
+ '\xA0' => if (self.at_start_of_line) {
+ // Uses byte_len rather than 1 to skip the whole encoded codepoint.
+ result.start = self.index + codepoint.byte_len;
+ } else {
+ state = .literal;
+ self.at_start_of_line = false;
+ },
+ '#' => {
+ if (self.at_start_of_line) {
+ state = .preprocessor;
+ } else {
+ state = .literal;
+ }
+ self.at_start_of_line = false;
+ },
+ // Semi-colon acts as a line-terminator, but in this lexing mode
+ // that's only true if it's at the start of a line.
+ // NOTE(review): when `;` is not at the start of a line the state stays
+ // `.start`, so a whitespace byte directly after it moves `result.start`
+ // past the `;` and it ends up in no token — confirm this is intended.
+ ';' => {
+ if (self.at_start_of_line) {
+ state = .semicolon;
+ }
+ self.at_start_of_line = false;
+ },
+ else => {
+ state = .literal;
+ self.at_start_of_line = false;
+ },
+ },
+ .literal => switch (c) {
+ // `break` skips the loop's continue expression, so `self.index` stays on
+ // the delimiter and the delimiter is not part of the token.
+ '\r', '\n', ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
+ result.id = .literal;
+ break;
+ },
+ else => {},
+ },
+ .preprocessor => switch (c) {
+ // End of the `#` line: evaluate it, then look for a token on the next line.
+ '\r', '\n' => {
+ try self.evaluatePreprocessorCommand(result.start, self.index);
+ result.start = self.index + 1;
+ state = .start;
+ result.line_number = self.incrementLineNumber();
+ },
+ else => {},
+ },
+ .semicolon => switch (c) {
+ // End of the `;` line: nothing to evaluate, look for a token on the next line.
+ '\r', '\n' => {
+ result.start = self.index + 1;
+ state = .start;
+ result.line_number = self.incrementLineNumber();
+ },
+ else => {},
+ },
+ }
+ } else { // got EOF
+ switch (state) {
+ .start, .semicolon => {},
+ .literal => {
+ result.id = .literal;
+ },
+ // A `#` line that ends at EOF is still evaluated; the returned `.eof`
+ // token is then empty (start == end).
+ .preprocessor => {
+ try self.evaluatePreprocessorCommand(result.start, self.index);
+ result.start = self.index;
+ },
+ }
+ }
+
+ result.end = self.index;
+ return result;
+ }
+
+ /// States of the state machine in `nextNormalWithContext`.
+ const StateNormal = enum {
+ /// No token has started yet; leading whitespace and newlines are being skipped.
+ start,
+ /// Seen an `L`/`l`: a `"` next starts a wide string, anything else makes it a literal.
+ literal_or_quoted_wide_string,
+ /// Inside a string literal that has no `L` prefix.
+ quoted_ascii_string,
+ /// Inside a string literal that has an `L` prefix.
+ quoted_wide_string,
+ /// Seen a `\` inside a string literal.
+ quoted_ascii_string_escape,
+ quoted_wide_string_escape,
+ /// Seen a `"` inside a string literal: it either ends the string or is the
+ /// first half of an escaped quote (`""`).
+ quoted_ascii_string_maybe_end,
+ quoted_wide_string_maybe_end,
+ literal,
+ number_literal,
+ /// Inside a line that started with `#`; runs until the end of the line or EOF.
+ preprocessor,
+ /// Seen a `;`; everything until the end of the line is skipped.
+ semicolon,
+ // Partial matches of the keyword `end` (case-insensitive)
+ e,
+ en,
+ // Partial matches of the keyword `begin` (case-insensitive)
+ b,
+ be,
+ beg,
+ begi,
+ };
+
+ /// Lexes and returns the next token without any expectation about what kind
+ /// of token comes next. Shorthand for `nextNormalWithContext(.any)`.
+ /// TODO: A not-terrible name
+ pub fn nextNormal(self: *Self) LexError!Token {
+ return self.nextNormalWithContext(.any);
+ }
+
+ /// Lexes and returns the next token; the token's id is `.eof` once the end
+ /// of the buffer has been reached.
+ ///
+ /// `context` only affects how a `-` at the start of a token is lexed: with
+ /// `.expect_operator` it is returned as an `.operator` token, with `.any` it
+ /// starts a number literal.
+ ///
+ /// Preprocessor lines (`#` while `at_start_of_line` is set) are passed to
+ /// `evaluatePreprocessorCommand` and produce no token, and everything from
+ /// a `;` until the end of the line is skipped.
+ ///
+ /// When an error is returned, `error_context_token` is set to the span that
+ /// the error refers to.
+ pub fn nextNormalWithContext(self: *Self, context: enum { expect_operator, any }) LexError!Token {
+ const start_index = self.index;
+ // `id` stays `.eof` unless a token is found; `end` is filled in right before returning.
+ var result = Token{
+ .id = .eof,
+ .start = start_index,
+ .end = undefined,
+ .line_number = self.line_handler.line_number,
+ };
+ var state = StateNormal.start;
+
+ // Note: The Windows RC compiler uses a non-standard method of computing
+ // length for its 'string literal too long' errors; it isn't easily
+ // explained or intuitive (it's sort-of pre-parsed byte length but with
+ // a few of exceptions/edge cases).
+ //
+ // It also behaves strangely with non-ASCII codepoints, e.g. even though the default
+ // limit is 4097, you can only have 4094 € codepoints (1 UTF-16 code unit each),
+ // and 2048 𐐷 codepoints (2 UTF-16 code units each).
+ //
+ // TODO: Understand this more, bring it more in line with how the Win32 limits work.
+ // Alternatively, do something that makes more sense but may be more permissive.
+ var string_literal_length: usize = 0;
+ // Keeping track of the string literal column prevents pathological edge cases when
+ // there are tons of tab stop characters within a string literal.
+ var string_literal_column: usize = 0;
+ var string_literal_collapsing_whitespace: bool = false;
+ // Only the first `e`/`E` within a number literal is recorded in `exponent_index`,
+ // and only if no other non-digit character has been seen before it.
+ var still_could_have_exponent: bool = true;
+ var exponent_index: ?usize = null;
+ while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) {
+ const c = codepoint.value;
+ const in_string_literal = switch (state) {
+ .quoted_ascii_string,
+ .quoted_wide_string,
+ .quoted_ascii_string_escape,
+ .quoted_wide_string_escape,
+ .quoted_ascii_string_maybe_end,
+ .quoted_wide_string_maybe_end,
+ =>
+ // If the current line is not the same line as the start of the string literal,
+ // then we want to treat the current codepoint as 'not in a string literal'
+ // for the purposes of detecting illegal codepoints. This means that we will
+ // error on illegal-outside-string-literal characters that are outside string
+ // literals from the perspective of a C preprocessor, but that may be
+ // inside string literals from the perspective of the RC lexer. For example,
+ // "hello
+ // @"
+ // will be treated as a single string literal by the RC lexer but the Win32
+ // preprocessor will consider this an unclosed string literal followed by
+ // the character @ and ", and will therefore error since the Win32 RC preprocessor
+ // errors on the @ character outside string literals.
+ //
+ // By doing this here, we can effectively emulate the Win32 RC preprocessor behavior
+ // at lex-time, and avoid the need for a separate step that checks for this edge-case
+ // specifically.
+ result.line_number == self.line_handler.line_number,
+ else => false,
+ };
+ try self.checkForIllegalCodepoint(codepoint, in_string_literal);
+ switch (state) {
+ // While in the start state, `result.start` is moved forward past
+ // everything that gets skipped.
+ .start => switch (c) {
+ '\r', '\n' => {
+ result.start = self.index + 1;
+ result.line_number = self.incrementLineNumber();
+ },
+ ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
+ result.start = self.index + 1;
+ },
+ // NBSP only counts as whitespace at the start of a line (but
+ // can be intermixed with other whitespace). Who knows why.
+ '\xA0' => if (self.at_start_of_line) {
+ result.start = self.index + codepoint.byte_len;
+ } else {
+ state = .literal;
+ self.at_start_of_line = false;
+ },
+ 'L', 'l' => {
+ state = .literal_or_quoted_wide_string;
+ self.at_start_of_line = false;
+ },
+ 'E', 'e' => {
+ state = .e;
+ self.at_start_of_line = false;
+ },
+ 'B', 'b' => {
+ state = .b;
+ self.at_start_of_line = false;
+ },
+ '"' => {
+ state = .quoted_ascii_string;
+ self.at_start_of_line = false;
+ string_literal_collapsing_whitespace = false;
+ string_literal_length = 0;
+
+ // A zero-width token at the opening quote, used only to get the
+ // column that the string literal starts at.
+ var dummy_token = Token{
+ .start = self.index,
+ .end = self.index,
+ .line_number = self.line_handler.line_number,
+ .id = .invalid,
+ };
+ string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
+ },
+ // Single character tokens advance past the character manually, since
+ // breaking out of the loop skips the loop's continue expression.
+ '+', '&', '|' => {
+ self.index += 1;
+ result.id = .operator;
+ self.at_start_of_line = false;
+ break;
+ },
+ '-' => {
+ if (context == .expect_operator) {
+ self.index += 1;
+ result.id = .operator;
+ self.at_start_of_line = false;
+ break;
+ } else {
+ state = .number_literal;
+ still_could_have_exponent = true;
+ exponent_index = null;
+ self.at_start_of_line = false;
+ }
+ },
+ '0'...'9', '~' => {
+ state = .number_literal;
+ still_could_have_exponent = true;
+ exponent_index = null;
+ self.at_start_of_line = false;
+ },
+ '#' => {
+ if (self.at_start_of_line) {
+ state = .preprocessor;
+ } else {
+ state = .literal;
+ }
+ self.at_start_of_line = false;
+ },
+ ';' => {
+ state = .semicolon;
+ self.at_start_of_line = false;
+ },
+ '{', '}' => {
+ self.index += 1;
+ result.id = if (c == '{') .begin else .end;
+ self.at_start_of_line = false;
+ break;
+ },
+ '(', ')' => {
+ self.index += 1;
+ result.id = if (c == '(') .open_paren else .close_paren;
+ self.at_start_of_line = false;
+ break;
+ },
+ ',' => {
+ self.index += 1;
+ result.id = .comma;
+ self.at_start_of_line = false;
+ break;
+ },
+ else => {
+ if (isNonAsciiDigit(c)) {
+ // NOTE(review): `.end` is `self.index + 1` rather than
+ // `self.index + codepoint.byte_len`, so if the digit is encoded
+ // as more than one byte then the token ends partway through it.
+ // Confirm whether that is intended.
+ self.error_context_token = .{
+ .id = .number,
+ .start = result.start,
+ .end = self.index + 1,
+ .line_number = self.line_handler.line_number,
+ };
+ return error.InvalidDigitCharacterInNumberLiteral;
+ }
+ state = .literal;
+ self.at_start_of_line = false;
+ },
+ },
+ .preprocessor => switch (c) {
+ '\r', '\n' => {
+ // The command is evaluated before the line number is incremented,
+ // so it is evaluated with the line number of the command itself.
+ try self.evaluatePreprocessorCommand(result.start, self.index);
+ result.start = self.index + 1;
+ state = .start;
+ result.line_number = self.incrementLineNumber();
+ },
+ else => {},
+ },
+ // Semi-colon acts as a line-terminator--everything is skipped until
+ // the next line.
+ .semicolon => switch (c) {
+ '\r', '\n' => {
+ result.start = self.index + 1;
+ state = .start;
+ result.line_number = self.incrementLineNumber();
+ },
+ else => {},
+ },
+ .number_literal => switch (c) {
+ // zig fmt: off
+ ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
+ '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
+ '\'', ';', '=',
+ => {
+ // zig fmt: on
+ result.id = .number;
+ break;
+ },
+ '0'...'9' => {
+ // A digit directly after the recorded exponent character is an error
+ if (exponent_index) |exp_i| {
+ if (self.index - 1 == exp_i) {
+ // Note: This being an error is a quirk of the preprocessor used by
+ // the Win32 RC compiler.
+ self.error_context_token = .{
+ .id = .number,
+ .start = result.start,
+ .end = self.index + 1,
+ .line_number = self.line_handler.line_number,
+ };
+ return error.InvalidNumberWithExponent;
+ }
+ }
+ },
+ 'e', 'E' => {
+ if (still_could_have_exponent) {
+ exponent_index = self.index;
+ still_could_have_exponent = false;
+ }
+ },
+ else => {
+ if (isNonAsciiDigit(c)) {
+ self.error_context_token = .{
+ .id = .number,
+ .start = result.start,
+ .end = self.index + 1,
+ .line_number = self.line_handler.line_number,
+ };
+ return error.InvalidDigitCharacterInNumberLiteral;
+ }
+ still_could_have_exponent = false;
+ },
+ },
+ .literal_or_quoted_wide_string => switch (c) {
+ // zig fmt: off
+ ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
+ '\r', '\n', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
+ '\'', ';', '=',
+ // zig fmt: on
+ => {
+ result.id = .literal;
+ break;
+ },
+ '"' => {
+ state = .quoted_wide_string;
+ string_literal_collapsing_whitespace = false;
+ string_literal_length = 0;
+
+ // A zero-width token at the opening quote, used only to get the
+ // column that the string literal starts at.
+ var dummy_token = Token{
+ .start = self.index,
+ .end = self.index,
+ .line_number = self.line_handler.line_number,
+ .id = .invalid,
+ };
+ string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
+ },
+ else => {
+ state = .literal;
+ },
+ },
+ .literal => switch (c) {
+ // zig fmt: off
+ ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
+ '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
+ '\'', ';', '=',
+ => {
+ // zig fmt: on
+ result.id = .literal;
+ break;
+ },
+ else => {},
+ },
+ // In the keyword states below, a character that does not continue the
+ // keyword switches to the literal state and decrements `self.index`,
+ // which offsets the increment done by the loop's continue expression so
+ // that the same character gets handled again, this time as part of a literal.
+ // NOTE(review): the decrement is always 1 while the increment is
+ // `codepoint.byte_len`, so this only lands back on the same character
+ // when that character is one byte long. Confirm the multi-byte case.
+ .e => switch (c) {
+ 'N', 'n' => {
+ state = .en;
+ },
+ else => {
+ state = .literal;
+ self.index -= 1;
+ },
+ },
+ .en => switch (c) {
+ 'D', 'd' => {
+ result.id = .end;
+ self.index += 1;
+ break;
+ },
+ else => {
+ state = .literal;
+ self.index -= 1;
+ },
+ },
+ .b => switch (c) {
+ 'E', 'e' => {
+ state = .be;
+ },
+ else => {
+ state = .literal;
+ self.index -= 1;
+ },
+ },
+ .be => switch (c) {
+ 'G', 'g' => {
+ state = .beg;
+ },
+ else => {
+ state = .literal;
+ self.index -= 1;
+ },
+ },
+ .beg => switch (c) {
+ 'I', 'i' => {
+ state = .begi;
+ },
+ else => {
+ state = .literal;
+ self.index -= 1;
+ },
+ },
+ .begi => switch (c) {
+ 'N', 'n' => {
+ result.id = .begin;
+ self.index += 1;
+ break;
+ },
+ else => {
+ state = .literal;
+ self.index -= 1;
+ },
+ },
+ .quoted_ascii_string, .quoted_wide_string => switch (c) {
+ // Not counted towards the length yet, see the maybe_end states
+ '"' => {
+ string_literal_column += 1;
+ state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end;
+ },
+ '\\' => {
+ string_literal_length += 1;
+ string_literal_column += 1;
+ state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape;
+ },
+ '\r' => {
+ string_literal_column = 0;
+ // \r doesn't count towards string literal length
+
+ // Increment line number but don't affect the result token's line number
+ _ = self.incrementLineNumber();
+ },
+ '\n' => {
+ string_literal_column = 0;
+ // first \n expands to <space><\n>
+ if (!string_literal_collapsing_whitespace) {
+ string_literal_length += 2;
+ string_literal_collapsing_whitespace = true;
+ }
+ // the rest are collapsed into the <space><\n>
+
+ // Increment line number but don't affect the result token's line number
+ _ = self.incrementLineNumber();
+ },
+ // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing
+ '\t', ' ', '\x0b', '\x0c' => {
+ if (!string_literal_collapsing_whitespace) {
+ // Literal tab characters are counted as the number of space characters
+ // needed to reach the next 8-column tab stop.
+ const width = columnWidth(string_literal_column, @intCast(c), 8);
+ string_literal_length += width;
+ string_literal_column += width;
+ }
+ },
+ else => {
+ string_literal_collapsing_whitespace = false;
+ string_literal_length += 1;
+ string_literal_column += 1;
+ },
+ },
+ .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) {
+ '"' => {
+ // The error token spans both the \ and the "
+ self.error_context_token = .{
+ .id = .invalid,
+ .start = self.index - 1,
+ .end = self.index + 1,
+ .line_number = self.line_handler.line_number,
+ };
+ return error.FoundCStyleEscapedQuote;
+ },
+ else => {
+ string_literal_length += 1;
+ string_literal_column += 1;
+ state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string;
+ },
+ },
+ .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) {
+ '"' => {
+ state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
+ // Escaped quotes count as 1 char for string literal length checks.
+ // Since we did not increment on the first " (because it could have been
+ // the end of the quoted string), we increment here
+ string_literal_length += 1;
+ string_literal_column += 1;
+ },
+ // The previous " was the end of the string; the current character
+ // is left to be lexed as part of the next token.
+ else => {
+ result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
+ break;
+ },
+ },
+ }
+ } else { // got EOF
+ switch (state) {
+ .start, .semicolon => {},
+ // A partially matched keyword at EOF is just a literal
+ .literal_or_quoted_wide_string, .literal, .e, .en, .b, .be, .beg, .begi => {
+ result.id = .literal;
+ },
+ // A preprocessor line that is terminated by EOF still gets evaluated
+ .preprocessor => {
+ try self.evaluatePreprocessorCommand(result.start, self.index);
+ result.start = self.index;
+ },
+ .number_literal => {
+ result.id = .number;
+ },
+ .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => {
+ result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
+ },
+ .quoted_ascii_string,
+ .quoted_wide_string,
+ .quoted_ascii_string_escape,
+ .quoted_wide_string_escape,
+ => {
+ self.error_context_token = .{
+ .id = .eof,
+ .start = self.index,
+ .end = self.index,
+ .line_number = self.line_handler.line_number,
+ };
+ return LexError.UnfinishedStringLiteral;
+ },
+ }
+ }
+
+ result.end = self.index;
+
+ // The length limit is only checked once the full string literal has been lexed
+ if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) {
+ if (string_literal_length > self.max_string_literal_codepoints) {
+ self.error_context_token = result;
+ return LexError.StringLiteralTooLong;
+ }
+ }
+
+ return result;
+ }
+
+ /// Tells the line handler that a line ending was found at `self.index` and
+ /// marks the lexer as being at the start of a line. The line handler decides
+ /// whether the line number actually changes (line ending pairs only count once).
+ /// Returns the line number as it is after the call.
+ fn incrementLineNumber(self: *Self) usize {
+     self.at_start_of_line = true;
+     const handler = &self.line_handler;
+     _ = handler.incrementLineNumber(self.index);
+     return handler.line_number;
+ }
+
+ /// Returns an error if `codepoint` is never allowed, or if it is only allowed
+ /// within string literals and `in_string_literal` is false.
+ /// When an error is returned, `error_context_token` is set to span the
+ /// codepoint, which is taken to start at `self.index`.
+ fn checkForIllegalCodepoint(self: *Self, codepoint: code_pages.Codepoint, in_string_literal: bool) LexError!void {
+     const outside_string_literal = !in_string_literal;
+     const illegal: LexError = switch (codepoint.value) {
+         // Never allowed:
+         // - 0x00 is NUL
+         // - 0x1A is Substitute, which is treated as EOF. The clang preprocessor treats it
+         //   as EOF as well, so a .rc file that has been run through the clang preprocessor
+         //   will no longer have any 0x1A characters in it.
+         // - 0x7F is DEL, which the Windows RC compiler treats as a context-specific terminator
+         0x00, 0x1A, 0x7F => error.IllegalByte,
+         // Only allowed within string literals:
+         // - 0x01...0x03 result in strange 'macro definition too big' errors when used
+         //   outside of string literals
+         // - 0x04 is valid but behaves strangely (sort of acts as a 'skip the next
+         //   character' instruction)
+         // - @ and ` both result in error RC2018: unknown character '0x60' (and subsequently
+         //   fatal error RC1116: RC terminating after preprocessor errors) if they are ever
+         //   used outside of string literals. Not exactly sure why this would be the case, though.
+         //   TODO: Make sure there aren't any exceptions
+         0x01...0x04, '@', '`' => if (outside_string_literal) error.IllegalByteOutsideStringLiterals else return,
+         // The Windows RC compiler mostly skips over the Byte Order Mark, but there are edge
+         // cases where it leads to cryptic 'compiler limit : macro definition too big' errors
+         // (e.g. a BOM within a number literal). Making it illegal means not having to deal
+         // with a lot of edge cases, and it removes the potential footgun of the bytes of a BOM
+         // going 'missing' when included in a string literal (the Windows RC compiler acts as
+         // if the codepoint was never part of the string literal).
+         '\u{FEFF}' => error.IllegalByteOrderMark,
+         // This private use codepoint is a similar story: the RC compiler skips/ignores it
+         // (but without the cryptic errors). Silently dropping bytes is still enough of a
+         // footgun with no real use-cases that erroring is preferable to emulating the RC
+         // compiler's behavior.
+         '\u{E000}' => error.IllegalPrivateUseCharacter,
+         // These codepoints lead to strange errors when used outside of string literals,
+         // and miscompilations when used within string literals. Within string literals
+         // the miscompilation is avoided and a warning is emitted, but outside of string
+         // literals it makes more sense to just disallow them.
+         0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => if (outside_string_literal) error.IllegalCodepointOutsideStringLiterals else return,
+         else => return,
+     };
+
+     const codepoint_start = self.index;
+     self.error_context_token = .{
+         .id = .invalid,
+         .start = codepoint_start,
+         .end = codepoint_start + codepoint.byte_len,
+         .line_number = self.line_handler.line_number,
+     };
+     return illegal;
+ }
+
+ /// Evaluates the preprocessor line `self.buffer[start..end]`.
+ ///
+ /// Only `#pragma code_page(<id>)` has any effect; every other directive is
+ /// ignored without error. `<id>` is either `DEFAULT` (case-insensitive), which
+ /// selects `default_code_page`, or a decimal code page identifier. On success,
+ /// `current_code_page` is updated and `seen_pragma_code_pages` is incremented
+ /// (saturating).
+ ///
+ /// Anything that comes after the closing `)` is not looked at.
+ ///
+ /// When an error is returned, `error_context_token` is set to a
+ /// `.preprocessor_command` token spanning `start..end`.
+ fn evaluatePreprocessorCommand(self: *Self, start: usize, end: usize) !void {
+     const token = Token{
+         .id = .preprocessor_command,
+         .start = start,
+         .end = end,
+         .line_number = self.line_handler.line_number,
+     };
+     errdefer self.error_context_token = token;
+     // The part of the line that has not been consumed yet
+     var command: []const u8 = self.buffer[start..end];
+
+     // Anything besides exactly this is ignored by the Windows RC implementation
+     const expected_directive = "#pragma";
+     if (!std.mem.startsWith(u8, command, expected_directive)) return;
+     command = command[expected_directive.len..];
+
+     // There must be at least one whitespace character after the directive
+     if (command.len == 0 or !std.ascii.isWhitespace(command[0])) return;
+     while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
+         command = command[1..];
+     }
+
+     // Note: CoDe_PaGeZ is also treated as "code_page" by the Windows RC implementation,
+     //       and it will error with 'Missing left parenthesis in code_page #pragma'
+     const expected_extension = "code_page";
+     if (!std.ascii.startsWithIgnoreCase(command, expected_extension)) return;
+     command = command[expected_extension.len..];
+
+     while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
+         command = command[1..];
+     }
+
+     if (command.len == 0 or command[0] != '(') {
+         return error.CodePagePragmaMissingLeftParen;
+     }
+     command = command[1..];
+
+     while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
+         command = command[1..];
+     }
+
+     // The code page identifier is everything up to the next whitespace
+     // character or `)`. It is taken as a read-only sub-slice of the source
+     // rather than by growing the length of a mutable slice that points into
+     // the (read-only) source buffer.
+     var num_len: usize = 0;
+     while (num_len < command.len and command[num_len] != ')' and !std.ascii.isWhitespace(command[num_len])) {
+         num_len += 1;
+     }
+     const num_str: []const u8 = command[0..num_len];
+     command = command[num_len..];
+
+     if (num_str.len == 0) {
+         return error.CodePagePragmaNotInteger;
+     }
+
+     while (command.len > 0 and std.ascii.isWhitespace(command[0])) {
+         command = command[1..];
+     }
+
+     if (command.len == 0 or command[0] != ')') {
+         return error.CodePagePragmaMissingRightParen;
+     }
+
+     const code_page = code_page: {
+         if (std.ascii.eqlIgnoreCase("DEFAULT", num_str)) {
+             break :code_page self.default_code_page;
+         }
+
+         // The Win32 compiler behaves fairly strangely around maxInt(u32):
+         // - If the overflowed u32 wraps and becomes a known code page ID, then
+         //   it will error/warn with "Codepage not valid:  ignored" (depending on /w)
+         // - If the overflowed u32 wraps and does not become a known code page ID,
+         //   then it will error with 'constant too big' and 'Codepage not integer'
+         //
+         // Instead of that, we just have a separate error specifically for overflow.
+         const num = parseCodePageNum(num_str) catch |err| switch (err) {
+             error.InvalidCharacter => return error.CodePagePragmaNotInteger,
+             error.Overflow => return error.CodePagePragmaOverflow,
+         };
+
+         // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252
+         if (num_str[0] == '0' and num != 0) {
+             return error.CodePagePragmaInvalidCodePage;
+         }
+         // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation.
+         else if (num == 0) {
+             return error.CodePagePragmaNotInteger;
+         }
+         // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16.
+         if (num > std.math.maxInt(u16)) {
+             return error.CodePagePragmaInvalidCodePage;
+         }
+
+         break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) {
+             error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage,
+             error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage,
+         };
+     };
+
+     // https://learn.microsoft.com/en-us/windows/win32/menurc/pragma-directives
+     // > This pragma is not supported in an included resource file (.rc)
+     //
+     // Even though the Win32 behavior is to just ignore such directives silently,
+     // this is an error in the lexer to allow for emitting warnings/errors when
+     // such directives are found if that's wanted. The intention is for the lexer
+     // to still be able to work correctly after this error is returned.
+     if (self.source_mappings) |source_mappings| {
+         if (!source_mappings.isRootFile(token.line_number)) {
+             return error.CodePagePragmaInIncludedFile;
+         }
+     }
+
+     self.seen_pragma_code_pages +|= 1;
+     self.current_code_page = code_page;
+ }
+
+ /// Parses `str` as an unsigned base 10 integer made up of ASCII digits only
+ /// (no sign, no separators). An empty `str` parses as 0.
+ /// Returns `error.InvalidCharacter` if a non-digit is found, and
+ /// `error.Overflow` if the value does not fit in a `u32`.
+ fn parseCodePageNum(str: []const u8) !u32 {
+     var value: u32 = 0;
+     for (str) |char| {
+         const digit = try std.fmt.charToDigit(char, 10);
+         // Make room for the new least significant digit
+         const shifted: u32 = if (value == 0) 0 else try std.math.mul(u32, value, 10);
+         value = try std.math.add(u32, shifted, digit);
+     }
+     return value;
+ }
+
+ /// Converts `lex_err` into an `ErrorDetails` that refers to the token stored
+ /// in `error_context_token`.
+ /// Asserts that `error_context_token` is non-null.
+ pub fn getErrorDetails(self: Self, lex_err: LexError) ErrorDetails {
+     const context_token = self.error_context_token.?;
+
+     // 'String literal too long' is the only error with extra information
+     // attached: the limit that was exceeded.
+     if (lex_err == error.StringLiteralTooLong) {
+         return .{
+             .err = .string_literal_too_long,
+             .token = context_token,
+             .extra = .{ .number = self.max_string_literal_codepoints },
+         };
+     }
+
+     const kind: ErrorDetails.Error = switch (lex_err) {
+         error.StringLiteralTooLong => unreachable, // handled above
+         error.UnfinishedStringLiteral => .unfinished_string_literal,
+         error.InvalidNumberWithExponent => .invalid_number_with_exponent,
+         error.InvalidDigitCharacterInNumberLiteral => .invalid_digit_character_in_number_literal,
+         error.IllegalByte => .illegal_byte,
+         error.IllegalByteOutsideStringLiterals => .illegal_byte_outside_string_literals,
+         error.IllegalCodepointOutsideStringLiterals => .illegal_codepoint_outside_string_literals,
+         error.IllegalByteOrderMark => .illegal_byte_order_mark,
+         error.IllegalPrivateUseCharacter => .illegal_private_use_character,
+         error.FoundCStyleEscapedQuote => .found_c_style_escaped_quote,
+         error.CodePagePragmaMissingLeftParen => .code_page_pragma_missing_left_paren,
+         error.CodePagePragmaMissingRightParen => .code_page_pragma_missing_right_paren,
+         error.CodePagePragmaInvalidCodePage => .code_page_pragma_invalid_code_page,
+         error.CodePagePragmaNotInteger => .code_page_pragma_not_integer,
+         error.CodePagePragmaOverflow => .code_page_pragma_overflow,
+         error.CodePagePragmaUnsupportedCodePage => .code_page_pragma_unsupported_code_page,
+         error.CodePagePragmaInIncludedFile => .code_page_pragma_in_included_file,
+     };
+     return .{
+         .err = kind,
+         .token = context_token,
+     };
+ }
+};
+
+/// Lexes `source` with `nextNormal` and expects the ids of the resulting tokens
+/// to be exactly `expected_tokens`, followed by eof.
+fn testLexNormal(source: []const u8, expected_tokens: []const Token.Id) !void {
+    var lexer = Lexer.init(source, .{});
+    if (dumpTokensDuringTests) {
+        std.debug.print("\n----------------------\n{s}\n----------------------\n", .{lexer.buffer});
+    }
+
+    for (expected_tokens) |expected_id| {
+        const lexed = try lexer.nextNormal();
+        if (dumpTokensDuringTests) lexer.dump(&lexed);
+        try std.testing.expectEqual(expected_id, lexed.id);
+    }
+
+    // Nothing but eof may be left after the expected tokens
+    const trailing = try lexer.nextNormal();
+    try std.testing.expectEqual(Token.Id.eof, trailing.id);
+}
+
+/// Expects `actual` to be the error `expected`, and prints `actual` afterwards
+/// if `dumpTokensDuringTests` is set.
+fn expectLexError(expected: LexError, actual: anytype) !void {
+    try std.testing.expectError(expected, actual);
+    if (!dumpTokensDuringTests) return;
+    std.debug.print("{!}\n", .{actual});
+}
+
+test "normal: numbers" {
+    const Case = struct {
+        source: []const u8,
+        expected: []const Token.Id,
+    };
+    const cases = [_]Case{
+        .{ .source = "1", .expected = &.{.number} },
+        // `nextNormal` does not expect an operator, so a leading - starts a number
+        .{ .source = "-1", .expected = &.{.number} },
+        // ...which means that a - on its own is a number token, too
+        .{ .source = "- 1", .expected = &.{ .number, .number } },
+        // ...and that non-digits after the - are part of the number token
+        .{ .source = "-a", .expected = &.{.number} },
+    };
+    for (cases) |case| {
+        try testLexNormal(case.source, case.expected);
+    }
+}
+
+test "normal: string literals" {
+    // Each source is expected to lex as exactly one string literal
+    const sources = [_][]const u8{
+        "\"\"",
+        // "" is an escaped "
+        "\" \"\" \"",
+    };
+    for (sources) |source| {
+        try testLexNormal(source, &.{.quoted_ascii_string});
+    }
+}
+
+test "superscript chars and code pages" {
+    // Lexes only the first token of `source` with the given default code page
+    const lexFirst = struct {
+        fn lexFirst(source: []const u8, default_code_page: CodePage, comptime lex_method: Lexer.LexMethod) LexError!Token {
+            var lexer = Lexer.init(source, .{ .default_code_page = default_code_page });
+            return lexer.next(lex_method);
+        }
+    }.lexFirst;
+    const utf8_source = "²";
+    const windows1252_source = "\xB2";
+
+    // When the code page matches the encoding of the source, the superscript
+    // two is rejected as a digit character.
+    try std.testing.expectError(
+        error.InvalidDigitCharacterInNumberLiteral,
+        lexFirst(windows1252_source, .windows1252, .normal),
+    );
+    try std.testing.expectError(
+        error.InvalidDigitCharacterInNumberLiteral,
+        lexFirst(utf8_source, .utf8, .normal),
+    );
+
+    // When the code page does not match the encoding of the source, the bytes
+    // are expected to lex as a literal that spans all of the source.
+    const expected_utf8_as_windows1252: Token = .{
+        .id = .literal,
+        .start = 0,
+        .end = 2,
+        .line_number = 1,
+    };
+    try std.testing.expectEqual(
+        expected_utf8_as_windows1252,
+        try lexFirst(utf8_source, .windows1252, .normal),
+    );
+
+    const expected_windows1252_as_utf8: Token = .{
+        .id = .literal,
+        .start = 0,
+        .end = 1,
+        .line_number = 1,
+    };
+    try std.testing.expectEqual(
+        expected_windows1252_as_utf8,
+        try lexFirst(windows1252_source, .utf8, .normal),
+    );
+}
diff --git a/lib/compiler/resinator/literals.zig b/lib/compiler/resinator/literals.zig
@@ -0,0 +1,910 @@
+const std = @import("std");
+const code_pages = @import("code_pages.zig");
+const CodePage = code_pages.CodePage;
+const windows1252 = @import("windows1252.zig");
+const ErrorDetails = @import("errors.zig").ErrorDetails;
+const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
+const Token = @import("lex.zig").Token;
+
+/// Returns true if `str` is accepted as a number literal for data values.
+/// rc is maximally liberal here: only the first character is looked at, and
+/// it being a digit, a `-`, or a `~` is good enough. An empty `str` is never valid.
+pub fn isValidNumberDataLiteral(str: []const u8) bool {
+    if (str.len == 0) return false;
+    return switch (str[0]) {
+        '0'...'9', '-', '~' => true,
+        else => false,
+    };
+}
+
+/// A slice of source bytes together with the code page to decode it with.
+pub const SourceBytes = struct {
+ /// The raw, undecoded bytes.
+ slice: []const u8,
+ /// The code page that the bytes in `slice` are decoded with.
+ code_page: CodePage,
+};
+
+/// The two kinds of string literal: `wide` is used for literals that have an
+/// `L`/`l` prefix, `ascii` for literals without one (see `IterativeStringParser.init`).
+pub const StringType = enum { ascii, wide };
+
+/// Valid escapes:
+/// "" -> "
+/// \a, \A => 0x08 (not 0x07 like in C)
+/// \n => 0x0A
+/// \r => 0x0D
+/// \t, \T => 0x09
+/// \\ => \
+/// \nnn => byte with numeric value given by nnn interpreted as octal
+/// (wraps on overflow, number of digits can be 1-3 for ASCII strings
+/// and 1-7 for wide strings)
+/// \xhh => byte with numeric value given by hh interpreted as hex
+/// (number of digits can be 0-2 for ASCII strings and 0-4 for
+/// wide strings)
+/// \<\r+> => \
+/// \<[\r\n\t ]+> => <nothing>
+///
+/// Special cases:
+/// <\t> => 1-8 spaces, dependent on columns in the source rc file itself
+/// <\r> => <nothing>
+/// <\n+><\w+?\n?> => <space><\n>
+///
+/// Special, especially weird case:
+/// \"" => "
+/// NOTE: This leads to footguns because the preprocessor can start parsing things
+/// out-of-sync with the RC compiler, expanding macros within string literals, etc.
+/// This parse function handles this case the same as the Windows RC compiler, but
+/// \" within a string literal is treated as an error by the lexer, so the relevant
+/// branches should never actually be hit during this function.
+pub const IterativeStringParser = struct {
+ source: []const u8,
+ code_page: CodePage,
+ /// The type of the string inferred by the prefix (L"" or "")
+ /// This is what matters for things like the maximum digits in an
+ /// escape sequence, whether or not invalid escape sequences are skipped, etc.
+ declared_string_type: StringType,
+ pending_codepoint: ?u21 = null,
+ num_pending_spaces: u8 = 0,
+ index: usize = 0,
+ column: usize = 0,
+ diagnostics: ?DiagnosticsContext = null,
+ seen_tab: bool = false,
+
+ /// States of the state machine in `nextUnchecked`.
+ const State = enum {
+ /// Not within any multi-character sequence.
+ normal,
+ /// Seen a `"`; another `"` is expected to follow (`""` is an escaped `"`).
+ quote,
+ /// Seen a `\n`; the whitespace that follows it is being skipped.
+ newline,
+ /// Seen a `\`.
+ escaped,
+ /// Seen a `\` followed by a `\r`.
+ escaped_cr,
+ /// Seen a `\` followed by a `\n`.
+ escaped_newlines,
+ /// Within an octal escape sequence (a `\` followed by a digit from 0 to 7).
+ escaped_octal,
+ /// Within a hex escape sequence (a `\` followed by `x` or `X`).
+ escaped_hex,
+ };
+
+ /// Creates a parser for the contents of the string literal `bytes.slice`.
+ /// `bytes.slice` must be the full string literal, including the surrounding
+ /// quotes and the `L`/`l` prefix, if there is one.
+ /// `options.start_column` is the column of the first byte of `bytes.slice`;
+ /// the parser's column starts out just past the opening quote.
+ pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
+     const declared_string_type: StringType = switch (bytes.slice[0]) {
+         'L', 'l' => .wide,
+         else => .ascii,
+     };
+     // The number of bytes that come before the contents of the string
+     const prefix_len: usize = switch (declared_string_type) {
+         .wide => 2, // L"
+         .ascii => 1, // "
+     };
+     return .{
+         // The closing " is not part of the contents, either
+         .source = bytes.slice[prefix_len .. bytes.slice.len - 1],
+         .code_page = bytes.code_page,
+         .declared_string_type = declared_string_type,
+         .column = options.start_column + prefix_len,
+         .diagnostics = options.diagnostics,
+     };
+ }
+
+ /// One parsed unit of a string literal, as returned by `next` and `nextUnchecked`.
+ pub const ParsedCodepoint = struct {
+ /// The parsed value.
+ codepoint: u21,
+ /// Note: If this is true, `codepoint` will be a value with a max of maxInt(u16).
+ /// This is enforced by using saturating arithmetic, so in e.g. a wide string literal the
+ /// octal escape sequence \7777777 (2,097,151) will be parsed into the value 0xFFFF (65,535).
+ /// If the value needs to be truncated to a smaller integer (for ASCII string literals), then that
+ /// must be done by the caller.
+ from_escaped_integer: bool = false,
+ };
+
+ /// Returns the next parsed codepoint, or null once the end of the string
+ /// has been reached.
+ ///
+ /// Same as `nextUnchecked`, except that if `diagnostics` is set, a warning
+ /// and an accompanying note are appended for each codepoint in the
+ /// 'rc would miscompile' set that was not written as an escaped integer.
+ /// Appending to the diagnostics is the only thing here that can fail.
+ pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
+     const parsed = (try self.nextUnchecked()) orelse return null;
+     // Values that come from escaped integers never get a warning
+     if (parsed.from_escaped_integer) return parsed;
+     const context = if (self.diagnostics) |*ctx| ctx else return parsed;
+
+     const err: ErrorDetails.Error = switch (parsed.codepoint) {
+         0xD00 => .rc_would_miscompile_codepoint_skip,
+         0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE => .rc_would_miscompile_codepoint_byte_swap,
+         else => return parsed,
+     };
+
+     try context.diagnostics.append(ErrorDetails{
+         .err = err,
+         .type = .warning,
+         .token = context.token,
+         .extra = .{ .number = parsed.codepoint },
+     });
+     // The note refers to the same token, but does not print the source line again
+     try context.diagnostics.append(ErrorDetails{
+         .err = err,
+         .type = .note,
+         .token = context.token,
+         .print_source_line = false,
+         .extra = .{ .number = parsed.codepoint },
+     });
+     return parsed;
+ }
+
+ /// Produces the next parsed codepoint of the string literal, or `null` once
+ /// the source has been fully consumed.
+ ///
+ /// Output queued by a previous call is drained first: spaces left over from
+ /// a tab expansion (`num_pending_spaces`), then a single `pending_codepoint`
+ /// (the `\n` of a collapsed `<space><newline>` pair).
+ ///
+ /// Otherwise the source is decoded one codepoint at a time with
+ /// `self.code_page.codepointAt` and fed through a small state machine that
+ /// handles `""` quote escapes, backslash escapes (named, octal, hex, and
+ /// escaped line breaks), dropped carriage returns, newline/whitespace
+ /// collapsing, and tab-to-space conversion.
+ ///
+ /// The only fallible operations are the appends to the diagnostics list, so
+ /// the only possible error is an allocation failure.
+ pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
+ if (self.num_pending_spaces > 0) {
+ // Ensure that we don't get into this predicament so we can ensure that
+ // the order of processing any pending stuff doesn't matter
+ std.debug.assert(self.pending_codepoint == null);
+ self.num_pending_spaces -= 1;
+ return .{ .codepoint = ' ' };
+ }
+ if (self.pending_codepoint) |pending_codepoint| {
+ self.pending_codepoint = null;
+ return .{ .codepoint = pending_codepoint };
+ }
+ if (self.index >= self.source.len) return null;
+
+ var state: State = .normal;
+ // Accumulated value and digit count of the octal/hex escape being parsed.
+ var string_escape_n: u16 = 0;
+ var string_escape_i: u8 = 0;
+ // The digit limits of integer escapes depend on the declared string type.
+ const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
+ .ascii => 3,
+ .wide => 7,
+ };
+ const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
+ .ascii => 2,
+ .wide => 4,
+ };
+
+ // When `backtrack` is set during an iteration, neither `self.index` (in the
+ // continue expression) nor `self.column` (in the `defer` below) is advanced,
+ // so the current codepoint is seen again, either by the next iteration or
+ // by the next call. It is reset at the top of every iteration.
+ var backtrack: bool = undefined;
+ while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : ({
+ if (!backtrack) self.index += codepoint.byte_len;
+ }) {
+ backtrack = false;
+ const c = codepoint.value;
+ // Runs on every exit from the loop body, including the early `return`s,
+ // so the column is kept in sync even when the continue expression is
+ // skipped. Returning branches advance `self.index` themselves.
+ defer {
+ if (!backtrack) {
+ if (c == '\t') {
+ self.column += columnsUntilTabStop(self.column, 8);
+ } else {
+ self.column += codepoint.byte_len;
+ }
+ }
+ }
+ switch (state) {
+ .normal => switch (c) {
+ '\\' => state = .escaped,
+ '"' => state = .quote,
+ '\r' => {},
+ '\n' => state = .newline,
+ '\t' => {
+ // Only warn about a tab getting converted to spaces once per string
+ if (self.diagnostics != null and !self.seen_tab) {
+ try self.diagnostics.?.diagnostics.append(ErrorDetails{
+ .err = .tab_converted_to_spaces,
+ .type = .warning,
+ .token = self.diagnostics.?.token,
+ });
+ try self.diagnostics.?.diagnostics.append(ErrorDetails{
+ .err = .tab_converted_to_spaces,
+ .type = .note,
+ .token = self.diagnostics.?.token,
+ .print_source_line = false,
+ });
+ self.seen_tab = true;
+ }
+ // The tab expands to `cols` spaces: one is returned now and the
+ // remaining `cols - 1` are queued for subsequent calls.
+ const cols = columnsUntilTabStop(self.column, 8);
+ self.num_pending_spaces = @intCast(cols - 1);
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = ' ' };
+ },
+ else => {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = c };
+ },
+ },
+ .quote => switch (c) {
+ '"' => {
+ // "" => "
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = '"' };
+ },
+ else => unreachable, // this is a bug in the lexer
+ },
+ .newline => switch (c) {
+ // Whitespace following a newline is swallowed.
+ '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
+ else => {
+ // we intentionally avoid incrementing self.index
+ // to handle the current char in the next call,
+ // and we set backtrack so column count is handled correctly
+ backtrack = true;
+
+ // <space><newline>
+ self.pending_codepoint = '\n';
+ return .{ .codepoint = ' ' };
+ },
+ },
+ .escaped => switch (c) {
+ '\r' => state = .escaped_cr,
+ '\n' => state = .escaped_newlines,
+ '0'...'7' => {
+ string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
+ string_escape_i = 1;
+ state = .escaped_octal;
+ },
+ 'x', 'X' => {
+ string_escape_n = 0;
+ string_escape_i = 0;
+ state = .escaped_hex;
+ },
+ else => {
+ switch (c) {
+ 'a', 'A' => {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = '\x08' };
+ }, // might be a bug in RC, but matches its behavior
+ 'n' => {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = '\n' };
+ },
+ 'r' => {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = '\r' };
+ },
+ 't', 'T' => {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = '\t' };
+ },
+ '\\' => {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = '\\' };
+ },
+ '"' => {
+ // \" is a special case that doesn't get the \ included,
+ // the quote itself is reprocessed in the normal state
+ backtrack = true;
+ },
+ else => switch (self.declared_string_type) {
+ .wide => {}, // invalid escape sequences are skipped in wide strings
+ .ascii => {
+ // we intentionally avoid incrementing self.index
+ // to handle the current char in the next call,
+ // and we set backtrack so column count is handled correctly
+ backtrack = true;
+ return .{ .codepoint = '\\' };
+ },
+ },
+ }
+ state = .normal;
+ },
+ },
+ .escaped_cr => switch (c) {
+ '\r' => {},
+ '\n' => state = .escaped_newlines,
+ else => {
+ // we intentionally avoid incrementing self.index
+ // to handle the current char in the next call,
+ // and we set backtrack so column count is handled correctly
+ backtrack = true;
+ return .{ .codepoint = '\\' };
+ },
+ },
+ .escaped_newlines => switch (c) {
+ '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
+ else => {
+ // backtrack so that we handle the current char properly
+ backtrack = true;
+ state = .normal;
+ },
+ },
+ .escaped_octal => switch (c) {
+ '0'...'7' => {
+ // Wrapping arithmetic: the accumulated value is allowed to
+ // overflow the u16 accumulator.
+ string_escape_n *%= 8;
+ string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
+ string_escape_i += 1;
+ if (string_escape_i == max_octal_escape_digits) {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
+ }
+ },
+ else => {
+ // we intentionally avoid incrementing self.index
+ // to handle the current char in the next call,
+ // and we set backtrack so column count is handled correctly
+ backtrack = true;
+
+ // write out whatever byte we have parsed so far
+ return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
+ },
+ },
+ .escaped_hex => switch (c) {
+ '0'...'9', 'a'...'f', 'A'...'F' => {
+ // At most 4 hex digits are accumulated, which always fits in a u16.
+ string_escape_n *= 16;
+ string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
+ string_escape_i += 1;
+ if (string_escape_i == max_hex_escape_digits) {
+ self.index += codepoint.byte_len;
+ return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
+ }
+ },
+ else => {
+ // we intentionally avoid incrementing self.index
+ // to handle the current char in the next call,
+ // and we set backtrack so column count is handled correctly
+ backtrack = true;
+
+ // write out whatever byte we have parsed so far
+ // (even with 0 actual digits, \x alone parses to 0)
+ const escaped_value = string_escape_n;
+ return .{ .codepoint = escaped_value, .from_escaped_integer = true };
+ },
+ },
+ }
+ }
+
+ // The source ran out; flush whatever the final state still owes.
+ switch (state) {
+ .normal, .escaped_newlines => {},
+ .newline => {
+ // <space><newline>
+ self.pending_codepoint = '\n';
+ return .{ .codepoint = ' ' };
+ },
+ .escaped, .escaped_cr => return .{ .codepoint = '\\' },
+ .escaped_octal, .escaped_hex => {
+ return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
+ },
+ .quote => unreachable, // this is a bug in the lexer
+ }
+
+ return null;
+ }
+};
+
+ /// Options accepted by the quoted-string parsing functions.
+ pub const StringParseOptions = struct {
+ // Presumably the source column at which the string literal starts, used for
+ // tab stop calculations; the consumer is not in this part of the file.
+ // NOTE(review): confirm against IterativeStringParser.init.
+ start_column: usize = 0,
+ // Optional diagnostics context. NOTE(review): presumably where the
+ // tab-conversion warning is reported; confirm against IterativeStringParser.init.
+ diagnostics: ?DiagnosticsContext = null,
+ // Code page used by parseQuotedString to encode the result of ascii string
+ // literals (it handles .windows1252 and .utf8).
+ output_code_page: CodePage = .windows1252,
+ };
+
+ /// Parses a quoted string literal into its in-memory representation.
+ ///
+ /// `literal_type` selects the output element type: `.ascii` yields a `[]u8`
+ /// encoded in `options.output_code_page`, `.wide` yields a 0-terminated slice
+ /// of little-endian UTF-16 code units. Codepoints are pulled one at a time
+ /// from an `IterativeStringParser` over `bytes`.
+ ///
+ /// Values that came from integer escape sequences are truncated to the
+ /// element type and written as-is, without any code page conversion.
+ ///
+ /// Caller owns the returned slice.
+ pub fn parseQuotedString(
+     comptime literal_type: StringType,
+     allocator: std.mem.Allocator,
+     bytes: SourceBytes,
+     options: StringParseOptions,
+ ) !(switch (literal_type) {
+     .ascii => []u8,
+     .wide => [:0]u16,
+ }) {
+     const Unit = switch (literal_type) {
+         .ascii => u8,
+         .wide => u16,
+     };
+     std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars
+
+     var out = try std.ArrayList(Unit).initCapacity(allocator, bytes.slice.len);
+     errdefer out.deinit();
+
+     var parser = IterativeStringParser.init(bytes, options);
+
+     while (try parser.next()) |item| {
+         const cp = item.codepoint;
+
+         if (item.from_escaped_integer) {
+             // Truncation gives the correct behavior for ascii strings
+             try out.append(std.mem.nativeToLittle(Unit, @truncate(cp)));
+             continue;
+         }
+
+         const is_invalid = cp == code_pages.Codepoint.invalid;
+         switch (literal_type) {
+             .ascii => switch (options.output_code_page) {
+                 .windows1252 => {
+                     if (windows1252.bestFitFromCodepoint(cp)) |mapped| {
+                         try out.append(mapped);
+                     } else {
+                         // No best fit mapping: one `?` in general, two for
+                         // valid codepoints at or above 0x10000.
+                         const placeholder: []const u8 = if (is_invalid or cp < 0x10000) "?" else "??";
+                         try out.appendSlice(placeholder);
+                     }
+                 },
+                 .utf8 => {
+                     const encodable = if (is_invalid) '�' else cp;
+                     var scratch: [4]u8 = undefined;
+                     const encoded_len = std.unicode.utf8Encode(encodable, &scratch) catch unreachable;
+                     try out.appendSlice(scratch[0..encoded_len]);
+                 },
+                 else => unreachable, // Unsupported code page
+             },
+             .wide => {
+                 if (is_invalid) {
+                     try out.append(std.mem.nativeToLittle(u16, '�'));
+                 } else if (cp < 0x10000) {
+                     const unit: u16 = @intCast(cp);
+                     try out.append(std.mem.nativeToLittle(u16, unit));
+                 } else {
+                     // Encode as a UTF-16 surrogate pair
+                     const high_bits: u16 = @intCast((cp - 0x10000) >> 10);
+                     const low_bits: u16 = @intCast(cp & 0x3FF);
+                     try out.append(std.mem.nativeToLittle(u16, high_bits + 0xD800));
+                     try out.append(std.mem.nativeToLittle(u16, low_bits + 0xDC00));
+                 }
+             },
+         }
+     }
+
+     switch (literal_type) {
+         .wide => return out.toOwnedSliceSentinel(0),
+         .ascii => return out.toOwnedSlice(),
+     }
+ }
+
+ /// Parses a quoted string literal into a `[]u8` by forwarding to
+ /// `parseQuotedString` with `.ascii`. `bytes` must include the surrounding
+ /// double quotes. Caller owns the returned slice.
+ pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
+     const min_literal_len = 2; // ""
+     std.debug.assert(bytes.slice.len >= min_literal_len);
+     return parseQuotedString(.ascii, allocator, bytes, options);
+ }
+
+ /// Parses an `L`-prefixed quoted string literal into a 0-terminated slice of
+ /// u16 by forwarding to `parseQuotedString` with `.wide`. `bytes` must include
+ /// the `L` prefix and the surrounding double quotes. Caller owns the returned
+ /// slice.
+ pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
+     const min_literal_len = 3; // L""
+     std.debug.assert(bytes.slice.len >= min_literal_len);
+     return parseQuotedString(.wide, allocator, bytes, options);
+ }
+
+ /// Parses a quoted string literal, with or without an `L` prefix, into a
+ /// 0-terminated slice of u16 by forwarding to `parseQuotedString` with
+ /// `.wide`. Escape sequence handling (whether invalid escapes are skipped and
+ /// the maximum escape value) still follows the literal's own prefix. Caller
+ /// owns the returned slice.
+ pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
+     const min_literal_len = 2; // ""
+     std.debug.assert(bytes.slice.len >= min_literal_len);
+     return parseQuotedString(.wide, allocator, bytes, options);
+ }
+
+ // Exercises parseQuotedAsciiString with Windows-1252 input: integer and named
+ // escapes, quote escaping, escaped line breaks, tab expansion, dropped CRs,
+ // and newline/whitespace collapsing. All results are allocated from an arena
+ // that is freed at the end of the test.
+ test "parse quoted ascii string" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const arena = arena_allocator.allocator();
+
+ try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"hello"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // hex with 0 digits
+ try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\x"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // hex max of 2 digits
+ try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\XfFf"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // octal with invalid octal digit
+ try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\19"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // escaped quotes
+ try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\" "" "
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // backslash right before escaped quotes
+ try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\"""
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // octal overflow
+ try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\401"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // escapes
+ try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\a\n\r\t\\"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // uppercase escapes
+ try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\A\N\R\T\\"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // backslash on its own
+ try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // unrecognized escapes
+ try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{
+ .slice =
+ \\"\b"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // escaped carriage returns
+ try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // escaped newlines
+ try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // escaped CRLF pairs
+ try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // escaped newlines with other whitespace
+ try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // literal tab characters get converted to spaces (dependent on source file columns)
+ try std.testing.expectEqualSlices(u8, " ", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\t\"", .code_page = .windows1252 },
+ .{},
+ ));
+ try std.testing.expectEqualSlices(u8, "abc ", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"abc\t\"", .code_page = .windows1252 },
+ .{},
+ ));
+ try std.testing.expectEqualSlices(u8, "abcdefg ", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 },
+ .{},
+ ));
+ try std.testing.expectEqualSlices(u8, "\\ ", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\\\t\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // literal CR's get dropped
+ try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // contiguous newlines and whitespace get collapsed to <space><newline>
+ try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 },
+ .{},
+ ));
+ }
+
+ // Exercises parseQuotedAsciiString with UTF-8 input, first with the default
+ // Windows-1252 output code page (best fit mapping and `?` replacement) and
+ // then with a UTF-8 output code page.
+ test "parse quoted ascii string with utf8 code page" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const arena = arena_allocator.allocator();
+
+ try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\"", .code_page = .utf8 },
+ .{},
+ ));
+ // Codepoints that don't have a Windows-1252 representation get converted to ?
+ try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"кириллица\"", .code_page = .utf8 },
+ .{},
+ ));
+ // Codepoints that have a best fit mapping get converted accordingly,
+ // these are box drawing codepoints
+ try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"┌─┐\"", .code_page = .utf8 },
+ .{},
+ ));
+ // Invalid UTF-8 gets converted to ? depending on well-formedness
+ try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
+ .{},
+ ));
+ // Codepoints that would require a UTF-16 surrogate pair get converted to ??
+ try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
+ .{},
+ ));
+
+ // Output code page changes how invalid UTF-8 gets converted, since it
+ // now encodes the result as UTF-8 so it can write replacement characters.
+ try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
+ .{ .output_code_page = .utf8 },
+ ));
+ try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
+ .{ .output_code_page = .utf8 },
+ ));
+
+ // This used to cause integer overflow when reconsuming the 4-byte long codepoint
+ // after the escaped CRLF pair.
+ try std.testing.expectEqualSlices(u8, "\u{10348}", try parseQuotedAsciiString(
+ arena,
+ .{ .slice = "\"\\\r\n\u{10348}\"", .code_page = .utf8 },
+ .{ .output_code_page = .utf8 },
+ ));
+ }
+
+ // Exercises parseQuotedWideString with Windows-1252 input: the wider digit
+ // limits of integer escapes (4 hex, 7 octal), wrapping on octal overflow, tab
+ // expansion, code page conversion, and skipping of invalid escape sequences.
+ test "parse quoted wide string" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const arena = arena_allocator.allocator();
+
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("hello"), try parseQuotedWideString(arena, .{
+ .slice =
+ \\L"hello"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // hex with 0 digits
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{
+ .slice =
+ \\L"\x"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // hex max of 4 digits
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0xFFFF), std.mem.nativeToLittle(u16, 'f') }, try parseQuotedWideString(arena, .{
+ .slice =
+ \\L"\XfFfFf"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // octal max of 7 digits
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x9493), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '3') }, try parseQuotedWideString(arena, .{
+ .slice =
+ \\L"\111222333"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // octal overflow
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0xFF01)}, try parseQuotedWideString(arena, .{
+ .slice =
+ \\L"\777401"
+ ,
+ .code_page = .windows1252,
+ }, .{}));
+ // literal tab characters get converted to spaces (dependent on source file columns)
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg "), try parseQuotedWideString(
+ arena,
+ .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // Windows-1252 conversion
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString(
+ arena,
+ .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // Invalid escape sequences are skipped
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString(
+ arena,
+ .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
+ .{},
+ ));
+ }
+
+ // Exercises parseQuotedWideString with UTF-8 input: the empty string, valid
+ // non-ASCII text, and invalid UTF-8 being replaced with U+FFFD.
+ test "parse quoted wide string with utf8 code page" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const arena = arena_allocator.allocator();
+
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString(
+ arena,
+ .{ .slice = "L\"\"", .code_page = .utf8 },
+ .{},
+ ));
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString(
+ arena,
+ .{ .slice = "L\"кириллица\"", .code_page = .utf8 },
+ .{},
+ ));
+ // Invalid UTF-8 gets converted to � depending on well-formedness
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString(
+ arena,
+ .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
+ .{},
+ ));
+ }
+
+ // Exercises parseQuotedStringAsWideString: the output is always wide, while
+ // escape handling keeps following the presence or absence of the L prefix.
+ test "parse quoted ascii string as wide string" {
+ var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena_allocator.deinit();
+ const arena = arena_allocator.allocator();
+
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString(
+ arena,
+ .{ .slice = "\"кириллица\"", .code_page = .utf8 },
+ .{},
+ ));
+ // Whether or not invalid escapes are skipped is still determined by the L prefix
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString(
+ arena,
+ .{ .slice = "\"\\H\"", .code_page = .windows1252 },
+ .{},
+ ));
+ try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString(
+ arena,
+ .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
+ .{},
+ ));
+ // Maximum escape sequence value is also determined by the L prefix
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x12), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '4') }, try parseQuotedStringAsWideString(
+ arena,
+ .{ .slice = "\"\\x1234\"", .code_page = .windows1252 },
+ .{},
+ ));
+ try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0x1234)}, try parseQuotedStringAsWideString(
+ arena,
+ .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 },
+ .{},
+ ));
+ }
+
+ /// Returns how many columns separate `column` from the next tab stop when tab
+ /// stops occur every `tab_columns` columns. A column that already sits on a
+ /// tab stop yields a full `tab_columns`, never 0.
+ ///
+ /// With `tab_columns` = 8:
+ ///   0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4
+ ///   5 => 3, 6 => 2, 7 => 1, 8 => 8
+ pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
+     const columns_past_stop = column % tab_columns;
+     return tab_columns - columns_past_stop;
+ }
+
+ /// Returns the number of columns the byte `c` occupies when it appears at
+ /// `cur_column`: a tab extends to the next tab stop (see
+ /// `columnsUntilTabStop`), every other byte counts as one column.
+ pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
+     if (c != '\t') return 1;
+     return columnsUntilTabStop(cur_column, tab_columns);
+ }
+
+ /// A 32-bit number value together with a flag recording whether it is "long"
+ /// (the flag set by an `L`/`l` suffix in `parseNumberLiteral`).
+ pub const Number = struct {
+     value: u32,
+     is_long: bool = false,
+
+     /// Returns the low 16 bits of the value.
+     pub fn asWord(self: Number) u16 {
+         const low_word: u16 = @truncate(self.value);
+         return low_word;
+     }
+
+     /// Applies the binary operator `operator_char` (one of `-`, `+`, `|`, `&`)
+     /// to the two values. Addition and subtraction wrap on overflow. The
+     /// result is long if either operand is long.
+     pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
+         const a = lhs.value;
+         const b = rhs.value;
+         const long = lhs.is_long or rhs.is_long;
+         return switch (operator_char) {
+             '+' => .{ .value = a +% b, .is_long = long },
+             '-' => .{ .value = a -% b, .is_long = long },
+             '&' => .{ .value = a & b, .is_long = long },
+             '|' => .{ .value = a | b, .is_long = long },
+             else => unreachable, // invalid operator, this would be a lexer/parser bug
+         };
+     }
+ };
+
+ /// Parses a number literal into a `Number`.
+ ///
+ /// Accepts an optional leading `-` (negate) or `~` (bitwise complement), an
+ /// optional radix prefix (`0x`/`0X` for hexadecimal, lowercase-only `0o` for
+ /// octal), and then digits. Parsing stops without failing at the first
+ /// codepoint that is not a digit of the radix; an `L` or `l` marks the number
+ /// as long and also ends parsing. The value wraps on overflow.
+ ///
+ /// Number literals that RC's preprocessor normally rejects are assumed to
+ /// have been rejected before reaching this function.
+ ///
+ /// Relevant RC preprocessor errors:
+ ///  RC2021: expected exponent value, not '<digit>'
+ ///   example that is rejected: 1e1
+ ///   example that is accepted: 1ea
+ ///   (this function will parse the two examples above the same)
+ pub fn parseNumberLiteral(bytes: SourceBytes) Number {
+     std.debug.assert(bytes.slice.len > 0);
+
+     const Sign = enum { none, minus, complement };
+     const sign: Sign = switch (bytes.slice[0]) {
+         '-' => .minus,
+         '~' => .complement,
+         else => .none,
+     };
+     var digits = if (sign == .none) bytes.slice else bytes.slice[1..];
+
+     var base: u8 = 10;
+     if (digits.len > 2 and digits[0] == '0') {
+         const prefixed_base: ?u8 = switch (digits[1]) {
+             'o' => 8, // octal radix prefix is case-sensitive
+             'x', 'X' => 16,
+             else => null,
+         };
+         if (prefixed_base) |b| {
+             base = b;
+             digits = digits[2..];
+         }
+     }
+
+     var magnitude: u32 = 0;
+     var long = false;
+     var offset: usize = 0;
+     while (bytes.code_page.codepointAt(offset, digits)) |cp| : (offset += cp.byte_len) {
+         if (cp.value == 'L' or cp.value == 'l') {
+             long = true;
+             break;
+         }
+         // On invalid digit for the radix, just stop parsing but don't fail
+         if (cp.value > 0x7F) break;
+         const digit = std.fmt.charToDigit(@intCast(cp.value), base) catch break;
+
+         magnitude = magnitude *% base +% digit;
+     }
+
+     const value: u32 = switch (sign) {
+         .none => magnitude,
+         .minus => 0 -% magnitude,
+         .complement => ~magnitude,
+     };
+     return .{ .value = value, .is_long = long };
+ }
+
+ // Exercises parseNumberLiteral: decimal, hexadecimal and octal radixes, the
+ // L/l long suffix, wrapping on overflow, the `-` and `~` prefixes, and
+ // trailing garbage.
+ test "parse number literal" {
+     try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 }));
+
+     // can handle any length of number, wraps on overflow appropriately
+     const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 });
+     try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow);
+     try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord());
+
+     try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 }));
+     // lowercase long suffix after a hex literal (this line used to repeat
+     // the uppercase-suffix case above)
+     try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2al", .code_page = .windows1252 }));
+
+     try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 }));
+     // the octal prefix is case-sensitive, so 0O is not a radix prefix
+     try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 }));
+
+     try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 }));
+     try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 }));
+
+     // anything after L is ignored
+     try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 }));
+ }
diff --git a/lib/compiler/resinator/main.zig b/lib/compiler/resinator/main.zig
@@ -0,0 +1,719 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const removeComments = @import("comments.zig").removeComments;
+const parseAndRemoveLineCommands = @import("source_mapping.zig").parseAndRemoveLineCommands;
+const compile = @import("compile.zig").compile;
+const Diagnostics = @import("errors.zig").Diagnostics;
+const cli = @import("cli.zig");
+const preprocess = @import("preprocess.zig");
+const renderErrorMessage = @import("utils.zig").renderErrorMessage;
+const aro = @import("aro");
+
+pub fn main() !void {
+ var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+ defer std.debug.assert(gpa.deinit() == .ok);
+ const allocator = gpa.allocator();
+
+ var arena_state = std.heap.ArenaAllocator.init(allocator);
+ defer arena_state.deinit();
+ const arena = arena_state.allocator();
+
+ const stderr = std.io.getStdErr();
+ const stderr_config = std.io.tty.detectConfig(stderr);
+
+ const args = try std.process.argsAlloc(allocator);
+ defer std.process.argsFree(allocator, args);
+
+ if (args.len < 2) {
+ try renderErrorMessage(stderr.writer(), stderr_config, .err, "expected zig lib dir as first argument", .{});
+ std.os.exit(1);
+ }
+ const zig_lib_dir = args[1];
+ var cli_args = args[2..];
+
+ var zig_integration = false;
+ if (cli_args.len > 0 and std.mem.eql(u8, cli_args[0], "--zig-integration")) {
+ zig_integration = true;
+ cli_args = args[3..];
+ }
+
+ var error_handler: ErrorHandler = switch (zig_integration) {
+ true => .{
+ .server = .{
+ .out = std.io.getStdOut(),
+ .in = undefined, // won't be receiving messages
+ .receive_fifo = undefined, // won't be receiving messages
+ },
+ },
+ false => .{
+ .tty = stderr_config,
+ },
+ };
+
+ if (zig_integration) {
+ // Send progress with a special string to indicate that the building of the
+ // resinator binary is finished and we've moved on to actually compiling the .rc file
+ try error_handler.server.serveStringMessage(.progress, "<resinator>");
+ }
+
+ var options = options: {
+ var cli_diagnostics = cli.Diagnostics.init(allocator);
+ defer cli_diagnostics.deinit();
+ var options = cli.parse(allocator, cli_args, &cli_diagnostics) catch |err| switch (err) {
+ error.ParseError => {
+ try error_handler.emitCliDiagnostics(allocator, cli_args, &cli_diagnostics);
+ std.os.exit(1);
+ },
+ else => |e| return e,
+ };
+ try options.maybeAppendRC(std.fs.cwd());
+
+ if (!zig_integration) {
+ // print any warnings/notes
+ cli_diagnostics.renderToStdErr(args, stderr_config);
+ // If there was something printed, then add an extra newline separator
+ // so that there is a clear separation between the cli diagnostics and whatever
+ // gets printed after
+ if (cli_diagnostics.errors.items.len > 0) {
+ try stderr.writeAll("\n");
+ }
+ }
+ break :options options;
+ };
+ defer options.deinit();
+
+ if (options.print_help_and_exit) {
+ try cli.writeUsage(stderr.writer(), "zig rc");
+ return;
+ }
+
+ // Don't allow verbose when integrating with Zig via stdout
+ options.verbose = false;
+
+ const stdout_writer = std.io.getStdOut().writer();
+ if (options.verbose) {
+ try options.dumpVerbose(stdout_writer);
+ try stdout_writer.writeByte('\n');
+ }
+
+ var dependencies_list = std.ArrayList([]const u8).init(allocator);
+ defer {
+ for (dependencies_list.items) |item| {
+ allocator.free(item);
+ }
+ dependencies_list.deinit();
+ }
+ const maybe_dependencies_list: ?*std.ArrayList([]const u8) = if (options.depfile_path != null) &dependencies_list else null;
+
+ const include_paths = getIncludePaths(arena, options.auto_includes, zig_lib_dir) catch |err| switch (err) {
+ error.OutOfMemory => |e| return e,
+ else => |e| {
+ switch (e) {
+ error.MsvcIncludesNotFound => {
+ try error_handler.emitMessage(allocator, .err, "MSVC include paths could not be automatically detected", .{});
+ },
+ error.MingwIncludesNotFound => {
+ try error_handler.emitMessage(allocator, .err, "MinGW include paths could not be automatically detected", .{});
+ },
+ }
+ try error_handler.emitMessage(allocator, .note, "to disable auto includes, use the option /:auto-includes none", .{});
+ std.os.exit(1);
+ },
+ };
+
+ const full_input = full_input: {
+ if (options.preprocess != .no) {
+ var preprocessed_buf = std.ArrayList(u8).init(allocator);
+ errdefer preprocessed_buf.deinit();
+
+ // We're going to throw away everything except the final preprocessed output anyway,
+ // so we can use a scoped arena for everything else.
+ var aro_arena_state = std.heap.ArenaAllocator.init(allocator);
+ defer aro_arena_state.deinit();
+ const aro_arena = aro_arena_state.allocator();
+
+ var comp = aro.Compilation.init(aro_arena);
+ defer comp.deinit();
+
+ var argv = std.ArrayList([]const u8).init(comp.gpa);
+ defer argv.deinit();
+
+ try argv.append("arocc"); // dummy command name
+ try preprocess.appendAroArgs(aro_arena, &argv, options, include_paths);
+ try argv.append(options.input_filename);
+
+ if (options.verbose) {
+ try stdout_writer.writeAll("Preprocessor: arocc (built-in)\n");
+ for (argv.items[0 .. argv.items.len - 1]) |arg| {
+ try stdout_writer.print("{s} ", .{arg});
+ }
+ try stdout_writer.print("{s}\n\n", .{argv.items[argv.items.len - 1]});
+ }
+
+ preprocess.preprocess(&comp, preprocessed_buf.writer(), argv.items, maybe_dependencies_list) catch |err| switch (err) {
+ error.GeneratedSourceError => {
+ try error_handler.emitAroDiagnostics(allocator, "failed during preprocessor setup (this is always a bug):", &comp);
+ std.os.exit(1);
+ },
+ // ArgError can occur if e.g. the .rc file is not found
+ error.ArgError, error.PreprocessError => {
+ try error_handler.emitAroDiagnostics(allocator, "failed during preprocessing:", &comp);
+ std.os.exit(1);
+ },
+ error.StreamTooLong => {
+ try error_handler.emitMessage(allocator, .err, "failed during preprocessing: maximum file size exceeded", .{});
+ std.os.exit(1);
+ },
+ error.OutOfMemory => |e| return e,
+ };
+
+ break :full_input try preprocessed_buf.toOwnedSlice();
+ } else {
+ break :full_input std.fs.cwd().readFileAlloc(allocator, options.input_filename, std.math.maxInt(usize)) catch |err| {
+ try error_handler.emitMessage(allocator, .err, "unable to read input file path '{s}': {s}", .{ options.input_filename, @errorName(err) });
+ std.os.exit(1);
+ };
+ }
+ };
+ defer allocator.free(full_input);
+
+ if (options.preprocess == .only) {
+ try std.fs.cwd().writeFile(options.output_filename, full_input);
+ return;
+ }
+
+ // Note: We still want to run this when no-preprocess is set because:
+ // 1. We want to print accurate line numbers after removing multiline comments
+ // 2. We want to be able to handle an already-preprocessed input with #line commands in it
+ var mapping_results = try parseAndRemoveLineCommands(allocator, full_input, full_input, .{ .initial_filename = options.input_filename });
+ defer mapping_results.mappings.deinit(allocator);
+
+ const final_input = removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings) catch |err| switch (err) {
+ error.InvalidSourceMappingCollapse => {
+ try error_handler.emitMessage(allocator, .err, "failed during comment removal; this is a known bug", .{});
+ std.os.exit(1);
+ },
+ else => |e| return e,
+ };
+
+ var output_file = std.fs.cwd().createFile(options.output_filename, .{}) catch |err| {
+ try error_handler.emitMessage(allocator, .err, "unable to create output file '{s}': {s}", .{ options.output_filename, @errorName(err) });
+ std.os.exit(1);
+ };
+ var output_file_closed = false;
+ defer if (!output_file_closed) output_file.close();
+
+ var diagnostics = Diagnostics.init(allocator);
+ defer diagnostics.deinit();
+
+ var output_buffered_stream = std.io.bufferedWriter(output_file.writer());
+
+ compile(allocator, final_input, output_buffered_stream.writer(), .{
+ .cwd = std.fs.cwd(),
+ .diagnostics = &diagnostics,
+ .source_mappings = &mapping_results.mappings,
+ .dependencies_list = maybe_dependencies_list,
+ .ignore_include_env_var = options.ignore_include_env_var,
+ .extra_include_paths = options.extra_include_paths.items,
+ .system_include_paths = include_paths,
+ .default_language_id = options.default_language_id,
+ .default_code_page = options.default_code_page orelse .windows1252,
+ .verbose = options.verbose,
+ .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
+ .max_string_literal_codepoints = options.max_string_literal_codepoints,
+ .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
+ .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
+ }) catch |err| switch (err) {
+ error.ParseError, error.CompileError => {
+ try error_handler.emitDiagnostics(allocator, std.fs.cwd(), final_input, &diagnostics, mapping_results.mappings);
+ // Delete the output file on error
+ output_file.close();
+ output_file_closed = true;
+ // Failing to delete is not really a big deal, so swallow any errors
+ std.fs.cwd().deleteFile(options.output_filename) catch {};
+ std.os.exit(1);
+ },
+ else => |e| return e,
+ };
+
+ try output_buffered_stream.flush();
+
+ // print any warnings/notes
+ if (!zig_integration) {
+ diagnostics.renderToStdErr(std.fs.cwd(), final_input, stderr_config, mapping_results.mappings);
+ }
+
+ // write the depfile
+ if (options.depfile_path) |depfile_path| {
+ var depfile = std.fs.cwd().createFile(depfile_path, .{}) catch |err| {
+ try error_handler.emitMessage(allocator, .err, "unable to create depfile '{s}': {s}", .{ depfile_path, @errorName(err) });
+ std.os.exit(1);
+ };
+ defer depfile.close();
+
+ const depfile_writer = depfile.writer();
+ var depfile_buffered_writer = std.io.bufferedWriter(depfile_writer);
+ switch (options.depfile_fmt) {
+ .json => {
+ var write_stream = std.json.writeStream(depfile_buffered_writer.writer(), .{ .whitespace = .indent_2 });
+ defer write_stream.deinit();
+
+ try write_stream.beginArray();
+ for (dependencies_list.items) |dep_path| {
+ try write_stream.write(dep_path);
+ }
+ try write_stream.endArray();
+ },
+ }
+ try depfile_buffered_writer.flush();
+ }
+}
+
+/// Determines the system include directories to use, based on `auto_includes_option`.
+///
+/// - `.none` yields an empty list.
+/// - `.msvc` is only attempted on Windows hosts; it fails with `error.MsvcIncludesNotFound`
+///   when detection fails or yields no include directories.
+/// - `.gnu` fails with `error.MingwIncludesNotFound` when detection fails.
+/// - `.any` tries MSVC first (Windows hosts only) and falls back to `.gnu`.
+///
+/// `arena` is handed to `std.zig.LibCDirs.detect`; `error.OutOfMemory` from detection is
+/// always propagated unchanged rather than being reported as "includes not found".
+fn getIncludePaths(arena: std.mem.Allocator, auto_includes_option: cli.Options.AutoIncludes, zig_lib_dir: []const u8) ![]const []const u8 {
+    var includes = auto_includes_option;
+    if (builtin.target.os.tag != .windows) {
+        switch (includes) {
+            // MSVC can't be found when the host isn't Windows, so short-circuit.
+            .msvc => return error.MsvcIncludesNotFound,
+            // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts.
+            .any => includes = .gnu,
+            .none, .gnu => {},
+        }
+    }
+
+    while (true) {
+        switch (includes) {
+            .none => return &[_][]const u8{},
+            .any, .msvc => {
+                // MSVC is only detectable on Windows targets. This unreachable is to signify
+                // that .any and .msvc should be dealt with on non-Windows targets before this point,
+                // since getting MSVC include paths uses Windows-only APIs.
+                if (builtin.target.os.tag != .windows) unreachable;
+
+                const target_query: std.Target.Query = .{
+                    .os_tag = .windows,
+                    .abi = .msvc,
+                };
+                const target = std.zig.resolveTargetQueryOrFatal(target_query);
+                const is_native_abi = target_query.isNativeAbi();
+                const detected_libc = std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null) catch |err| switch (err) {
+                    // Running out of memory says nothing about whether MSVC is installed, so
+                    // propagate it (as the .gnu branch does) instead of falling back or
+                    // reporting a misleading "not found" error.
+                    error.OutOfMemory => |e| return e,
+                    else => {
+                        if (includes == .any) {
+                            // fall back to mingw
+                            includes = .gnu;
+                            continue;
+                        }
+                        return error.MsvcIncludesNotFound;
+                    },
+                };
+                if (detected_libc.libc_include_dir_list.len == 0) {
+                    if (includes == .any) {
+                        // fall back to mingw
+                        includes = .gnu;
+                        continue;
+                    }
+                    return error.MsvcIncludesNotFound;
+                }
+                return detected_libc.libc_include_dir_list;
+            },
+            .gnu => {
+                const target_query: std.Target.Query = .{
+                    .os_tag = .windows,
+                    .abi = .gnu,
+                };
+                const target = std.zig.resolveTargetQueryOrFatal(target_query);
+                const is_native_abi = target_query.isNativeAbi();
+                const detected_libc = std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null) catch |err| switch (err) {
+                    error.OutOfMemory => |e| return e,
+                    else => return error.MingwIncludesNotFound,
+                };
+                return detected_libc.libc_include_dir_list;
+            },
+        }
+    }
+}
+
+const ErrorBundle = std.zig.ErrorBundle;
+const SourceMappings = @import("source_mapping.zig").SourceMappings;
+
+/// Destination for error output: either a `std.zig.Server` (errors are sent as
+/// error bundles) or the terminal (errors are rendered to stderr using the given
+/// tty configuration).
+const ErrorHandler = union(enum) {
+    server: std.zig.Server,
+    tty: std.io.tty.Config,
+
+    /// Reports command line parsing diagnostics.
+    /// `args` is only used by the tty rendering path.
+    pub fn emitCliDiagnostics(
+        self: *ErrorHandler,
+        allocator: std.mem.Allocator,
+        args: []const []const u8,
+        diagnostics: *cli.Diagnostics,
+    ) !void {
+        switch (self.*) {
+            .tty => |tty_config| diagnostics.renderToStdErr(args, tty_config),
+            .server => |*server| {
+                var bundle = try cliDiagnosticsToErrorBundle(allocator, diagnostics);
+                defer bundle.deinit(allocator);
+
+                try server.serveErrorBundle(bundle);
+            },
+        }
+    }
+
+    /// Reports the diagnostics accumulated in `comp`, introduced by `fail_msg`.
+    pub fn emitAroDiagnostics(
+        self: *ErrorHandler,
+        allocator: std.mem.Allocator,
+        fail_msg: []const u8,
+        comp: *aro.Compilation,
+    ) !void {
+        switch (self.*) {
+            .tty => |tty_config| {
+                const stderr_writer = std.io.getStdErr().writer();
+                // extra newline to separate this line from the aro errors
+                try renderErrorMessage(stderr_writer, tty_config, .err, "{s}\n", .{fail_msg});
+                aro.Diagnostics.render(comp, tty_config);
+            },
+            .server => |*server| {
+                var bundle = try aroDiagnosticsToErrorBundle(allocator, fail_msg, comp);
+                defer bundle.deinit(allocator);
+
+                try server.serveErrorBundle(bundle);
+            },
+        }
+    }
+
+    /// Reports parse/compile diagnostics for `source`.
+    /// `cwd` is only used by the tty rendering path.
+    pub fn emitDiagnostics(
+        self: *ErrorHandler,
+        allocator: std.mem.Allocator,
+        cwd: std.fs.Dir,
+        source: []const u8,
+        diagnostics: *Diagnostics,
+        mappings: SourceMappings,
+    ) !void {
+        switch (self.*) {
+            .tty => |tty_config| diagnostics.renderToStdErr(cwd, source, tty_config, mappings),
+            .server => |*server| {
+                var bundle = try diagnosticsToErrorBundle(allocator, source, diagnostics, mappings);
+                defer bundle.deinit(allocator);
+
+                try server.serveErrorBundle(bundle);
+            },
+        }
+    }
+
+    /// Reports a single formatted message.
+    /// In server mode, anything other than `.err` is silently dropped.
+    pub fn emitMessage(
+        self: *ErrorHandler,
+        allocator: std.mem.Allocator,
+        msg_type: @import("utils.zig").ErrorMessageType,
+        comptime format: []const u8,
+        args: anytype,
+    ) !void {
+        switch (self.*) {
+            .tty => |tty_config| {
+                const stderr_writer = std.io.getStdErr().writer();
+                try renderErrorMessage(stderr_writer, tty_config, msg_type, format, args);
+            },
+            .server => |*server| {
+                // only emit errors
+                if (msg_type != .err) return;
+
+                var bundle = try errorStringToErrorBundle(allocator, format, args);
+                defer bundle.deinit(allocator);
+
+                try server.serveErrorBundle(bundle);
+            },
+        }
+    }
+};
+
+/// Converts command line diagnostics into an `ErrorBundle`.
+///
+/// The bundle always starts with a root "invalid command line option(s)" message.
+/// Each `.err` diagnostic becomes its own root message, with the `.note` diagnostics
+/// that directly follow it attached as notes. Warnings are not included in the bundle,
+/// and notes that follow a warning are dropped.
+///
+/// The returned bundle is allocated with `gpa`.
+fn cliDiagnosticsToErrorBundle(
+    gpa: std.mem.Allocator,
+    diagnostics: *cli.Diagnostics,
+) !ErrorBundle {
+    @setCold(true);
+
+    var bundle: ErrorBundle.Wip = undefined;
+    try bundle.init(gpa);
+    errdefer bundle.deinit();
+
+    try bundle.addRootErrorMessage(.{
+        .msg = try bundle.addString("invalid command line option(s)"),
+    });
+
+    var cur_err: ?ErrorBundle.ErrorMessage = null;
+    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
+    defer cur_notes.deinit(gpa);
+    for (diagnostics.errors.items) |err_details| {
+        switch (err_details.type) {
+            .err => {
+                if (cur_err) |err| {
+                    try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+                }
+                cur_err = .{
+                    .msg = try bundle.addString(err_details.msg.items),
+                };
+                cur_notes.clearRetainingCapacity();
+            },
+            .warning => {
+                // A warning ends the current error's run of notes. The pending error
+                // must be flushed before it is cleared, otherwise an error that is
+                // followed by a warning would never make it into the bundle.
+                if (cur_err) |err| {
+                    try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+                }
+                cur_err = null;
+                cur_notes.clearRetainingCapacity();
+            },
+            .note => {
+                // Notes without a preceding error (e.g. notes on a warning) are skipped
+                if (cur_err == null) continue;
+                cur_err.?.notes_len += 1;
+                try cur_notes.append(gpa, .{
+                    .msg = try bundle.addString(err_details.msg.items),
+                });
+            },
+        }
+    }
+    if (cur_err) |err| {
+        try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+    }
+
+    return try bundle.toOwnedBundle("");
+}
+
+/// Converts parse/compile diagnostics for `source` into an `ErrorBundle`.
+///
+/// Each `.err` diagnostic becomes a root message, with the `.note` diagnostics that
+/// directly follow it attached as notes. Hints and warnings are not included, and
+/// notes that follow a warning are dropped. Source locations are translated back to
+/// the original file/line via `mappings`.
+///
+/// The returned bundle is allocated with `gpa`.
+fn diagnosticsToErrorBundle(
+    gpa: std.mem.Allocator,
+    source: []const u8,
+    diagnostics: *Diagnostics,
+    mappings: SourceMappings,
+) !ErrorBundle {
+    @setCold(true);
+
+    var bundle: ErrorBundle.Wip = undefined;
+    try bundle.init(gpa);
+    errdefer bundle.deinit();
+
+    var msg_buf: std.ArrayListUnmanaged(u8) = .{};
+    defer msg_buf.deinit(gpa);
+    var cur_err: ?ErrorBundle.ErrorMessage = null;
+    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
+    defer cur_notes.deinit(gpa);
+    for (diagnostics.errors.items) |err_details| {
+        switch (err_details.type) {
+            .hint => continue,
+            // Clear the current error so that notes don't bleed into unassociated errors
+            .warning => {
+                // Flush the pending error before clearing it; otherwise an error that
+                // is followed by a warning would never make it into the bundle.
+                if (cur_err) |err| {
+                    try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+                }
+                cur_err = null;
+                cur_notes.clearRetainingCapacity();
+                continue;
+            },
+            .note => if (cur_err == null) continue,
+            .err => {},
+        }
+        const corresponding_span = mappings.getCorrespondingSpan(err_details.token.line_number).?;
+        const err_line = corresponding_span.start_line;
+        const err_filename = mappings.files.get(corresponding_span.filename_offset);
+
+        const source_line_start = err_details.token.getLineStartForErrorDisplay(source);
+        // Treat tab stops as 1 column wide for error display purposes,
+        // and add one to get a 1-based column
+        const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1;
+
+        msg_buf.clearRetainingCapacity();
+        try err_details.render(msg_buf.writer(gpa), source, diagnostics.strings.items);
+
+        const src_loc = src_loc: {
+            var src_loc: ErrorBundle.SourceLocation = .{
+                .src_path = try bundle.addString(err_filename),
+                .line = @intCast(err_line - 1), // 1-based -> 0-based
+                .column = @intCast(column - 1), // 1-based -> 0-based
+                .span_start = 0,
+                .span_main = 0,
+                .span_end = 0,
+            };
+            if (err_details.print_source_line) {
+                const source_line = err_details.token.getLineForErrorDisplay(source, source_line_start);
+                const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
+                src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len);
+                src_loc.span_main = @intCast(visual_info.point_offset);
+                src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len);
+                src_loc.source_line = try bundle.addString(source_line);
+            }
+            break :src_loc try bundle.addSourceLocation(src_loc);
+        };
+
+        switch (err_details.type) {
+            .err => {
+                if (cur_err) |err| {
+                    try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+                }
+                cur_err = .{
+                    .msg = try bundle.addString(msg_buf.items),
+                    .src_loc = src_loc,
+                };
+                cur_notes.clearRetainingCapacity();
+            },
+            .note => {
+                // cur_err is known to be non-null here: notes without a current
+                // error were skipped by the first switch
+                cur_err.?.notes_len += 1;
+                try cur_notes.append(gpa, .{
+                    .msg = try bundle.addString(msg_buf.items),
+                    .src_loc = src_loc,
+                });
+            },
+            .warning, .hint => unreachable,
+        }
+    }
+    if (cur_err) |err| {
+        try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+    }
+
+    return try bundle.toOwnedBundle("");
+}
+
+/// Adds `msg` to `wip` as a root error message, then reserves `notes.len` note
+/// slots and fills each one with the index of the corresponding added note.
+fn flushErrorMessageIntoBundle(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void {
+    try wip.addRootErrorMessage(msg);
+    const first_note_slot = try wip.reserveNotes(@intCast(notes.len));
+    for (notes, 0..) |note, offset| {
+        const note_index = wip.addErrorMessageAssumeCapacity(note);
+        wip.extra.items[first_note_slot + offset] = @intFromEnum(note_index);
+    }
+}
+
+/// Builds an `ErrorBundle` containing a single root error message produced by
+/// formatting `args` with `format`. The bundle is allocated with `allocator`.
+fn errorStringToErrorBundle(allocator: std.mem.Allocator, comptime format: []const u8, args: anytype) !ErrorBundle {
+    @setCold(true);
+
+    var wip: ErrorBundle.Wip = undefined;
+    try wip.init(allocator);
+    errdefer wip.deinit();
+
+    const formatted_msg = try wip.printString(format, args);
+    try wip.addRootErrorMessage(.{ .msg = formatted_msg });
+
+    return try wip.toOwnedBundle("");
+}
+
+/// Converts the diagnostics accumulated in `comp` into an `ErrorBundle`.
+///
+/// The bundle starts with a root message containing `fail_msg`. Each error / fatal
+/// error becomes its own root message, with the notes that directly follow it
+/// attached. Warnings and `.off` messages are not included, and notes that follow
+/// them are dropped.
+///
+/// The returned bundle is allocated with `gpa`.
+fn aroDiagnosticsToErrorBundle(
+    gpa: std.mem.Allocator,
+    fail_msg: []const u8,
+    comp: *aro.Compilation,
+) !ErrorBundle {
+    @setCold(true);
+
+    var bundle: ErrorBundle.Wip = undefined;
+    try bundle.init(gpa);
+    errdefer bundle.deinit();
+
+    try bundle.addRootErrorMessage(.{
+        .msg = try bundle.addString(fail_msg),
+    });
+
+    var msg_writer = MsgWriter.init(gpa);
+    defer msg_writer.deinit();
+    var cur_err: ?ErrorBundle.ErrorMessage = null;
+    var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
+    defer cur_notes.deinit(gpa);
+    for (comp.diagnostics.list.items) |msg| {
+        switch (msg.kind) {
+            // Clear the current error so that notes don't bleed into unassociated errors
+            .off, .warning => {
+                // Flush the pending error before clearing it; otherwise an error that
+                // is followed by a warning would never make it into the bundle.
+                if (cur_err) |err| {
+                    try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+                }
+                cur_err = null;
+                cur_notes.clearRetainingCapacity();
+                continue;
+            },
+            .note => if (cur_err == null) continue,
+            .@"fatal error", .@"error" => {},
+            .default => unreachable,
+        }
+        msg_writer.resetRetainingCapacity();
+        aro.Diagnostics.renderMessage(comp, &msg_writer, msg);
+
+        const src_loc = src_loc: {
+            if (msg_writer.path) |src_path| {
+                var src_loc: ErrorBundle.SourceLocation = .{
+                    .src_path = try bundle.addString(src_path),
+                    .line = msg_writer.line - 1, // 1-based -> 0-based
+                    .column = msg_writer.col - 1, // 1-based -> 0-based
+                    .span_start = 0,
+                    .span_main = 0,
+                    .span_end = 0,
+                };
+                if (msg_writer.source_line) |source_line| {
+                    // Only a single column is recorded by MsgWriter, so the span
+                    // start, main, and end all use it
+                    src_loc.span_start = msg_writer.span_main;
+                    src_loc.span_main = msg_writer.span_main;
+                    src_loc.span_end = msg_writer.span_main;
+                    src_loc.source_line = try bundle.addString(source_line);
+                }
+                break :src_loc try bundle.addSourceLocation(src_loc);
+            }
+            break :src_loc ErrorBundle.SourceLocationIndex.none;
+        };
+
+        switch (msg.kind) {
+            .@"fatal error", .@"error" => {
+                if (cur_err) |err| {
+                    try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+                }
+                cur_err = .{
+                    .msg = try bundle.addString(msg_writer.buf.items),
+                    .src_loc = src_loc,
+                };
+                cur_notes.clearRetainingCapacity();
+            },
+            .note => {
+                // cur_err is known to be non-null here: notes without a current
+                // error were skipped by the first switch
+                cur_err.?.notes_len += 1;
+                try cur_notes.append(gpa, .{
+                    .msg = try bundle.addString(msg_writer.buf.items),
+                    .src_loc = src_loc,
+                });
+            },
+            .off, .warning, .default => unreachable,
+        }
+    }
+    if (cur_err) |err| {
+        try flushErrorMessageIntoBundle(&bundle, err, cur_notes.items);
+    }
+
+    return try bundle.toOwnedBundle("");
+}
+
+// Similar to aro.Diagnostics.MsgWriter but:
+// - Writes to an ArrayList
+// - Only prints the message itself (no location, source line, error: prefix, etc)
+// - Keeps track of source path/line/col instead
+const MsgWriter = struct {
+    buf: std.ArrayList(u8),
+    path: ?[]const u8 = null,
+    // 1-indexed
+    line: u32 = undefined,
+    col: u32 = undefined,
+    source_line: ?[]const u8 = null,
+    span_main: u32 = undefined,
+
+    /// Creates a writer with an empty message buffer backed by `allocator`.
+    fn init(allocator: std.mem.Allocator) MsgWriter {
+        const empty_buf = std.ArrayList(u8).init(allocator);
+        return .{ .buf = empty_buf };
+    }
+
+    /// Frees the message buffer.
+    fn deinit(m: *MsgWriter) void {
+        m.buf.deinit();
+    }
+
+    /// Prepares the writer for the next message: forgets the recorded path and
+    /// source line and empties the buffer while keeping its allocation.
+    fn resetRetainingCapacity(m: *MsgWriter) void {
+        m.source_line = null;
+        m.path = null;
+        m.buf.clearRetainingCapacity();
+    }
+
+    /// Appends formatted text to the message buffer; write failures are ignored.
+    pub fn print(m: *MsgWriter, comptime fmt: []const u8, args: anytype) void {
+        const buf_writer = m.buf.writer();
+        buf_writer.print(fmt, args) catch {};
+    }
+
+    /// Appends `msg` to the message buffer; write failures are ignored.
+    pub fn write(m: *MsgWriter, msg: []const u8) void {
+        const buf_writer = m.buf.writer();
+        buf_writer.writeAll(msg) catch {};
+    }
+
+    /// No-op: colors are not recorded.
+    pub fn setColor(m: *MsgWriter, color: std.io.tty.Color) void {
+        _ = color;
+        _ = m;
+    }
+
+    /// Records the location of the message instead of printing it.
+    pub fn location(m: *MsgWriter, path: []const u8, line: u32, col: u32) void {
+        m.col = col;
+        m.line = line;
+        m.path = path;
+    }
+
+    /// No-op: the message kind prefix is not printed.
+    pub fn start(m: *MsgWriter, kind: aro.Diagnostics.Kind) void {
+        _ = kind;
+        _ = m;
+    }
+
+    /// Records the source line (if any) and the column instead of printing them.
+    pub fn end(m: *MsgWriter, maybe_line: ?[]const u8, col: u32, end_with_splice: bool) void {
+        _ = end_with_splice;
+        m.span_main = col;
+        m.source_line = maybe_line;
+    }
+};
diff --git a/lib/compiler/resinator/parse.zig b/lib/compiler/resinator/parse.zig
@@ -0,0 +1,1897 @@
+const std = @import("std");
+const Lexer = @import("lex.zig").Lexer;
+const Token = @import("lex.zig").Token;
+const Node = @import("ast.zig").Node;
+const Tree = @import("ast.zig").Tree;
+const CodePageLookup = @import("ast.zig").CodePageLookup;
+const Resource = @import("rc.zig").Resource;
+const Allocator = std.mem.Allocator;
+const ErrorDetails = @import("errors.zig").ErrorDetails;
+const Diagnostics = @import("errors.zig").Diagnostics;
+const SourceBytes = @import("literals.zig").SourceBytes;
+const Compiler = @import("compile.zig").Compiler;
+const rc = @import("rc.zig");
+const res = @import("res.zig");
+
+// TODO: Make these configurable?
+pub const max_nested_menu_level: u32 = 512;
+pub const max_nested_version_level: u32 = 512;
+pub const max_nested_expression_level: u32 = 200;
+
+pub const Parser = struct {
+ const Self = @This();
+
+ lexer: *Lexer,
+ /// values that need to be initialized per-parse
+ state: Parser.State = undefined,
+ options: Parser.Options,
+
+ pub const Error = error{ParseError} || Allocator.Error;
+
+ pub const Options = struct {
+ warn_instead_of_error_on_invalid_code_page: bool = false,
+ };
+
+    /// Creates a parser that reads from `lexer` using `options`.
+    /// `state` keeps its `undefined` default; `parse` assigns it.
+    pub fn init(lexer: *Lexer, options: Options) Parser {
+        return .{
+            .options = options,
+            .lexer = lexer,
+        };
+    }
+
+ /// Per-parse values; `parse` assigns a fresh `State` every time it is called.
+ pub const State = struct {
+ /// The current token; left `undefined` by `parse` until a token is read.
+ token: Token,
+ /// Left `undefined` by `parse`; presumably the lexer copy used for lookahead (TODO confirm).
+ lookahead_lexer: Lexer,
+ /// The allocator that was passed to `parse`; used for temporary lists while parsing.
+ allocator: Allocator,
+ /// Allocator of the arena created by `parse`; AST nodes and the resulting `Tree` are created with it.
+ arena: Allocator,
+ /// The diagnostics instance that was passed to `parse`.
+ diagnostics: *Diagnostics,
+ /// Initialized with the lexer's default code page and stored in the resulting `Tree` as `input_code_pages`.
+ input_code_page_lookup: CodePageLookup,
+ /// Initialized with the lexer's default code page and stored in the resulting `Tree` as `output_code_pages`.
+ output_code_page_lookup: CodePageLookup,
+ };
+
+    /// Parses the lexer's entire buffer and returns the resulting tree.
+    /// The tree (and everything it references) lives in an arena backed by
+    /// `allocator`; the arena's state is stored in the tree itself. If parsing
+    /// fails, the arena is freed before the error is returned.
+    pub fn parse(self: *Self, allocator: Allocator, diagnostics: *Diagnostics) Error!*Tree {
+        var arena = std.heap.ArenaAllocator.init(allocator);
+        errdefer arena.deinit();
+        const arena_allocator = arena.allocator();
+        const default_code_page = self.lexer.default_code_page;
+
+        self.state = .{
+            .token = undefined,
+            .lookahead_lexer = undefined,
+            .allocator = allocator,
+            .arena = arena_allocator,
+            .diagnostics = diagnostics,
+            .input_code_page_lookup = CodePageLookup.init(arena_allocator, default_code_page),
+            .output_code_page_lookup = CodePageLookup.init(arena_allocator, default_code_page),
+        };
+
+        const root_node = try self.parseRoot();
+
+        const result = try arena_allocator.create(Tree);
+        result.* = .{
+            .node = root_node,
+            .input_code_pages = self.state.input_code_page_lookup,
+            .output_code_pages = self.state.output_code_page_lookup,
+            .source = self.lexer.buffer,
+            // Read after the final arena allocation so the stored state is up to date
+            .arena = arena.state,
+            .allocator = allocator,
+        };
+        return result;
+    }
+
+    /// Parses all top-level statements up to the end of the input and returns
+    /// them wrapped in a `Node.Root` allocated in the parser's arena.
+    fn parseRoot(self: *Self) Error!*Node {
+        // Temporary list; its contents are copied into the arena below
+        var top_level = std.ArrayList(*Node).init(self.state.allocator);
+        defer top_level.deinit();
+
+        try self.parseStatements(&top_level);
+        try self.check(.eof);
+
+        const root = try self.state.arena.create(Node.Root);
+        const body = try self.state.arena.dupe(*Node, top_level.items);
+        root.* = .{ .body = body };
+        return &root.base;
+    }
+
+    /// Parses statements until the end of the input is reached, appending each
+    /// one to `statements`. On return, the current token is the eof token.
+    fn parseStatements(self: *Self, statements: *std.ArrayList(*Node)) Error!void {
+        while (true) {
+            try self.nextToken(.whitespace_delimiter_only);
+            switch (self.state.token.id) {
+                .eof => return,
+                // The Win32 compiler will sometimes try to recover from errors
+                // and then restart parsing afterwards. We don't ever do this
+                // because it almost always leads to unhelpful error messages
+                // (usually it will end up with bogus things like 'file
+                // not found: {')
+                else => try statements.append(try self.parseStatement()),
+            }
+        }
+    }
+
+    /// Collects any common resource attributes that follow the current token.
+    /// On entry, the current token must be the one just before the possible attributes.
+    /// On return, the current token is the last attribute that was consumed, or is
+    /// unchanged if there were no attributes.
+    /// The returned slice is allocated by the parser's arena
+    fn parseCommonResourceAttributes(self: *Self) ![]Token {
+        var attributes = std.ArrayListUnmanaged(Token){};
+        while (true) {
+            const candidate = try self.lookaheadToken(.normal);
+            if (candidate.id != .literal) break;
+            const candidate_text = candidate.slice(self.lexer.buffer);
+            if (!rc.CommonResourceAttributes.map.has(candidate_text)) break;
+
+            try attributes.append(self.state.arena, candidate);
+            // Advance past the token that was just looked at
+            self.nextToken(.normal) catch unreachable;
+        }
+        return attributes.toOwnedSlice(self.state.arena);
+    }
+
+ /// Expects the current token to have already been dealt with, and that the
+ /// optional statements will potentially start on the next token.
+ /// After return, the current token will be the token immediately before the end of the
+ /// optional statements (if any). If there are no optional statements, the
+ /// current token is unchanged.
+ /// `resource` determines which keywords are recognized: the dialog-specific
+ /// keyword map is only consulted for DIALOG and DIALOGEX resources.
+ /// The returned slice is allocated by the parser's arena
+ fn parseOptionalStatements(self: *Self, resource: Resource) ![]*Node {
+ var optional_statements = std.ArrayListUnmanaged(*Node){};
+ while (true) {
+ // Only look ahead here so that a token that doesn't start an optional
+ // statement is left unconsumed
+ const lookahead_token = try self.lookaheadToken(.normal);
+ if (lookahead_token.id != .literal) break;
+ const slice = lookahead_token.slice(self.lexer.buffer);
+ // Try the general keyword map first, then the dialog-specific map for
+ // DIALOG/DIALOGEX; anything unrecognized ends the optional statements
+ const optional_statement_type = rc.OptionalStatements.map.get(slice) orelse switch (resource) {
+ .dialog, .dialogex => rc.OptionalStatements.dialog_map.get(slice) orelse break,
+ else => break,
+ };
+ // Consume the keyword token that was just looked at
+ self.nextToken(.normal) catch unreachable;
+ switch (optional_statement_type) {
+ .language => {
+ const language = try self.parseLanguageStatement();
+ try optional_statements.append(self.state.arena, language);
+ },
+ // Number only
+ .version, .characteristics, .style, .exstyle => {
+ const identifier = self.state.token;
+ const value = try self.parseExpression(.{
+ // NOT expressions are only allowed in STYLE and EXSTYLE values
+ .can_contain_not_expressions = optional_statement_type == .style or optional_statement_type == .exstyle,
+ .allowed_types = .{ .number = true },
+ });
+ const node = try self.state.arena.create(Node.SimpleStatement);
+ node.* = .{
+ .identifier = identifier,
+ .value = value,
+ };
+ try optional_statements.append(self.state.arena, &node.base);
+ },
+ // String only
+ .caption => {
+ const identifier = self.state.token;
+ try self.nextToken(.normal);
+ const value = self.state.token;
+ if (!value.isStringLiteral()) {
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .expected_something_else,
+ .token = value,
+ .extra = .{ .expected_types = .{
+ .string_literal = true,
+ } },
+ });
+ }
+ const value_node = try self.state.arena.create(Node.Literal);
+ value_node.* = .{
+ .token = value,
+ };
+ const node = try self.state.arena.create(Node.SimpleStatement);
+ node.* = .{
+ .identifier = identifier,
+ .value = &value_node.base,
+ };
+ try optional_statements.append(self.state.arena, &node.base);
+ },
+ // String or number
+ .class => {
+ const identifier = self.state.token;
+ const value = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
+ const node = try self.state.arena.create(Node.SimpleStatement);
+ node.* = .{
+ .identifier = identifier,
+ .value = value,
+ };
+ try optional_statements.append(self.state.arena, &node.base);
+ },
+ // Special case
+ .menu => {
+ const identifier = self.state.token;
+ // Unlike the other optional statements, the value is lexed with
+ // .whitespace_delimiter_only and must be a literal token
+ try self.nextToken(.whitespace_delimiter_only);
+ try self.check(.literal);
+ const value_node = try self.state.arena.create(Node.Literal);
+ value_node.* = .{
+ .token = self.state.token,
+ };
+ const node = try self.state.arena.create(Node.SimpleStatement);
+ node.* = .{
+ .identifier = identifier,
+ .value = &value_node.base,
+ };
+ try optional_statements.append(self.state.arena, &node.base);
+ },
+ .font => {
+ const identifier = self.state.token;
+ const point_size = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+ // The comma between point_size and typeface is both optional and
+ // there can be any number of them
+ try self.skipAnyCommas();
+
+ try self.nextToken(.normal);
+ const typeface = self.state.token;
+ if (!typeface.isStringLiteral()) {
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .expected_something_else,
+ .token = typeface,
+ .extra = .{ .expected_types = .{
+ .string_literal = true,
+ } },
+ });
+ }
+
+ // Values that can only be specified for DIALOGEX resources; they
+ // stay null for DIALOG resources and when omitted
+ const ExSpecificValues = struct {
+ weight: ?*Node = null,
+ italic: ?*Node = null,
+ char_set: ?*Node = null,
+ };
+ var ex_specific = ExSpecificValues{};
+ ex_specific: {
+ var optional_param_parser = OptionalParamParser{ .parser = self };
+ switch (resource) {
+ .dialogex => {
+ // The parameters are parsed in order (weight, italic, char_set),
+ // stopping at the first point where no further parameter follows
+ {
+ ex_specific.weight = try optional_param_parser.parse(.{});
+ if (optional_param_parser.finished) break :ex_specific;
+ }
+ {
+ if (!(try self.parseOptionalToken(.comma))) break :ex_specific;
+ ex_specific.italic = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+ }
+ {
+ ex_specific.char_set = try optional_param_parser.parse(.{});
+ if (optional_param_parser.finished) break :ex_specific;
+ }
+ },
+ .dialog => {},
+ else => unreachable, // only DIALOG and DIALOGEX have FONT optional-statements
+ }
+ }
+
+ const node = try self.state.arena.create(Node.FontStatement);
+ node.* = .{
+ .identifier = identifier,
+ .point_size = point_size,
+ .typeface = typeface,
+ .weight = ex_specific.weight,
+ .italic = ex_specific.italic,
+ .char_set = ex_specific.char_set,
+ };
+ try optional_statements.append(self.state.arena, &node.base);
+ },
+ }
+ }
+ return optional_statements.toOwnedSlice(self.state.arena);
+ }
+
+ /// Expects the current token to be the first token of the statement.
+ fn parseStatement(self: *Self) Error!*Node {
+ const first_token = self.state.token;
+ std.debug.assert(first_token.id == .literal);
+
+ if (rc.TopLevelKeywords.map.get(first_token.slice(self.lexer.buffer))) |keyword| switch (keyword) {
+ .language => {
+ const language_statement = try self.parseLanguageStatement();
+ return language_statement;
+ },
+ .version, .characteristics => {
+ const identifier = self.state.token;
+ const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+ const node = try self.state.arena.create(Node.SimpleStatement);
+ node.* = .{
+ .identifier = identifier,
+ .value = value,
+ };
+ return &node.base;
+ },
+ .stringtable => {
+ // common resource attributes must all be contiguous and come before optional-statements
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+ const optional_statements = try self.parseOptionalStatements(.stringtable);
+
+ try self.nextToken(.normal);
+ const begin_token = self.state.token;
+ try self.check(.begin);
+
+ var strings = std.ArrayList(*Node).init(self.state.allocator);
+ defer strings.deinit();
+ while (true) {
+ const maybe_end_token = try self.lookaheadToken(.normal);
+ switch (maybe_end_token.id) {
+ .end => {
+ self.nextToken(.normal) catch unreachable;
+ break;
+ },
+ .eof => {
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .unfinished_string_table_block,
+ .token = maybe_end_token,
+ });
+ },
+ else => {},
+ }
+ const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+ const comma_token: ?Token = if (try self.parseOptionalToken(.comma)) self.state.token else null;
+
+ try self.nextToken(.normal);
+ if (self.state.token.id != .quoted_ascii_string and self.state.token.id != .quoted_wide_string) {
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .expected_something_else,
+ .token = self.state.token,
+ .extra = .{ .expected_types = .{ .string_literal = true } },
+ });
+ }
+
+ const string_node = try self.state.arena.create(Node.StringTableString);
+ string_node.* = .{
+ .id = id_expression,
+ .maybe_comma = comma_token,
+ .string = self.state.token,
+ };
+ try strings.append(&string_node.base);
+ }
+
+ if (strings.items.len == 0) {
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .expected_token, // TODO: probably a more specific error message
+ .token = self.state.token,
+ .extra = .{ .expected = .number },
+ });
+ }
+
+ const end_token = self.state.token;
+ try self.check(.end);
+
+ const node = try self.state.arena.create(Node.StringTable);
+ node.* = .{
+ .type = first_token,
+ .common_resource_attributes = common_resource_attributes,
+ .optional_statements = optional_statements,
+ .begin_token = begin_token,
+ .strings = try self.state.arena.dupe(*Node, strings.items),
+ .end_token = end_token,
+ };
+ return &node.base;
+ },
+ };
+
+ // The Win32 RC compiler allows for a 'dangling' literal at the end of a file
+ // (as long as it's not a valid top-level keyword), and there is actually an
+ // .rc file with a such a dangling literal in the Windows-classic-samples set
+ // of projects. So, we have special compatibility for this particular case.
+ const maybe_eof = try self.lookaheadToken(.whitespace_delimiter_only);
+ if (maybe_eof.id == .eof) {
+ // TODO: emit warning
+ var context = try self.state.arena.alloc(Token, 2);
+ context[0] = first_token;
+ context[1] = maybe_eof;
+ const invalid_node = try self.state.arena.create(Node.Invalid);
+ invalid_node.* = .{
+ .context = context,
+ };
+ return &invalid_node.base;
+ }
+
+ const id_token = first_token;
+ const id_code_page = self.lexer.current_code_page;
+ try self.nextToken(.whitespace_delimiter_only);
+ const resource = try self.checkResource();
+ const type_token = self.state.token;
+
+ if (resource == .string_num) {
+ try self.addErrorDetails(.{
+ .err = .string_resource_as_numeric_type,
+ .token = type_token,
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .string_resource_as_numeric_type,
+ .token = type_token,
+ .type = .note,
+ .print_source_line = false,
+ });
+ }
+
+ if (resource == .font) {
+ const id_bytes = SourceBytes{
+ .slice = id_token.slice(self.lexer.buffer),
+ .code_page = id_code_page,
+ };
+ const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(id_bytes);
+ if (maybe_ordinal == null) {
+ const would_be_win32_rc_ordinal = res.NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes);
+ if (would_be_win32_rc_ordinal) |win32_rc_ordinal| {
+ try self.addErrorDetails(ErrorDetails{
+ .err = .id_must_be_ordinal,
+ .token = id_token,
+ .extra = .{ .resource = resource },
+ });
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .win32_non_ascii_ordinal,
+ .token = id_token,
+ .type = .note,
+ .print_source_line = false,
+ .extra = .{ .number = win32_rc_ordinal.ordinal },
+ });
+ } else {
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .id_must_be_ordinal,
+ .token = id_token,
+ .extra = .{ .resource = resource },
+ });
+ }
+ }
+ }
+
+ switch (resource) {
+ .accelerators => {
+ // common resource attributes must all be contiguous and come before optional-statements
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+ const optional_statements = try self.parseOptionalStatements(resource);
+
+ try self.nextToken(.normal);
+ const begin_token = self.state.token;
+ try self.check(.begin);
+
+ var accelerators = std.ArrayListUnmanaged(*Node){};
+
+ while (true) {
+ const lookahead = try self.lookaheadToken(.normal);
+ switch (lookahead.id) {
+ .end, .eof => {
+ self.nextToken(.normal) catch unreachable;
+ break;
+ },
+ else => {},
+ }
+ const event = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
+
+ try self.nextToken(.normal);
+ try self.check(.comma);
+
+ const idvalue = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+ var type_and_options = std.ArrayListUnmanaged(Token){};
+ while (true) {
+ if (!(try self.parseOptionalToken(.comma))) break;
+
+ try self.nextToken(.normal);
+ if (!rc.AcceleratorTypeAndOptions.map.has(self.tokenSlice())) {
+ return self.addErrorDetailsAndFail(.{
+ .err = .expected_something_else,
+ .token = self.state.token,
+ .extra = .{ .expected_types = .{
+ .accelerator_type_or_option = true,
+ } },
+ });
+ }
+ try type_and_options.append(self.state.arena, self.state.token);
+ }
+
+ const node = try self.state.arena.create(Node.Accelerator);
+ node.* = .{
+ .event = event,
+ .idvalue = idvalue,
+ .type_and_options = try type_and_options.toOwnedSlice(self.state.arena),
+ };
+ try accelerators.append(self.state.arena, &node.base);
+ }
+
+ const end_token = self.state.token;
+ try self.check(.end);
+
+ const node = try self.state.arena.create(Node.Accelerators);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .optional_statements = optional_statements,
+ .begin_token = begin_token,
+ .accelerators = try accelerators.toOwnedSlice(self.state.arena),
+ .end_token = end_token,
+ };
+ return &node.base;
+ },
+ .dialog, .dialogex => {
+ // common resource attributes must all be contiguous and come before optional-statements
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+
+ const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+ _ = try self.parseOptionalToken(.comma);
+
+ const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+ _ = try self.parseOptionalToken(.comma);
+
+ const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+ _ = try self.parseOptionalToken(.comma);
+
+ const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+ var optional_param_parser = OptionalParamParser{ .parser = self };
+ const help_id: ?*Node = try optional_param_parser.parse(.{});
+
+ const optional_statements = try self.parseOptionalStatements(resource);
+
+ try self.nextToken(.normal);
+ const begin_token = self.state.token;
+ try self.check(.begin);
+
+ var controls = std.ArrayListUnmanaged(*Node){};
+ defer controls.deinit(self.state.allocator);
+ while (try self.parseControlStatement(resource)) |control_node| {
+ // The number of controls must fit in a u16 in order for it to
+ // be able to be written into the relevant field in the .res data.
+ if (controls.items.len >= std.math.maxInt(u16)) {
+ try self.addErrorDetails(.{
+ .err = .too_many_dialog_controls_or_toolbar_buttons,
+ .token = id_token,
+ .extra = .{ .resource = resource },
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .too_many_dialog_controls_or_toolbar_buttons,
+ .type = .note,
+ .token = control_node.getFirstToken(),
+ .token_span_end = control_node.getLastToken(),
+ .extra = .{ .resource = resource },
+ });
+ }
+
+ try controls.append(self.state.allocator, control_node);
+ }
+
+ try self.nextToken(.normal);
+ const end_token = self.state.token;
+ try self.check(.end);
+
+ const node = try self.state.arena.create(Node.Dialog);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .x = x,
+ .y = y,
+ .width = width,
+ .height = height,
+ .help_id = help_id,
+ .optional_statements = optional_statements,
+ .begin_token = begin_token,
+ .controls = try self.state.arena.dupe(*Node, controls.items),
+ .end_token = end_token,
+ };
+ return &node.base;
+ },
+ .toolbar => {
+ // common resource attributes must all be contiguous and come before optional-statements
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+
+ const button_width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+ try self.nextToken(.normal);
+ try self.check(.comma);
+
+ const button_height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+ try self.nextToken(.normal);
+ const begin_token = self.state.token;
+ try self.check(.begin);
+
+ var buttons = std.ArrayListUnmanaged(*Node){};
+ defer buttons.deinit(self.state.allocator);
+ while (try self.parseToolbarButtonStatement()) |button_node| {
+ // The number of buttons must fit in a u16 in order for it to
+ // be able to be written into the relevant field in the .res data.
+ if (buttons.items.len >= std.math.maxInt(u16)) {
+ try self.addErrorDetails(.{
+ .err = .too_many_dialog_controls_or_toolbar_buttons,
+ .token = id_token,
+ .extra = .{ .resource = resource },
+ });
+ return self.addErrorDetailsAndFail(.{
+ .err = .too_many_dialog_controls_or_toolbar_buttons,
+ .type = .note,
+ .token = button_node.getFirstToken(),
+ .token_span_end = button_node.getLastToken(),
+ .extra = .{ .resource = resource },
+ });
+ }
+
+ try buttons.append(self.state.allocator, button_node);
+ }
+
+ try self.nextToken(.normal);
+ const end_token = self.state.token;
+ try self.check(.end);
+
+ const node = try self.state.arena.create(Node.Toolbar);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .button_width = button_width,
+ .button_height = button_height,
+ .begin_token = begin_token,
+ .buttons = try self.state.arena.dupe(*Node, buttons.items),
+ .end_token = end_token,
+ };
+ return &node.base;
+ },
+ .menu, .menuex => {
+ // common resource attributes must all be contiguous and come before optional-statements
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+ // help id is optional but must come between common resource attributes and optional-statements
+ var help_id: ?*Node = null;
+ // Note: No comma is allowed before or after help_id of MENUEX and help_id is not
+ // a possible field of MENU.
+ if (resource == .menuex and try self.lookaheadCouldBeNumberExpression(.not_disallowed)) {
+ help_id = try self.parseExpression(.{
+ .is_known_to_be_number_expression = true,
+ });
+ }
+ const optional_statements = try self.parseOptionalStatements(.stringtable);
+
+ try self.nextToken(.normal);
+ const begin_token = self.state.token;
+ try self.check(.begin);
+
+ var items = std.ArrayListUnmanaged(*Node){};
+ defer items.deinit(self.state.allocator);
+ while (try self.parseMenuItemStatement(resource, id_token, 1)) |item_node| {
+ try items.append(self.state.allocator, item_node);
+ }
+
+ try self.nextToken(.normal);
+ const end_token = self.state.token;
+ try self.check(.end);
+
+ if (items.items.len == 0) {
+ return self.addErrorDetailsAndFail(.{
+ .err = .empty_menu_not_allowed,
+ .token = type_token,
+ });
+ }
+
+ const node = try self.state.arena.create(Node.Menu);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .optional_statements = optional_statements,
+ .help_id = help_id,
+ .begin_token = begin_token,
+ .items = try self.state.arena.dupe(*Node, items.items),
+ .end_token = end_token,
+ };
+ return &node.base;
+ },
+ .versioninfo => {
+ // common resource attributes must all be contiguous and come before optional-statements
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+
+ var fixed_info = std.ArrayListUnmanaged(*Node){};
+ while (try self.parseVersionStatement()) |version_statement| {
+ try fixed_info.append(self.state.arena, version_statement);
+ }
+
+ try self.nextToken(.normal);
+ const begin_token = self.state.token;
+ try self.check(.begin);
+
+ var block_statements = std.ArrayListUnmanaged(*Node){};
+ while (try self.parseVersionBlockOrValue(id_token, 1)) |block_node| {
+ try block_statements.append(self.state.arena, block_node);
+ }
+
+ try self.nextToken(.normal);
+ const end_token = self.state.token;
+ try self.check(.end);
+
+ const node = try self.state.arena.create(Node.VersionInfo);
+ node.* = .{
+ .id = id_token,
+ .versioninfo = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .fixed_info = try fixed_info.toOwnedSlice(self.state.arena),
+ .begin_token = begin_token,
+ .block_statements = try block_statements.toOwnedSlice(self.state.arena),
+ .end_token = end_token,
+ };
+ return &node.base;
+ },
+ .dlginclude => {
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+
+ const filename_expression = try self.parseExpression(.{
+ .allowed_types = .{ .string = true },
+ });
+
+ const node = try self.state.arena.create(Node.ResourceExternal);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .filename = filename_expression,
+ };
+ return &node.base;
+ },
+ .stringtable => {
+ return self.addErrorDetailsAndFail(.{
+ .err = .name_or_id_not_allowed,
+ .token = id_token,
+ .extra = .{ .resource = resource },
+ });
+ },
+ // Just try everything as a 'generic' resource (raw data or external file)
+ // TODO: More fine-grained switch cases as necessary
+ else => {
+ const common_resource_attributes = try self.parseCommonResourceAttributes();
+
+ const maybe_begin = try self.lookaheadToken(.normal);
+ if (maybe_begin.id == .begin) {
+ self.nextToken(.normal) catch unreachable;
+
+ if (!resource.canUseRawData()) {
+ try self.addErrorDetails(ErrorDetails{
+ .err = .resource_type_cant_use_raw_data,
+ .token = maybe_begin,
+ .extra = .{ .resource = resource },
+ });
+ return self.addErrorDetailsAndFail(ErrorDetails{
+ .err = .resource_type_cant_use_raw_data,
+ .type = .note,
+ .print_source_line = false,
+ .token = maybe_begin,
+ });
+ }
+
+ const raw_data = try self.parseRawDataBlock();
+ const end_token = self.state.token;
+
+ const node = try self.state.arena.create(Node.ResourceRawData);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .begin_token = maybe_begin,
+ .raw_data = raw_data,
+ .end_token = end_token,
+ };
+ return &node.base;
+ }
+
+ const filename_expression = try self.parseExpression(.{
+ // Don't tell the user that numbers are accepted since we error on
+ // number expressions and regular number literals are treated as unquoted
+ // literals rather than numbers, so from the user's perspective
+ // numbers aren't really allowed.
+ .expected_types_override = .{
+ .literal = true,
+ .string_literal = true,
+ },
+ });
+
+ const node = try self.state.arena.create(Node.ResourceExternal);
+ node.* = .{
+ .id = id_token,
+ .type = type_token,
+ .common_resource_attributes = common_resource_attributes,
+ .filename = filename_expression,
+ };
+ return &node.base;
+ },
+ }
+ }
+
+    /// Parses the contents of a raw data block: number and string expressions,
+    /// with any commas between them skipped.
+    ///
+    /// Expects the current token to be a begin token.
+    /// After return, the current token will be the end token.
+    ///
+    /// The returned slice is allocated with `self.state.arena`.
+    fn parseRawDataBlock(self: *Self) Error![]*Node {
+        // Scratch list only; the result is copied into the arena before returning,
+        // so the list itself is always freed on the way out.
+        var data_nodes = std.ArrayList(*Node).init(self.state.allocator);
+        defer data_nodes.deinit();
+
+        while (true) {
+            const peeked = try self.lookaheadToken(.normal);
+
+            if (peeked.id == .end) {
+                // The token was already looked at successfully, so consuming it
+                // is asserted to not fail.
+                self.nextToken(.normal) catch unreachable;
+                break;
+            }
+            if (peeked.id == .eof) {
+                return self.addErrorDetailsAndFail(.{
+                    .err = .unfinished_raw_data_block,
+                    .token = peeked,
+                });
+            }
+            if (peeked.id == .comma) {
+                // A comma is only an error when it is the very first thing in the block
+                if (data_nodes.items.len == 0) {
+                    return self.addErrorDetailsAndFail(.{
+                        .err = .expected_something_else,
+                        .token = peeked,
+                        .extra = .{ .expected_types = .{
+                            .number = true,
+                            .number_expression = true,
+                            .string_literal = true,
+                        } },
+                    });
+                }
+                // Any other comma is simply skipped
+                self.nextToken(.normal) catch unreachable;
+                continue;
+            }
+
+            const data_node = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
+            try data_nodes.append(data_node);
+
+            if (!data_node.isNumberExpression()) continue;
+
+            // <number expression>) is an error
+            const after_number = try self.lookaheadToken(.normal);
+            if (after_number.id == .close_paren) {
+                return self.addErrorDetailsAndFail(.{
+                    .err = .expected_token,
+                    .token = after_number,
+                    .extra = .{ .expected = .operator },
+                });
+            }
+        }
+
+        return try self.state.arena.dupe(*Node, data_nodes.items);
+    }
+
+    /// Parses one control statement inside a dialog resource, or returns null if the
+    /// next token is not a known control keyword.
+    ///
+    /// Expects the current token to be handled, and that the control statement will
+    /// begin on the next token.
+    /// After return, the current token will be the token immediately before the end of the
+    /// control statement (or unchanged if the function returns null).
+    fn parseControlStatement(self: *Self, resource: Resource) Error!?*Node {
+        const control_token = try self.lookaheadToken(.normal);
+        const control = rc.Control.map.get(control_token.slice(self.lexer.buffer)) orelse return null;
+        self.nextToken(.normal) catch unreachable;
+
+        try self.skipAnyCommas();
+
+        // Not every control type takes a text parameter
+        const text: ?Token = if (control.hasTextParam()) blk: {
+            try self.nextToken(.normal);
+            const text_token = self.state.token;
+            switch (text_token.id) {
+                .quoted_ascii_string, .quoted_wide_string, .number => {},
+                else => return self.addErrorDetailsAndFail(.{
+                    .err = .expected_something_else,
+                    .token = text_token,
+                    .extra = .{ .expected_types = .{
+                        .number = true,
+                        .string_literal = true,
+                    } },
+                }),
+            }
+            try self.skipAnyCommas();
+            break :blk text_token;
+        } else null;
+
+        const id = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+        try self.skipAnyCommas();
+
+        var class: ?*Node = null;
+        var style: ?*Node = null;
+        if (control == .control) {
+            // For this control type, the class and style come before the coordinates
+            const class_node = try self.parseExpression(.{});
+            class = class_node;
+            if (class_node.id == .literal) {
+                const class_literal = @fieldParentPtr(Node.Literal, "base", class_node);
+                const class_token = class_literal.token;
+                // An unquoted literal must be one of the known control class names
+                if (class_token.id == .literal and !rc.ControlClass.map.has(class_token.slice(self.lexer.buffer))) {
+                    return self.addErrorDetailsAndFail(.{
+                        .err = .expected_something_else,
+                        .token = self.state.token,
+                        .extra = .{ .expected_types = .{
+                            .control_class = true,
+                        } },
+                    });
+                }
+            }
+            try self.skipAnyCommas();
+
+            const style_node = try self.parseExpression(.{
+                .can_contain_not_expressions = true,
+                .allowed_types = .{ .number = true },
+            });
+            style = style_node;
+
+            // If there is no comma after the style parameter, the Win32 RC compiler
+            // could misinterpret the statement and end up skipping over at least one token
+            // that should have been interpreted as the next parameter (x). For example:
+            //   CONTROL "text", 1, BUTTON, 15 30, 1, 2, 3, 4
+            // the `15` is the style parameter, but in the Win32 implementation the `30`
+            // is completely ignored (i.e. the `1, 2, 3, 4` are `x`, `y`, `w`, `h`).
+            // If a comma is added after the `15`, then `30` gets interpreted (correctly)
+            // as the `x` value.
+            //
+            // Instead of emulating this behavior, we just warn about the potential for
+            // weird behavior in the Win32 implementation whenever there isn't a comma after
+            // the style parameter.
+            const after_style = try self.lookaheadToken(.normal);
+            const is_comma_or_eof = after_style.id == .comma or after_style.id == .eof;
+            if (!is_comma_or_eof) {
+                try self.addErrorDetails(.{
+                    .err = .rc_could_miscompile_control_params,
+                    .type = .warning,
+                    .token = after_style,
+                });
+                try self.addErrorDetails(.{
+                    .err = .rc_could_miscompile_control_params,
+                    .type = .note,
+                    .token = style_node.getFirstToken(),
+                    .token_span_end = style_node.getLastToken(),
+                });
+            }
+            try self.skipAnyCommas();
+        }
+
+        const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+        _ = try self.parseOptionalToken(.comma);
+        const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+        _ = try self.parseOptionalToken(.comma);
+        const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+        _ = try self.parseOptionalToken(.comma);
+        const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+        // The remaining parameters are all optional and comma-separated
+        var optional_param_parser = OptionalParamParser{ .parser = self };
+        if (control != .control) {
+            // For every other control type, the style is the first optional parameter
+            style = try optional_param_parser.parse(.{ .not_expression_allowed = true });
+        }
+
+        const exstyle: ?*Node = try optional_param_parser.parse(.{ .not_expression_allowed = true });
+        // A help id is only parsed for DIALOGEX
+        const help_id: ?*Node = if (resource == .dialogex)
+            try optional_param_parser.parse(.{})
+        else
+            null;
+
+        var extra_data: []*Node = &[_]*Node{};
+        var extra_data_begin: ?Token = null;
+        var extra_data_end: ?Token = null;
+        // extra data is DIALOGEX-only
+        if (resource == .dialogex) {
+            if (try self.parseOptionalToken(.begin)) {
+                extra_data_begin = self.state.token;
+                extra_data = try self.parseRawDataBlock();
+                extra_data_end = self.state.token;
+            }
+        }
+
+        const node = try self.state.arena.create(Node.ControlStatement);
+        node.* = .{
+            .type = control_token,
+            .text = text,
+            .class = class,
+            .id = id,
+            .x = x,
+            .y = y,
+            .width = width,
+            .height = height,
+            .style = style,
+            .exstyle = exstyle,
+            .help_id = help_id,
+            .extra_data_begin = extra_data_begin,
+            .extra_data = extra_data,
+            .extra_data_end = extra_data_end,
+        };
+        return &node.base;
+    }
+
+    /// Parses one entry of a toolbar resource, or returns null (leaving the current
+    /// token unchanged) if the next token is not a known toolbar button keyword.
+    ///
+    /// A `.separator` entry becomes a `Node.Literal` holding the keyword token; a
+    /// `.button` entry becomes a `Node.SimpleStatement` whose value is the number
+    /// expression that follows the keyword.
+    fn parseToolbarButtonStatement(self: *Self) Error!?*Node {
+        const keyword_token = try self.lookaheadToken(.normal);
+        const keyword = rc.ToolbarButton.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
+        self.nextToken(.normal) catch unreachable;
+
+        switch (keyword) {
+            .button => {
+                const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+                const statement = try self.state.arena.create(Node.SimpleStatement);
+                statement.* = .{
+                    .identifier = keyword_token,
+                    .value = id_expression,
+                };
+                return &statement.base;
+            },
+            .separator => {
+                const literal = try self.state.arena.create(Node.Literal);
+                literal.* = .{
+                    .token = keyword_token,
+                };
+                return &literal.base;
+            },
+        }
+    }
+
+    /// Parses one MENUITEM/POPUP statement of a MENU or MENUEX resource, or returns
+    /// null if the next token is not a known menu item keyword.
+    ///
+    /// `top_level_menu_id_token` is only used for error reporting when `nesting_level`
+    /// exceeds `max_nested_menu_level`. Nested popups are parsed recursively with
+    /// `nesting_level + 1`.
+    ///
+    /// Expects the current token to be handled, and that the menuitem/popup statement will
+    /// begin on the next token.
+    /// After return, the current token will be the token immediately before the end of the
+    /// menuitem statement (or unchanged if the function returns null).
+    fn parseMenuItemStatement(self: *Self, resource: Resource, top_level_menu_id_token: Token, nesting_level: u32) Error!?*Node {
+        const menuitem_token = try self.lookaheadToken(.normal);
+        const menuitem = rc.MenuItem.map.get(menuitem_token.slice(self.lexer.buffer)) orelse return null;
+        self.nextToken(.normal) catch unreachable;
+
+        if (nesting_level > max_nested_menu_level) {
+            try self.addErrorDetails(.{
+                .err = .nested_resource_level_exceeds_max,
+                .token = top_level_menu_id_token,
+                .extra = .{ .resource = resource },
+            });
+            return self.addErrorDetailsAndFail(.{
+                .err = .nested_resource_level_exceeds_max,
+                .type = .note,
+                .token = menuitem_token,
+                .extra = .{ .resource = resource },
+            });
+        }
+
+        switch (resource) {
+            .menu => switch (menuitem) {
+                .menuitem => {
+                    try self.nextToken(.normal);
+                    if (rc.MenuItem.isSeparator(self.state.token.slice(self.lexer.buffer))) {
+                        const separator_token = self.state.token;
+                        // There can be any number of trailing commas after SEPARATOR
+                        try self.skipAnyCommas();
+                        const node = try self.state.arena.create(Node.MenuItemSeparator);
+                        node.* = .{
+                            .menuitem = menuitem_token,
+                            .separator = separator_token,
+                        };
+                        return &node.base;
+                    }
+
+                    const text = self.state.token;
+                    try self.checkMenuItemText(text);
+                    try self.skipAnyCommas();
+
+                    const result = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+                    _ = try self.parseOptionalToken(.comma);
+
+                    const option_list = try self.parseMenuItemOptionList();
+
+                    const node = try self.state.arena.create(Node.MenuItem);
+                    node.* = .{
+                        .menuitem = menuitem_token,
+                        .text = text,
+                        .result = result,
+                        .option_list = option_list,
+                    };
+                    return &node.base;
+                },
+                .popup => {
+                    try self.nextToken(.normal);
+                    const text = self.state.token;
+                    try self.checkMenuItemText(text);
+                    try self.skipAnyCommas();
+
+                    const option_list = try self.parseMenuItemOptionList();
+                    const nested = try self.parseNestedMenuItems(resource, menuitem_token, top_level_menu_id_token, nesting_level);
+
+                    const node = try self.state.arena.create(Node.Popup);
+                    node.* = .{
+                        .popup = menuitem_token,
+                        .text = text,
+                        .option_list = option_list,
+                        .begin_token = nested.begin_token,
+                        .items = nested.items,
+                        .end_token = nested.end_token,
+                    };
+                    return &node.base;
+                },
+            },
+            .menuex => {
+                try self.nextToken(.normal);
+                const text = self.state.token;
+                try self.checkMenuItemText(text);
+
+                var param_parser = OptionalParamParser{ .parser = self };
+                const id = try param_parser.parse(.{});
+                const item_type = try param_parser.parse(.{});
+                const state = try param_parser.parse(.{});
+
+                if (menuitem == .menuitem) {
+                    // trailing comma is allowed, skip it
+                    _ = try self.parseOptionalToken(.comma);
+
+                    const node = try self.state.arena.create(Node.MenuItemEx);
+                    node.* = .{
+                        .menuitem = menuitem_token,
+                        .text = text,
+                        .id = id,
+                        .type = item_type,
+                        .state = state,
+                    };
+                    return &node.base;
+                }
+
+                // Only popups have a help id parameter
+                const help_id = try param_parser.parse(.{});
+
+                // trailing comma is allowed, skip it
+                _ = try self.parseOptionalToken(.comma);
+
+                const nested = try self.parseNestedMenuItems(resource, menuitem_token, top_level_menu_id_token, nesting_level);
+
+                const node = try self.state.arena.create(Node.PopupEx);
+                node.* = .{
+                    .popup = menuitem_token,
+                    .text = text,
+                    .id = id,
+                    .type = item_type,
+                    .state = state,
+                    .help_id = help_id,
+                    .begin_token = nested.begin_token,
+                    .items = nested.items,
+                    .end_token = nested.end_token,
+                };
+                return &node.base;
+            },
+            else => unreachable,
+        }
+        @compileError("unreachable");
+    }
+
+    /// Fails with an `expected_something_else` error pointing at `text` unless
+    /// `text` is a string literal token.
+    fn checkMenuItemText(self: *Self, text: Token) Error!void {
+        if (text.isStringLiteral()) return;
+        return self.addErrorDetailsAndFail(.{
+            .err = .expected_something_else,
+            .token = text,
+            .extra = .{ .expected_types = .{
+                .string_literal = true,
+            } },
+        });
+    }
+
+    /// Consumes consecutive menu item option tokens, skipping any commas after each
+    /// one, and returns them as a slice allocated with `self.state.arena`.
+    /// Stops (without consuming) at the first token that is not a known option.
+    fn parseMenuItemOptionList(self: *Self) Error![]Token {
+        var options = std.ArrayListUnmanaged(Token){};
+        while (true) {
+            const option_token = try self.lookaheadToken(.normal);
+            if (!rc.MenuItem.Option.map.has(option_token.slice(self.lexer.buffer))) {
+                break;
+            }
+            self.nextToken(.normal) catch unreachable;
+            try options.append(self.state.arena, option_token);
+            try self.skipAnyCommas();
+        }
+        return try options.toOwnedSlice(self.state.arena);
+    }
+
+    /// The begin token, nested items, and end token of a popup's body.
+    const NestedMenuItems = struct {
+        begin_token: Token,
+        items: []*Node,
+        end_token: Token,
+    };
+
+    /// Parses the `begin <items...> end` body of a popup whose keyword is `popup_token`.
+    /// Each nested item is parsed at `nesting_level + 1`. A body with no items is
+    /// rejected with `empty_menu_not_allowed` (reported at `popup_token`), after the
+    /// end token has been checked.
+    fn parseNestedMenuItems(
+        self: *Self,
+        resource: Resource,
+        popup_token: Token,
+        top_level_menu_id_token: Token,
+        nesting_level: u32,
+    ) Error!NestedMenuItems {
+        try self.nextToken(.normal);
+        const begin_token = self.state.token;
+        try self.check(.begin);
+
+        var items = std.ArrayListUnmanaged(*Node){};
+        while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |item_node| {
+            try items.append(self.state.arena, item_node);
+        }
+
+        try self.nextToken(.normal);
+        const end_token = self.state.token;
+        try self.check(.end);
+
+        if (items.items.len == 0) {
+            return self.addErrorDetailsAndFail(.{
+                .err = .empty_menu_not_allowed,
+                .token = popup_token,
+            });
+        }
+
+        return .{
+            .begin_token = begin_token,
+            .items = try items.toOwnedSlice(self.state.arena),
+            .end_token = end_token,
+        };
+    }
+
+    /// Helper for parsing a run of optional, comma-prefixed number parameters.
+    ///
+    /// Each call to `parse` first requires a comma. Once a call finds no comma, the
+    /// parser is marked as finished and every later call returns null without
+    /// looking at any tokens.
+    pub const OptionalParamParser = struct {
+        finished: bool = false,
+        parser: *Self,
+
+        pub const Options = struct {
+            not_expression_allowed: bool = false,
+        };
+
+        /// Returns the parsed number expression, or null when there is no further
+        /// parameter (no comma) or when the parameter after the comma is 'empty'.
+        pub fn parse(self: *OptionalParamParser, options: OptionalParamParser.Options) Error!?*Node {
+            if (self.finished) return null;
+
+            const found_comma = try self.parser.parseOptionalToken(.comma);
+            if (!found_comma) {
+                self.finished = true;
+                return null;
+            }
+
+            // If the next lookahead token could be part of a number expression,
+            // then parse it. Otherwise, treat it as an 'empty' expression and
+            // continue parsing, since 'empty' values are allowed.
+            const could_be_number = try self.parser.lookaheadCouldBeNumberExpression(
+                if (options.not_expression_allowed) .not_allowed else .not_disallowed,
+            );
+            if (!could_be_number) return null;
+
+            return try self.parser.parseExpression(.{
+                .allowed_types = .{ .number = true },
+                .can_contain_not_expressions = options.not_expression_allowed,
+            });
+        }
+    };
+
+    /// Parses one fixed-info statement of a VERSIONINFO resource, or returns null if
+    /// the next token is not a known version statement keyword.
+    ///
+    /// `.file_version` and `.product_version` take between one and four
+    /// comma-separated number expressions; every other statement type takes
+    /// exactly one number expression.
+    ///
+    /// Expects the current token to be handled, and that the version statement will
+    /// begin on the next token.
+    /// After return, the current token will be the token immediately before the end of the
+    /// version statement (or unchanged if the function returns null).
+    fn parseVersionStatement(self: *Self) Error!?*Node {
+        const type_token = try self.lookaheadToken(.normal);
+        const statement_type = rc.VersionInfo.map.get(type_token.slice(self.lexer.buffer)) orelse return null;
+        self.nextToken(.normal) catch unreachable;
+
+        const has_multiple_parts = switch (statement_type) {
+            .file_version, .product_version => true,
+            else => false,
+        };
+
+        if (!has_multiple_parts) {
+            const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+            const statement = try self.state.arena.create(Node.SimpleStatement);
+            statement.* = .{
+                .identifier = type_token,
+                .value = value,
+            };
+            return &statement.base;
+        }
+
+        var parts_buffer: [4]*Node = undefined;
+        var num_parts: usize = 0;
+        while (true) {
+            parts_buffer[num_parts] = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+            num_parts += 1;
+
+            // Once the buffer is full, stop without looking for another comma
+            if (num_parts == parts_buffer.len) break;
+            if (!(try self.parseOptionalToken(.comma))) break;
+        }
+
+        const statement = try self.state.arena.create(Node.VersionStatement);
+        statement.* = .{
+            .type = type_token,
+            .parts = try self.state.arena.dupe(*Node, parts_buffer[0..num_parts]),
+        };
+        return &statement.base;
+    }
+
+    /// Parses one BLOCK or VALUE statement of a VERSIONINFO resource, or returns null
+    /// if the next token is not a known version block keyword.
+    ///
+    /// `top_level_version_id_token` is only used for error reporting when
+    /// `nesting_level` exceeds `max_nested_version_level`. The children of a block
+    /// are parsed recursively with `nesting_level + 1`.
+    ///
+    /// Expects the current token to be handled, and that the version BLOCK/VALUE will
+    /// begin on the next token.
+    /// After return, the current token will be the token immediately before the end of the
+    /// version BLOCK/VALUE (or unchanged if the function returns null).
+    fn parseVersionBlockOrValue(self: *Self, top_level_version_id_token: Token, nesting_level: u32) Error!?*Node {
+        const keyword_token = try self.lookaheadToken(.normal);
+        const keyword = rc.VersionBlock.map.get(keyword_token.slice(self.lexer.buffer)) orelse return null;
+        self.nextToken(.normal) catch unreachable;
+
+        if (nesting_level > max_nested_version_level) {
+            try self.addErrorDetails(.{
+                .err = .nested_resource_level_exceeds_max,
+                .token = top_level_version_id_token,
+                .extra = .{ .resource = .versioninfo },
+            });
+            return self.addErrorDetailsAndFail(.{
+                .err = .nested_resource_level_exceeds_max,
+                .type = .note,
+                .token = keyword_token,
+                .extra = .{ .resource = .versioninfo },
+            });
+        }
+
+        // The key must be a string literal
+        try self.nextToken(.normal);
+        const key_token = self.state.token;
+        if (!key_token.isStringLiteral()) {
+            return self.addErrorDetailsAndFail(.{
+                .err = .expected_something_else,
+                .token = key_token,
+                .extra = .{ .expected_types = .{
+                    .string_literal = true,
+                } },
+            });
+        }
+
+        // Need to keep track of this to detect a potential miscompilation when
+        // the comma is omitted and the first value is a quoted string.
+        const had_comma_before_first_value = try self.parseOptionalToken(.comma);
+        try self.skipAnyCommas();
+
+        const value_list = try self.parseBlockValuesList(had_comma_before_first_value);
+
+        switch (keyword) {
+            .value => {
+                const value_node = try self.state.arena.create(Node.BlockValue);
+                value_node.* = .{
+                    .identifier = keyword_token,
+                    .key = key_token,
+                    .values = value_list,
+                };
+                return &value_node.base;
+            },
+            .block => {
+                try self.nextToken(.normal);
+                const begin_token = self.state.token;
+                try self.check(.begin);
+
+                var child_nodes = std.ArrayListUnmanaged(*Node){};
+                while (try self.parseVersionBlockOrValue(top_level_version_id_token, nesting_level + 1)) |child| {
+                    try child_nodes.append(self.state.arena, child);
+                }
+
+                try self.nextToken(.normal);
+                const end_token = self.state.token;
+                try self.check(.end);
+
+                const block_node = try self.state.arena.create(Node.Block);
+                block_node.* = .{
+                    .identifier = keyword_token,
+                    .key = key_token,
+                    .values = value_list,
+                    .begin_token = begin_token,
+                    .children = try child_nodes.toOwnedSlice(self.state.arena),
+                    .end_token = end_token,
+                };
+                return &block_node.base;
+            },
+        }
+    }
+
+    /// Parses the (possibly empty) list of values that follows the key of a version
+    /// BLOCK/VALUE statement. Parsing stops at the first token that cannot start a
+    /// number expression or quoted string.
+    ///
+    /// Emits warnings (but does not fail) for two cases where the Win32 RC compiler
+    /// would miscompile the value:
+    /// - the list mixes number and string values
+    /// - `had_comma_before_first_value` is false and the first value is a string literal
+    ///
+    /// The returned slice is allocated with `self.state.arena`.
+    fn parseBlockValuesList(self: *Self, had_comma_before_first_value: bool) Error![]*Node {
+        var values = std.ArrayListUnmanaged(*Node){};
+        var seen_number: bool = false;
+        var first_string_value: ?*Node = null;
+
+        while (true) {
+            const peeked = try self.lookaheadToken(.normal);
+            const can_start_value = switch (peeked.id) {
+                .operator,
+                .number,
+                .open_paren,
+                .quoted_ascii_string,
+                .quoted_wide_string,
+                => true,
+                else => false,
+            };
+            if (!can_start_value) break;
+
+            const expression = try self.parseExpression(.{});
+
+            if (expression.isNumberExpression()) {
+                seen_number = true;
+            } else if (first_string_value == null) {
+                std.debug.assert(expression.isStringLiteral());
+                first_string_value = expression;
+            }
+
+            const has_trailing_comma = try self.parseOptionalToken(.comma);
+            try self.skipAnyCommas();
+
+            const wrapped = try self.state.arena.create(Node.BlockValueValue);
+            wrapped.* = .{
+                .expression = expression,
+                .trailing_comma = has_trailing_comma,
+            };
+            try values.append(self.state.arena, &wrapped.base);
+        }
+
+        if (seen_number and first_string_value != null) {
+            // The Win32 RC compiler does some strange stuff with the data size:
+            // Strings are counted as UTF-16 code units including the null-terminator
+            // Numbers are counted as their byte lengths
+            // So, when both strings and numbers are within a single value,
+            // it incorrectly sets the value's type as binary, but then gives the
+            // data length as a mixture of bytes and UTF-16 code units. This means that
+            // when the length is read, it will be treated as byte length and will
+            // not read the full value. We don't reproduce this behavior, so we warn
+            // of the miscompilation here.
+            var details: ErrorDetails = .{
+                .err = .rc_would_miscompile_version_value_byte_count,
+                .type = .warning,
+                .token = first_string_value.?.getFirstToken(),
+                .token_span_start = values.items[0].getFirstToken(),
+                .token_span_end = values.items[values.items.len - 1].getLastToken(),
+            };
+            try self.addErrorDetails(details);
+            // The accompanying note is identical apart from its type and not
+            // printing the source line again.
+            details.type = .note;
+            details.print_source_line = false;
+            try self.addErrorDetails(details);
+        }
+
+        if (!had_comma_before_first_value and values.items.len > 0) {
+            const first_expression = values.items[0].cast(.block_value_value).?.expression;
+            if (first_expression.isStringLiteral()) {
+                var details: ErrorDetails = .{
+                    .err = .rc_would_miscompile_version_value_padding,
+                    .type = .warning,
+                    .token = first_expression.cast(.literal).?.token,
+                };
+                try self.addErrorDetails(details);
+                details.type = .note;
+                details.print_source_line = false;
+                try self.addErrorDetails(details);
+            }
+        }
+
+        return values.toOwnedSlice(self.state.arena);
+    }
+
+    /// Returns the `is_long` flag of the result of evaluating `expression_node` as a
+    /// number expression against `source`.
+    fn numberExpressionContainsAnyLSuffixes(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) bool {
+        // TODO: This could probably be done without evaluating the whole expression
+        const evaluated = Compiler.evaluateNumberExpression(expression_node, source, code_page_lookup);
+        return evaluated.is_long;
+    }
+
+    /// Parses the `<primary language>, <sublanguage>` parameters of a LANGUAGE
+    /// statement. Both parameters are number expressions and the comma between
+    /// them is required.
+    ///
+    /// Expects the current token to be a literal token that contains the string LANGUAGE
+    fn parseLanguageStatement(self: *Self) Error!*Node {
+        const language_token = self.state.token;
+
+        const primary_language = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+        try self.nextToken(.normal);
+        try self.check(.comma);
+
+        const sublanguage = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
+
+        // The Win32 RC compiler errors if either parameter contains any number with an L
+        // suffix. Instead of that, we want to warn and then let the values get truncated.
+        // The warning is done here to allow the compiler logic to not have to deal with this.
+        for ([_]*Node{ primary_language, sublanguage }) |param| {
+            if (!numberExpressionContainsAnyLSuffixes(param, self.lexer.buffer, &self.state.input_code_page_lookup)) continue;
+
+            try self.addErrorDetails(.{
+                .err = .rc_would_error_u16_with_l_suffix,
+                .type = .warning,
+                .token = param.getFirstToken(),
+                .token_span_end = param.getLastToken(),
+                .extra = .{ .statement_with_u16_param = .language },
+            });
+            try self.addErrorDetails(.{
+                .err = .rc_would_error_u16_with_l_suffix,
+                .print_source_line = false,
+                .type = .note,
+                .token = param.getFirstToken(),
+                .token_span_end = param.getLastToken(),
+                .extra = .{ .statement_with_u16_param = .language },
+            });
+        }
+
+        const statement = try self.state.arena.create(Node.LanguageStatement);
+        statement.* = .{
+            .language_token = language_token,
+            .primary_language_id = primary_language,
+            .sublanguage_id = sublanguage,
+        };
+        return &statement.base;
+    }
+
+    /// Options that control which kinds of primaries `parsePrimary`/`parseExpression`
+    /// accept and how an "expected something else" error is described.
+    pub const ParseExpressionOptions = struct {
+        is_known_to_be_number_expression: bool = false,
+        can_contain_not_expressions: bool = false,
+        nesting_context: NestingContext = .{},
+        allowed_types: AllowedTypes = .{ .literal = true, .number = true, .string = true },
+        expected_types_override: ?ErrorDetails.ExpectedTypes = null,
+
+        /// The kinds of primary expressions that are accepted.
+        pub const AllowedTypes = struct {
+            literal: bool = false,
+            number: bool = false,
+            string: bool = false,
+        };
+
+        /// Tracks how deeply nested the expression being parsed is, along with the
+        /// tokens used when reporting that the nesting limit was exceeded.
+        pub const NestingContext = struct {
+            first_token: ?Token = null,
+            last_token: ?Token = null,
+            level: u32 = 0,
+
+            /// Returns a copy of `ctx` one level deeper: `first_token` is only set if it
+            /// was not set already, and `last_token` becomes `most_recent_token`.
+            fn incremented(ctx: NestingContext, first_token: Token, most_recent_token: Token) NestingContext {
+                var deeper = ctx;
+                if (deeper.first_token == null) deeper.first_token = first_token;
+                deeper.last_token = most_recent_token;
+                deeper.level += 1;
+                return deeper;
+            }
+        };
+
+        /// Builds an `expected_something_else` error for `token`, using
+        /// `expected_types_override` when set and otherwise deriving the expected
+        /// types from `allowed_types`.
+        pub fn toErrorDetails(options: ParseExpressionOptions, token: Token) ErrorDetails {
+            // TODO: expected_types_override interaction with is_known_to_be_number_expression?
+            const expected_types: ErrorDetails.ExpectedTypes = options.expected_types_override orelse derived: {
+                const allowed = options.allowed_types;
+                // Strings and literals are never expected inside a known number expression
+                const may_be_non_number = !options.is_known_to_be_number_expression;
+                break :derived ErrorDetails.ExpectedTypes{
+                    .number = allowed.number,
+                    .number_expression = allowed.number,
+                    .string_literal = allowed.string and may_be_non_number,
+                    .literal = allowed.literal and may_be_non_number,
+                };
+            };
+            return .{
+                .err = .expected_something_else,
+                .token = token,
+                .extra = .{ .expected_types = expected_types },
+            };
+        }
+    };
+
+    /// Returns true if the next lookahead token is a number or could be the start of a number expression.
+    /// Only useful when looking for empty expressions in optional fields.
+    ///
+    /// `not_allowed` refers to the NOT keyword: with `.not_allowed`, a literal token
+    /// that case-insensitively equals "NOT" counts as the start of a number
+    /// expression; with `.not_disallowed`, no literal token does.
+    fn lookaheadCouldBeNumberExpression(self: *Self, not_allowed: enum { not_allowed, not_disallowed }) Error!bool {
+        // The token is only inspected, never modified, so it is declared `const`.
+        const lookahead_token = try self.lookaheadToken(.normal);
+        switch (lookahead_token.id) {
+            .literal => if (not_allowed == .not_allowed) {
+                return std.ascii.eqlIgnoreCase("NOT", lookahead_token.slice(self.lexer.buffer));
+            } else return false,
+            .number => return true,
+            .open_paren => return true,
+            .operator => {
+                // + can be a unary operator, see parseExpression's handling of unary +
+                const operator_char = lookahead_token.slice(self.lexer.buffer)[0];
+                return operator_char == '+';
+            },
+            else => return false,
+        }
+    }
+
+ /// Advances to the next token and parses a single primary expression from it:
+ /// a quoted string, a literal (or `NOT <number>` when
+ /// `options.can_contain_not_expressions` is set), a number, or a parenthesized
+ /// number expression. Each kind is only accepted if the corresponding field of
+ /// `options.allowed_types` is set; otherwise the error built by
+ /// `options.toErrorDetails` is reported.
+ ///
+ /// Any other token is reported as an error (with an extra note for a lone `)`
+ /// or a unary `+`) and `error.ParseError` is returned.
+ fn parsePrimary(self: *Self, options: ParseExpressionOptions) Error!*Node {
+ try self.nextToken(.normal);
+ const first_token = self.state.token;
+ // These only affect which notes get attached to the error emitted after the switch
+ var is_close_paren_expression = false;
+ var is_unary_plus_expression = false;
+ switch (self.state.token.id) {
+ .quoted_ascii_string, .quoted_wide_string => {
+ if (!options.allowed_types.string) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
+ const node = try self.state.arena.create(Node.Literal);
+ node.* = .{ .token = self.state.token };
+ return &node.base;
+ },
+ .literal => {
+ if (options.can_contain_not_expressions and std.ascii.eqlIgnoreCase("NOT", self.state.token.slice(self.lexer.buffer))) {
+ const not_token = self.state.token;
+ // NOT must be immediately followed by a number token
+ try self.nextToken(.normal);
+ try self.check(.number);
+ if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
+ const node = try self.state.arena.create(Node.NotExpression);
+ node.* = .{
+ .not_token = not_token,
+ .number_token = self.state.token,
+ };
+ return &node.base;
+ }
+ if (!options.allowed_types.literal) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
+ const node = try self.state.arena.create(Node.Literal);
+ node.* = .{ .token = self.state.token };
+ return &node.base;
+ },
+ .number => {
+ if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
+ const node = try self.state.arena.create(Node.Literal);
+ node.* = .{ .token = self.state.token };
+ return &node.base;
+ },
+ .open_paren => {
+ const open_paren_token = self.state.token;
+
+ // Anything inside parentheses must be a number expression, and it counts
+ // as one more level of nesting.
+ const expression = try self.parseExpression(.{
+ .is_known_to_be_number_expression = true,
+ .can_contain_not_expressions = options.can_contain_not_expressions,
+ .nesting_context = options.nesting_context.incremented(first_token, open_paren_token),
+ .allowed_types = .{ .number = true },
+ });
+
+ try self.nextToken(.normal);
+ // TODO: Add context to error about where the open paren is
+ try self.check(.close_paren);
+
+ if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(open_paren_token));
+ const node = try self.state.arena.create(Node.GroupedExpression);
+ node.* = .{
+ .open_token = open_paren_token,
+ .expression = expression,
+ .close_token = self.state.token,
+ };
+ return &node.base;
+ },
+ .close_paren => {
+ // Note: In the Win32 implementation, a single close paren
+ // counts as a valid "expression", but only when it's the first and
+ // only token in the expression. Such an expression is then treated
+ // as a 'skip this expression' instruction. For example:
+ // 1 RCDATA { 1, ), ), ), 2 }
+ // will be evaluated as if it were `1 RCDATA { 1, 2 }` and only
+ // 0x0001 and 0x0002 will be written to the .res data.
+ //
+ // This behavior is not emulated because it almost certainly has
+ // no valid use cases and only introduces edge cases that are
+ // not worth the effort to track down and deal with. Instead,
+ // we error but also add a note about the Win32 RC behavior if
+ // this edge case is detected.
+ if (!options.is_known_to_be_number_expression) {
+ is_close_paren_expression = true;
+ }
+ },
+ .operator => {
+ // In the Win32 implementation, something akin to a unary +
+ // is allowed but it doesn't behave exactly like a unary +.
+ // Instead of emulating the Win32 behavior, we instead error
+ // and add a note about unary plus not being allowed.
+ //
+ // This is done because unary + only works in some places,
+ // and there's no real use-case for it since it's so limited
+ // in how it can be used (e.g. +1 is accepted but (+1) will error)
+ //
+ // Even understanding when unary plus is allowed is difficult, so
+ // we don't do any fancy detection of when the Win32 RC compiler would
+ // allow a unary + and instead just output the note in all cases.
+ //
+ // Some examples of allowed expressions by the Win32 compiler:
+ // +1
+ // 0|+5
+ // +1+2
+ // +~-5
+ // +(1)
+ //
+ // Some examples of disallowed expressions by the Win32 compiler:
+ // (+1)
+ // ++5
+ //
+ // TODO: Potentially re-evaluate and support the unary plus in a bug-for-bug
+ // compatible way.
+ const operator_char = self.state.token.slice(self.lexer.buffer)[0];
+ if (operator_char == '+') {
+ is_unary_plus_expression = true;
+ }
+ },
+ else => {},
+ }
+
+ // Every prong that produced a node has returned by now, so reaching this point
+ // means the token cannot start a primary expression.
+ try self.addErrorDetails(options.toErrorDetails(self.state.token));
+ if (is_close_paren_expression) {
+ try self.addErrorDetails(ErrorDetails{
+ .err = .close_paren_expression,
+ .type = .note,
+ .token = self.state.token,
+ .print_source_line = false,
+ });
+ }
+ if (is_unary_plus_expression) {
+ try self.addErrorDetails(ErrorDetails{
+ .err = .unary_plus_expression,
+ .type = .note,
+ .token = self.state.token,
+ .print_source_line = false,
+ });
+ }
+ return error.ParseError;
+ }
+
+    /// Parses a primary expression followed by any number of `<operator> <primary>`
+    /// pairs, folding them left-to-right into `Node.BinaryExpression` nodes.
+    ///
+    /// Expects the current token to have already been dealt with, and that the
+    /// expression will start on the next token.
+    /// After return, the current token will have been dealt with.
+    fn parseExpression(self: *Self, options: ParseExpressionOptions) Error!*Node {
+        const nesting = options.nesting_context;
+        if (nesting.level > max_nested_expression_level) {
+            // The error points at where the nesting started, the note at where the limit was hit
+            try self.addErrorDetails(.{
+                .err = .nested_expression_level_exceeds_max,
+                .token = nesting.first_token.?,
+            });
+            return self.addErrorDetailsAndFail(.{
+                .err = .nested_expression_level_exceeds_max,
+                .type = .note,
+                .token = nesting.last_token.?,
+            });
+        }
+
+        var lhs: *Node = try self.parsePrimary(options);
+        const first_token = lhs.getFirstToken();
+
+        // Non-number expressions can't have operators, so we can just return
+        if (!lhs.isNumberExpression()) return lhs;
+
+        while (true) {
+            const found_operator = try self.parseOptionalTokenAdvanced(.operator, .normal_expect_operator);
+            if (!found_operator) break;
+
+            const operator = self.state.token;
+            const rhs = try self.parsePrimary(.{
+                .is_known_to_be_number_expression = true,
+                .can_contain_not_expressions = options.can_contain_not_expressions,
+                .nesting_context = nesting.incremented(first_token, operator),
+                .allowed_types = options.allowed_types,
+            });
+
+            if (!rhs.isNumberExpression()) {
+                return self.addErrorDetailsAndFail(.{
+                    .err = .expected_something_else,
+                    .token = rhs.getFirstToken(),
+                    .token_span_end = rhs.getLastToken(),
+                    .extra = .{ .expected_types = .{
+                        .number = true,
+                        .number_expression = true,
+                    } },
+                });
+            }
+
+            const binary = try self.state.arena.create(Node.BinaryExpression);
+            binary.* = .{
+                .left = lhs,
+                .operator = operator,
+                .right = rhs,
+            };
+            lhs = &binary.base;
+        }
+
+        return lhs;
+    }
+
+    /// Consumes consecutive comma tokens until the next token is not a comma
+    /// (zero commas is fine), i.e. it skips the regex `,*`.
+    /// Assumes the token(s) should be parsed with `.normal` as the method.
+    fn skipAnyCommas(self: *Self) !void {
+        while (true) {
+            const skipped_comma = try self.parseOptionalToken(.comma);
+            if (!skipped_comma) return;
+        }
+    }
+
+    /// Shorthand for `parseOptionalTokenAdvanced` with `.normal` as the lex method:
+    /// advances the current token only if the next token's id matches `id`.
+    /// Returns true if the token matched, false otherwise.
+    fn parseOptionalToken(self: *Self, id: Token.Id) Error!bool {
+        const matched = try self.parseOptionalTokenAdvanced(id, .normal);
+        return matched;
+    }
+
+    /// Advances the current token only if the token's id matches the specified `id`.
+    /// The lookahead and the real advance both lex with `method`.
+    /// Returns true if the token matched, false otherwise.
+    fn parseOptionalTokenAdvanced(self: *Self, id: Token.Id, comptime method: Lexer.LexMethod) Error!bool {
+        const maybe_token = try self.lookaheadToken(method);
+        if (maybe_token.id != id) return false;
+        // A successful lookahead does not guarantee that `nextToken` succeeds:
+        // `nextToken` also appends diagnostics and updates the code page lookups,
+        // so it can fail even though the same token lexed fine a moment ago.
+        // Propagate the error instead of asserting that it cannot happen.
+        try self.nextToken(method);
+        return true;
+    }
+
+    /// Appends `details` to the parser's diagnostics.
+    fn addErrorDetails(self: *Self, details: ErrorDetails) Allocator.Error!void {
+        return self.state.diagnostics.append(details);
+    }
+
+    /// Records `details` as a diagnostic and returns `error.ParseError`, or the
+    /// allocation error if recording the diagnostic failed. Intended to be used as
+    /// `return self.addErrorDetailsAndFail(...)`.
+    fn addErrorDetailsAndFail(self: *Self, details: ErrorDetails) Error {
+        self.addErrorDetails(details) catch |err| return err;
+        return error.ParseError;
+    }
+
+ /// Lexes the next token using `method` and stores it in `self.state.token`.
+ ///
+ /// Lexer errors are turned into diagnostics: a code page pragma in an included
+ /// file becomes a warning and lexing continues, an invalid code page in a pragma
+ /// becomes a warning (and lexing continues) only if
+ /// `warn_instead_of_error_on_invalid_code_page` is set, and everything else is
+ /// reported as an error and fails the parse.
+ ///
+ /// Once a token has been obtained, the input and output code page lookups are
+ /// updated for that token.
+ fn nextToken(self: *Self, comptime method: Lexer.LexMethod) Error!void {
+ self.state.token = token: while (true) {
+ const token = self.lexer.next(method) catch |err| switch (err) {
+ error.CodePagePragmaInIncludedFile => {
+ // The Win32 RC compiler silently ignores such `#pragma code_page` directives,
+ // but we want to both ignore them *and* emit a warning
+ try self.addErrorDetails(.{
+ .err = .code_page_pragma_in_included_file,
+ .type = .warning,
+ .token = self.lexer.error_context_token.?,
+ });
+ continue;
+ },
+ error.CodePagePragmaInvalidCodePage => {
+ var details = self.lexer.getErrorDetails(err);
+ if (!self.options.warn_instead_of_error_on_invalid_code_page) {
+ return self.addErrorDetailsAndFail(details);
+ }
+ // Downgrade to a warning and keep lexing
+ details.type = .warning;
+ try self.addErrorDetails(details);
+ continue;
+ },
+ error.InvalidDigitCharacterInNumberLiteral => {
+ // Reported as an error followed by a note for the same token
+ const details = self.lexer.getErrorDetails(err);
+ try self.addErrorDetails(details);
+ return self.addErrorDetailsAndFail(.{
+ .err = details.err,
+ .type = .note,
+ .token = details.token,
+ .print_source_line = false,
+ });
+ },
+ else => return self.addErrorDetailsAndFail(self.lexer.getErrorDetails(err)),
+ };
+ break :token token;
+ };
+ // After every token, set the input code page for its line
+ try self.state.input_code_page_lookup.setForToken(self.state.token, self.lexer.current_code_page);
+ // But only set the output code page to the current code page if we are past the first code_page pragma in the file.
+ // Otherwise, we want to fill the lookup using the default code page so that lookups still work for lines that
+ // don't have an explicit output code page set.
+ const output_code_page = if (self.lexer.seen_pragma_code_pages > 1) self.lexer.current_code_page else self.state.output_code_page_lookup.default_code_page;
+ try self.state.output_code_page_lookup.setForToken(self.state.token, output_code_page);
+ }
+
+    /// Returns the token that would be lexed next with `method` without advancing
+    /// the parser: lexing is done on a copy of the lexer stored in
+    /// `self.state.lookahead_lexer`.
+    fn lookaheadToken(self: *Self, comptime method: Lexer.LexMethod) Error!Token {
+        self.state.lookahead_lexer = self.lexer.*;
+        while (true) {
+            return self.state.lookahead_lexer.next(method) catch |err| switch (err) {
+                // Ignore this error and get the next valid token, we'll deal with this
+                // properly when getting the token for real
+                error.CodePagePragmaInIncludedFile => continue,
+                else => {
+                    const details = self.state.lookahead_lexer.getErrorDetails(err);
+                    return self.addErrorDetailsAndFail(details);
+                },
+            };
+        }
+    }
+
+    /// Returns the slice of the lexer's buffer that the current token refers to.
+    fn tokenSlice(self: *Self) []const u8 {
+        const current = self.state.token;
+        return current.slice(self.lexer.buffer);
+    }
+
+    /// Check that the current token is something that can be used as an ID
+    /// (i.e. a literal token); otherwise reports an `expected_token` error and fails.
+    fn checkId(self: *Self) !void {
+        if (self.state.token.id == .literal) return;
+        return self.addErrorDetailsAndFail(.{
+            .err = .expected_token,
+            .token = self.state.token,
+            .extra = .{ .expected = .literal },
+        });
+    }
+
+    /// Reports an `expected_token` error and fails unless the current token's id
+    /// is `expected_token_id`.
+    fn check(self: *Self, expected_token_id: Token.Id) !void {
+        if (self.state.token.id == expected_token_id) return;
+        return self.addErrorDetailsAndFail(.{
+            .err = .expected_token,
+            .token = self.state.token,
+            .extra = .{ .expected = expected_token_id },
+        });
+    }
+
+    /// Interprets the current token as a resource type via `Resource.fromString`
+    /// (using the lexer's current code page). The current token must be a literal;
+    /// otherwise an `expected_token` error is reported and the parse fails.
+    fn checkResource(self: *Self) !Resource {
+        const current = self.state.token;
+        if (current.id != .literal) {
+            return self.addErrorDetailsAndFail(.{
+                .err = .expected_token,
+                .token = current,
+                .extra = .{ .expected = .literal },
+            });
+        }
+        return Resource.fromString(.{
+            .slice = current.slice(self.lexer.buffer),
+            .code_page = self.lexer.current_code_page,
+        });
+    }
+};
diff --git a/lib/compiler/resinator/preprocess.zig b/lib/compiler/resinator/preprocess.zig
@@ -0,0 +1,140 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const cli = @import("cli.zig");
+const aro = @import("aro");
+
+/// The errors `preprocess` can return. `ArgError`, `GeneratedSourceError` and
+/// `PreprocessError` identify the stage of `preprocess` that failed.
+const PreprocessError = error{ ArgError, GeneratedSourceError, PreprocessError, StreamTooLong, OutOfMemory };
+
+/// Preprocesses the first input file parsed from `argv` using aro and
+/// pretty-prints the resulting tokens to `writer`.
+///
+/// If `maybe_dependencies_list` is non-null, the path of every source in
+/// `comp.sources` (except the generated builtin/command line macro sources) is
+/// duplicated with the list's allocator and appended to the list.
+///
+/// Returns `error.ArgError` if the arguments could not be parsed or did not
+/// contain an input file, `error.GeneratedSourceError` if the builtin or user
+/// macro sources could not be created, and `error.PreprocessError` if
+/// preprocessing itself failed.
+pub fn preprocess(
+    comp: *aro.Compilation,
+    writer: anytype,
+    /// Expects argv[0] to be the command name
+    argv: []const []const u8,
+    maybe_dependencies_list: ?*std.ArrayList([]const u8),
+) PreprocessError!void {
+    try comp.addDefaultPragmaHandlers();
+
+    var driver: aro.Driver = .{ .comp = comp, .aro_name = "arocc" };
+    defer driver.deinit();
+
+    var macro_buf = std.ArrayList(u8).init(comp.gpa);
+    defer macro_buf.deinit();
+
+    _ = driver.parseArgs(std.io.null_writer, macro_buf.writer(), argv) catch |err| switch (err) {
+        error.FatalError => return error.ArgError,
+        error.OutOfMemory => |e| return e,
+    };
+
+    if (hasAnyErrors(comp)) return error.ArgError;
+
+    // Without at least one input there is nothing to preprocess; treat it as an
+    // argument error instead of indexing out of bounds below.
+    if (driver.inputs.items.len == 0) return error.ArgError;
+
+    // .include_system_defines gives us things like _WIN32
+    const builtin_macros = comp.generateBuiltinMacros(.include_system_defines) catch |err| switch (err) {
+        error.FatalError => return error.GeneratedSourceError,
+        else => |e| return e,
+    };
+    const user_macros = comp.addSourceFromBuffer("<command line>", macro_buf.items) catch |err| switch (err) {
+        error.FatalError => return error.GeneratedSourceError,
+        else => |e| return e,
+    };
+    const source = driver.inputs.items[0];
+
+    if (hasAnyErrors(comp)) return error.GeneratedSourceError;
+
+    comp.generated_buf.items.len = 0;
+    var pp = try aro.Preprocessor.initDefault(comp);
+    defer pp.deinit();
+
+    if (comp.langopts.ms_extensions) {
+        comp.ms_cwd_source_id = source.id;
+    }
+
+    pp.preserve_whitespace = true;
+    pp.linemarkers = .line_directives;
+
+    pp.preprocessSources(&.{ source, builtin_macros, user_macros }) catch |err| switch (err) {
+        error.FatalError => return error.PreprocessError,
+        else => |e| return e,
+    };
+
+    if (hasAnyErrors(comp)) return error.PreprocessError;
+
+    try pp.prettyPrintTokens(writer);
+
+    if (maybe_dependencies_list) |dependencies_list| {
+        for (comp.sources.values()) |comp_source| {
+            // Generated sources are not dependencies of the input file
+            if (comp_source.id == builtin_macros.id or comp_source.id == user_macros.id) continue;
+            if (comp_source.id == .unused or comp_source.id == .generated) continue;
+            const duped_path = try dependencies_list.allocator.dupe(u8, comp_source.path);
+            errdefer dependencies_list.allocator.free(duped_path);
+            try dependencies_list.append(duped_path);
+        }
+    }
+}
+
+/// Returns true if any diagnostic collected in `comp` is an error or a fatal error.
+fn hasAnyErrors(comp: *aro.Compilation) bool {
+    // In theory we could just check Diagnostics.errors != 0, but that only
+    // gets set during rendering of the error messages, see:
+    // https://github.com/Vexu/arocc/issues/603
+    for (comp.diagnostics.list.items) |msg| {
+        const is_error = msg.kind == .@"error" or msg.kind == .@"fatal error";
+        if (is_error) return true;
+    }
+    return false;
+}
+
+/// Appends the aro command line arguments that correspond to `options` to `argv`:
+/// a fixed set of preprocessing flags, `-I` for each extra include path,
+/// `-isystem` for each of `system_include_paths` and (unless disabled by
+/// `options.ignore_include_env_var`) each path in the INCLUDE environment
+/// variable, and `-D`/`-U` for each symbol in `options.symbols`.
+///
+/// `arena` is used for temporary -D argument strings and the INCLUDE environment variable.
+/// The arena should be kept alive at least as long as `argv`.
+pub fn appendAroArgs(arena: Allocator, argv: *std.ArrayList([]const u8), options: cli.Options, system_include_paths: []const []const u8) !void {
+    try argv.appendSlice(&.{
+        "-E",
+        "--comments",
+        "-fuse-line-directives",
+        "--target=x86_64-windows-msvc",
+        "--emulate=msvc",
+        "-nostdinc",
+        "-DRC_INVOKED",
+    });
+    for (options.extra_include_paths.items) |extra_include_path| {
+        try argv.append("-I");
+        try argv.append(extra_include_path);
+    }
+
+    for (system_include_paths) |include_path| {
+        try argv.append("-isystem");
+        try argv.append(include_path);
+    }
+
+    if (!options.ignore_include_env_var) {
+        const INCLUDE: []const u8 = std.process.getEnvVarOwned(arena, "INCLUDE") catch |err| switch (err) {
+            // Running out of memory must not be mistaken for "INCLUDE is not set"
+            error.OutOfMemory => |e| return e,
+            // A missing or undecodable INCLUDE is treated the same as an empty one
+            else => "",
+        };
+
+        // The only precedent here is llvm-rc which also uses the platform-specific
+        // delimiter. There's no precedent set by `rc.exe` since it's Windows-only.
+        const delimiter = switch (builtin.os.tag) {
+            .windows => ';',
+            else => ':',
+        };
+        var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
+        while (it.next()) |include_path| {
+            try argv.append("-isystem");
+            try argv.append(include_path);
+        }
+    }
+
+    var symbol_it = options.symbols.iterator();
+    while (symbol_it.next()) |entry| {
+        switch (entry.value_ptr.*) {
+            .define => |value| {
+                try argv.append("-D");
+                const define_arg = try std.fmt.allocPrint(arena, "{s}={s}", .{ entry.key_ptr.*, value });
+                try argv.append(define_arg);
+            },
+            .undefine => {
+                try argv.append("-U");
+                try argv.append(entry.key_ptr.*);
+            },
+        }
+    }
+}
diff --git a/src/resinator/rc.zig b/lib/compiler/resinator/rc.zig
diff --git a/lib/compiler/resinator/res.zig b/lib/compiler/resinator/res.zig
@@ -0,0 +1,1107 @@
+const std = @import("std");
+const rc = @import("rc.zig");
+const Resource = rc.Resource;
+const CommonResourceAttributes = rc.CommonResourceAttributes;
+const Allocator = std.mem.Allocator;
+const windows1252 = @import("windows1252.zig");
+const CodePage = @import("code_pages.zig").CodePage;
+const literals = @import("literals.zig");
+const SourceBytes = literals.SourceBytes;
+const Codepoint = @import("code_pages.zig").Codepoint;
+const lang = @import("lang.zig");
+const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit;
+
+/// Numeric IDs of the predefined resource types. Non-exhaustive: any other
+/// `u8` value is representable but has no name.
+/// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types
+pub const RT = enum(u8) {
+    ACCELERATOR = 9,
+    ANICURSOR = 21,
+    ANIICON = 22,
+    BITMAP = 2,
+    CURSOR = 1,
+    DIALOG = 5,
+    DLGINCLUDE = 17,
+    DLGINIT = 240,
+    FONT = 8,
+    FONTDIR = 7,
+    GROUP_CURSOR = 12, // CURSOR (1) + 11
+    GROUP_ICON = 14, // ICON (3) + 11
+    HTML = 23,
+    ICON = 3,
+    MANIFEST = 24,
+    MENU = 4,
+    MESSAGETABLE = 11,
+    PLUGPLAY = 19,
+    RCDATA = 10,
+    STRING = 6,
+    TOOLBAR = 241,
+    VERSION = 16,
+    VXD = 20,
+    _,
+
+    /// Maps a parsed `Resource` to its predefined resource type.
+    /// Returns null if the resource type is user-defined
+    /// Asserts that the resource is not `stringtable`
+    pub fn fromResource(resource: Resource) ?RT {
+        return switch (resource) {
+            .stringtable => unreachable,
+            .user_defined => null,
+
+            // Resource types that can be named in a resource definition
+            .accelerators => .ACCELERATOR,
+            .bitmap => .BITMAP,
+            .cursor => .GROUP_CURSOR,
+            .dialog, .dialogex => .DIALOG,
+            .dlginclude => .DLGINCLUDE,
+            .dlginit => .DLGINIT,
+            .font => .FONT,
+            .html => .HTML,
+            .icon => .GROUP_ICON,
+            .menu, .menuex => .MENU,
+            .messagetable => .MESSAGETABLE,
+            .plugplay => .PLUGPLAY,
+            .rcdata => .RCDATA,
+            .toolbar => .TOOLBAR,
+            .versioninfo => .VERSION,
+            .vxd => .VXD,
+
+            // The `*_num` resources
+            .cursor_num => .CURSOR,
+            .icon_num => .ICON,
+            .string_num => .STRING,
+            .anicursor_num => .ANICURSOR,
+            .aniicon_num => .ANIICON,
+            .fontdir_num => .FONTDIR,
+            .manifest_num => .MANIFEST,
+        };
+    }
+};
+
+/// The 16-bit memory flags of a resource, along with the logic for how the
+/// common resource attributes modify them.
+/// https://learn.microsoft.com/en-us/windows/win32/menurc/common-resource-attributes
+/// https://learn.microsoft.com/en-us/windows/win32/menurc/resourceheader
+pub const MemoryFlags = packed struct(u16) {
+    value: u16,
+
+    pub const MOVEABLE: u16 = 0x10;
+    // TODO: SHARED and PURE seem to be the same thing? Testing seems to confirm this but
+    //       would like to find mention of it somewhere.
+    pub const SHARED: u16 = 0x20;
+    pub const PURE: u16 = 0x20;
+    pub const PRELOAD: u16 = 0x40;
+    pub const DISCARDABLE: u16 = 0x1000;
+
+    /// Returns the default flags for the given predefined resource type; `null`
+    /// and unnamed `RT` values get `MOVEABLE | SHARED`.
+    ///
+    /// Note: The defaults can have combinations that are not possible to specify within
+    ///       an .rc file, as the .rc attributes imply other values (i.e. specifying
+    ///       DISCARDABLE always implies MOVEABLE and PURE/SHARED, and yet RT_ICON
+    ///       has a default of only MOVEABLE | DISCARDABLE).
+    pub fn defaults(predefined_resource_type: ?RT) MemoryFlags {
+        const fallback: MemoryFlags = .{ .value = MOVEABLE | SHARED };
+        const resource_type = predefined_resource_type orelse return fallback;
+        const flags: u16 = switch (resource_type) {
+            .RCDATA,
+            .BITMAP,
+            .HTML,
+            .MANIFEST,
+            .ACCELERATOR,
+            .VERSION,
+            .MESSAGETABLE,
+            .DLGINIT,
+            .TOOLBAR,
+            .PLUGPLAY,
+            .VXD,
+            => MOVEABLE | SHARED,
+
+            .GROUP_ICON,
+            .GROUP_CURSOR,
+            .STRING,
+            .FONT,
+            .DIALOG,
+            .MENU,
+            .DLGINCLUDE,
+            => MOVEABLE | SHARED | DISCARDABLE,
+
+            .ICON, .CURSOR, .ANIICON, .ANICURSOR => MOVEABLE | DISCARDABLE,
+            .FONTDIR => MOVEABLE | PRELOAD,
+            // Same as predefined_resource_type == null
+            _ => return fallback,
+        };
+        return .{ .value = flags };
+    }
+
+    /// Applies a common resource attribute to the flags. Clearing MOVEABLE,
+    /// SHARED or PURE also clears DISCARDABLE, and setting DISCARDABLE also sets
+    /// MOVEABLE and PURE.
+    pub fn set(self: *MemoryFlags, attribute: CommonResourceAttributes) void {
+        const current = self.value;
+        self.value = switch (attribute) {
+            .preload => current | PRELOAD,
+            .loadoncall => current & ~PRELOAD,
+            .moveable => current | MOVEABLE,
+            .fixed => current & ~(MOVEABLE | DISCARDABLE),
+            .shared => current | SHARED,
+            .nonshared => current & ~(SHARED | DISCARDABLE),
+            .pure => current | PURE,
+            .impure => current & ~(PURE | DISCARDABLE),
+            .discardable => current | DISCARDABLE | MOVEABLE | PURE,
+        };
+    }
+
+    /// Like `set`, except that when `implied_shared_or_pure` is true, `preload`
+    /// additionally clears SHARED and `loadoncall` additionally sets SHARED.
+    pub fn setGroup(self: *MemoryFlags, attribute: CommonResourceAttributes, implied_shared_or_pure: bool) void {
+        const is_preload = switch (attribute) {
+            .preload => true,
+            .loadoncall => false,
+            else => return self.set(attribute),
+        };
+        if (is_preload) {
+            self.value |= PRELOAD;
+            if (implied_shared_or_pure) self.value &= ~SHARED;
+        } else {
+            self.value &= ~PRELOAD;
+            if (implied_shared_or_pure) self.value |= SHARED;
+        }
+    }
+};
+
+/// A language identifier packed into 16 bits: the primary language ID occupies
+/// the low 10 bits and the sublanguage ID the high 6 bits.
+/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
+pub const Language = packed struct(u16) {
+    // Note: This is the default no matter what locale the current system is set to,
+    //       e.g. even if the system's locale is en-GB, en-US will still be the
+    //       default language for resources in the Win32 rc compiler.
+    primary_language_id: u10 = lang.LANG_ENGLISH,
+    sublanguage_id: u6 = lang.SUBLANG_ENGLISH_US,
+
+    /// Default language ID as a u16
+    pub const default: u16 = @bitCast(Language{});
+
+    /// Reinterprets the bits of `int` as a `Language`.
+    pub fn fromInt(int: u16) Language {
+        const language: Language = @bitCast(int);
+        return language;
+    }
+
+    /// Reinterprets the bits of the `Language` as a u16.
+    pub fn asInt(self: Language) u16 {
+        const int: u16 = @bitCast(self);
+        return int;
+    }
+};
+
+/// Ordinals of the predefined control classes.
+/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-dlgitemtemplate#remarks
+pub const ControlClass = enum(u16) {
+    button = 0x80,
+    edit = 0x81,
+    static = 0x82,
+    listbox = 0x83,
+    scrollbar = 0x84,
+    combobox = 0x85,
+
+    /// Returns the predefined class of the given control statement, or null for
+    /// the generic `control` statement.
+    pub fn fromControl(control: rc.Control) ?ControlClass {
+        return switch (control) {
+            .control => null,
+
+            .auto3state,
+            .autocheckbox,
+            .autoradiobutton,
+            .checkbox,
+            .defpushbutton,
+            .groupbox,
+            .pushbox,
+            .pushbutton,
+            .radiobutton,
+            .state3,
+            .userbutton,
+            => .button,
+
+            .edittext, .hedit, .iedit => .edit,
+            .ctext, .icon, .ltext, .rtext => .static,
+            .listbox => .listbox,
+            .scrollbar => .scrollbar,
+            .combobox => .combobox,
+        };
+    }
+
+    /// Returns the style bits implied by the control statement: always
+    /// `WS.CHILD | WS.VISIBLE`, plus whatever the specific control adds.
+    pub fn getImpliedStyle(control: rc.Control) u32 {
+        const control_specific: u32 = switch (control) {
+            .combobox, .control, .scrollbar => 0,
+            .auto3state => BS.AUTO3STATE | WS.TABSTOP,
+            .autocheckbox => BS.AUTOCHECKBOX | WS.TABSTOP,
+            .autoradiobutton => BS.AUTORADIOBUTTON,
+            .checkbox => BS.CHECKBOX | WS.TABSTOP,
+            .ctext => SS.CENTER | WS.GROUP,
+            .defpushbutton => BS.DEFPUSHBUTTON | WS.TABSTOP,
+            .edittext, .hedit, .iedit => WS.TABSTOP | WS.BORDER,
+            .groupbox => BS.GROUPBOX,
+            .icon => SS.ICON,
+            .listbox => LBS.NOTIFY | WS.BORDER,
+            .ltext => WS.GROUP,
+            .pushbox => BS.PUSHBOX | WS.TABSTOP,
+            .pushbutton => WS.TABSTOP,
+            .radiobutton => BS.RADIOBUTTON,
+            .rtext => SS.RIGHT | WS.GROUP,
+            .state3 => BS.@"3STATE" | WS.TABSTOP,
+            .userbutton => BS.USERBUTTON | WS.TABSTOP,
+        };
+        return WS.CHILD | WS.VISIBLE | control_specific;
+    }
+};
+
+/// Either a 0-terminated UTF-16 LE name or a 16-bit ordinal, along with the
+/// logic for parsing one from source bytes and writing it out.
+pub const NameOrOrdinal = union(enum) {
+ // UTF-16 LE
+ name: [:0]const u16,
+ ordinal: u16,
+
+ /// Frees the name (if any) using `allocator`; ordinals own no memory.
+ pub fn deinit(self: NameOrOrdinal, allocator: Allocator) void {
+ switch (self) {
+ .name => |name| {
+ allocator.free(name);
+ },
+ .ordinal => {},
+ }
+ }
+
+ /// Returns the full length of the amount of bytes that would be written by `write`
+ /// (e.g. for an ordinal it will return the length including the 0xFFFF indicator)
+ pub fn byteLen(self: NameOrOrdinal) usize {
+ switch (self) {
+ .name => |name| {
+ // + 1 for 0-terminated
+ return (name.len + 1) * @sizeOf(u16);
+ },
+ .ordinal => return 4,
+ }
+ }
+
+ /// Writes a name as its raw bytes including the 0 terminator, and an ordinal
+ /// as 0xFFFF followed by the ordinal value (both as little-endian u16).
+ pub fn write(self: NameOrOrdinal, writer: anytype) !void {
+ switch (self) {
+ .name => |name| {
+ // The slice is extended by one element to include the sentinel
+ try writer.writeAll(std.mem.sliceAsBytes(name[0 .. name.len + 1]));
+ },
+ .ordinal => |ordinal| {
+ try writer.writeInt(u16, 0xffff, .little);
+ try writer.writeInt(u16, ordinal, .little);
+ },
+ }
+ }
+
+ /// Writes a single zero u16.
+ pub fn writeEmpty(writer: anytype) !void {
+ try writer.writeInt(u16, 0, .little);
+ }
+
+ /// Returns an ordinal if `bytes` parse as one (see `maybeOrdinalFromString`),
+ /// otherwise a name allocated with `allocator` (see `nameFromString`).
+ pub fn fromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal {
+ if (maybeOrdinalFromString(bytes)) |ordinal| {
+ return ordinal;
+ }
+ return nameFromString(allocator, bytes);
+ }
+
+ /// Converts `bytes` (decoded using `bytes.code_page`) into a UTF-16 name of at
+ /// most 256 code units, with ASCII characters converted to uppercase and
+ /// invalid codepoints replaced with U+FFFD. The returned name is allocated
+ /// with `allocator`.
+ pub fn nameFromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal {
+ // Names have a limit of 256 UTF-16 code units + null terminator
+ var buf = try std.ArrayList(u16).initCapacity(allocator, @min(257, bytes.slice.len));
+ errdefer buf.deinit();
+
+ var i: usize = 0;
+ while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) {
+ if (buf.items.len == 256) break;
+
+ const c = codepoint.value;
+ if (c == Codepoint.invalid) {
+ try buf.append(std.mem.nativeToLittle(u16, '�'));
+ } else if (c < 0x7F) {
+ // ASCII chars in names are always converted to uppercase
+ try buf.append(std.mem.nativeToLittle(u16, std.ascii.toUpper(@intCast(c))));
+ } else if (c < 0x10000) {
+ const short: u16 = @intCast(c);
+ try buf.append(std.mem.nativeToLittle(u16, short));
+ } else {
+ // Codepoints outside the BMP are encoded as a surrogate pair
+ const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
+ try buf.append(std.mem.nativeToLittle(u16, high));
+
+ // Note: This can cut-off in the middle of a UTF-16 surrogate pair,
+ // i.e. it can make the string end with an unpaired high surrogate
+ if (buf.items.len == 256) break;
+
+ const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
+ try buf.append(std.mem.nativeToLittle(u16, low));
+ }
+ }
+
+ return NameOrOrdinal{ .name = try buf.toOwnedSliceSentinel(0) };
+ }
+
+ /// Returns `null` if the bytes do not form a valid number.
+ /// Does not allow non-ASCII digits (which the Win32 RC compiler does allow
+ /// in base 10 numbers, see `maybeNonAsciiOrdinalFromString`).
+ pub fn maybeOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal {
+ var buf = bytes.slice;
+ var radix: u8 = 10;
+ if (buf.len > 2 and buf[0] == '0') {
+ switch (buf[1]) {
+ '0'...'9' => {},
+ 'x', 'X' => {
+ radix = 16;
+ buf = buf[2..];
+ // only the first 4 hex digits matter, anything else is ignored
+ // i.e. 0x12345 is treated as if it were 0x1234
+ buf.len = @min(buf.len, 4);
+ },
+ else => return null,
+ }
+ }
+
+ var i: usize = 0;
+ var result: u16 = 0;
+ while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) {
+ const c = codepoint.value;
+ const digit: u8 = switch (c) {
+ 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch switch (radix) {
+ 10 => return null,
+ // non-hex-digits are treated as a terminator rather than invalidating
+ // the number (note: if there are no valid hex digits then the result
+ // will be zero which is not treated as a valid number)
+ 16 => break,
+ else => unreachable,
+ },
+ else => if (radix == 10) return null else break,
+ };
+
+ // Wrapping arithmetic: overflow wraps around instead of being an error
+ if (result != 0) {
+ result *%= radix;
+ }
+ result +%= digit;
+ }
+
+ // Anything that resolves to zero is not interpreted as a number
+ if (result == 0) return null;
+ return NameOrOrdinal{ .ordinal = result };
+ }
+
+ /// The Win32 RC compiler uses `iswdigit` for digit detection for base 10
+ /// numbers, which means that non-ASCII digits are 'accepted' but handled
+ /// in a totally unintuitive manner, leading to arbitrary results.
+ ///
+ /// This function will return the value that such an ordinal 'would' have
+ /// if it was run through the Win32 RC compiler. This allows us to disallow
+ /// non-ASCII digits in number literals but still detect when the Win32
+ /// RC compiler would have allowed them, so that a proper warning/error
+ /// can be emitted.
+ pub fn maybeNonAsciiOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal {
+ const buf = bytes.slice;
+ const radix = 10;
+ if (buf.len > 2 and buf[0] == '0') {
+ switch (buf[1]) {
+ // We only care about base 10 numbers here
+ 'x', 'X' => return null,
+ else => {},
+ }
+ }
+
+ var i: usize = 0;
+ var result: u16 = 0;
+ while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) {
+ const c = codepoint.value;
+ const digit: u16 = digit: {
+ const is_digit = (c >= '0' and c <= '9') or isNonAsciiDigit(c);
+ if (!is_digit) return null;
+ // The 'digit' value is the codepoint's distance from '0', which for
+ // non-ASCII digits is not a value in the range 0-9
+ break :digit @intCast(c - '0');
+ };
+
+ if (result != 0) {
+ result *%= radix;
+ }
+ result +%= digit;
+ }
+
+ // Anything that resolves to zero is not interpreted as a number
+ if (result == 0) return null;
+ return NameOrOrdinal{ .ordinal = result };
+ }
+
+ /// Returns the predefined resource type whose ID equals the ordinal, or null
+ /// for names and for ordinals that do not match any named `RT` value.
+ pub fn predefinedResourceType(self: NameOrOrdinal) ?RT {
+ switch (self) {
+ .ordinal => |ordinal| {
+ // RT is backed by a u8, so larger ordinals can't be converted to it
+ if (ordinal >= 256) return null;
+ switch (@as(RT, @enumFromInt(ordinal))) {
+ .ACCELERATOR,
+ .ANICURSOR,
+ .ANIICON,
+ .BITMAP,
+ .CURSOR,
+ .DIALOG,
+ .DLGINCLUDE,
+ .DLGINIT,
+ .FONT,
+ .FONTDIR,
+ .GROUP_CURSOR,
+ .GROUP_ICON,
+ .HTML,
+ .ICON,
+ .MANIFEST,
+ .MENU,
+ .MESSAGETABLE,
+ .PLUGPLAY,
+ .RCDATA,
+ .STRING,
+ .TOOLBAR,
+ .VERSION,
+ .VXD,
+ => |rt| return rt,
+ _ => return null,
+ }
+ },
+ .name => return null,
+ }
+ }
+};
+
+/// Test helper: fails with `error.TestExpectedEqual` if `actual` has a different
+/// active tag than `expected`, and otherwise compares the payloads.
+fn expectNameOrOrdinal(expected: NameOrOrdinal, actual: NameOrOrdinal) !void {
+    switch (expected) {
+        .name => |expected_name| {
+            if (actual != .name) return error.TestExpectedEqual;
+            try std.testing.expectEqualSlices(u16, expected_name, actual.name);
+        },
+        .ordinal => |expected_ordinal| {
+            if (actual != .ordinal) return error.TestExpectedEqual;
+            try std.testing.expectEqual(expected_ordinal, actual.ordinal);
+        },
+    }
+}
+
+// Exercises NameOrOrdinal.fromString: which inputs become ordinals, which fall
+// back to (uppercased) names, and how over-long names are cut off.
+test "NameOrOrdinal" {
+ // All allocations made by fromString are released together via the arena
+ var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena.deinit();
+
+ const allocator = arena.allocator();
+
+ // zero is treated as a string
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0", .code_page = .windows1252 }),
+ );
+ // any non-digit byte invalidates the number
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1A") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "1a", .code_page = .windows1252 }),
+ );
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1ÿ") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "1\xff", .code_page = .windows1252 }),
+ );
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1€") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "1€", .code_page = .utf8 }),
+ );
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1�") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "1\x80", .code_page = .utf8 }),
+ );
+ // same with overflow that resolves to 0
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("65536") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "65536", .code_page = .windows1252 }),
+ );
+ // hex zero is also treated as a string
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0X0") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0x0", .code_page = .windows1252 }),
+ );
+ // hex numbers work
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .ordinal = 0x100 },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0x100", .code_page = .windows1252 }),
+ );
+ // only the first 4 hex digits matter
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .ordinal = 0x1234 },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0X12345", .code_page = .windows1252 }),
+ );
+ // octal is not supported so it gets treated as a string
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0O1234") },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0o1234", .code_page = .windows1252 }),
+ );
+ // overflow wraps
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .ordinal = @truncate(65635) },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "65635", .code_page = .windows1252 }),
+ );
+ // non-hex-digits in a hex literal are treated as a terminator
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .ordinal = 0x4 },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0x4n", .code_page = .windows1252 }),
+ );
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .ordinal = 0xFA },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "0xFAZ92348", .code_page = .windows1252 }),
+ );
+ // 0 at the start is allowed
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .ordinal = 50 },
+ try NameOrOrdinal.fromString(allocator, .{ .slice = "050", .code_page = .windows1252 }),
+ );
+ // limit of 256 UTF-16 code units, can cut off between a surrogate pair
+ {
+ // Build the expected name by hand: the ASCII prefix widened to UTF-16LE
+ // code units, followed by a lone high surrogate in the final slot.
+ var expected = blk: {
+ // the input before the 𐐷 character, but uppercased
+ const expected_u8_bytes = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528QFFL7SHNSIETG0QKLR1UYPBTUV1PMFQRRA0VJDG354GQEDJMUPGPP1W1EXVNTZVEIZ6K3IPQM1AWGEYALMEODYVEZGOD3MFMGEY8FNR4JUETTB1PZDEWSNDRGZUA8SNXP3NGO";
+ var buf: [256:0]u16 = undefined;
+ for (expected_u8_bytes, 0..) |byte, i| {
+ buf[i] = std.mem.nativeToLittle(u16, byte);
+ }
+ // surrogate pair that is now orphaned
+ buf[255] = std.mem.nativeToLittle(u16, 0xD801);
+ break :blk buf;
+ };
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = &expected },
+ try NameOrOrdinal.fromString(allocator, .{
+ .slice = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528qffL7ShnSIETg0qkLr1UYpbtuv1PMFQRRa0VjDG354GQedJmUPgpp1w1ExVnTzVEiz6K3iPqM1AWGeYALmeODyvEZGOD3MfmGey8fnR4jUeTtB1PzdeWsNDrGzuA8Snxp3NGO𐐷",
+ .code_page = .utf8,
+ }),
+ );
+ }
+}
+
+// Feeds the same input bytes through fromString under two different code pages
+// and checks that the resulting UTF-16 names differ accordingly.
+test "NameOrOrdinal code page awareness" {
+ var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer arena.deinit();
+
+ const allocator = arena.allocator();
+
+ // As UTF-8: the expected name starts with two replacement characters,
+ // followed by the intact 𐐷.
+ try expectNameOrOrdinal(
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("��𐐷") },
+ try NameOrOrdinal.fromString(allocator, .{
+ .slice = "\xF0\x80\x80𐐷",
+ .code_page = .utf8,
+ }),
+ );
+ // As Windows-1252: every input byte is expected to map to its own code unit.
+ try expectNameOrOrdinal(
+ // The UTF-8 representation of 𐐷 is 0xF0 0x90 0x90 0xB7. In order to provide valid
+ // UTF-8 to utf8ToUtf16LeStringLiteral, it uses the UTF-8 representation of the codepoint
+ // <U+0x90> which is 0xC2 0x90. The code units in the expected UTF-16 string are:
+ // { 0x00F0, 0x20AC, 0x20AC, 0x00F0, 0x0090, 0x0090, 0x00B7 }
+ NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("ð€€ð\xC2\x90\xC2\x90·") },
+ try NameOrOrdinal.fromString(allocator, .{
+ .slice = "\xF0\x80\x80𐐷",
+ .code_page = .windows1252,
+ }),
+ );
+}
+
+/// Accumulates the modifier bits of a single accelerator table entry.
+///
+/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-accel#members
+/// https://devblogs.microsoft.com/oldnewthing/20070316-00/?p=27593
+pub const AcceleratorModifiers = struct {
+    /// Bitwise OR of the modifier constants declared below
+    value: u8 = 0,
+    /// Set once `apply` has been called with either `.ascii` or `.virtkey`
+    explicit_ascii_or_virtkey: bool = false,
+
+    pub const ASCII = 0;
+    pub const VIRTKEY = 1;
+    pub const NOINVERT = 1 << 1;
+    pub const SHIFT = 1 << 2;
+    pub const CONTROL = 1 << 3;
+    pub const ALT = 1 << 4;
+    /// Marker for the last accelerator in an accelerator table
+    pub const last_accelerator_in_table = 1 << 7;
+
+    /// ORs the bit for `modifier` into `value` and records whether the type
+    /// (ASCII or VIRTKEY) was stated explicitly.
+    pub fn apply(self: *AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) void {
+        switch (modifier) {
+            .ascii, .virtkey => self.explicit_ascii_or_virtkey = true,
+            else => {},
+        }
+        self.value |= modifierValue(modifier);
+    }
+
+    /// Reports whether `modifier` is currently in effect.
+    pub fn isSet(self: AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) bool {
+        return switch (modifier) {
+            // ASCII has no bit of its own: it is set whenever VIRTKEY is not
+            .ascii => self.value & modifierValue(.virtkey) == 0,
+            else => self.value & modifierValue(modifier) != 0,
+        };
+    }
+
+    /// Maps a parsed modifier keyword to its bit value.
+    fn modifierValue(modifier: rc.AcceleratorTypeAndOptions) u8 {
+        return switch (modifier) {
+            .ascii => ASCII,
+            .virtkey => VIRTKEY,
+            .noinvert => NOINVERT,
+            .shift => SHIFT,
+            .control => CONTROL,
+            .alt => ALT,
+        };
+    }
+
+    /// Flags this entry as the final one in its accelerator table.
+    pub fn markLast(self: *AcceleratorModifiers) void {
+        self.value |= last_accelerator_in_table;
+    }
+};
+
+/// Normalizes the codepoints produced by the iterative string parser before
+/// they are interpreted as accelerator key characters.
+const AcceleratorKeyCodepointTranslator = struct {
+    string_type: literals.StringType,
+
+    /// Passes `null` through unchanged, turns invalid codepoints into U+FFFD,
+    /// and maps escaped integers inside ASCII string literals through the
+    /// Windows-1252 table. Everything else is returned as-is.
+    pub fn translate(self: @This(), maybe_parsed: ?literals.IterativeStringParser.ParsedCodepoint) ?u21 {
+        if (maybe_parsed) |parsed| {
+            const codepoint = parsed.codepoint;
+            if (codepoint == Codepoint.invalid) return 0xFFFD;
+
+            const needs_windows1252_lookup = parsed.from_escaped_integer and self.string_type == .ascii;
+            if (needs_windows1252_lookup) return windows1252.toCodepoint(@truncate(codepoint));
+
+            return codepoint;
+        }
+        return null;
+    }
+};
+
+/// Errors specific to `parseAcceleratorKeyString`:
+/// - `EmptyAccelerator`: the input is empty, or its first codepoint is missing or 0.
+/// - `AcceleratorTooLong`: more codepoints than an accelerator key can hold.
+/// - `InvalidControlCharacter`: `^` with nothing following it.
+/// - `ControlCharacterOutOfRange`: `^` followed by something other than `^`, a-z or A-Z.
+pub const ParseAcceleratorKeyStringError = error{ EmptyAccelerator, AcceleratorTooLong, InvalidControlCharacter, ControlCharacterOutOfRange };
+
+/// Parses the string literal form of an accelerator key into its 16-bit value.
+///
+/// Expects bytes to be the full bytes of a string literal token (e.g. including the "" or L"").
+///
+/// - `^` followed by a letter yields the uppercased letter minus 0x40;
+/// `^^` yields a literal `^`.
+/// - A single codepoint yields that codepoint; when `is_virt` is set, a-z is uppercased.
+/// - Two codepoints are combined as `(first << 8) + second`, truncated to 16 bits.
+/// - A codepoint of 0 acts as a terminator wherever it appears.
+///
+/// When `is_virt` is set and `options.diagnostics` is provided, a warning is
+/// appended to the diagnostics if the string starts with `^`.
+pub fn parseAcceleratorKeyString(bytes: SourceBytes, is_virt: bool, options: literals.StringParseOptions) (ParseAcceleratorKeyStringError || Allocator.Error)!u16 {
+ if (bytes.slice.len == 0) {
+ return error.EmptyAccelerator;
+ }
+
+ var parser = literals.IterativeStringParser.init(bytes, options);
+ var translator = AcceleratorKeyCodepointTranslator{ .string_type = parser.declared_string_type };
+
+ const first_codepoint = translator.translate(try parser.next()) orelse return error.EmptyAccelerator;
+ // 0 is treated as a terminator, so this is equivalent to an empty string
+ if (first_codepoint == 0) return error.EmptyAccelerator;
+
+ if (first_codepoint == '^') {
+ // Note: Emitting this warning unconditionally whenever ^ is the first character
+ // matches the Win32 RC behavior, but it's questionable whether or not
+ // the warning should be emitted for ^^ since that results in the ASCII
+ // character ^ being written to the .res.
+ if (is_virt and options.diagnostics != null) {
+ try options.diagnostics.?.diagnostics.append(.{
+ .err = .ascii_character_not_equivalent_to_virtual_key_code,
+ .type = .warning,
+ .token = options.diagnostics.?.token,
+ });
+ }
+
+ // A lone ^ with nothing after it is an error rather than a literal caret
+ const c = translator.translate(try parser.next()) orelse return error.InvalidControlCharacter;
+ switch (c) {
+ '^' => return '^', // special case
+ // Control characters: ^A (or ^a) is 1, ^Z (or ^z) is 26
+ 'a'...'z', 'A'...'Z' => return std.ascii.toUpper(@intCast(c)) - 0x40,
+ // Note: The Windows RC compiler allows more than just A-Z, but what it allows
+ // seems to be tied to some sort of Unicode-aware 'is character' function or something.
+ // The full list of codepoints that trigger an out-of-range error can be found here:
+ // https://gist.github.com/squeek502/2e9d0a4728a83eed074ad9785a209fd0
+ // For codepoints >= 0x80 that don't trigger the error, the Windows RC compiler takes the
+ // codepoint and does the `- 0x40` transformation as if it were A-Z which couldn't lead
+ // to anything useable, so there's no point in emulating that behavior--erroring for
+ // all non-[a-zA-Z] makes much more sense and is what was probably intended by the
+ // Windows RC compiler.
+ else => return error.ControlCharacterOutOfRange,
+ }
+ // Every switch prong above returns, so this line is never analyzed
+ @compileError("this should be unreachable");
+ }
+
+ // The second codepoint is read before the first is finalized because a
+ // first codepoint >= 0x10000 must not be followed by anything but a terminator
+ const second_codepoint = translator.translate(try parser.next());
+
+ var result: u32 = initial_value: {
+ if (first_codepoint >= 0x10000) {
+ if (second_codepoint != null and second_codepoint.? != 0) return error.AcceleratorTooLong;
+ // No idea why it works this way, but this seems to match the Windows RC
+ // behavior for codepoints >= 0x10000
+ const low = @as(u16, @intCast(first_codepoint & 0x3FF)) + 0xDC00;
+ const extra = (first_codepoint - 0x10000) / 0x400;
+ break :initial_value low + extra * 0x100;
+ }
+ break :initial_value first_codepoint;
+ };
+
+ // 0 is treated as a terminator
+ if (second_codepoint != null and second_codepoint.? == 0) return @truncate(result);
+
+ const third_codepoint = translator.translate(try parser.next());
+ // 0 is treated as a terminator, so a 0 in the third position is fine but
+ // anything else is too many codepoints for an accelerator
+ if (third_codepoint != null and third_codepoint.? != 0) return error.AcceleratorTooLong;
+
+ if (second_codepoint) |c| {
+ if (c >= 0x10000) return error.AcceleratorTooLong;
+ // Two codepoints: the first moves up one byte and the second is added
+ // on top; the final @truncate keeps only the low 16 bits
+ result <<= 8;
+ result += c;
+ } else if (is_virt) {
+ // Only a single-codepoint virtual key gets uppercased
+ switch (result) {
+ 'a'...'z' => result -= 0x20, // toUpper
+ else => {},
+ }
+ }
+ return @truncate(result);
+}
+
+// Covers parseAcceleratorKeyString: control characters (^X), one- and
+// two-codepoint keys, terminators, code page dependence, escapes in ASCII vs
+// wide literals, every error case, and codepoints >= 0x10000.
+test "accelerator keys" {
+ // ^ followed by a letter is a control character, regardless of case
+ try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString(
+ .{ .slice = "\"^a\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString(
+ .{ .slice = "\"^A\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 26), try parseAcceleratorKeyString(
+ .{ .slice = "\"^Z\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, '^'), try parseAcceleratorKeyString(
+ .{ .slice = "\"^^\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+
+ // plain one- and two-character keys
+ try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString(
+ .{ .slice = "\"a\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0x6162), try parseAcceleratorKeyString(
+ .{ .slice = "\"ab\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+
+ // with is_virt set, a single lowercase letter is uppercased but a
+ // two-character key is left alone
+ try std.testing.expectEqual(@as(u16, 'C'), try parseAcceleratorKeyString(
+ .{ .slice = "\"c\"", .code_page = .windows1252 },
+ true,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0x6363), try parseAcceleratorKeyString(
+ .{ .slice = "\"cc\"", .code_page = .windows1252 },
+ true,
+ .{},
+ ));
+
+ // \x00 or any escape that evaluates to zero acts as a terminator, everything past it
+ // is ignored
+ try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString(
+ .{ .slice = "\"a\\0bcdef\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+
+ // \x80 is € in Windows-1252, which is Unicode codepoint 20AC
+ try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString(
+ .{ .slice = "\"\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ // This depends on the code page, though, with codepage 65001, \x80
+ // on its own is invalid UTF-8 so it gets converted to the replacement character
+ try std.testing.expectEqual(@as(u16, 0xFFFD), try parseAcceleratorKeyString(
+ .{ .slice = "\"\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString(
+ .{ .slice = "\"\x80\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ // This also behaves the same with escaped characters
+ try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString(
+ .{ .slice = "\"\\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ // Even with utf8 code page
+ try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString(
+ .{ .slice = "\"\\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString(
+ .{ .slice = "\"\\x80\\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ // Wide string with the actual characters behaves like the ASCII string version
+ try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString(
+ .{ .slice = "L\"\x80\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ // But wide string with escapes behaves differently
+ try std.testing.expectEqual(@as(u16, 0x8080), try parseAcceleratorKeyString(
+ .{ .slice = "L\"\\x80\\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ // and invalid escapes within wide strings get skipped
+ try std.testing.expectEqual(@as(u16, 'z'), try parseAcceleratorKeyString(
+ .{ .slice = "L\"\\Hz\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+
+ // any non-A-Z codepoints are illegal
+ try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString(
+ .{ .slice = "\"^\x83\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString(
+ .{ .slice = "\"^1\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectError(error.InvalidControlCharacter, parseAcceleratorKeyString(
+ .{ .slice = "\"^\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectError(error.EmptyAccelerator, parseAcceleratorKeyString(
+ .{ .slice = "\"\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString(
+ .{ .slice = "\"hello\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+ try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString(
+ .{ .slice = "\"^\x80\"", .code_page = .windows1252 },
+ false,
+ .{},
+ ));
+
+ // Invalid UTF-8 gets converted to 0xFFFD, multiple invalids get shifted and added together
+ // The behavior is the same for ascii and wide strings
+ try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString(
+ .{ .slice = "\"\x80\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString(
+ .{ .slice = "L\"\x80\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+
+ // Codepoints >= 0x10000
+ try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString(
+ .{ .slice = "\"\xF0\x90\x84\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString(
+ .{ .slice = "L\"\xF0\x90\x84\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ try std.testing.expectEqual(@as(u16, 0x9C01), try parseAcceleratorKeyString(
+ .{ .slice = "\"\xF4\x80\x80\x81\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ // anything before or after a codepoint >= 0x10000 causes an error
+ try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString(
+ .{ .slice = "\"a\xF0\x90\x80\x80\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+ try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString(
+ .{ .slice = "\"\xF0\x90\x80\x80a\"", .code_page = .utf8 },
+ false,
+ .{},
+ ));
+}
+
+/// Converts arbitrary text into an ordinal by treating every UTF-16 code unit
+/// as if it were a decimal digit, whether or not it actually is one.
+pub const ForcedOrdinal = struct {
+    /// Decodes `bytes` with its code page and accumulates each resulting code
+    /// unit as a digit. The result is truncated to 16 bits.
+    pub fn fromBytes(bytes: SourceBytes) u16 {
+        var accumulator: u21 = 0;
+        var offset: usize = 0;
+        while (bytes.code_page.codepointAt(offset, bytes.slice)) |decoded| : (offset += decoded.byte_len) {
+            const value = decoded.value;
+            if (value >= 0x10000 and value <= 0x10FFFF) {
+                // Codepoints that would need a surrogate pair in UTF-16 are
+                // broken up into their UTF-16 code units and each code unit
+                // is interpreted as a digit.
+                const high_surrogate = @as(u16, @intCast((value - 0x10000) >> 10)) + 0xD800;
+                const low_surrogate = @as(u16, @intCast(value & 0x3FF)) + 0xDC00;
+                accumulator = appendDigit(accumulator, high_surrogate);
+                accumulator = appendDigit(accumulator, low_surrogate);
+                continue;
+            }
+            // Undecodable input counts as the replacement character
+            const code_unit: u21 = if (value == Codepoint.invalid) 0xFFFD else value;
+            accumulator = appendDigit(accumulator, code_unit);
+        }
+        return @truncate(accumulator);
+    }
+
+    /// Same accumulation as `fromBytes`, for input that is already UTF-16LE.
+    pub fn fromUtf16Le(utf16: [:0]const u16) u16 {
+        var ordinal: u16 = 0;
+        for (utf16) |code_unit_le| {
+            const code_unit = std.mem.littleToNative(u16, code_unit_le);
+            ordinal = ordinal *% 10 +% (code_unit -% '0');
+        }
+        return ordinal;
+    }
+
+    /// Shifts `accumulator` one decimal place and adds `code_unit - '0'`,
+    /// wrapping on both overflow and underflow.
+    fn appendDigit(accumulator: u21, code_unit: u21) u21 {
+        return accumulator *% 10 +% (code_unit -% '0');
+    }
+};
+
+// Checks ForcedOrdinal against plain digits, non-digit ASCII, non-ASCII
+// characters under both code pages, invalid UTF-8, and surrogate pairs.
+test "forced ordinal" {
+ // non-digit characters are still accumulated as (character - '0'), with wrapping
+ try std.testing.expectEqual(@as(u16, 3200), ForcedOrdinal.fromBytes(.{ .slice = "3200", .code_page = .windows1252 }));
+ try std.testing.expectEqual(@as(u16, 0x33), ForcedOrdinal.fromBytes(.{ .slice = "1+1", .code_page = .windows1252 }));
+ try std.testing.expectEqual(@as(u16, 65531), ForcedOrdinal.fromBytes(.{ .slice = "1!", .code_page = .windows1252 }));
+
+ // the same character gives the same result whichever code page encodes it
+ try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0\x8C", .code_page = .windows1252 }));
+ try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0Œ", .code_page = .utf8 }));
+
+ // invalid UTF-8 gets converted to 0xFFFD (replacement char) and then interpreted as a digit
+ try std.testing.expectEqual(@as(u16, 0xFFCD), ForcedOrdinal.fromBytes(.{ .slice = "0\x81", .code_page = .utf8 }));
+ // codepoints >= 0x10000
+ try std.testing.expectEqual(@as(u16, 0x49F2), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10002}", .code_page = .utf8 }));
+ try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10100}", .code_page = .utf8 }));
+
+ // From UTF-16
+ try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromUtf16Le(&[_:0]u16{ std.mem.nativeToLittle(u16, '0'), std.mem.nativeToLittle(u16, 'Œ') }));
+ try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromUtf16Le(std.unicode.utf8ToUtf16LeStringLiteral("0\u{10100}")));
+}
+
+/// In-memory form of the fixed-size portion of a version resource.
+///
+/// https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo
+pub const FixedFileInfo = struct {
+    file_version: Version = .{},
+    product_version: Version = .{},
+    file_flags_mask: u32 = 0,
+    file_flags: u32 = 0,
+    file_os: u32 = 0,
+    file_type: u32 = 0,
+    file_subtype: u32 = 0,
+    file_date: Version = .{}, // TODO: I think this is always all zeroes?
+
+    pub const signature = 0xFEEF04BD;
+    // Note: This corresponds to a version of 1.0
+    pub const version = 0x00010000;
+
+    pub const byte_len = 0x34;
+    pub const key = std.unicode.utf8ToUtf16LeStringLiteral("VS_VERSION_INFO");
+
+    /// A four-part version number, most significant part first.
+    pub const Version = struct {
+        parts: [4]u16 = .{ 0, 0, 0, 0 },
+
+        /// `parts[0]` in the upper 16 bits, `parts[1]` in the lower 16 bits.
+        pub fn mostSignificantCombinedParts(self: Version) u32 {
+            return combine(self.parts[0], self.parts[1]);
+        }
+
+        /// `parts[2]` in the upper 16 bits, `parts[3]` in the lower 16 bits.
+        pub fn leastSignificantCombinedParts(self: Version) u32 {
+            return combine(self.parts[2], self.parts[3]);
+        }
+
+        /// Packs two 16-bit halves into one 32-bit value.
+        fn combine(upper: u16, lower: u16) u32 {
+            return (@as(u32, upper) << 16) | lower;
+        }
+    };
+
+    /// Serializes the structure as thirteen little-endian 32-bit integers
+    /// (0x34 bytes in total), stopping at the first write that fails.
+    pub fn write(self: FixedFileInfo, writer: anytype) !void {
+        const dwords = [_]u32{
+            signature,
+            version,
+            self.file_version.mostSignificantCombinedParts(),
+            self.file_version.leastSignificantCombinedParts(),
+            self.product_version.mostSignificantCombinedParts(),
+            self.product_version.leastSignificantCombinedParts(),
+            self.file_flags_mask,
+            self.file_flags,
+            self.file_os,
+            self.file_type,
+            self.file_subtype,
+            self.file_date.mostSignificantCombinedParts(),
+            self.file_date.leastSignificantCombinedParts(),
+        };
+        for (dwords) |dword| {
+            try writer.writeInt(u32, dword, .little);
+        }
+    }
+};
+
+// Each pair of 16-bit parts must be packed into one 32-bit value, with the
+// earlier part in the upper half.
+test "FixedFileInfo.Version" {
+    const sample: FixedFileInfo.Version = .{ .parts = .{ 1, 2, 3, 4 } };
+    const upper_pair = sample.mostSignificantCombinedParts();
+    const lower_pair = sample.leastSignificantCombinedParts();
+    try std.testing.expectEqual(@as(u32, 0x00010002), upper_pair);
+    try std.testing.expectEqual(@as(u32, 0x00030004), lower_pair);
+}
+
+/// Type tags for nodes of a version resource.
+/// NOTE(review): presumably these are the `wType` values of the version
+/// resource structures (1 = text data, 0 = binary data) -- confirm against the
+/// code that writes version nodes.
+pub const VersionNode = struct {
+ pub const type_string: u16 = 1;
+ pub const type_binary: u16 = 0;
+};
+
+/// Accumulates the 16-bit flags word of a single menu item.
+pub const MenuItemFlags = struct {
+    value: u16 = 0,
+
+    /// Turns on the `MF` bit that corresponds to `option`.
+    pub fn apply(self: *MenuItemFlags, option: rc.MenuItem.Option) void {
+        const mask = optionValue(option);
+        self.value = self.value | mask;
+    }
+
+    /// Reports whether the `MF` bit that corresponds to `option` is on.
+    pub fn isSet(self: MenuItemFlags, option: rc.MenuItem.Option) bool {
+        const mask = optionValue(option);
+        return (self.value & mask) != 0;
+    }
+
+    /// Maps a parsed menu item option to its `MF` flag, narrowed to 16 bits.
+    fn optionValue(option: rc.MenuItem.Option) u16 {
+        const flag: u32 = switch (option) {
+            .checked => MF.CHECKED,
+            .grayed => MF.GRAYED,
+            .help => MF.HELP,
+            .inactive => MF.DISABLED,
+            .menubarbreak => MF.MENUBARBREAK,
+            .menubreak => MF.MENUBREAK,
+        };
+        return @intCast(flag);
+    }
+
+    /// Flags this item with `MF.END`.
+    pub fn markLast(self: *MenuItemFlags) void {
+        const end_flag: u16 = @intCast(MF.END);
+        self.value |= end_flag;
+    }
+};
+
+/// Menu Flags from WinUser.h
+/// This is not complete, it only contains what is needed
+///
+/// All values declared here fit in 16 bits, which `MenuItemFlags` relies on
+/// when it narrows them with `@intCast`.
+pub const MF = struct {
+ pub const GRAYED: u32 = 0x00000001;
+ pub const DISABLED: u32 = 0x00000002;
+ pub const CHECKED: u32 = 0x00000008;
+ pub const POPUP: u32 = 0x00000010;
+ pub const MENUBARBREAK: u32 = 0x00000020;
+ pub const MENUBREAK: u32 = 0x00000040;
+ pub const HELP: u32 = 0x00004000;
+ // ORed in by MenuItemFlags.markLast
+ pub const END: u32 = 0x00000080;
+};
+
+/// Window Styles from WinUser.h
+pub const WS = struct {
+ // OVERLAPPED is zero, i.e. the absence of any other style bit
+ pub const OVERLAPPED: u32 = 0x00000000;
+ pub const POPUP: u32 = 0x80000000;
+ pub const CHILD: u32 = 0x40000000;
+ pub const MINIMIZE: u32 = 0x20000000;
+ pub const VISIBLE: u32 = 0x10000000;
+ pub const DISABLED: u32 = 0x08000000;
+ pub const CLIPSIBLINGS: u32 = 0x04000000;
+ pub const CLIPCHILDREN: u32 = 0x02000000;
+ pub const MAXIMIZE: u32 = 0x01000000;
+ // CAPTION is a combination of two bits rather than a bit of its own
+ pub const CAPTION: u32 = BORDER | DLGFRAME;
+ pub const BORDER: u32 = 0x00800000;
+ pub const DLGFRAME: u32 = 0x00400000;
+ pub const VSCROLL: u32 = 0x00200000;
+ pub const HSCROLL: u32 = 0x00100000;
+ pub const SYSMENU: u32 = 0x00080000;
+ pub const THICKFRAME: u32 = 0x00040000;
+ pub const GROUP: u32 = 0x00020000;
+ pub const TABSTOP: u32 = 0x00010000;
+
+ // Note: these two share their values with GROUP and TABSTOP respectively
+ pub const MINIMIZEBOX: u32 = 0x00020000;
+ pub const MAXIMIZEBOX: u32 = 0x00010000;
+
+ // Alternate names for styles declared above
+ pub const TILED: u32 = OVERLAPPED;
+ pub const ICONIC: u32 = MINIMIZE;
+ pub const SIZEBOX: u32 = THICKFRAME;
+ pub const TILEDWINDOW: u32 = OVERLAPPEDWINDOW;
+
+ // Common Window Styles
+ pub const OVERLAPPEDWINDOW: u32 = OVERLAPPED | CAPTION | SYSMENU | THICKFRAME | MINIMIZEBOX | MAXIMIZEBOX;
+ pub const POPUPWINDOW: u32 = POPUP | BORDER | SYSMENU;
+ pub const CHILDWINDOW: u32 = CHILD;
+};
+
+/// Dialog Box Template Styles from WinUser.h
+/// Only a single style is declared here.
+pub const DS = struct {
+ // NOTE(review): presumably mirrors DS_SETFONT -- confirm against WinUser.h
+ pub const SETFONT: u32 = 0x40;
+};
+
+/// Button Control Styles from WinUser.h
+/// This is not complete, it only contains what is needed
+pub const BS = struct {
+ // The values 0x0 through 0xB are consecutive numbers rather than
+ // independent bits; all of them fit within TYPEMASK.
+ pub const PUSHBUTTON: u32 = 0x00000000;
+ pub const DEFPUSHBUTTON: u32 = 0x00000001;
+ pub const CHECKBOX: u32 = 0x00000002;
+ pub const AUTOCHECKBOX: u32 = 0x00000003;
+ pub const RADIOBUTTON: u32 = 0x00000004;
+ pub const @"3STATE": u32 = 0x00000005;
+ pub const AUTO3STATE: u32 = 0x00000006;
+ pub const GROUPBOX: u32 = 0x00000007;
+ pub const USERBUTTON: u32 = 0x00000008;
+ pub const AUTORADIOBUTTON: u32 = 0x00000009;
+ pub const PUSHBOX: u32 = 0x0000000A;
+ pub const OWNERDRAW: u32 = 0x0000000B;
+ pub const TYPEMASK: u32 = 0x0000000F;
+ // LEFTTEXT lies outside TYPEMASK
+ pub const LEFTTEXT: u32 = 0x00000020;
+};
+
+/// Static Control Constants from WinUser.h
+/// This is not complete, it only contains what is needed
+pub const SS = struct {
+ // Consecutive numbers (0 through 3), not independent bits
+ pub const LEFT: u32 = 0x00000000;
+ pub const CENTER: u32 = 0x00000001;
+ pub const RIGHT: u32 = 0x00000002;
+ pub const ICON: u32 = 0x00000003;
+};
+
+/// Listbox Styles from WinUser.h
+/// This is not complete, it only contains what is needed
+pub const LBS = struct {
+ // The only listbox style declared here
+ pub const NOTIFY: u32 = 0x0001;
+};
diff --git a/lib/compiler/resinator/source_mapping.zig b/lib/compiler/resinator/source_mapping.zig
@@ -0,0 +1,831 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const utils = @import("utils.zig");
+const UncheckedSliceWriter = utils.UncheckedSliceWriter;
+
+/// Return value of `parseAndRemoveLineCommands`.
+pub const ParseLineCommandsResult = struct {
+ // The written portion of the caller-provided output buffer, with
+ // trailing whitespace trimmed off
+ result: []u8,
+ // Maps each line of `result` back to a line in an original file
+ mappings: SourceMappings,
+};
+
+/// The original-file position that the line currently being scanned belongs to.
+const CurrentMapping = struct {
+ // Line number within the original file; handleLineEnd increments it
+ line_num: usize = 1,
+ // Name of the original file; freed by parseAndRemoveLineCommands on exit
+ filename: std.ArrayListUnmanaged(u8) = .{},
+ // Starts out true and is cleared by handleLineEnd
+ pending: bool = true,
+ // While true, parseAndRemoveLineCommands writes nothing to its output
+ ignore_contents: bool = false,
+};
+
+/// Options accepted by `parseAndRemoveLineCommands`.
+pub const ParseAndRemoveLineCommandsOptions = struct {
+ // When provided, this name seeds the current mapping's filename and is
+ // registered in the resulting `mappings.files` table as the root file
+ initial_filename: ?[]const u8 = null,
+};
+
+/// Parses and removes #line commands as well as all source code that is within a file
+/// with .c or .h extensions.
+///
+/// > RC treats files with the .c and .h extensions in a special manner. It
+/// > assumes that a file with one of these extensions does not contain
+/// > resources. If a file has the .c or .h file name extension, RC ignores all
+/// > lines in the file except the preprocessor directives. Therefore, to
+/// > include a file that contains resources in another resource script, give
+/// > the file to be included an extension other than .c or .h.
+/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
+///
+/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
+/// between the lines and their corresponding lines in their original files.
+///
+/// `buf` must be at least as long as `source`
+/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
+///
+/// If `options.initial_filename` is provided, that filename is guaranteed to be
+/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
+///
+/// On error, any mappings built so far are freed before returning.
+pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
+    var parse_result = ParseLineCommandsResult{
+        .result = undefined,
+        .mappings = .{},
+    };
+    errdefer parse_result.mappings.deinit(allocator);
+
+    var current_mapping: CurrentMapping = .{};
+    defer current_mapping.filename.deinit(allocator);
+
+    if (options.initial_filename) |initial_filename| {
+        try current_mapping.filename.appendSlice(allocator, initial_filename);
+        parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
+    }
+
+    std.debug.assert(buf.len >= source.len);
+    var result = UncheckedSliceWriter{ .slice = buf };
+    const State = enum {
+        line_start,
+        preprocessor,
+        non_preprocessor,
+    };
+    var state: State = .line_start;
+    var index: usize = 0;
+    // Start of the leading whitespace (or of the '#') on the current line.
+    // Writing it out is deferred until the kind of line is known.
+    var pending_start: ?usize = null;
+    var preprocessor_start: usize = 0;
+    // Line number within the output, as opposed to within the original file
+    var line_number: usize = 1;
+    while (index < source.len) : (index += 1) {
+        const c = source[index];
+        switch (state) {
+            .line_start => switch (c) {
+                '#' => {
+                    preprocessor_start = index;
+                    state = .preprocessor;
+                    if (pending_start == null) {
+                        pending_start = index;
+                    }
+                },
+                '\r', '\n' => {
+                    const is_crlf = formsLineEndingPair(source, c, index + 1);
+                    if (!current_mapping.ignore_contents) {
+                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+
+                        result.write(c);
+                        if (is_crlf) result.write(source[index + 1]);
+                        line_number += 1;
+                    }
+                    // Skip the second byte of a two-byte line ending
+                    if (is_crlf) index += 1;
+                    pending_start = null;
+                },
+                ' ', '\t', '\x0b', '\x0c' => {
+                    if (pending_start == null) {
+                        pending_start = index;
+                    }
+                },
+                else => {
+                    state = .non_preprocessor;
+                    if (pending_start != null) {
+                        if (!current_mapping.ignore_contents) {
+                            // Flush the deferred leading whitespace together with `c`
+                            result.writeSlice(source[pending_start.? .. index + 1]);
+                        }
+                        pending_start = null;
+                        continue;
+                    }
+                    if (!current_mapping.ignore_contents) {
+                        result.write(c);
+                    }
+                },
+            },
+            .preprocessor => switch (c) {
+                '\r', '\n' => {
+                    // Now that we have the full line we can decide what to do with it
+                    const preprocessor_str = source[preprocessor_start..index];
+                    const is_crlf = formsLineEndingPair(source, c, index + 1);
+                    if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
+                        // #line commands are consumed: nothing is written to the output
+                        try handleLineCommand(allocator, preprocessor_str, &current_mapping);
+                    } else {
+                        if (!current_mapping.ignore_contents) {
+                            try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+
+                            const line_ending_len: usize = if (is_crlf) 2 else 1;
+                            result.writeSlice(source[pending_start.? .. index + line_ending_len]);
+                            line_number += 1;
+                        }
+                    }
+                    if (is_crlf) index += 1;
+                    state = .line_start;
+                    pending_start = null;
+                },
+                else => {},
+            },
+            .non_preprocessor => switch (c) {
+                '\r', '\n' => {
+                    const is_crlf = formsLineEndingPair(source, c, index + 1);
+                    if (!current_mapping.ignore_contents) {
+                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+
+                        result.write(c);
+                        if (is_crlf) result.write(source[index + 1]);
+                        line_number += 1;
+                    }
+                    if (is_crlf) index += 1;
+                    state = .line_start;
+                    pending_start = null;
+                },
+                else => {
+                    if (!current_mapping.ignore_contents) {
+                        result.write(c);
+                    }
+                },
+            },
+        }
+    } else {
+        // End of input: finish off a final line that has no line ending
+        switch (state) {
+            .line_start => {},
+            .non_preprocessor => {
+                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+            },
+            .preprocessor => {
+                // Now that we have the full line we can decide what to do with it
+                const preprocessor_str = source[preprocessor_start..index];
+                if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
+                    try handleLineCommand(allocator, preprocessor_str, &current_mapping);
+                } else {
+                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+                    if (!current_mapping.ignore_contents) {
+                        result.writeSlice(source[pending_start.?..index]);
+                    }
+                }
+            },
+        }
+    }
+
+    parse_result.result = result.getWritten();
+
+    // Remove whitespace from the end of the result. This avoids issues when the
+    // preprocessor adds a newline to the end of the file, since then the
+    // post-preprocessed source could have more lines than the corresponding input source and
+    // the inserted line can't be mapped to any lines in the original file.
+    // There's no way that whitespace at the end of a file can affect the parsing
+    // of the RC script so this is okay to do unconditionally.
+    // TODO: There might be a better way around this
+    while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
+        parse_result.result.len -= 1;
+    }
+
+    // If there have been no line mappings at all, then we're dealing with an empty file.
+    // In this case, we want to fake a line mapping just so that we return something
+    // that is useable in the same way that a non-empty mapping would be.
+    if (parse_result.mappings.sources.root == null) {
+        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
+    }
+
+    return parse_result;
+}
+
+/// Reports whether `line_ending` and the byte at `source[next_index]` together make up
+/// a two-byte line ending, as decided by `utils.isLineEndingPair`.
+/// Returns false when `next_index` is at or past the end of `source`.
+///
+/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
+pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
+    return next_index < source.len and
+        utils.isLineEndingPair(line_ending, source[next_index]);
+}
+
+/// Records the mapping for one finished line of post-processed output.
+///
+/// Interns the current filename in `mapping.files`, maps `post_processed_line_number`
+/// to the current line of `current_mapping`, then advances `current_mapping` by one
+/// line and clears its `pending` flag.
+pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
+    const corresponding_line = current_mapping.line_num;
+    const file_offset = try mapping.files.put(allocator, current_mapping.filename.items);
+    try mapping.set(post_processed_line_number, corresponding_line, file_offset);
+
+    current_mapping.pending = false;
+    current_mapping.line_num = corresponding_line + 1;
+}
+
+// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
+/// Parses a `#line <linenum> "<filename>"` directive and applies it to `current_mapping`.
+///
+/// On success, `current_mapping.line_num` and `current_mapping.filename` are replaced,
+/// `pending` is set, and `ignore_contents` becomes true when the filename ends in
+/// `.c` or `.h` (case-insensitive).
+///
+/// Malformed directives (wrong keyword, non-decimal line number, missing or unquoted
+/// filename, invalid escapes, or a filename containing a NUL byte) are silently ignored
+/// and leave `current_mapping` untouched. Only `error.OutOfMemory` is propagated.
+pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void {
+    // TODO: Are there other whitespace characters that should be included?
+    var tokenizer = std.mem.tokenizeAny(u8, line_command, " \t");
+    const line_directive = tokenizer.next() orelse return; // #line
+    if (!std.mem.eql(u8, line_directive, "#line")) return;
+    const linenum_str = tokenizer.next() orelse return;
+    const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return;
+
+    // Everything after the line number is the filename literal; drop any trailing
+    // whitespace (which includes a trailing line ending).
+    const filename_literal = std.mem.trimRight(u8, tokenizer.rest(), &std.ascii.whitespace);
+    if (filename_literal.len < 2) return;
+    const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"';
+    if (!is_quoted) return;
+    const filename = parseFilename(allocator, filename_literal[1 .. filename_literal.len - 1]) catch |err| switch (err) {
+        error.OutOfMemory => |e| return e,
+        else => return,
+    };
+    defer allocator.free(filename);
+
+    // \x00 bytes in the filename is incompatible with how StringTable works
+    if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return;
+
+    current_mapping.line_num = linenum;
+    current_mapping.filename.clearRetainingCapacity();
+    try current_mapping.filename.appendSlice(allocator, filename);
+    current_mapping.pending = true;
+    current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
+}
+
+/// Allocating variant of `parseAndRemoveLineCommands`.
+///
+/// Allocates an output buffer of `source.len` bytes, runs the parse into it, and then
+/// resizes the buffer to the length of the written result.
+/// On success the caller owns `result.result` (free it with `allocator`) and
+/// `result.mappings` (release it with `mappings.deinit(allocator)`).
+pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
+    const buf = try allocator.alloc(u8, source.len);
+    errdefer allocator.free(buf);
+    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
+    // From here on the mappings belong to `result`; release them if the resize below
+    // fails, otherwise they would leak on that error path.
+    errdefer result.mappings.deinit(allocator);
+    result.result = try allocator.realloc(buf, result.result.len);
+    return result;
+}
+
+/// C-style string parsing with a few caveats:
+/// - The str cannot contain newlines or carriage returns
+/// - Hex and octal escape are limited to u8
+/// - No handling/support for L, u, or U prefixed strings
+/// - The start and end double quotes should be omitted from the `str`
+/// - Other than the above, does not assume any validity of the strings (i.e. there
+///   may be unescaped double quotes within the str) and will return error.InvalidString
+///   on any problems found.
+///
+/// Supported escapes: the simple single-character escapes (including the non-standard
+/// `\e`), `\x` followed by one or more hex digits, `\` followed by one to three octal
+/// digits, `\u` with exactly 4 hex digits and `\U` with exactly 8 hex digits.
+///
+/// The result is a UTF-8 encoded string. Caller owns the returned slice.
+fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
+    const Mode = enum {
+        literal,
+        backslash,
+        hex,
+        octal,
+        unicode,
+    };
+
+    // No escape sequence produces more bytes than it occupies in `str`, so `str.len`
+    // bytes of capacity cover every AssumeCapacity append below.
+    var out = try std.ArrayList(u8).initCapacity(allocator, str.len);
+    errdefer out.deinit();
+
+    var mode: Mode = .literal;
+    // Numeric value of the escape currently being read.
+    var value: u64 = undefined;
+    // Number of digits consumed so far by the escape currently being read.
+    var digit_count: usize = undefined;
+    // Number of hex digits a \u (4) or \U (8) escape must have.
+    var unicode_digits: u8 = undefined;
+
+    var i: usize = 0;
+    while (i < str.len) : (i += 1) {
+        const byte = str[i];
+        switch (mode) {
+            .literal => {
+                if (byte == '"') return error.InvalidString;
+                if (byte == '\\') {
+                    mode = .backslash;
+                } else {
+                    out.appendAssumeCapacity(byte);
+                }
+            },
+            .backslash => switch (byte) {
+                'x' => {
+                    value = 0;
+                    digit_count = 0;
+                    mode = .hex;
+                },
+                '0'...'7' => {
+                    value = byte - '0';
+                    digit_count = 1;
+                    mode = .octal;
+                },
+                'u', 'U' => {
+                    value = 0;
+                    digit_count = 0;
+                    unicode_digits = if (byte == 'u') 4 else 8;
+                    mode = .unicode;
+                },
+                else => {
+                    const unescaped: u8 = switch (byte) {
+                        '\'', '"', '\\', '?' => byte,
+                        'n' => '\n',
+                        'r' => '\r',
+                        't' => '\t',
+                        'a' => '\x07',
+                        'b' => '\x08',
+                        'e' => '\x1b', // non-standard
+                        'f' => '\x0c',
+                        'v' => '\x0b',
+                        else => return error.InvalidString,
+                    };
+                    out.appendAssumeCapacity(unescaped);
+                    mode = .literal;
+                },
+            },
+            .hex => {
+                const digit = std.fmt.charToDigit(byte, 16) catch {
+                    // The first non-hex byte ends the escape; at least one digit is required.
+                    if (digit_count == 0) return error.InvalidString;
+                    out.appendAssumeCapacity(@intCast(value));
+                    mode = .literal;
+                    i -= 1; // reconsume
+                    continue;
+                };
+                value = value * 16 + digit;
+                if (value > std.math.maxInt(u8)) return error.InvalidString;
+                digit_count += 1;
+            },
+            .octal => {
+                const digit = std.fmt.charToDigit(byte, 8) catch {
+                    // The first non-octal byte ends the escape. The first digit was
+                    // already consumed when this mode was entered.
+                    out.appendAssumeCapacity(@intCast(value));
+                    mode = .literal;
+                    i -= 1; // reconsume
+                    continue;
+                };
+                value = value * 8 + digit;
+                if (value > std.math.maxInt(u8)) return error.InvalidString;
+                digit_count += 1;
+                // An octal escape is at most three digits long.
+                if (digit_count == 3) {
+                    out.appendAssumeCapacity(@intCast(value));
+                    mode = .literal;
+                }
+            },
+            .unicode => {
+                // Requires unicode_digits valid hex digits
+                const digit = std.fmt.charToDigit(byte, 16) catch return error.InvalidString;
+                value = value * 16 + digit;
+                if (value > std.math.maxInt(u21)) return error.InvalidString;
+                digit_count += 1;
+                if (digit_count == unicode_digits) {
+                    var utf8: [4]u8 = undefined;
+                    const utf8_len = std.unicode.utf8Encode(@intCast(value), &utf8) catch return error.InvalidString;
+                    out.appendSliceAssumeCapacity(utf8[0..utf8_len]);
+                    mode = .literal;
+                }
+            },
+        }
+    }
+
+    // End of input: finish or reject whatever escape was still in progress.
+    switch (mode) {
+        .literal => {},
+        .backslash, .unicode => return error.InvalidString,
+        .hex => {
+            if (digit_count == 0) return error.InvalidString;
+            out.appendAssumeCapacity(@intCast(value));
+        },
+        .octal => out.appendAssumeCapacity(@intCast(value)),
+    }
+
+    return out.toOwnedSlice();
+}
+
+/// Test helper: parses `input` with `parseFilename` using the testing allocator and
+/// checks that the result is byte-for-byte equal to `expected`.
+fn testParseFilename(expected: []const u8, input: []const u8) !void {
+    const allocator = std.testing.allocator;
+    const actual = try parseFilename(allocator, input);
+    defer allocator.free(actual);
+
+    try std.testing.expectEqualSlices(u8, expected, actual);
+}
+
+test parseFilename {
+    // Inputs that must parse successfully, with their expected decoded bytes.
+    try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
+    try testParseFilename("\xABz\x53", "\\xABz\\123");
+    try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");
+
+    // Inputs that must be rejected: an unescaped quote, truncated escapes, non-hex
+    // digits, and hex/octal escapes whose value does not fit in a u8.
+    const invalid_inputs = [_][]const u8{
+        "\"",
+        "\\",
+        "\\u",
+        "\\U",
+        "\\x",
+        "\\xZZ",
+        "\\xABCDEF",
+        "\\777",
+    };
+    for (invalid_inputs) |input| {
+        try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, input));
+    }
+}
+
+/// Maps line numbers of the post-processed source to line numbers and filenames of the
+/// files they correspond to. Mappings are stored as treap nodes keyed by `start_line`;
+/// a lookup for a line finds the node with the greatest `start_line` that is <= the line.
+pub const SourceMappings = struct {
+    /// Treap of `Source` entries ordered by `start_line`.
+    sources: Sources = .{},
+    /// Interned filenames; `Source.filename_offset` is an offset into this table.
+    files: StringTable = .{},
+    /// The default assumes that the first filename added is the root file.
+    /// The value should be set to the correct offset if that assumption does not hold.
+    root_filename_offset: u32 = 0,
+    /// Backing storage for the treap nodes. Note that this pool always uses
+    /// `std.heap.page_allocator`, not the allocator passed to `deinit`.
+    source_node_pool: std.heap.MemoryPool(Sources.Node) = std.heap.MemoryPool(Sources.Node).init(std.heap.page_allocator),
+    /// The greatest line number passed to `set` so far, reduced by `collapse`.
+    end_line: usize = 0,
+
+    /// Orders `Source` entries by `start_line` only.
+    const sourceCompare = struct {
+        fn compare(a: Source, b: Source) std.math.Order {
+            return std.math.order(a.start_line, b.start_line);
+        }
+    }.compare;
+    const Sources = std.Treap(Source, sourceCompare);
+
+    pub const Source = struct {
+        /// First post-processed line covered by this entry.
+        start_line: usize,
+        /// Number of additional corresponding lines that `start_line` covers
+        /// (increased by `collapse`).
+        span: usize = 0,
+        /// The line in the corresponding file that `start_line` maps to.
+        corresponding_start_line: usize,
+        /// Offset of the corresponding filename within `SourceMappings.files`.
+        filename_offset: u32,
+    };
+
+    /// Releases the filename table (using `allocator`) and all treap nodes.
+    pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
+        self.files.deinit(allocator);
+        self.source_node_pool.deinit();
+    }
+
+    /// Find the node that 'contains' the `line`, i.e. the node with the greatest
+    /// start_line that is <= `line`. Returns null if there is no such node.
+    fn findNode(self: SourceMappings, line: usize) ?*Sources.Node {
+        var node = self.sources.root;
+        var last_gt: ?*Sources.Node = null;
+
+        // Only start_line takes part in the comparison, so the rest of the key can
+        // stay undefined.
+        var search_key: Source = undefined;
+        search_key.start_line = line;
+        while (node) |current| {
+            const order = sourceCompare(search_key, current.key);
+            if (order == .eq) break;
+            if (order == .gt) last_gt = current;
+
+            node = current.children[@intFromBool(order == .gt)] orelse {
+                // Regardless of the current order, last_gt will contain
+                // the node we want to return.
+                //
+                // If search key is > current node's key, then last_gt will be
+                // current which we now know is the closest node that is <=
+                // the search key.
+                //
+                //
+                // If the key is < current node's key, we want to jump back to the
+                // node that the search key was most recently greater than.
+                // This is necessary for scenarios like (where the search key is 2):
+                //
+                //   1
+                //    \
+                //     6
+                //    /
+                //   3
+                //
+                // In this example, we'll get down to the '3' node but ultimately want
+                // to return the '1' node.
+                //
+                // Note: If we've never seen a key that the search key is greater than,
+                // then we know that there's no valid node, so last_gt will be null.
+                return last_gt;
+            };
+        }
+
+        // Either an exact match was found (loop exited via break) or the tree is empty.
+        return node;
+    }
+
+    /// Maps post-processed line `line_num` to `corresponding_line_num` of the file at
+    /// `filename_offset`. A new node is only created when the line cannot be derived
+    /// from the node that already contains it (different file, or a different offset
+    /// between post-processed and corresponding line numbers).
+    ///
+    /// Note: `line_num` and `corresponding_line_num` start at 1
+    pub fn set(self: *SourceMappings, line_num: usize, corresponding_line_num: usize, filename_offset: u32) !void {
+        const maybe_node = self.findNode(line_num);
+
+        const need_new_node = need_new_node: {
+            if (maybe_node) |node| {
+                if (node.key.filename_offset != filename_offset) {
+                    break :need_new_node true;
+                }
+                // Same file: the existing node already describes this line if the
+                // distance between corresponding and post-processed line is unchanged.
+                const exist_delta = @as(i64, @intCast(node.key.corresponding_start_line)) - @as(i64, @intCast(node.key.start_line));
+                const cur_delta = @as(i64, @intCast(corresponding_line_num)) - @as(i64, @intCast(line_num));
+                if (exist_delta != cur_delta) {
+                    break :need_new_node true;
+                }
+                break :need_new_node false;
+            }
+            break :need_new_node true;
+        };
+        if (need_new_node) {
+            // spans must not overlap
+            if (maybe_node) |node| {
+                std.debug.assert(node.key.start_line != line_num);
+            }
+
+            const key = Source{
+                .start_line = line_num,
+                .corresponding_start_line = corresponding_line_num,
+                .filename_offset = filename_offset,
+            };
+            var entry = self.sources.getEntryFor(key);
+            var new_node = try self.source_node_pool.create();
+            new_node.key = key;
+            entry.set(new_node);
+        }
+        if (line_num > self.end_line) {
+            self.end_line = line_num;
+        }
+    }
+
+    /// Returns the `Source` entry that contains `line_num`, or null if there is none.
+    ///
+    /// Note: `line_num` starts at 1
+    pub fn get(self: SourceMappings, line_num: usize) ?Source {
+        const node = self.findNode(line_num) orelse return null;
+        return node.key;
+    }
+
+    /// Inclusive range of lines in the corresponding file, plus the file's offset
+    /// within `SourceMappings.files`.
+    pub const CorrespondingSpan = struct {
+        start_line: usize,
+        end_line: usize,
+        filename_offset: u32,
+    };
+
+    /// Returns the corresponding lines for post-processed line `line_num`, or null if
+    /// no entry contains it.
+    ///
+    /// The first line of an entry covers `span` additional corresponding lines; every
+    /// later line of the entry maps to a single corresponding line located after that span.
+    pub fn getCorrespondingSpan(self: SourceMappings, line_num: usize) ?CorrespondingSpan {
+        const source = self.get(line_num) orelse return null;
+        const diff = line_num - source.start_line;
+        const start_line = source.corresponding_start_line + (if (line_num == source.start_line) 0 else source.span + diff);
+        const end_line = start_line + (if (line_num == source.start_line) source.span else 0);
+        return CorrespondingSpan{
+            .start_line = start_line,
+            .end_line = end_line,
+            .filename_offset = source.filename_offset,
+        };
+    }
+
+    /// Makes `line_num` cover `num_following_lines_to_collapse` additional corresponding
+    /// lines, and moves every following entry (and `end_line`) up by that amount.
+    ///
+    /// Asserts that `num_following_lines_to_collapse` is greater than zero and that an
+    /// entry containing `line_num` exists. Returns `error.InvalidSourceMappingCollapse`
+    /// if shifting the following entries would put them out of order.
+    pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) !void {
+        std.debug.assert(num_following_lines_to_collapse > 0);
+        var node = self.findNode(line_num).?;
+        const span_diff = num_following_lines_to_collapse;
+        if (node.key.start_line != line_num) {
+            // `line_num` is in the middle of an existing entry, so give it an entry
+            // of its own that carries the span.
+            const offset = line_num - node.key.start_line;
+            const key = Source{
+                .start_line = line_num,
+                .span = num_following_lines_to_collapse,
+                .corresponding_start_line = node.key.corresponding_start_line + node.key.span + offset,
+                .filename_offset = node.key.filename_offset,
+            };
+            var entry = self.sources.getEntryFor(key);
+            var new_node = try self.source_node_pool.create();
+            new_node.key = key;
+            entry.set(new_node);
+            node = new_node;
+        } else {
+            node.key.span += span_diff;
+        }
+
+        // now subtract the span diff from the start line number of all of
+        // the following nodes in order
+        var it = Sources.InorderIterator{
+            .current = node,
+            .previous = node.children[0],
+        };
+        // skip past current, but store it
+        var prev = it.next().?;
+        while (it.next()) |inorder_node| {
+            inorder_node.key.start_line -= span_diff;
+
+            // This can only really happen if there are #line commands within
+            // a multiline comment, which in theory should be skipped over.
+            // However, currently, parseAndRemoveLineCommands is not aware of
+            // comments at all.
+            //
+            // TODO: Make parseAndRemoveLineCommands aware of comments/strings
+            //       and turn this into an assertion
+            if (prev.key.start_line > inorder_node.key.start_line) {
+                return error.InvalidSourceMappingCollapse;
+            }
+            prev = inorder_node;
+        }
+        self.end_line -= span_diff;
+    }
+
+    /// Returns true if the line is from the main/root file (i.e. not a file that has been
+    /// `#include`d). Returns false when no entry contains `line_num`.
+    pub fn isRootFile(self: *SourceMappings, line_num: usize) bool {
+        const source = self.get(line_num) orelse return false;
+        return source.filename_offset == self.root_filename_offset;
+    }
+};
+
+test "SourceMappings collapse" {
+    const allocator = std.testing.allocator;
+
+    var mappings = SourceMappings{};
+    defer mappings.deinit(allocator);
+    const filename_offset = try mappings.files.put(allocator, "test.rc");
+
+    // Lines 1 and 5 share the same file and the same line delta, so they end up in
+    // a single entry starting at line 1, with end_line = 5.
+    try mappings.set(1, 1, filename_offset);
+    try mappings.set(5, 5, filename_offset);
+
+    // Fold the two lines following line 2 into line 2.
+    try mappings.collapse(2, 2);
+
+    // Five lines became three: line 2 now spans corresponding lines 2-4 and the
+    // old line 5 is now line 3.
+    try std.testing.expectEqual(@as(usize, 3), mappings.end_line);
+    const span_1 = mappings.getCorrespondingSpan(1).?;
+    try std.testing.expectEqual(@as(usize, 1), span_1.start_line);
+    try std.testing.expectEqual(@as(usize, 1), span_1.end_line);
+    const span_2 = mappings.getCorrespondingSpan(2).?;
+    try std.testing.expectEqual(@as(usize, 2), span_2.start_line);
+    try std.testing.expectEqual(@as(usize, 4), span_2.end_line);
+    const span_3 = mappings.getCorrespondingSpan(3).?;
+    try std.testing.expectEqual(@as(usize, 5), span_3.start_line);
+    try std.testing.expectEqual(@as(usize, 5), span_3.end_line);
+}
+
+/// Same thing as StringTable in Zig's src/Wasm.zig
+///
+/// Interns strings: each distinct string is stored once, NUL-terminated, in `data`,
+/// and is identified by its byte offset into `data`. Because entries are delimited by
+/// a NUL byte, stored strings must not themselves contain NUL bytes.
+pub const StringTable = struct {
+    /// Concatenation of all interned strings, each followed by a 0 byte.
+    data: std.ArrayListUnmanaged(u8) = .{},
+    /// Set of offsets into `data`, hashed and compared by the string each offset refers to.
+    map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{},
+
+    /// Frees all memory owned by the table.
+    pub fn deinit(self: *StringTable, allocator: Allocator) void {
+        self.data.deinit(allocator);
+        self.map.deinit(allocator);
+    }
+
+    /// Interns `value` and returns its offset. If an equal string is already stored,
+    /// its existing offset is returned and nothing is allocated for the string data.
+    /// On error the table is left unchanged.
+    pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
+        const result = try self.map.getOrPutContextAdapted(
+            allocator,
+            value,
+            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
+            .{ .bytes = &self.data },
+        );
+        if (result.found_existing) {
+            return result.key_ptr.*;
+        }
+        // A slot has been inserted but its key is not initialized yet. If reserving
+        // space for the string fails, remove the slot again so that the map never
+        // holds an entry with an undefined offset.
+        errdefer self.map.removeByPtr(result.key_ptr);
+
+        try self.data.ensureUnusedCapacity(allocator, value.len + 1);
+        const offset: u32 = @intCast(self.data.items.len);
+
+        self.data.appendSliceAssumeCapacity(value);
+        self.data.appendAssumeCapacity(0);
+
+        result.key_ptr.* = offset;
+
+        return offset;
+    }
+
+    /// Returns the string stored at `offset` (without its terminating 0 byte).
+    /// Asserts that `offset` is within `data`.
+    pub fn get(self: StringTable, offset: u32) []const u8 {
+        std.debug.assert(offset < self.data.items.len);
+        return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
+    }
+
+    /// Returns the offset of `value` if it has been interned, otherwise null.
+    pub fn getOffset(self: *StringTable, value: []const u8) ?u32 {
+        return self.map.getKeyAdapted(
+            value,
+            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
+        );
+    }
+};
+
+/// Test-only description of the mapping expected for one post-processed line:
+/// the inclusive corresponding line range and the name of the corresponding file.
+const ExpectedSourceSpan = struct {
+    start_line: usize,
+    end_line: usize,
+    filename: []const u8,
+};
+
+/// Test helper: runs `parseAndRemoveLineCommandsAlloc` on `source` and checks that the
+/// resulting text equals `expected` and that the mapping of line `i + 1` matches
+/// `expected_spans[i]`. If the mappings differ, both the expected and the actual
+/// mappings are printed with std.debug.print before the error is returned.
+fn testParseAndRemoveLineCommands(
+    expected: []const u8,
+    comptime expected_spans: []const ExpectedSourceSpan,
+    source: []const u8,
+    options: ParseAndRemoveLineCommandsOptions,
+) !void {
+    var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options);
+    defer std.testing.allocator.free(results.result);
+    defer results.mappings.deinit(std.testing.allocator);
+
+    try std.testing.expectEqualStrings(expected, results.result);
+
+    expectEqualMappings(expected_spans, results.mappings) catch |err| {
+        std.debug.print("\nexpected mappings:\n", .{});
+        for (expected_spans, 0..) |span, i| {
+            // Line numbers are 1-based.
+            const line_num = i + 1;
+            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line });
+        }
+        std.debug.print("\nactual mappings:\n", .{});
+        var i: usize = 1;
+        while (i <= results.mappings.end_line) : (i += 1) {
+            const span = results.mappings.getCorrespondingSpan(i).?;
+            const filename = results.mappings.files.get(span.filename_offset);
+            std.debug.print("{}: {s}:{}-{}\n", .{ i, filename, span.start_line, span.end_line });
+        }
+        std.debug.print("\n", .{});
+        return err;
+    };
+}
+
+/// Test helper: checks that `mappings` covers exactly `expected_spans.len` lines and
+/// that the corresponding span and filename of line `i + 1` match `expected_spans[i]`.
+/// Returns `error.MissingLineNum` if a line has no mapping at all.
+fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
+    try std.testing.expectEqual(expected_spans.len, mappings.end_line);
+    // Line numbers are 1-based, so count from 1 alongside the expected spans.
+    for (expected_spans, 1..) |want, line_num| {
+        const got = mappings.getCorrespondingSpan(line_num) orelse return error.MissingLineNum;
+        const got_filename = mappings.files.get(got.filename_offset);
+        try std.testing.expectEqual(want.start_line, got.start_line);
+        try std.testing.expectEqual(want.end_line, got.end_line);
+        try std.testing.expectEqualStrings(want.filename, got_filename);
+    }
+}
+
+test "basic" {
+    // A lone #line command is removed entirely; the (empty) result still gets a
+    // mapping for line 1 of the named file.
+    try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
+        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
+    }, "#line 1 \"blah.rc\"", .{});
+}
+
+test "only removes line commands" {
+    // Preprocessor lines other than #line (here: #pragma) are kept in the output.
+    try testParseAndRemoveLineCommands(
+        \\#pragma code_page(65001)
+    , &[_]ExpectedSourceSpan{
+        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
+    },
+        \\#line 1 "blah.rc"
+        \\#pragma code_page(65001)
+    , .{});
+}
+
+test "whitespace and line endings" {
+    // Spaces/tabs between the parts of a #line command and a trailing CRLF are tolerated.
+    try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
+        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
+    }, "#line \t 1 \t \"blah.rc\"\r\n", .{});
+}
+
+test "example" {
+    // Preprocessor-style output with nested includes. The contents that follow the
+    // #line command for "./header.h" are expected to be dropped, and the two lines
+    // that remain both map to "./included.rc".
+    try testParseAndRemoveLineCommands(
+        \\
+        \\included RCDATA {"hello"}
+    , &[_]ExpectedSourceSpan{
+        .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" },
+        .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" },
+    },
+        \\#line 1 "rcdata.rc"
+        \\#line 1 "<built-in>"
+        \\#line 1 "<built-in>"
+        \\#line 355 "<built-in>"
+        \\#line 1 "<command line>"
+        \\#line 1 "<built-in>"
+        \\#line 1 "rcdata.rc"
+        \\#line 1 "./header.h"
+        \\
+        \\
+        \\2 RCDATA {"blah"}
+        \\
+        \\
+        \\#line 1 "./included.rc"
+        \\
+        \\included RCDATA {"hello"}
+        \\#line 7 "./header.h"
+        \\#line 1 "rcdata.rc"
+    , .{});
+}
+
+test "CRLF and other line endings" {
+    // The input mixes \r\n, a lone \r and \n\r as line endings after #line commands;
+    // the line endings between the remaining lines are kept as-is and trailing
+    // whitespace is trimmed from the result.
+    try testParseAndRemoveLineCommands(
+        "hello\r\n#pragma code_page(65001)\r\nworld",
+        &[_]ExpectedSourceSpan{
+            .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" },
+            .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" },
+            .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" },
+        },
+        "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n",
+        .{},
+    );
+}
+
+test "no line commands" {
+    // Without any #line command, every line maps to the same line number of
+    // `initial_filename`.
+    try testParseAndRemoveLineCommands(
+        \\1 RCDATA {"blah"}
+        \\2 RCDATA {"blah"}
+    , &[_]ExpectedSourceSpan{
+        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
+        .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
+    },
+        \\1 RCDATA {"blah"}
+        \\2 RCDATA {"blah"}
+    , .{ .initial_filename = "blah.rc" });
+}
+
+test "in place" {
+    // The same buffer is passed as both the source and the output buffer.
+    var mut_source = "#line 1 \"blah.rc\"".*;
+    var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{});
+    defer result.mappings.deinit(std.testing.allocator);
+    try std.testing.expectEqualStrings("", result.result);
+}
+
+test "line command within a multiline comment" {
+    // TODO: Enable once parseAndRemoveLineCommands is comment-aware
+    if (true) return error.SkipZigTest;
+
+    // Intended behavior once enabled: a #line command inside a /* */ comment is left
+    // untouched and does not affect the mappings.
+    try testParseAndRemoveLineCommands(
+        \\/*
+        \\#line 1 "irrelevant.rc"
+        \\
+        \\
+        \\*/
+    , &[_]ExpectedSourceSpan{
+        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
+        .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
+        .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" },
+        .{ .start_line = 4, .end_line = 4, .filename = "blah.rc" },
+        .{ .start_line = 5, .end_line = 5, .filename = "blah.rc" },
+    },
+        \\/*
+        \\#line 1 "irrelevant.rc"
+        \\
+        \\
+        \\*/
+    , .{ .initial_filename = "blah.rc" });
+}
diff --git a/lib/compiler/resinator/utils.zig b/lib/compiler/resinator/utils.zig
@@ -0,0 +1,124 @@
+const std = @import("std");
+const builtin = @import("builtin");
+
+/// Like std.io.FixedBufferStream but does no bounds checking
+///
+/// Bytes are written into `slice` starting at `pos`; the caller is responsible for
+/// making sure that `slice` is large enough for everything that gets written.
+pub const UncheckedSliceWriter = struct {
+    const Self = @This();
+
+    /// Index in `slice` that the next byte will be written to.
+    pos: usize = 0,
+    /// Destination buffer.
+    slice: []u8,
+
+    /// Stores `char` at the current position and advances by one.
+    pub fn write(self: *Self, char: u8) void {
+        const at = self.pos;
+        self.slice[at] = char;
+        self.pos = at + 1;
+    }
+
+    /// Writes all bytes of `slice` in order, first byte first. Copying front to back
+    /// keeps this usable when `slice` overlaps the destination at or after `pos`.
+    pub fn writeSlice(self: *Self, slice: []const u8) void {
+        std.mem.copyForwards(u8, self.slice[self.pos..][0..slice.len], slice);
+        self.pos += slice.len;
+    }
+
+    /// Returns the part of the destination buffer that has been written so far.
+    pub fn getWritten(self: Self) []u8 {
+        return self.slice[0..self.pos];
+    }
+};
+
+/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if
+/// a directory is attempted to be opened.
+/// On success the caller owns the returned file and must close it.
+/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed.
+pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File {
+    const file = try cwd.openFile(path, flags);
+    errdefer file.close();
+
+    // https://github.com/ziglang/zig/issues/5732
+    // No extra check is done on Windows; everywhere else the opened handle is
+    // stat'ed to find out whether it refers to a directory.
+    if (builtin.os.tag == .windows) return file;
+
+    const stat = try file.stat();
+    return if (stat.kind == .directory) error.IsDir else file;
+}
+
+/// Emulates the Windows implementation of `iswdigit`, but only returns true
+/// for the non-ASCII digits that `iswdigit` on Windows would return true for.
+pub fn isNonAsciiDigit(c: u21) bool {
+    // Superscript two, three and one are the only matches outside of a run of ten digits.
+    if (c == '\u{B2}' or c == '\u{B3}' or c == '\u{B9}') return true;
+
+    // Code point of the digit zero of every matching run; each run consists of ten
+    // consecutive code points (zero through nine).
+    const zero_digits = [_]u21{
+        0x660,  0x6F0,  0x7C0,  0x966,  0x9E6,  0xA66,  0xAE6,  0xB66,
+        0xBE6,  0xC66,  0xCE6,  0xD66,  0xE50,  0xED0,  0xF20,  0x1040,
+        0x1090, 0x17E0, 0x1810, 0x1946, 0x19D0, 0x1B50, 0x1BB0, 0x1C40,
+        0x1C50, 0xA620, 0xA8D0, 0xA900, 0xAA50, 0xFF10,
+    };
+    for (zero_digits) |zero| {
+        if (c >= zero and c <= zero + 9) return true;
+    }
+    return false;
+}
+
+/// Severity of a message rendered by `renderErrorMessage`.
+pub const ErrorMessageType = enum { err, warning, note };
+
+/// Used for generic colored errors/warnings/notes, more context-specific error messages
+/// are handled elsewhere.
+///
+/// Writes a colored `error: `, `warning: ` or `note: ` prefix, followed by the formatted
+/// message and a newline. The text of an error message is rendered in bold. Colors are
+/// reset before returning.
+pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, msg_type: ErrorMessageType, comptime format: []const u8, args: anytype) !void {
+    const prefix: []const u8 = switch (msg_type) {
+        .err => "error: ",
+        .warning => "warning: ",
+        .note => "note: ",
+    };
+
+    // Errors and warnings get a bold prefix; notes start from the default style.
+    try config.setColor(writer, if (msg_type == .note) .reset else .bold);
+    try config.setColor(writer, switch (msg_type) {
+        .err => .red,
+        .warning => .yellow,
+        .note => .cyan,
+    });
+    try writer.writeAll(prefix);
+
+    try config.setColor(writer, .reset);
+    if (msg_type == .err) try config.setColor(writer, .bold);
+
+    try writer.print(format, args);
+    try writer.writeByte('\n');
+    try config.setColor(writer, .reset);
+}
+
+/// Returns true if `first` followed by `second` is a two-byte line ending, i.e.
+/// either `\r\n` or `\n\r`. `\n\n` and `\r\r` are two separate line endings and
+/// therefore not a pair.
+pub fn isLineEndingPair(first: u8, second: u8) bool {
+    return switch (first) {
+        '\r' => second == '\n',
+        '\n' => second == '\r',
+        else => false,
+    };
+}
diff --git a/src/resinator/windows1252.zig b/lib/compiler/resinator/windows1252.zig
diff --git a/lib/std/zig/ErrorBundle.zig b/lib/std/zig/ErrorBundle.zig
@@ -433,7 +433,9 @@ pub const Wip = struct {
// The ensureUnusedCapacity call above guarantees this.
const notes_start = wip.reserveNotes(@intCast(other_list.len)) catch unreachable;
for (notes_start.., other_list) |note, message| {
- wip.extra.items[note] = @intFromEnum(wip.addOtherMessage(other, message) catch unreachable);
+ // This line can cause `wip.extra.items` to be resized.
+ const note_index = @intFromEnum(wip.addOtherMessage(other, message) catch unreachable);
+ wip.extra.items[note] = note_index;
}
}
@@ -522,7 +524,8 @@ pub const Wip = struct {
};
const loc = std.zig.findLineColumn(source, span.main);
- eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ // This line can cause `wip.extra.items` to be resized.
+ const note_index = @intFromEnum(try eb.addErrorMessage(.{
.msg = try eb.addString(msg),
.src_loc = try eb.addSourceLocation(.{
.src_path = try eb.addString(src_path),
@@ -538,6 +541,7 @@ pub const Wip = struct {
}),
.notes_len = 0, // TODO rework this function to be recursive
}));
+ eb.extra.items[note_i] = note_index;
}
}
}
@@ -567,6 +571,28 @@ pub const Wip = struct {
if (index == .none) return .none;
const other_sl = other.getSourceLocation(index);
+ var ref_traces: std.ArrayListUnmanaged(ReferenceTrace) = .{};
+ defer ref_traces.deinit(wip.gpa);
+
+ if (other_sl.reference_trace_len > 0) {
+ var ref_index = other.extraData(SourceLocation, @intFromEnum(index)).end;
+ for (0..other_sl.reference_trace_len) |_| {
+ const other_ref_trace_ed = other.extraData(ReferenceTrace, ref_index);
+ const other_ref_trace = other_ref_trace_ed.data;
+ ref_index = other_ref_trace_ed.end;
+
+ const ref_trace: ReferenceTrace = if (other_ref_trace.src_loc == .none) .{
+ // sentinel ReferenceTrace does not store a string index in decl_name
+ .decl_name = other_ref_trace.decl_name,
+ .src_loc = .none,
+ } else .{
+ .decl_name = try wip.addString(other.nullTerminatedString(other_ref_trace.decl_name)),
+ .src_loc = try wip.addOtherSourceLocation(other, other_ref_trace.src_loc),
+ };
+ try ref_traces.append(wip.gpa, ref_trace);
+ }
+ }
+
const src_loc = try wip.addSourceLocation(.{
.src_path = try wip.addString(other.nullTerminatedString(other_sl.src_path)),
.line = other_sl.line,
@@ -581,7 +607,9 @@ pub const Wip = struct {
.reference_trace_len = other_sl.reference_trace_len,
});
- // TODO: also add the reference trace
+ for (ref_traces.items) |ref_trace| {
+ try wip.addReferenceTrace(ref_trace);
+ }
return src_loc;
}
@@ -615,3 +643,95 @@ pub const Wip = struct {
}
}
};
+
+test "addBundleAsRoots" {
+    // Build a bundle with one root error message that has a source location with a
+    // reference trace (two regular entries plus a sentinel entry) and one note.
+    var bundle = bundle: {
+        var wip: ErrorBundle.Wip = undefined;
+        try wip.init(std.testing.allocator);
+        errdefer wip.deinit();
+
+        var ref_traces: [3]ReferenceTrace = undefined;
+        for (&ref_traces, 0..) |*ref_trace, i| {
+            if (i == ref_traces.len - 1) {
+                // sentinel reference trace
+                ref_trace.* = .{
+                    .decl_name = 3, // signifies 3 hidden references
+                    .src_loc = .none,
+                };
+            } else {
+                ref_trace.* = .{
+                    .decl_name = try wip.addString("foo"),
+                    .src_loc = try wip.addSourceLocation(.{
+                        .src_path = try wip.addString("foo"),
+                        .line = 1,
+                        .column = 2,
+                        .span_start = 3,
+                        .span_main = 4,
+                        .span_end = 5,
+                        .source_line = 0,
+                    }),
+                };
+            }
+        }
+
+        // The reference trace entries are added right after the source location
+        // that declares them via `reference_trace_len`.
+        const src_loc = try wip.addSourceLocation(.{
+            .src_path = try wip.addString("foo"),
+            .line = 1,
+            .column = 2,
+            .span_start = 3,
+            .span_main = 4,
+            .span_end = 5,
+            .source_line = try wip.addString("some source code"),
+            .reference_trace_len = ref_traces.len,
+        });
+        for (&ref_traces) |ref_trace| {
+            try wip.addReferenceTrace(ref_trace);
+        }
+
+        try wip.addRootErrorMessage(ErrorMessage{
+            .msg = try wip.addString("hello world"),
+            .src_loc = src_loc,
+            .notes_len = 1,
+        });
+        // Reserve the note slot first and fill it in only after the note has been
+        // added, so that `wip.extra.items` is indexed after any growth has happened.
+        const i = try wip.reserveNotes(1);
+        const note_index = @intFromEnum(wip.addErrorMessageAssumeCapacity(.{
+            .msg = try wip.addString("this is a note"),
+            .src_loc = try wip.addSourceLocation(.{
+                .src_path = try wip.addString("bar"),
+                .line = 1,
+                .column = 2,
+                .span_start = 3,
+                .span_main = 4,
+                .span_end = 5,
+                .source_line = try wip.addString("another line of source"),
+            }),
+        }));
+        wip.extra.items[i] = note_index;
+
+        break :bundle try wip.toOwnedBundle("");
+    };
+    defer bundle.deinit(std.testing.allocator);
+
+    const ttyconf: std.io.tty.Config = .no_color;
+
+    // Render the original bundle to text.
+    var bundle_buf = std.ArrayList(u8).init(std.testing.allocator);
+    defer bundle_buf.deinit();
+    try bundle.renderToWriter(.{ .ttyconf = ttyconf }, bundle_buf.writer());
+
+    // Copy the bundle into a fresh Wip via addBundleAsRoots.
+    var copy = copy: {
+        var wip: ErrorBundle.Wip = undefined;
+        try wip.init(std.testing.allocator);
+        errdefer wip.deinit();
+
+        try wip.addBundleAsRoots(bundle);
+
+        break :copy try wip.toOwnedBundle("");
+    };
+    defer copy.deinit(std.testing.allocator);
+
+    var copy_buf = std.ArrayList(u8).init(std.testing.allocator);
+    defer copy_buf.deinit();
+    try copy.renderToWriter(.{ .ttyconf = ttyconf }, copy_buf.writer());
+
+    // The copy must render exactly like the original.
+    try std.testing.expectEqualStrings(bundle_buf.items, copy_buf.items);
+}
diff --git a/src/Compilation.zig b/src/Compilation.zig
@@ -36,7 +36,6 @@ const Cache = std.Build.Cache;
const c_codegen = @import("codegen/c.zig");
const libtsan = @import("libtsan.zig");
const Zir = std.zig.Zir;
-const resinator = @import("resinator.zig");
const Builtin = @import("Builtin.zig");
const LlvmObject = @import("codegen/llvm.zig").Object;
@@ -174,7 +173,7 @@ local_cache_directory: Directory,
global_cache_directory: Directory,
libc_include_dir_list: []const []const u8,
libc_framework_dir_list: []const []const u8,
-rc_include_dir_list: []const []const u8,
+rc_includes: RcIncludes,
thread_pool: *ThreadPool,
/// Populated when we build the libc++ static library. A Job to build this is placed in the queue
@@ -1243,68 +1242,6 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
options.libc_installation,
);
- // The include directories used when preprocessing .rc files are separate from the
- // target. Which include directories are used is determined by `options.rc_includes`.
- //
- // Note: It should be okay that the include directories used when compiling .rc
- // files differ from the include directories used when compiling the main
- // binary, since the .res format is not dependent on anything ABI-related. The
- // only relevant differences would be things like `#define` constants being
- // different in the MinGW headers vs the MSVC headers, but any such
- // differences would likely be a MinGW bug.
- const rc_dirs: std.zig.LibCDirs = b: {
- // Set the includes to .none here when there are no rc files to compile
- var includes = if (options.rc_source_files.len > 0) options.rc_includes else .none;
- const target = options.root_mod.resolved_target.result;
- if (!options.root_mod.resolved_target.is_native_os or target.os.tag != .windows) {
- switch (includes) {
- // MSVC can't be found when the host isn't Windows, so short-circuit.
- .msvc => return error.WindowsSdkNotFound,
- // Skip straight to gnu since we won't be able to detect
- // MSVC on non-Windows hosts.
- .any => includes = .gnu,
- .none, .gnu => {},
- }
- }
- while (true) switch (includes) {
- .any, .msvc => break :b std.zig.LibCDirs.detect(
- arena,
- options.zig_lib_directory.path.?,
- .{
- .cpu = target.cpu,
- .os = target.os,
- .abi = .msvc,
- .ofmt = target.ofmt,
- },
- options.root_mod.resolved_target.is_native_abi,
- // The .rc preprocessor will need to know the libc include dirs even if we
- // are not linking libc, so force 'link_libc' to true
- true,
- options.libc_installation,
- ) catch |err| {
- if (includes == .any) {
- // fall back to mingw
- includes = .gnu;
- continue;
- }
- return err;
- },
- .gnu => break :b try std.zig.LibCDirs.detectFromBuilding(arena, options.zig_lib_directory.path.?, .{
- .cpu = target.cpu,
- .os = target.os,
- .abi = .gnu,
- .ofmt = target.ofmt,
- }),
- .none => break :b .{
- .libc_include_dir_list = &[0][]u8{},
- .libc_installation = null,
- .libc_framework_dir_list = &.{},
- .sysroot = null,
- .darwin_sdk_layout = null,
- },
- };
- };
-
const sysroot = options.sysroot orelse libc_dirs.sysroot;
const include_compiler_rt = options.want_compiler_rt orelse
@@ -1492,7 +1429,7 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
.self_exe_path = options.self_exe_path,
.libc_include_dir_list = libc_dirs.libc_include_dir_list,
.libc_framework_dir_list = libc_dirs.libc_framework_dir_list,
- .rc_include_dir_list = rc_dirs.libc_include_dir_list,
+ .rc_includes = options.rc_includes,
.thread_pool = options.thread_pool,
.clang_passthrough_mode = options.clang_passthrough_mode,
.clang_preprocessor_mode = options.clang_preprocessor_mode,
@@ -2506,7 +2443,7 @@ fn addNonIncrementalStuffToCacheManifest(
man.hash.add(comp.link_eh_frame_hdr);
man.hash.add(comp.skip_linker_dependencies);
man.hash.add(comp.include_compiler_rt);
- man.hash.addListOfBytes(comp.rc_include_dir_list);
+ man.hash.add(comp.rc_includes);
man.hash.addListOfBytes(comp.force_undefined_symbols.keys());
man.hash.addListOfBytes(comp.framework_dirs);
try link.hashAddSystemLibs(man, comp.system_libs);
@@ -4172,7 +4109,7 @@ pub fn obtainCObjectCacheManifest(
pub fn obtainWin32ResourceCacheManifest(comp: *const Compilation) Cache.Manifest {
var man = comp.cache_parent.obtain();
- man.hash.addListOfBytes(comp.rc_include_dir_list);
+ man.hash.add(comp.rc_includes);
return man;
}
@@ -4812,11 +4749,12 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P
}
fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32_resource_prog_node: *std.Progress.Node) !void {
- if (!build_options.have_llvm) {
- return comp.failWin32Resource(win32_resource, "clang not available: compiler built without LLVM extensions", .{});
+ if (!std.process.can_spawn) {
+ return comp.failWin32Resource(win32_resource, "{s} does not support spawning a child process", .{@tagName(builtin.os.tag)});
}
+
const self_exe_path = comp.self_exe_path orelse
- return comp.failWin32Resource(win32_resource, "clang compilation disabled", .{});
+ return comp.failWin32Resource(win32_resource, "unable to find self exe path", .{});
const tracy_trace = trace(@src());
defer tracy_trace.end();
@@ -4856,6 +4794,7 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
if (win32_resource.src == .manifest) {
_ = try man.addFile(src_path, null);
+ const rc_basename = try std.fmt.allocPrint(arena, "{s}.rc", .{src_basename});
const res_basename = try std.fmt.allocPrint(arena, "{s}.res", .{src_basename});
const digest = if (try man.hit()) man.final() else blk: {
@@ -4867,17 +4806,12 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
var o_dir = try comp.local_cache_directory.handle.makeOpenPath(o_sub_path, .{});
defer o_dir.close();
- var output_file = o_dir.createFile(res_basename, .{}) catch |err| {
- const output_file_path = try comp.local_cache_directory.join(arena, &.{ o_sub_path, res_basename });
- return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ output_file_path, @errorName(err) });
- };
- var output_file_closed = false;
- defer if (!output_file_closed) output_file.close();
-
- var diagnostics = resinator.errors.Diagnostics.init(arena);
- defer diagnostics.deinit();
-
- var output_buffered_stream = std.io.bufferedWriter(output_file.writer());
+ const in_rc_path = try comp.local_cache_directory.join(comp.gpa, &.{
+ o_sub_path, rc_basename,
+ });
+ const out_res_path = try comp.local_cache_directory.join(comp.gpa, &.{
+ o_sub_path, res_basename,
+ });
// In .rc files, a " within a quoted string is escaped as ""
const fmtRcEscape = struct {
@@ -4899,28 +4833,24 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
// 1 is CREATEPROCESS_MANIFEST_RESOURCE_ID which is the default ID used for RT_MANIFEST resources
// 24 is RT_MANIFEST
const input = try std.fmt.allocPrint(arena, "1 24 \"{s}\"", .{fmtRcEscape(src_path)});
+ try o_dir.writeFile(rc_basename, input);
+
+ var argv = std.ArrayList([]const u8).init(comp.gpa);
+ defer argv.deinit();
+
+ try argv.appendSlice(&.{
+ self_exe_path,
+ "rc",
+ "--zig-integration",
+ "/:no-preprocess",
+ "/x", // ignore INCLUDE environment variable
+ "/c65001", // UTF-8 codepage
+ "/:auto-includes",
+ "none",
+ });
+ try argv.appendSlice(&.{ "--", in_rc_path, out_res_path });
- resinator.compile.compile(arena, input, output_buffered_stream.writer(), .{
- .cwd = std.fs.cwd(),
- .diagnostics = &diagnostics,
- .ignore_include_env_var = true,
- .default_code_page = .utf8,
- }) catch |err| switch (err) {
- error.ParseError, error.CompileError => {
- // Delete the output file on error
- output_file.close();
- output_file_closed = true;
- // Failing to delete is not really a big deal, so swallow any errors
- o_dir.deleteFile(res_basename) catch {
- const output_file_path = try comp.local_cache_directory.join(arena, &.{ o_sub_path, res_basename });
- log.warn("failed to delete '{s}': {s}", .{ output_file_path, @errorName(err) });
- };
- return comp.failWin32ResourceCompile(win32_resource, input, &diagnostics, null);
- },
- else => |e| return e,
- };
-
- try output_buffered_stream.flush();
+ try spawnZigRc(comp, win32_resource, src_basename, arena, argv.items, &child_progress_node);
break :blk digest;
};
@@ -4951,9 +4881,6 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
const rc_basename_noext = src_basename[0 .. src_basename.len - std.fs.path.extension(src_basename).len];
const digest = if (try man.hit()) man.final() else blk: {
- const rcpp_filename = try std.fmt.allocPrint(arena, "{s}.rcpp", .{rc_basename_noext});
-
- const out_rcpp_path = try comp.tmpFilePath(arena, rcpp_filename);
var zig_cache_tmp_dir = try comp.local_cache_directory.handle.makeOpenPath("tmp", .{});
defer zig_cache_tmp_dir.close();
@@ -4963,193 +4890,66 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
// so we need a temporary filename.
const out_res_path = try comp.tmpFilePath(arena, res_filename);
- var options = options: {
- var resinator_args = try std.ArrayListUnmanaged([]const u8).initCapacity(comp.gpa, rc_src.extra_flags.len + 4);
- defer resinator_args.deinit(comp.gpa);
-
- resinator_args.appendAssumeCapacity(""); // dummy 'process name' arg
- resinator_args.appendSliceAssumeCapacity(rc_src.extra_flags);
- resinator_args.appendSliceAssumeCapacity(&.{ "--", out_rcpp_path, out_res_path });
-
- var cli_diagnostics = resinator.cli.Diagnostics.init(comp.gpa);
- defer cli_diagnostics.deinit();
- const options = resinator.cli.parse(comp.gpa, resinator_args.items, &cli_diagnostics) catch |err| switch (err) {
- error.ParseError => {
- return comp.failWin32ResourceCli(win32_resource, &cli_diagnostics);
- },
- else => |e| return e,
- };
- break :options options;
- };
- defer options.deinit();
-
- // We never want to read the INCLUDE environment variable, so
- // unconditionally set `ignore_include_env_var` to true
- options.ignore_include_env_var = true;
-
- if (options.preprocess != .yes) {
- return comp.failWin32Resource(win32_resource, "the '{s}' option is not supported in this context", .{switch (options.preprocess) {
- .no => "/:no-preprocess",
- .only => "/p",
- .yes => unreachable,
- }});
- }
-
var argv = std.ArrayList([]const u8).init(comp.gpa);
defer argv.deinit();
- try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang" });
-
- try resinator.preprocess.appendClangArgs(arena, &argv, options, .{
- .clang_target = null, // handled by addCCArgs
- .system_include_paths = &.{}, // handled by addCCArgs
- .needs_gnu_workaround = comp.getTarget().isGnu(),
- .nostdinc = false, // handled by addCCArgs
+ const depfile_filename = try std.fmt.allocPrint(arena, "{s}.d.json", .{rc_basename_noext});
+ const out_dep_path = try comp.tmpFilePath(arena, depfile_filename);
+ try argv.appendSlice(&.{
+ self_exe_path,
+ "rc",
+ "--zig-integration",
+ "/:depfile",
+ out_dep_path,
+ "/:depfile-fmt",
+ "json",
+ "/x", // ignore INCLUDE environment variable
+ "/:auto-includes",
+ @tagName(comp.rc_includes),
});
-
- try argv.append(rc_src.src_path);
- try argv.appendSlice(&[_][]const u8{
- "-o",
- out_rcpp_path,
- });
-
- const out_dep_path = try std.fmt.allocPrint(arena, "{s}.d", .{out_rcpp_path});
- // Note: addCCArgs will implicitly add _DEBUG/NDEBUG depending on the optimization
- // mode. While these defines are not normally present when calling rc.exe directly,
+ // While these defines are not normally present when calling rc.exe directly,
// them being defined matches the behavior of how MSVC calls rc.exe which is the more
// relevant behavior in this case.
- try comp.addCCArgs(arena, &argv, .rc, out_dep_path, rc_src.owner);
-
- if (comp.verbose_cc) {
- dump_argv(argv.items);
+ switch (rc_src.owner.optimize_mode) {
+ .Debug => try argv.append("-D_DEBUG"),
+ .ReleaseSafe => {},
+ .ReleaseFast, .ReleaseSmall => try argv.append("-DNDEBUG"),
}
+ try argv.appendSlice(rc_src.extra_flags);
+ try argv.appendSlice(&.{ "--", rc_src.src_path, out_res_path });
- if (std.process.can_spawn) {
- var child = std.ChildProcess.init(argv.items, arena);
- child.stdin_behavior = .Ignore;
- child.stdout_behavior = .Ignore;
- child.stderr_behavior = .Pipe;
-
- try child.spawn();
-
- const stderr_reader = child.stderr.?.reader();
-
- const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024);
-
- const term = child.wait() catch |err| {
- return comp.failWin32Resource(win32_resource, "unable to spawn {s}: {s}", .{ argv.items[0], @errorName(err) });
- };
+ try spawnZigRc(comp, win32_resource, src_basename, arena, argv.items, &child_progress_node);
- switch (term) {
- .Exited => |code| {
- if (code != 0) {
- // TODO parse clang stderr and turn it into an error message
- // and then call failCObjWithOwnedErrorMsg
- log.err("clang preprocessor failed with stderr:\n{s}", .{stderr});
- return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{code});
- }
- },
- else => {
- log.err("clang preprocessor terminated with stderr:\n{s}", .{stderr});
- return comp.failWin32Resource(win32_resource, "clang preprocessor terminated unexpectedly", .{});
- },
- }
- } else {
- const exit_code = try clangMain(arena, argv.items);
- if (exit_code != 0) {
- return comp.failWin32Resource(win32_resource, "clang preprocessor exited with code {d}", .{exit_code});
- }
- }
-
- const dep_basename = std.fs.path.basename(out_dep_path);
- // Add the files depended on to the cache system.
- try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
- switch (comp.cache_use) {
- .whole => |whole| if (whole.cache_manifest) |whole_cache_manifest| {
- whole.cache_manifest_mutex.lock();
- defer whole.cache_manifest_mutex.unlock();
- try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
- },
- .incremental => {},
- }
- // Just to save disk space, we delete the file because it is never needed again.
- zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| {
- log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) });
- };
-
- const full_input = std.fs.cwd().readFileAlloc(arena, out_rcpp_path, std.math.maxInt(usize)) catch |err| switch (err) {
- error.OutOfMemory => return error.OutOfMemory,
- else => |e| {
- return comp.failWin32Resource(win32_resource, "failed to read preprocessed file '{s}': {s}", .{ out_rcpp_path, @errorName(e) });
- },
- };
-
- var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(arena, full_input, full_input, .{ .initial_filename = rc_src.src_path });
- defer mapping_results.mappings.deinit(arena);
-
- const final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings);
-
- var output_file = zig_cache_tmp_dir.createFile(out_res_path, .{}) catch |err| {
- return comp.failWin32Resource(win32_resource, "failed to create output file '{s}': {s}", .{ out_res_path, @errorName(err) });
- };
- var output_file_closed = false;
- defer if (!output_file_closed) output_file.close();
-
- var diagnostics = resinator.errors.Diagnostics.init(arena);
- defer diagnostics.deinit();
+ // Read depfile and update cache manifest
+ {
+ const dep_basename = std.fs.path.basename(out_dep_path);
+ const dep_file_contents = try zig_cache_tmp_dir.readFileAlloc(arena, dep_basename, 50 * 1024 * 1024);
+ defer arena.free(dep_file_contents);
- var dependencies_list = std.ArrayList([]const u8).init(comp.gpa);
- defer {
- for (dependencies_list.items) |item| {
- comp.gpa.free(item);
+ const value = try std.json.parseFromSliceLeaky(std.json.Value, arena, dep_file_contents, .{});
+ if (value != .array) {
+ return comp.failWin32Resource(win32_resource, "depfile from zig rc has unexpected format", .{});
}
- dependencies_list.deinit();
- }
- var output_buffered_stream = std.io.bufferedWriter(output_file.writer());
-
- resinator.compile.compile(arena, final_input, output_buffered_stream.writer(), .{
- .cwd = std.fs.cwd(),
- .diagnostics = &diagnostics,
- .source_mappings = &mapping_results.mappings,
- .dependencies_list = &dependencies_list,
- .system_include_paths = comp.rc_include_dir_list,
- .ignore_include_env_var = true,
- // options
- .extra_include_paths = options.extra_include_paths.items,
- .default_language_id = options.default_language_id,
- .default_code_page = options.default_code_page orelse .windows1252,
- .verbose = options.verbose,
- .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
- .max_string_literal_codepoints = options.max_string_literal_codepoints,
- .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
- .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
- }) catch |err| switch (err) {
- error.ParseError, error.CompileError => {
- // Delete the output file on error
- output_file.close();
- output_file_closed = true;
- // Failing to delete is not really a big deal, so swallow any errors
- zig_cache_tmp_dir.deleteFile(out_res_path) catch {
- log.warn("failed to delete '{s}': {s}", .{ out_res_path, @errorName(err) });
- };
- return comp.failWin32ResourceCompile(win32_resource, final_input, &diagnostics, mapping_results.mappings);
- },
- else => |e| return e,
- };
-
- try output_buffered_stream.flush();
-
- for (dependencies_list.items) |dep_file_path| {
- try man.addFilePost(dep_file_path);
- switch (comp.cache_use) {
- .whole => |whole| if (whole.cache_manifest) |whole_cache_manifest| {
- whole.cache_manifest_mutex.lock();
- defer whole.cache_manifest_mutex.unlock();
- try whole_cache_manifest.addFilePost(dep_file_path);
- },
- .incremental => {},
+ for (value.array.items) |element| {
+ if (element != .string) {
+ return comp.failWin32Resource(win32_resource, "depfile from zig rc has unexpected format", .{});
+ }
+ const dep_file_path = element.string;
+ try man.addFilePost(dep_file_path);
+ switch (comp.cache_use) {
+ .whole => |whole| if (whole.cache_manifest) |whole_cache_manifest| {
+ whole.cache_manifest_mutex.lock();
+ defer whole.cache_manifest_mutex.unlock();
+ try whole_cache_manifest.addFilePost(dep_file_path);
+ },
+ .incremental => {},
+ }
}
+ // Just to save disk space, we delete the file because it is never needed again.
+ zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| {
+ log.warn("failed to delete '{s}': {s}", .{ out_dep_path, @errorName(err) });
+ };
}
// Rename into place.
@@ -5159,8 +4959,6 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
defer o_dir.close();
const tmp_basename = std.fs.path.basename(out_res_path);
try std.fs.rename(zig_cache_tmp_dir, tmp_basename, o_dir, res_filename);
- const tmp_rcpp_basename = std.fs.path.basename(out_rcpp_path);
- try std.fs.rename(zig_cache_tmp_dir, tmp_rcpp_basename, o_dir, rcpp_filename);
break :blk digest;
};
@@ -5186,6 +4984,106 @@ fn updateWin32Resource(comp: *Compilation, win32_resource: *Win32Resource, win32
};
}
+/// Runs `argv` as a child process and relays what it reports over stdout.
+///
+/// The child is spawned with stdin ignored and stdout/stderr piped. Its stdout is
+/// read as a sequence of `std.zig.Server.Message` frames (a fixed-size header
+/// followed by `header.bytes_len` bytes of body):
+///  * `.error_bundle`: the payload is copied into a `comp.gpa`-allocated
+///    `std.zig.ErrorBundle` and the resource is failed with it.
+///  * `.progress`: the payload is used to rename `child_progress_node`.
+///  * anything else is ignored.
+///
+/// Once polling reports that there is nothing more to read, up to 10 MiB of stderr
+/// is read, the child is waited on, and a non-zero exit code or abnormal
+/// termination is logged (with the captured stderr) and reported through
+/// `comp.failWin32Resource`.
+///
+/// `arena` backs the child process bookkeeping, the progress-name scratch buffer,
+/// and the captured stderr.
+fn spawnZigRc(
+    comp: *Compilation,
+    win32_resource: *Win32Resource,
+    src_basename: []const u8,
+    arena: Allocator,
+    argv: []const []const u8,
+    child_progress_node: *std.Progress.Node,
+) !void {
+    // Scratch buffer reused for every progress message.
+    var node_name: std.ArrayListUnmanaged(u8) = .{};
+    defer node_name.deinit(arena);
+
+    var child = std.ChildProcess.init(argv, arena);
+    child.stdin_behavior = .Ignore;
+    child.stdout_behavior = .Pipe;
+    child.stderr_behavior = .Pipe;
+
+    child.spawn() catch |err| {
+        return comp.failWin32Resource(win32_resource, "unable to spawn {s} rc: {s}", .{ argv[0], @errorName(err) });
+    };
+
+    // NOTE(review): only stdout is polled here; stderr is drained after the loop.
+    // A child that writes a large amount to stderr before closing stdout could
+    // presumably stall on a full pipe - confirm whether stderr should be polled too.
+    var poller = std.io.poll(comp.gpa, enum { stdout }, .{
+        .stdout = child.stdout.?,
+    });
+    defer poller.deinit();
+
+    const stdout = poller.fifo(.stdout);
+
+    poll: while (true) {
+        // Buffer a complete header, then a complete body, before decoding.
+        while (stdout.readableLength() < @sizeOf(std.zig.Server.Message.Header)) {
+            if (!(try poller.poll())) break :poll;
+        }
+        // Cannot fail: the loop above guarantees enough buffered bytes.
+        const header = stdout.reader().readStruct(std.zig.Server.Message.Header) catch unreachable;
+        while (stdout.readableLength() < header.bytes_len) {
+            if (!(try poller.poll())) break :poll;
+        }
+        const body = stdout.readableSliceOfLen(header.bytes_len);
+
+        switch (header.tag) {
+            // We expect exactly one ErrorBundle, and if any error_bundle header is
+            // sent then it's a fatal error.
+            .error_bundle => {
+                const EbHdr = std.zig.Server.Message.ErrorBundle;
+                const eb_hdr = @as(*align(1) const EbHdr, @ptrCast(body));
+                const extra_bytes =
+                    body[@sizeOf(EbHdr)..][0 .. @sizeOf(u32) * eb_hdr.extra_len];
+                const string_bytes =
+                    body[@sizeOf(EbHdr) + extra_bytes.len ..][0..eb_hdr.string_bytes_len];
+                const unaligned_extra = std.mem.bytesAsSlice(u32, extra_bytes);
+                const error_bundle = bundle: {
+                    const extra_array = try comp.gpa.alloc(u32, unaligned_extra.len);
+                    // Free `extra_array` if duplicating the string bytes fails. The
+                    // errdefer is deliberately confined to this block so that it can
+                    // not run on the error return below, after the bundle has been
+                    // passed to failWin32ResourceWithOwnedBundle.
+                    errdefer comp.gpa.free(extra_array);
+                    @memcpy(extra_array, unaligned_extra);
+                    break :bundle std.zig.ErrorBundle{
+                        .string_bytes = try comp.gpa.dupe(u8, string_bytes),
+                        .extra = extra_array,
+                    };
+                };
+                // NOTE(review): this returns before `child.wait()` is reached -
+                // confirm that leaving the child un-waited here is intended.
+                return comp.failWin32ResourceWithOwnedBundle(win32_resource, error_bundle);
+            },
+            .progress => {
+                node_name.clearRetainingCapacity();
+                // <resinator> is a special string that indicates that the child
+                // process has reached resinator's main function
+                if (std.mem.eql(u8, body, "<resinator>")) {
+                    child_progress_node.setName(src_basename);
+                }
+                // Ignore 0-length strings since if multiple zig rc commands
+                // are executed at the same time, only one will send progress strings
+                // while the other(s) will send empty strings.
+                else if (body.len > 0) {
+                    try node_name.appendSlice(arena, "build 'zig rc'... ");
+                    try node_name.appendSlice(arena, body);
+                    child_progress_node.setName(node_name.items);
+                }
+            },
+            else => {}, // ignore other messages
+        }
+
+        // Consume the body now that it has been handled.
+        stdout.discard(body.len);
+    }
+
+    // Just in case there's a failure that didn't send an ErrorBundle (e.g. an error return trace)
+    const stderr_reader = child.stderr.?.reader();
+    const stderr = try stderr_reader.readAllAlloc(arena, 10 * 1024 * 1024);
+
+    const term = child.wait() catch |err| {
+        return comp.failWin32Resource(win32_resource, "unable to wait for {s} rc: {s}", .{ argv[0], @errorName(err) });
+    };
+
+    switch (term) {
+        .Exited => |code| {
+            if (code != 0) {
+                log.err("zig rc failed with stderr:\n{s}", .{stderr});
+                return comp.failWin32Resource(win32_resource, "zig rc exited with code {d}", .{code});
+            }
+        },
+        else => {
+            log.err("zig rc terminated with stderr:\n{s}", .{stderr});
+            return comp.failWin32Resource(win32_resource, "zig rc terminated unexpectedly", .{});
+        },
+    }
+}
+
pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 {
const s = std.fs.path.sep_str;
const rand_int = std.crypto.random.int(u64);
@@ -5352,16 +5250,9 @@ pub fn addCCArgs(
try argv.append("-isystem");
try argv.append(c_headers_dir);
- if (ext == .rc) {
- for (comp.rc_include_dir_list) |include_dir| {
- try argv.append("-isystem");
- try argv.append(include_dir);
- }
- } else {
- for (comp.libc_include_dir_list) |include_dir| {
- try argv.append("-isystem");
- try argv.append(include_dir);
- }
+ for (comp.libc_include_dir_list) |include_dir| {
+ try argv.append("-isystem");
+ try argv.append(include_dir);
}
if (target.cpu.model.llvm_name) |llvm_name| {
@@ -5726,167 +5617,6 @@ fn failWin32ResourceWithOwnedBundle(
return error.AnalysisFail;
}
-fn failWin32ResourceCli(
- comp: *Compilation,
- win32_resource: *Win32Resource,
- diagnostics: *resinator.cli.Diagnostics,
-) SemaError {
- @setCold(true);
-
- var bundle: ErrorBundle.Wip = undefined;
- try bundle.init(comp.gpa);
- errdefer bundle.deinit();
-
- try bundle.addRootErrorMessage(.{
- .msg = try bundle.addString("invalid command line option(s)"),
- .src_loc = try bundle.addSourceLocation(.{
- .src_path = try bundle.addString(switch (win32_resource.src) {
- .rc => |rc_src| rc_src.src_path,
- .manifest => |manifest_src| manifest_src,
- }),
- .line = 0,
- .column = 0,
- .span_start = 0,
- .span_main = 0,
- .span_end = 0,
- }),
- });
-
- var cur_err: ?ErrorBundle.ErrorMessage = null;
- var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
- defer cur_notes.deinit(comp.gpa);
- for (diagnostics.errors.items) |err_details| {
- switch (err_details.type) {
- .err => {
- if (cur_err) |err| {
- try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
- }
- cur_err = .{
- .msg = try bundle.addString(err_details.msg.items),
- };
- cur_notes.clearRetainingCapacity();
- },
- .warning => cur_err = null,
- .note => {
- if (cur_err == null) continue;
- cur_err.?.notes_len += 1;
- try cur_notes.append(comp.gpa, .{
- .msg = try bundle.addString(err_details.msg.items),
- });
- },
- }
- }
- if (cur_err) |err| {
- try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
- }
-
- const finished_bundle = try bundle.toOwnedBundle("");
- return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
-}
-
-fn failWin32ResourceCompile(
- comp: *Compilation,
- win32_resource: *Win32Resource,
- source: []const u8,
- diagnostics: *resinator.errors.Diagnostics,
- opt_mappings: ?resinator.source_mapping.SourceMappings,
-) SemaError {
- @setCold(true);
-
- var bundle: ErrorBundle.Wip = undefined;
- try bundle.init(comp.gpa);
- errdefer bundle.deinit();
-
- var msg_buf: std.ArrayListUnmanaged(u8) = .{};
- defer msg_buf.deinit(comp.gpa);
- var cur_err: ?ErrorBundle.ErrorMessage = null;
- var cur_notes: std.ArrayListUnmanaged(ErrorBundle.ErrorMessage) = .{};
- defer cur_notes.deinit(comp.gpa);
- for (diagnostics.errors.items) |err_details| {
- switch (err_details.type) {
- .hint => continue,
- // Clear the current error so that notes don't bleed into unassociated errors
- .warning => {
- cur_err = null;
- continue;
- },
- .note => if (cur_err == null) continue,
- .err => {},
- }
- const err_line, const err_filename = blk: {
- if (opt_mappings) |mappings| {
- const corresponding_span = mappings.get(err_details.token.line_number);
- const corresponding_file = mappings.files.get(corresponding_span.filename_offset);
- const err_line = corresponding_span.start_line;
- break :blk .{ err_line, corresponding_file };
- } else {
- break :blk .{ err_details.token.line_number, "<generated rc>" };
- }
- };
-
- const source_line_start = err_details.token.getLineStart(source);
- const column = err_details.token.calculateColumn(source, 1, source_line_start);
-
- msg_buf.clearRetainingCapacity();
- try err_details.render(msg_buf.writer(comp.gpa), source, diagnostics.strings.items);
-
- const src_loc = src_loc: {
- var src_loc: ErrorBundle.SourceLocation = .{
- .src_path = try bundle.addString(err_filename),
- .line = @intCast(err_line - 1), // 1-based -> 0-based
- .column = @intCast(column),
- .span_start = 0,
- .span_main = 0,
- .span_end = 0,
- };
- if (err_details.print_source_line) {
- const source_line = err_details.token.getLine(source, source_line_start);
- const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
- src_loc.span_start = @intCast(visual_info.point_offset - visual_info.before_len);
- src_loc.span_main = @intCast(visual_info.point_offset);
- src_loc.span_end = @intCast(visual_info.point_offset + 1 + visual_info.after_len);
- src_loc.source_line = try bundle.addString(source_line);
- }
- break :src_loc try bundle.addSourceLocation(src_loc);
- };
-
- switch (err_details.type) {
- .err => {
- if (cur_err) |err| {
- try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
- }
- cur_err = .{
- .msg = try bundle.addString(msg_buf.items),
- .src_loc = src_loc,
- };
- cur_notes.clearRetainingCapacity();
- },
- .note => {
- cur_err.?.notes_len += 1;
- try cur_notes.append(comp.gpa, .{
- .msg = try bundle.addString(msg_buf.items),
- .src_loc = src_loc,
- });
- },
- .warning, .hint => unreachable,
- }
- }
- if (cur_err) |err| {
- try win32ResourceFlushErrorMessage(&bundle, err, cur_notes.items);
- }
-
- const finished_bundle = try bundle.toOwnedBundle("");
- return comp.failWin32ResourceWithOwnedBundle(win32_resource, finished_bundle);
-}
-
-fn win32ResourceFlushErrorMessage(wip: *ErrorBundle.Wip, msg: ErrorBundle.ErrorMessage, notes: []const ErrorBundle.ErrorMessage) !void {
- try wip.addRootErrorMessage(msg);
- const notes_start = try wip.reserveNotes(@intCast(notes.len));
- for (notes_start.., notes) |i, note| {
- wip.extra.items[i] = @intFromEnum(wip.addErrorMessageAssumeCapacity(note));
- }
-}
-
pub const FileExt = enum {
c,
cpp,
diff --git a/src/main.zig b/src/main.zig
@@ -291,7 +291,14 @@ fn mainArgs(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
} else if (mem.eql(u8, cmd, "translate-c")) {
return buildOutputType(gpa, arena, args, .translate_c);
} else if (mem.eql(u8, cmd, "rc")) {
- return cmdRc(gpa, arena, args[1..]);
+ const use_server = cmd_args.len > 0 and std.mem.eql(u8, cmd_args[0], "--zig-integration");
+ return jitCmd(gpa, arena, cmd_args, .{
+ .cmd_name = "resinator",
+ .root_src_path = "resinator/main.zig",
+ .depend_on_aro = true,
+ .prepend_zig_lib_dir_path = true,
+ .server = use_server,
+ });
} else if (mem.eql(u8, cmd, "fmt")) {
return jitCmd(gpa, arena, cmd_args, .{
.cmd_name = "fmt",
@@ -4625,276 +4632,6 @@ fn cmdTranslateC(comp: *Compilation, arena: Allocator, fancy_output: ?*Compilati
}
}
-fn cmdRc(gpa: Allocator, arena: Allocator, args: []const []const u8) !void {
- const resinator = @import("resinator.zig");
-
- const stderr = std.io.getStdErr();
- const stderr_config = std.io.tty.detectConfig(stderr);
-
- var options = options: {
- var cli_diagnostics = resinator.cli.Diagnostics.init(gpa);
- defer cli_diagnostics.deinit();
- var options = resinator.cli.parse(gpa, args, &cli_diagnostics) catch |err| switch (err) {
- error.ParseError => {
- cli_diagnostics.renderToStdErr(args, stderr_config);
- process.exit(1);
- },
- else => |e| return e,
- };
- try options.maybeAppendRC(std.fs.cwd());
-
- // print any warnings/notes
- cli_diagnostics.renderToStdErr(args, stderr_config);
- // If there was something printed, then add an extra newline separator
- // so that there is a clear separation between the cli diagnostics and whatever
- // gets printed after
- if (cli_diagnostics.errors.items.len > 0) {
- std.debug.print("\n", .{});
- }
- break :options options;
- };
- defer options.deinit();
-
- if (options.print_help_and_exit) {
- try resinator.cli.writeUsage(stderr.writer(), "zig rc");
- return;
- }
-
- const stdout_writer = std.io.getStdOut().writer();
- if (options.verbose) {
- try options.dumpVerbose(stdout_writer);
- try stdout_writer.writeByte('\n');
- }
-
- const full_input = full_input: {
- if (options.preprocess != .no) {
- if (!build_options.have_llvm) {
- fatal("clang not available: compiler built without LLVM extensions", .{});
- }
-
- var argv = std.ArrayList([]const u8).init(gpa);
- defer argv.deinit();
-
- const self_exe_path = try introspect.findZigExePath(arena);
- var zig_lib_directory = introspect.findZigLibDirFromSelfExe(arena, self_exe_path) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to find zig installation directory: {s}", .{@errorName(err)});
- process.exit(1);
- };
- defer zig_lib_directory.handle.close();
-
- const include_args = detectRcIncludeDirs(arena, zig_lib_directory.path.?, options.auto_includes) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to detect system include directories: {s}", .{@errorName(err)});
- process.exit(1);
- };
-
- try argv.appendSlice(&[_][]const u8{ self_exe_path, "clang" });
-
- const clang_target = clang_target: {
- if (include_args.target_abi) |abi| {
- break :clang_target try std.fmt.allocPrint(arena, "x86_64-unknown-windows-{s}", .{abi});
- }
- break :clang_target "x86_64-unknown-windows";
- };
- try resinator.preprocess.appendClangArgs(arena, &argv, options, .{
- .clang_target = clang_target,
- .system_include_paths = include_args.include_paths,
- .needs_gnu_workaround = if (include_args.target_abi) |abi| std.mem.eql(u8, abi, "gnu") else false,
- .nostdinc = true,
- });
-
- try argv.append(options.input_filename);
-
- if (options.verbose) {
- try stdout_writer.writeAll("Preprocessor: zig clang\n");
- for (argv.items[0 .. argv.items.len - 1]) |arg| {
- try stdout_writer.print("{s} ", .{arg});
- }
- try stdout_writer.print("{s}\n\n", .{argv.items[argv.items.len - 1]});
- }
-
- if (process.can_spawn) {
- const result = std.ChildProcess.run(.{
- .allocator = gpa,
- .argv = argv.items,
- .max_output_bytes = std.math.maxInt(u32),
- }) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to spawn preprocessor child process: {s}", .{@errorName(err)});
- process.exit(1);
- };
- errdefer gpa.free(result.stdout);
- defer gpa.free(result.stderr);
-
- switch (result.term) {
- .Exited => |code| {
- if (code != 0) {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "the preprocessor failed with exit code {}:", .{code});
- try stderr.writeAll(result.stderr);
- try stderr.writeAll("\n");
- process.exit(1);
- }
- },
- .Signal, .Stopped, .Unknown => {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "the preprocessor terminated unexpectedly ({s}):", .{@tagName(result.term)});
- try stderr.writeAll(result.stderr);
- try stderr.writeAll("\n");
- process.exit(1);
- },
- }
-
- break :full_input result.stdout;
- } else {
- // need to use an intermediate file
- const rand_int = std.crypto.random.int(u64);
- const preprocessed_path = try std.fmt.allocPrint(gpa, "resinator{x}.rcpp", .{rand_int});
- defer gpa.free(preprocessed_path);
- defer std.fs.cwd().deleteFile(preprocessed_path) catch {};
-
- try argv.appendSlice(&.{ "-o", preprocessed_path });
- const exit_code = try clangMain(arena, argv.items);
- if (exit_code != 0) {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "the preprocessor failed with exit code {}:", .{exit_code});
- process.exit(1);
- }
- break :full_input std.fs.cwd().readFileAlloc(gpa, preprocessed_path, std.math.maxInt(usize)) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to read preprocessed file path '{s}': {s}", .{ preprocessed_path, @errorName(err) });
- process.exit(1);
- };
- }
- } else {
- break :full_input std.fs.cwd().readFileAlloc(gpa, options.input_filename, std.math.maxInt(usize)) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to read input file path '{s}': {s}", .{ options.input_filename, @errorName(err) });
- process.exit(1);
- };
- }
- };
- defer gpa.free(full_input);
-
- if (options.preprocess == .only) {
- std.fs.cwd().writeFile(options.output_filename, full_input) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to write output file '{s}': {s}", .{ options.output_filename, @errorName(err) });
- process.exit(1);
- };
- return cleanExit();
- }
-
- var mapping_results = try resinator.source_mapping.parseAndRemoveLineCommands(gpa, full_input, full_input, .{ .initial_filename = options.input_filename });
- defer mapping_results.mappings.deinit(gpa);
-
- const final_input = resinator.comments.removeComments(mapping_results.result, mapping_results.result, &mapping_results.mappings);
-
- var output_file = std.fs.cwd().createFile(options.output_filename, .{}) catch |err| {
- try resinator.utils.renderErrorMessage(stderr.writer(), stderr_config, .err, "unable to create output file '{s}': {s}", .{ options.output_filename, @errorName(err) });
- process.exit(1);
- };
- var output_file_closed = false;
- defer if (!output_file_closed) output_file.close();
-
- var diagnostics = resinator.errors.Diagnostics.init(gpa);
- defer diagnostics.deinit();
-
- var output_buffered_stream = std.io.bufferedWriter(output_file.writer());
-
- resinator.compile.compile(gpa, final_input, output_buffered_stream.writer(), .{
- .cwd = std.fs.cwd(),
- .diagnostics = &diagnostics,
- .source_mappings = &mapping_results.mappings,
- .dependencies_list = null,
- .ignore_include_env_var = options.ignore_include_env_var,
- .extra_include_paths = options.extra_include_paths.items,
- .default_language_id = options.default_language_id,
- .default_code_page = options.default_code_page orelse .windows1252,
- .verbose = options.verbose,
- .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
- .max_string_literal_codepoints = options.max_string_literal_codepoints,
- .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
- .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
- }) catch |err| switch (err) {
- error.ParseError, error.CompileError => {
- diagnostics.renderToStdErr(std.fs.cwd(), final_input, stderr_config, mapping_results.mappings);
- // Delete the output file on error
- output_file.close();
- output_file_closed = true;
- // Failing to delete is not really a big deal, so swallow any errors
- std.fs.cwd().deleteFile(options.output_filename) catch {};
- process.exit(1);
- },
- else => |e| return e,
- };
-
- try output_buffered_stream.flush();
-
- // print any warnings/notes
- diagnostics.renderToStdErr(std.fs.cwd(), final_input, stderr_config, mapping_results.mappings);
-
- return cleanExit();
-}
-
-const RcIncludeArgs = struct {
- include_paths: []const []const u8 = &.{},
- target_abi: ?[]const u8 = null,
-};
-
-fn detectRcIncludeDirs(arena: Allocator, zig_lib_dir: []const u8, auto_includes: @import("resinator.zig").cli.Options.AutoIncludes) !RcIncludeArgs {
- if (auto_includes == .none) return .{};
- var cur_includes = auto_includes;
- if (builtin.target.os.tag != .windows) {
- switch (cur_includes) {
- // MSVC can't be found when the host isn't Windows, so short-circuit.
- .msvc => return error.WindowsSdkNotFound,
- // Skip straight to gnu since we won't be able to detect MSVC on non-Windows hosts.
- .any => cur_includes = .gnu,
- .gnu => {},
- .none => unreachable,
- }
- }
- while (true) {
- switch (cur_includes) {
- .any, .msvc => {
- const target_query: std.Target.Query = .{
- .os_tag = .windows,
- .abi = .msvc,
- };
- const target = std.zig.resolveTargetQueryOrFatal(target_query);
- const is_native_abi = target_query.isNativeAbi();
- const detected_libc = std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null) catch |err| {
- if (cur_includes == .any) {
- // fall back to mingw
- cur_includes = .gnu;
- continue;
- }
- return err;
- };
- if (detected_libc.libc_include_dir_list.len == 0) {
- if (cur_includes == .any) {
- // fall back to mingw
- cur_includes = .gnu;
- continue;
- }
- return error.WindowsSdkNotFound;
- }
- return .{
- .include_paths = detected_libc.libc_include_dir_list,
- .target_abi = "msvc",
- };
- },
- .gnu => {
- const target_query: std.Target.Query = .{
- .os_tag = .windows,
- .abi = .gnu,
- };
- const target = std.zig.resolveTargetQueryOrFatal(target_query);
- const is_native_abi = target_query.isNativeAbi();
- const detected_libc = try std.zig.LibCDirs.detect(arena, zig_lib_dir, target, is_native_abi, true, null);
- return .{
- .include_paths = detected_libc.libc_include_dir_list,
- .target_abi = "gnu",
- };
- },
- .none => unreachable,
- }
- }
-}
-
const usage_init =
\\Usage: zig init
\\
@@ -5569,6 +5306,8 @@ const JitCmdOptions = struct {
prepend_zig_exe_path: bool = false,
depend_on_aro: bool = false,
capture: ?*[]u8 = null,
+ /// Send progress and error bundles via std.zig.Server over stdout
+ server: bool = false,
};
fn jitCmd(
@@ -5714,10 +5453,52 @@ fn jitCmd(
};
defer comp.destroy();
- updateModule(comp, color) catch |err| switch (err) {
- error.SemanticAnalyzeFail => process.exit(2),
- else => |e| return e,
- };
+ if (options.server and !builtin.single_threaded) {
+ var reset: std.Thread.ResetEvent = .{};
+ var progress: std.Progress = .{
+ .terminal = null,
+ .root = .{
+ .context = undefined,
+ .parent = null,
+ .name = "",
+ .unprotected_estimated_total_items = 0,
+ .unprotected_completed_items = 0,
+ },
+ .columns_written = 0,
+ .prev_refresh_timestamp = 0,
+ .timer = null,
+ .done = false,
+ };
+ const main_progress_node = &progress.root;
+ main_progress_node.context = &progress;
+ var server = std.zig.Server{
+ .out = std.io.getStdOut(),
+ .in = undefined, // won't be receiving messages
+ .receive_fifo = undefined, // won't be receiving messages
+ };
+
+ var progress_thread = try std.Thread.spawn(.{}, progressThread, .{
+ &progress, &server, &reset,
+ });
+ defer {
+ reset.set();
+ progress_thread.join();
+ }
+
+ try comp.update(main_progress_node);
+
+ var error_bundle = try comp.getAllErrorsAlloc();
+ defer error_bundle.deinit(comp.gpa);
+ if (error_bundle.errorMessageCount() > 0) {
+ try server.serveErrorBundle(error_bundle);
+ process.exit(2);
+ }
+ } else {
+ updateModule(comp, color) catch |err| switch (err) {
+ error.SemanticAnalyzeFail => process.exit(2),
+ else => |e| return e,
+ };
+ }
const exe_path = try global_cache_directory.join(arena, &.{comp.cache_use.whole.bin_sub_path.?});
child_argv.appendAssumeCapacity(exe_path);
diff --git a/src/resinator.zig b/src/resinator.zig
@@ -1,25 +0,0 @@
-comptime {
- if (@import("build_options").only_core_functionality) {
- @compileError("resinator included in only_core_functionality build");
- }
-}
-
-pub const ani = @import("resinator/ani.zig");
-pub const ast = @import("resinator/ast.zig");
-pub const bmp = @import("resinator/bmp.zig");
-pub const cli = @import("resinator/cli.zig");
-pub const code_pages = @import("resinator/code_pages.zig");
-pub const comments = @import("resinator/comments.zig");
-pub const compile = @import("resinator/compile.zig");
-pub const errors = @import("resinator/errors.zig");
-pub const ico = @import("resinator/ico.zig");
-pub const lang = @import("resinator/lang.zig");
-pub const lex = @import("resinator/lex.zig");
-pub const literals = @import("resinator/literals.zig");
-pub const parse = @import("resinator/parse.zig");
-pub const preprocess = @import("resinator/preprocess.zig");
-pub const rc = @import("resinator/rc.zig");
-pub const res = @import("resinator/res.zig");
-pub const source_mapping = @import("resinator/source_mapping.zig");
-pub const utils = @import("resinator/utils.zig");
-pub const windows1252 = @import("resinator/windows1252.zig");
diff --git a/src/resinator/cli.zig b/src/resinator/cli.zig
@@ -1,1439 +0,0 @@
-const std = @import("std");
-const CodePage = @import("code_pages.zig").CodePage;
-const lang = @import("lang.zig");
-const res = @import("res.zig");
-const Allocator = std.mem.Allocator;
-const lex = @import("lex.zig");
-
-/// This is what /SL 100 will set the maximum string literal length to
-pub const max_string_literal_length_100_percent = 8192;
-
-pub const usage_string_after_command_name =
- \\ [options] [--] <INPUT> [<OUTPUT>]
- \\
- \\The sequence -- can be used to signify when to stop parsing options.
- \\This is necessary when the input path begins with a forward slash.
- \\
- \\Supported Win32 RC Options:
- \\ /?, /h Print this help and exit.
- \\ /v Verbose (print progress messages).
- \\ /d <name>[=<value>] Define a symbol (during preprocessing).
- \\ /u <name> Undefine a symbol (during preprocessing).
- \\ /fo <value> Specify output file path.
- \\ /l <value> Set default language using hexadecimal id (ex: 409).
- \\ /ln <value> Set default language using language name (ex: en-us).
- \\ /i <value> Add an include path.
- \\ /x Ignore INCLUDE environment variable.
- \\ /c <value> Set default code page (ex: 65001).
- \\ /w Warn on invalid code page in .rc (instead of error).
- \\ /y Suppress warnings for duplicate control IDs.
- \\ /n Null-terminate all strings in string tables.
- \\ /sl <value> Specify string literal length limit in percentage (1-100)
- \\ where 100 corresponds to a limit of 8192. If the /sl
- \\ option is not specified, the default limit is 4097.
- \\ /p Only run the preprocessor and output a .rcpp file.
- \\
- \\No-op Win32 RC Options:
- \\ /nologo, /a, /r Options that are recognized but do nothing.
- \\
- \\Unsupported Win32 RC Options:
- \\ /fm, /q, /g, /gn, /g1, /g2 Unsupported MUI-related options.
- \\ /?c, /hc, /t, /tp:<prefix>, Unsupported LCX/LCE-related options.
- \\ /tn, /tm, /tc, /tw, /te,
- \\ /ti, /ta
- \\ /z Unsupported font-substitution-related option.
- \\ /s Unsupported HWB-related option.
- \\
- \\Custom Options (resinator-specific):
- \\ /:no-preprocess Do not run the preprocessor.
- \\ /:debug Output the preprocessed .rc file and the parsed AST.
- \\ /:auto-includes <value> Set the automatic include path detection behavior.
- \\ any (default) Use MSVC if available, fall back to MinGW
- \\ msvc Use MSVC include paths (must be present on the system)
- \\ gnu Use MinGW include paths (requires Zig as the preprocessor)
- \\ none Do not use any autodetected include paths
- \\
- \\Note: For compatibility reasons, all custom options start with :
- \\
-;
-
-pub fn writeUsage(writer: anytype, command_name: []const u8) !void {
- try writer.writeAll("Usage: ");
- try writer.writeAll(command_name);
- try writer.writeAll(usage_string_after_command_name);
-}
-
-pub const Diagnostics = struct {
- errors: std.ArrayListUnmanaged(ErrorDetails) = .{},
- allocator: Allocator,
-
- pub const ErrorDetails = struct {
- arg_index: usize,
- arg_span: ArgSpan = .{},
- msg: std.ArrayListUnmanaged(u8) = .{},
- type: Type = .err,
- print_args: bool = true,
-
- pub const Type = enum { err, warning, note };
- pub const ArgSpan = struct {
- point_at_next_arg: bool = false,
- name_offset: usize = 0,
- prefix_len: usize = 0,
- value_offset: usize = 0,
- name_len: usize = 0,
- };
- };
-
- pub fn init(allocator: Allocator) Diagnostics {
- return .{
- .allocator = allocator,
- };
- }
-
- pub fn deinit(self: *Diagnostics) void {
- for (self.errors.items) |*details| {
- details.msg.deinit(self.allocator);
- }
- self.errors.deinit(self.allocator);
- }
-
- pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void {
- try self.errors.append(self.allocator, error_details);
- }
-
- pub fn renderToStdErr(self: *Diagnostics, args: []const []const u8, config: std.io.tty.Config) void {
- std.debug.getStderrMutex().lock();
- defer std.debug.getStderrMutex().unlock();
- const stderr = std.io.getStdErr().writer();
- self.renderToWriter(args, stderr, config) catch return;
- }
-
- pub fn renderToWriter(self: *Diagnostics, args: []const []const u8, writer: anytype, config: std.io.tty.Config) !void {
- for (self.errors.items) |err_details| {
- try renderErrorMessage(writer, config, err_details, args);
- }
- }
-
- pub fn hasError(self: *const Diagnostics) bool {
- for (self.errors.items) |err| {
- if (err.type == .err) return true;
- }
- return false;
- }
-};
-
-pub const Options = struct {
- allocator: Allocator,
- input_filename: []const u8 = &[_]u8{},
- output_filename: []const u8 = &[_]u8{},
- extra_include_paths: std.ArrayListUnmanaged([]const u8) = .{},
- ignore_include_env_var: bool = false,
- preprocess: Preprocess = .yes,
- default_language_id: ?u16 = null,
- default_code_page: ?CodePage = null,
- verbose: bool = false,
- symbols: std.StringArrayHashMapUnmanaged(SymbolValue) = .{},
- null_terminate_string_table_strings: bool = false,
- max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints,
- silent_duplicate_control_ids: bool = false,
- warn_instead_of_error_on_invalid_code_page: bool = false,
- debug: bool = false,
- print_help_and_exit: bool = false,
- auto_includes: AutoIncludes = .any,
-
- pub const AutoIncludes = enum { any, msvc, gnu, none };
- pub const Preprocess = enum { no, yes, only };
- pub const SymbolAction = enum { define, undefine };
- pub const SymbolValue = union(SymbolAction) {
- define: []const u8,
- undefine: void,
-
- pub fn deinit(self: SymbolValue, allocator: Allocator) void {
- switch (self) {
- .define => |value| allocator.free(value),
- .undefine => {},
- }
- }
- };
-
- /// Does not check that identifier contains only valid characters
- pub fn define(self: *Options, identifier: []const u8, value: []const u8) !void {
- if (self.symbols.getPtr(identifier)) |val_ptr| {
- // If the symbol is undefined, then that always takes precedence so
- // we shouldn't change anything.
- if (val_ptr.* == .undefine) return;
- // Otherwise, the new value takes precedence.
- const duped_value = try self.allocator.dupe(u8, value);
- errdefer self.allocator.free(duped_value);
- val_ptr.deinit(self.allocator);
- val_ptr.* = .{ .define = duped_value };
- return;
- }
- const duped_key = try self.allocator.dupe(u8, identifier);
- errdefer self.allocator.free(duped_key);
- const duped_value = try self.allocator.dupe(u8, value);
- errdefer self.allocator.free(duped_value);
- try self.symbols.put(self.allocator, duped_key, .{ .define = duped_value });
- }
-
- /// Does not check that identifier contains only valid characters
- pub fn undefine(self: *Options, identifier: []const u8) !void {
- if (self.symbols.getPtr(identifier)) |action| {
- action.deinit(self.allocator);
- action.* = .{ .undefine = {} };
- return;
- }
- const duped_key = try self.allocator.dupe(u8, identifier);
- errdefer self.allocator.free(duped_key);
- try self.symbols.put(self.allocator, duped_key, .{ .undefine = {} });
- }
-
- /// If the current input filename both:
- /// - does not have an extension, and
- /// - does not exist in the cwd
- /// then this function will append `.rc` to the input filename
- ///
- /// Note: This behavior is different from the Win32 compiler.
- /// It always appends .RC if the filename does not have
- /// a `.` in it and it does not even try the verbatim name
- /// in that scenario.
- ///
- /// The approach taken here is meant to give us a 'best of both
- /// worlds' situation where we'll be compatible with most use-cases
- /// of the .rc extension being omitted from the CLI args, but still
- /// work fine if the file itself does not have an extension.
- pub fn maybeAppendRC(options: *Options, cwd: std.fs.Dir) !void {
- if (std.fs.path.extension(options.input_filename).len == 0) {
- cwd.access(options.input_filename, .{}) catch |err| switch (err) {
- error.FileNotFound => {
- var filename_bytes = try options.allocator.alloc(u8, options.input_filename.len + 3);
- @memcpy(filename_bytes[0 .. filename_bytes.len - 3], options.input_filename);
- @memcpy(filename_bytes[filename_bytes.len - 3 ..], ".rc");
- options.allocator.free(options.input_filename);
- options.input_filename = filename_bytes;
- },
- else => {},
- };
- }
- }
-
- pub fn deinit(self: *Options) void {
- for (self.extra_include_paths.items) |extra_include_path| {
- self.allocator.free(extra_include_path);
- }
- self.extra_include_paths.deinit(self.allocator);
- self.allocator.free(self.input_filename);
- self.allocator.free(self.output_filename);
- var symbol_it = self.symbols.iterator();
- while (symbol_it.next()) |entry| {
- self.allocator.free(entry.key_ptr.*);
- entry.value_ptr.deinit(self.allocator);
- }
- self.symbols.deinit(self.allocator);
- }
-
- pub fn dumpVerbose(self: *const Options, writer: anytype) !void {
- try writer.print("Input filename: {s}\n", .{self.input_filename});
- try writer.print("Output filename: {s}\n", .{self.output_filename});
- if (self.extra_include_paths.items.len > 0) {
- try writer.writeAll(" Extra include paths:\n");
- for (self.extra_include_paths.items) |extra_include_path| {
- try writer.print(" \"{s}\"\n", .{extra_include_path});
- }
- }
- if (self.ignore_include_env_var) {
- try writer.writeAll(" The INCLUDE environment variable will be ignored\n");
- }
- if (self.preprocess == .no) {
- try writer.writeAll(" The preprocessor will not be invoked\n");
- } else if (self.preprocess == .only) {
- try writer.writeAll(" Only the preprocessor will be invoked\n");
- }
- if (self.symbols.count() > 0) {
- try writer.writeAll(" Symbols:\n");
- var it = self.symbols.iterator();
- while (it.next()) |symbol| {
- try writer.print(" {s} {s}", .{ switch (symbol.value_ptr.*) {
- .define => "#define",
- .undefine => "#undef",
- }, symbol.key_ptr.* });
- if (symbol.value_ptr.* == .define) {
- try writer.print(" {s}", .{symbol.value_ptr.define});
- }
- try writer.writeAll("\n");
- }
- }
- if (self.null_terminate_string_table_strings) {
- try writer.writeAll(" Strings in string tables will be null-terminated\n");
- }
- if (self.max_string_literal_codepoints != lex.default_max_string_literal_codepoints) {
- try writer.print(" Max string literal length: {}\n", .{self.max_string_literal_codepoints});
- }
- if (self.silent_duplicate_control_ids) {
- try writer.writeAll(" Duplicate control IDs will not emit warnings\n");
- }
- if (self.silent_duplicate_control_ids) {
- try writer.writeAll(" Invalid code page in .rc will produce a warning (instead of an error)\n");
- }
-
- const language_id = self.default_language_id orelse res.Language.default;
- const language_name = language_name: {
- if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| {
- break :language_name @tagName(lang_enum_val);
- } else |_| {}
- if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) {
- break :language_name "LOCALE_CUSTOM_UNSPECIFIED";
- }
- break :language_name "<UNKNOWN>";
- };
- try writer.print("Default language: {s} (id=0x{x})\n", .{ language_name, language_id });
-
- const code_page = self.default_code_page orelse .windows1252;
- try writer.print("Default codepage: {s} (id={})\n", .{ @tagName(code_page), @intFromEnum(code_page) });
- }
-};
-
-pub const Arg = struct {
- prefix: enum { long, short, slash },
- name_offset: usize,
- full: []const u8,
-
- pub fn fromString(str: []const u8) ?@This() {
- if (std.mem.startsWith(u8, str, "--")) {
- return .{ .prefix = .long, .name_offset = 2, .full = str };
- } else if (std.mem.startsWith(u8, str, "-")) {
- return .{ .prefix = .short, .name_offset = 1, .full = str };
- } else if (std.mem.startsWith(u8, str, "/")) {
- return .{ .prefix = .slash, .name_offset = 1, .full = str };
- }
- return null;
- }
-
- pub fn prefixSlice(self: Arg) []const u8 {
- return self.full[0..(if (self.prefix == .long) 2 else 1)];
- }
-
- pub fn name(self: Arg) []const u8 {
- return self.full[self.name_offset..];
- }
-
- pub fn optionWithoutPrefix(self: Arg, option_len: usize) []const u8 {
- return self.name()[0..option_len];
- }
-
- pub fn missingSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan {
- return .{
- .point_at_next_arg = true,
- .value_offset = 0,
- .name_offset = self.name_offset,
- .prefix_len = self.prefixSlice().len,
- };
- }
-
- pub fn optionAndAfterSpan(self: Arg) Diagnostics.ErrorDetails.ArgSpan {
- return self.optionSpan(0);
- }
-
- pub fn optionSpan(self: Arg, option_len: usize) Diagnostics.ErrorDetails.ArgSpan {
- return .{
- .name_offset = self.name_offset,
- .prefix_len = self.prefixSlice().len,
- .name_len = option_len,
- };
- }
-
- pub const Value = struct {
- slice: []const u8,
- index_increment: u2 = 1,
-
- pub fn argSpan(self: Value, arg: Arg) Diagnostics.ErrorDetails.ArgSpan {
- const prefix_len = arg.prefixSlice().len;
- switch (self.index_increment) {
- 1 => return .{
- .value_offset = @intFromPtr(self.slice.ptr) - @intFromPtr(arg.full.ptr),
- .prefix_len = prefix_len,
- .name_offset = arg.name_offset,
- },
- 2 => return .{
- .point_at_next_arg = true,
- .prefix_len = prefix_len,
- .name_offset = arg.name_offset,
- },
- else => unreachable,
- }
- }
-
- pub fn index(self: Value, arg_index: usize) usize {
- if (self.index_increment == 2) return arg_index + 1;
- return arg_index;
- }
- };
-
- pub fn value(self: Arg, option_len: usize, index: usize, args: []const []const u8) error{MissingValue}!Value {
- const rest = self.full[self.name_offset + option_len ..];
- if (rest.len > 0) return .{ .slice = rest };
- if (index + 1 >= args.len) return error.MissingValue;
- return .{ .slice = args[index + 1], .index_increment = 2 };
- }
-
- pub const Context = struct {
- index: usize,
- arg: Arg,
- value: Value,
- };
-};
-
-pub const ParseError = error{ParseError} || Allocator.Error;
-
-/// Note: Does not run `Options.maybeAppendRC` automatically. If that behavior is desired,
-/// it must be called separately.
-pub fn parse(allocator: Allocator, args: []const []const u8, diagnostics: *Diagnostics) ParseError!Options {
- var options = Options{ .allocator = allocator };
- errdefer options.deinit();
-
- var output_filename: ?[]const u8 = null;
- var output_filename_context: Arg.Context = undefined;
-
- var arg_i: usize = 1; // start at 1 to skip past the exe name
- next_arg: while (arg_i < args.len) {
- var arg = Arg.fromString(args[arg_i]) orelse break;
- if (arg.name().len == 0) {
- switch (arg.prefix) {
- // -- on its own ends arg parsing
- .long => {
- arg_i += 1;
- break;
- },
- // - or / on its own is an error
- else => {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid option: {s}", .{arg.prefixSlice()});
- try diagnostics.append(err_details);
- arg_i += 1;
- continue :next_arg;
- },
- }
- }
-
- while (arg.name().len > 0) {
- const arg_name = arg.name();
- // Note: These cases should be in order from longest to shortest, since
- // shorter options that are a substring of a longer one could make
- // the longer option's branch unreachable.
- if (std.ascii.startsWithIgnoreCase(arg_name, ":no-preprocess")) {
- options.preprocess = .no;
- arg.name_offset += ":no-preprocess".len;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, ":auto-includes")) {
- const value = arg.value(":auto-includes".len, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(":auto-includes".len) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- options.auto_includes = std.meta.stringToEnum(Options.AutoIncludes, value.slice) orelse blk: {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid auto includes setting: {s} ", .{value.slice});
- try diagnostics.append(err_details);
- break :blk options.auto_includes;
- };
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "nologo")) {
- // No-op, we don't display any 'logo' to suppress
- arg.name_offset += "nologo".len;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, ":debug")) {
- options.debug = true;
- arg.name_offset += ":debug".len;
- }
- // Unsupported LCX/LCE options that need a value (within the same arg only)
- else if (std.ascii.startsWithIgnoreCase(arg_name, "tp:")) {
- const rest = arg.full[arg.name_offset + 3 ..];
- if (rest.len == 0) {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = .{
- .name_offset = arg.name_offset,
- .prefix_len = arg.prefixSlice().len,
- .value_offset = arg.name_offset + 3,
- } };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing value for {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) });
- try diagnostics.append(err_details);
- }
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(3) });
- try diagnostics.append(err_details);
- arg_i += 1;
- continue :next_arg;
- }
- // Unsupported LCX/LCE options that need a value
- else if (std.ascii.startsWithIgnoreCase(arg_name, "tn")) {
- const value = arg.value(2, arg_i, args) catch no_value: {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- // dummy zero-length slice starting where the value would have been
- const value_start = arg.name_offset + 2;
- break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
- };
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- }
- // Unsupported MUI options that need a value
- else if (std.ascii.startsWithIgnoreCase(arg_name, "fm") or
- std.ascii.startsWithIgnoreCase(arg_name, "gn") or
- std.ascii.startsWithIgnoreCase(arg_name, "g2"))
- {
- const value = arg.value(2, arg_i, args) catch no_value: {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- // dummy zero-length slice starting where the value would have been
- const value_start = arg.name_offset + 2;
- break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
- };
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- }
- // Unsupported MUI options that do not need a value
- else if (std.ascii.startsWithIgnoreCase(arg_name, "g1")) {
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg.name_offset += 2;
- }
- // Unsupported LCX/LCE options that do not need a value
- else if (std.ascii.startsWithIgnoreCase(arg_name, "tm") or
- std.ascii.startsWithIgnoreCase(arg_name, "tc") or
- std.ascii.startsWithIgnoreCase(arg_name, "tw") or
- std.ascii.startsWithIgnoreCase(arg_name, "te") or
- std.ascii.startsWithIgnoreCase(arg_name, "ti") or
- std.ascii.startsWithIgnoreCase(arg_name, "ta"))
- {
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(2) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg.name_offset += 2;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "fo")) {
- const value = arg.value(2, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing output path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- output_filename_context = .{ .index = arg_i, .arg = arg, .value = value };
- output_filename = value.slice;
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "sl")) {
- const value = arg.value(2, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- const percent_str = value.slice;
- const percent: u32 = parsePercent(percent_str) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid percent format '{s}'", .{percent_str});
- try diagnostics.append(err_details);
- var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i };
- var note_writer = note_details.msg.writer(allocator);
- try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)");
- try diagnostics.append(note_details);
- arg_i += value.index_increment;
- continue :next_arg;
- };
- if (percent == 0 or percent > 100) {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("percent out of range: {} (parsed from '{s}')", .{ percent, percent_str });
- try diagnostics.append(err_details);
- var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = false, .arg_index = arg_i };
- var note_writer = note_details.msg.writer(allocator);
- try note_writer.writeAll("string length percent must be an integer between 1 and 100 (inclusive)");
- try diagnostics.append(note_details);
- arg_i += value.index_increment;
- continue :next_arg;
- }
- const percent_float = @as(f32, @floatFromInt(percent)) / 100;
- options.max_string_literal_codepoints = @intFromFloat(percent_float * max_string_literal_length_100_percent);
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "ln")) {
- const value = arg.value(2, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing language tag after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(2) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- const tag = value.slice;
- options.default_language_id = lang.tagToInt(tag) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid language tag: {s}", .{tag});
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- };
- if (options.default_language_id.? == lang.LOCALE_CUSTOM_UNSPECIFIED) {
- var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("language tag '{s}' does not have an assigned ID so it will be resolved to LOCALE_CUSTOM_UNSPECIFIED (id=0x{x})", .{ tag, lang.LOCALE_CUSTOM_UNSPECIFIED });
- try diagnostics.append(err_details);
- }
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "l")) {
- const value = arg.value(1, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing language ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- const num_str = value.slice;
- options.default_language_id = lang.parseInt(num_str) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid language ID: {s}", .{num_str});
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- };
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "h") or std.mem.startsWith(u8, arg_name, "?")) {
- options.print_help_and_exit = true;
- // If there's been an error to this point, then we still want to fail
- if (diagnostics.hasError()) return error.ParseError;
- return options;
- }
- // 1 char unsupported MUI options that need a value
- else if (std.ascii.startsWithIgnoreCase(arg_name, "q") or
- std.ascii.startsWithIgnoreCase(arg_name, "g"))
- {
- const value = arg.value(1, arg_i, args) catch no_value: {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- // dummy zero-length slice starting where the value would have been
- const value_start = arg.name_offset + 1;
- break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
- };
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- }
- // Undocumented (and unsupported) options that need a value
- // /z has to do something with font substitution
- // /s has something to do with HWB resources being inserted into the .res
- else if (std.ascii.startsWithIgnoreCase(arg_name, "z") or
- std.ascii.startsWithIgnoreCase(arg_name, "s"))
- {
- const value = arg.value(1, arg_i, args) catch no_value: {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing value after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- // dummy zero-length slice starting where the value would have been
- const value_start = arg.name_offset + 1;
- break :no_value Arg.Value{ .slice = arg.full[value_start..value_start] };
- };
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- }
- // 1 char unsupported LCX/LCE options that do not need a value
- else if (std.ascii.startsWithIgnoreCase(arg_name, "t")) {
- var err_details = Diagnostics.ErrorDetails{ .type = .err, .arg_index = arg_i, .arg_span = arg.optionSpan(1) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("the {s}{s} option is unsupported", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "c")) {
- const value = arg.value(1, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing code page ID after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- const num_str = value.slice;
- const code_page_id = std.fmt.parseUnsigned(u16, num_str, 10) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid code page ID: {s}", .{num_str});
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- };
- options.default_code_page = CodePage.getByIdentifierEnsureSupported(code_page_id) catch |err| switch (err) {
- error.InvalidCodePage => {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid or unknown code page ID: {}", .{code_page_id});
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- },
- error.UnsupportedCodePage => {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("unsupported code page: {s} (id={})", .{
- @tagName(CodePage.getByIdentifier(code_page_id) catch unreachable),
- code_page_id,
- });
- try diagnostics.append(err_details);
- arg_i += value.index_increment;
- continue :next_arg;
- },
- };
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "v")) {
- options.verbose = true;
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "x")) {
- options.ignore_include_env_var = true;
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "p")) {
- options.preprocess = .only;
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "i")) {
- const value = arg.value(1, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing include path after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- const path = value.slice;
- const duped = try allocator.dupe(u8, path);
- errdefer allocator.free(duped);
- try options.extra_include_paths.append(options.allocator, duped);
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "r")) {
- // From https://learn.microsoft.com/en-us/windows/win32/menurc/using-rc-the-rc-command-line-
- // "Ignored. Provided for compatibility with existing makefiles."
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "n")) {
- options.null_terminate_string_table_strings = true;
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "y")) {
- options.silent_duplicate_control_ids = true;
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "w")) {
- options.warn_instead_of_error_on_invalid_code_page = true;
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "a")) {
- // Undocumented option with unknown function
- // TODO: More investigation to figure out what it does (if anything)
- var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = arg.optionSpan(1) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("option {s}{s} has no effect (it is undocumented and its function is unknown in the Win32 RC compiler)", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg.name_offset += 1;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "d")) {
- const value = arg.value(1, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing symbol to define after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- var tokenizer = std.mem.tokenize(u8, value.slice, "=");
- // guaranteed to exist since an empty value.slice would invoke
- // the 'missing symbol to define' branch above
- const symbol = tokenizer.next().?;
- const symbol_value = tokenizer.next() orelse "1";
-
- if (isValidIdentifier(symbol)) {
- try options.define(symbol, symbol_value);
- } else {
- var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be defined", .{symbol});
- try diagnostics.append(err_details);
- }
- arg_i += value.index_increment;
- continue :next_arg;
- } else if (std.ascii.startsWithIgnoreCase(arg_name, "u")) {
- const value = arg.value(1, arg_i, args) catch {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.missingSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("missing symbol to undefine after {s}{s} option", .{ arg.prefixSlice(), arg.optionWithoutPrefix(1) });
- try diagnostics.append(err_details);
- arg_i += 1;
- break :next_arg;
- };
- const symbol = value.slice;
- if (isValidIdentifier(symbol)) {
- try options.undefine(symbol);
- } else {
- var err_details = Diagnostics.ErrorDetails{ .type = .warning, .arg_index = arg_i, .arg_span = value.argSpan(arg) };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("symbol \"{s}\" is not a valid identifier and therefore cannot be undefined", .{symbol});
- try diagnostics.append(err_details);
- }
- arg_i += value.index_increment;
- continue :next_arg;
- } else {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i, .arg_span = arg.optionAndAfterSpan() };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.print("invalid option: {s}{s}", .{ arg.prefixSlice(), arg.name() });
- try diagnostics.append(err_details);
- arg_i += 1;
- continue :next_arg;
- }
- } else {
- // The while loop exited via its conditional, meaning we are done with
- // the current arg and can move on to the next
- arg_i += 1;
- continue;
- }
- }
-
- const positionals = args[arg_i..];
-
- if (positionals.len < 1) {
- var err_details = Diagnostics.ErrorDetails{ .print_args = false, .arg_index = arg_i };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.writeAll("missing input filename");
- try diagnostics.append(err_details);
-
- const last_arg = args[args.len - 1];
- if (arg_i > 1 and last_arg.len > 0 and last_arg[0] == '/' and std.ascii.endsWithIgnoreCase(last_arg, ".rc")) {
- var note_details = Diagnostics.ErrorDetails{ .type = .note, .print_args = true, .arg_index = arg_i - 1 };
- var note_writer = note_details.msg.writer(allocator);
- try note_writer.writeAll("if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing");
- try diagnostics.append(note_details);
- }
-
- // This is a fatal enough problem to justify an early return, since
- // things after this rely on the value of the input filename.
- return error.ParseError;
- }
- options.input_filename = try allocator.dupe(u8, positionals[0]);
-
- if (positionals.len > 1) {
- if (output_filename != null) {
- var err_details = Diagnostics.ErrorDetails{ .arg_index = arg_i + 1 };
- var msg_writer = err_details.msg.writer(allocator);
- try msg_writer.writeAll("output filename already specified");
- try diagnostics.append(err_details);
- var note_details = Diagnostics.ErrorDetails{
- .type = .note,
- .arg_index = output_filename_context.value.index(output_filename_context.index),
- .arg_span = output_filename_context.value.argSpan(output_filename_context.arg),
- };
- var note_writer = note_details.msg.writer(allocator);
- try note_writer.writeAll("output filename previously specified here");
- try diagnostics.append(note_details);
- } else {
- output_filename = positionals[1];
- }
- }
- if (output_filename == null) {
- var buf = std.ArrayList(u8).init(allocator);
- errdefer buf.deinit();
-
- if (std.fs.path.dirname(options.input_filename)) |dirname| {
- var end_pos = dirname.len;
- // We want to ensure that we write a path separator at the end, so if the dirname
- // doesn't end with a path sep then include the char after the dirname
- // which must be a path sep.
- if (!std.fs.path.isSep(dirname[dirname.len - 1])) end_pos += 1;
- try buf.appendSlice(options.input_filename[0..end_pos]);
- }
- try buf.appendSlice(std.fs.path.stem(options.input_filename));
- if (options.preprocess == .only) {
- try buf.appendSlice(".rcpp");
- } else {
- try buf.appendSlice(".res");
- }
-
- options.output_filename = try buf.toOwnedSlice();
- } else {
- options.output_filename = try allocator.dupe(u8, output_filename.?);
- }
-
- if (diagnostics.hasError()) {
- return error.ParseError;
- }
-
- return options;
-}
-
-/// Returns true if `str` is a valid C identifier for use in a #define/#undef macro:
-/// a non-empty sequence of ASCII letters, digits, and underscores that does not
-/// start with a digit.
-pub fn isValidIdentifier(str: []const u8) bool {
-    // An empty name is not an identifier; without this check the loop below
-    // would never run and the function would vacuously return true.
-    if (str.len == 0) return false;
-    for (str, 0..) |c, i| switch (c) {
-        // Digits are allowed anywhere except the leading position
-        '0'...'9' => if (i == 0) return false,
-        'a'...'z', 'A'...'Z', '_' => {},
-        else => return false,
-    };
-    return true;
-}
-
-/// This function is specific to how the Win32 RC command line interprets
-/// max string literal length percent.
-/// - Wraps on overflow of u32
-/// - Stops parsing at the first character that is not a decimal digit
-/// - Errors if the first character (after an optional `-`) is not a decimal digit
-/// - Errors if `str` is empty
-/// - `-` (negative) prefix is allowed; the parsed value is negated with wrapping.
-///   A lone `-` has no digits to parse and yields 0.
-pub fn parsePercent(str: []const u8) error{InvalidFormat}!u32 {
-    // Indexing str[0] below would be out of bounds for an empty string
-    if (str.len == 0) return error.InvalidFormat;
-
-    const radix: u8 = 10;
-    const negative = str[0] == '-';
-    const digits = if (negative) str[1..] else str;
-
-    var result: u32 = 0;
-    for (digits, 0..) |c, i| {
-        const digit: u8 = switch (c) {
-            '0'...'9' => c - '0',
-            else => {
-                // First digit must be valid
-                if (i == 0) return error.InvalidFormat;
-                // Any later invalid character just ends the number
-                break;
-            },
-        };
-        // Wrapping arithmetic is intentional: overflow wraps around u32
-        result = result *% radix +% digit;
-    }
-
-    return if (negative) 0 -% result else result;
-}
-
-test parsePercent {
-    // Inputs that parse successfully, paired with the (possibly wrapped) u32 result
-    const ok_cases = [_]struct { input: []const u8, expected: u32 }{
-        .{ .input = "16", .expected = 16 },
-        .{ .input = "0x1A", .expected = 0 },
-        .{ .input = "1zzzz", .expected = 0x1 },
-        .{ .input = "-1", .expected = 0xffffffff },
-        .{ .input = "-16", .expected = 0xfffffff0 },
-        .{ .input = "4294967297", .expected = 1 },
-    };
-    for (ok_cases) |case| {
-        try std.testing.expectEqual(case.expected, try parsePercent(case.input));
-    }
-
-    // Inputs whose first character (after an optional `-`) is not a digit
-    const bad_inputs = [_][]const u8{ "--1", "ha", "¹", "~1" };
-    for (bad_inputs) |input| {
-        try std.testing.expectError(error.InvalidFormat, parsePercent(input));
-    }
-}
-
-/// Writes one CLI diagnostic to `writer`: a `<cli>: <severity>: <message>` header line and,
-/// when `err_details.print_args` is set, an excerpt of the argument at
-/// `args[err_details.arg_index]` (plus the following argument when the span points at it)
-/// with a line of `~`/`^` markers underneath. Colors are emitted through `config`.
-pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, err_details: Diagnostics.ErrorDetails, args: []const []const u8) !void {
-    // Header: "<cli>: " followed by the colored severity label and the message
-    try config.setColor(writer, .dim);
-    try writer.writeAll("<cli>");
-    try config.setColor(writer, .reset);
-    try config.setColor(writer, .bold);
-    try writer.writeAll(": ");
-    const Severity = struct { color: std.io.tty.Color, label: []const u8 };
-    const severity: Severity = switch (err_details.type) {
-        .err => .{ .color = .red, .label = "error: " },
-        .warning => .{ .color = .yellow, .label = "warning: " },
-        .note => .{ .color = .cyan, .label = "note: " },
-    };
-    try config.setColor(writer, severity.color);
-    try writer.writeAll(severity.label);
-    try config.setColor(writer, .reset);
-    try config.setColor(writer, .bold);
-    try writer.writeAll(err_details.msg.items);
-    try writer.writeByte('\n');
-    try config.setColor(writer, .reset);
-
-    // Diagnostics without an argument excerpt end with a blank line
-    if (!err_details.print_args) {
-        try writer.writeByte('\n');
-        return;
-    }
-
-    const span = err_details.arg_span;
-    const leader = " ... ";
-    try config.setColor(writer, .dim);
-    try writer.writeAll(leader);
-    try config.setColor(writer, .reset);
-
-    // Split the argument into: prefix, already-consumed part, option name, remainder
-    const shown_arg = args[err_details.arg_index];
-    const prefix_part = shown_arg[0..span.prefix_len];
-    const skipped_part = shown_arg[span.prefix_len..span.name_offset];
-    const name_part = name: {
-        var rest = shown_arg[span.name_offset..];
-        // A name_len of 0 leaves the name extending to the end of the argument
-        if (span.name_len > 0) rest.len = span.name_len;
-        break :name rest;
-    };
-    const trailing_part = shown_arg[span.name_offset + name_part.len ..];
-
-    try writer.writeAll(prefix_part);
-    if (skipped_part.len > 0) {
-        try config.setColor(writer, .dim);
-        try writer.writeAll(skipped_part);
-        try config.setColor(writer, .reset);
-    }
-    try writer.writeAll(name_part);
-    if (trailing_part.len > 0) {
-        try config.setColor(writer, .dim);
-        try writer.writeAll(trailing_part);
-        try config.setColor(writer, .reset);
-    }
-
-    // When the span points at the next argument (and there is one), show it too
-    const next_arg_len: usize = if (span.point_at_next_arg and err_details.arg_index + 1 < args.len) len: {
-        const following_arg = args[err_details.arg_index + 1];
-        try writer.writeByte(' ');
-        try writer.writeAll(following_arg);
-        break :len following_arg.len;
-    } else 0;
-
-    const last_shown_index = if (span.point_at_next_arg) err_details.arg_index + 1 else err_details.arg_index;
-    if (last_shown_index + 1 < args.len) {
-        // special case for when pointing to a missing value within the same arg
-        // as the name
-        if (span.value_offset >= shown_arg.len) {
-            try writer.writeByte(' ');
-        }
-        try config.setColor(writer, .dim);
-        try writer.writeAll(" ...");
-        try config.setColor(writer, .reset);
-    }
-    try writer.writeByte('\n');
-
-    // Marker line, aligned underneath the excerpt
-    try config.setColor(writer, .green);
-    try writer.writeByteNTimes(' ', leader.len);
-    if (span.prefix_len == shown_arg.len) {
-        // Special case for when the option is *only* a prefix (e.g. invalid option: -)
-        try writer.writeByteNTimes('^', span.prefix_len);
-    } else {
-        try writer.writeByteNTimes('~', span.prefix_len);
-        try writer.writeByteNTimes(' ', span.name_offset - span.prefix_len);
-        if (!span.point_at_next_arg and span.value_offset == 0) {
-            // Point at the start of the option name
-            try writer.writeByte('^');
-            try writer.writeByteNTimes('~', name_part.len - 1);
-        } else if (span.value_offset > 0) {
-            // Point at the value within the same argument
-            try writer.writeByteNTimes('~', span.value_offset - span.name_offset);
-            try writer.writeByte('^');
-            if (span.value_offset < shown_arg.len) {
-                try writer.writeByteNTimes('~', shown_arg.len - span.value_offset - 1);
-            }
-        } else if (span.point_at_next_arg) {
-            // Point at the start of the following argument
-            try writer.writeByteNTimes('~', shown_arg.len - span.name_offset + 1);
-            try writer.writeByte('^');
-            if (next_arg_len > 0) {
-                try writer.writeByteNTimes('~', next_arg_len - 1);
-            }
-        }
-    }
-    try writer.writeByte('\n');
-    try config.setColor(writer, .reset);
-}
-
-/// Parses `args`, expecting success with no diagnostic output at all.
-/// The returned Options must be deinitialized by the caller.
-fn testParse(args: []const []const u8) !Options {
-    const parsed = try testParseOutput(args, "");
-    return parsed.?;
-}
-
-/// Parses `args`, expecting success while the rendered diagnostics match
-/// `expected_output` exactly. The returned Options must be deinitialized by the caller.
-fn testParseWarning(args: []const []const u8, expected_output: []const u8) !Options {
-    const parsed = try testParseOutput(args, expected_output);
-    return parsed.?;
-}
-
-/// Parses `args`, expecting a parse error whose rendered diagnostics match
-/// `expected_output` exactly. Fails with error.TestExpectedError if parsing succeeds.
-fn testParseError(args: []const []const u8, expected_output: []const u8) !void {
-    var maybe_options = try testParseOutput(args, expected_output);
-    if (maybe_options) |*options| {
-        std.debug.print("expected error, got options: {}\n", .{options.*});
-        options.deinit();
-        return error.TestExpectedError;
-    }
-}
-
-/// Runs `parse` on `args` and checks that the diagnostics, rendered without color,
-/// equal `expected_output`. Returns null when parsing failed with error.ParseError,
-/// otherwise the parsed Options (owned by the caller). Any other error is propagated.
-fn testParseOutput(args: []const []const u8, expected_output: []const u8) !?Options {
-    const gpa = std.testing.allocator;
-
-    var diagnostics = Diagnostics.init(gpa);
-    defer diagnostics.deinit();
-
-    var rendered = std.ArrayList(u8).init(gpa);
-    defer rendered.deinit();
-
-    var maybe_options: ?Options = parse(gpa, args, &diagnostics) catch |err| switch (err) {
-        error.ParseError => null,
-        else => |e| return e,
-    };
-    // Only a successful parse leaves something to clean up if a check below fails
-    errdefer if (maybe_options) |*options| options.deinit();
-
-    try diagnostics.renderToWriter(args, rendered.writer(), .no_color);
-    try std.testing.expectEqualStrings(expected_output, rendered.items);
-    return maybe_options;
-}
-
-test "parse errors: basic" {
-    // Each case is a full argv and the exact diagnostics expected from the failed parse
-    const Case = struct { args: []const []const u8, expected_output: []const u8 };
-    const cases = [_]Case{
-        .{
-            .args = &.{ "foo.exe", "/" },
-            .expected_output =
-            \\<cli>: error: invalid option: /
-            \\ ... /
-            \\ ^
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "/ln" },
-            .expected_output =
-            \\<cli>: error: missing language tag after /ln option
-            \\ ... /ln
-            \\ ~~~~^
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "-vln" },
-            .expected_output =
-            \\<cli>: error: missing language tag after -ln option
-            \\ ... -vln
-            \\ ~ ~~~^
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "/_not-an-option" },
-            .expected_output =
-            \\<cli>: error: invalid option: /_not-an-option
-            \\ ... /_not-an-option
-            \\ ~^~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "-_not-an-option" },
-            .expected_output =
-            \\<cli>: error: invalid option: -_not-an-option
-            \\ ... -_not-an-option
-            \\ ~^~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "--_not-an-option" },
-            .expected_output =
-            \\<cli>: error: invalid option: --_not-an-option
-            \\ ... --_not-an-option
-            \\ ~~^~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "/v_not-an-option" },
-            .expected_output =
-            \\<cli>: error: invalid option: /_not-an-option
-            \\ ... /v_not-an-option
-            \\ ~ ^~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "-v_not-an-option" },
-            .expected_output =
-            \\<cli>: error: invalid option: -_not-an-option
-            \\ ... -v_not-an-option
-            \\ ~ ^~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        .{
-            .args = &.{ "foo.exe", "--v_not-an-option" },
-            .expected_output =
-            \\<cli>: error: invalid option: --_not-an-option
-            \\ ... --v_not-an-option
-            \\ ~~ ^~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\
-            ,
-        },
-        // An absolute path starting with / is parsed as an option unless -- precedes it
-        .{
-            .args = &.{ "foo.exe", "/some/absolute/path/parsed/as/an/option.rc" },
-            .expected_output =
-            \\<cli>: error: the /s option is unsupported
-            \\ ... /some/absolute/path/parsed/as/an/option.rc
-            \\ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-            \\<cli>: error: missing input filename
-            \\
-            \\<cli>: note: if this argument was intended to be the input filename, then -- should be specified in front of it to exclude it from option parsing
-            \\ ... /some/absolute/path/parsed/as/an/option.rc
-            \\ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-            \\
-            ,
-        },
-    };
-    for (cases) |case| {
-        try testParseError(case.args, case.expected_output);
-    }
-}
-
-test "parse errors: /ln" {
-    // Invalid tag passed as a separate argument
-    const separate_value_output =
-        \\<cli>: error: invalid language tag: invalid
-        \\ ... /ln invalid ...
-        \\ ~~~~^~~~~~~
-        \\
-    ;
-    try testParseError(&.{ "foo.exe", "/ln", "invalid", "foo.rc" }, separate_value_output);
-
-    // Invalid tag attached directly to the option
-    const attached_value_output =
-        \\<cli>: error: invalid language tag: invalid
-        \\ ... /lninvalid ...
-        \\ ~~~^~~~~~~
-        \\
-    ;
-    try testParseError(&.{ "foo.exe", "/lninvalid", "foo.rc" }, attached_value_output);
-}
-
-test "parse: options" {
-    const Case = struct {
-        // The single option argument placed between "foo.exe" and "foo.rc"
-        option_arg: []const u8,
-        // Whether ignore_include_env_var is asserted to be true for this case
-        check_ignore_include_env_var: bool,
-        expected_output_filename: []const u8,
-    };
-    const cases = [_]Case{
-        .{ .option_arg = "/v", .check_ignore_include_env_var = false, .expected_output_filename = "foo.res" },
-        .{ .option_arg = "/vx", .check_ignore_include_env_var = true, .expected_output_filename = "foo.res" },
-        .{ .option_arg = "/xv", .check_ignore_include_env_var = true, .expected_output_filename = "foo.res" },
-        .{ .option_arg = "/xvFObar.res", .check_ignore_include_env_var = true, .expected_output_filename = "bar.res" },
-    };
-    for (cases) |case| {
-        var options = try testParse(&.{ "foo.exe", case.option_arg, "foo.rc" });
-        defer options.deinit();
-
-        try std.testing.expectEqual(true, options.verbose);
-        if (case.check_ignore_include_env_var) {
-            try std.testing.expectEqual(true, options.ignore_include_env_var);
-        }
-        try std.testing.expectEqualStrings("foo.rc", options.input_filename);
-        try std.testing.expectEqualStrings(case.expected_output_filename, options.output_filename);
-    }
-}
-
-test "parse: define and undefine" {
-    {
-        // A define without an explicit value gets the value "1"
-        var options = try testParse(&.{ "foo.exe", "/dfoo", "foo.rc" });
-        defer options.deinit();
-
-        const foo_action = options.symbols.get("foo").?;
-        try std.testing.expectEqual(Options.SymbolAction.define, foo_action);
-        try std.testing.expectEqualStrings("1", foo_action.define);
-    }
-    {
-        // The last define of a symbol wins
-        var options = try testParse(&.{ "foo.exe", "/dfoo=bar", "/dfoo=baz", "foo.rc" });
-        defer options.deinit();
-
-        const foo_action = options.symbols.get("foo").?;
-        try std.testing.expectEqual(Options.SymbolAction.define, foo_action);
-        try std.testing.expectEqualStrings("baz", foo_action.define);
-    }
-
-    // Every one of these command lines must leave foo undefined
-    const undefine_cases = [_][]const []const u8{
-        &.{ "foo.exe", "/ufoo", "foo.rc" },
-        // Once undefined, future defines are ignored
-        &.{ "foo.exe", "/ufoo", "/dfoo", "foo.rc" },
-        // Undefined always takes precedence
-        &.{ "foo.exe", "/dfoo", "/ufoo", "/dfoo", "foo.rc" },
-    };
-    for (undefine_cases) |args| {
-        var options = try testParse(args);
-        defer options.deinit();
-
-        const foo_action = options.symbols.get("foo").?;
-        try std.testing.expectEqual(Options.SymbolAction.undefine, foo_action);
-    }
-
-    {
-        // Warn + ignore invalid identifiers
-        const expected_warnings =
-            \\<cli>: warning: symbol "foo bar" is not a valid identifier and therefore cannot be defined
-            \\ ... /dfoo bar ...
-            \\ ~~^~~~~~~
-            \\<cli>: warning: symbol "0leadingdigit" is not a valid identifier and therefore cannot be undefined
-            \\ ... /u 0leadingdigit ...
-            \\ ~~~^~~~~~~~~~~~~
-            \\
-        ;
-        var options = try testParseWarning(
-            &.{ "foo.exe", "/dfoo bar", "/u", "0leadingdigit", "foo.rc" },
-            expected_warnings,
-        );
-        defer options.deinit();
-
-        try std.testing.expectEqual(@as(usize, 0), options.symbols.count());
-    }
-}
-
-test "parse: /sl" {
- try testParseError(&.{ "foo.exe", "/sl", "0", "foo.rc" },
- \\<cli>: error: percent out of range: 0 (parsed from '0')
- \\ ... /sl 0 ...
- \\ ~~~~^
- \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive)
- \\
- \\
- );
- try testParseError(&.{ "foo.exe", "/sl", "abcd", "foo.rc" },
- \\<cli>: error: invalid percent format 'abcd'
- \\ ... /sl abcd ...
- \\ ~~~~^~~~
- \\<cli>: note: string length percent must be an integer between 1 and 100 (inclusive)
- \\
- \\
- );
- {
- var options = try testParse(&.{ "foo.exe", "foo.rc" });
- defer options.deinit();
-
- try std.testing.expectEqual(@as(u15, lex.default_max_string_literal_codepoints), options.max_string_literal_codepoints);
- }
- {
- var options = try testParse(&.{ "foo.exe", "/sl100", "foo.rc" });
- defer options.deinit();
-
- try std.testing.expectEqual(@as(u15, max_string_literal_length_100_percent), options.max_string_literal_codepoints);
- }
- {
- var options = try testParse(&.{ "foo.exe", "-SL33", "foo.rc" });
- defer options.deinit();
-
- try std.testing.expectEqual(@as(u15, 2703), options.max_string_literal_codepoints);
- }
- {
- var options = try testParse(&.{ "foo.exe", "/sl15", "foo.rc" });
- defer options.deinit();
-
- try std.testing.expectEqual(@as(u15, 1228), options.max_string_literal_codepoints);
- }
-}
-
-test "parse: unsupported MUI-related options" {
- try testParseError(&.{ "foo.exe", "/q", "blah", "/g1", "-G2", "blah", "/fm", "blah", "/g", "blah", "foo.rc" },
- \\<cli>: error: the /q option is unsupported
- \\ ... /q ...
- \\ ~^
- \\<cli>: error: the /g1 option is unsupported
- \\ ... /g1 ...
- \\ ~^~
- \\<cli>: error: the -G2 option is unsupported
- \\ ... -G2 ...
- \\ ~^~
- \\<cli>: error: the /fm option is unsupported
- \\ ... /fm ...
- \\ ~^~
- \\<cli>: error: the /g option is unsupported
- \\ ... /g ...
- \\ ~^
- \\
- );
-}
-
-test "parse: unsupported LCX/LCE-related options" {
- try testParseError(&.{ "foo.exe", "/t", "/tp:", "/tp:blah", "/tm", "/tc", "/tw", "-TEti", "/ta", "/tn", "blah", "foo.rc" },
- \\<cli>: error: the /t option is unsupported
- \\ ... /t ...
- \\ ~^
- \\<cli>: error: missing value for /tp: option
- \\ ... /tp: ...
- \\ ~~~~^
- \\<cli>: error: the /tp: option is unsupported
- \\ ... /tp: ...
- \\ ~^~~
- \\<cli>: error: the /tp: option is unsupported
- \\ ... /tp:blah ...
- \\ ~^~~~~~~
- \\<cli>: error: the /tm option is unsupported
- \\ ... /tm ...
- \\ ~^~
- \\<cli>: error: the /tc option is unsupported
- \\ ... /tc ...
- \\ ~^~
- \\<cli>: error: the /tw option is unsupported
- \\ ... /tw ...
- \\ ~^~
- \\<cli>: error: the -TE option is unsupported
- \\ ... -TEti ...
- \\ ~^~
- \\<cli>: error: the -ti option is unsupported
- \\ ... -TEti ...
- \\ ~ ^~
- \\<cli>: error: the /ta option is unsupported
- \\ ... /ta ...
- \\ ~^~
- \\<cli>: error: the /tn option is unsupported
- \\ ... /tn ...
- \\ ~^~
- \\
- );
-}
-
-test "maybeAppendRC" {
- var tmp = std.testing.tmpDir(.{});
- defer tmp.cleanup();
-
- var options = try testParse(&.{ "foo.exe", "foo" });
- defer options.deinit();
- try std.testing.expectEqualStrings("foo", options.input_filename);
-
- // Create the file so that it's found. In this scenario, .rc should not get
- // appended.
- var file = try tmp.dir.createFile("foo", .{});
- file.close();
- try options.maybeAppendRC(tmp.dir);
- try std.testing.expectEqualStrings("foo", options.input_filename);
-
- // Now delete the file and try again. Since the verbatim name is no longer found
- // and the input filename does not have an extension, .rc should get appended.
- try tmp.dir.deleteFile("foo");
- try options.maybeAppendRC(tmp.dir);
- try std.testing.expectEqualStrings("foo.rc", options.input_filename);
-}
diff --git a/src/resinator/code_pages.zig b/src/resinator/code_pages.zig
@@ -1,487 +0,0 @@
-const std = @import("std");
-const windows1252 = @import("windows1252.zig");
-
-// TODO: Parts of this comment block may be more relevant to string/NameOrOrdinal parsing
-// than it is to the stuff in this file.
-//
-// ‰ representations for context:
-// Win-1252 89
-// UTF-8 E2 80 B0
-// UTF-16 20 30
-//
-// With code page 65001:
-// ‰ RCDATA { "‰" L"‰" }
-// File encoded as Windows-1252:
-// ‰ => <U+FFFD REPLACEMENT CHARACTER> as u16
-// "‰" => 0x3F ('?')
-// L"‰" => <U+FFFD REPLACEMENT CHARACTER> as u16
-// File encoded as UTF-8:
-// ‰ => <U+2030 ‰> as u16
-// "‰" => 0x89 ('‰' encoded as Windows-1252)
-// L"‰" => <U+2030 ‰> as u16
-//
-// With code page 1252:
-// ‰ RCDATA { "‰" L"‰" }
-// File encoded as Windows-1252:
-// ‰ => <U+2030 ‰> as u16
-// "‰" => 0x89 ('‰' encoded as Windows-1252)
-// L"‰" => <U+2030 ‰> as u16
-// File encoded as UTF-8:
-// ‰ => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16
-// ^ first byte of utf8 representation
-// ^ second byte of UTF-8 representation (0x80), but interpretted as
-// Windows-1252 ('€') and then converted to UTF-16 (<U+20AC>)
-// ^ third byte of utf8 representation
-// "‰" => 0xE2, 0x80, 0xB0 (the bytes of the UTF-8 representation)
-// L"‰" => 0xE2 as u16, 0x20AC as u16, 0xB0 as u16 (see '‰ =>' explanation)
-//
-// With code page 1252:
-// <0x90> RCDATA { "<0x90>" L"<0x90>" }
-// File encoded as Windows-1252:
-// <0x90> => 0x90 as u16
-// "<0x90>" => 0x90
-// L"<0x90>" => 0x90 as u16
-// File encoded as UTF-8:
-// <0x90> => 0xC2 as u16, 0x90 as u16
-// "<0x90>" => 0xC2, 0x90 (the bytes of the UTF-8 representation of <U+0090>)
-// L"<0x90>" => 0xC2 as u16, 0x90 as u16
-//
-// Within a raw data block, file encoded as Windows-1252 (Â is <0xC2>):
-// "Âa" L"Âa" "\xC2ad" L"\xC2AD"
-// With code page 1252:
-// C2 61 C2 00 61 00 C2 61 64 AD C2
-// Â^ a^ Â~~~^ a~~~^ .^ a^ d^ ^~~~~\xC2AD
-// \xC2~`
-// With code page 65001:
-// 3F 61 FD FF 61 00 C2 61 64 AD C2
-// ^. a^ ^~~~. a~~~^ ^. a^ d^ ^~~~~\xC2AD
-// `. `. `~\xC2
-// `. `.~<0xC2>a is not well-formed UTF-8 (0xC2 expects a continutation byte after it).
-// `. Because 'a' is a valid first byte of a UTF-8 sequence, it is not included in the
-// `. invalid sequence so only the <0xC2> gets converted to <U+FFFD>.
-// `~Same as ^ but converted to '?' instead.
-//
-// Within a raw data block, file encoded as Windows-1252 (ð is <0xF0>, € is <0x80>):
-// "ð€a" L"ð€a"
-// With code page 1252:
-// F0 80 61 F0 00 AC 20 61 00
-// ð^ €^ a^ ð~~~^ €~~~^ a~~~^
-// With code page 65001:
-// 3F 61 FD FF 61 00
-// ^. a^ ^~~~. a~~~^
-// `. `.
-// `. `.~<0xF0><0x80> is not well-formed UTF-8, and <0x80> is not a valid first byte, so
-// `. both bytes are considered an invalid sequence and get converted to '<U+FFFD>'
-// `~Same as ^ but converted to '?' instead.
-
-/// https://learn.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
-pub const CodePage = enum(u16) {
- // supported
- windows1252 = 1252, // windows-1252 ANSI Latin 1; Western European (Windows)
- utf8 = 65001, // utf-8 Unicode (UTF-8)
-
- // unsupported but valid
- ibm037 = 37, // IBM037 IBM EBCDIC US-Canada
- ibm437 = 437, // IBM437 OEM United States
- ibm500 = 500, // IBM500 IBM EBCDIC International
- asmo708 = 708, // ASMO-708 Arabic (ASMO 708)
- asmo449plus = 709, // Arabic (ASMO-449+, BCON V4)
- transparent_arabic = 710, // Arabic - Transparent Arabic
- dos720 = 720, // DOS-720 Arabic (Transparent ASMO); Arabic (DOS)
- ibm737 = 737, // ibm737 OEM Greek (formerly 437G); Greek (DOS)
- ibm775 = 775, // ibm775 OEM Baltic; Baltic (DOS)
- ibm850 = 850, // ibm850 OEM Multilingual Latin 1; Western European (DOS)
- ibm852 = 852, // ibm852 OEM Latin 2; Central European (DOS)
- ibm855 = 855, // IBM855 OEM Cyrillic (primarily Russian)
- ibm857 = 857, // ibm857 OEM Turkish; Turkish (DOS)
- ibm00858 = 858, // IBM00858 OEM Multilingual Latin 1 + Euro symbol
- ibm860 = 860, // IBM860 OEM Portuguese; Portuguese (DOS)
- ibm861 = 861, // ibm861 OEM Icelandic; Icelandic (DOS)
- dos862 = 862, // DOS-862 OEM Hebrew; Hebrew (DOS)
- ibm863 = 863, // IBM863 OEM French Canadian; French Canadian (DOS)
- ibm864 = 864, // IBM864 OEM Arabic; Arabic (864)
- ibm865 = 865, // IBM865 OEM Nordic; Nordic (DOS)
- cp866 = 866, // cp866 OEM Russian; Cyrillic (DOS)
- ibm869 = 869, // ibm869 OEM Modern Greek; Greek, Modern (DOS)
- ibm870 = 870, // IBM870 IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
- windows874 = 874, // windows-874 Thai (Windows)
- cp875 = 875, // cp875 IBM EBCDIC Greek Modern
- shift_jis = 932, // shift_jis ANSI/OEM Japanese; Japanese (Shift-JIS)
- gb2312 = 936, // gb2312 ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
- ks_c_5601_1987 = 949, // ks_c_5601-1987 ANSI/OEM Korean (Unified Hangul Code)
- big5 = 950, // big5 ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
- ibm1026 = 1026, // IBM1026 IBM EBCDIC Turkish (Latin 5)
- ibm01047 = 1047, // IBM01047 IBM EBCDIC Latin 1/Open System
- ibm01140 = 1140, // IBM01140 IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
- ibm01141 = 1141, // IBM01141 IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
- ibm01142 = 1142, // IBM01142 IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
- ibm01143 = 1143, // IBM01143 IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
- ibm01144 = 1144, // IBM01144 IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
- ibm01145 = 1145, // IBM01145 IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
- ibm01146 = 1146, // IBM01146 IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
- ibm01147 = 1147, // IBM01147 IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
- ibm01148 = 1148, // IBM01148 IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
- ibm01149 = 1149, // IBM01149 IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
- utf16 = 1200, // utf-16 Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
- utf16_fffe = 1201, // unicodeFFFE Unicode UTF-16, big endian byte order; available only to managed applications
- windows1250 = 1250, // windows-1250 ANSI Central European; Central European (Windows)
- windows1251 = 1251, // windows-1251 ANSI Cyrillic; Cyrillic (Windows)
- windows1253 = 1253, // windows-1253 ANSI Greek; Greek (Windows)
- windows1254 = 1254, // windows-1254 ANSI Turkish; Turkish (Windows)
- windows1255 = 1255, // windows-1255 ANSI Hebrew; Hebrew (Windows)
- windows1256 = 1256, // windows-1256 ANSI Arabic; Arabic (Windows)
- windows1257 = 1257, // windows-1257 ANSI Baltic; Baltic (Windows)
- windows1258 = 1258, // windows-1258 ANSI/OEM Vietnamese; Vietnamese (Windows)
- johab = 1361, // Johab Korean (Johab)
- macintosh = 10000, // macintosh MAC Roman; Western European (Mac)
- x_mac_japanese = 10001, // x-mac-japanese Japanese (Mac)
- x_mac_chinesetrad = 10002, // x-mac-chinesetrad MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
- x_mac_korean = 10003, // x-mac-korean Korean (Mac)
- x_mac_arabic = 10004, // x-mac-arabic Arabic (Mac)
- x_mac_hebrew = 10005, // x-mac-hebrew Hebrew (Mac)
- x_mac_greek = 10006, // x-mac-greek Greek (Mac)
- x_mac_cyrillic = 10007, // x-mac-cyrillic Cyrillic (Mac)
- x_mac_chinesesimp = 10008, // x-mac-chinesesimp MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
- x_mac_romanian = 10010, // x-mac-romanian Romanian (Mac)
- x_mac_ukranian = 10017, // x-mac-ukrainian Ukrainian (Mac)
- x_mac_thai = 10021, // x-mac-thai Thai (Mac)
- x_mac_ce = 10029, // x-mac-ce MAC Latin 2; Central European (Mac)
- x_mac_icelandic = 10079, // x-mac-icelandic Icelandic (Mac)
- x_mac_turkish = 10081, // x-mac-turkish Turkish (Mac)
- x_mac_croatian = 10082, // x-mac-croatian Croatian (Mac)
- utf32 = 12000, // utf-32 Unicode UTF-32, little endian byte order; available only to managed applications
- utf32_be = 12001, // utf-32BE Unicode UTF-32, big endian byte order; available only to managed applications
- x_chinese_cns = 20000, // x-Chinese_CNS CNS Taiwan; Chinese Traditional (CNS)
- x_cp20001 = 20001, // x-cp20001 TCA Taiwan
- x_chinese_eten = 20002, // x_Chinese-Eten Eten Taiwan; Chinese Traditional (Eten)
- x_cp20003 = 20003, // x-cp20003 IBM5550 Taiwan
- x_cp20004 = 20004, // x-cp20004 TeleText Taiwan
- x_cp20005 = 20005, // x-cp20005 Wang Taiwan
- x_ia5 = 20105, // x-IA5 IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
- x_ia5_german = 20106, // x-IA5-German IA5 German (7-bit)
- x_ia5_swedish = 20107, // x-IA5-Swedish IA5 Swedish (7-bit)
- x_ia5_norwegian = 20108, // x-IA5-Norwegian IA5 Norwegian (7-bit)
- us_ascii = 20127, // us-ascii US-ASCII (7-bit)
- x_cp20261 = 20261, // x-cp20261 T.61
- x_cp20269 = 20269, // x-cp20269 ISO 6937 Non-Spacing Accent
- ibm273 = 20273, // IBM273 IBM EBCDIC Germany
- ibm277 = 20277, // IBM277 IBM EBCDIC Denmark-Norway
- ibm278 = 20278, // IBM278 IBM EBCDIC Finland-Sweden
- ibm280 = 20280, // IBM280 IBM EBCDIC Italy
- ibm284 = 20284, // IBM284 IBM EBCDIC Latin America-Spain
- ibm285 = 20285, // IBM285 IBM EBCDIC United Kingdom
- ibm290 = 20290, // IBM290 IBM EBCDIC Japanese Katakana Extended
- ibm297 = 20297, // IBM297 IBM EBCDIC France
- ibm420 = 20420, // IBM420 IBM EBCDIC Arabic
- ibm423 = 20423, // IBM423 IBM EBCDIC Greek
- ibm424 = 20424, // IBM424 IBM EBCDIC Hebrew
- x_ebcdic_korean_extended = 20833, // x-EBCDIC-KoreanExtended IBM EBCDIC Korean Extended
- ibm_thai = 20838, // IBM-Thai IBM EBCDIC Thai
- koi8_r = 20866, // koi8-r Russian (KOI8-R); Cyrillic (KOI8-R)
- ibm871 = 20871, // IBM871 IBM EBCDIC Icelandic
- ibm880 = 20880, // IBM880 IBM EBCDIC Cyrillic Russian
- ibm905 = 20905, // IBM905 IBM EBCDIC Turkish
- ibm00924 = 20924, // IBM00924 IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
- euc_jp_jis = 20932, // EUC-JP Japanese (JIS 0208-1990 and 0212-1990)
- x_cp20936 = 20936, // x-cp20936 Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
- x_cp20949 = 20949, // x-cp20949 Korean Wansung
- cp1025 = 21025, // cp1025 IBM EBCDIC Cyrillic Serbian-Bulgarian
- // = 21027, // (deprecated)
- koi8_u = 21866, // koi8-u Ukrainian (KOI8-U); Cyrillic (KOI8-U)
- iso8859_1 = 28591, // iso-8859-1 ISO 8859-1 Latin 1; Western European (ISO)
- iso8859_2 = 28592, // iso-8859-2 ISO 8859-2 Central European; Central European (ISO)
- iso8859_3 = 28593, // iso-8859-3 ISO 8859-3 Latin 3
- iso8859_4 = 28594, // iso-8859-4 ISO 8859-4 Baltic
- iso8859_5 = 28595, // iso-8859-5 ISO 8859-5 Cyrillic
- iso8859_6 = 28596, // iso-8859-6 ISO 8859-6 Arabic
- iso8859_7 = 28597, // iso-8859-7 ISO 8859-7 Greek
- iso8859_8 = 28598, // iso-8859-8 ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
- iso8859_9 = 28599, // iso-8859-9 ISO 8859-9 Turkish
- iso8859_13 = 28603, // iso-8859-13 ISO 8859-13 Estonian
- iso8859_15 = 28605, // iso-8859-15 ISO 8859-15 Latin 9
- x_europa = 29001, // x-Europa Europa 3
- is8859_8_i = 38598, // iso-8859-8-i ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
- iso2022_jp = 50220, // iso-2022-jp ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
- cs_iso2022_jp = 50221, // csISO2022JP ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
- iso2022_jp_jis_x = 50222, // iso-2022-jp ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
- iso2022_kr = 50225, // iso-2022-kr ISO 2022 Korean
- x_cp50227 = 50227, // x-cp50227 ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
- iso2022_chinesetrad = 50229, // ISO 2022 Traditional Chinese
- ebcdic_jp_katakana_extended = 50930, // EBCDIC Japanese (Katakana) Extended
- ebcdic_us_ca_jp = 50931, // EBCDIC US-Canada and Japanese
- ebcdic_kr_extended = 50933, // EBCDIC Korean Extended and Korean
- ebcdic_chinesesimp_extended = 50935, // EBCDIC Simplified Chinese Extended and Simplified Chinese
- ebcdic_chinesesimp = 50936, // EBCDIC Simplified Chinese
- ebcdic_us_ca_chinesetrad = 50937, // EBCDIC US-Canada and Traditional Chinese
- ebcdic_jp_latin_extended = 50939, // EBCDIC Japanese (Latin) Extended and Japanese
- euc_jp = 51932, // euc-jp EUC Japanese
- euc_cn = 51936, // EUC-CN EUC Simplified Chinese; Chinese Simplified (EUC)
- euc_kr = 51949, // euc-kr EUC Korean
- euc_chinesetrad = 51950, // EUC Traditional Chinese
- hz_gb2312 = 52936, // hz-gb-2312 HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
- gb18030 = 54936, // GB18030 Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
- x_iscii_de = 57002, // x-iscii-de ISCII Devanagari
- x_iscii_be = 57003, // x-iscii-be ISCII Bangla
- x_iscii_ta = 57004, // x-iscii-ta ISCII Tamil
- x_iscii_te = 57005, // x-iscii-te ISCII Telugu
- x_iscii_as = 57006, // x-iscii-as ISCII Assamese
- x_iscii_or = 57007, // x-iscii-or ISCII Odia
- x_iscii_ka = 57008, // x-iscii-ka ISCII Kannada
- x_iscii_ma = 57009, // x-iscii-ma ISCII Malayalam
- x_iscii_gu = 57010, // x-iscii-gu ISCII Gujarati
- x_iscii_pa = 57011, // x-iscii-pa ISCII Punjabi
- utf7 = 65000, // utf-7 Unicode (UTF-7)
-
- pub fn codepointAt(code_page: CodePage, index: usize, bytes: []const u8) ?Codepoint {
- if (index >= bytes.len) return null;
- switch (code_page) {
- .windows1252 => {
- // All byte values have a representation, so just convert the byte
- return Codepoint{
- .value = windows1252.toCodepoint(bytes[index]),
- .byte_len = 1,
- };
- },
- .utf8 => {
- return Utf8.WellFormedDecoder.decode(bytes[index..]);
- },
- else => unreachable,
- }
- }
-
- pub fn isSupported(code_page: CodePage) bool {
- return switch (code_page) {
- .windows1252, .utf8 => true,
- else => false,
- };
- }
-
- pub fn getByIdentifier(identifier: u16) !CodePage {
- // There's probably a more efficient way to do this (e.g. ComptimeHashMap?) but
- // this should be fine, especially since this function likely won't be called much.
- inline for (@typeInfo(CodePage).Enum.fields) |enumField| {
- if (identifier == enumField.value) {
- return @field(CodePage, enumField.name);
- }
- }
- return error.InvalidCodePage;
- }
-
- pub fn getByIdentifierEnsureSupported(identifier: u16) !CodePage {
- const code_page = try getByIdentifier(identifier);
- switch (isSupported(code_page)) {
- true => return code_page,
- false => return error.UnsupportedCodePage,
- }
- }
-};
-
-pub const Utf8 = struct {
- /// Implements decoding with rejection of ill-formed UTF-8 sequences based on section
- /// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically).
- pub const WellFormedDecoder = struct {
- /// Like std.unicode.utf8ByteSequenceLength, but:
- /// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
- /// - Returns an optional value instead of an error union
- pub fn sequenceLength(first_byte: u8) ?u3 {
- return switch (first_byte) {
- 0x00...0x7F => 1,
- 0xC2...0xDF => 2,
- 0xE0...0xEF => 3,
- 0xF0...0xF4 => 4,
- else => null,
- };
- }
-
- fn isContinuationByte(byte: u8) bool {
- return switch (byte) {
- 0x80...0xBF => true,
- else => false,
- };
- }
-
- pub fn decode(bytes: []const u8) Codepoint {
- std.debug.assert(bytes.len > 0);
- const first_byte = bytes[0];
- const expected_len = sequenceLength(first_byte) orelse {
- return .{ .value = Codepoint.invalid, .byte_len = 1 };
- };
- if (expected_len == 1) return .{ .value = first_byte, .byte_len = 1 };
-
- var value: u21 = first_byte & 0b00011111;
- var byte_index: u8 = 1;
- while (byte_index < @min(bytes.len, expected_len)) : (byte_index += 1) {
- const byte = bytes[byte_index];
- // See Table 3-7 of D92 in Chapter 3 of the Unicode Standard
- const valid: bool = switch (byte_index) {
- 1 => switch (first_byte) {
- 0xE0 => switch (byte) {
- 0xA0...0xBF => true,
- else => false,
- },
- 0xED => switch (byte) {
- 0x80...0x9F => true,
- else => false,
- },
- 0xF0 => switch (byte) {
- 0x90...0xBF => true,
- else => false,
- },
- 0xF4 => switch (byte) {
- 0x80...0x8F => true,
- else => false,
- },
- else => switch (byte) {
- 0x80...0xBF => true,
- else => false,
- },
- },
- else => switch (byte) {
- 0x80...0xBF => true,
- else => false,
- },
- };
-
- if (!valid) {
- var len = byte_index;
- // Only include the byte in the invalid sequence if it's in the range
- // of a continuation byte. All other values should not be included in the
- // invalid sequence.
- //
- // Note: This is how the Windows RC compiler handles this, this may not
- // be the correct-as-according-to-the-Unicode-standard way to do it.
- if (isContinuationByte(byte)) len += 1;
- return .{ .value = Codepoint.invalid, .byte_len = len };
- }
-
- value <<= 6;
- value |= byte & 0b00111111;
- }
- if (byte_index != expected_len) {
- return .{ .value = Codepoint.invalid, .byte_len = byte_index };
- }
- return .{ .value = value, .byte_len = expected_len };
- }
- };
-};
-
-test "Utf8.WellFormedDecoder" {
- const invalid_utf8 = "\xF0\x80";
- const decoded = Utf8.WellFormedDecoder.decode(invalid_utf8);
- try std.testing.expectEqual(Codepoint.invalid, decoded.value);
- try std.testing.expectEqual(@as(usize, 2), decoded.byte_len);
-}
-
-test "codepointAt invalid utf8" {
- {
- const invalid_utf8 = "\xf0\xf0\x80\x80\x80";
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(0, invalid_utf8).?);
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 2,
- }, CodePage.utf8.codepointAt(1, invalid_utf8).?);
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(3, invalid_utf8).?);
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(4, invalid_utf8).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(5, invalid_utf8));
- }
-
- {
- const invalid_utf8 = "\xE1\xA0\xC0";
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 2,
- }, CodePage.utf8.codepointAt(0, invalid_utf8).?);
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(2, invalid_utf8).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(3, invalid_utf8));
- }
-
- {
- const invalid_utf8 = "\xD2";
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(0, invalid_utf8).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, invalid_utf8));
- }
-
- {
- const invalid_utf8 = "\xE1\xA0";
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 2,
- }, CodePage.utf8.codepointAt(0, invalid_utf8).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8));
- }
-
- {
- const invalid_utf8 = "\xC5\xFF";
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(0, invalid_utf8).?);
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(1, invalid_utf8).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8));
- }
-}
-
-test "codepointAt utf8 encoded" {
- const utf8_encoded = "²";
-
- // with code page utf8
- try std.testing.expectEqual(Codepoint{
- .value = '²',
- .byte_len = 2,
- }, CodePage.utf8.codepointAt(0, utf8_encoded).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, utf8_encoded));
-
- // with code page windows1252
- try std.testing.expectEqual(Codepoint{
- .value = '\xC2',
- .byte_len = 1,
- }, CodePage.windows1252.codepointAt(0, utf8_encoded).?);
- try std.testing.expectEqual(Codepoint{
- .value = '\xB2',
- .byte_len = 1,
- }, CodePage.windows1252.codepointAt(1, utf8_encoded).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, utf8_encoded));
-}
-
-test "codepointAt windows1252 encoded" {
- const windows1252_encoded = "\xB2";
-
- // with code page utf8
- try std.testing.expectEqual(Codepoint{
- .value = Codepoint.invalid,
- .byte_len = 1,
- }, CodePage.utf8.codepointAt(0, windows1252_encoded).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.utf8.codepointAt(2, windows1252_encoded));
-
- // with code page windows1252
- try std.testing.expectEqual(Codepoint{
- .value = '\xB2',
- .byte_len = 1,
- }, CodePage.windows1252.codepointAt(0, windows1252_encoded).?);
- try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(1, windows1252_encoded));
-}
-
-pub const Codepoint = struct {
- value: u21,
- byte_len: usize,
-
- pub const invalid: u21 = std.math.maxInt(u21);
-};
diff --git a/src/resinator/comments.zig b/src/resinator/comments.zig
@@ -1,340 +0,0 @@
-//! Expects to run after a C preprocessor step that preserves comments.
-//!
-//! `rc` has a peculiar quirk where something like `blah/**/blah` will be
-//! transformed into `blahblah` during parsing. However, `clang -E` will
-//! transform it into `blah blah`, so in order to match `rc`, we need
-//! to remove comments ourselves after the preprocessor runs.
-//! Note: Multiline comments that actually span more than one line do
-//! get translated to a space character by `rc`.
-//!
-//! Removing comments before lexing also allows the lexer to not have to
-//! deal with comments which would complicate its implementation (this is something
-//! of a tradeoff, as removing comments in a separate pass means that we'll
-//! need to iterate the source twice instead of once, but having to deal with
-//! comments when lexing would be a pain).
-
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
-const SourceMappings = @import("source_mapping.zig").SourceMappings;
-const LineHandler = @import("lex.zig").LineHandler;
-const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair;
-
-/// `buf` must be at least as long as `source`
-/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
-pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 {
- std.debug.assert(buf.len >= source.len);
- var result = UncheckedSliceWriter{ .slice = buf };
- const State = enum {
- start,
- forward_slash,
- line_comment,
- multiline_comment,
- multiline_comment_end,
- single_quoted,
- single_quoted_escape,
- double_quoted,
- double_quoted_escape,
- };
- var state: State = .start;
- var index: usize = 0;
- var pending_start: ?usize = null;
- var line_handler = LineHandler{ .buffer = source };
- while (index < source.len) : (index += 1) {
- const c = source[index];
- // TODO: Disallow \x1A, \x00, \x7F in comments. At least \x1A and \x00 can definitely
- // cause errors or parsing weirdness in the Win32 RC compiler. These are disallowed
- // in the lexer, but comments are stripped before getting to the lexer.
- switch (state) {
- .start => switch (c) {
- '/' => {
- state = .forward_slash;
- pending_start = index;
- },
- '\r', '\n' => {
- _ = line_handler.incrementLineNumber(index);
- result.write(c);
- },
- else => {
- switch (c) {
- '"' => state = .double_quoted,
- '\'' => state = .single_quoted,
- else => {},
- }
- result.write(c);
- },
- },
- .forward_slash => switch (c) {
- '/' => state = .line_comment,
- '*' => {
- state = .multiline_comment;
- },
- else => {
- _ = line_handler.maybeIncrementLineNumber(index);
- result.writeSlice(source[pending_start.? .. index + 1]);
- pending_start = null;
- state = .start;
- },
- },
- .line_comment => switch (c) {
- '\r', '\n' => {
- _ = line_handler.incrementLineNumber(index);
- result.write(c);
- state = .start;
- },
- else => {},
- },
- .multiline_comment => switch (c) {
- '\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
- '\n' => {
- _ = line_handler.incrementLineNumber(index);
- result.write(c);
- },
- '*' => state = .multiline_comment_end,
- else => {},
- },
- .multiline_comment_end => switch (c) {
- '\r' => {
- handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
- // We only want to treat this as a newline if it's part of a CRLF pair. If it's
- // not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
- // functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
- if (formsLineEndingPair(source, '\r', index + 1)) {
- state = .multiline_comment;
- }
- },
- '\n' => {
- _ = line_handler.incrementLineNumber(index);
- result.write(c);
- state = .multiline_comment;
- },
- '/' => {
- state = .start;
- },
- else => {
- state = .multiline_comment;
- },
- },
- .single_quoted => switch (c) {
- '\r', '\n' => {
- _ = line_handler.incrementLineNumber(index);
- state = .start;
- result.write(c);
- },
- '\\' => {
- state = .single_quoted_escape;
- result.write(c);
- },
- '\'' => {
- state = .start;
- result.write(c);
- },
- else => {
- result.write(c);
- },
- },
- .single_quoted_escape => switch (c) {
- '\r', '\n' => {
- _ = line_handler.incrementLineNumber(index);
- state = .start;
- result.write(c);
- },
- else => {
- state = .single_quoted;
- result.write(c);
- },
- },
- .double_quoted => switch (c) {
- '\r', '\n' => {
- _ = line_handler.incrementLineNumber(index);
- state = .start;
- result.write(c);
- },
- '\\' => {
- state = .double_quoted_escape;
- result.write(c);
- },
- '"' => {
- state = .start;
- result.write(c);
- },
- else => {
- result.write(c);
- },
- },
- .double_quoted_escape => switch (c) {
- '\r', '\n' => {
- _ = line_handler.incrementLineNumber(index);
- state = .start;
- result.write(c);
- },
- else => {
- state = .double_quoted;
- result.write(c);
- },
- },
- }
- }
- return result.getWritten();
-}
-
-inline fn handleMultilineCarriageReturn(
- source: []const u8,
- line_handler: *LineHandler,
- index: usize,
- result: *UncheckedSliceWriter,
- source_mappings: ?*SourceMappings,
-) void {
- // Note: Bare \r within a multiline comment should *not* be treated as a line ending for the
- // purposes of removing comments, but *should* be treated as a line ending for the
- // purposes of line counting/source mapping
- _ = line_handler.incrementLineNumber(index);
- // So only write the \r if it's part of a CRLF pair
- if (formsLineEndingPair(source, '\r', index + 1)) {
- result.write('\r');
- }
- // And otherwise, we want to collapse the source mapping so that we can still know which
- // line came from where.
- else {
- // Because the line gets collapsed, we need to decrement line number so that
- // the next collapse acts on the first of the collapsed line numbers
- line_handler.line_number -= 1;
- if (source_mappings) |mappings| {
- mappings.collapse(line_handler.line_number, 1);
- }
- }
-}
-
-pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
- const buf = try allocator.alloc(u8, source.len);
- errdefer allocator.free(buf);
- const result = removeComments(source, buf, source_mappings);
- return allocator.realloc(buf, result.len);
-}
-
-fn testRemoveComments(expected: []const u8, source: []const u8) !void {
- const result = try removeCommentsAlloc(std.testing.allocator, source, null);
- defer std.testing.allocator.free(result);
-
- try std.testing.expectEqualStrings(expected, result);
-}
-
-test "basic" {
- try testRemoveComments("", "// comment");
- try testRemoveComments("", "/* comment */");
-}
-
-test "mixed" {
- try testRemoveComments("hello", "hello// comment");
- try testRemoveComments("hello", "hel/* comment */lo");
-}
-
-test "within a string" {
- // escaped " is \"
- try testRemoveComments(
- \\blah"//som\"/*ething*/"BLAH
- ,
- \\blah"//som\"/*ething*/"BLAH
- );
-}
-
-test "line comments retain newlines" {
- try testRemoveComments(
- \\
- \\
- \\
- ,
- \\// comment
- \\// comment
- \\// comment
- );
-
- try testRemoveComments("\r\n", "//comment\r\n");
-}
-
-test "crazy" {
- try testRemoveComments(
- \\blah"/*som*/\""BLAH
- ,
- \\blah"/*som*/\""/*ething*/BLAH
- );
-
- try testRemoveComments(
- \\blah"/*som*/"BLAH RCDATA "BEGIN END
- \\
- \\
- \\hello
- \\"
- ,
- \\blah"/*som*/"/*ething*/BLAH RCDATA "BEGIN END
- \\// comment
- \\//"blah blah" RCDATA {}
- \\hello
- \\"
- );
-}
-
-test "multiline comment with newlines" {
- // bare \r is not treated as a newline
- try testRemoveComments("blahblah", "blah/*some\rthing*/blah");
-
- try testRemoveComments(
- \\blah
- \\blah
- ,
- \\blah/*some
- \\thing*/blah
- );
- try testRemoveComments(
- "blah\r\nblah",
- "blah/*some\r\nthing*/blah",
- );
-
- // handle *<not /> correctly
- try testRemoveComments(
- \\blah
- \\
- \\
- ,
- \\blah/*some
- \\thing*
- \\/bl*ah*/
- );
-}
-
-test "comments appended to a line" {
- try testRemoveComments(
- \\blah
- \\blah
- ,
- \\blah // line comment
- \\blah
- );
- try testRemoveComments(
- "blah \r\nblah",
- "blah // line comment\r\nblah",
- );
-}
-
-test "remove comments with mappings" {
- const allocator = std.testing.allocator;
- var mut_source = "blah/*\rcommented line*\r/blah".*;
- var mappings = SourceMappings{};
- _ = try mappings.files.put(allocator, "test.rc");
- try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 });
- try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 });
- try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 });
- defer mappings.deinit(allocator);
-
- const result = removeComments(&mut_source, &mut_source, &mappings);
-
- try std.testing.expectEqualStrings("blahblah", result);
- try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len);
- try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line);
-}
-
-test "in place" {
- var mut_source = "blah /* comment */ blah".*;
- const result = removeComments(&mut_source, &mut_source, null);
- try std.testing.expectEqualStrings("blah blah", result);
-}
diff --git a/src/resinator/compile.zig b/src/resinator/compile.zig
@@ -1,3378 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const Allocator = std.mem.Allocator;
-const Node = @import("ast.zig").Node;
-const lex = @import("lex.zig");
-const Parser = @import("parse.zig").Parser;
-const Resource = @import("rc.zig").Resource;
-const Token = @import("lex.zig").Token;
-const literals = @import("literals.zig");
-const Number = literals.Number;
-const SourceBytes = literals.SourceBytes;
-const Diagnostics = @import("errors.zig").Diagnostics;
-const ErrorDetails = @import("errors.zig").ErrorDetails;
-const MemoryFlags = @import("res.zig").MemoryFlags;
-const rc = @import("rc.zig");
-const res = @import("res.zig");
-const ico = @import("ico.zig");
-const ani = @import("ani.zig");
-const bmp = @import("bmp.zig");
-const WORD = std.os.windows.WORD;
-const DWORD = std.os.windows.DWORD;
-const utils = @import("utils.zig");
-const NameOrOrdinal = res.NameOrOrdinal;
-const CodePage = @import("code_pages.zig").CodePage;
-const CodePageLookup = @import("ast.zig").CodePageLookup;
-const SourceMappings = @import("source_mapping.zig").SourceMappings;
-const windows1252 = @import("windows1252.zig");
-const lang = @import("lang.zig");
-const code_pages = @import("code_pages.zig");
-const errors = @import("errors.zig");
-const native_endian = builtin.cpu.arch.endian();
-
-/// Settings accepted by `compile`.
-pub const CompileOptions = struct {
- /// Directory that search paths are opened against. `compile` also re-opens it
- /// and uses it as a search directory itself.
- cwd: std.fs.Dir,
- /// Receives the diagnostics produced while parsing and compiling.
- diagnostics: *Diagnostics,
- /// Passed on to the lexer. When set, the directory of the root file recorded in it
- /// (if it has a directory component) is searched before `cwd`.
- source_mappings: ?*SourceMappings = null,
- /// List of paths (absolute or relative to `cwd`) for every file that the resources within the .rc file depend on.
- /// Items within the list will be allocated using the allocator of the ArrayList and must be
- /// freed by the caller.
- /// TODO: Maybe a dedicated struct for this purpose so that it's a bit nicer to work with.
- dependencies_list: ?*std.ArrayList([]const u8) = null,
- /// Passed on to the lexer as its default code page.
- default_code_page: CodePage = .windows1252,
- /// When false, the entries of the INCLUDE environment variable are added as
- /// search directories (after `system_include_paths`).
- ignore_include_env_var: bool = false,
- /// Search directories added after `cwd`. Entries that cannot be opened are skipped.
- extra_include_paths: []const []const u8 = &.{},
- /// This is just an API convenience to allow separately passing 'system' (i.e. those
- /// that would normally be gotten from the INCLUDE env var) include paths. This is mostly
- /// intended for use when setting `ignore_include_env_var = true`. When `ignore_include_env_var`
- /// is false, `system_include_paths` will be searched before the paths in the INCLUDE env var.
- system_include_paths: []const []const u8 = &.{},
- /// When set, becomes the initial language of the compiler state.
- default_language_id: ?u16 = null,
- // TODO: Implement verbose output
- verbose: bool = false,
- /// Forwarded unchanged to the `Compiler`.
- null_terminate_string_table_strings: bool = false,
- /// Note: This is a u15 to ensure that the maximum number of UTF-16 code units
- /// plus a null-terminator can always fit into a u16.
- max_string_literal_codepoints: u15 = lex.default_max_string_literal_codepoints,
- /// Forwarded unchanged to the `Compiler`.
- silent_duplicate_control_ids: bool = false,
- /// Forwarded unchanged to the parser.
- warn_instead_of_error_on_invalid_code_page: bool = false,
-};
-
-/// Compiles the .rc `source` and writes the resulting .res data to `writer`.
-///
-/// Search directories are collected in this order: the directory of the root file
-/// from `options.source_mappings` (if it has a directory component), `options.cwd`,
-/// `options.extra_include_paths`, `options.system_include_paths`, and finally the
-/// entries of the INCLUDE environment variable (unless `options.ignore_include_env_var`
-/// is set). Include paths that cannot be opened are skipped.
-///
-/// Returns `error.CompileError` (after appending a diagnostic) if `options.cwd`
-/// cannot be re-opened. Errors from parsing, allocation, and the compiler itself
-/// are propagated.
-pub fn compile(allocator: Allocator, source: []const u8, writer: anytype, options: CompileOptions) !void {
-    var lexer = lex.Lexer.init(source, .{
-        .default_code_page = options.default_code_page,
-        .source_mappings = options.source_mappings,
-        .max_string_literal_codepoints = options.max_string_literal_codepoints,
-    });
-    var parser = Parser.init(&lexer, .{
-        .warn_instead_of_error_on_invalid_code_page = options.warn_instead_of_error_on_invalid_code_page,
-    });
-    var tree = try parser.parse(allocator, options.diagnostics);
-    defer tree.deinit();
-
-    var search_dirs = std.ArrayList(SearchDir).init(allocator);
-    defer {
-        for (search_dirs.items) |*search_dir| {
-            search_dir.deinit(allocator);
-        }
-        search_dirs.deinit();
-    }
-
-    if (options.source_mappings) |source_mappings| {
-        const root_path = source_mappings.files.get(source_mappings.root_filename_offset);
-        // If dirname returns null, then the root path will be the same as
-        // the cwd so we don't need to add it as a distinct search path.
-        if (std.fs.path.dirname(root_path)) |root_dir_path| {
-            var root_dir = try options.cwd.openDir(root_dir_path, .{});
-            errdefer root_dir.close();
-            // Duplicate the path separately so it can be released if the append fails.
-            const owned_path = try allocator.dupe(u8, root_dir_path);
-            errdefer allocator.free(owned_path);
-            try search_dirs.append(.{ .dir = root_dir, .path = owned_path });
-        }
-    }
-    {
-        // Re-open the passed in cwd since we want to be able to close it (std.fs.cwd() shouldn't be closed)
-        var cwd_dir = options.cwd.openDir(".", .{}) catch |err| {
-            try options.diagnostics.append(.{
-                .err = .failed_to_open_cwd,
-                .token = .{
-                    .id = .invalid,
-                    .start = 0,
-                    .end = 0,
-                    .line_number = 1,
-                },
-                .print_source_line = false,
-                .extra = .{ .file_open_error = .{
-                    .err = ErrorDetails.FileOpenError.enumFromError(err),
-                    .filename_string_index = undefined,
-                } },
-            });
-            return error.CompileError;
-        };
-        // Scoped to this block: once the append succeeds, `search_dirs` owns the
-        // handle and its cleanup above is responsible for closing it.
-        errdefer cwd_dir.close();
-        try search_dirs.append(.{ .dir = cwd_dir, .path = null });
-    }
-
-    // Explicitly configured paths: extra include paths first, then system include paths.
-    const configured_path_lists = [_][]const []const u8{
-        options.extra_include_paths,
-        options.system_include_paths,
-    };
-    for (configured_path_lists) |include_paths| {
-        for (include_paths) |include_path| {
-            var dir = openSearchPathDir(options.cwd, include_path) catch {
-                // TODO: maybe a warning that the search path is skipped?
-                continue;
-            };
-            errdefer dir.close();
-            const owned_path = try allocator.dupe(u8, include_path);
-            errdefer allocator.free(owned_path);
-            try search_dirs.append(.{ .dir = dir, .path = owned_path });
-        }
-    }
-
-    if (!options.ignore_include_env_var) {
-        // A missing or undecodable INCLUDE variable is treated as empty, but running
-        // out of memory while reading it is still reported to the caller.
-        const INCLUDE: []const u8 = std.process.getEnvVarOwned(allocator, "INCLUDE") catch |err| switch (err) {
-            error.OutOfMemory => |e| return e,
-            else => "",
-        };
-        defer allocator.free(INCLUDE);
-
-        // The only precedent here is llvm-rc which also uses the platform-specific
-        // delimiter. There's no precedent set by `rc.exe` since it's Windows-only.
-        const delimiter = switch (builtin.os.tag) {
-            .windows => ';',
-            else => ':',
-        };
-        var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
-        while (it.next()) |search_path| {
-            var dir = openSearchPathDir(options.cwd, search_path) catch continue;
-            errdefer dir.close();
-            const owned_path = try allocator.dupe(u8, search_path);
-            errdefer allocator.free(owned_path);
-            try search_dirs.append(.{ .dir = dir, .path = owned_path });
-        }
-    }
-
-    var arena_allocator = std.heap.ArenaAllocator.init(allocator);
-    defer arena_allocator.deinit();
-    const arena = arena_allocator.allocator();
-
-    var compiler = Compiler{
-        .source = source,
-        .arena = arena,
-        .allocator = allocator,
-        .cwd = options.cwd,
-        .diagnostics = options.diagnostics,
-        .dependencies_list = options.dependencies_list,
-        .input_code_pages = &tree.input_code_pages,
-        .output_code_pages = &tree.output_code_pages,
-        // This is only safe because we know search_dirs won't be modified past this point
-        .search_dirs = search_dirs.items,
-        .null_terminate_string_table_strings = options.null_terminate_string_table_strings,
-        .silent_duplicate_control_ids = options.silent_duplicate_control_ids,
-    };
-    if (options.default_language_id) |default_language_id| {
-        compiler.state.language = res.Language.fromInt(default_language_id);
-    }
-
-    try compiler.writeRoot(tree.root(), writer);
-}
-
-pub const Compiler = struct {
- source: []const u8,
- arena: Allocator,
- allocator: Allocator,
- cwd: std.fs.Dir,
- state: State = .{},
- diagnostics: *Diagnostics,
- dependencies_list: ?*std.ArrayList([]const u8),
- input_code_pages: *const CodePageLookup,
- output_code_pages: *const CodePageLookup,
- search_dirs: []SearchDir,
- null_terminate_string_table_strings: bool,
- silent_duplicate_control_ids: bool,
-
- /// Mutable state carried by the compiler from one node to the next.
- pub const State = struct {
- /// Ordinal used as the name of the next icon/cursor image resource; it is
- /// incremented after each image is written.
- icon_id: u16 = 1,
- /// STRINGTABLE data that `writeRoot` writes out after every node has been processed.
- string_tables: StringTablesByLanguage = .{},
- /// Language copied into resource headers; set from `default_language_id` when one is given.
- language: res.Language = .{},
- /// FONTDIR data that `writeRoot` writes out after every node has been processed.
- font_dir: FontDir = .{},
- /// Copied into the `version` field of resource headers.
- version: u32 = 0,
- /// Copied into the `characteristics` field of resource headers.
- characteristics: u32 = 0,
- };
-
- /// Writes the complete output for `root`: an empty resource first, then every
- /// node of the root body in order, then the FONTDIR, and finally all of the
- /// accumulated STRINGTABLE blocks.
- pub fn writeRoot(self: *Compiler, root: *Node.Root, writer: anytype) !void {
-     try writeEmptyResource(writer);
-     for (root.body) |child| try self.writeNode(child, writer);
-
-     // The FONTDIR comes next (writeResData is called even if there are no fonts).
-     try self.state.font_dir.writeResData(self, writer);
-     const has_fonts = self.state.font_dir.fonts.items.len != 0;
-     if (has_fonts) {
-         // Our FONTDIR can differ from the one the Win32 RC compiler produces:
-         // it sometimes writes a non-zero-length device name/face name, whereas
-         // we *always* write both as zero-length.
-         //
-         // This makes no practical difference, because the FONTDIR format cannot
-         // be relied on for various reasons and nothing seems to actually use it
-         // anymore. A diagnostic is still emitted so that it is possible to tell
-         // that this .RES is intentionally not byte-for-byte identical to the
-         // rc.exe output.
-         //
-         // Using the hint type keeps the diagnostic detectable from code without
-         // printing it, since the end-user doesn't need to care.
-         try self.addErrorDetails(.{
-             .err = .result_contains_fontdir,
-             .type = .hint,
-             .token = undefined,
-         });
-     }
-
-     // Everything else has been written, so the finalized STRINGTABLE resources can go out now.
-     var tables_by_language = self.state.string_tables.tables.iterator();
-     while (tables_by_language.next()) |language_entry| {
-         var blocks = language_entry.value_ptr.blocks.iterator();
-         while (blocks.next()) |block_entry| {
-             try block_entry.value_ptr.writeResData(self, language_entry.key_ptr.*, block_entry.key_ptr.*, writer);
-         }
-     }
- }
-
- /// Dispatches a node from the root body to the writer for its node type.
- /// Node types that are handled by their parent (or by `writeRoot`) are
- /// unreachable here.
- pub fn writeNode(self: *Compiler, node: *Node, writer: anytype) !void {
-     switch (node.id) {
-         // Node types whose handlers are given the writer.
-         .resource_external => try self.writeResourceExternal(@fieldParentPtr(Node.ResourceExternal, "base", node), writer),
-         .resource_raw_data => try self.writeResourceRawData(@fieldParentPtr(Node.ResourceRawData, "base", node), writer),
-         .accelerators => try self.writeAccelerators(@fieldParentPtr(Node.Accelerators, "base", node), writer),
-         .dialog => try self.writeDialog(@fieldParentPtr(Node.Dialog, "base", node), writer),
-         .toolbar => try self.writeToolbar(@fieldParentPtr(Node.Toolbar, "base", node), writer),
-         .menu => try self.writeMenu(@fieldParentPtr(Node.Menu, "base", node), writer),
-         .version_info => try self.writeVersionInfo(@fieldParentPtr(Node.VersionInfo, "base", node), writer),
-
-         // Node types whose handlers are not given the writer.
-         .string_table => try self.writeStringTable(@fieldParentPtr(Node.StringTable, "base", node)),
-         .language_statement => self.writeLanguageStatement(@fieldParentPtr(Node.LanguageStatement, "base", node)),
-         .simple_statement => self.writeTopLevelSimpleStatement(@fieldParentPtr(Node.SimpleStatement, "base", node)),
-
-         // no-op, currently only used for dangling literals at EOF
-         .invalid => {},
-
-         // writeRoot should be called directly instead
-         .root => unreachable,
-
-         // These are context dependent and are handled by their parent node
-         // (e.g. accelerator by writeAccelerators, string_table_string by writeStringTable).
-         .literal,
-         .binary_expression,
-         .grouped_expression,
-         .not_expression,
-         .accelerator,
-         .control_statement,
-         .menu_item,
-         .menu_item_separator,
-         .menu_item_ex,
-         .popup,
-         .popup_ex,
-         .version_statement,
-         .block,
-         .block_value,
-         .block_value_value,
-         .string_table_string,
-         .font_statement,
-         => unreachable,
-     }
- }
-
- /// Evaluates a filename expression to UTF-8 bytes (allocated by self.allocator).
- ///
- /// For a binary expression only the right operand is evaluated, and a grouped
- /// expression evaluates to its inner expression. Codepoints that are invalid
- /// (or cannot be encoded) are replaced with U+FFFD.
- pub fn evaluateFilenameExpression(self: *Compiler, expression_node: *Node) ![]u8 {
-     // Resolve the non-literal node kinds first; everything below deals with a literal.
-     const literal_node = switch (expression_node.id) {
-         .literal => expression_node.cast(.literal).?,
-         .binary_expression => return self.evaluateFilenameExpression(expression_node.cast(.binary_expression).?.right),
-         .grouped_expression => return self.evaluateFilenameExpression(expression_node.cast(.grouped_expression).?.expression),
-         else => unreachable,
-     };
-
-     switch (literal_node.token.id) {
-         .literal, .number => {
-             const token_bytes = literal_node.token.slice(self.source);
-             const code_page = self.input_code_pages.getForToken(literal_node.token);
-             var out = try std.ArrayList(u8).initCapacity(self.allocator, token_bytes.len);
-             errdefer out.deinit();
-
-             var offset: usize = 0;
-             while (code_page.codepointAt(offset, token_bytes)) |decoded| : (offset += decoded.byte_len) {
-                 if (decoded.value == code_pages.Codepoint.invalid) {
-                     try out.appendSlice("�");
-                     continue;
-                 }
-                 // Anything that is not returned as an invalid codepoint must be encodable as UTF-8.
-                 const encoded_len = std.unicode.utf8CodepointSequenceLength(decoded.value) catch unreachable;
-                 try out.ensureUnusedCapacity(encoded_len);
-                 _ = std.unicode.utf8Encode(decoded.value, out.unusedCapacitySlice()) catch unreachable;
-                 out.items.len += encoded_len;
-             }
-
-             return out.toOwnedSlice();
-         },
-         .quoted_ascii_string, .quoted_wide_string => {
-             const token_bytes = literal_node.token.slice(self.source);
-             const start_column = literal_node.token.calculateColumn(self.source, 8, null);
-             const source_bytes = SourceBytes{
-                 .slice = token_bytes,
-                 .code_page = self.input_code_pages.getForToken(literal_node.token),
-             };
-
-             var out = std.ArrayList(u8).init(self.allocator);
-             errdefer out.deinit();
-
-             // Filenames are sort-of parsed as if they were wide strings, but the max escape width of
-             // hex/octal escapes is still determined by the L prefix. Since we want to end up with
-             // UTF-8, we can parse either string type directly to UTF-8.
-             var string_parser = literals.IterativeStringParser.init(source_bytes, .{
-                 .start_column = start_column,
-                 .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token },
-             });
-
-             while (try string_parser.nextUnchecked()) |parsed| {
-                 if (parsed.codepoint == code_pages.Codepoint.invalid) {
-                     try out.appendSlice("�");
-                     continue;
-                 }
-                 var utf8_buf: [4]u8 = undefined;
-                 // If the codepoint cannot be encoded, we fall back to �
-                 const encoded_len = std.unicode.utf8Encode(parsed.codepoint, &utf8_buf) catch {
-                     try out.appendSlice("�");
-                     continue;
-                 };
-                 try out.appendSlice(utf8_buf[0..encoded_len]);
-             }
-
-             return out.toOwnedSlice();
-         },
-         else => {
-             std.debug.print("unexpected filename token type: {}\n", .{literal_node.token});
-             unreachable; // no other token types should be in a filename literal node
-         },
-     }
- }
-
- /// https://learn.microsoft.com/en-us/windows/win32/menurc/searching-for-files
- ///
- /// Searches, in this order:
- ///  Directory of the 'root' .rc file (if different from CWD)
- ///  CWD
- ///  extra_include_paths (resolved relative to CWD)
- ///  system_include_paths (resolved relative to CWD)
- ///  INCLUDE environment var paths (only if ignore_include_env_var is false; resolved relative to CWD)
- ///
- /// Absolute paths are opened directly and are never resolved against the
- /// search directories.
- ///
- /// On success the caller owns the returned file and must close it. If
- /// `dependencies_list` is set, the path of the opened file is appended to it
- /// (allocated with the list's allocator).
- ///
- /// Note: The CWD being searched *in addition to* the directory of the 'root' .rc file
- ///       is also how the Win32 RC compiler preprocessor searches for includes, but that
- ///       differs from how the clang preprocessor searches for includes.
- ///
- /// Note: This will always return the first matching file that can be opened.
- ///       This matches the Win32 RC compiler, which will fail with an error if the first
- ///       matching file is invalid. That is, it does not do the `cmd` PATH searching
- ///       thing of continuing to look for matching files until it finds a valid
- ///       one if a matching file is invalid.
- fn searchForFile(self: *Compiler, path: []const u8) !std.fs.File {
-     // If the path is absolute, then it is not resolved relative to any search
-     // paths, so there's no point in checking them.
-     //
-     // This behavior was determined/confirmed with the following test:
-     // - A `test.rc` file with the contents `1 RCDATA "/test.bin"`
-     // - A `test.bin` file at `C:\test.bin`
-     // - A `test.bin` file at `inc\test.bin` relative to the .rc file
-     // - Invoking `rc` with `rc /i inc test.rc`
-     //
-     // This results in a .res file with the contents of `C:\test.bin`, not
-     // the contents of `inc\test.bin`. Further, if `C:\test.bin` is deleted,
-     // then it starts failing to find `/test.bin`, meaning that it does not resolve
-     // `/test.bin` relative to include paths and instead only treats it as
-     // an absolute path.
-     if (std.fs.path.isAbsolute(path)) {
-         const file = try utils.openFileNotDir(std.fs.cwd(), path, .{});
-         errdefer file.close();
-
-         if (self.dependencies_list) |dependencies_list| {
-             const duped_path = try dependencies_list.allocator.dupe(u8, path);
-             errdefer dependencies_list.allocator.free(duped_path);
-             try dependencies_list.append(duped_path);
-         }
-
-         // Hand the handle to the caller. Falling through to the search
-         // directories would leak it, open the same file a second time, and
-         // record the dependency twice.
-         return file;
-     }
-
-     // Remember only the first failure; it is reported if no directory has the file.
-     var first_error: ?std.fs.File.OpenError = null;
-     for (self.search_dirs) |search_dir| {
-         if (utils.openFileNotDir(search_dir.dir, path, .{})) |file| {
-             errdefer file.close();
-
-             if (self.dependencies_list) |dependencies_list| {
-                 const searched_file_path = try std.fs.path.join(dependencies_list.allocator, &.{
-                     search_dir.path orelse "", path,
-                 });
-                 errdefer dependencies_list.allocator.free(searched_file_path);
-                 try dependencies_list.append(searched_file_path);
-             }
-
-             return file;
-         } else |err| if (first_error == null) {
-             first_error = err;
-         }
-     }
-     return first_error orelse error.FileNotFound;
- }
-
- pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
- // Init header with data size zero for now, will need to fill it in later
- var header = try self.resourceHeader(node.id, node.type, .{});
- defer header.deinit(self.allocator);
-
- const maybe_predefined_type = header.predefinedResourceType();
-
- // DLGINCLUDE has special handling that doesn't actually need the file to exist
- if (maybe_predefined_type != null and maybe_predefined_type.? == .DLGINCLUDE) {
- const filename_token = node.filename.cast(.literal).?.token;
- const parsed_filename = try self.parseQuotedStringAsAsciiString(filename_token);
- defer self.allocator.free(parsed_filename);
-
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- header.data_size = @intCast(parsed_filename.len + 1);
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
- try writer.writeAll(parsed_filename);
- try writer.writeByte(0);
- try writeDataPadding(writer, header.data_size);
- return;
- }
-
- const filename_utf8 = try self.evaluateFilenameExpression(node.filename);
- defer self.allocator.free(filename_utf8);
-
- // TODO: More robust checking of the validity of the filename.
- // This currently only checks for NUL bytes, but it should probably also check for
- // platform-specific invalid characters like '*', '?', '"', '<', '>', '|' (Windows)
- // Related: https://github.com/ziglang/zig/pull/14533#issuecomment-1416888193
- if (std.mem.indexOfScalar(u8, filename_utf8, 0) != null) {
- return self.addErrorDetailsAndFail(.{
- .err = .invalid_filename,
- .token = node.filename.getFirstToken(),
- .token_span_end = node.filename.getLastToken(),
- .extra = .{ .number = 0 },
- });
- }
-
- // Allow plain number literals, but complex number expressions are evaluated strangely
- // and almost certainly lead to things not intended by the user (e.g. '(1+-1)' evaluates
- // to the filename '-1'), so error if the filename node is a grouped/binary expression.
- // Note: This is done here instead of during parsing so that we can easily include
- // the evaluated filename as part of the error messages.
- if (node.filename.id != .literal) {
- const filename_string_index = try self.diagnostics.putString(filename_utf8);
- try self.addErrorDetails(.{
- .err = .number_expression_as_filename,
- .token = node.filename.getFirstToken(),
- .token_span_end = node.filename.getLastToken(),
- .extra = .{ .number = filename_string_index },
- });
- return self.addErrorDetailsAndFail(.{
- .err = .number_expression_as_filename,
- .type = .note,
- .token = node.filename.getFirstToken(),
- .token_span_end = node.filename.getLastToken(),
- .print_source_line = false,
- .extra = .{ .number = filename_string_index },
- });
- }
- // From here on out, we know that the filename must be comprised of a single token,
- // so get it here to simplify future usage.
- const filename_token = node.filename.getFirstToken();
-
- const file = self.searchForFile(filename_utf8) catch |err| switch (err) {
- error.OutOfMemory => |e| return e,
- else => |e| {
- const filename_string_index = try self.diagnostics.putString(filename_utf8);
- return self.addErrorDetailsAndFail(.{
- .err = .file_open_error,
- .token = filename_token,
- .extra = .{ .file_open_error = .{
- .err = ErrorDetails.FileOpenError.enumFromError(e),
- .filename_string_index = filename_string_index,
- } },
- });
- },
- };
- defer file.close();
-
- if (maybe_predefined_type) |predefined_type| {
- switch (predefined_type) {
- .GROUP_ICON, .GROUP_CURSOR => {
- // Check for animated icon first
- if (ani.isAnimatedIcon(file.reader())) {
- // Animated icons are just put into the resource unmodified,
- // and the resource type changes to ANIICON/ANICURSOR
-
- const new_predefined_type: res.RT = switch (predefined_type) {
- .GROUP_ICON => .ANIICON,
- .GROUP_CURSOR => .ANICURSOR,
- else => unreachable,
- };
- header.type_value.ordinal = @intFromEnum(new_predefined_type);
- header.memory_flags = MemoryFlags.defaults(new_predefined_type);
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- header.data_size = @intCast(try file.getEndPos());
-
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
- try file.seekTo(0);
- try writeResourceData(writer, file.reader(), header.data_size);
- return;
- }
-
- // isAnimatedIcon moved the file cursor so reset to the start
- try file.seekTo(0);
-
- const icon_dir = ico.read(self.allocator, file.reader(), try file.getEndPos()) catch |err| switch (err) {
- error.OutOfMemory => |e| return e,
- else => |e| {
- return self.iconReadError(
- e,
- filename_utf8,
- filename_token,
- predefined_type,
- );
- },
- };
- defer icon_dir.deinit();
-
- // This limit is inherent to the ico format since number of entries is a u16 field.
- std.debug.assert(icon_dir.entries.len <= std.math.maxInt(u16));
-
- // Note: The Win32 RC compiler will compile the resource as whatever type is
- // in the icon_dir regardless of the type of resource specified in the .rc.
- // This leads to unusable .res files when the types mismatch, so
- // we error instead.
- const res_types_match = switch (predefined_type) {
- .GROUP_ICON => icon_dir.image_type == .icon,
- .GROUP_CURSOR => icon_dir.image_type == .cursor,
- else => unreachable,
- };
- if (!res_types_match) {
- return self.addErrorDetailsAndFail(.{
- .err = .icon_dir_and_resource_type_mismatch,
- .token = filename_token,
- .extra = .{ .resource = switch (predefined_type) {
- .GROUP_ICON => .icon,
- .GROUP_CURSOR => .cursor,
- else => unreachable,
- } },
- });
- }
-
- // Memory flags affect the RT_ICON and the RT_GROUP_ICON differently
- var icon_memory_flags = MemoryFlags.defaults(res.RT.ICON);
- applyToMemoryFlags(&icon_memory_flags, node.common_resource_attributes, self.source);
- applyToGroupMemoryFlags(&header.memory_flags, node.common_resource_attributes, self.source);
-
- const first_icon_id = self.state.icon_id;
- const entry_type = if (predefined_type == .GROUP_ICON) @intFromEnum(res.RT.ICON) else @intFromEnum(res.RT.CURSOR);
- for (icon_dir.entries, 0..) |*entry, entry_i_usize| {
- // We know that the entry index must fit within a u16, so
- // cast it here to simplify usage sites.
- const entry_i: u16 = @intCast(entry_i_usize);
- var full_data_size = entry.data_size_in_bytes;
- if (icon_dir.image_type == .cursor) {
- full_data_size = std.math.add(u32, full_data_size, 4) catch {
- return self.addErrorDetailsAndFail(.{
- .err = .resource_data_size_exceeds_max,
- .token = node.id,
- });
- };
- }
-
- const image_header = ResourceHeader{
- .type_value = .{ .ordinal = entry_type },
- .name_value = .{ .ordinal = self.state.icon_id },
- .data_size = full_data_size,
- .memory_flags = icon_memory_flags,
- .language = self.state.language,
- .version = self.state.version,
- .characteristics = self.state.characteristics,
- };
- try image_header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
- // From https://learn.microsoft.com/en-us/windows/win32/menurc/localheader:
- // > The LOCALHEADER structure is the first data written to the RT_CURSOR
- // > resource if a RESDIR structure contains information about a cursor.
- // where LOCALHEADER is `struct { WORD xHotSpot; WORD yHotSpot; }`
- if (icon_dir.image_type == .cursor) {
- try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_x, .little);
- try writer.writeInt(u16, entry.type_specific_data.cursor.hotspot_y, .little);
- }
-
- try file.seekTo(entry.data_offset_from_start_of_file);
- var header_bytes = file.reader().readBytesNoEof(16) catch {
- return self.iconReadError(
- error.UnexpectedEOF,
- filename_utf8,
- filename_token,
- predefined_type,
- );
- };
-
- const image_format = ico.ImageFormat.detect(&header_bytes);
- if (!image_format.validate(&header_bytes)) {
- return self.iconReadError(
- error.InvalidHeader,
- filename_utf8,
- filename_token,
- predefined_type,
- );
- }
- switch (image_format) {
- .riff => switch (icon_dir.image_type) {
- .icon => {
- // The Win32 RC compiler treats this as an error, but icon dirs
- // with RIFF encoded icons within them work ~okay (they work
- // in some places but not others, they may not animate, etc) if they are
- // allowed to be compiled.
- try self.addErrorDetails(.{
- .err = .rc_would_error_on_icon_dir,
- .type = .warning,
- .token = filename_token,
- .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_error_on_icon_dir,
- .type = .note,
- .print_source_line = false,
- .token = filename_token,
- .extra = .{ .icon_dir = .{ .icon_type = .icon, .icon_format = .riff, .index = entry_i } },
- });
- },
- .cursor => {
- // The Win32 RC compiler errors in this case too, but we only error
- // here because the cursor would fail to be loaded at runtime if we
- // compiled it.
- return self.addErrorDetailsAndFail(.{
- .err = .format_not_supported_in_icon_dir,
- .token = filename_token,
- .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .riff, .index = entry_i } },
- });
- },
- },
- .png => switch (icon_dir.image_type) {
- .icon => {
- // PNG always seems to have 1 for color planes no matter what
- entry.type_specific_data.icon.color_planes = 1;
- // These seem to be the only values of num_colors that
- // get treated specially
- entry.type_specific_data.icon.bits_per_pixel = switch (entry.num_colors) {
- 2 => 1,
- 8 => 3,
- 16 => 4,
- else => entry.type_specific_data.icon.bits_per_pixel,
- };
- },
- .cursor => {
- // The Win32 RC compiler treats this as an error, but cursor dirs
- // with PNG encoded icons within them work fine if they are
- // allowed to be compiled.
- try self.addErrorDetails(.{
- .err = .rc_would_error_on_icon_dir,
- .type = .warning,
- .token = filename_token,
- .extra = .{ .icon_dir = .{ .icon_type = .cursor, .icon_format = .png, .index = entry_i } },
- });
- },
- },
- .dib => {
- const bitmap_header: *ico.BitmapHeader = @ptrCast(@alignCast(&header_bytes));
- if (native_endian == .big) {
- std.mem.byteSwapAllFields(ico.BitmapHeader, bitmap_header);
- }
- const bitmap_version = ico.BitmapHeader.Version.get(bitmap_header.bcSize);
-
- // The Win32 RC compiler only allows headers with
- // `bcSize == sizeof(BITMAPINFOHEADER)`, but it seems unlikely
- // that there's a good reason for that outside of too-old
- // bitmap headers.
- // TODO: Need to test V4 and V5 bitmaps to check they actually work
- if (bitmap_version == .@"win2.0") {
- return self.addErrorDetailsAndFail(.{
- .err = .rc_would_error_on_bitmap_version,
- .token = filename_token,
- .extra = .{ .icon_dir = .{
- .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
- .icon_format = image_format,
- .index = entry_i,
- .bitmap_version = bitmap_version,
- } },
- });
- } else if (bitmap_version != .@"nt3.1") {
- try self.addErrorDetails(.{
- .err = .rc_would_error_on_bitmap_version,
- .type = .warning,
- .token = filename_token,
- .extra = .{ .icon_dir = .{
- .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
- .icon_format = image_format,
- .index = entry_i,
- .bitmap_version = bitmap_version,
- } },
- });
- }
-
- switch (icon_dir.image_type) {
- .icon => {
- // The values in the icon's BITMAPINFOHEADER always take precedence over
- // the values in the IconDir, but not in the LOCALHEADER (see above).
- entry.type_specific_data.icon.color_planes = bitmap_header.bcPlanes;
- entry.type_specific_data.icon.bits_per_pixel = bitmap_header.bcBitCount;
- },
- .cursor => {
- // Only cursors get the width/height from BITMAPINFOHEADER (icons don't)
- entry.width = @intCast(bitmap_header.bcWidth);
- entry.height = @intCast(bitmap_header.bcHeight);
- entry.type_specific_data.cursor.hotspot_x = bitmap_header.bcPlanes;
- entry.type_specific_data.cursor.hotspot_y = bitmap_header.bcBitCount;
- },
- }
- },
- }
-
- try file.seekTo(entry.data_offset_from_start_of_file);
- try writeResourceDataNoPadding(writer, file.reader(), entry.data_size_in_bytes);
- try writeDataPadding(writer, full_data_size);
-
- if (self.state.icon_id == std.math.maxInt(u16)) {
- try self.addErrorDetails(.{
- .err = .max_icon_ids_exhausted,
- .print_source_line = false,
- .token = filename_token,
- .extra = .{ .icon_dir = .{
- .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
- .icon_format = image_format,
- .index = entry_i,
- } },
- });
- return self.addErrorDetailsAndFail(.{
- .err = .max_icon_ids_exhausted,
- .type = .note,
- .token = filename_token,
- .extra = .{ .icon_dir = .{
- .icon_type = if (icon_dir.image_type == .icon) .icon else .cursor,
- .icon_format = image_format,
- .index = entry_i,
- } },
- });
- }
- self.state.icon_id += 1;
- }
-
- header.data_size = icon_dir.getResDataSize();
-
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
- try icon_dir.writeResData(writer, first_icon_id);
- try writeDataPadding(writer, header.data_size);
- return;
- },
- .RCDATA, .HTML, .MANIFEST, .MESSAGETABLE, .DLGINIT, .PLUGPLAY => {
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- },
- .BITMAP => {
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- const file_size = try file.getEndPos();
-
- const bitmap_info = bmp.read(file.reader(), file_size) catch |err| {
- const filename_string_index = try self.diagnostics.putString(filename_utf8);
- return self.addErrorDetailsAndFail(.{
- .err = .bmp_read_error,
- .token = filename_token,
- .extra = .{ .bmp_read_error = .{
- .err = ErrorDetails.BitmapReadError.enumFromError(err),
- .filename_string_index = filename_string_index,
- } },
- });
- };
-
- if (bitmap_info.getActualPaletteByteLen() > bitmap_info.getExpectedPaletteByteLen()) {
- const num_ignored_bytes = bitmap_info.getActualPaletteByteLen() - bitmap_info.getExpectedPaletteByteLen();
- var number_as_bytes: [8]u8 = undefined;
- std.mem.writeInt(u64, &number_as_bytes, num_ignored_bytes, native_endian);
- const value_string_index = try self.diagnostics.putString(&number_as_bytes);
- try self.addErrorDetails(.{
- .err = .bmp_ignored_palette_bytes,
- .type = .warning,
- .token = filename_token,
- .extra = .{ .number = value_string_index },
- });
- } else if (bitmap_info.getActualPaletteByteLen() < bitmap_info.getExpectedPaletteByteLen()) {
- const num_padding_bytes = bitmap_info.getExpectedPaletteByteLen() - bitmap_info.getActualPaletteByteLen();
-
- // TODO: Make this configurable (command line option)
- const max_missing_bytes = 4096;
- if (num_padding_bytes > max_missing_bytes) {
- var numbers_as_bytes: [16]u8 = undefined;
- std.mem.writeInt(u64, numbers_as_bytes[0..8], num_padding_bytes, native_endian);
- std.mem.writeInt(u64, numbers_as_bytes[8..16], max_missing_bytes, native_endian);
- const values_string_index = try self.diagnostics.putString(&numbers_as_bytes);
- try self.addErrorDetails(.{
- .err = .bmp_too_many_missing_palette_bytes,
- .token = filename_token,
- .extra = .{ .number = values_string_index },
- });
- return self.addErrorDetailsAndFail(.{
- .err = .bmp_too_many_missing_palette_bytes,
- .type = .note,
- .print_source_line = false,
- .token = filename_token,
- });
- }
-
- var number_as_bytes: [8]u8 = undefined;
- std.mem.writeInt(u64, &number_as_bytes, num_padding_bytes, native_endian);
- const value_string_index = try self.diagnostics.putString(&number_as_bytes);
- try self.addErrorDetails(.{
- .err = .bmp_missing_palette_bytes,
- .type = .warning,
- .token = filename_token,
- .extra = .{ .number = value_string_index },
- });
- const pixel_data_len = bitmap_info.getPixelDataLen(file_size);
- if (pixel_data_len > 0) {
- const miscompiled_bytes = @min(pixel_data_len, num_padding_bytes);
- std.mem.writeInt(u64, &number_as_bytes, miscompiled_bytes, native_endian);
- const miscompiled_bytes_string_index = try self.diagnostics.putString(&number_as_bytes);
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_bmp_palette_padding,
- .type = .warning,
- .token = filename_token,
- .extra = .{ .number = miscompiled_bytes_string_index },
- });
- }
- }
-
- // TODO: It might be possible that the calculation done in this function
- // could underflow if the underlying file is modified while reading
- // it, but need to think about it more to determine if that's a
- // real possibility
- const bmp_bytes_to_write: u32 = @intCast(bitmap_info.getExpectedByteLen(file_size));
-
- header.data_size = bmp_bytes_to_write;
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
- try file.seekTo(bmp.file_header_len);
- const file_reader = file.reader();
- try writeResourceDataNoPadding(writer, file_reader, bitmap_info.dib_header_size);
- if (bitmap_info.getBitmasksByteLen() > 0) {
- try writeResourceDataNoPadding(writer, file_reader, bitmap_info.getBitmasksByteLen());
- }
- if (bitmap_info.getExpectedPaletteByteLen() > 0) {
- try writeResourceDataNoPadding(writer, file_reader, @intCast(bitmap_info.getActualPaletteByteLen()));
- // We know that the number of missing palette bytes is <= 4096
- // (see `bmp_too_many_missing_palette_bytes` error case above)
- const padding_bytes: usize = @intCast(bitmap_info.getMissingPaletteByteLen());
- if (padding_bytes > 0) {
- try writer.writeByteNTimes(0, padding_bytes);
- }
- }
- try file.seekTo(bitmap_info.pixel_data_offset);
- const pixel_bytes: u32 = @intCast(file_size - bitmap_info.pixel_data_offset);
- try writeResourceDataNoPadding(writer, file_reader, pixel_bytes);
- try writeDataPadding(writer, bmp_bytes_to_write);
- return;
- },
- .FONT => {
- if (self.state.font_dir.ids.get(header.name_value.ordinal) != null) {
- // Add warning and skip this resource
- // Note: The Win32 compiler prints this as an error but it doesn't fail the compilation
- // and the duplicate resource is skipped.
- try self.addErrorDetails(ErrorDetails{
- .err = .font_id_already_defined,
- .token = node.id,
- .type = .warning,
- .extra = .{ .number = header.name_value.ordinal },
- });
- try self.addErrorDetails(ErrorDetails{
- .err = .font_id_already_defined,
- .token = self.state.font_dir.ids.get(header.name_value.ordinal).?,
- .type = .note,
- .extra = .{ .number = header.name_value.ordinal },
- });
- return;
- }
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- const file_size = try file.getEndPos();
- if (file_size > std.math.maxInt(u32)) {
- return self.addErrorDetailsAndFail(.{
- .err = .resource_data_size_exceeds_max,
- .token = node.id,
- });
- }
-
- // We now know that the data size will fit in a u32
- header.data_size = @intCast(file_size);
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
- var header_slurping_reader = headerSlurpingReader(148, file.reader());
- try writeResourceData(writer, header_slurping_reader.reader(), header.data_size);
-
- try self.state.font_dir.add(self.arena, FontDir.Font{
- .id = header.name_value.ordinal,
- .header_bytes = header_slurping_reader.slurped_header,
- }, node.id);
- return;
- },
- .ACCELERATOR,
- .ANICURSOR,
- .ANIICON,
- .CURSOR,
- .DIALOG,
- .DLGINCLUDE,
- .FONTDIR,
- .ICON,
- .MENU,
- .STRING,
- .TOOLBAR,
- .VERSION,
- .VXD,
- => unreachable,
- _ => unreachable,
- }
- } else {
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- }
-
- // Fallback to just writing out the entire contents of the file
- const data_size = try file.getEndPos();
- if (data_size > std.math.maxInt(u32)) {
- return self.addErrorDetailsAndFail(.{
- .err = .resource_data_size_exceeds_max,
- .token = node.id,
- });
- }
- // We now know that the data size will fit in a u32
- header.data_size = @intCast(data_size);
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
- try writeResourceData(writer, file.reader(), header.data_size);
- }
-
-    /// Records an `icon_read_error` diagnostic for `filename` at `token` and
-    /// returns the error produced by `addErrorDetailsAndFail`.
-    ///
-    /// `predefined_type` selects whether the diagnostic talks about an icon
-    /// (`.GROUP_ICON`) or a cursor (`.GROUP_CURSOR`); any other value is
-    /// treated as unreachable.
-    fn iconReadError(
-        self: *Compiler,
-        err: ico.ReadError,
-        filename: []const u8,
-        token: Token,
-        predefined_type: res.RT,
-    ) error{ CompileError, OutOfMemory } {
-        // The diagnostic only carries the index that `putString` hands back
-        // for the filename, not the filename itself.
-        const name_index = try self.diagnostics.putString(filename);
-        const details: ErrorDetails = .{
-            .err = .icon_read_error,
-            .token = token,
-            .extra = .{ .icon_read_error = .{
-                .err = ErrorDetails.IconReadError.enumFromError(err),
-                .icon_type = switch (predefined_type) {
-                    .GROUP_ICON => .icon,
-                    .GROUP_CURSOR => .cursor,
-                    else => unreachable,
-                },
-                .filename_string_index = name_index,
-            } },
-        };
-        return self.addErrorDetailsAndFail(details);
-    }
-
-    /// Tag type for `Data`: the kinds of value a raw data expression can
-    /// evaluate to.
-    pub const DataType = enum { number, ascii_string, wide_string };
-
-    /// One evaluated raw data value: either a number or a parsed string.
-    pub const Data = union(DataType) {
-        number: Number,
-        ascii_string: []const u8,
-        wide_string: [:0]const u16,
-
-        /// Frees the string payload (if any) with `allocator`.
-        /// The `number` variant owns no memory, so nothing is freed for it.
-        pub fn deinit(self: Data, allocator: Allocator) void {
-            switch (self) {
-                .number => {},
-                .ascii_string => |bytes| allocator.free(bytes),
-                .wide_string => |code_units| allocator.free(code_units),
-            }
-        }
-
-        /// Serializes the value to `writer`.
-        ///
-        /// Numbers are written little-endian, as a `DWORD` when `is_long` is
-        /// set and as a `WORD` otherwise. Strings are written as their raw
-        /// bytes; for wide strings that is the in-memory bytes of the `u16`
-        /// elements.
-        pub fn write(self: Data, writer: anytype) !void {
-            switch (self) {
-                .number => |number| {
-                    if (number.is_long) {
-                        try writer.writeInt(DWORD, number.value, .little);
-                    } else {
-                        try writer.writeInt(WORD, number.asWord(), .little);
-                    }
-                },
-                .ascii_string => |bytes| try writer.writeAll(bytes),
-                .wide_string => |code_units| try writer.writeAll(std.mem.sliceAsBytes(code_units)),
-            }
-        }
-    };
-
-    /// Recursively evaluates a number expression tree into a `Number`.
-    ///
-    /// Assumes that the node is a number literal, a binary expression, or a
-    /// grouped expression; any other node kind is unreachable.
-    pub fn evaluateNumberExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) Number {
-        return switch (expression_node.id) {
-            .literal => literal: {
-                const literal = expression_node.cast(.literal).?;
-                std.debug.assert(literal.token.id == .number);
-                const bytes = SourceBytes{
-                    .slice = literal.token.slice(source),
-                    .code_page = code_page_lookup.getForToken(literal.token),
-                };
-                break :literal literals.parseNumberLiteral(bytes);
-            },
-            .binary_expression => binary: {
-                const binary = expression_node.cast(.binary_expression).?;
-                // Operands are evaluated left first, then right.
-                const left = evaluateNumberExpression(binary.left, source, code_page_lookup);
-                const right = evaluateNumberExpression(binary.right, source, code_page_lookup);
-                // Only the first byte of the operator token is passed on.
-                const operator_char = binary.operator.slice(source)[0];
-                break :binary left.evaluateOperator(operator_char, right);
-            },
-            .grouped_expression => evaluateNumberExpression(
-                expression_node.cast(.grouped_expression).?.expression,
-                source,
-                code_page_lookup,
-            ),
-            else => unreachable,
-        };
-    }
-
-    /// A flags value together with a mask of bits that are allowed to stay set.
-    /// The mask starts out as all ones.
-    const FlagsNumber = struct {
-        value: u32,
-        not_mask: u32 = 0xFFFFFFFF,
-
-        /// Combines `lhs` and `rhs` with the operator given by `operator_char`
-        /// (`+`, `-`, `|` or `&`). Addition and subtraction wrap on overflow.
-        /// The resulting mask is the intersection of both operands' masks.
-        pub fn evaluateOperator(lhs: FlagsNumber, operator_char: u8, rhs: FlagsNumber) FlagsNumber {
-            const a = lhs.value;
-            const b = rhs.value;
-            const combined: u32 = switch (operator_char) {
-                '+' => a +% b,
-                '-' => a -% b,
-                '&' => a & b,
-                '|' => a | b,
-                else => unreachable, // invalid operator, this would be a lexer/parser bug
-            };
-            return .{
-                .value = combined,
-                .not_mask = lhs.not_mask & rhs.not_mask,
-            };
-        }
-
-        /// Returns `value` with every bit cleared that is not set in `not_mask`.
-        pub fn applyNotMask(self: FlagsNumber) u32 {
-            return self.not_mask & self.value;
-        }
-    };
-
-    /// Evaluates a flags expression using `default` as the context's initial
-    /// value and returns the `value` of the result.
-    pub fn evaluateFlagsExpressionWithDefault(default: u32, expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) u32 {
-        var context: FlagsExpressionContext = .{ .initial_value = default };
-        return evaluateFlagsExpression(expression_node, source, code_page_lookup, &context).value;
-    }
-
-    /// Mutable state shared across one `evaluateFlagsExpression` evaluation.
-    pub const FlagsExpressionContext = struct {
-        /// Value that is folded into the first literal or `NOT` operand that
-        /// gets evaluated.
-        initial_value: u32 = 0,
-        /// Set to true once `initial_value` has been folded in, so that it is
-        /// applied only once.
-        initial_value_used: bool = false,
-    };
-
-    /// Recursively evaluates a flags expression into a `FlagsNumber`.
-    ///
-    /// Assumes that the node is a number expression (which can contain
-    /// not_expressions). The first literal or `NOT` operand that is evaluated
-    /// consumes `context.initial_value`; later operands do not see it.
-    pub fn evaluateFlagsExpression(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup, context: *FlagsExpressionContext) FlagsNumber {
-        switch (expression_node.id) {
-            .literal => {
-                const literal = expression_node.cast(.literal).?;
-                std.debug.assert(literal.token.id == .number);
-                const bytes = SourceBytes{
-                    .slice = literal.token.slice(source),
-                    .code_page = code_page_lookup.getForToken(literal.token),
-                };
-                const parsed = literals.parseNumberLiteral(bytes).value;
-                if (context.initial_value_used) return .{ .value = parsed };
-                // First operand: OR the initial value into it.
-                context.initial_value_used = true;
-                return .{ .value = parsed | context.initial_value };
-            },
-            .binary_expression => {
-                const binary = expression_node.cast(.binary_expression).?;
-                // Left is evaluated before right, so the left-most operand is
-                // the one that consumes the initial value.
-                const left = evaluateFlagsExpression(binary.left, source, code_page_lookup, context);
-                const right = evaluateFlagsExpression(binary.right, source, code_page_lookup, context);
-                const operator_char = binary.operator.slice(source)[0];
-                const combined = left.evaluateOperator(operator_char, right);
-                // The mask is applied here, so the returned number carries the
-                // default (all ones) mask again.
-                return .{ .value = combined.applyNotMask() };
-            },
-            .grouped_expression => {
-                const grouped = expression_node.cast(.grouped_expression).?;
-                return evaluateFlagsExpression(grouped.expression, source, code_page_lookup, context);
-            },
-            .not_expression => {
-                const not_node = expression_node.cast(.not_expression).?;
-                const bytes = SourceBytes{
-                    .slice = not_node.number_token.slice(source),
-                    .code_page = code_page_lookup.getForToken(not_node.number_token),
-                };
-                const keep_mask = ~literals.parseNumberLiteral(bytes).value;
-                if (context.initial_value_used) {
-                    // Contributes no bits of its own; only masks bits out.
-                    return .{ .value = 0, .not_mask = keep_mask };
-                }
-                // First operand: clear the NOT-ed bits from the initial value.
-                context.initial_value_used = true;
-                return .{ .value = context.initial_value & keep_mask };
-            },
-            else => unreachable,
-        }
-    }
-
-    /// Evaluates one raw data expression into a `Data` value.
-    ///
-    /// Number literals and binary/grouped expressions become `.number`.
-    /// Quoted ASCII and wide string literals are parsed using `self.allocator`
-    /// and become `.ascii_string` / `.wide_string`; the returned value owns
-    /// that memory and it is released with `Data.deinit`.
-    ///
-    /// A `not_expression` node, or a literal with any other token type, is
-    /// unreachable here. Any other node kind panics.
-    pub fn evaluateDataExpression(self: *Compiler, expression_node: *Node) !Data {
-        switch (expression_node.id) {
-            .literal => {
-                const literal_node = expression_node.cast(.literal).?;
-                switch (literal_node.token.id) {
-                    .number => {
-                        const number = evaluateNumberExpression(expression_node, self.source, self.input_code_pages);
-                        return .{ .number = number };
-                    },
-                    .quoted_ascii_string => {
-                        const column = literal_node.token.calculateColumn(self.source, 8, null);
-                        const bytes = SourceBytes{
-                            .slice = literal_node.token.slice(self.source),
-                            .code_page = self.input_code_pages.getForToken(literal_node.token),
-                        };
-                        const parsed = try literals.parseQuotedAsciiString(self.allocator, bytes, .{
-                            .start_column = column,
-                            .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token },
-                            .output_code_page = self.output_code_pages.getForToken(literal_node.token),
-                        });
-                        // Nothing after this point can fail, so ownership of
-                        // `parsed` passes straight to the returned value and
-                        // no `errdefer` is needed.
-                        return .{ .ascii_string = parsed };
-                    },
-                    .quoted_wide_string => {
-                        const column = literal_node.token.calculateColumn(self.source, 8, null);
-                        const bytes = SourceBytes{
-                            .slice = literal_node.token.slice(self.source),
-                            .code_page = self.input_code_pages.getForToken(literal_node.token),
-                        };
-                        const parsed_string = try literals.parseQuotedWideString(self.allocator, bytes, .{
-                            .start_column = column,
-                            .diagnostics = .{ .diagnostics = self.diagnostics, .token = literal_node.token },
-                        });
-                        // As above: the return below is infallible.
-                        return .{ .wide_string = parsed_string };
-                    },
-                    else => {
-                        std.debug.print("unexpected token in literal node: {}\n", .{literal_node.token});
-                        unreachable; // no other token types should be in a data literal node
-                    },
-                }
-            },
-            .binary_expression, .grouped_expression => {
-                const result = evaluateNumberExpression(expression_node, self.source, self.input_code_pages);
-                return .{ .number = result };
-            },
-            .not_expression => unreachable,
-            else => {
-                std.debug.print("{}\n", .{expression_node.id});
-                @panic("TODO: evaluateDataExpression");
-            },
-        }
-    }
-
-    /// Writes a resource whose body is a list of raw data expressions.
-    ///
-    /// The data is first evaluated into an in-memory buffer so that its size
-    /// is known, then the resource header is written, followed by the data.
-    /// Fails with a `resource_data_size_exceeds_max` diagnostic if the data
-    /// would not fit in a u32.
-    pub fn writeResourceRawData(self: *Compiler, node: *Node.ResourceRawData, writer: anytype) !void {
-        var buffer = std.ArrayList(u8).init(self.allocator);
-        defer buffer.deinit();
-        // The header stores the data length as a u32, so cap the buffered
-        // data at maxInt(u32) bytes; the real size can then always be stored.
-        var capped = limitedWriter(buffer.writer(), std.math.maxInt(u32));
-        const capped_writer = capped.writer();
-
-        for (node.raw_data) |expression| {
-            const value = try self.evaluateDataExpression(expression);
-            defer value.deinit(self.allocator);
-            value.write(capped_writer) catch |err| switch (err) {
-                error.NoSpaceLeft => return self.addErrorDetailsAndFail(.{
-                    .err = .resource_data_size_exceeds_max,
-                    .token = node.id,
-                }),
-                else => |e| return e,
-            };
-        }
-
-        // Cannot fail: the capped writer never lets more than maxInt(u32)
-        // bytes into the buffer.
-        const byte_count: u32 = @intCast(buffer.items.len);
-        try self.writeResourceHeader(writer, node.id, node.type, byte_count, node.common_resource_attributes, self.state.language);
-
-        var stream = std.io.fixedBufferStream(buffer.items);
-        try writeResourceData(writer, stream.reader(), byte_count);
-    }
-
-    /// Builds a resource header for the given id/type tokens with the given
-    /// `language` and `data_size`, applies the memory flags found in
-    /// `common_resource_attributes`, and writes the header to `writer`.
-    pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void {
-        var header = try self.resourceHeader(id_token, type_token, .{
-            .language = language,
-            .data_size = data_size,
-        });
-        defer header.deinit(self.allocator);
-        header.applyMemoryFlags(common_resource_attributes, self.source);
-
-        // Diagnostics raised while writing are attributed to the id token.
-        const write_options = .{ .diagnostics = self.diagnostics, .token = id_token };
-        try header.write(writer, write_options);
-    }
-
-    /// Copies up to `data_size` bytes from `data_reader` to `writer` through a
-    /// fixed 4096-byte buffer. No padding is written afterwards.
-    pub fn writeResourceDataNoPadding(writer: anytype, data_reader: anytype, data_size: u32) !void {
-        const Pump = std.fifo.LinearFifo(u8, .{ .Static = 4096 });
-
-        // Stop reading after `data_size` bytes even if the source has more.
-        var bounded = std.io.limitedReader(data_reader, data_size);
-        var pump = Pump.init();
-        try pump.pump(bounded.reader(), writer);
-    }
-
-    /// Copies up to `data_size` bytes from `data_reader` to `writer`, then
-    /// writes the zero bytes needed to pad `data_size` up to a multiple of 4.
-    pub fn writeResourceData(writer: anytype, data_reader: anytype, data_size: u32) !void {
-        try writeResourceDataNoPadding(writer, data_reader, data_size);
-        // Same padding that `writeDataPadding` emits.
-        try writer.writeByteNTimes(0, numPaddingBytesNeeded(data_size));
-    }
-
-    /// Writes the zero bytes needed to pad `data_size` up to a multiple of 4.
-    pub fn writeDataPadding(writer: anytype, data_size: u32) !void {
-        const padding_len = numPaddingBytesNeeded(data_size);
-        try writer.writeByteNTimes(0, padding_len);
-    }
-
-    /// Returns how many bytes (0 to 3) must follow `data_size` bytes of data
-    /// to reach the next multiple of 4.
-    pub fn numPaddingBytesNeeded(data_size: u32) u2 {
-        const remainder = data_size % 4;
-        // `remainder` is 0..3, so `4 - remainder` is 1..4 and cannot overflow;
-        // the final `% 4` maps the "already aligned" case (4) back to 0.
-        return @intCast((4 - remainder) % 4);
-    }
-
-    /// Evaluates an accelerator's key expression to a u16.
-    ///
-    /// Number expressions are evaluated and narrowed with `asWord`. Otherwise
-    /// the node must be a string literal, which is handed to
-    /// `res.parseAcceleratorKeyString` together with `is_virt`.
-    pub fn evaluateAcceleratorKeyExpression(self: *Compiler, node: *Node, is_virt: bool) !u16 {
-        if (node.isNumberExpression()) {
-            const number = evaluateNumberExpression(node, self.source, self.input_code_pages);
-            return number.asWord();
-        } else {
-            std.debug.assert(node.isStringLiteral());
-            const string_literal = @fieldParentPtr(Node.Literal, "base", node);
-            const key_bytes = SourceBytes{
-                .slice = string_literal.token.slice(self.source),
-                .code_page = self.input_code_pages.getForToken(string_literal.token),
-            };
-            const start_column = string_literal.token.calculateColumn(self.source, 8, null);
-            return res.parseAcceleratorKeyString(key_bytes, is_virt, .{
-                .start_column = start_column,
-                .diagnostics = .{ .diagnostics = self.diagnostics, .token = string_literal.token },
-            });
-        }
-    }
-
-    /// Writes the resource described by an `Accelerators` node.
-    ///
-    /// The accelerator table is first built in memory so that its size is
-    /// known, then the header (with memory flags and optional statements
-    /// applied) is written, followed by the table data.
-    pub fn writeAccelerators(self: *Compiler, node: *Node.Accelerators, writer: anytype) !void {
-        var buffer = std.ArrayList(u8).init(self.allocator);
-        defer buffer.deinit();
-
-        // The header stores the data length as a u32, so cap the buffered
-        // data at maxInt(u32) bytes; the real size can then always be stored.
-        var capped = limitedWriter(buffer.writer(), std.math.maxInt(u32));
-        const capped_writer = capped.writer();
-
-        self.writeAcceleratorsData(node, capped_writer) catch |err| switch (err) {
-            error.NoSpaceLeft => return self.addErrorDetailsAndFail(.{
-                .err = .resource_data_size_exceeds_max,
-                .token = node.id,
-            }),
-            else => |e| return e,
-        };
-
-        // Cannot fail: the capped writer never lets more than maxInt(u32)
-        // bytes into the buffer.
-        const byte_count: u32 = @intCast(buffer.items.len);
-        var header = try self.resourceHeader(node.id, node.type, .{
-            .data_size = byte_count,
-        });
-        defer header.deinit(self.allocator);
-
-        header.applyMemoryFlags(node.common_resource_attributes, self.source);
-        header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);
-        try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
-        var stream = std.io.fixedBufferStream(buffer.items);
-        try writeResourceData(writer, stream.reader(), byte_count);
-    }
-
-    /// Writes one 8-byte entry per accelerator to `data_writer`: the modifier
-    /// byte, a padding byte, the key, the command id, and two padding bytes.
-    ///
-    /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all
-    /// writes to the writer within this function could return
-    /// error.NoSpaceLeft.
-    pub fn writeAcceleratorsData(self: *Compiler, node: *Node.Accelerators, data_writer: anytype) !void {
-        for (node.accelerators, 0..) |accel_node, index| {
-            const accel = @fieldParentPtr(Node.Accelerator, "base", accel_node);
-
-            var modifiers = res.AcceleratorModifiers{};
-            for (accel.type_and_options) |option_token| {
-                const option_text = option_token.slice(self.source);
-                modifiers.apply(rc.AcceleratorTypeAndOptions.map.get(option_text).?);
-            }
-
-            // A numeric event needs an explicit ASCII or VIRTKEY type.
-            if (accel.event.isNumberExpression() and !modifiers.explicit_ascii_or_virtkey) {
-                return self.addErrorDetailsAndFail(.{
-                    .err = .accelerator_type_required,
-                    .token = accel.event.getFirstToken(),
-                    .token_span_end = accel.event.getLastToken(),
-                });
-            }
-
-            const is_virtkey = modifiers.isSet(.virtkey);
-            const key = self.evaluateAcceleratorKeyExpression(accel.event, is_virtkey) catch |err| switch (err) {
-                error.OutOfMemory => |e| return e,
-                // Every other failure is reported as an invalid key.
-                else => |e| return self.addErrorDetailsAndFail(.{
-                    .err = .invalid_accelerator_key,
-                    .token = accel.event.getFirstToken(),
-                    .token_span_end = accel.event.getLastToken(),
-                    .extra = .{ .accelerator_error = .{
-                        .err = ErrorDetails.AcceleratorError.enumFromError(e),
-                    } },
-                }),
-            };
-            const command_id = evaluateNumberExpression(accel.idvalue, self.source, self.input_code_pages);
-
-            const is_last = index == node.accelerators.len - 1;
-            if (is_last) modifiers.markLast();
-
-            try data_writer.writeByte(modifiers.value);
-            try data_writer.writeByte(0); // padding
-            try data_writer.writeInt(u16, key, .little);
-            try data_writer.writeInt(u16, command_id.asWord(), .little);
-            try data_writer.writeInt(u16, 0, .little); // padding
-        }
-    }
-
-    /// Values collected from a dialog's optional statements, with the
-    /// defaults that apply when a statement is absent.
-    const DialogOptionalStatementValues = struct {
-        /// Defaults to `WS_SYSMENU | WS_BORDER | WS_POPUP`.
-        style: u32 = res.WS.SYSMENU | res.WS.BORDER | res.WS.POPUP,
-        exstyle: u32 = 0,
-        /// Either an ordinal or a name; null when not specified.
-        class: ?NameOrOrdinal = null,
-        /// Either an ordinal or a name; null when not specified.
-        menu: ?NameOrOrdinal = null,
-        font: ?FontStatementValues = null,
-        /// Token of the caption literal; null when not specified.
-        caption: ?Token = null,
-    };
-
- pub fn writeDialog(self: *Compiler, node: *Node.Dialog, writer: anytype) !void {
- var data_buffer = std.ArrayList(u8).init(self.allocator);
- defer data_buffer.deinit();
- // The dialog data is built up in memory first so that its final size is known before the
- // resource header is written; the size is capped at maxInt(u32) since the header stores it as a u32.
- var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32));
- const data_writer = limited_writer.writer();
-
- const resource = Resource.fromString(.{
- .slice = node.type.slice(self.source),
- .code_page = self.input_code_pages.getForToken(node.type),
- });
- std.debug.assert(resource == .dialog or resource == .dialogex);
-
- var optional_statement_values: DialogOptionalStatementValues = .{};
- defer {
- if (optional_statement_values.class) |class| {
- class.deinit(self.allocator);
- }
- if (optional_statement_values.menu) |menu| {
- menu.deinit(self.allocator);
- }
- }
- var skipped_menu_or_classes = std.ArrayList(*Node.SimpleStatement).init(self.allocator);
- defer skipped_menu_or_classes.deinit();
- var last_menu: *Node.SimpleStatement = undefined;
- var last_class: *Node.SimpleStatement = undefined;
- var last_menu_would_be_forced_ordinal = false;
- var last_menu_has_digit_as_first_char = false;
- var last_menu_did_uppercase = false;
- var last_class_would_be_forced_ordinal = false;
-
- for (node.optional_statements) |optional_statement| {
- switch (optional_statement.id) {
- .simple_statement => {
- const simple_statement = @fieldParentPtr(Node.SimpleStatement, "base", optional_statement);
- const statement_identifier = simple_statement.identifier;
- const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
- switch (statement_type) {
- .style, .exstyle => {
- const style = evaluateFlagsExpressionWithDefault(0, simple_statement.value, self.source, self.input_code_pages);
- if (statement_type == .style) {
- optional_statement_values.style = style;
- } else {
- optional_statement_values.exstyle = style;
- }
- },
- .caption => {
- std.debug.assert(simple_statement.value.id == .literal);
- const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
- optional_statement_values.caption = literal_node.token;
- },
- .class => {
- const is_duplicate = optional_statement_values.class != null;
- if (is_duplicate) {
- try skipped_menu_or_classes.append(last_class);
- }
- const forced_ordinal = is_duplicate and optional_statement_values.class.? == .ordinal;
- // In the Win32 RC compiler, if any CLASS values that are interpreted as
- // an ordinal exist, it affects all future CLASS statements and forces
- // them to be treated as an ordinal no matter what.
- if (forced_ordinal) {
- last_class_would_be_forced_ordinal = true;
- }
- // Free and clear the previous CLASS value, if any; the last one specified wins.
- if (optional_statement_values.class) |prev| {
- prev.deinit(self.allocator);
- optional_statement_values.class = null;
- }
-
- if (simple_statement.value.isNumberExpression()) {
- const class_ordinal = evaluateNumberExpression(simple_statement.value, self.source, self.input_code_pages);
- optional_statement_values.class = NameOrOrdinal{ .ordinal = class_ordinal.asWord() };
- } else {
- std.debug.assert(simple_statement.value.isStringLiteral());
- const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
- const parsed = try self.parseQuotedStringAsWideString(literal_node.token);
- optional_statement_values.class = NameOrOrdinal{ .name = parsed };
- }
-
- last_class = simple_statement;
- },
- .menu => {
- const is_duplicate = optional_statement_values.menu != null;
- if (is_duplicate) {
- try skipped_menu_or_classes.append(last_menu);
- }
- const forced_ordinal = is_duplicate and optional_statement_values.menu.? == .ordinal;
- // In the Win32 RC compiler, if any MENU values that are interpreted as
- // an ordinal exist, it affects all future MENU statements and forces
- // them to be treated as an ordinal no matter what.
- if (forced_ordinal) {
- last_menu_would_be_forced_ordinal = true;
- }
- // Free and clear the previous MENU value, if any; the last one specified wins.
- if (optional_statement_values.menu) |prev| {
- prev.deinit(self.allocator);
- optional_statement_values.menu = null;
- }
-
- std.debug.assert(simple_statement.value.id == .literal);
- const literal_node = @fieldParentPtr(Node.Literal, "base", simple_statement.value);
-
- const token_slice = literal_node.token.slice(self.source);
- const bytes = SourceBytes{
- .slice = token_slice,
- .code_page = self.input_code_pages.getForToken(literal_node.token),
- };
- optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes);
-
- if (optional_statement_values.menu.? == .name) {
- if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| {
- try self.addErrorDetails(.{
- .err = .invalid_digit_character_in_ordinal,
- .type = .err,
- .token = literal_node.token,
- });
- return self.addErrorDetailsAndFail(.{
- .err = .win32_non_ascii_ordinal,
- .type = .note,
- .token = literal_node.token,
- .print_source_line = false,
- .extra = .{ .number = win32_rc_ordinal.ordinal },
- });
- }
- }
-
- // Need to keep track of some properties of the value
- // in order to emit the appropriate warning(s) later on.
- // See where the warnings are emitted below (outside this loop)
- // for the full explanation.
- var did_uppercase = false;
- var codepoint_i: usize = 0;
- while (bytes.code_page.codepointAt(codepoint_i, bytes.slice)) |codepoint| : (codepoint_i += codepoint.byte_len) {
- const c = codepoint.value;
- switch (c) {
- 'a'...'z' => {
- did_uppercase = true;
- break;
- },
- else => {},
- }
- }
- last_menu_did_uppercase = did_uppercase;
- last_menu_has_digit_as_first_char = std.ascii.isDigit(token_slice[0]);
- last_menu = simple_statement;
- },
- else => {},
- }
- },
- .font_statement => {
- const font = @fieldParentPtr(Node.FontStatement, "base", optional_statement);
- if (optional_statement_values.font != null) {
- optional_statement_values.font.?.node = font;
- } else {
- optional_statement_values.font = FontStatementValues{ .node = font };
- }
- if (font.weight) |weight| {
- const value = evaluateNumberExpression(weight, self.source, self.input_code_pages);
- optional_statement_values.font.?.weight = value.asWord();
- }
- if (font.italic) |italic| {
- const value = evaluateNumberExpression(italic, self.source, self.input_code_pages);
- optional_statement_values.font.?.italic = value.asWord() != 0;
- }
- },
- else => {},
- }
- }
-
- for (skipped_menu_or_classes.items) |simple_statement| {
- const statement_identifier = simple_statement.identifier;
- const statement_type = rc.OptionalStatements.dialog_map.get(statement_identifier.slice(self.source)) orelse continue;
- try self.addErrorDetails(.{
- .err = .duplicate_menu_or_class_skipped,
- .type = .warning,
- .token = simple_statement.identifier,
- .token_span_start = simple_statement.base.getFirstToken(),
- .token_span_end = simple_statement.base.getLastToken(),
- .extra = .{ .menu_or_class = switch (statement_type) {
- .menu => .menu,
- .class => .class,
- else => unreachable,
- } },
- });
- }
- // The Win32 RC compiler miscompiles the value in the following scenario:
- // Multiple CLASS parameters are specified and any of them are treated as a number, then
- // the last CLASS is always treated as a number no matter what
- if (last_class_would_be_forced_ordinal and optional_statement_values.class.? == .name) {
- const literal_node = @fieldParentPtr(Node.Literal, "base", last_class.value);
- const ordinal_value = res.ForcedOrdinal.fromUtf16Le(optional_statement_values.class.?.name);
-
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_class,
- .type = .warning,
- .token = literal_node.token,
- .extra = .{ .number = ordinal_value },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_class,
- .type = .note,
- .print_source_line = false,
- .token = literal_node.token,
- .extra = .{ .number = ordinal_value },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
- .type = .note,
- .print_source_line = false,
- .token = literal_node.token,
- .extra = .{ .menu_or_class = .class },
- });
- }
- // The Win32 RC compiler miscompiles the id in two different scenarios:
- // 1. The first character of the ID is a digit, in which case it is always treated as a number
- // no matter what (and therefore does not match how the MENU/MENUEX id is parsed)
- // 2. Multiple MENU parameters are specified and any of them are treated as a number, then
- // the last MENU is always treated as a number no matter what
- if ((last_menu_would_be_forced_ordinal or last_menu_has_digit_as_first_char) and optional_statement_values.menu.? == .name) {
- const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
- const token_slice = literal_node.token.slice(self.source);
- const bytes = SourceBytes{
- .slice = token_slice,
- .code_page = self.input_code_pages.getForToken(literal_node.token),
- };
- const ordinal_value = res.ForcedOrdinal.fromBytes(bytes);
-
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_menu_id,
- .type = .warning,
- .token = literal_node.token,
- .extra = .{ .number = ordinal_value },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_menu_id,
- .type = .note,
- .print_source_line = false,
- .token = literal_node.token,
- .extra = .{ .number = ordinal_value },
- });
- if (last_menu_would_be_forced_ordinal) {
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
- .type = .note,
- .print_source_line = false,
- .token = literal_node.token,
- .extra = .{ .menu_or_class = .menu },
- });
- } else {
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_dialog_menu_id_starts_with_digit,
- .type = .note,
- .print_source_line = false,
- .token = literal_node.token,
- });
- }
- }
- // The MENU id parsing uses the exact same logic as the MENU/MENUEX resource id parsing,
- // which means that it will convert ASCII characters to uppercase during the 'name' parsing.
- // This turns out not to matter (`LoadMenu` does a case-insensitive lookup anyway),
- // but it still makes sense to share the uppercasing logic since the MENU parameter
- // here is just a reference to a MENU/MENUEX id within the .exe.
- // So, because this is an intentional but inconsequential-to-the-user difference
- // between resinator and the Win32 RC compiler, we only emit a hint instead of
- // a warning.
- if (last_menu_did_uppercase) {
- const literal_node = @fieldParentPtr(Node.Literal, "base", last_menu.value);
- try self.addErrorDetails(.{
- .err = .dialog_menu_id_was_uppercased,
- .type = .hint,
- .token = literal_node.token,
- });
- }
-
- const x = evaluateNumberExpression(node.x, self.source, self.input_code_pages);
- const y = evaluateNumberExpression(node.y, self.source, self.input_code_pages);
- const width = evaluateNumberExpression(node.width, self.source, self.input_code_pages);
- const height = evaluateNumberExpression(node.height, self.source, self.input_code_pages);
-
- // FONT statement requires DS_SETFONT, and if it's not present DS_SETFONT must be unset
- if (optional_statement_values.font) |_| {
- optional_statement_values.style |= res.DS.SETFONT;
- } else {
- optional_statement_values.style &= ~res.DS.SETFONT;
- }
- // CAPTION statement implies WS_CAPTION
- if (optional_statement_values.caption) |_| {
- optional_statement_values.style |= res.WS.CAPTION;
- }
-
- self.writeDialogHeaderAndStrings(
- node,
- data_writer,
- resource,
- &optional_statement_values,
- x,
- y,
- width,
- height,
- ) catch |err| switch (err) {
- // Dialog header and menu/class/title strings can never exceed u32 bytes
- // on their own, so this error is unreachable.
- error.NoSpaceLeft => unreachable,
- else => |e| return e,
- };
-
- var controls_by_id = std.AutoHashMap(u32, *const Node.ControlStatement).init(self.allocator);
- // Number of controls are guaranteed by the parser to be within maxInt(u16).
- try controls_by_id.ensureTotalCapacity(@as(u16, @intCast(node.controls.len)));
- defer controls_by_id.deinit();
-
- for (node.controls) |control_node| {
- const control = @fieldParentPtr(Node.ControlStatement, "base", control_node);
-
- self.writeDialogControl(
- control,
- data_writer,
- resource,
- // We know the data_buffer len is limited to u32 max.
- @intCast(data_buffer.items.len),
- &controls_by_id,
- ) catch |err| switch (err) {
- error.NoSpaceLeft => {
- try self.addErrorDetails(.{
- .err = .resource_data_size_exceeds_max,
- .token = node.id,
- });
- return self.addErrorDetailsAndFail(.{
- .err = .resource_data_size_exceeds_max,
- .type = .note,
- .token = control.type,
- });
- },
- else => |e| return e,
- };
- }
-
- const data_size: u32 = @intCast(data_buffer.items.len);
- var header = try self.resourceHeader(node.id, node.type, .{
- .data_size = data_size,
- });
- defer header.deinit(self.allocator);
-
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);
-
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
- var data_fbs = std.io.fixedBufferStream(data_buffer.items);
- try writeResourceData(writer, data_fbs.reader(), data_size);
- }
-
- fn writeDialogHeaderAndStrings(
- self: *Compiler,
- node: *Node.Dialog,
- data_writer: anytype,
- resource: Resource,
- optional_statement_values: *const DialogOptionalStatementValues,
- x: Number,
- y: Number,
- width: Number,
- height: Number,
- ) !void {
- // Header
- if (resource == .dialogex) {
- const help_id: u32 = help_id: {
- if (node.help_id == null) break :help_id 0;
- break :help_id evaluateNumberExpression(node.help_id.?, self.source, self.input_code_pages).value;
- };
- try data_writer.writeInt(u16, 1, .little); // version number, always 1
- try data_writer.writeInt(u16, 0xFFFF, .little); // signature, always 0xFFFF
- try data_writer.writeInt(u32, help_id, .little);
- try data_writer.writeInt(u32, optional_statement_values.exstyle, .little);
- try data_writer.writeInt(u32, optional_statement_values.style, .little);
- } else {
- try data_writer.writeInt(u32, optional_statement_values.style, .little);
- try data_writer.writeInt(u32, optional_statement_values.exstyle, .little);
- }
- // This limit is enforced by the parser, so we know the number of controls
- // is within the range of a u16.
- try data_writer.writeInt(u16, @as(u16, @intCast(node.controls.len)), .little);
- try data_writer.writeInt(u16, x.asWord(), .little);
- try data_writer.writeInt(u16, y.asWord(), .little);
- try data_writer.writeInt(u16, width.asWord(), .little);
- try data_writer.writeInt(u16, height.asWord(), .little);
-
- // Menu
- if (optional_statement_values.menu) |menu| {
- try menu.write(data_writer);
- } else {
- try data_writer.writeInt(u16, 0, .little);
- }
- // Class
- if (optional_statement_values.class) |class| {
- try class.write(data_writer);
- } else {
- try data_writer.writeInt(u16, 0, .little);
- }
- // Caption
- if (optional_statement_values.caption) |caption| {
- const parsed = try self.parseQuotedStringAsWideString(caption);
- defer self.allocator.free(parsed);
- try data_writer.writeAll(std.mem.sliceAsBytes(parsed[0 .. parsed.len + 1]));
- } else {
- try data_writer.writeInt(u16, 0, .little);
- }
- // Font
- if (optional_statement_values.font) |font| {
- try self.writeDialogFont(resource, font, data_writer);
- }
- }
-
- fn writeDialogControl(
- self: *Compiler,
- control: *Node.ControlStatement,
- data_writer: anytype,
- resource: Resource,
- bytes_written_so_far: u32,
- controls_by_id: *std.AutoHashMap(u32, *const Node.ControlStatement),
- ) !void {
- const control_type = rc.Control.map.get(control.type.slice(self.source)).?;
-
- // Each control must be at a 4-byte boundary. However, the Windows RC
- // compiler will miscompile controls if their extra data ends on an odd offset.
- // We will avoid the miscompilation and emit a warning.
- const num_padding = numPaddingBytesNeeded(bytes_written_so_far);
- if (num_padding == 1 or num_padding == 3) {
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_control_padding,
- .type = .warning,
- .token = control.type,
- });
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_control_padding,
- .type = .note,
- .print_source_line = false,
- .token = control.type,
- });
- }
- try data_writer.writeByteNTimes(0, num_padding);
-
- const style = if (control.style) |style_expression|
- // Certain styles are implied by the control type
- evaluateFlagsExpressionWithDefault(res.ControlClass.getImpliedStyle(control_type), style_expression, self.source, self.input_code_pages)
- else
- res.ControlClass.getImpliedStyle(control_type);
-
- const exstyle = if (control.exstyle) |exstyle_expression|
- evaluateFlagsExpressionWithDefault(0, exstyle_expression, self.source, self.input_code_pages)
- else
- 0;
-
- switch (resource) {
- .dialog => {
- // Note: Reverse order from DIALOGEX
- try data_writer.writeInt(u32, style, .little);
- try data_writer.writeInt(u32, exstyle, .little);
- },
- .dialogex => {
- const help_id: u32 = if (control.help_id) |help_id_expression|
- evaluateNumberExpression(help_id_expression, self.source, self.input_code_pages).value
- else
- 0;
- try data_writer.writeInt(u32, help_id, .little);
- // Note: Reverse order from DIALOG
- try data_writer.writeInt(u32, exstyle, .little);
- try data_writer.writeInt(u32, style, .little);
- },
- else => unreachable,
- }
-
- const control_x = evaluateNumberExpression(control.x, self.source, self.input_code_pages);
- const control_y = evaluateNumberExpression(control.y, self.source, self.input_code_pages);
- const control_width = evaluateNumberExpression(control.width, self.source, self.input_code_pages);
- const control_height = evaluateNumberExpression(control.height, self.source, self.input_code_pages);
-
- try data_writer.writeInt(u16, control_x.asWord(), .little);
- try data_writer.writeInt(u16, control_y.asWord(), .little);
- try data_writer.writeInt(u16, control_width.asWord(), .little);
- try data_writer.writeInt(u16, control_height.asWord(), .little);
-
- const control_id = evaluateNumberExpression(control.id, self.source, self.input_code_pages);
- switch (resource) {
- .dialog => try data_writer.writeInt(u16, control_id.asWord(), .little),
- .dialogex => try data_writer.writeInt(u32, control_id.value, .little),
- else => unreachable,
- }
-
- const control_id_for_map: u32 = switch (resource) {
- .dialog => control_id.asWord(),
- .dialogex => control_id.value,
- else => unreachable,
- };
- const result = controls_by_id.getOrPutAssumeCapacity(control_id_for_map);
- if (result.found_existing) {
- if (!self.silent_duplicate_control_ids) {
- try self.addErrorDetails(.{
- .err = .control_id_already_defined,
- .type = .warning,
- .token = control.id.getFirstToken(),
- .token_span_end = control.id.getLastToken(),
- .extra = .{ .number = control_id_for_map },
- });
- try self.addErrorDetails(.{
- .err = .control_id_already_defined,
- .type = .note,
- .token = result.value_ptr.*.id.getFirstToken(),
- .token_span_end = result.value_ptr.*.id.getLastToken(),
- .extra = .{ .number = control_id_for_map },
- });
- }
- } else {
- result.value_ptr.* = control;
- }
-
- if (res.ControlClass.fromControl(control_type)) |control_class| {
- const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
- try ordinal.write(data_writer);
- } else {
- const class_node = control.class.?;
- if (class_node.isNumberExpression()) {
- const number = evaluateNumberExpression(class_node, self.source, self.input_code_pages);
- const ordinal = NameOrOrdinal{ .ordinal = number.asWord() };
- // This is different from how the Windows RC compiles ordinals here,
- // but I think that's a miscompilation/bug of the Windows implementation.
- // The Windows behavior is (where LSB = least significant byte):
- // - If the LSB is 0x00 => 0xFFFF0000
- // - If the LSB is < 0x80 => 0x000000<LSB>
- // - If the LSB is >= 0x80 => 0x0000FF<LSB>
- //
- // Because of this, we emit a warning about the potential miscompilation
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_control_class_ordinal,
- .type = .warning,
- .token = class_node.getFirstToken(),
- .token_span_end = class_node.getLastToken(),
- });
- try self.addErrorDetails(.{
- .err = .rc_would_miscompile_control_class_ordinal,
- .type = .note,
- .print_source_line = false,
- .token = class_node.getFirstToken(),
- .token_span_end = class_node.getLastToken(),
- });
- // And then write out the ordinal using a proper a NameOrOrdinal encoding.
- try ordinal.write(data_writer);
- } else if (class_node.isStringLiteral()) {
- const literal_node = @fieldParentPtr(Node.Literal, "base", class_node);
- const parsed = try self.parseQuotedStringAsWideString(literal_node.token);
- defer self.allocator.free(parsed);
- if (rc.ControlClass.fromWideString(parsed)) |control_class| {
- const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
- try ordinal.write(data_writer);
- } else {
- // NUL acts as a terminator
- // TODO: Maybe warn when parsed_terminated.len != parsed.len, since
- // it seems unlikely that NUL-termination is something intentional
- const parsed_terminated = std.mem.sliceTo(parsed, 0);
- const name = NameOrOrdinal{ .name = parsed_terminated };
- try name.write(data_writer);
- }
- } else {
- const literal_node = @fieldParentPtr(Node.Literal, "base", class_node);
- const literal_slice = literal_node.token.slice(self.source);
- // This succeeding is guaranteed by the parser
- const control_class = rc.ControlClass.map.get(literal_slice) orelse unreachable;
- const ordinal = NameOrOrdinal{ .ordinal = @intFromEnum(control_class) };
- try ordinal.write(data_writer);
- }
- }
-
- if (control.text) |text_token| {
- const bytes = SourceBytes{
- .slice = text_token.slice(self.source),
- .code_page = self.input_code_pages.getForToken(text_token),
- };
- if (text_token.isStringLiteral()) {
- const text = try self.parseQuotedStringAsWideString(text_token);
- defer self.allocator.free(text);
- const name = NameOrOrdinal{ .name = text };
- try name.write(data_writer);
- } else {
- std.debug.assert(text_token.id == .number);
- const number = literals.parseNumberLiteral(bytes);
- const ordinal = NameOrOrdinal{ .ordinal = number.asWord() };
- try ordinal.write(data_writer);
- }
- } else {
- try NameOrOrdinal.writeEmpty(data_writer);
- }
-
- var extra_data_buf = std.ArrayList(u8).init(self.allocator);
- defer extra_data_buf.deinit();
- // The extra data byte length must be able to fit within a u16.
- var limited_extra_data_writer = limitedWriter(extra_data_buf.writer(), std.math.maxInt(u16));
- const extra_data_writer = limited_extra_data_writer.writer();
- for (control.extra_data) |data_expression| {
- const data = try self.evaluateDataExpression(data_expression);
- defer data.deinit(self.allocator);
- data.write(extra_data_writer) catch |err| switch (err) {
- error.NoSpaceLeft => {
- try self.addErrorDetails(.{
- .err = .control_extra_data_size_exceeds_max,
- .token = control.type,
- });
- return self.addErrorDetailsAndFail(.{
- .err = .control_extra_data_size_exceeds_max,
- .type = .note,
- .token = data_expression.getFirstToken(),
- .token_span_end = data_expression.getLastToken(),
- });
- },
- else => |e| return e,
- };
- }
- // We know the extra_data_buf size fits within a u16.
- const extra_data_size: u16 = @intCast(extra_data_buf.items.len);
- try data_writer.writeInt(u16, extra_data_size, .little);
- try data_writer.writeAll(extra_data_buf.items);
- }
-
- pub fn writeToolbar(self: *Compiler, node: *Node.Toolbar, writer: anytype) !void {
- var data_buffer = std.ArrayList(u8).init(self.allocator);
- defer data_buffer.deinit();
- const data_writer = data_buffer.writer();
-
- const button_width = evaluateNumberExpression(node.button_width, self.source, self.input_code_pages);
- const button_height = evaluateNumberExpression(node.button_height, self.source, self.input_code_pages);
-
- // I'm assuming this is some sort of version
- // TODO: Try to find something mentioning this
- try data_writer.writeInt(u16, 1, .little);
- try data_writer.writeInt(u16, button_width.asWord(), .little);
- try data_writer.writeInt(u16, button_height.asWord(), .little);
- try data_writer.writeInt(u16, @as(u16, @intCast(node.buttons.len)), .little);
-
- for (node.buttons) |button_or_sep| {
- switch (button_or_sep.id) {
- .literal => { // This is always SEPARATOR
- std.debug.assert(button_or_sep.cast(.literal).?.token.id == .literal);
- try data_writer.writeInt(u16, 0, .little);
- },
- .simple_statement => {
- const value_node = button_or_sep.cast(.simple_statement).?.value;
- const value = evaluateNumberExpression(value_node, self.source, self.input_code_pages);
- try data_writer.writeInt(u16, value.asWord(), .little);
- },
- else => unreachable, // This is a bug in the parser
- }
- }
-
- const data_size: u32 = @intCast(data_buffer.items.len);
- var header = try self.resourceHeader(node.id, node.type, .{
- .data_size = data_size,
- });
- defer header.deinit(self.allocator);
-
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
-
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
- var data_fbs = std.io.fixedBufferStream(data_buffer.items);
- try writeResourceData(writer, data_fbs.reader(), data_size);
- }
-
- /// Weight and italic carry over from previous FONT statements within a single resource,
- /// so they need to be parsed ahead-of-time and stored
- const FontStatementValues = struct {
- weight: u16 = 0,
- italic: bool = false,
- node: *Node.FontStatement,
- };
-
- pub fn writeDialogFont(self: *Compiler, resource: Resource, values: FontStatementValues, writer: anytype) !void {
- const node = values.node;
- const point_size = evaluateNumberExpression(node.point_size, self.source, self.input_code_pages);
- try writer.writeInt(u16, point_size.asWord(), .little);
-
- if (resource == .dialogex) {
- try writer.writeInt(u16, values.weight, .little);
- }
-
- if (resource == .dialogex) {
- try writer.writeInt(u8, @intFromBool(values.italic), .little);
- }
-
- if (node.char_set) |char_set| {
- const value = evaluateNumberExpression(char_set, self.source, self.input_code_pages);
- try writer.writeInt(u8, @as(u8, @truncate(value.value)), .little);
- } else if (resource == .dialogex) {
- try writer.writeInt(u8, 1, .little); // DEFAULT_CHARSET
- }
-
- const typeface = try self.parseQuotedStringAsWideString(node.typeface);
- defer self.allocator.free(typeface);
- try writer.writeAll(std.mem.sliceAsBytes(typeface[0 .. typeface.len + 1]));
- }
-
- pub fn writeMenu(self: *Compiler, node: *Node.Menu, writer: anytype) !void {
- var data_buffer = std.ArrayList(u8).init(self.allocator);
- defer data_buffer.deinit();
- // The header's data length field is a u32 so limit the resource's data size so that
- // we know we can always specify the real size.
- var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u32));
- const data_writer = limited_writer.writer();
-
- const type_bytes = SourceBytes{
- .slice = node.type.slice(self.source),
- .code_page = self.input_code_pages.getForToken(node.type),
- };
- const resource = Resource.fromString(type_bytes);
- std.debug.assert(resource == .menu or resource == .menuex);
-
- self.writeMenuData(node, data_writer, resource) catch |err| switch (err) {
- error.NoSpaceLeft => {
- return self.addErrorDetailsAndFail(.{
- .err = .resource_data_size_exceeds_max,
- .token = node.id,
- });
- },
- else => |e| return e,
- };
-
- // This intCast can't fail because the limitedWriter above guarantees that
- // we will never write more than maxInt(u32) bytes.
- const data_size: u32 = @intCast(data_buffer.items.len);
- var header = try self.resourceHeader(node.id, node.type, .{
- .data_size = data_size,
- });
- defer header.deinit(self.allocator);
-
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
- header.applyOptionalStatements(node.optional_statements, self.source, self.input_code_pages);
-
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
- var data_fbs = std.io.fixedBufferStream(data_buffer.items);
- try writeResourceData(writer, data_fbs.reader(), data_size);
- }
-
- /// Expects `data_writer` to be a LimitedWriter limited to u32, meaning all writes to
- /// the writer within this function could return error.NoSpaceLeft
- pub fn writeMenuData(self: *Compiler, node: *Node.Menu, data_writer: anytype, resource: Resource) !void {
- // menu header
- const version: u16 = if (resource == .menu) 0 else 1;
- try data_writer.writeInt(u16, version, .little);
- const header_size: u16 = if (resource == .menu) 0 else 4;
- try data_writer.writeInt(u16, header_size, .little); // cbHeaderSize
- // Note: There can be extra bytes at the end of this header (`rgbExtra`),
- // but they are always zero-length for us, so we don't write anything
- // (the length of the rgbExtra field is inferred from the header_size).
- // MENU => rgbExtra: [cbHeaderSize]u8
- // MENUEX => rgbExtra: [cbHeaderSize-4]u8
-
- if (resource == .menuex) {
- if (node.help_id) |help_id_node| {
- const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages);
- try data_writer.writeInt(u32, help_id.value, .little);
- } else {
- try data_writer.writeInt(u32, 0, .little);
- }
- }
-
- for (node.items, 0..) |item, i| {
- const is_last = i == node.items.len - 1;
- try self.writeMenuItem(item, data_writer, is_last);
- }
- }
-
- pub fn writeMenuItem(self: *Compiler, node: *Node, writer: anytype, is_last_of_parent: bool) !void {
- switch (node.id) {
- .menu_item_separator => {
- // This is the 'alternate compability form' of the separator, see
- // https://devblogs.microsoft.com/oldnewthing/20080710-00/?p=21673
- //
- // The 'correct' way is to set the MF_SEPARATOR flag, but the Win32 RC
- // compiler still uses this alternate form, so that's what we use too.
- var flags = res.MenuItemFlags{};
- if (is_last_of_parent) flags.markLast();
- try writer.writeInt(u16, flags.value, .little);
- try writer.writeInt(u16, 0, .little); // id
- try writer.writeInt(u16, 0, .little); // null-terminated UTF-16 text
- },
- .menu_item => {
- const menu_item = @fieldParentPtr(Node.MenuItem, "base", node);
- var flags = res.MenuItemFlags{};
- for (menu_item.option_list) |option_token| {
- // This failing would be a bug in the parser
- const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable;
- flags.apply(option);
- }
- if (is_last_of_parent) flags.markLast();
- try writer.writeInt(u16, flags.value, .little);
-
- var result = evaluateNumberExpression(menu_item.result, self.source, self.input_code_pages);
- try writer.writeInt(u16, result.asWord(), .little);
-
- var text = try self.parseQuotedStringAsWideString(menu_item.text);
- defer self.allocator.free(text);
- try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
- },
- .popup => {
- const popup = @fieldParentPtr(Node.Popup, "base", node);
- var flags = res.MenuItemFlags{ .value = res.MF.POPUP };
- for (popup.option_list) |option_token| {
- // This failing would be a bug in the parser
- const option = rc.MenuItem.Option.map.get(option_token.slice(self.source)) orelse unreachable;
- flags.apply(option);
- }
- if (is_last_of_parent) flags.markLast();
- try writer.writeInt(u16, flags.value, .little);
-
- var text = try self.parseQuotedStringAsWideString(popup.text);
- defer self.allocator.free(text);
- try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
-
- for (popup.items, 0..) |item, i| {
- const is_last = i == popup.items.len - 1;
- try self.writeMenuItem(item, writer, is_last);
- }
- },
- inline .menu_item_ex, .popup_ex => |node_type| {
- const menu_item = @fieldParentPtr(node_type.Type(), "base", node);
-
- if (menu_item.type) |flags| {
- const value = evaluateNumberExpression(flags, self.source, self.input_code_pages);
- try writer.writeInt(u32, value.value, .little);
- } else {
- try writer.writeInt(u32, 0, .little);
- }
-
- if (menu_item.state) |state| {
- const value = evaluateNumberExpression(state, self.source, self.input_code_pages);
- try writer.writeInt(u32, value.value, .little);
- } else {
- try writer.writeInt(u32, 0, .little);
- }
-
- if (menu_item.id) |id| {
- const value = evaluateNumberExpression(id, self.source, self.input_code_pages);
- try writer.writeInt(u32, value.value, .little);
- } else {
- try writer.writeInt(u32, 0, .little);
- }
-
- var flags: u16 = 0;
- if (is_last_of_parent) flags |= comptime @as(u16, @intCast(res.MF.END));
- // This constant doesn't seem to have a named #define, it's different than MF_POPUP
- if (node_type == .popup_ex) flags |= 0x01;
- try writer.writeInt(u16, flags, .little);
-
- var text = try self.parseQuotedStringAsWideString(menu_item.text);
- defer self.allocator.free(text);
- try writer.writeAll(std.mem.sliceAsBytes(text[0 .. text.len + 1]));
-
- // Only the combination of the flags u16 and the text bytes can cause
- // non-DWORD alignment, so we can just use the byte length of those
- // two values to realign to DWORD alignment.
- const relevant_bytes = 2 + (text.len + 1) * 2;
- try writeDataPadding(writer, @intCast(relevant_bytes));
-
- if (node_type == .popup_ex) {
- if (menu_item.help_id) |help_id_node| {
- const help_id = evaluateNumberExpression(help_id_node, self.source, self.input_code_pages);
- try writer.writeInt(u32, help_id.value, .little);
- } else {
- try writer.writeInt(u32, 0, .little);
- }
-
- for (menu_item.items, 0..) |item, i| {
- const is_last = i == menu_item.items.len - 1;
- try self.writeMenuItem(item, writer, is_last);
- }
- }
- },
- else => unreachable,
- }
- }
-
- pub fn writeVersionInfo(self: *Compiler, node: *Node.VersionInfo, writer: anytype) !void {
- var data_buffer = std.ArrayList(u8).init(self.allocator);
- defer data_buffer.deinit();
- // The node's length field (which is inclusive of the length of all of its children) is a u16
- // so limit the node's data size so that we know we can always specify the real size.
- var limited_writer = limitedWriter(data_buffer.writer(), std.math.maxInt(u16));
- const data_writer = limited_writer.writer();
-
- try data_writer.writeInt(u16, 0, .little); // placeholder size
- try data_writer.writeInt(u16, res.FixedFileInfo.byte_len, .little);
- try data_writer.writeInt(u16, res.VersionNode.type_binary, .little);
- const key_bytes = std.mem.sliceAsBytes(res.FixedFileInfo.key[0 .. res.FixedFileInfo.key.len + 1]);
- try data_writer.writeAll(key_bytes);
- // The number of bytes written up to this point is always the same, since the name
- // of the node is a constant (FixedFileInfo.key). The total number of bytes
- // written so far is 38, so we need 2 padding bytes to get back to DWORD alignment
- try data_writer.writeInt(u16, 0, .little);
-
- var fixed_file_info = res.FixedFileInfo{};
- for (node.fixed_info) |fixed_info| {
- switch (fixed_info.id) {
- .version_statement => {
- const version_statement = @fieldParentPtr(Node.VersionStatement, "base", fixed_info);
- const version_type = rc.VersionInfo.map.get(version_statement.type.slice(self.source)).?;
-
- // Ensure that all parts are cleared for each version, to properly account for
- // potential duplicate PRODUCTVERSION/FILEVERSION statements
- switch (version_type) {
- .file_version => @memset(&fixed_file_info.file_version.parts, 0),
- .product_version => @memset(&fixed_file_info.product_version.parts, 0),
- else => unreachable,
- }
-
- for (version_statement.parts, 0..) |part, i| {
- const part_value = evaluateNumberExpression(part, self.source, self.input_code_pages);
- if (part_value.is_long) {
- try self.addErrorDetails(.{
- .err = .rc_would_error_u16_with_l_suffix,
- .type = .warning,
- .token = part.getFirstToken(),
- .token_span_end = part.getLastToken(),
- .extra = .{ .statement_with_u16_param = switch (version_type) {
- .file_version => .fileversion,
- .product_version => .productversion,
- else => unreachable,
- } },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_error_u16_with_l_suffix,
- .print_source_line = false,
- .type = .note,
- .token = part.getFirstToken(),
- .token_span_end = part.getLastToken(),
- .extra = .{ .statement_with_u16_param = switch (version_type) {
- .file_version => .fileversion,
- .product_version => .productversion,
- else => unreachable,
- } },
- });
- }
- switch (version_type) {
- .file_version => {
- fixed_file_info.file_version.parts[i] = part_value.asWord();
- },
- .product_version => {
- fixed_file_info.product_version.parts[i] = part_value.asWord();
- },
- else => unreachable,
- }
- }
- },
- .simple_statement => {
- const statement = @fieldParentPtr(Node.SimpleStatement, "base", fixed_info);
- const statement_type = rc.VersionInfo.map.get(statement.identifier.slice(self.source)).?;
- const value = evaluateNumberExpression(statement.value, self.source, self.input_code_pages);
- switch (statement_type) {
- .file_flags_mask => fixed_file_info.file_flags_mask = value.value,
- .file_flags => fixed_file_info.file_flags = value.value,
- .file_os => fixed_file_info.file_os = value.value,
- .file_type => fixed_file_info.file_type = value.value,
- .file_subtype => fixed_file_info.file_subtype = value.value,
- else => unreachable,
- }
- },
- else => unreachable,
- }
- }
- try fixed_file_info.write(data_writer);
-
- for (node.block_statements) |statement| {
- self.writeVersionNode(statement, data_writer, &data_buffer) catch |err| switch (err) {
- error.NoSpaceLeft => {
- try self.addErrorDetails(.{
- .err = .version_node_size_exceeds_max,
- .token = node.id,
- });
- return self.addErrorDetailsAndFail(.{
- .err = .version_node_size_exceeds_max,
- .type = .note,
- .token = statement.getFirstToken(),
- .token_span_end = statement.getLastToken(),
- });
- },
- else => |e| return e,
- };
- }
-
- // We know that data_buffer.items.len is within the limits of a u16, since we
- // limited the writer to maxInt(u16)
- const data_size: u16 = @intCast(data_buffer.items.len);
- // And now that we know the full size of this node (including its children), set its size
- std.mem.writeInt(u16, data_buffer.items[0..2], data_size, .little);
-
- var header = try self.resourceHeader(node.id, node.versioninfo, .{
- .data_size = data_size,
- });
- defer header.deinit(self.allocator);
-
- header.applyMemoryFlags(node.common_resource_attributes, self.source);
-
- try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
-
- var data_fbs = std.io.fixedBufferStream(data_buffer.items);
- try writeResourceData(writer, data_fbs.reader(), data_size);
- }
-
- /// Expects writer to be a LimitedWriter limited to u16, meaning all writes to
- /// the writer within this function could return error.NoSpaceLeft, and that buf.items.len
- /// will never be able to exceed maxInt(u16).
- pub fn writeVersionNode(self: *Compiler, node: *Node, writer: anytype, buf: *std.ArrayList(u8)) !void {
- // We can assume that buf.items.len will never be able to exceed the limits of a u16
- try writeDataPadding(writer, @as(u16, @intCast(buf.items.len)));
-
- const node_and_children_size_offset = buf.items.len;
- try writer.writeInt(u16, 0, .little); // placeholder for size
- const data_size_offset = buf.items.len;
- try writer.writeInt(u16, 0, .little); // placeholder for data size
- const data_type_offset = buf.items.len;
- // Data type is string unless the node contains values that are numbers.
- try writer.writeInt(u16, res.VersionNode.type_string, .little);
-
- switch (node.id) {
- inline .block, .block_value => |node_type| {
- const block_or_value = @fieldParentPtr(node_type.Type(), "base", node);
- const parsed_key = try self.parseQuotedStringAsWideString(block_or_value.key);
- defer self.allocator.free(parsed_key);
-
- const parsed_key_to_first_null = std.mem.sliceTo(parsed_key, 0);
- try writer.writeAll(std.mem.sliceAsBytes(parsed_key_to_first_null[0 .. parsed_key_to_first_null.len + 1]));
-
- var has_number_value: bool = false;
- for (block_or_value.values) |value_value_node_uncasted| {
- const value_value_node = value_value_node_uncasted.cast(.block_value_value).?;
- if (value_value_node.expression.isNumberExpression()) {
- has_number_value = true;
- break;
- }
- }
- // The units used here are dependent on the type. If there are any numbers, then
- // this is a byte count. If there are only strings, then this is a count of
- // UTF-16 code units.
- //
- // The Win32 RC compiler miscompiles this count in the case of values that
- // have a mix of numbers and strings. This is detected and a warning is emitted
- // during parsing, so we can just do the correct thing here.
- var values_size: usize = 0;
-
- try writeDataPadding(writer, @intCast(buf.items.len));
-
- for (block_or_value.values, 0..) |value_value_node_uncasted, i| {
- const value_value_node = value_value_node_uncasted.cast(.block_value_value).?;
- const value_node = value_value_node.expression;
- if (value_node.isNumberExpression()) {
- const number = evaluateNumberExpression(value_node, self.source, self.input_code_pages);
- // This is used to write u16 or u32 depending on the number's suffix
- const data_wrapper = Data{ .number = number };
- try data_wrapper.write(writer);
- // Numbers use byte count
- values_size += if (number.is_long) 4 else 2;
- } else {
- std.debug.assert(value_node.isStringLiteral());
- const literal_node = value_node.cast(.literal).?;
- const parsed_value = try self.parseQuotedStringAsWideString(literal_node.token);
- defer self.allocator.free(parsed_value);
-
- const parsed_to_first_null = std.mem.sliceTo(parsed_value, 0);
- try writer.writeAll(std.mem.sliceAsBytes(parsed_to_first_null));
- // Strings use UTF-16 code-unit count including the null-terminator, but
- // only if there are no number values in the list.
- var value_size = parsed_to_first_null.len;
- if (has_number_value) value_size *= 2; // 2 bytes per UTF-16 code unit
- values_size += value_size;
- // The null-terminator is only included if there's a trailing comma
- // or this is the last value. If the value evaluates to empty, then
- // it never gets a null terminator. If there was an explicit null-terminator
- // in the string, we still need to potentially add one since we already
- // sliced to the terminator.
- const is_last = i == block_or_value.values.len - 1;
- const is_empty = parsed_to_first_null.len == 0;
- const is_only = block_or_value.values.len == 1;
- if ((!is_empty or !is_only) and (is_last or value_value_node.trailing_comma)) {
- try writer.writeInt(u16, 0, .little);
- values_size += if (has_number_value) 2 else 1;
- }
- }
- }
- var data_size_slice = buf.items[data_size_offset..];
- std.mem.writeInt(u16, data_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(values_size)), .little);
-
- if (has_number_value) {
- const data_type_slice = buf.items[data_type_offset..];
- std.mem.writeInt(u16, data_type_slice[0..@sizeOf(u16)], res.VersionNode.type_binary, .little);
- }
-
- if (node_type == .block) {
- const block = block_or_value;
- for (block.children) |child| {
- try self.writeVersionNode(child, writer, buf);
- }
- }
- },
- else => unreachable,
- }
-
- const node_and_children_size = buf.items.len - node_and_children_size_offset;
- const node_and_children_size_slice = buf.items[node_and_children_size_offset..];
- std.mem.writeInt(u16, node_and_children_size_slice[0..@sizeOf(u16)], @as(u16, @intCast(node_and_children_size)), .little);
- }
-
-    /// Records every string of the STRINGTABLE `node` in `self.state.string_tables`.
-    ///
-    /// The language is taken from the first LANGUAGE statement among the node's
-    /// optional statements, falling back to `self.state.language`.
-    /// If a string ID is already defined for that language, an error and a note
-    /// pointing at the existing definition are added to the diagnostics and
-    /// `error.CompileError` is returned.
-    pub fn writeStringTable(self: *Compiler, node: *Node.StringTable) !void {
-        const table_language = getLanguageFromOptionalStatements(node.optional_statements, self.source, self.input_code_pages) orelse self.state.language;
-
-        for (node.strings) |base_node| {
-            const entry = @fieldParentPtr(Node.StringTableString, "base", base_node);
-            const id_data = try self.evaluateDataExpression(entry.id);
-            const id = id_data.number.asWord();
-
-            self.state.string_tables.set(
-                self.arena,
-                table_language,
-                id,
-                entry.string,
-                &node.base,
-                self.source,
-                self.input_code_pages,
-                self.state.version,
-                self.state.characteristics,
-            ) catch |err| switch (err) {
-                error.OutOfMemory => |e| return e,
-                error.StringAlreadyDefined => {
-                    // Both diagnostics point at string tokens rather than id tokens:
-                    // pointing at the ids might be nicer, but it would mean storing
-                    // the id token of every string just for slightly better messages.
-                    try self.addErrorDetails(ErrorDetails{
-                        .err = .string_already_defined,
-                        .token = entry.string,
-                        .extra = .{ .string_and_language = .{ .id = id, .language = table_language } },
-                    });
-                    // The table and the entry must exist, since `set` just reported a duplicate.
-                    const previous_token = self.state.string_tables.tables.getPtr(table_language).?.get(id).?;
-                    return self.addErrorDetailsAndFail(ErrorDetails{
-                        .err = .string_already_defined,
-                        .type = .note,
-                        .token = previous_token,
-                        .extra = .{ .string_and_language = .{ .id = id, .language = table_language } },
-                    });
-                },
-            };
-        }
-    }
-
-    /// Expects this to be a top-level LANGUAGE statement.
-    /// Evaluates both language id expressions and stores their truncated values
-    /// in `self.state.language`.
-    pub fn writeLanguageStatement(self: *Compiler, node: *Node.LanguageStatement) void {
-        const primary_id = Compiler.evaluateNumberExpression(node.primary_language_id, self.source, self.input_code_pages);
-        const sub_id = Compiler.evaluateNumberExpression(node.sublanguage_id, self.source, self.input_code_pages);
-        const current = &self.state.language;
-        current.primary_language_id = @truncate(primary_id.value);
-        current.sublanguage_id = @truncate(sub_id.value);
-    }
-
-    /// Expects this to be a top-level VERSION or CHARACTERISTICS statement.
-    /// Evaluates the statement's value and stores it in the matching field of `self.state`.
-    pub fn writeTopLevelSimpleStatement(self: *Compiler, node: *Node.SimpleStatement) void {
-        const number = Compiler.evaluateNumberExpression(node.value, self.source, self.input_code_pages);
-        const keyword = rc.TopLevelKeywords.map.get(node.identifier.slice(self.source)).?;
-        const target = switch (keyword) {
-            .characteristics => &self.state.characteristics,
-            .version => &self.state.version,
-            else => unreachable,
-        };
-        target.* = number.value;
-    }
-
- /// Optional overrides accepted by `resourceHeader`.
- pub const ResourceHeaderOptions = struct {
- /// Language to store in the header; when null, `resourceHeader` falls back to `self.state.language`.
- language: ?res.Language = null,
- /// Passed through unchanged as the header's `data_size`.
- data_size: DWORD = 0,
- };
-
-    /// Builds a `ResourceHeader` for the resource named by `id_token` with the
-    /// type named by `type_token`, using the current version/characteristics
-    /// from `self.state` and the language/data size from `options`.
-    ///
-    /// When `ResourceHeader.init` rejects the id or the type as a non-ASCII
-    /// ordinal, an error and an explanatory note (carrying the ordinal computed
-    /// by `maybeNonAsciiOrdinalFromString`) are added for the offending token
-    /// and `error.CompileError` is returned.
-    pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) !ResourceHeader {
-        const id_bytes = self.sourceBytesForToken(id_token);
-        const type_bytes = self.sourceBytesForToken(type_token);
-        const language = options.language orelse self.state.language;
-
-        return ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            options.data_size,
-            language,
-            self.state.version,
-            self.state.characteristics,
-        ) catch |err| switch (err) {
-            error.OutOfMemory => |e| return e,
-            error.TypeNonAsciiOrdinal, error.IdNonAsciiOrdinal => {
-                // Both failures are reported the same way; only the token/bytes differ.
-                const is_type = err == error.TypeNonAsciiOrdinal;
-                const bad_token = if (is_type) type_token else id_token;
-                const bad_bytes = if (is_type) type_bytes else id_bytes;
-                const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(bad_bytes).?;
-                try self.addErrorDetails(.{
-                    .err = .invalid_digit_character_in_ordinal,
-                    .type = .err,
-                    .token = bad_token,
-                });
-                return self.addErrorDetailsAndFail(.{
-                    .err = .win32_non_ascii_ordinal,
-                    .type = .note,
-                    .token = bad_token,
-                    .print_source_line = false,
-                    .extra = .{ .number = win32_rc_ordinal.ordinal },
-                });
-            },
-        };
-    }
-
-    /// In-memory form of the header that precedes each resource's data.
-    /// `writeSizeInfo` defines the serialized field order.
-    pub const ResourceHeader = struct {
-        name_value: NameOrOrdinal,
-        type_value: NameOrOrdinal,
-        language: res.Language,
-        memory_flags: MemoryFlags,
-        data_size: DWORD,
-        version: DWORD,
-        characteristics: DWORD,
-        data_version: DWORD = 0,
-
-        pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal };
-
-        /// Creates a header from the raw id/type bytes.
-        ///
-        /// A type that maps to a predefined `res.RT` constant becomes that ordinal;
-        /// any other type, and the id, go through `NameOrOrdinal.fromString` using `allocator`.
-        /// Returns `error.TypeNonAsciiOrdinal` / `error.IdNonAsciiOrdinal` when the
-        /// value was parsed as a name but `maybeNonAsciiOrdinalFromString` recognizes it.
-        /// Memory flags are initialized to the defaults for the resulting type.
-        /// Release the result with `deinit`.
-        pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader {
-            const type_value = if (res.RT.fromResource(Resource.fromString(type_bytes))) |rt_constant|
-                NameOrOrdinal{ .ordinal = @intFromEnum(rt_constant) }
-            else
-                try NameOrOrdinal.fromString(allocator, type_bytes);
-            errdefer type_value.deinit(allocator);
-            if (type_value == .name and NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes) != null) {
-                return error.TypeNonAsciiOrdinal;
-            }
-
-            const name_value = try NameOrOrdinal.fromString(allocator, id_bytes);
-            errdefer name_value.deinit(allocator);
-            if (name_value == .name and NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes) != null) {
-                return error.IdNonAsciiOrdinal;
-            }
-
-            return ResourceHeader{
-                .name_value = name_value,
-                .type_value = type_value,
-                .data_size = data_size,
-                .memory_flags = MemoryFlags.defaults(type_value.predefinedResourceType()),
-                .language = language,
-                .version = version,
-                .characteristics = characteristics,
-            };
-        }
-
-        /// Releases the name and type values using `allocator`.
-        pub fn deinit(self: ResourceHeader, allocator: Allocator) void {
-            self.name_value.deinit(allocator);
-            self.type_value.deinit(allocator);
-        }
-
-        /// Result of `calcSize`: total header size and the padding written after the name.
-        pub const SizeInfo = struct {
-            bytes: u32,
-            padding_after_name: u2,
-        };
-
-        /// Computes the serialized header size: 8 bytes for the two leading DWORDs,
-        /// the name and type, padding from `numPaddingBytesNeeded`, and 16 trailing bytes.
-        /// Returns `error.Overflow` if any step does not fit in a `u32`.
-        fn calcSize(self: ResourceHeader) error{Overflow}!SizeInfo {
-            const name_len = std.math.cast(u32, self.name_value.byteLen()) orelse return error.Overflow;
-            const type_len = std.math.cast(u32, self.type_value.byteLen()) orelse return error.Overflow;
-
-            const with_name = try std.math.add(u32, 8, name_len);
-            const unpadded = try std.math.add(u32, with_name, type_len);
-            const padding = numPaddingBytesNeeded(unpadded);
-            const padded = try std.math.add(u32, unpadded, padding);
-            const total = try std.math.add(u32, padded, 16);
-
-            return SizeInfo{ .bytes = total, .padding_after_name = padding };
-        }
-
-        /// Writes the header, treating a size overflow as unreachable.
-        pub fn writeAssertNoOverflow(self: ResourceHeader, writer: anytype) !void {
-            const size_info = self.calcSize() catch unreachable;
-            return self.writeSizeInfo(writer, size_info);
-        }
-
-        /// Writes the header. If the header size overflows, a
-        /// `resource_data_size_exceeds_max` diagnostic is appended for
-        /// `err_ctx.token` and `error.CompileError` is returned.
-        pub fn write(self: ResourceHeader, writer: anytype, err_ctx: errors.DiagnosticsContext) !void {
-            if (self.calcSize()) |size_info| {
-                return self.writeSizeInfo(writer, size_info);
-            } else |_| {
-                try err_ctx.diagnostics.append(.{
-                    .err = .resource_data_size_exceeds_max,
-                    .token = err_ctx.token,
-                });
-                return error.CompileError;
-            }
-        }
-
-        /// Serializes every header field in little-endian order.
-        fn writeSizeInfo(self: ResourceHeader, writer: anytype, size_info: SizeInfo) !void {
-            // DataSize, HeaderSize
-            try writer.writeInt(DWORD, self.data_size, .little);
-            try writer.writeInt(DWORD, size_info.bytes, .little);
-            // TYPE, NAME, then the padding after NAME
-            try self.type_value.write(writer);
-            try self.name_value.write(writer);
-            try writer.writeByteNTimes(0, size_info.padding_after_name);
-
-            // DataVersion, MemoryFlags, LanguageId, Version, Characteristics
-            try writer.writeInt(DWORD, self.data_version, .little);
-            try writer.writeInt(WORD, self.memory_flags.value, .little);
-            try writer.writeInt(WORD, self.language.asInt(), .little);
-            try writer.writeInt(DWORD, self.version, .little);
-            try writer.writeInt(DWORD, self.characteristics, .little);
-        }
-
-        /// Returns the predefined resource type of `type_value`, if any.
-        pub fn predefinedResourceType(self: ResourceHeader) ?res.RT {
-            return self.type_value.predefinedResourceType();
-        }
-
-        /// Applies the common resource attribute `tokens` to this header's memory flags.
-        pub fn applyMemoryFlags(self: *ResourceHeader, tokens: []Token, source: []const u8) void {
-            applyToMemoryFlags(&self.memory_flags, tokens, source);
-        }
-
-        /// Applies LANGUAGE/VERSION/CHARACTERISTICS optional statements to this header.
-        pub fn applyOptionalStatements(self: *ResourceHeader, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void {
-            applyToOptionalStatements(&self.language, &self.version, &self.characteristics, statements, source, code_page_lookup);
-        }
-    };
-
-    /// Applies each common resource attribute token to `flags`, in order.
-    /// Every token must be a known `rc.CommonResourceAttributes` keyword.
-    fn applyToMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void {
-        for (tokens) |token| {
-            flags.set(rc.CommonResourceAttributes.map.get(token.slice(source)).?);
-        }
-    }
-
- /// RT_GROUP_ICON and RT_GROUP_CURSOR have their own special rules for memory flags
- ///
- /// `flags` is updated in place from the attribute keywords in `tokens` (sliced out of
- /// `source`). Every token must be a known `rc.CommonResourceAttributes` keyword.
- /// Returns early without further changes when PRELOAD is absent, or when applying
- /// only the PRELOAD/LOADONCALL tokens leaves `flags.value` unchanged.
- fn applyToGroupMemoryFlags(flags: *MemoryFlags, tokens: []Token, source: []const u8) void {
- // There's probably a cleaner implementation of this, but this will result in the same
- // flags as the Win32 RC compiler for all 986,410 K-permutations of memory flags
- // for an ICON resource.
- //
- // This was arrived at by iterating over the permutations and creating a
- // list where each line looks something like this:
- // MOVEABLE PRELOAD -> 0x1050 (MOVEABLE|PRELOAD|DISCARDABLE)
- //
- // and then noticing a few things:
-
- // 1. Any permutation that does not have PRELOAD in it just uses the
- // default flags.
- // Snapshot of the incoming flags, compared against after step 2.
- const initial_flags = flags.*;
- // Set of every attribute that appears anywhere in `tokens`, regardless of order.
- var flags_set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty();
- for (tokens) |token| {
- const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
- flags_set.insert(attribute);
- }
- if (!flags_set.contains(.preload)) return;
-
- // 2. Any permutation of flags where applying only the PRELOAD and LOADONCALL flags
- // results in no actual change by the end will just use the default flags.
- // For example, `PRELOAD LOADONCALL` will result in default flags, but
- // `LOADONCALL PRELOAD` will have PRELOAD set after they are both applied in order.
- for (tokens) |token| {
- const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
- switch (attribute) {
- .preload, .loadoncall => flags.set(attribute),
- else => {},
- }
- }
- if (flags.value == initial_flags.value) return;
-
- // 3. If none of DISCARDABLE, SHARED, or PURE is specified, then PRELOAD
- // implies `flags &= ~SHARED` and LOADONCALL implies `flags |= SHARED`
- const shared_set = comptime blk: {
- var set = std.enums.EnumSet(rc.CommonResourceAttributes).initEmpty();
- set.insert(.discardable);
- set.insert(.shared);
- set.insert(.pure);
- break :blk set;
- };
- const discardable_shared_or_pure_specified = flags_set.intersectWith(shared_set).count() != 0;
- // Every token is now applied through `setGroup`, on top of the step-2 changes.
- for (tokens) |token| {
- const attribute = rc.CommonResourceAttributes.map.get(token.slice(source)).?;
- flags.setGroup(attribute, !discardable_shared_or_pure_specified);
- }
- }
-
-    /// Only handles the 'base' optional statements that are shared between resource types.
-    ///
-    /// Walks `statements` in order: a LANGUAGE statement overwrites `language`, and a
-    /// VERSION or CHARACTERISTICS statement overwrites `version` / `characteristics`.
-    /// Simple statements whose identifier is not in `rc.OptionalStatements`, and all
-    /// other node kinds, are ignored.
-    fn applyToOptionalStatements(language: *res.Language, version: *u32, characteristics: *u32, statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) void {
-        for (statements) |node| {
-            switch (node.id) {
-                .language_statement => {
-                    const stmt = @fieldParentPtr(Node.LanguageStatement, "base", node);
-                    language.* = languageFromLanguageStatement(stmt, source, code_page_lookup);
-                },
-                .simple_statement => {
-                    const stmt = @fieldParentPtr(Node.SimpleStatement, "base", node);
-                    const kind = rc.OptionalStatements.map.get(stmt.identifier.slice(source)) orelse continue;
-                    const number = Compiler.evaluateNumberExpression(stmt.value, source, code_page_lookup);
-                    const target = switch (kind) {
-                        .version => version,
-                        .characteristics => characteristics,
-                        else => unreachable, // only VERSION and CHARACTERISTICS should be in an optional statements list
-                    };
-                    target.* = number.value;
-                },
-                else => {},
-            }
-        }
-    }
-
-    /// Evaluates both id expressions of a LANGUAGE statement and returns them,
-    /// truncated to the field widths of `res.Language`.
-    pub fn languageFromLanguageStatement(language_statement: *const Node.LanguageStatement, source: []const u8, code_page_lookup: *const CodePageLookup) res.Language {
-        const primary_id = Compiler.evaluateNumberExpression(language_statement.primary_language_id, source, code_page_lookup);
-        const sub_id = Compiler.evaluateNumberExpression(language_statement.sublanguage_id, source, code_page_lookup);
-        return res.Language{
-            .primary_language_id = @truncate(primary_id.value),
-            .sublanguage_id = @truncate(sub_id.value),
-        };
-    }
-
-    /// Returns the language of the first LANGUAGE statement in `statements`,
-    /// or null when there is none.
-    pub fn getLanguageFromOptionalStatements(statements: []*Node, source: []const u8, code_page_lookup: *const CodePageLookup) ?res.Language {
-        for (statements) |node| {
-            if (node.id != .language_statement) continue;
-            const stmt = @fieldParentPtr(Node.LanguageStatement, "base", node);
-            return languageFromLanguageStatement(stmt, source, code_page_lookup);
-        }
-        return null;
-    }
-
-    /// Writes a resource header whose name and type are ordinal 0 and whose
-    /// every other field is zero, with no data following it.
-    pub fn writeEmptyResource(writer: anytype) !void {
-        const empty_header: ResourceHeader = .{
-            .name_value = .{ .ordinal = 0 },
-            .type_value = .{ .ordinal = 0 },
-            .language = .{ .primary_language_id = 0, .sublanguage_id = 0 },
-            .memory_flags = .{ .value = 0 },
-            .data_size = 0,
-            .version = 0,
-            .characteristics = 0,
-        };
-        return empty_header.writeAssertNoOverflow(writer);
-    }
-
-    /// Pairs the source text of `token` with the code page recorded for it
-    /// in `self.input_code_pages`.
-    pub fn sourceBytesForToken(self: *Compiler, token: Token) SourceBytes {
-        const text = token.slice(self.source);
-        const code_page = self.input_code_pages.getForToken(token);
-        return SourceBytes{ .slice = text, .code_page = code_page };
-    }
-
-    /// Helper that calls parseQuotedStringAsWideString with the relevant context
-    /// (the token's source bytes, its start column, and this compiler's diagnostics).
-    /// Resulting slice is allocated by `self.allocator`.
-    pub fn parseQuotedStringAsWideString(self: *Compiler, token: Token) ![:0]u16 {
-        const bytes = self.sourceBytesForToken(token);
-        const column = token.calculateColumn(self.source, 8, null);
-        return literals.parseQuotedStringAsWideString(self.allocator, bytes, .{
-            .start_column = column,
-            .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
-        });
-    }
-
-    /// Helper that calls parseQuotedStringAsAsciiString with the relevant context
-    /// (the token's source bytes, its start column, and this compiler's diagnostics).
-    /// Resulting slice is allocated by `self.allocator`.
-    pub fn parseQuotedStringAsAsciiString(self: *Compiler, token: Token) ![]u8 {
-        const bytes = self.sourceBytesForToken(token);
-        const column = token.calculateColumn(self.source, 8, null);
-        return literals.parseQuotedStringAsAsciiString(self.allocator, bytes, .{
-            .start_column = column,
-            .diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
-        });
-    }
-
-    /// Appends `details` to this compiler's diagnostics.
-    fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void {
-        return self.diagnostics.append(details);
-    }
-
-    /// Appends `details` to the diagnostics and returns `error.CompileError`
-    /// (or the allocation error if appending fails). Always returns an error.
-    fn addErrorDetailsAndFail(self: *Compiler, details: ErrorDetails) error{ CompileError, OutOfMemory } {
-        self.addErrorDetails(details) catch |err| return err;
-        return error.CompileError;
-    }
-};
-
-pub const OpenSearchPathError = std.fs.Dir.OpenError;
-
-/// Opens `path` relative to `dir` after checking it with `validateSearchPath`.
-fn openSearchPathDir(dir: std.fs.Dir, path: []const u8) OpenSearchPathError!std.fs.Dir {
-    // Validate the search path to avoid possible unreachable on invalid paths,
-    // see https://github.com/ziglang/zig/issues/15607 for why this is currently necessary.
-    validateSearchPath(path) catch |err| return err;
-    const opened = try dir.openDir(path, .{});
-    return opened;
-}
-
-/// Very crude attempt at validating a path. This is imperfect
-/// and AFAIK it is effectively impossible to implement perfect path
-/// validation, since it ultimately depends on the underlying filesystem.
-/// Note that this function won't be necessary if/when
-/// https://github.com/ziglang/zig/issues/15607
-/// is accepted/implemented.
-///
-/// Returns `error.BadPathName` when the path is rejected.
-fn validateSearchPath(path: []const u8) error{BadPathName}!void {
-    if (builtin.os.tag == .windows) {
-        // This will return error.BadPathName on non-Win32 namespaced paths
-        // (e.g. the NT \??\ prefix, the device \\.\ prefix, etc).
-        // Those path types are something of an unavoidable way to
-        // still hit unreachable during the openDir call.
-        var components = try std.fs.path.componentIterator(path);
-        // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
-        const forbidden = "\x00<>:\"|?*";
-        while (components.next()) |component| {
-            if (std.mem.indexOfAny(u8, component.name, forbidden)) |_| return error.BadPathName;
-        }
-    } else {
-        // Elsewhere only an embedded NUL byte is rejected.
-        if (std.mem.indexOfScalar(u8, path, 0)) |_| return error.BadPathName;
-    }
-}
-
-/// An open search directory together with an optional path string.
-pub const SearchDir = struct {
-    dir: std.fs.Dir,
-    path: ?[]const u8,
-
-    /// Closes `dir` and, when `path` is set, frees it with `allocator`.
-    pub fn deinit(self: *SearchDir, allocator: Allocator) void {
-        self.dir.close();
-        const owned_path = self.path orelse return;
-        allocator.free(owned_path);
-    }
-};
-
-/// Slurps the first `size` bytes read into `slurped_header`
-///
-/// Wraps `child_reader` and forwards every read to it unchanged, while copying
-/// the first `size` bytes that pass through into `slurped_header`. If fewer
-/// than `size` bytes are read, the remainder of `slurped_header` stays zero.
-/// `bytes_read` counts all bytes read so far (saturating).
-///
-/// `ReaderType` must be a reader type that exposes an `Error` declaration and a
-/// `read` method.
-pub fn HeaderSlurpingReader(comptime size: usize, comptime ReaderType: type) type {
-    return struct {
-        child_reader: ReaderType,
-        bytes_read: usize = 0,
-        slurped_header: [size]u8 = [_]u8{0x00} ** size,
-
-        pub const Error = ReaderType.Error;
-        pub const Reader = std.io.Reader(*@This(), Error, read);
-
-        /// Reads from the child reader into `buf` and records any bytes that
-        /// still belong to the first `size` bytes of the stream.
-        pub fn read(self: *@This(), buf: []u8) Error!usize {
-            const amt = try self.child_reader.read(buf);
-            if (self.bytes_read < size) {
-                // Only the part of this read that falls inside the header window is copied.
-                const bytes_to_add = @min(amt, size - self.bytes_read);
-                const end_index = self.bytes_read + bytes_to_add;
-                @memcpy(self.slurped_header[self.bytes_read..end_index], buf[0..bytes_to_add]);
-            }
-            self.bytes_read +|= amt;
-            return amt;
-        }
-
-        pub fn reader(self: *@This()) Reader {
-            return .{ .context = self };
-        }
-    };
-}
-
-/// Returns a `HeaderSlurpingReader` of `size` bytes wrapping `reader`.
-pub fn headerSlurpingReader(comptime size: usize, reader: anytype) HeaderSlurpingReader(size, @TypeOf(reader)) {
-    const Slurper = HeaderSlurpingReader(size, @TypeOf(reader));
-    return Slurper{ .child_reader = reader };
-}
-
-/// Sort of like std.io.LimitedReader, but a Writer.
-/// A write fails with `error.NoSpaceLeft` whenever the whole slice would not
-/// fit in `bytes_left`; it never writes a partial slice up to the limit.
-pub fn LimitedWriter(comptime WriterType: type) type {
-    return struct {
-        inner_writer: WriterType,
-        bytes_left: u64,
-
-        const Self = @This();
-
-        pub const Error = error{NoSpaceLeft} || WriterType.Error;
-        pub const Writer = std.io.Writer(*Self, Error, write);
-
-        /// Forwards `bytes` to the inner writer and deducts the amount actually
-        /// written from `bytes_left`.
-        pub fn write(self: *Self, bytes: []const u8) Error!usize {
-            if (self.bytes_left < bytes.len) return error.NoSpaceLeft;
-            const written = try self.inner_writer.write(bytes);
-            self.bytes_left -= written;
-            return written;
-        }
-
-        pub fn writer(self: *Self) Writer {
-            return Writer{ .context = self };
-        }
-    };
-}
-
-/// Returns an initialised `LimitedWriter`
-/// `bytes_left` is a `u64` to be able to take 64 bit file offsets
-pub fn limitedWriter(inner_writer: anytype, bytes_left: u64) LimitedWriter(@TypeOf(inner_writer)) {
-    const Limited = LimitedWriter(@TypeOf(inner_writer));
-    return Limited{ .inner_writer = inner_writer, .bytes_left = bytes_left };
-}
-
-// Exercises `limitedWriter` over a 4-byte buffer with a 4-byte limit.
-test "limitedWriter basic usage" {
- var buf: [4]u8 = undefined;
- var fbs = std.io.fixedBufferStream(&buf);
- var limited_stream = limitedWriter(fbs.writer(), 4);
- var writer = limited_stream.writer();
-
- // A write that fits goes through in full.
- try std.testing.expectEqual(@as(usize, 3), try writer.write("123"));
- try std.testing.expectEqualSlices(u8, "123", buf[0..3]);
- // Only 1 byte is left, so a 2-byte write is rejected outright rather than partially written.
- try std.testing.expectError(error.NoSpaceLeft, writer.write("45"));
- try std.testing.expectEqual(@as(usize, 1), try writer.write("4"));
- try std.testing.expectEqualSlices(u8, "1234", buf[0..4]);
- // The limit is now exhausted.
- try std.testing.expectError(error.NoSpaceLeft, writer.write("5"));
-}
-
-/// Accumulates the FONT resources seen so far so that a single FONTDIR
-/// resource can be written from them by `writeResData`.
-pub const FontDir = struct {
-    fonts: std.ArrayListUnmanaged(Font) = .{},
-    /// To keep track of which ids are set and where they were set from
-    ids: std.AutoHashMapUnmanaged(u16, Token) = .{},
-
-    pub const Font = struct {
-        id: u16,
-        header_bytes: [148]u8,
-    };
-
-    /// Frees the font list and the id map.
-    /// `allocator` must be the allocator that was passed to `add`.
-    pub fn deinit(self: *FontDir, allocator: Allocator) void {
-        self.fonts.deinit(allocator);
-        self.ids.deinit(allocator);
-    }
-
-    /// Registers `font` and remembers `id_token` as the place its id was set from.
-    /// Uses `putNoClobber`, so the caller must ensure `font.id` has not been added before.
-    pub fn add(self: *FontDir, allocator: Allocator, font: Font, id_token: Token) !void {
-        try self.ids.putNoClobber(allocator, font.id, id_token);
-        try self.fonts.append(allocator, font);
-    }
-
-    /// Writes the FONTDIR resource (header, font count, one entry per font, padding)
-    /// to `writer`. Writes nothing when no fonts were added.
-    pub fn writeResData(self: *FontDir, compiler: *Compiler, writer: anytype) !void {
-        if (self.fonts.items.len == 0) return;
-
-        // We know the number of fonts is limited to maxInt(u16) because fonts
-        // must have a valid and unique u16 ordinal ID (trying to specify a FONT
-        // with e.g. id 65537 will wrap around to 1 and be ignored if there's already
-        // a font with that ID in the file).
-        const num_fonts: u16 = @intCast(self.fonts.items.len);
-
-        // u16 count + [(u16 id + 150 bytes) for each font]
-        // Note: This works out to a maximum data_size of 9,961,322.
-        // The count is widened to u32 before multiplying; multiplying the u16
-        // directly would perform the arithmetic in u16 and overflow once there
-        // are more than 431 fonts.
-        const data_size: u32 = 2 + (2 + 150) * @as(u32, num_fonts);
-
-        var header = Compiler.ResourceHeader{
-            .name_value = try NameOrOrdinal.nameFromString(compiler.allocator, .{ .slice = "FONTDIR", .code_page = .windows1252 }),
-            .type_value = NameOrOrdinal{ .ordinal = @intFromEnum(res.RT.FONTDIR) },
-            .memory_flags = res.MemoryFlags.defaults(res.RT.FONTDIR),
-            .language = compiler.state.language,
-            .version = compiler.state.version,
-            .characteristics = compiler.state.characteristics,
-            .data_size = data_size,
-        };
-        defer header.deinit(compiler.allocator);
-
-        try header.writeAssertNoOverflow(writer);
-        try writer.writeInt(u16, num_fonts, .little);
-        for (self.fonts.items) |font| {
-            // The format of the FONTDIR is a strange beast.
-            // Technically, each FONT is seemingly meant to be written as a
-            // FONTDIRENTRY with two trailing NUL-terminated strings corresponding to
-            // the 'device name' and 'face name' of the .FNT file, but:
-            //
-            // 1. When dealing with .FNT files, the Win32 implementation
-            //    gets the device name and face name from the wrong locations,
-            //    so it's basically never going to write the real device/face name
-            //    strings.
-            // 2. When dealing with files 76-140 bytes long, the Win32 implementation
-            //    can just crash (if there are no NUL bytes in the file).
-            // 3. The 32-bit Win32 rc.exe uses a 148 byte size for the portion of
-            //    the FONTDIRENTRY before the NUL-terminated strings, which
-            //    does not match the documented FONTDIRENTRY size that (presumably)
-            //    this format is meant to be using, so anything iterating the
-            //    FONTDIR according to the available documentation will get bogus results.
-            // 4. The FONT resource can be used for non-.FNT types like TTF and OTF,
-            //    in which case emulating the Win32 behavior of unconditionally
-            //    interpreting the bytes as a .FNT and trying to grab device/face names
-            //    from random bytes in the TTF/OTF file can lead to weird behavior
-            //    and errors in the Win32 implementation (for example, the device/face
-            //    name fields are offsets into the file where the NUL-terminated
-            //    string is located, but the Win32 implementation actually treats
-            //    them as signed so if they are negative then the Win32 implementation
-            //    will error; this happening for TTF fonts would just be a bug
-            //    since the TTF could otherwise be valid)
-            // 5. The FONTDIR resource doesn't actually seem to be used at all by
-            //    anything that I've found, and instead in Windows 3.0 and newer
-            //    it seems like the FONT resources are always just iterated/accessed
-            //    directly without ever looking at the FONTDIR.
-            //
-            // All of these combined means that we:
-            // - Do not need or want to emulate Win32 behavior here
-            // - For maximum simplicity and compatibility, we just write the first
-            //   148 bytes of the file without any interpretation (padded with
-            //   zeroes to get up to 148 bytes if necessary), and then
-            //   unconditionally write two NUL bytes, meaning that we always
-            //   write 'device name' and 'face name' as if they were 0-length
-            //   strings.
-            //
-            // This gives us byte-for-byte .RES compatibility in the common case while
-            // allowing us to avoid any erroneous errors caused by trying to read
-            // the face/device name from a bogus location. Note that the Win32
-            // implementation never actually writes the real device/face name here
-            // anyway (except in the bizarre case that a .FNT file has the proper
-            // device/face name offsets within a reserved section of the .FNT file)
-            // so there's no feasible way that anything can actually think that the
-            // device name/face name in the FONTDIR is reliable.
-
-            // First, the ID is written, though
-            try writer.writeInt(u16, font.id, .little);
-            try writer.writeAll(&font.header_bytes);
-            try writer.writeByteNTimes(0, 2);
-        }
-        try Compiler.writeDataPadding(writer, data_size);
-    }
-};
-
-/// One `StringTable` per language.
-pub const StringTablesByLanguage = struct {
-    /// String tables for each language are written to the .res file in order depending on
-    /// when the first STRINGTABLE for the language was defined, and all blocks for a given
-    /// language are written contiguously.
-    /// Using an ArrayHashMap here gives us this property for free.
-    tables: std.AutoArrayHashMapUnmanaged(res.Language, StringTable) = .{},
-
-    /// Frees every per-language `StringTable` and then the map itself.
-    /// `allocator` must be the allocator that was passed to `set`.
-    pub fn deinit(self: *StringTablesByLanguage, allocator: Allocator) void {
-        // The nested tables own their own allocations, so they have to be
-        // released before the map that stores them.
-        for (self.tables.values()) |*table| table.deinit(allocator);
-        self.tables.deinit(allocator);
-    }
-
-    /// Sets string `id` for `language`, creating that language's table on first use.
-    /// All remaining arguments are forwarded to `StringTable.set`.
-    /// Returns `error.StringAlreadyDefined` if the id is already set for the language.
-    pub fn set(
-        self: *StringTablesByLanguage,
-        allocator: Allocator,
-        language: res.Language,
-        id: u16,
-        string_token: Token,
-        node: *Node,
-        source: []const u8,
-        code_page_lookup: *const CodePageLookup,
-        version: u32,
-        characteristics: u32,
-    ) StringTable.SetError!void {
-        const get_or_put_result = try self.tables.getOrPut(allocator, language);
-        if (!get_or_put_result.found_existing) {
-            get_or_put_result.value_ptr.* = StringTable{};
-        }
-        return get_or_put_result.value_ptr.set(allocator, id, string_token, node, source, code_page_lookup, version, characteristics);
-    }
-};
-
-/// The strings of one language, grouped into blocks of 16 consecutive ids.
-/// String `id` lives in block `(id / 16) + 1` at index `id & 0xF`.
-pub const StringTable = struct {
-    /// Blocks are written to the .res file in order depending on when the first string
-    /// was added to the block (i.e. `STRINGTABLE { 16 "b" 0 "a" }` would then get written
-    /// with block ID 2 (the one with "b") first and block ID 1 (the one with "a") second).
-    /// Using an ArrayHashMap here gives us this property for free.
-    blocks: std.AutoArrayHashMapUnmanaged(u16, Block) = .{},
-
-    pub const Block = struct {
-        /// Tokens of the strings that are set, ordered by their index within the block.
-        strings: std.ArrayListUnmanaged(Token) = .{},
-        /// Bit `i` is set when the string at index `i` of this block is defined.
-        set_indexes: std.bit_set.IntegerBitSet(16) = .{ .mask = 0 },
-        memory_flags: MemoryFlags = MemoryFlags.defaults(res.RT.STRING),
-        characteristics: u32,
-        version: u32,
-
-        /// Returns the index to insert the string into the `strings` list.
-        /// Returns null if the string should be appended.
-        /// Asserts that `index` is not already set.
-        fn getInsertionIndex(self: *Block, index: u8) ?u8 {
-            std.debug.assert(!self.set_indexes.isSet(index));
-
-            const first_set = self.set_indexes.findFirstSet() orelse return null;
-            if (first_set > index) return 0;
-
-            const last_set = 15 - @clz(self.set_indexes.mask);
-            if (index > last_set) return null;
-
-            // `index` lies strictly between the first and last set bits: count the
-            // set bits below it (the first set bit accounts for the initial 1).
-            var bit = first_set + 1;
-            var insertion_index: u8 = 1;
-            while (bit != index) : (bit += 1) {
-                if (self.set_indexes.isSet(bit)) insertion_index += 1;
-            }
-            return insertion_index;
-        }
-
-        /// Returns the position within `strings` of the token for `string_index`,
-        /// or null when it cannot be determined from the set bits.
-        fn getTokenIndex(self: *Block, string_index: u8) ?u8 {
-            const count = self.strings.items.len;
-            if (count == 0) return null;
-            if (count == 1) return 0;
-
-            const first_set = self.set_indexes.findFirstSet() orelse unreachable;
-            if (first_set == string_index) return 0;
-            const last_set = 15 - @clz(self.set_indexes.mask);
-            if (last_set == string_index) return @intCast(count - 1);
-
-            if (first_set == last_set) return null;
-
-            var bit = first_set + 1;
-            var token_index: u8 = 1;
-            while (bit < last_set) : (bit += 1) {
-                if (!self.set_indexes.isSet(bit)) continue;
-                if (bit == string_index) return token_index;
-                token_index += 1;
-            }
-            return null;
-        }
-
-        /// Debug helper: prints each set index with its position and token.
-        fn dump(self: *Block) void {
-            var bit_it = self.set_indexes.iterator(.{});
-            var string_index: usize = 0;
-            while (bit_it.next()) |bit_index| {
-                const token = self.strings.items[string_index];
-                std.debug.print("{}: [{}] {any}\n", .{ bit_index, string_index, token });
-                string_index += 1;
-            }
-        }
-
-        /// Applies the memory flags and the VERSION/CHARACTERISTICS optional statements
-        /// of `string_table` to this block. Any LANGUAGE statement is evaluated but
-        /// its result is discarded.
-        pub fn applyAttributes(self: *Block, string_table: *Node.StringTable, source: []const u8, code_page_lookup: *const CodePageLookup) void {
-            Compiler.applyToMemoryFlags(&self.memory_flags, string_table.common_resource_attributes, source);
-            var dummy_language: res.Language = undefined;
-            Compiler.applyToOptionalStatements(&dummy_language, &self.version, &self.characteristics, string_table.optional_statements, source, code_page_lookup);
-        }
-
-        /// Returns the part of `str` before the first pair of consecutive zero
-        /// elements, or all of `str` when there is no such pair.
-        fn trimToDoubleNUL(comptime T: type, str: []const T) []const T {
-            var last_was_null = false;
-            for (str, 0..) |c, i| {
-                if (c == 0) {
-                    if (last_was_null) return str[0 .. i - 1];
-                    last_was_null = true;
-                } else {
-                    last_was_null = false;
-                }
-            }
-            return str;
-        }
-
-        test "trimToDoubleNUL" {
-            try std.testing.expectEqualStrings("a\x00b", trimToDoubleNUL(u8, "a\x00b"));
-            try std.testing.expectEqualStrings("a", trimToDoubleNUL(u8, "a\x00\x00b"));
-        }
-
-        /// Writes this block as an RT_STRING resource named `block_id` to `writer`:
-        /// 16 length-prefixed UTF-16 strings, with a zero length for every unset index.
-        pub fn writeResData(self: *Block, compiler: *Compiler, language: res.Language, block_id: u16, writer: anytype) !void {
-            var data_buffer = std.ArrayList(u8).init(compiler.allocator);
-            defer data_buffer.deinit();
-            const data_writer = data_buffer.writer();
-
-            // `i` walks all 16 indexes of the block; `string_i` walks `strings`.
-            var i: u8 = 0;
-            var string_i: u8 = 0;
-            while (true) : (i += 1) {
-                if (!self.set_indexes.isSet(i)) {
-                    try data_writer.writeInt(u16, 0, .little);
-                    if (i == 15) break else continue;
-                }
-
-                const string_token = self.strings.items[string_i];
-                const slice = string_token.slice(compiler.source);
-                const column = string_token.calculateColumn(compiler.source, 8, null);
-                const code_page = compiler.input_code_pages.getForToken(string_token);
-                const bytes = SourceBytes{ .slice = slice, .code_page = code_page };
-                const utf16_string = try literals.parseQuotedStringAsWideString(compiler.allocator, bytes, .{
-                    .start_column = column,
-                    .diagnostics = .{ .diagnostics = compiler.diagnostics, .token = string_token },
-                });
-                defer compiler.allocator.free(utf16_string);
-
-                const trimmed_string = trim: {
-                    // Two NUL characters in a row act as a terminator
-                    // Note: This is only the case for STRINGTABLE strings
-                    const trimmed = trimToDoubleNUL(u16, utf16_string);
-                    // We also want to trim any trailing NUL characters
-                    break :trim std.mem.trimRight(u16, trimmed, &[_]u16{0});
-                };
-
-                // String literals are limited to maxInt(u15) codepoints, so these UTF-16 encoded
-                // strings are limited to maxInt(u15) * 2 = 65,534 code units (since 2 is the
-                // maximum number of UTF-16 code units per codepoint).
-                // This leaves room for exactly one NUL terminator.
-                var string_len_in_utf16_code_units: u16 = @intCast(trimmed_string.len);
-                // If the option is set, then a NUL terminator is added unconditionally.
-                // We already trimmed any trailing NULs, so we know it will be a new addition to the string.
-                if (compiler.null_terminate_string_table_strings) string_len_in_utf16_code_units += 1;
-                try data_writer.writeInt(u16, string_len_in_utf16_code_units, .little);
-                try data_writer.writeAll(std.mem.sliceAsBytes(trimmed_string));
-                if (compiler.null_terminate_string_table_strings) {
-                    try data_writer.writeInt(u16, 0, .little);
-                }
-
-                if (i == 15) break;
-                string_i += 1;
-            }
-
-            // This intCast will never be able to fail due to the length constraints on string literals.
-            //
-            // - STRINGTABLE resource definitions can only provide one string literal per index.
-            // - STRINGTABLE strings are limited to maxInt(u16) UTF-16 code units (see 'string_len_in_utf16_code_units'
-            //   above), which means that the maximum number of bytes per string literal is
-            //   2 * maxInt(u16) = 131,070 (since there are 2 bytes per UTF-16 code unit).
-            // - Each Block/RT_STRING resource includes exactly 16 strings and each have a 2 byte
-            //   length field, so the maximum number of total bytes in a RT_STRING resource's data is
-            //   16 * (131,070 + 2) = 2,097,152 which is well within the u32 max.
-            //
-            // Note: The string literal maximum length is enforced by the lexer.
-            const data_size: u32 = @intCast(data_buffer.items.len);
-
-            const header = Compiler.ResourceHeader{
-                .name_value = .{ .ordinal = block_id },
-                .type_value = .{ .ordinal = @intFromEnum(res.RT.STRING) },
-                .memory_flags = self.memory_flags,
-                .language = language,
-                .version = self.version,
-                .characteristics = self.characteristics,
-                .data_size = data_size,
-            };
-            // The only variable parts of the header are name and type, which in this case
-            // we fully control and know are numbers, so they have a fixed size.
-            try header.writeAssertNoOverflow(writer);
-
-            var data_fbs = std.io.fixedBufferStream(data_buffer.items);
-            try Compiler.writeResourceData(writer, data_fbs.reader(), data_size);
-        }
-    };
-
-    /// Frees the token list of every block and then the block map.
-    /// `allocator` must be the allocator that was passed to `set`.
-    pub fn deinit(self: *StringTable, allocator: Allocator) void {
-        var it = self.blocks.iterator();
-        while (it.next()) |entry| {
-            entry.value_ptr.strings.deinit(allocator);
-        }
-        self.blocks.deinit(allocator);
-    }
-
-    const SetError = error{StringAlreadyDefined} || Allocator.Error;
-
-    /// Defines string `id` as `string_token`.
-    ///
-    /// The first string of a block creates the block with the given `version` and
-    /// `characteristics` and applies the attributes of `node`, which must be a
-    /// string table node. Returns `error.StringAlreadyDefined` if `id` is already set.
-    pub fn set(
-        self: *StringTable,
-        allocator: Allocator,
-        id: u16,
-        string_token: Token,
-        node: *Node,
-        source: []const u8,
-        code_page_lookup: *const CodePageLookup,
-        version: u32,
-        characteristics: u32,
-    ) SetError!void {
-        const block_id = (id / 16) + 1;
-        const string_index: u8 = @intCast(id & 0xF);
-
-        const get_or_put_result = try self.blocks.getOrPut(allocator, block_id);
-        if (!get_or_put_result.found_existing) {
-            get_or_put_result.value_ptr.* = Block{ .version = version, .characteristics = characteristics };
-            get_or_put_result.value_ptr.applyAttributes(node.cast(.string_table).?, source, code_page_lookup);
-        } else {
-            if (get_or_put_result.value_ptr.set_indexes.isSet(string_index)) {
-                return error.StringAlreadyDefined;
-            }
-        }
-
-        // Keep `strings` ordered by index so it lines up with the set bits.
-        const block = get_or_put_result.value_ptr;
-        if (block.getInsertionIndex(string_index)) |insertion_index| {
-            try block.strings.insert(allocator, insertion_index, string_token);
-        } else {
-            try block.strings.append(allocator, string_token);
-        }
-        block.set_indexes.set(string_index);
-    }
-
-    /// Returns the token that string `id` was defined with, or null if it is not defined.
-    pub fn get(self: *StringTable, id: u16) ?Token {
-        const block_id = (id / 16) + 1;
-        const string_index: u8 = @intCast(id & 0xF);
-
-        const block = self.blocks.getPtr(block_id) orelse return null;
-        const token_index = block.getTokenIndex(string_index) orelse return null;
-        return block.strings.items[token_index];
-    }
-
-    /// Debug helper: prints every block id followed by the block's contents.
-    pub fn dump(self: *StringTable) !void {
-        // `StringTable` has no `iterator` of its own; the blocks map is what gets iterated.
-        var it = self.blocks.iterator();
-        while (it.next()) |entry| {
-            std.debug.print("block: {}\n", .{entry.key_ptr.*});
-            entry.value_ptr.dump();
-        }
-    }
-};
-
-// Sets ids 0-99 in a shuffled order, then checks that each id is reported as
-// already defined and that `get` returns the token it was set with.
-test "StringTable" {
- const S = struct {
- // Builds a token whose fields all carry `id`, so tokens can be told apart.
- fn makeDummyToken(id: usize) Token {
- return Token{
- .id = .invalid,
- .start = id,
- .end = id,
- .line_number = id,
- };
- }
- };
- const allocator = std.testing.allocator;
- var string_table = StringTable{};
- defer string_table.deinit(allocator);
-
- var code_page_lookup = CodePageLookup.init(allocator, .windows1252);
- defer code_page_lookup.deinit();
-
- // An empty STRINGTABLE node: no attributes and no optional statements to apply.
- var dummy_node = Node.StringTable{
- .type = S.makeDummyToken(0),
- .common_resource_attributes = &.{},
- .optional_statements = &.{},
- .begin_token = S.makeDummyToken(0),
- .strings = &.{},
- .end_token = S.makeDummyToken(0),
- };
-
- // randomize an array of ids 0-99
- var ids = ids: {
- var buf: [100]u16 = undefined;
- var i: u16 = 0;
- while (i < buf.len) : (i += 1) {
- buf[i] = i;
- }
- break :ids buf;
- };
- // Fixed seed, so the shuffled order is the same on every run.
- var prng = std.Random.DefaultPrng.init(0);
- var random = prng.random();
- random.shuffle(u16, &ids);
-
- // set each one in the randomized order
- for (ids) |id| {
- try string_table.set(allocator, id, S.makeDummyToken(id), &dummy_node.base, "", &code_page_lookup, 0, 0);
- }
-
- // make sure each one exists and is the right value when gotten
- var id: u16 = 0;
- while (id < 100) : (id += 1) {
- const dummy = S.makeDummyToken(id);
- try std.testing.expectError(error.StringAlreadyDefined, string_table.set(allocator, id, dummy, &dummy_node.base, "", &code_page_lookup, 0, 0));
- try std.testing.expectEqual(dummy, string_table.get(id).?);
- }
-
- // make sure non-existent string ids are not found
- try std.testing.expectEqual(@as(?Token, null), string_table.get(100));
-}
diff --git a/src/resinator/errors.zig b/src/resinator/errors.zig
@@ -1,1060 +0,0 @@
-const std = @import("std");
-const Token = @import("lex.zig").Token;
-const SourceMappings = @import("source_mapping.zig").SourceMappings;
-const utils = @import("utils.zig");
-const rc = @import("rc.zig");
-const res = @import("res.zig");
-const ico = @import("ico.zig");
-const bmp = @import("bmp.zig");
-const parse = @import("parse.zig");
-const lang = @import("lang.zig");
-const CodePage = @import("code_pages.zig").CodePage;
-const builtin = @import("builtin");
-const native_endian = builtin.cpu.arch.endian();
-
- /// Accumulates the diagnostics (`ErrorDetails`) produced during a run, together with
- /// the strings (for example filenames) that those diagnostics refer to by index.
- pub const Diagnostics = struct {
-     errors: std.ArrayListUnmanaged(ErrorDetails) = .{},
-     /// Append-only, cannot handle removing strings.
-     /// Expects to own all strings within the list.
-     strings: std.ArrayListUnmanaged([]const u8) = .{},
-     allocator: std.mem.Allocator,
-
-     /// Creates an empty collection. `allocator` backs both lists and every stored string.
-     pub fn init(allocator: std.mem.Allocator) Diagnostics {
-         return .{
-             .allocator = allocator,
-         };
-     }
-
-     /// Frees the error list, every owned string, and then the string list itself.
-     pub fn deinit(self: *Diagnostics) void {
-         self.errors.deinit(self.allocator);
-         for (self.strings.items) |str| {
-             self.allocator.free(str);
-         }
-         self.strings.deinit(self.allocator);
-     }
-
-     /// Appends one diagnostic. Fails only if the error list cannot grow.
-     pub fn append(self: *Diagnostics, error_details: ErrorDetails) !void {
-         try self.errors.append(self.allocator, error_details);
-     }
-
-     /// Unsigned integer type as wide as the narrowest `FilenameStringIndex` among the
-     /// `ErrorDetails` payloads that store a string index.
-     const SmallestStringIndexType = std.meta.Int(.unsigned, @min(
-         @bitSizeOf(ErrorDetails.FileOpenError.FilenameStringIndex),
-         @min(
-             @bitSizeOf(ErrorDetails.IconReadError.FilenameStringIndex),
-             @bitSizeOf(ErrorDetails.BitmapReadError.FilenameStringIndex),
-         ),
-     ));
-
-     /// Stores a copy of `str` and returns its index within `strings`.
-     ///
-     /// Returns the index of the added string as the SmallestStringIndexType
-     /// in order to avoid needing to `@intCast` it at callsites of putString.
-     /// Instead, this function will error if the index would ever exceed the
-     /// smallest FilenameStringIndex of an ErrorDetails type.
-     ///
-     /// Returns `error.OutOfMemory` both when the index space is exhausted and when
-     /// an allocation fails; in either case `strings` is left unchanged.
-     pub fn putString(self: *Diagnostics, str: []const u8) !SmallestStringIndexType {
-         if (self.strings.items.len >= std.math.maxInt(SmallestStringIndexType)) {
-             return error.OutOfMemory; // ran out of string indexes
-         }
-         const dupe = try self.allocator.dupe(u8, str);
-         // Until the append below succeeds nothing owns the copy, so release it on failure.
-         errdefer self.allocator.free(dupe);
-         const index = self.strings.items.len;
-         try self.strings.append(self.allocator, dupe);
-         return @intCast(index);
-     }
-
-     /// Writes every collected diagnostic to stderr while holding the stderr mutex.
-     /// Rendering is best-effort: the first failure from `renderErrorMessage` stops
-     /// the loop and is not reported to the caller.
-     pub fn renderToStdErr(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, tty_config: std.io.tty.Config, source_mappings: ?SourceMappings) void {
-         std.debug.getStderrMutex().lock();
-         defer std.debug.getStderrMutex().unlock();
-         const stderr = std.io.getStdErr().writer();
-         for (self.errors.items) |err_details| {
-             renderErrorMessage(self.allocator, stderr, tty_config, cwd, err_details, source, self.strings.items, source_mappings) catch return;
-         }
-     }
-
-     /// Same as `renderToStdErr`, with the TTY configuration detected from stderr.
-     pub fn renderToStdErrDetectTTY(self: *Diagnostics, cwd: std.fs.Dir, source: []const u8, source_mappings: ?SourceMappings) void {
-         const tty_config = std.io.tty.detectConfig(std.io.getStdErr());
-         return self.renderToStdErr(cwd, source, tty_config, source_mappings);
-     }
-
-     /// Returns true if any collected diagnostic has the error code `err`.
-     pub fn contains(self: *const Diagnostics, err: ErrorDetails.Error) bool {
-         for (self.errors.items) |details| {
-             if (details.err == err) return true;
-         }
-         return false;
-     }
-
-     /// Returns true if any collected diagnostic has one of the error codes in `errors`.
-     pub fn containsAny(self: *const Diagnostics, errors: []const ErrorDetails.Error) bool {
-         for (self.errors.items) |details| {
-             for (errors) |err| {
-                 if (details.err == err) return true;
-             }
-         }
-         return false;
-     }
- };
-
- /// Contains enough context to append errors/warnings/notes etc: a diagnostics collection plus a token.
- pub const DiagnosticsContext = struct {
- diagnostics: *Diagnostics, // collection that new diagnostics get appended to (pointer only; ownership is not shown here)
- token: Token, // presumably the token that appended diagnostics should point at — confirm at call sites
- };
-
-pub const ErrorDetails = struct {
- err: Error,
- token: Token,
- /// If non-null, should be before `token`. If null, `token` is assumed to be the start.
- token_span_start: ?Token = null,
- /// If non-null, should be after `token`. If null, `token` is assumed to be the end.
- token_span_end: ?Token = null,
- type: Type = .err,
- print_source_line: bool = true,
- extra: union {
- none: void,
- expected: Token.Id,
- number: u32,
- expected_types: ExpectedTypes,
- resource: rc.Resource,
- string_and_language: StringAndLanguage,
- file_open_error: FileOpenError,
- icon_read_error: IconReadError,
- icon_dir: IconDirContext,
- bmp_read_error: BitmapReadError,
- accelerator_error: AcceleratorError,
- statement_with_u16_param: StatementWithU16Param,
- menu_or_class: enum { class, menu },
- } = .{ .none = {} },
-
- pub const Type = enum { // kind of diagnostic; the `type` field of `ErrorDetails` defaults to `.err`
- /// Fatal error, stops compilation
- err,
- /// Warning that does not affect compilation result
- warning,
- /// A note that typically provides further context for a warning/error
- note,
- /// An invisible diagnostic that is not printed to stderr but can
- /// provide information useful when comparing the behavior of different
- /// implementations. For example, a hint is emitted when a FONTDIR resource
- /// was included in the .RES file which is significant because rc.exe
- /// does something different than us, but ultimately it's not important
- /// enough to be a warning/note. (`render` writes nothing for hints.)
- hint,
- };
-
- comptime {
-     // Compile-time guard: every member of the `extra` union must be 32 bits or less.
-     const ExtraUnion = std.meta.fieldInfo(ErrorDetails, .extra).type;
-     for (std.meta.fields(ExtraUnion)) |member| {
-         std.debug.assert(@bitSizeOf(member.type) <= 32);
-     }
- }
-
- pub const StatementWithU16Param = enum(u32) { // payload of `rc_would_error_u16_with_l_suffix`; `render` prints the tag name as-is
- fileversion,
- productversion,
- language,
- };
-
- pub const StringAndLanguage = packed struct(u32) { // payload of `string_already_defined`
- id: u16, // string id; `render` prints it in both decimal and hex
- language: res.Language, // `render` calls `.asInt()` on this to look up a language name
- };
-
- /// Packs a file-open failure into 32 bits: the error (as an enum tag) plus the
- /// index that `render` uses to look the filename up in its `strings` argument.
- pub const FileOpenError = packed struct(u32) {
-     err: FileOpenErrorEnum,
-     filename_string_index: FilenameStringIndex,
-
-     /// Enum generated from the members of `std.fs.File.OpenError`.
-     pub const FileOpenErrorEnum = std.meta.FieldEnum(std.fs.File.OpenError);
-     /// Unsigned integer that takes up the bits of the `u32` not used by the error tag.
-     pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(FileOpenErrorEnum));
-
-     /// Maps an error value to the enum tag of the same name.
-     pub fn enumFromError(err: std.fs.File.OpenError) FileOpenErrorEnum {
-         switch (err) {
-             // `inline else` makes `e` comptime-known, so its name can select the tag.
-             inline else => |e| return @field(FileOpenErrorEnum, @errorName(e)),
-         }
-     }
- };
-
- /// Packs an icon/cursor read failure into 32 bits: the error (as an enum tag), which
- /// of the two file kinds was being read, and the index that `render` uses to look
- /// the filename up in its `strings` argument.
- pub const IconReadError = packed struct(u32) {
-     err: IconReadErrorEnum,
-     icon_type: enum(u1) { cursor, icon },
-     filename_string_index: FilenameStringIndex,
-
-     /// Enum generated from the members of `ico.ReadError`.
-     pub const IconReadErrorEnum = std.meta.FieldEnum(ico.ReadError);
-     /// Unsigned integer using the bits left after the error tag and the 1-bit `icon_type`.
-     pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(IconReadErrorEnum) - 1);
-
-     /// Maps an error value to the enum tag of the same name.
-     pub fn enumFromError(err: ico.ReadError) IconReadErrorEnum {
-         switch (err) {
-             // `inline else` makes `e` comptime-known, so its name can select the tag.
-             inline else => |e| return @field(IconReadErrorEnum, @errorName(e)),
-         }
-     }
- };
-
- pub const IconDirContext = packed struct(u32) { // payload of the diagnostics documented as "`icon_dir` is populated"
- icon_type: enum(u1) { cursor, icon },
- icon_format: ico.ImageFormat,
- index: u16, // `render` prints this as the index of the entry within the icon/cursor
- bitmap_version: ico.BitmapHeader.Version = .unknown,
- _: Padding = 0, // unused bits that bring the struct up to exactly 32 bits
-
- pub const Padding = std.meta.Int(.unsigned, 15 - @bitSizeOf(ico.BitmapHeader.Version) - @bitSizeOf(ico.ImageFormat)); // 15 = 32 - 1 (icon_type) - 16 (index)
- };
-
- /// Packs a bitmap read failure into 32 bits: the error (as an enum tag) plus the
- /// index that `render` uses to look the filename up in its `strings` argument.
- pub const BitmapReadError = packed struct(u32) {
-     err: BitmapReadErrorEnum,
-     filename_string_index: FilenameStringIndex,
-
-     /// Enum generated from the members of `bmp.ReadError`.
-     pub const BitmapReadErrorEnum = std.meta.FieldEnum(bmp.ReadError);
-     /// Unsigned integer that takes up the bits of the `u32` not used by the error tag.
-     pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(BitmapReadErrorEnum));
-
-     /// Maps an error value to the enum tag of the same name.
-     pub fn enumFromError(err: bmp.ReadError) BitmapReadErrorEnum {
-         switch (err) {
-             // `inline else` makes `e` comptime-known, so its name can select the tag.
-             inline else => |e| return @field(BitmapReadErrorEnum, @errorName(e)),
-         }
-     }
- };
-
- pub const BitmapUnsupportedDIB = packed struct(u32) { // NOTE(review): not a member of the `extra` union in `ErrorDetails` — confirm whether this is still used
- dib_version: ico.BitmapHeader.Version,
- filename_string_index: FilenameStringIndex,
-
- pub const FilenameStringIndex = std.meta.Int(.unsigned, 32 - @bitSizeOf(ico.BitmapHeader.Version)); // the bits of the u32 not taken by `dib_version`
- };
-
- /// Packs an accelerator key parse failure into 32 bits: the error (as an enum tag)
- /// followed by zeroed padding.
- pub const AcceleratorError = packed struct(u32) {
-     err: AcceleratorErrorEnum,
-     _: Padding = 0,
-
-     /// Enum generated from the members of `res.ParseAcceleratorKeyStringError`.
-     pub const AcceleratorErrorEnum = std.meta.FieldEnum(res.ParseAcceleratorKeyStringError);
-     /// Unsigned integer that fills the bits of the `u32` not used by the error tag.
-     pub const Padding = std.meta.Int(.unsigned, 32 - @bitSizeOf(AcceleratorErrorEnum));
-
-     /// Maps an error value to the enum tag of the same name.
-     pub fn enumFromError(err: res.ParseAcceleratorKeyStringError) AcceleratorErrorEnum {
-         switch (err) {
-             // `inline else` makes `e` comptime-known, so its name can select the tag.
-             inline else => |e| return @field(AcceleratorErrorEnum, @errorName(e)),
-         }
-     }
- };
-
- /// Set of flags naming the kinds of token that would have been accepted; used as
- /// the payload of `expected_something_else`.
- pub const ExpectedTypes = packed struct(u32) {
-     number: bool = false,
-     number_expression: bool = false,
-     string_literal: bool = false,
-     accelerator_type_or_option: bool = false,
-     control_class: bool = false,
-     literal: bool = false,
-     // Note: This being 0 instead of undefined is arbitrary and something of a workaround,
-     // see https://github.com/ziglang/zig/issues/15395
-     _: u26 = 0,
-
-     /// Display text for each flag, keyed by the flag's field name.
-     pub const strings = std.ComptimeStringMap([]const u8, .{
-         .{ "number", "number" },
-         .{ "number_expression", "number expression" },
-         .{ "string_literal", "quoted string literal" },
-         .{ "accelerator_type_or_option", "accelerator type or option [ASCII, VIRTKEY, etc]" },
-         .{ "control_class", "control class [BUTTON, EDIT, etc]" },
-         .{ "literal", "unquoted literal" },
-     });
-
-     /// Writes the display text of every set flag, in field order, joined as an
-     /// English list: "a", "a or b", or "a, b, or c". Writes nothing when no flag
-     /// is set. Errors come only from `writer`.
-     pub fn writeCommaSeparated(self: ExpectedTypes, writer: anytype) !void {
-         const info = @typeInfo(ExpectedTypes).Struct;
-         const BackingInt = info.backing_integer.?;
-         // Every field except the trailing padding field is a one-bit flag.
-         const flag_count = info.fields.len - 1;
-         const padding_bits = @bitSizeOf(ExpectedTypes) - flag_count;
-         const flag_mask = std.math.maxInt(BackingInt) >> padding_bits;
-         // Mask off the padding so only real flags are counted.
-         const total = @popCount(@as(BackingInt, @bitCast(self)) & flag_mask);
-
-         var written: usize = 0;
-         inline for (info.fields) |field| {
-             if (field.type != bool) continue;
-             if (written == total) return;
-             if (@field(self, field.name)) {
-                 try writer.writeAll(strings.get(field.name).?);
-                 written += 1;
-                 if (written != total) {
-                     // Commas are only used for lists of three or more items.
-                     if (total > 2) {
-                         try writer.writeAll(", ");
-                     } else {
-                         try writer.writeByte(' ');
-                     }
-                     // The conjunction goes right before the final item.
-                     if (written == total - 1) {
-                         try writer.writeAll("or ");
-                     }
-                 }
-             }
-         }
-     }
- };
-
- pub const Error = enum { // identifies the diagnostic; `render` switches on it to choose the message text
- // Lexer
- unfinished_string_literal,
- string_literal_too_long, // `number` is populated; `render` prints it as the current maximum length
- invalid_number_with_exponent,
- invalid_digit_character_in_number_literal,
- illegal_byte,
- illegal_byte_outside_string_literals,
- illegal_codepoint_outside_string_literals,
- illegal_byte_order_mark,
- illegal_private_use_character,
- found_c_style_escaped_quote,
- code_page_pragma_missing_left_paren,
- code_page_pragma_missing_right_paren,
- code_page_pragma_invalid_code_page,
- code_page_pragma_not_integer,
- code_page_pragma_overflow,
- code_page_pragma_unsupported_code_page,
-
- // Parser
- unfinished_raw_data_block,
- unfinished_string_table_block,
- /// `expected` is populated.
- expected_token,
- /// `expected_types` is populated
- expected_something_else,
- /// `resource` is populated
- resource_type_cant_use_raw_data,
- /// `resource` is populated
- id_must_be_ordinal,
- /// `resource` is populated
- name_or_id_not_allowed,
- string_resource_as_numeric_type,
- ascii_character_not_equivalent_to_virtual_key_code,
- empty_menu_not_allowed,
- rc_would_miscompile_version_value_padding,
- rc_would_miscompile_version_value_byte_count,
- code_page_pragma_in_included_file,
- nested_resource_level_exceeds_max, // `resource` is populated; `render` expects versioninfo, menu or menuex
- too_many_dialog_controls, // `resource` is read by `render` for errors/warnings
- nested_expression_level_exceeds_max,
- close_paren_expression,
- unary_plus_expression,
- rc_could_miscompile_control_params,
-
- // Compiler
- /// `string_and_language` is populated
- string_already_defined,
- font_id_already_defined, // `number` is populated; `render` prints it as the font id
- /// `file_open_error` is populated
- file_open_error,
- /// `accelerator_error` is populated
- invalid_accelerator_key,
- accelerator_type_required,
- rc_would_miscompile_control_padding,
- rc_would_miscompile_control_class_ordinal,
- /// `icon_dir` is populated
- rc_would_error_on_icon_dir,
- /// `icon_dir` is populated
- format_not_supported_in_icon_dir,
- /// `resource` is populated and contains the expected type
- icon_dir_and_resource_type_mismatch,
- /// `icon_read_error` is populated
- icon_read_error,
- /// `icon_dir` is populated
- rc_would_error_on_bitmap_version,
- /// `icon_dir` is populated
- max_icon_ids_exhausted,
- /// `bmp_read_error` is populated
- bmp_read_error,
- /// `number` is populated and contains a string index for which the string contains
- /// the bytes of a `u64` (native endian). The `u64` contains the number of ignored bytes.
- bmp_ignored_palette_bytes,
- /// `number` is populated and contains a string index for which the string contains
- /// the bytes of a `u64` (native endian). The `u64` contains the number of missing bytes.
- bmp_missing_palette_bytes,
- /// `number` is populated and contains a string index for which the string contains
- /// the bytes of a `u64` (native endian). The `u64` contains the number of miscompiled bytes.
- rc_would_miscompile_bmp_palette_padding,
- /// `number` is populated and contains a string index for which the string contains
- /// the bytes of two `u64`s (native endian). The first contains the number of missing
- /// palette bytes and the second contains the max number of missing palette bytes.
- /// If type is `.note`, then `extra` is `none`.
- bmp_too_many_missing_palette_bytes,
- resource_header_size_exceeds_max,
- resource_data_size_exceeds_max,
- control_extra_data_size_exceeds_max,
- version_node_size_exceeds_max,
- fontdir_size_exceeds_max,
- /// `number` is populated and contains a string index for the filename
- number_expression_as_filename,
- /// `number` is populated and contains the control ID that is a duplicate
- control_id_already_defined,
- /// `number` is populated and contains the disallowed codepoint
- invalid_filename,
- /// `statement_with_u16_param` is populated
- rc_would_error_u16_with_l_suffix,
- result_contains_fontdir, // `render` writes no message for this one
- /// `number` is populated and contains the ordinal value that the id would be miscompiled to
- rc_would_miscompile_dialog_menu_id,
- /// `number` is populated and contains the ordinal value that the value would be miscompiled to
- rc_would_miscompile_dialog_class,
- /// `menu_or_class` is populated and contains the type of the parameter statement
- rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal,
- rc_would_miscompile_dialog_menu_id_starts_with_digit,
- dialog_menu_id_was_uppercased,
- /// `menu_or_class` is populated and contains the type of the parameter statement
- duplicate_menu_or_class_skipped,
- invalid_digit_character_in_ordinal,
-
- // Literals
- /// `number` is populated
- rc_would_miscompile_codepoint_byte_swap,
- /// `number` is populated
- rc_would_miscompile_codepoint_skip,
- tab_converted_to_spaces,
-
- // General (used in various places)
- /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
- win32_non_ascii_ordinal,
-
- // Initialization
- /// `file_open_error` is populated, but `filename_string_index` is not
- failed_to_open_cwd,
- };
-
- pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
- switch (self.err) {
- .unfinished_string_literal => {
- return writer.print("unfinished string literal at '{s}', expected closing '\"'", .{self.token.nameForErrorDisplay(source)});
- },
- .string_literal_too_long => {
- return writer.print("string literal too long (max is currently {} characters)", .{self.extra.number});
- },
- .invalid_number_with_exponent => {
- return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)});
- },
- .invalid_digit_character_in_number_literal => switch (self.type) {
- .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"),
- .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"),
- .hint => return,
- },
- .illegal_byte => {
- return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
- },
- .illegal_byte_outside_string_literals => {
- return writer.print("character '{s}' is not allowed outside of string literals", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
- },
- .illegal_codepoint_outside_string_literals => {
- // This is somewhat hacky, but we know that:
- // - This error is only possible with codepoints outside of the Windows-1252 character range
- // - So, the only supported code page that could generate this error is UTF-8
- // Therefore, we just assume the token bytes are UTF-8 and decode them to get the illegal
- // codepoint.
- //
- // FIXME: Support other code pages if they become relevant
- const bytes = self.token.slice(source);
- const codepoint = std.unicode.utf8Decode(bytes) catch unreachable;
- return writer.print("codepoint <U+{X:0>4}> is not allowed outside of string literals", .{codepoint});
- },
- .illegal_byte_order_mark => {
- return writer.writeAll("byte order mark <U+FEFF> is not allowed");
- },
- .illegal_private_use_character => {
- return writer.writeAll("private use character <U+E000> is not allowed");
- },
- .found_c_style_escaped_quote => {
- return writer.writeAll("escaping quotes with \\\" is not allowed (use \"\" instead)");
- },
- .code_page_pragma_missing_left_paren => {
- return writer.writeAll("expected left parenthesis after 'code_page' in #pragma code_page");
- },
- .code_page_pragma_missing_right_paren => {
- return writer.writeAll("expected right parenthesis after '<number>' in #pragma code_page");
- },
- .code_page_pragma_invalid_code_page => {
- return writer.writeAll("invalid or unknown code page in #pragma code_page");
- },
- .code_page_pragma_not_integer => {
- return writer.writeAll("code page is not a valid integer in #pragma code_page");
- },
- .code_page_pragma_overflow => {
- return writer.writeAll("code page too large in #pragma code_page");
- },
- .code_page_pragma_unsupported_code_page => {
- // We know that the token slice is a well-formed #pragma code_page(N), so
- // we can skip to the first ( and then get the number that follows
- const token_slice = self.token.slice(source);
- var number_start = std.mem.indexOfScalar(u8, token_slice, '(').? + 1;
- while (std.ascii.isWhitespace(token_slice[number_start])) {
- number_start += 1;
- }
- var number_slice = token_slice[number_start..number_start];
- while (std.ascii.isDigit(token_slice[number_start + number_slice.len])) {
- number_slice.len += 1;
- }
- const number = std.fmt.parseUnsigned(u16, number_slice, 10) catch unreachable;
- const code_page = CodePage.getByIdentifier(number) catch unreachable;
- // TODO: Improve or maybe add a note making it more clear that the code page
- // is valid and that the code page is unsupported purely due to a limitation
- // in this compiler.
- return writer.print("unsupported code page '{s} (id={})' in #pragma code_page", .{ @tagName(code_page), number });
- },
- .unfinished_raw_data_block => {
- return writer.print("unfinished raw data block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)});
- },
- .unfinished_string_table_block => {
- return writer.print("unfinished STRINGTABLE block at '{s}', expected closing '}}' or 'END'", .{self.token.nameForErrorDisplay(source)});
- },
- .expected_token => {
- return writer.print("expected '{s}', got '{s}'", .{ self.extra.expected.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) });
- },
- .expected_something_else => {
- try writer.writeAll("expected ");
- try self.extra.expected_types.writeCommaSeparated(writer);
- return writer.print("; got '{s}'", .{self.token.nameForErrorDisplay(source)});
- },
- .resource_type_cant_use_raw_data => switch (self.type) {
- .err, .warning => try writer.print("expected '<filename>', found '{s}' (resource type '{s}' can't use raw data)", .{ self.token.nameForErrorDisplay(source), self.extra.resource.nameForErrorDisplay() }),
- .note => try writer.print("if '{s}' is intended to be a filename, it must be specified as a quoted string literal", .{self.token.nameForErrorDisplay(source)}),
- .hint => return,
- },
- .id_must_be_ordinal => {
- try writer.print("id of resource type '{s}' must be an ordinal (u16), got '{s}'", .{ self.extra.resource.nameForErrorDisplay(), self.token.nameForErrorDisplay(source) });
- },
- .name_or_id_not_allowed => {
- try writer.print("name or id is not allowed for resource type '{s}'", .{self.extra.resource.nameForErrorDisplay()});
- },
- .string_resource_as_numeric_type => switch (self.type) {
- .err, .warning => try writer.writeAll("the number 6 (RT_STRING) cannot be used as a resource type"),
- .note => try writer.writeAll("using RT_STRING directly likely results in an invalid .res file, use a STRINGTABLE instead"),
- .hint => return,
- },
- .ascii_character_not_equivalent_to_virtual_key_code => {
- // TODO: Better wording? This is what the Win32 RC compiler emits.
- // This occurs when VIRTKEY and a control code is specified ("^c", etc)
- try writer.writeAll("ASCII character not equivalent to virtual key code");
- },
- .empty_menu_not_allowed => {
- try writer.print("empty menu of type '{s}' not allowed", .{self.token.nameForErrorDisplay(source)});
- },
- .rc_would_miscompile_version_value_padding => switch (self.type) {
- .err, .warning => return writer.print("the padding before this quoted string value would be miscompiled by the Win32 RC compiler", .{}),
- .note => return writer.print("to avoid the potential miscompilation, consider adding a comma between the key and the quoted string", .{}),
- .hint => return,
- },
- .rc_would_miscompile_version_value_byte_count => switch (self.type) {
- .err, .warning => return writer.print("the byte count of this value would be miscompiled by the Win32 RC compiler", .{}),
- .note => return writer.print("to avoid the potential miscompilation, do not mix numbers and strings within a value", .{}),
- .hint => return,
- },
- .code_page_pragma_in_included_file => {
- try writer.print("#pragma code_page is not supported in an included resource file", .{});
- },
- .nested_resource_level_exceeds_max => switch (self.type) {
- .err, .warning => {
- const max = switch (self.extra.resource) {
- .versioninfo => parse.max_nested_version_level,
- .menu, .menuex => parse.max_nested_menu_level,
- else => unreachable,
- };
- return writer.print("{s} contains too many nested children (max is {})", .{ self.extra.resource.nameForErrorDisplay(), max });
- },
- .note => return writer.print("max {s} nesting level exceeded here", .{self.extra.resource.nameForErrorDisplay()}),
- .hint => return,
- },
- .too_many_dialog_controls => switch (self.type) {
- .err, .warning => return writer.print("{s} contains too many controls (max is {})", .{ self.extra.resource.nameForErrorDisplay(), std.math.maxInt(u16) }),
- .note => return writer.writeAll("maximum number of controls exceeded here"),
- .hint => return,
- },
- .nested_expression_level_exceeds_max => switch (self.type) {
- .err, .warning => return writer.print("expression contains too many syntax levels (max is {})", .{parse.max_nested_expression_level}),
- .note => return writer.print("maximum expression level exceeded here", .{}),
- .hint => return,
- },
- .close_paren_expression => {
- try writer.writeAll("the Win32 RC compiler would accept ')' as a valid expression, but it would be skipped over and potentially lead to unexpected outcomes");
- },
- .unary_plus_expression => {
- try writer.writeAll("the Win32 RC compiler may accept '+' as a unary operator here, but it is not supported in this implementation; consider omitting the unary +");
- },
- .rc_could_miscompile_control_params => switch (self.type) {
- .err, .warning => return writer.print("this token could be erroneously skipped over by the Win32 RC compiler", .{}),
- .note => return writer.print("to avoid the potential miscompilation, consider adding a comma after the style parameter", .{}),
- .hint => return,
- },
- .string_already_defined => switch (self.type) {
- .err, .warning => {
- const language_id = self.extra.string_and_language.language.asInt();
- const language_name = language_name: {
- if (std.meta.intToEnum(lang.LanguageId, language_id)) |lang_enum_val| {
- break :language_name @tagName(lang_enum_val);
- } else |_| {}
- if (language_id == lang.LOCALE_CUSTOM_UNSPECIFIED) {
- break :language_name "LOCALE_CUSTOM_UNSPECIFIED";
- }
- break :language_name "<UNKNOWN>";
- };
- return writer.print("string with id {d} (0x{X}) already defined for language {s} (0x{X})", .{ self.extra.string_and_language.id, self.extra.string_and_language.id, language_name, language_id });
- },
- .note => return writer.print("previous definition of string with id {d} (0x{X}) here", .{ self.extra.string_and_language.id, self.extra.string_and_language.id }),
- .hint => return,
- },
- .font_id_already_defined => switch (self.type) {
- .err => return writer.print("font with id {d} already defined", .{self.extra.number}),
- .warning => return writer.print("skipped duplicate font with id {d}", .{self.extra.number}),
- .note => return writer.print("previous definition of font with id {d} here", .{self.extra.number}),
- .hint => return,
- },
- .file_open_error => {
- try writer.print("unable to open file '{s}': {s}", .{ strings[self.extra.file_open_error.filename_string_index], @tagName(self.extra.file_open_error.err) });
- },
- .invalid_accelerator_key => {
- try writer.print("invalid accelerator key '{s}': {s}", .{ self.token.nameForErrorDisplay(source), @tagName(self.extra.accelerator_error.err) });
- },
- .accelerator_type_required => {
- try writer.print("accelerator type [ASCII or VIRTKEY] required when key is an integer", .{});
- },
- .rc_would_miscompile_control_padding => switch (self.type) {
- .err, .warning => return writer.print("the padding before this control would be miscompiled by the Win32 RC compiler (it would insert 2 extra bytes of padding)", .{}),
- .note => return writer.print("to avoid the potential miscompilation, consider removing any 'control data' blocks from the controls in this dialog", .{}),
- .hint => return,
- },
- .rc_would_miscompile_control_class_ordinal => switch (self.type) {
- .err, .warning => return writer.print("the control class of this CONTROL would be miscompiled by the Win32 RC compiler", .{}),
- .note => return writer.print("to avoid the potential miscompilation, consider specifying the control class using a string (BUTTON, EDIT, etc) instead of a number", .{}),
- .hint => return,
- },
- .rc_would_error_on_icon_dir => switch (self.type) {
- .err, .warning => return writer.print("the resource at index {} of this {s} has the format '{s}'; this would be an error in the Win32 RC compiler", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type), @tagName(self.extra.icon_dir.icon_format) }),
- .note => {
- // The only note supported is one specific to exactly this combination
- if (!(self.extra.icon_dir.icon_type == .icon and self.extra.icon_dir.icon_format == .riff)) unreachable;
- try writer.print("animated RIFF icons within resource groups may not be well supported, consider using an animated icon file (.ani) instead", .{});
- },
- .hint => return,
- },
- .format_not_supported_in_icon_dir => {
- try writer.print("resource with format '{s}' (at index {}) is not allowed in {s} resource groups", .{ @tagName(self.extra.icon_dir.icon_format), self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) });
- },
- .icon_dir_and_resource_type_mismatch => {
- const unexpected_type: rc.Resource = if (self.extra.resource == .icon) .cursor else .icon;
- // TODO: Better wording
- try writer.print("resource type '{s}' does not match type '{s}' specified in the file", .{ self.extra.resource.nameForErrorDisplay(), unexpected_type.nameForErrorDisplay() });
- },
- .icon_read_error => {
- try writer.print("unable to read {s} file '{s}': {s}", .{ @tagName(self.extra.icon_read_error.icon_type), strings[self.extra.icon_read_error.filename_string_index], @tagName(self.extra.icon_read_error.err) });
- },
- .rc_would_error_on_bitmap_version => switch (self.type) {
- .err => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this version is no longer allowed and should be upgraded to '{s}'", .{
- self.extra.icon_dir.index,
- @tagName(self.extra.icon_dir.icon_type),
- self.extra.icon_dir.bitmap_version.nameForErrorDisplay(),
- ico.BitmapHeader.Version.@"nt3.1".nameForErrorDisplay(),
- }),
- .warning => try writer.print("the DIB at index {} of this {s} is of version '{s}'; this would be an error in the Win32 RC compiler", .{
- self.extra.icon_dir.index,
- @tagName(self.extra.icon_dir.icon_type),
- self.extra.icon_dir.bitmap_version.nameForErrorDisplay(),
- }),
- .note => unreachable,
- .hint => return,
- },
- .max_icon_ids_exhausted => switch (self.type) {
- .err, .warning => try writer.print("maximum global icon/cursor ids exhausted (max is {})", .{std.math.maxInt(u16) - 1}),
- .note => try writer.print("maximum icon/cursor id exceeded at index {} of this {s}", .{ self.extra.icon_dir.index, @tagName(self.extra.icon_dir.icon_type) }),
- .hint => return,
- },
- .bmp_read_error => {
- try writer.print("invalid bitmap file '{s}': {s}", .{ strings[self.extra.bmp_read_error.filename_string_index], @tagName(self.extra.bmp_read_error.err) });
- },
- .bmp_ignored_palette_bytes => {
- const bytes = strings[self.extra.number];
- const ignored_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
- try writer.print("bitmap has {d} extra bytes preceding the pixel data which will be ignored", .{ignored_bytes});
- },
- .bmp_missing_palette_bytes => {
- const bytes = strings[self.extra.number];
- const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
- try writer.print("bitmap has {d} missing color palette bytes which will be padded with zeroes", .{missing_bytes});
- },
- .rc_would_miscompile_bmp_palette_padding => {
- const bytes = strings[self.extra.number];
- const miscompiled_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
- try writer.print("the missing color palette bytes would be miscompiled by the Win32 RC compiler (the added padding bytes would include {d} bytes of the pixel data)", .{miscompiled_bytes});
- },
- .bmp_too_many_missing_palette_bytes => switch (self.type) {
- .err, .warning => {
- const bytes = strings[self.extra.number];
- const missing_bytes = std.mem.readInt(u64, bytes[0..8], native_endian);
- const max_missing_bytes = std.mem.readInt(u64, bytes[8..16], native_endian);
- try writer.print("bitmap has {} missing color palette bytes which exceeds the maximum of {}", .{ missing_bytes, max_missing_bytes });
- },
- // TODO: command line option
- .note => try writer.writeAll("the maximum number of missing color palette bytes is configurable via <<TODO command line option>>"),
- .hint => return,
- },
- .resource_header_size_exceeds_max => {
- try writer.print("resource's header length exceeds maximum of {} bytes", .{std.math.maxInt(u32)});
- },
- .resource_data_size_exceeds_max => switch (self.type) {
- .err, .warning => return writer.print("resource's data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}),
- .note => return writer.print("maximum data length exceeded here", .{}),
- .hint => return,
- },
- .control_extra_data_size_exceeds_max => switch (self.type) {
- .err, .warning => try writer.print("control data length exceeds maximum of {} bytes", .{std.math.maxInt(u16)}),
- .note => return writer.print("maximum control data length exceeded here", .{}),
- .hint => return,
- },
- .version_node_size_exceeds_max => switch (self.type) {
- .err, .warning => return writer.print("version node tree size exceeds maximum of {} bytes", .{std.math.maxInt(u16)}),
- .note => return writer.print("maximum tree size exceeded while writing this child", .{}),
- .hint => return,
- },
- .fontdir_size_exceeds_max => switch (self.type) {
- .err, .warning => return writer.print("FONTDIR data length exceeds maximum of {} bytes", .{std.math.maxInt(u32)}),
- .note => return writer.writeAll("this is likely due to the size of the combined lengths of the device/face names of all FONT resources"),
- .hint => return,
- },
- .number_expression_as_filename => switch (self.type) {
- .err, .warning => return writer.writeAll("filename cannot be specified using a number expression, consider using a quoted string instead"),
- .note => return writer.print("the Win32 RC compiler would evaluate this number expression as the filename '{s}'", .{strings[self.extra.number]}),
- .hint => return,
- },
- .control_id_already_defined => switch (self.type) {
- .err, .warning => return writer.print("control with id {d} already defined for this dialog", .{self.extra.number}),
- .note => return writer.print("previous definition of control with id {d} here", .{self.extra.number}),
- .hint => return,
- },
- .invalid_filename => {
- const disallowed_codepoint = self.extra.number;
- if (disallowed_codepoint < 128 and std.ascii.isPrint(@intCast(disallowed_codepoint))) {
- try writer.print("evaluated filename contains a disallowed character: '{c}'", .{@as(u8, @intCast(disallowed_codepoint))});
- } else {
- try writer.print("evaluated filename contains a disallowed codepoint: <U+{X:0>4}>", .{disallowed_codepoint});
- }
- },
- .rc_would_error_u16_with_l_suffix => switch (self.type) {
- .err, .warning => return writer.print("this {s} parameter would be an error in the Win32 RC compiler", .{@tagName(self.extra.statement_with_u16_param)}),
- .note => return writer.writeAll("to avoid the error, remove any L suffixes from numbers within the parameter"),
- .hint => return,
- },
- .result_contains_fontdir => return,
- .rc_would_miscompile_dialog_menu_id => switch (self.type) {
- .err, .warning => return writer.print("the id of this menu would be miscompiled by the Win32 RC compiler", .{}),
- .note => return writer.print("the Win32 RC compiler would evaluate the id as the ordinal/number value {d}", .{self.extra.number}),
- .hint => return,
- },
- .rc_would_miscompile_dialog_class => switch (self.type) {
- .err, .warning => return writer.print("this class would be miscompiled by the Win32 RC compiler", .{}),
- .note => return writer.print("the Win32 RC compiler would evaluate it as the ordinal/number value {d}", .{self.extra.number}),
- .hint => return,
- },
- .rc_would_miscompile_dialog_menu_or_class_id_forced_ordinal => switch (self.type) {
- .err, .warning => return,
- .note => return writer.print("to avoid the potential miscompilation, only specify one {s} per dialog resource", .{@tagName(self.extra.menu_or_class)}),
- .hint => return,
- },
- .rc_would_miscompile_dialog_menu_id_starts_with_digit => switch (self.type) {
- .err, .warning => return,
- .note => return writer.writeAll("to avoid the potential miscompilation, the first character of the id should not be a digit"),
- .hint => return,
- },
- .dialog_menu_id_was_uppercased => return,
- .duplicate_menu_or_class_skipped => {
- return writer.print("this {s} was ignored; when multiple {s} statements are specified, only the last takes precedence", .{
- @tagName(self.extra.menu_or_class),
- @tagName(self.extra.menu_or_class),
- });
- },
- .invalid_digit_character_in_ordinal => {
- return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values");
- },
- .rc_would_miscompile_codepoint_byte_swap => switch (self.type) {
- .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}),
- .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}),
- .hint => return,
- },
- .rc_would_miscompile_codepoint_skip => switch (self.type) {
- .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the codepoint would be missing from the compiled resource)", .{self.extra.number}),
- .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}),
- .hint => return,
- },
- .tab_converted_to_spaces => switch (self.type) {
- .err, .warning => return writer.writeAll("the tab character(s) in this string will be converted into a variable number of spaces (determined by the column of the tab character in the .rc file)"),
- .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"),
- .hint => return,
- },
- .win32_non_ascii_ordinal => switch (self.type) {
- .err, .warning => unreachable,
- .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
- .hint => return,
- },
- .failed_to_open_cwd => {
- try writer.print("failed to open CWD for compilation: {s}", .{@tagName(self.extra.file_open_error.err)});
- },
- }
- }
-
- pub const VisualTokenInfo = struct {
- before_len: usize,
- point_offset: usize,
- after_len: usize,
- };
-
- pub fn visualTokenInfo(self: ErrorDetails, source_line_start: usize, source_line_end: usize) VisualTokenInfo {
- // Note: A perfect solution here would involve full grapheme cluster
- // awareness, but oh well. This will give incorrect offsets
- // if there are any multibyte codepoints within the relevant span,
- // and even more inflated for grapheme clusters.
- //
- // We mitigate this slightly when we know we'll be pointing at
- // something that displays as 1 character.
- return switch (self.err) {
- // These can technically be more than 1 byte depending on encoding,
- // but they always refer to one visual character/grapheme.
- .illegal_byte,
- .illegal_byte_outside_string_literals,
- .illegal_codepoint_outside_string_literals,
- .illegal_byte_order_mark,
- .illegal_private_use_character,
- => .{
- .before_len = 0,
- .point_offset = self.token.start - source_line_start,
- .after_len = 0,
- },
- else => .{
- .before_len = before: {
- const start = @max(source_line_start, if (self.token_span_start) |span_start| span_start.start else self.token.start);
- break :before self.token.start - start;
- },
- .point_offset = self.token.start - source_line_start,
- .after_len = after: {
- const end = @min(source_line_end, if (self.token_span_end) |span_end| span_end.end else self.token.end);
- // end may be less than start when pointing to EOF
- if (end <= self.token.start) break :after 0;
- break :after end - self.token.start - 1;
- },
- },
- };
- }
-};
-
-pub fn renderErrorMessage(allocator: std.mem.Allocator, writer: anytype, tty_config: std.io.tty.Config, cwd: std.fs.Dir, err_details: ErrorDetails, source: []const u8, strings: []const []const u8, source_mappings: ?SourceMappings) !void {
- if (err_details.type == .hint) return;
-
- const source_line_start = err_details.token.getLineStart(source);
- // Treat tab stops as 1 column wide for error display purposes,
- // and add one to get a 1-based column
- const column = err_details.token.calculateColumn(source, 1, source_line_start) + 1;
-
- const corresponding_span: ?SourceMappings.SourceSpan = if (source_mappings != null and source_mappings.?.has(err_details.token.line_number))
- source_mappings.?.get(err_details.token.line_number)
- else
- null;
- const corresponding_file: ?[]const u8 = if (source_mappings != null and corresponding_span != null)
- source_mappings.?.files.get(corresponding_span.?.filename_offset)
- else
- null;
-
- const err_line = if (corresponding_span) |span| span.start_line else err_details.token.line_number;
-
- try tty_config.setColor(writer, .bold);
- if (corresponding_file) |file| {
- try writer.writeAll(file);
- } else {
- try tty_config.setColor(writer, .dim);
- try writer.writeAll("<after preprocessor>");
- try tty_config.setColor(writer, .reset);
- try tty_config.setColor(writer, .bold);
- }
- try writer.print(":{d}:{d}: ", .{ err_line, column });
- switch (err_details.type) {
- .err => {
- try tty_config.setColor(writer, .red);
- try writer.writeAll("error: ");
- },
- .warning => {
- try tty_config.setColor(writer, .yellow);
- try writer.writeAll("warning: ");
- },
- .note => {
- try tty_config.setColor(writer, .cyan);
- try writer.writeAll("note: ");
- },
- .hint => unreachable,
- }
- try tty_config.setColor(writer, .reset);
- try tty_config.setColor(writer, .bold);
- try err_details.render(writer, source, strings);
- try writer.writeByte('\n');
- try tty_config.setColor(writer, .reset);
-
- if (!err_details.print_source_line) {
- try writer.writeByte('\n');
- return;
- }
-
- const source_line = err_details.token.getLine(source, source_line_start);
- const visual_info = err_details.visualTokenInfo(source_line_start, source_line_start + source_line.len);
-
- // Need this to determine if the 'line originated from' note is worth printing
- var source_line_for_display_buf = try std.ArrayList(u8).initCapacity(allocator, source_line.len);
- defer source_line_for_display_buf.deinit();
- try writeSourceSlice(source_line_for_display_buf.writer(), source_line);
-
- // TODO: General handling of long lines, not tied to this specific error
- if (err_details.err == .string_literal_too_long) {
- const before_slice = source_line[0..@min(source_line.len, visual_info.point_offset + 16)];
- try writeSourceSlice(writer, before_slice);
- try tty_config.setColor(writer, .dim);
- try writer.writeAll("<...truncated...>");
- try tty_config.setColor(writer, .reset);
- } else {
- try writer.writeAll(source_line_for_display_buf.items);
- }
- try writer.writeByte('\n');
-
- try tty_config.setColor(writer, .green);
- const num_spaces = visual_info.point_offset - visual_info.before_len;
- try writer.writeByteNTimes(' ', num_spaces);
- try writer.writeByteNTimes('~', visual_info.before_len);
- try writer.writeByte('^');
- if (visual_info.after_len > 0) {
- var num_squiggles = visual_info.after_len;
- if (err_details.err == .string_literal_too_long) {
- num_squiggles = @min(num_squiggles, 15);
- }
- try writer.writeByteNTimes('~', num_squiggles);
- }
- try writer.writeByte('\n');
- try tty_config.setColor(writer, .reset);
-
- if (corresponding_span != null and corresponding_file != null) {
- var corresponding_lines = try CorrespondingLines.init(allocator, cwd, err_details, source_line_for_display_buf.items, corresponding_span.?, corresponding_file.?);
- defer corresponding_lines.deinit(allocator);
-
- if (!corresponding_lines.worth_printing_note) return;
-
- try tty_config.setColor(writer, .bold);
- if (corresponding_file) |file| {
- try writer.writeAll(file);
- } else {
- try tty_config.setColor(writer, .dim);
- try writer.writeAll("<after preprocessor>");
- try tty_config.setColor(writer, .reset);
- try tty_config.setColor(writer, .bold);
- }
- try writer.print(":{d}:{d}: ", .{ err_line, column });
- try tty_config.setColor(writer, .cyan);
- try writer.writeAll("note: ");
- try tty_config.setColor(writer, .reset);
- try tty_config.setColor(writer, .bold);
- try writer.writeAll("this line originated from line");
- if (corresponding_span.?.start_line != corresponding_span.?.end_line) {
- try writer.print("s {}-{}", .{ corresponding_span.?.start_line, corresponding_span.?.end_line });
- } else {
- try writer.print(" {}", .{corresponding_span.?.start_line});
- }
- try writer.print(" of file '{s}'\n", .{corresponding_file.?});
- try tty_config.setColor(writer, .reset);
-
- if (!corresponding_lines.worth_printing_lines) return;
-
- if (corresponding_lines.lines_is_error_message) {
- try tty_config.setColor(writer, .red);
- try writer.writeAll(" | ");
- try tty_config.setColor(writer, .reset);
- try tty_config.setColor(writer, .dim);
- try writer.writeAll(corresponding_lines.lines.items);
- try tty_config.setColor(writer, .reset);
- try writer.writeAll("\n\n");
- return;
- }
-
- try writer.writeAll(corresponding_lines.lines.items);
- try writer.writeAll("\n\n");
- }
-}
-
-const CorrespondingLines = struct {
- worth_printing_note: bool = true,
- worth_printing_lines: bool = true,
- lines: std.ArrayListUnmanaged(u8) = .{},
- lines_is_error_message: bool = false,
-
- pub fn init(allocator: std.mem.Allocator, cwd: std.fs.Dir, err_details: ErrorDetails, lines_for_comparison: []const u8, corresponding_span: SourceMappings.SourceSpan, corresponding_file: []const u8) !CorrespondingLines {
- var corresponding_lines = CorrespondingLines{};
-
- // We don't do line comparison for this error, so don't print the note if the line
- // number is different
- if (err_details.err == .string_literal_too_long and err_details.token.line_number == corresponding_span.start_line) {
- corresponding_lines.worth_printing_note = false;
- return corresponding_lines;
- }
-
- // Don't print the originating line for this error, we know it's really long
- if (err_details.err == .string_literal_too_long) {
- corresponding_lines.worth_printing_lines = false;
- return corresponding_lines;
- }
-
- var writer = corresponding_lines.lines.writer(allocator);
- if (utils.openFileNotDir(cwd, corresponding_file, .{})) |file| {
- defer file.close();
- var buffered_reader = std.io.bufferedReader(file.reader());
- writeLinesFromStream(writer, buffered_reader.reader(), corresponding_span.start_line, corresponding_span.end_line) catch |err| switch (err) {
- error.LinesNotFound => {
- corresponding_lines.lines.clearRetainingCapacity();
- try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)});
- corresponding_lines.lines_is_error_message = true;
- return corresponding_lines;
- },
- else => |e| return e,
- };
- } else |err| {
- corresponding_lines.lines.clearRetainingCapacity();
- try writer.print("unable to print line(s) from file: {s}", .{@errorName(err)});
- corresponding_lines.lines_is_error_message = true;
- return corresponding_lines;
- }
-
- // If the lines are the same as they were before preprocessing, skip printing the note entirely
- if (std.mem.eql(u8, lines_for_comparison, corresponding_lines.lines.items)) {
- corresponding_lines.worth_printing_note = false;
- }
- return corresponding_lines;
- }
-
- pub fn deinit(self: *CorrespondingLines, allocator: std.mem.Allocator) void {
- self.lines.deinit(allocator);
- }
-};
-
-fn writeSourceSlice(writer: anytype, slice: []const u8) !void {
- for (slice) |c| try writeSourceByte(writer, c);
-}
-
-inline fn writeSourceByte(writer: anytype, byte: u8) !void {
- switch (byte) {
- '\x00'...'\x08', '\x0E'...'\x1F', '\x7F' => try writer.writeAll("�"),
- // \r is seemingly ignored by the RC compiler so skipping it when printing source lines
- // could help avoid confusing output (e.g. RC\rDATA if printed verbatim would show up
- // in the console as DATA but the compiler reads it as RCDATA)
- //
- // NOTE: This is irrelevant when using the clang preprocessor, because unpaired \r
- // characters get converted to \n, but may become relevant if another
- // preprocessor is used instead.
- '\r' => {},
- '\t', '\x0B', '\x0C' => try writer.writeByte(' '),
- else => try writer.writeByte(byte),
- }
-}
-
-pub fn writeLinesFromStream(writer: anytype, input: anytype, start_line: usize, end_line: usize) !void {
- var line_num: usize = 1;
- while (try readByteOrEof(input)) |byte| {
- switch (byte) {
- '\n' => {
- if (line_num == end_line) return;
- if (line_num >= start_line) try writeSourceByte(writer, byte);
- line_num += 1;
- },
- else => {
- if (line_num >= start_line) try writeSourceByte(writer, byte);
- },
- }
- }
- if (line_num != end_line) {
- return error.LinesNotFound;
- }
-}
-
-pub fn readByteOrEof(reader: anytype) !?u8 {
- return reader.readByte() catch |err| switch (err) {
- error.EndOfStream => return null,
- else => |e| return e,
- };
-}
diff --git a/src/resinator/lang.zig b/src/resinator/lang.zig
@@ -1,877 +0,0 @@
-const std = @import("std");
-
-/// This function is specific to how the Win32 RC command line interprets
-/// language IDs specified as integers.
-/// - Always interpreted as hexadecimal, but explicit 0x prefix is also allowed
-/// - Wraps on overflow of u16
-/// - Stops parsing on any invalid hexadecimal digits
-/// - Errors if a digit is not the first char
-/// - `-` (negative) prefix is allowed
-pub fn parseInt(str: []const u8) error{InvalidLanguageId}!u16 {
- var result: u16 = 0;
- const radix: u8 = 16;
- var buf = str;
-
- const Prefix = enum { none, minus };
- var prefix: Prefix = .none;
- switch (buf[0]) {
- '-' => {
- prefix = .minus;
- buf = buf[1..];
- },
- else => {},
- }
-
- if (buf.len > 2 and buf[0] == '0' and buf[1] == 'x') {
- buf = buf[2..];
- }
-
- for (buf, 0..) |c, i| {
- const digit = switch (c) {
- // On invalid digit for the radix, just stop parsing but don't fail
- 'a'...'f', 'A'...'F', '0'...'9' => std.fmt.charToDigit(c, radix) catch break,
- else => {
- // First digit must be valid
- if (i == 0) {
- return error.InvalidLanguageId;
- }
- break;
- },
- };
-
- if (result != 0) {
- result *%= radix;
- }
- result +%= digit;
- }
-
- switch (prefix) {
- .none => {},
- .minus => result = 0 -% result,
- }
-
- return result;
-}
-
-test parseInt {
- try std.testing.expectEqual(@as(u16, 0x16), try parseInt("16"));
- try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1A"));
- try std.testing.expectEqual(@as(u16, 0x1a), try parseInt("0x1Azzzz"));
- try std.testing.expectEqual(@as(u16, 0xffff), try parseInt("-1"));
- try std.testing.expectEqual(@as(u16, 0xffea), try parseInt("-0x16"));
- try std.testing.expectEqual(@as(u16, 0x0), try parseInt("0o100"));
- try std.testing.expectEqual(@as(u16, 0x1), try parseInt("10001"));
- try std.testing.expectError(error.InvalidLanguageId, parseInt("--1"));
- try std.testing.expectError(error.InvalidLanguageId, parseInt("0xha"));
- try std.testing.expectError(error.InvalidLanguageId, parseInt("¹"));
- try std.testing.expectError(error.InvalidLanguageId, parseInt("~1"));
-}
-
-/// This function is specific to how the Win32 RC command line interprets
-/// language tags: invalid tags are rejected, but tags that don't have
-/// a specific assigned ID but are otherwise valid enough will get
-/// converted to an ID of LOCALE_CUSTOM_UNSPECIFIED.
-pub fn tagToInt(tag: []const u8) error{InvalidLanguageTag}!u16 {
- const maybe_id = try tagToId(tag);
- if (maybe_id) |id| {
- return @intFromEnum(id);
- } else {
- return LOCALE_CUSTOM_UNSPECIFIED;
- }
-}
-
-pub fn tagToId(tag: []const u8) error{InvalidLanguageTag}!?LanguageId {
- const parsed = try parse(tag);
- // There are currently no language tags with assigned IDs that have
- // multiple suffixes, so we can skip the lookup.
- if (parsed.multiple_suffixes) return null;
- const longest_known_tag = comptime blk: {
- var len = 0;
- for (@typeInfo(LanguageId).Enum.fields) |field| {
- if (field.name.len > len) len = field.name.len;
- }
- break :blk len;
- };
- // If the tag is longer than the longest tag that has an assigned ID,
- // then we can skip the lookup.
- if (tag.len > longest_known_tag) return null;
- var normalized_buf: [longest_known_tag]u8 = undefined;
- // To allow e.g. `de-de_phoneb` to get looked up as `de-de`, we need to
- // omit the suffix, but only if the tag contains a valid alternate sort order.
- const tag_to_normalize = if (parsed.isSuffixValidSortOrder()) tag[0 .. tag.len - (parsed.suffix.?.len + 1)] else tag;
- const normalized_tag = normalizeTag(tag_to_normalize, &normalized_buf);
- return std.meta.stringToEnum(LanguageId, normalized_tag) orelse {
- // special case for a tag that has been mapped to the same ID
- // twice.
- if (std.mem.eql(u8, "ff_latn_ng", normalized_tag)) {
- return LanguageId.ff_ng;
- }
- return null;
- };
-}
-
-test tagToId {
- try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("ar-ae")).?);
- try std.testing.expectEqual(LanguageId.ar_ae, (try tagToId("AR_AE")).?);
- try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-ng")).?);
- // Special case
- try std.testing.expectEqual(LanguageId.ff_ng, (try tagToId("ff-Latn-NG")).?);
-}
-
-test "exhaustive tagToId" {
- inline for (@typeInfo(LanguageId).Enum.fields) |field| {
- const id = tagToId(field.name) catch |err| {
- std.debug.print("tag: {s}\n", .{field.name});
- return err;
- };
- try std.testing.expectEqual(@field(LanguageId, field.name), id orelse {
- std.debug.print("tag: {s}, got null\n", .{field.name});
- return error.TestExpectedEqual;
- });
- }
- var buf: [32]u8 = undefined;
- inline for (valid_alternate_sorts) |parsed_sort| {
- var fbs = std.io.fixedBufferStream(&buf);
- const writer = fbs.writer();
- writer.writeAll(parsed_sort.language_code) catch unreachable;
- writer.writeAll("-") catch unreachable;
- writer.writeAll(parsed_sort.country_code.?) catch unreachable;
- writer.writeAll("-") catch unreachable;
- writer.writeAll(parsed_sort.suffix.?) catch unreachable;
- const expected_field_name = comptime field: {
- var name_buf: [5]u8 = undefined;
- @memcpy(&name_buf[0..parsed_sort.language_code.len], parsed_sort.language_code);
- name_buf[2] = '_';
- @memcpy(name_buf[3..], parsed_sort.country_code.?);
- break :field name_buf;
- };
- const expected = @field(LanguageId, &expected_field_name);
- const id = tagToId(fbs.getWritten()) catch |err| {
- std.debug.print("tag: {s}\n", .{fbs.getWritten()});
- return err;
- };
- try std.testing.expectEqual(expected, id orelse {
- std.debug.print("tag: {s}, expected: {}, got null\n", .{ fbs.getWritten(), expected });
- return error.TestExpectedEqual;
- });
- }
-}
-
-fn normalizeTag(tag: []const u8, buf: []u8) []u8 {
- std.debug.assert(buf.len >= tag.len);
- for (tag, 0..) |c, i| {
- if (c == '-')
- buf[i] = '_'
- else
- buf[i] = std.ascii.toLower(c);
- }
- return buf[0..tag.len];
-}
-
-/// https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-LCID/%5bMS-LCID%5d.pdf#%5B%7B%22num%22%3A72%2C%22gen%22%3A0%7D%2C%7B%22name%22%3A%22XYZ%22%7D%2C69%2C574%2C0%5D
-/// "When an LCID is requested for a locale without a
-/// permanent LCID assignment, nor a temporary
-/// assignment as above, the protocol will respond
-/// with LOCALE_CUSTOM_UNSPECIFIED for all such
-/// locales. Because this single value is used for
-/// numerous possible locale names, it is impossible to
-/// round trip this locale, even temporarily.
-/// Applications should discard this value as soon as
-/// possible and never persist it. If the system is
-/// forced to respond to a request for
-/// LCID_CUSTOM_UNSPECIFIED, it will fall back to
-/// the current user locale. This is often incorrect but
-/// may prevent an application or component from
-/// failing. As the meaning of this temporary LCID is
-/// unstable, it should never be used for interchange
-/// or persisted data. This is a 1-to-many relationship
-/// that is very unstable."
-pub const LOCALE_CUSTOM_UNSPECIFIED = 0x1000;
-
-pub const LANG_ENGLISH = 0x09;
-pub const SUBLANG_ENGLISH_US = 0x01;
-
-/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
-pub fn MAKELANGID(primary: u10, sublang: u6) u16 {
- return (@as(u16, primary) << 10) | sublang;
-}
-
-/// Language tag format expressed as a regular expression (rough approximation):
-///
-/// [a-zA-Z]{1,3}([-_][a-zA-Z]{4})?([-_][a-zA-Z]{2})?([-_][a-zA-Z0-9]{1,8})?
-/// lang | script | country | suffix
-///
-/// Notes:
-/// - If lang code is 1 char, it seems to mean that everything afterwards uses suffix
-/// parsing rules (e.g. `a-0` and `a-00000000` are allowed).
-/// - There can also be any number of trailing suffix parts as long as they each
-/// would be a valid suffix part, e.g. `en-us-blah-blah1-blah2-blah3` is allowed.
-/// - When doing lookups, trailing suffix parts are taken into account, e.g.
-/// `ca-es-valencia` is not considered equivalent to `ca-es-valencia-blah`.
-/// - A suffix is only allowed if:
-/// + Lang code is 1 char long, or
-/// + A country code is present, or
-/// + A script tag is not present and:
-/// - the suffix is numeric-only and has a length of 3, or
-/// - the lang is `qps` and the suffix is `ploca` or `plocm`
-pub fn parse(lang_tag: []const u8) error{InvalidLanguageTag}!Parsed {
- var it = std.mem.splitAny(u8, lang_tag, "-_");
- const lang_code = it.first();
- const is_valid_lang_code = lang_code.len >= 1 and lang_code.len <= 3 and isAllAlphabetic(lang_code);
- if (!is_valid_lang_code) return error.InvalidLanguageTag;
- var parsed = Parsed{
- .language_code = lang_code,
- };
- // The second part could be a script tag, a country code, or a suffix
- if (it.next()) |part_str| {
- // The lang code being length 1 behaves strangely, so fully special case it.
- if (lang_code.len == 1) {
- // This is almost certainly not the 'right' way to do this, but I don't have a method
- // to determine how exactly these language tags are parsed, and it seems like
- // suffix parsing rules apply generally (digits allowed, length of 1 to 8).
- //
- // However, because we want to be able to lookup `x-iv-mathan` normally without
- // `multiple_suffixes` being set to true, we need to make sure to treat two-length
- // alphabetic parts as a country code.
- if (part_str.len == 2 and isAllAlphabetic(part_str)) {
- parsed.country_code = part_str;
- }
- // Everything else, though, we can just throw into the suffix as long as the normal
- // rules apply.
- else if (part_str.len > 0 and part_str.len <= 8 and isAllAlphanumeric(part_str)) {
- parsed.suffix = part_str;
- } else {
- return error.InvalidLanguageTag;
- }
- } else if (part_str.len == 4 and isAllAlphabetic(part_str)) {
- parsed.script_tag = part_str;
- } else if (part_str.len == 2 and isAllAlphabetic(part_str)) {
- parsed.country_code = part_str;
- }
- // Only a 3-len numeric suffix is allowed as the second part of a tag
- else if (part_str.len == 3 and isAllNumeric(part_str)) {
- parsed.suffix = part_str;
- }
- // Special case for qps-ploca and qps-plocm
- else if (std.ascii.eqlIgnoreCase(lang_code, "qps") and
- (std.ascii.eqlIgnoreCase(part_str, "ploca") or
- std.ascii.eqlIgnoreCase(part_str, "plocm")))
- {
- parsed.suffix = part_str;
- } else {
- return error.InvalidLanguageTag;
- }
- } else {
- // If there's no part besides a 1-len lang code, then it is malformed
- if (lang_code.len == 1) return error.InvalidLanguageTag;
- return parsed;
- }
- if (parsed.script_tag != null) {
- if (it.next()) |part_str| {
- if (part_str.len == 2 and isAllAlphabetic(part_str)) {
- parsed.country_code = part_str;
- } else {
- // Suffix is not allowed when a country code is not present.
- return error.InvalidLanguageTag;
- }
- } else {
- return parsed;
- }
- }
- // We've now parsed any potential script tag/country codes, so anything remaining
- // is a suffix
- while (it.next()) |part_str| {
- if (part_str.len == 0 or part_str.len > 8 or !isAllAlphanumeric(part_str)) {
- return error.InvalidLanguageTag;
- }
- if (parsed.suffix == null) {
- parsed.suffix = part_str;
- } else {
- // In theory we could return early here but we still want to validate
- // that each part is a valid suffix all the way to the end, e.g.
- // we should reject `en-us-suffix-a-b-c-!!!` because of the invalid `!!!`
- // suffix part.
- parsed.multiple_suffixes = true;
- }
- }
- return parsed;
-}
-
-pub const Parsed = struct {
- language_code: []const u8,
- script_tag: ?[]const u8 = null,
- country_code: ?[]const u8 = null,
- /// Can be a sort order (e.g. phoneb) or something like valencia, 001, etc
- suffix: ?[]const u8 = null,
- /// There can be any number of suffixes, but we don't need to care what their
- /// values are, we just need to know if any exist so that e.g. `ca-es-valencia-blah`
- /// can be seen as different from `ca-es-valencia`. Storing this as a bool
- /// allows us to avoid needing either (a) dynamic allocation or (b) a limit to
- /// the number of suffixes allowed when parsing.
- multiple_suffixes: bool = false,
-
- pub fn isSuffixValidSortOrder(self: Parsed) bool {
- if (self.country_code == null) return false;
- if (self.suffix == null) return false;
- if (self.script_tag != null) return false;
- if (self.multiple_suffixes) return false;
- for (valid_alternate_sorts) |valid_sort| {
- if (std.ascii.eqlIgnoreCase(valid_sort.language_code, self.language_code) and
- std.ascii.eqlIgnoreCase(valid_sort.country_code.?, self.country_code.?) and
- std.ascii.eqlIgnoreCase(valid_sort.suffix.?, self.suffix.?))
- {
- return true;
- }
- }
- return false;
- }
-};
-
-/// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
-/// See the table following this text: "Alternate sorts can be selected by using one of the identifiers from the following table."
-const valid_alternate_sorts = [_]Parsed{
- // Note: x-IV-mathan is omitted due to how lookups are implemented.
- // This table is used to make e.g. `de-de_phoneb` get looked up
- // as `de-de` (the suffix is omitted for the lookup), but x-iv-mathan
- // instead needs to be looked up with the suffix included because
- // `x-iv` is not a tag with an assigned ID.
- .{ .language_code = "de", .country_code = "de", .suffix = "phoneb" },
- .{ .language_code = "hu", .country_code = "hu", .suffix = "tchncl" },
- .{ .language_code = "ka", .country_code = "ge", .suffix = "modern" },
- .{ .language_code = "zh", .country_code = "cn", .suffix = "stroke" },
- .{ .language_code = "zh", .country_code = "sg", .suffix = "stroke" },
- .{ .language_code = "zh", .country_code = "mo", .suffix = "stroke" },
- .{ .language_code = "zh", .country_code = "tw", .suffix = "pronun" },
- .{ .language_code = "zh", .country_code = "tw", .suffix = "radstr" },
- .{ .language_code = "ja", .country_code = "jp", .suffix = "radstr" },
- .{ .language_code = "zh", .country_code = "hk", .suffix = "radstr" },
- .{ .language_code = "zh", .country_code = "mo", .suffix = "radstr" },
- .{ .language_code = "zh", .country_code = "cn", .suffix = "phoneb" },
- .{ .language_code = "zh", .country_code = "sg", .suffix = "phoneb" },
-};
-
-test "parse" {
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "en",
- }, try parse("en"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "en",
- .country_code = "us",
- }, try parse("en-us"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "en",
- .suffix = "123",
- }, try parse("en-123"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "en",
- .suffix = "123",
- .multiple_suffixes = true,
- }, try parse("en-123-blah"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "en",
- .country_code = "us",
- .suffix = "123",
- .multiple_suffixes = true,
- }, try parse("en-us_123-blah"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "eng",
- .script_tag = "Latn",
- }, try parse("eng-Latn"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "eng",
- .script_tag = "Latn",
- }, try parse("eng-Latn"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "ff",
- .script_tag = "Latn",
- .country_code = "NG",
- }, try parse("ff-Latn-NG"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "qps",
- .suffix = "Plocm",
- }, try parse("qps-Plocm"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "qps",
- .suffix = "ploca",
- }, try parse("qps-ploca"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "x",
- .country_code = "IV",
- .suffix = "mathan",
- }, try parse("x-IV-mathan"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "a",
- .suffix = "a",
- }, try parse("a-a"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "a",
- .suffix = "000",
- }, try parse("a-000"));
- try std.testing.expectEqualDeep(Parsed{
- .language_code = "a",
- .suffix = "00000000",
- }, try parse("a-00000000"));
- // suffix not allowed if script tag is present without country code
- try std.testing.expectError(error.InvalidLanguageTag, parse("eng-Latn-suffix"));
- // suffix must be 3 numeric digits if neither script tag nor country code is present
- try std.testing.expectError(error.InvalidLanguageTag, parse("eng-suffix"));
- try std.testing.expectError(error.InvalidLanguageTag, parse("en-plocm"));
- // 1-len lang code is not allowed if it's the only part
- try std.testing.expectError(error.InvalidLanguageTag, parse("e"));
-}
-
-fn isAllAlphabetic(str: []const u8) bool {
- for (str) |c| {
- if (!std.ascii.isAlphabetic(c)) return false;
- }
- return true;
-}
-
-fn isAllAlphanumeric(str: []const u8) bool {
- for (str) |c| {
- if (!std.ascii.isAlphanumeric(c)) return false;
- }
- return true;
-}
-
-fn isAllNumeric(str: []const u8) bool {
- for (str) |c| {
- if (!std.ascii.isDigit(c)) return false;
- }
- return true;
-}
-
-/// Derived from https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
-/// - Protocol Revision: 15.0
-/// - Language / Language ID / Language Tag table in Appendix A
-/// - Removed all rows that have Language ID 0x1000 (LOCALE_CUSTOM_UNSPECIFIED)
-/// - Normalized each language tag (lowercased, replaced all `-` with `_`)
-/// - There is one special case where two tags are mapped to the same ID, the following
-/// has been omitted and must be special cased during lookup to map to the ID ff_ng / 0x0467.
-/// ff_latn_ng = 0x0467, // Fulah (Latin), Nigeria
-/// - x_iv_mathan has been added which is not in the table but does appear in the Alternate sorts
-/// table as 0x007F (LANG_INVARIANT).
-pub const LanguageId = enum(u16) {
- // Language tag = Language ID, // Language, Location (or type)
- af = 0x0036, // Afrikaans
- af_za = 0x0436, // Afrikaans, South Africa
- sq = 0x001C, // Albanian
- sq_al = 0x041C, // Albanian, Albania
- gsw = 0x0084, // Alsatian
- gsw_fr = 0x0484, // Alsatian, France
- am = 0x005E, // Amharic
- am_et = 0x045E, // Amharic, Ethiopia
- ar = 0x0001, // Arabic
- ar_dz = 0x1401, // Arabic, Algeria
- ar_bh = 0x3C01, // Arabic, Bahrain
- ar_eg = 0x0c01, // Arabic, Egypt
- ar_iq = 0x0801, // Arabic, Iraq
- ar_jo = 0x2C01, // Arabic, Jordan
- ar_kw = 0x3401, // Arabic, Kuwait
- ar_lb = 0x3001, // Arabic, Lebanon
- ar_ly = 0x1001, // Arabic, Libya
- ar_ma = 0x1801, // Arabic, Morocco
- ar_om = 0x2001, // Arabic, Oman
- ar_qa = 0x4001, // Arabic, Qatar
- ar_sa = 0x0401, // Arabic, Saudi Arabia
- ar_sy = 0x2801, // Arabic, Syria
- ar_tn = 0x1C01, // Arabic, Tunisia
- ar_ae = 0x3801, // Arabic, U.A.E.
- ar_ye = 0x2401, // Arabic, Yemen
- hy = 0x002B, // Armenian
- hy_am = 0x042B, // Armenian, Armenia
- as = 0x004D, // Assamese
- as_in = 0x044D, // Assamese, India
- az_cyrl = 0x742C, // Azerbaijani (Cyrillic)
- az_cyrl_az = 0x082C, // Azerbaijani (Cyrillic), Azerbaijan
- az = 0x002C, // Azerbaijani (Latin)
- az_latn = 0x782C, // Azerbaijani (Latin)
- az_latn_az = 0x042C, // Azerbaijani (Latin), Azerbaijan
- bn = 0x0045, // Bangla
- bn_bd = 0x0845, // Bangla, Bangladesh
- bn_in = 0x0445, // Bangla, India
- ba = 0x006D, // Bashkir
- ba_ru = 0x046D, // Bashkir, Russia
- eu = 0x002D, // Basque
- eu_es = 0x042D, // Basque, Spain
- be = 0x0023, // Belarusian
- be_by = 0x0423, // Belarusian, Belarus
- bs_cyrl = 0x641A, // Bosnian (Cyrillic)
- bs_cyrl_ba = 0x201A, // Bosnian (Cyrillic), Bosnia and Herzegovina
- bs_latn = 0x681A, // Bosnian (Latin)
- bs = 0x781A, // Bosnian (Latin)
- bs_latn_ba = 0x141A, // Bosnian (Latin), Bosnia and Herzegovina
- br = 0x007E, // Breton
- br_fr = 0x047E, // Breton, France
- bg = 0x0002, // Bulgarian
- bg_bg = 0x0402, // Bulgarian, Bulgaria
- my = 0x0055, // Burmese
- my_mm = 0x0455, // Burmese, Myanmar
- ca = 0x0003, // Catalan
- ca_es = 0x0403, // Catalan, Spain
- tzm_arab_ma = 0x045F, // Central Atlas Tamazight (Arabic), Morocco
- ku = 0x0092, // Central Kurdish
- ku_arab = 0x7c92, // Central Kurdish
- ku_arab_iq = 0x0492, // Central Kurdish, Iraq
- chr = 0x005C, // Cherokee
- chr_cher = 0x7c5C, // Cherokee
- chr_cher_us = 0x045C, // Cherokee, United States
- zh_hans = 0x0004, // Chinese (Simplified)
- zh = 0x7804, // Chinese (Simplified)
- zh_cn = 0x0804, // Chinese (Simplified), People's Republic of China
- zh_sg = 0x1004, // Chinese (Simplified), Singapore
- zh_hant = 0x7C04, // Chinese (Traditional)
- zh_hk = 0x0C04, // Chinese (Traditional), Hong Kong S.A.R.
- zh_mo = 0x1404, // Chinese (Traditional), Macao S.A.R.
- zh_tw = 0x0404, // Chinese (Traditional), Taiwan
- co = 0x0083, // Corsican
- co_fr = 0x0483, // Corsican, France
- hr = 0x001A, // Croatian
- hr_hr = 0x041A, // Croatian, Croatia
- hr_ba = 0x101A, // Croatian (Latin), Bosnia and Herzegovina
- cs = 0x0005, // Czech
- cs_cz = 0x0405, // Czech, Czech Republic
- da = 0x0006, // Danish
- da_dk = 0x0406, // Danish, Denmark
- prs = 0x008C, // Dari
- prs_af = 0x048C, // Dari, Afghanistan
- dv = 0x0065, // Divehi
- dv_mv = 0x0465, // Divehi, Maldives
- nl = 0x0013, // Dutch
- nl_be = 0x0813, // Dutch, Belgium
- nl_nl = 0x0413, // Dutch, Netherlands
- dz_bt = 0x0C51, // Dzongkha, Bhutan
- en = 0x0009, // English
- en_au = 0x0C09, // English, Australia
- en_bz = 0x2809, // English, Belize
- en_ca = 0x1009, // English, Canada
- en_029 = 0x2409, // English, Caribbean
- en_hk = 0x3C09, // English, Hong Kong
- en_in = 0x4009, // English, India
- en_ie = 0x1809, // English, Ireland
- en_jm = 0x2009, // English, Jamaica
- en_my = 0x4409, // English, Malaysia
- en_nz = 0x1409, // English, New Zealand
- en_ph = 0x3409, // English, Republic of the Philippines
- en_sg = 0x4809, // English, Singapore
- en_za = 0x1C09, // English, South Africa
- en_tt = 0x2c09, // English, Trinidad and Tobago
- en_ae = 0x4C09, // English, United Arab Emirates
- en_gb = 0x0809, // English, United Kingdom
- en_us = 0x0409, // English, United States
- en_zw = 0x3009, // English, Zimbabwe
- et = 0x0025, // Estonian
- et_ee = 0x0425, // Estonian, Estonia
- fo = 0x0038, // Faroese
- fo_fo = 0x0438, // Faroese, Faroe Islands
- fil = 0x0064, // Filipino
- fil_ph = 0x0464, // Filipino, Philippines
- fi = 0x000B, // Finnish
- fi_fi = 0x040B, // Finnish, Finland
- fr = 0x000C, // French
- fr_be = 0x080C, // French, Belgium
- fr_cm = 0x2c0C, // French, Cameroon
- fr_ca = 0x0c0C, // French, Canada
- fr_029 = 0x1C0C, // French, Caribbean
- fr_cd = 0x240C, // French, Congo, DRC
- fr_ci = 0x300C, // French, Côte d'Ivoire
- fr_fr = 0x040C, // French, France
- fr_ht = 0x3c0C, // French, Haiti
- fr_lu = 0x140C, // French, Luxembourg
- fr_ml = 0x340C, // French, Mali
- fr_ma = 0x380C, // French, Morocco
- fr_mc = 0x180C, // French, Principality of Monaco
- fr_re = 0x200C, // French, Reunion
- fr_sn = 0x280C, // French, Senegal
- fr_ch = 0x100C, // French, Switzerland
- fy = 0x0062, // Frisian
- fy_nl = 0x0462, // Frisian, Netherlands
- ff = 0x0067, // Fulah
- ff_latn = 0x7C67, // Fulah (Latin)
- ff_ng = 0x0467, // Fulah, Nigeria
- ff_latn_sn = 0x0867, // Fulah, Senegal
- gl = 0x0056, // Galician
- gl_es = 0x0456, // Galician, Spain
- ka = 0x0037, // Georgian
- ka_ge = 0x0437, // Georgian, Georgia
- de = 0x0007, // German
- de_at = 0x0C07, // German, Austria
- de_de = 0x0407, // German, Germany
- de_li = 0x1407, // German, Liechtenstein
- de_lu = 0x1007, // German, Luxembourg
- de_ch = 0x0807, // German, Switzerland
- el = 0x0008, // Greek
- el_gr = 0x0408, // Greek, Greece
- kl = 0x006F, // Greenlandic
- kl_gl = 0x046F, // Greenlandic, Greenland
- gn = 0x0074, // Guarani
- gn_py = 0x0474, // Guarani, Paraguay
- gu = 0x0047, // Gujarati
- gu_in = 0x0447, // Gujarati, India
- ha = 0x0068, // Hausa (Latin)
- ha_latn = 0x7C68, // Hausa (Latin)
- ha_latn_ng = 0x0468, // Hausa (Latin), Nigeria
- haw = 0x0075, // Hawaiian
- haw_us = 0x0475, // Hawaiian, United States
- he = 0x000D, // Hebrew
- he_il = 0x040D, // Hebrew, Israel
- hi = 0x0039, // Hindi
- hi_in = 0x0439, // Hindi, India
- hu = 0x000E, // Hungarian
- hu_hu = 0x040E, // Hungarian, Hungary
- is = 0x000F, // Icelandic
- is_is = 0x040F, // Icelandic, Iceland
- ig = 0x0070, // Igbo
- ig_ng = 0x0470, // Igbo, Nigeria
- id = 0x0021, // Indonesian
- id_id = 0x0421, // Indonesian, Indonesia
- iu = 0x005D, // Inuktitut (Latin)
- iu_latn = 0x7C5D, // Inuktitut (Latin)
- iu_latn_ca = 0x085D, // Inuktitut (Latin), Canada
- iu_cans = 0x785D, // Inuktitut (Syllabics)
- iu_cans_ca = 0x045d, // Inuktitut (Syllabics), Canada
- ga = 0x003C, // Irish
- ga_ie = 0x083C, // Irish, Ireland
- it = 0x0010, // Italian
- it_it = 0x0410, // Italian, Italy
- it_ch = 0x0810, // Italian, Switzerland
- ja = 0x0011, // Japanese
- ja_jp = 0x0411, // Japanese, Japan
- kn = 0x004B, // Kannada
- kn_in = 0x044B, // Kannada, India
- kr_latn_ng = 0x0471, // Kanuri (Latin), Nigeria
- ks = 0x0060, // Kashmiri
- ks_arab = 0x0460, // Kashmiri, Perso-Arabic
- ks_deva_in = 0x0860, // Kashmiri (Devanagari), India
- kk = 0x003F, // Kazakh
- kk_kz = 0x043F, // Kazakh, Kazakhstan
- km = 0x0053, // Khmer
- km_kh = 0x0453, // Khmer, Cambodia
- quc = 0x0086, // K'iche
- quc_latn_gt = 0x0486, // K'iche, Guatemala
- rw = 0x0087, // Kinyarwanda
- rw_rw = 0x0487, // Kinyarwanda, Rwanda
- sw = 0x0041, // Kiswahili
- sw_ke = 0x0441, // Kiswahili, Kenya
- kok = 0x0057, // Konkani
- kok_in = 0x0457, // Konkani, India
- ko = 0x0012, // Korean
- ko_kr = 0x0412, // Korean, Korea
- ky = 0x0040, // Kyrgyz
- ky_kg = 0x0440, // Kyrgyz, Kyrgyzstan
- lo = 0x0054, // Lao
- lo_la = 0x0454, // Lao, Lao P.D.R.
- la_va = 0x0476, // Latin, Vatican City
- lv = 0x0026, // Latvian
- lv_lv = 0x0426, // Latvian, Latvia
- lt = 0x0027, // Lithuanian
- lt_lt = 0x0427, // Lithuanian, Lithuania
- dsb = 0x7C2E, // Lower Sorbian
- dsb_de = 0x082E, // Lower Sorbian, Germany
- lb = 0x006E, // Luxembourgish
- lb_lu = 0x046E, // Luxembourgish, Luxembourg
- mk = 0x002F, // Macedonian
- mk_mk = 0x042F, // Macedonian, North Macedonia
- ms = 0x003E, // Malay
- ms_bn = 0x083E, // Malay, Brunei Darussalam
- ms_my = 0x043E, // Malay, Malaysia
- ml = 0x004C, // Malayalam
- ml_in = 0x044C, // Malayalam, India
- mt = 0x003A, // Maltese
- mt_mt = 0x043A, // Maltese, Malta
- mi = 0x0081, // Maori
- mi_nz = 0x0481, // Maori, New Zealand
- arn = 0x007A, // Mapudungun
- arn_cl = 0x047A, // Mapudungun, Chile
- mr = 0x004E, // Marathi
- mr_in = 0x044E, // Marathi, India
- moh = 0x007C, // Mohawk
- moh_ca = 0x047C, // Mohawk, Canada
- mn = 0x0050, // Mongolian (Cyrillic)
- mn_cyrl = 0x7850, // Mongolian (Cyrillic)
- mn_mn = 0x0450, // Mongolian (Cyrillic), Mongolia
- mn_mong = 0x7C50, // Mongolian (Traditional Mongolian)
- mn_mong_cn = 0x0850, // Mongolian (Traditional Mongolian), People's Republic of China
- mn_mong_mn = 0x0C50, // Mongolian (Traditional Mongolian), Mongolia
- ne = 0x0061, // Nepali
- ne_in = 0x0861, // Nepali, India
- ne_np = 0x0461, // Nepali, Nepal
- no = 0x0014, // Norwegian (Bokmal)
- nb = 0x7C14, // Norwegian (Bokmal)
- nb_no = 0x0414, // Norwegian (Bokmal), Norway
- nn = 0x7814, // Norwegian (Nynorsk)
- nn_no = 0x0814, // Norwegian (Nynorsk), Norway
- oc = 0x0082, // Occitan
- oc_fr = 0x0482, // Occitan, France
- @"or" = 0x0048, // Odia
- or_in = 0x0448, // Odia, India
- om = 0x0072, // Oromo
- om_et = 0x0472, // Oromo, Ethiopia
- ps = 0x0063, // Pashto
- ps_af = 0x0463, // Pashto, Afghanistan
- fa = 0x0029, // Persian
- fa_ir = 0x0429, // Persian, Iran
- pl = 0x0015, // Polish
- pl_pl = 0x0415, // Polish, Poland
- pt = 0x0016, // Portuguese
- pt_br = 0x0416, // Portuguese, Brazil
- pt_pt = 0x0816, // Portuguese, Portugal
- qps_ploca = 0x05FE, // Pseudo Language, Pseudo locale for east Asian/complex script localization testing
- qps_ploc = 0x0501, // Pseudo Language, Pseudo locale used for localization testing
- qps_plocm = 0x09FF, // Pseudo Language, Pseudo locale used for localization testing of mirrored locales
- pa = 0x0046, // Punjabi
- pa_arab = 0x7C46, // Punjabi
- pa_in = 0x0446, // Punjabi, India
- pa_arab_pk = 0x0846, // Punjabi, Islamic Republic of Pakistan
- quz = 0x006B, // Quechua
- quz_bo = 0x046B, // Quechua, Bolivia
- quz_ec = 0x086B, // Quechua, Ecuador
- quz_pe = 0x0C6B, // Quechua, Peru
- ro = 0x0018, // Romanian
- ro_md = 0x0818, // Romanian, Moldova
- ro_ro = 0x0418, // Romanian, Romania
- rm = 0x0017, // Romansh
- rm_ch = 0x0417, // Romansh, Switzerland
- ru = 0x0019, // Russian
- ru_md = 0x0819, // Russian, Moldova
- ru_ru = 0x0419, // Russian, Russia
- sah = 0x0085, // Sakha
- sah_ru = 0x0485, // Sakha, Russia
- smn = 0x703B, // Sami (Inari)
- smn_fi = 0x243B, // Sami (Inari), Finland
- smj = 0x7C3B, // Sami (Lule)
- smj_no = 0x103B, // Sami (Lule), Norway
- smj_se = 0x143B, // Sami (Lule), Sweden
- se = 0x003B, // Sami (Northern)
- se_fi = 0x0C3B, // Sami (Northern), Finland
- se_no = 0x043B, // Sami (Northern), Norway
- se_se = 0x083B, // Sami (Northern), Sweden
- sms = 0x743B, // Sami (Skolt)
- sms_fi = 0x203B, // Sami (Skolt), Finland
- sma = 0x783B, // Sami (Southern)
- sma_no = 0x183B, // Sami (Southern), Norway
- sma_se = 0x1C3B, // Sami (Southern), Sweden
- sa = 0x004F, // Sanskrit
- sa_in = 0x044F, // Sanskrit, India
- gd = 0x0091, // Scottish Gaelic
- gd_gb = 0x0491, // Scottish Gaelic, United Kingdom
- sr_cyrl = 0x6C1A, // Serbian (Cyrillic)
- sr_cyrl_ba = 0x1C1A, // Serbian (Cyrillic), Bosnia and Herzegovina
- sr_cyrl_me = 0x301A, // Serbian (Cyrillic), Montenegro
- sr_cyrl_rs = 0x281A, // Serbian (Cyrillic), Serbia
- sr_cyrl_cs = 0x0C1A, // Serbian (Cyrillic), Serbia and Montenegro (Former)
- sr_latn = 0x701A, // Serbian (Latin)
- sr = 0x7C1A, // Serbian (Latin)
- sr_latn_ba = 0x181A, // Serbian (Latin), Bosnia and Herzegovina
- sr_latn_me = 0x2c1A, // Serbian (Latin), Montenegro
- sr_latn_rs = 0x241A, // Serbian (Latin), Serbia
- sr_latn_cs = 0x081A, // Serbian (Latin), Serbia and Montenegro (Former)
- nso = 0x006C, // Sesotho sa Leboa
- nso_za = 0x046C, // Sesotho sa Leboa, South Africa
- tn = 0x0032, // Setswana
- tn_bw = 0x0832, // Setswana, Botswana
- tn_za = 0x0432, // Setswana, South Africa
- sd = 0x0059, // Sindhi
- sd_arab = 0x7C59, // Sindhi
- sd_arab_pk = 0x0859, // Sindhi, Islamic Republic of Pakistan
- si = 0x005B, // Sinhala
- si_lk = 0x045B, // Sinhala, Sri Lanka
- sk = 0x001B, // Slovak
- sk_sk = 0x041B, // Slovak, Slovakia
- sl = 0x0024, // Slovenian
- sl_si = 0x0424, // Slovenian, Slovenia
- so = 0x0077, // Somali
- so_so = 0x0477, // Somali, Somalia
- st = 0x0030, // Sotho
- st_za = 0x0430, // Sotho, South Africa
- es = 0x000A, // Spanish
- es_ar = 0x2C0A, // Spanish, Argentina
- es_ve = 0x200A, // Spanish, Bolivarian Republic of Venezuela
- es_bo = 0x400A, // Spanish, Bolivia
- es_cl = 0x340A, // Spanish, Chile
- es_co = 0x240A, // Spanish, Colombia
- es_cr = 0x140A, // Spanish, Costa Rica
- es_cu = 0x5c0A, // Spanish, Cuba
- es_do = 0x1c0A, // Spanish, Dominican Republic
- es_ec = 0x300A, // Spanish, Ecuador
- es_sv = 0x440A, // Spanish, El Salvador
- es_gt = 0x100A, // Spanish, Guatemala
- es_hn = 0x480A, // Spanish, Honduras
- es_419 = 0x580A, // Spanish, Latin America
- es_mx = 0x080A, // Spanish, Mexico
- es_ni = 0x4C0A, // Spanish, Nicaragua
- es_pa = 0x180A, // Spanish, Panama
- es_py = 0x3C0A, // Spanish, Paraguay
- es_pe = 0x280A, // Spanish, Peru
- es_pr = 0x500A, // Spanish, Puerto Rico
- es_es_tradnl = 0x040A, // Spanish, Spain
- es_es = 0x0c0A, // Spanish, Spain
- es_us = 0x540A, // Spanish, United States
- es_uy = 0x380A, // Spanish, Uruguay
- sv = 0x001D, // Swedish
- sv_fi = 0x081D, // Swedish, Finland
- sv_se = 0x041D, // Swedish, Sweden
- syr = 0x005A, // Syriac
- syr_sy = 0x045A, // Syriac, Syria
- tg = 0x0028, // Tajik (Cyrillic)
- tg_cyrl = 0x7C28, // Tajik (Cyrillic)
- tg_cyrl_tj = 0x0428, // Tajik (Cyrillic), Tajikistan
- tzm = 0x005F, // Tamazight (Latin)
- tzm_latn = 0x7C5F, // Tamazight (Latin)
- tzm_latn_dz = 0x085F, // Tamazight (Latin), Algeria
- ta = 0x0049, // Tamil
- ta_in = 0x0449, // Tamil, India
- ta_lk = 0x0849, // Tamil, Sri Lanka
- tt = 0x0044, // Tatar
- tt_ru = 0x0444, // Tatar, Russia
- te = 0x004A, // Telugu
- te_in = 0x044A, // Telugu, India
- th = 0x001E, // Thai
- th_th = 0x041E, // Thai, Thailand
- bo = 0x0051, // Tibetan
- bo_cn = 0x0451, // Tibetan, People's Republic of China
- ti = 0x0073, // Tigrinya
- ti_er = 0x0873, // Tigrinya, Eritrea
- ti_et = 0x0473, // Tigrinya, Ethiopia
- ts = 0x0031, // Tsonga
- ts_za = 0x0431, // Tsonga, South Africa
- tr = 0x001F, // Turkish
- tr_tr = 0x041F, // Turkish, Turkey
- tk = 0x0042, // Turkmen
- tk_tm = 0x0442, // Turkmen, Turkmenistan
- uk = 0x0022, // Ukrainian
- uk_ua = 0x0422, // Ukrainian, Ukraine
- hsb = 0x002E, // Upper Sorbian
- hsb_de = 0x042E, // Upper Sorbian, Germany
- ur = 0x0020, // Urdu
- ur_in = 0x0820, // Urdu, India
- ur_pk = 0x0420, // Urdu, Islamic Republic of Pakistan
- ug = 0x0080, // Uyghur
- ug_cn = 0x0480, // Uyghur, People's Republic of China
- uz_cyrl = 0x7843, // Uzbek (Cyrillic)
- uz_cyrl_uz = 0x0843, // Uzbek (Cyrillic), Uzbekistan
- uz = 0x0043, // Uzbek (Latin)
- uz_latn = 0x7C43, // Uzbek (Latin)
- uz_latn_uz = 0x0443, // Uzbek (Latin), Uzbekistan
- ca_es_valencia = 0x0803, // Valencian, Spain
- ve = 0x0033, // Venda
- ve_za = 0x0433, // Venda, South Africa
- vi = 0x002A, // Vietnamese
- vi_vn = 0x042A, // Vietnamese, Vietnam
- cy = 0x0052, // Welsh
- cy_gb = 0x0452, // Welsh, United Kingdom
- wo = 0x0088, // Wolof
- wo_sn = 0x0488, // Wolof, Senegal
- xh = 0x0034, // Xhosa
- xh_za = 0x0434, // Xhosa, South Africa
- ii = 0x0078, // Yi
- ii_cn = 0x0478, // Yi, People's Republic of China
- yi_001 = 0x043D, // Yiddish, World
- yo = 0x006A, // Yoruba
- yo_ng = 0x046A, // Yoruba, Nigeria
- zu = 0x0035, // Zulu
- zu_za = 0x0435, // Zulu, South Africa
-
- /// Special case
- x_iv_mathan = 0x007F, // LANG_INVARIANT, "math alphanumeric sorting"
-};
diff --git a/src/resinator/lex.zig b/src/resinator/lex.zig
@@ -1,1098 +0,0 @@
-//! Expects to be run after the C preprocessor and after `removeComments`.
-//! This means that the lexer assumes that:
-//! - Splices ('\' at the end of a line) have been handled/collapsed.
-//! - Preprocessor directives and macros have been expanded (any remaining should be skipped with the exception of `#pragma code_page`).
-//! - All comments have been removed.
-
-const std = @import("std");
-const ErrorDetails = @import("errors.zig").ErrorDetails;
-const columnWidth = @import("literals.zig").columnWidth;
-const code_pages = @import("code_pages.zig");
-const CodePage = code_pages.CodePage;
-const SourceMappings = @import("source_mapping.zig").SourceMappings;
-const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit;
-
-const dumpTokensDuringTests = false;
-
-pub const default_max_string_literal_codepoints = 4097;
-
-pub const Token = struct {
- id: Id,
- start: usize,
- end: usize,
- line_number: usize,
-
- pub const Id = enum {
- literal,
- number,
- quoted_ascii_string,
- quoted_wide_string,
- operator,
- begin,
- end,
- comma,
- open_paren,
- close_paren,
- /// This Id is only used for errors, the Lexer will never return one
- /// of these from a `next` call.
- preprocessor_command,
- invalid,
- eof,
-
- pub fn nameForErrorDisplay(self: Id) []const u8 {
- return switch (self) {
- .literal => "<literal>",
- .number => "<number>",
- .quoted_ascii_string => "<quoted ascii string>",
- .quoted_wide_string => "<quoted wide string>",
- .operator => "<operator>",
- .begin => "<'{' or BEGIN>",
- .end => "<'}' or END>",
- .comma => ",",
- .open_paren => "(",
- .close_paren => ")",
- .preprocessor_command => "<preprocessor command>",
- .invalid => unreachable,
- .eof => "<eof>",
- };
- }
- };
-
- pub fn slice(self: Token, buffer: []const u8) []const u8 {
- return buffer[self.start..self.end];
- }
-
- pub fn nameForErrorDisplay(self: Token, buffer: []const u8) []const u8 {
- return switch (self.id) {
- .eof => self.id.nameForErrorDisplay(),
- else => self.slice(buffer),
- };
- }
-
- /// Returns 0-based column
- pub fn calculateColumn(token: Token, source: []const u8, tab_columns: usize, maybe_line_start: ?usize) usize {
- const line_start = maybe_line_start orelse token.getLineStart(source);
-
- var i: usize = line_start;
- var column: usize = 0;
- while (i < token.start) : (i += 1) {
- column += columnWidth(column, source[i], tab_columns);
- }
- return column;
- }
-
- // TODO: This doesn't necessarily match up with how we count line numbers, but where a line starts
- // has a knock-on effect on calculateColumn. More testing is needed to determine what needs
- // to be changed to make this both (1) match how line numbers are counted and (2) match how
- // the Win32 RC compiler counts tab columns.
- //
- // (the TODO in currentIndexFormsLineEndingPair should be taken into account as well)
- pub fn getLineStart(token: Token, source: []const u8) usize {
- const line_start = line_start: {
- if (token.start != 0) {
- // start checking at the byte before the token
- var index = token.start - 1;
- while (true) {
- if (source[index] == '\n') break :line_start @min(source.len - 1, index + 1);
- if (index != 0) index -= 1 else break;
- }
- }
- break :line_start 0;
- };
- return line_start;
- }
-
- pub fn getLine(token: Token, source: []const u8, maybe_line_start: ?usize) []const u8 {
- const line_start = maybe_line_start orelse token.getLineStart(source);
-
- var line_end = line_start + 1;
- if (line_end >= source.len or source[line_end] == '\n') return source[line_start..line_start];
- while (line_end < source.len and source[line_end] != '\n') : (line_end += 1) {}
- while (line_end > 0 and source[line_end - 1] == '\r') : (line_end -= 1) {}
-
- return source[line_start..line_end];
- }
-
- pub fn isStringLiteral(token: Token) bool {
- return token.id == .quoted_ascii_string or token.id == .quoted_wide_string;
- }
-};
-
-pub const LineHandler = struct {
- line_number: usize = 1,
- buffer: []const u8,
- last_line_ending_index: ?usize = null,
-
- /// Like incrementLineNumber but checks that the current char is a line ending first.
- /// Returns the new line number if it was incremented, null otherwise.
- pub fn maybeIncrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
- const c = self.buffer[cur_index];
- if (c == '\r' or c == '\n') {
- return self.incrementLineNumber(cur_index);
- }
- return null;
- }
-
- /// Increments line_number appropriately (handling line ending pairs)
- /// and returns the new line number if it was incremented, or null otherwise.
- pub fn incrementLineNumber(self: *LineHandler, cur_index: usize) ?usize {
- if (self.currentIndexFormsLineEndingPair(cur_index)) {
- self.last_line_ending_index = null;
- return null;
- } else {
- self.line_number += 1;
- self.last_line_ending_index = cur_index;
- return self.line_number;
- }
- }
-
- /// \r\n and \n\r pairs are treated as a single line ending (but not \r\r \n\n)
- /// expects self.index and last_line_ending_index (if non-null) to contain line endings
- ///
- /// TODO: This is not really how the Win32 RC compiler handles line endings. Instead, it
- /// seems to drop all carriage returns during preprocessing and then replace all
- /// remaining line endings with well-formed CRLF pairs (e.g. `<CR>a<CR>b<LF>c` becomes `ab<CR><LF>c`).
- /// Handling this the same as the Win32 RC compiler would need control over the preprocessor,
- /// since Clang converts unpaired <CR> into unpaired <LF>.
- pub fn currentIndexFormsLineEndingPair(self: *const LineHandler, cur_index: usize) bool {
- if (self.last_line_ending_index == null) return false;
-
- // must immediately precede the current index, we know cur_index must
- // be >= 1 since last_line_ending_index is non-null (so if the subtraction
- // overflows it is a bug at the callsite of this function).
- if (self.last_line_ending_index.? != cur_index - 1) return false;
-
- const cur_line_ending = self.buffer[cur_index];
- const last_line_ending = self.buffer[self.last_line_ending_index.?];
-
- // sanity check
- std.debug.assert(cur_line_ending == '\r' or cur_line_ending == '\n');
- std.debug.assert(last_line_ending == '\r' or last_line_ending == '\n');
-
- // can't be \n\n or \r\r
- if (last_line_ending == cur_line_ending) return false;
-
- return true;
- }
-};
-
-pub const LexError = error{
- UnfinishedStringLiteral,
- StringLiteralTooLong,
- InvalidNumberWithExponent,
- InvalidDigitCharacterInNumberLiteral,
- IllegalByte,
- IllegalByteOutsideStringLiterals,
- IllegalCodepointOutsideStringLiterals,
- IllegalByteOrderMark,
- IllegalPrivateUseCharacter,
- FoundCStyleEscapedQuote,
- CodePagePragmaMissingLeftParen,
- CodePagePragmaMissingRightParen,
- /// Can be caught and ignored
- CodePagePragmaInvalidCodePage,
- CodePagePragmaNotInteger,
- CodePagePragmaOverflow,
- CodePagePragmaUnsupportedCodePage,
- /// Can be caught and ignored
- CodePagePragmaInIncludedFile,
-};
-
-pub const Lexer = struct {
- const Self = @This();
-
- buffer: []const u8,
- index: usize,
- line_handler: LineHandler,
- at_start_of_line: bool = true,
- error_context_token: ?Token = null,
- current_code_page: CodePage,
- default_code_page: CodePage,
- source_mappings: ?*SourceMappings,
- max_string_literal_codepoints: u15,
- /// Needed to determine whether or not the output code page should
- /// be set in the parser.
- seen_pragma_code_pages: u2 = 0,
-
- pub const Error = LexError;
-
- pub const LexerOptions = struct {
- default_code_page: CodePage = .windows1252,
- source_mappings: ?*SourceMappings = null,
- max_string_literal_codepoints: u15 = default_max_string_literal_codepoints,
- };
-
- pub fn init(buffer: []const u8, options: LexerOptions) Self {
- return Self{
- .buffer = buffer,
- .index = 0,
- .current_code_page = options.default_code_page,
- .default_code_page = options.default_code_page,
- .source_mappings = options.source_mappings,
- .max_string_literal_codepoints = options.max_string_literal_codepoints,
- .line_handler = .{ .buffer = buffer },
- };
- }
-
- pub fn dump(self: *Self, token: *const Token) void {
- std.debug.print("{s}:{d}: {s}\n", .{ @tagName(token.id), token.line_number, std.fmt.fmtSliceEscapeLower(token.slice(self.buffer)) });
- }
-
- pub const LexMethod = enum {
- whitespace_delimiter_only,
- normal,
- normal_expect_operator,
- };
-
- pub fn next(self: *Self, comptime method: LexMethod) LexError!Token {
- switch (method) {
- .whitespace_delimiter_only => return self.nextWhitespaceDelimeterOnly(),
- .normal => return self.nextNormal(),
- .normal_expect_operator => return self.nextNormalWithContext(.expect_operator),
- }
- }
-
- const StateWhitespaceDelimiterOnly = enum {
- start,
- literal,
- preprocessor,
- semicolon,
- };
-
- pub fn nextWhitespaceDelimeterOnly(self: *Self) LexError!Token {
- const start_index = self.index;
- var result = Token{
- .id = .eof,
- .start = start_index,
- .end = undefined,
- .line_number = self.line_handler.line_number,
- };
- var state = StateWhitespaceDelimiterOnly.start;
-
- while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) {
- const c = codepoint.value;
- try self.checkForIllegalCodepoint(codepoint, false);
- switch (state) {
- .start => switch (c) {
- '\r', '\n' => {
- result.start = self.index + 1;
- result.line_number = self.incrementLineNumber();
- },
- ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
- result.start = self.index + 1;
- },
- // NBSP only counts as whitespace at the start of a line (but
- // can be intermixed with other whitespace). Who knows why.
- '\xA0' => if (self.at_start_of_line) {
- result.start = self.index + codepoint.byte_len;
- } else {
- state = .literal;
- self.at_start_of_line = false;
- },
- '#' => {
- if (self.at_start_of_line) {
- state = .preprocessor;
- } else {
- state = .literal;
- }
- self.at_start_of_line = false;
- },
- // Semi-colon acts as a line-terminator, but in this lexing mode
- // that's only true if it's at the start of a line.
- ';' => {
- if (self.at_start_of_line) {
- state = .semicolon;
- }
- self.at_start_of_line = false;
- },
- else => {
- state = .literal;
- self.at_start_of_line = false;
- },
- },
- .literal => switch (c) {
- '\r', '\n', ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
- result.id = .literal;
- break;
- },
- else => {},
- },
- .preprocessor => switch (c) {
- '\r', '\n' => {
- try self.evaluatePreprocessorCommand(result.start, self.index);
- result.start = self.index + 1;
- state = .start;
- result.line_number = self.incrementLineNumber();
- },
- else => {},
- },
- .semicolon => switch (c) {
- '\r', '\n' => {
- result.start = self.index + 1;
- state = .start;
- result.line_number = self.incrementLineNumber();
- },
- else => {},
- },
- }
- } else { // got EOF
- switch (state) {
- .start, .semicolon => {},
- .literal => {
- result.id = .literal;
- },
- .preprocessor => {
- try self.evaluatePreprocessorCommand(result.start, self.index);
- result.start = self.index;
- },
- }
- }
-
- result.end = self.index;
- return result;
- }
-
- const StateNormal = enum {
- start,
- literal_or_quoted_wide_string,
- quoted_ascii_string,
- quoted_wide_string,
- quoted_ascii_string_escape,
- quoted_wide_string_escape,
- quoted_ascii_string_maybe_end,
- quoted_wide_string_maybe_end,
- literal,
- number_literal,
- preprocessor,
- semicolon,
- // end
- e,
- en,
- // begin
- b,
- be,
- beg,
- begi,
- };
-
- /// TODO: A not-terrible name
- pub fn nextNormal(self: *Self) LexError!Token {
- return self.nextNormalWithContext(.any);
- }
-
- pub fn nextNormalWithContext(self: *Self, context: enum { expect_operator, any }) LexError!Token {
- const start_index = self.index;
- var result = Token{
- .id = .eof,
- .start = start_index,
- .end = undefined,
- .line_number = self.line_handler.line_number,
- };
- var state = StateNormal.start;
-
- // Note: The Windows RC compiler uses a non-standard method of computing
- // length for its 'string literal too long' errors; it isn't easily
- // explained or intuitive (it's sort-of pre-parsed byte length but with
- // a few of exceptions/edge cases).
- //
- // It also behaves strangely with non-ASCII codepoints, e.g. even though the default
- // limit is 4097, you can only have 4094 € codepoints (1 UTF-16 code unit each),
- // and 2048 𐐷 codepoints (2 UTF-16 code units each).
- //
- // TODO: Understand this more, bring it more in line with how the Win32 limits work.
- // Alternatively, do something that makes more sense but may be more permissive.
- var string_literal_length: usize = 0;
- // Keeping track of the string literal column prevents pathological edge cases when
- // there are tons of tab stop characters within a string literal.
- var string_literal_column: usize = 0;
- var string_literal_collapsing_whitespace: bool = false;
- var still_could_have_exponent: bool = true;
- var exponent_index: ?usize = null;
- while (self.current_code_page.codepointAt(self.index, self.buffer)) |codepoint| : (self.index += codepoint.byte_len) {
- const c = codepoint.value;
- const in_string_literal = switch (state) {
- .quoted_ascii_string,
- .quoted_wide_string,
- .quoted_ascii_string_escape,
- .quoted_wide_string_escape,
- .quoted_ascii_string_maybe_end,
- .quoted_wide_string_maybe_end,
- =>
- // If the current line is not the same line as the start of the string literal,
- // then we want to treat the current codepoint as 'not in a string literal'
- // for the purposes of detecting illegal codepoints. This means that we will
- // error on illegal-outside-string-literal characters that are outside string
- // literals from the perspective of a C preprocessor, but that may be
- // inside string literals from the perspective of the RC lexer. For example,
- // "hello
- // @"
- // will be treated as a single string literal by the RC lexer but the Win32
- // preprocessor will consider this an unclosed string literal followed by
- // the character @ and ", and will therefore error since the Win32 RC preprocessor
- // errors on the @ character outside string literals.
- //
- // By doing this here, we can effectively emulate the Win32 RC preprocessor behavior
- // at lex-time, and avoid the need for a separate step that checks for this edge-case
- // specifically.
- result.line_number == self.line_handler.line_number,
- else => false,
- };
- try self.checkForIllegalCodepoint(codepoint, in_string_literal);
- switch (state) {
- .start => switch (c) {
- '\r', '\n' => {
- result.start = self.index + 1;
- result.line_number = self.incrementLineNumber();
- },
- ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F' => {
- result.start = self.index + 1;
- },
- // NBSP only counts as whitespace at the start of a line (but
- // can be intermixed with other whitespace). Who knows why.
- '\xA0' => if (self.at_start_of_line) {
- result.start = self.index + codepoint.byte_len;
- } else {
- state = .literal;
- self.at_start_of_line = false;
- },
- 'L', 'l' => {
- state = .literal_or_quoted_wide_string;
- self.at_start_of_line = false;
- },
- 'E', 'e' => {
- state = .e;
- self.at_start_of_line = false;
- },
- 'B', 'b' => {
- state = .b;
- self.at_start_of_line = false;
- },
- '"' => {
- state = .quoted_ascii_string;
- self.at_start_of_line = false;
- string_literal_collapsing_whitespace = false;
- string_literal_length = 0;
-
- var dummy_token = Token{
- .start = self.index,
- .end = self.index,
- .line_number = self.line_handler.line_number,
- .id = .invalid,
- };
- string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
- },
- '+', '&', '|' => {
- self.index += 1;
- result.id = .operator;
- self.at_start_of_line = false;
- break;
- },
- '-' => {
- if (context == .expect_operator) {
- self.index += 1;
- result.id = .operator;
- self.at_start_of_line = false;
- break;
- } else {
- state = .number_literal;
- still_could_have_exponent = true;
- exponent_index = null;
- self.at_start_of_line = false;
- }
- },
- '0'...'9', '~' => {
- state = .number_literal;
- still_could_have_exponent = true;
- exponent_index = null;
- self.at_start_of_line = false;
- },
- '#' => {
- if (self.at_start_of_line) {
- state = .preprocessor;
- } else {
- state = .literal;
- }
- self.at_start_of_line = false;
- },
- ';' => {
- state = .semicolon;
- self.at_start_of_line = false;
- },
- '{', '}' => {
- self.index += 1;
- result.id = if (c == '{') .begin else .end;
- self.at_start_of_line = false;
- break;
- },
- '(', ')' => {
- self.index += 1;
- result.id = if (c == '(') .open_paren else .close_paren;
- self.at_start_of_line = false;
- break;
- },
- ',' => {
- self.index += 1;
- result.id = .comma;
- self.at_start_of_line = false;
- break;
- },
- else => {
- if (isNonAsciiDigit(c)) {
- self.error_context_token = .{
- .id = .number,
- .start = result.start,
- .end = self.index + 1,
- .line_number = self.line_handler.line_number,
- };
- return error.InvalidDigitCharacterInNumberLiteral;
- }
- state = .literal;
- self.at_start_of_line = false;
- },
- },
- .preprocessor => switch (c) {
- '\r', '\n' => {
- try self.evaluatePreprocessorCommand(result.start, self.index);
- result.start = self.index + 1;
- state = .start;
- result.line_number = self.incrementLineNumber();
- },
- else => {},
- },
- // Semi-colon acts as a line-terminator--everything is skipped until
- // the next line.
- .semicolon => switch (c) {
- '\r', '\n' => {
- result.start = self.index + 1;
- state = .start;
- result.line_number = self.incrementLineNumber();
- },
- else => {},
- },
- .number_literal => switch (c) {
- // zig fmt: off
- ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
- '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
- '\'', ';', '=',
- => {
- // zig fmt: on
- result.id = .number;
- break;
- },
- '0'...'9' => {
- if (exponent_index) |exp_i| {
- if (self.index - 1 == exp_i) {
- // Note: This being an error is a quirk of the preprocessor used by
- // the Win32 RC compiler.
- self.error_context_token = .{
- .id = .number,
- .start = result.start,
- .end = self.index + 1,
- .line_number = self.line_handler.line_number,
- };
- return error.InvalidNumberWithExponent;
- }
- }
- },
- 'e', 'E' => {
- if (still_could_have_exponent) {
- exponent_index = self.index;
- still_could_have_exponent = false;
- }
- },
- else => {
- if (isNonAsciiDigit(c)) {
- self.error_context_token = .{
- .id = .number,
- .start = result.start,
- .end = self.index + 1,
- .line_number = self.line_handler.line_number,
- };
- return error.InvalidDigitCharacterInNumberLiteral;
- }
- still_could_have_exponent = false;
- },
- },
- .literal_or_quoted_wide_string => switch (c) {
- // zig fmt: off
- ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
- '\r', '\n', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
- '\'', ';', '=',
- // zig fmt: on
- => {
- result.id = .literal;
- break;
- },
- '"' => {
- state = .quoted_wide_string;
- string_literal_collapsing_whitespace = false;
- string_literal_length = 0;
-
- var dummy_token = Token{
- .start = self.index,
- .end = self.index,
- .line_number = self.line_handler.line_number,
- .id = .invalid,
- };
- string_literal_column = dummy_token.calculateColumn(self.buffer, 8, null);
- },
- else => {
- state = .literal;
- },
- },
- .literal => switch (c) {
- // zig fmt: off
- ' ', '\t', '\x05'...'\x08', '\x0B'...'\x0C', '\x0E'...'\x1F',
- '\r', '\n', '"', ',', '{', '}', '+', '-', '|', '&', '~', '(', ')',
- '\'', ';', '=',
- => {
- // zig fmt: on
- result.id = .literal;
- break;
- },
- else => {},
- },
- .e => switch (c) {
- 'N', 'n' => {
- state = .en;
- },
- else => {
- state = .literal;
- self.index -= 1;
- },
- },
- .en => switch (c) {
- 'D', 'd' => {
- result.id = .end;
- self.index += 1;
- break;
- },
- else => {
- state = .literal;
- self.index -= 1;
- },
- },
- .b => switch (c) {
- 'E', 'e' => {
- state = .be;
- },
- else => {
- state = .literal;
- self.index -= 1;
- },
- },
- .be => switch (c) {
- 'G', 'g' => {
- state = .beg;
- },
- else => {
- state = .literal;
- self.index -= 1;
- },
- },
- .beg => switch (c) {
- 'I', 'i' => {
- state = .begi;
- },
- else => {
- state = .literal;
- self.index -= 1;
- },
- },
- .begi => switch (c) {
- 'N', 'n' => {
- result.id = .begin;
- self.index += 1;
- break;
- },
- else => {
- state = .literal;
- self.index -= 1;
- },
- },
- .quoted_ascii_string, .quoted_wide_string => switch (c) {
- '"' => {
- string_literal_column += 1;
- state = if (state == .quoted_ascii_string) .quoted_ascii_string_maybe_end else .quoted_wide_string_maybe_end;
- },
- '\\' => {
- string_literal_length += 1;
- string_literal_column += 1;
- state = if (state == .quoted_ascii_string) .quoted_ascii_string_escape else .quoted_wide_string_escape;
- },
- '\r' => {
- string_literal_column = 0;
- // \r doesn't count towards string literal length
-
- // Increment line number but don't affect the result token's line number
- _ = self.incrementLineNumber();
- },
- '\n' => {
- string_literal_column = 0;
- // first \n expands to <space><\n>
- if (!string_literal_collapsing_whitespace) {
- string_literal_length += 2;
- string_literal_collapsing_whitespace = true;
- }
- // the rest are collapsed into the <space><\n>
-
- // Increment line number but don't affect the result token's line number
- _ = self.incrementLineNumber();
- },
- // only \t, space, Vertical Tab, and Form Feed count as whitespace when collapsing
- '\t', ' ', '\x0b', '\x0c' => {
- if (!string_literal_collapsing_whitespace) {
- // Literal tab characters are counted as the number of space characters
- // needed to reach the next 8-column tab stop.
- const width = columnWidth(string_literal_column, @intCast(c), 8);
- string_literal_length += width;
- string_literal_column += width;
- }
- },
- else => {
- string_literal_collapsing_whitespace = false;
- string_literal_length += 1;
- string_literal_column += 1;
- },
- },
- .quoted_ascii_string_escape, .quoted_wide_string_escape => switch (c) {
- '"' => {
- self.error_context_token = .{
- .id = .invalid,
- .start = self.index - 1,
- .end = self.index + 1,
- .line_number = self.line_handler.line_number,
- };
- return error.FoundCStyleEscapedQuote;
- },
- else => {
- string_literal_length += 1;
- string_literal_column += 1;
- state = if (state == .quoted_ascii_string_escape) .quoted_ascii_string else .quoted_wide_string;
- },
- },
- .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => switch (c) {
- '"' => {
- state = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
- // Escaped quotes count as 1 char for string literal length checks.
- // Since we did not increment on the first " (because it could have been
- // the end of the quoted string), we increment here
- string_literal_length += 1;
- string_literal_column += 1;
- },
- else => {
- result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
- break;
- },
- },
- }
- } else { // got EOF
- switch (state) {
- .start, .semicolon => {},
- .literal_or_quoted_wide_string, .literal, .e, .en, .b, .be, .beg, .begi => {
- result.id = .literal;
- },
- .preprocessor => {
- try self.evaluatePreprocessorCommand(result.start, self.index);
- result.start = self.index;
- },
- .number_literal => {
- result.id = .number;
- },
- .quoted_ascii_string_maybe_end, .quoted_wide_string_maybe_end => {
- result.id = if (state == .quoted_ascii_string_maybe_end) .quoted_ascii_string else .quoted_wide_string;
- },
- .quoted_ascii_string,
- .quoted_wide_string,
- .quoted_ascii_string_escape,
- .quoted_wide_string_escape,
- => {
- self.error_context_token = .{
- .id = .eof,
- .start = self.index,
- .end = self.index,
- .line_number = self.line_handler.line_number,
- };
- return LexError.UnfinishedStringLiteral;
- },
- }
- }
-
- result.end = self.index;
-
- if (result.id == .quoted_ascii_string or result.id == .quoted_wide_string) {
- if (string_literal_length > self.max_string_literal_codepoints) {
- self.error_context_token = result;
- return LexError.StringLiteralTooLong;
- }
- }
-
- return result;
- }
-
- /// Registers a line ending at the current index with the line handler
- /// (which is responsible for handling line ending pairs), marks the lexer
- /// as being at the start of a line, and returns the resulting line number.
- fn incrementLineNumber(self: *Self) usize {
-     self.at_start_of_line = true;
-     const handler = &self.line_handler;
-     // The handler's own return value is not needed here; only its updated
-     // `line_number` field is reported back to the caller.
-     _ = handler.incrementLineNumber(self.index);
-     return handler.line_number;
- }
-
- /// Returns an error if `codepoint` is never allowed, or is only allowed
- /// inside string literals and `in_string_literal` is false. On error,
- /// `error_context_token` is set to an `.invalid` token covering the bytes
- /// of the offending codepoint at the current index.
- fn checkForIllegalCodepoint(self: *Self, codepoint: code_pages.Codepoint, in_string_literal: bool) LexError!void {
-     const outside_string_literal = !in_string_literal;
-     const violation: LexError = switch (codepoint.value) {
-         // Always illegal:
-         //  0x00 = NUL
-         //  0x1A = Substitute (treated as EOF). The clang preprocessor also treats
-         //         0x1A as EOF, so a preprocessed .rc file will no longer contain it.
-         //  0x7F = DEL (treated as a context-specific terminator by the Windows RC compiler)
-         0x00, 0x1A, 0x7F => error.IllegalByte,
-         // Illegal only outside of string literals:
-         //  0x01...0x03 lead to strange 'macro definition too big' errors there.
-         //  0x04 is valid but behaves strangely (sort of a 'skip the next character' instruction).
-         //  @ and ` both lead to error RC2018: unknown character '0x60' (followed by
-         //  fatal error RC1116: RC terminating after preprocessor errors). Not exactly
-         //  sure why this would be the case, though.
-         //  TODO: Make sure there aren't any exceptions
-         0x01...0x04, '@', '`' => if (outside_string_literal) error.IllegalByteOutsideStringLiterals else return,
-         // The Windows RC compiler mostly skips over the Byte Order Mark, but in some
-         // edge cases (e.g. a BOM within a number literal) it produces cryptic
-         // 'compiler limit : macro definition too big' errors. Rejecting it outright
-         // sidesteps those edge cases and also the footgun of the BOM bytes going
-         // 'missing' from a string literal (the Windows RC compiler acts as if the
-         // codepoint was never part of the string literal).
-         '\u{FEFF}' => error.IllegalByteOrderMark,
-         // This private use codepoint is likewise skipped/ignored by the RC compiler
-         // (without the cryptic errors). Silently dropping bytes is enough of a
-         // footgun, with no real use-cases, that erroring is preferred over
-         // emulating the RC compiler's behavior.
-         '\u{E000}' => error.IllegalPrivateUseCharacter,
-         // Outside of string literals these codepoints cause strange errors; inside
-         // string literals they cause miscompilations. The miscompilation is avoided
-         // (with a warning) for string literals, so only the outside-of-string-literal
-         // case is disallowed here.
-         0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => if (outside_string_literal) error.IllegalCodepointOutsideStringLiterals else return,
-         else => return,
-     };
-     const offending_start = self.index;
-     self.error_context_token = .{
-         .id = .invalid,
-         .start = offending_start,
-         .end = offending_start + codepoint.byte_len,
-         .line_number = self.line_handler.line_number,
-     };
-     return violation;
- }
-
- /// Interprets the preprocessor line `self.buffer[start..end]`.
- ///
- /// Only `#pragma code_page(<number>)` and `#pragma code_page(DEFAULT)` have
- /// an effect: on success `current_code_page` is updated and
- /// `seen_pragma_code_pages` is incremented (saturating). Lines that do not
- /// start with exactly `#pragma` followed by whitespace and `code_page`
- /// (matched case-insensitively, as a prefix) are silently ignored.
- ///
- /// Returns one of the `error.CodePagePragma*` errors for a malformed or
- /// unusable `code_page` pragma. Whenever an error is returned,
- /// `error_context_token` is set to a `.preprocessor_command` token that
- /// spans `start..end`.
- fn evaluatePreprocessorCommand(self: *Self, start: usize, end: usize) !void {
-     const token = Token{
-         .id = .preprocessor_command,
-         .start = start,
-         .end = end,
-         .line_number = self.line_handler.line_number,
-     };
-     errdefer self.error_context_token = token;
-     var command: []const u8 = self.buffer[start..end];
-
-     // Anything besides exactly this is ignored by the Windows RC implementation
-     const expected_directive = "#pragma";
-     if (!std.mem.startsWith(u8, command, expected_directive)) return;
-     command = command[expected_directive.len..];
-
-     // At least one whitespace character must separate `#pragma` from what follows.
-     if (command.len == 0 or !std.ascii.isWhitespace(command[0])) return;
-     command = skipLeadingAsciiWhitespace(command);
-
-     // Note: CoDe_PaGeZ is also treated as "code_page" by the Windows RC implementation,
-     // and it will error with 'Missing left parenthesis in code_page #pragma'
-     const expected_extension = "code_page";
-     if (!std.ascii.startsWithIgnoreCase(command, expected_extension)) return;
-     command = skipLeadingAsciiWhitespace(command[expected_extension.len..]);
-
-     if (command.len == 0 or command[0] != '(') {
-         return error.CodePagePragmaMissingLeftParen;
-     }
-     command = skipLeadingAsciiWhitespace(command[1..]);
-
-     // The argument is everything up to the next whitespace or ')'.
-     // `num_str` only ever views the (read-only) source buffer, so it must be
-     // a const slice.
-     var num_str: []const u8 = command[0..0];
-     while (command.len > 0 and (command[0] != ')' and !std.ascii.isWhitespace(command[0]))) {
-         command = command[1..];
-         num_str.len += 1;
-     }
-
-     if (num_str.len == 0) {
-         return error.CodePagePragmaNotInteger;
-     }
-
-     command = skipLeadingAsciiWhitespace(command);
-
-     if (command.len == 0 or command[0] != ')') {
-         return error.CodePagePragmaMissingRightParen;
-     }
-
-     const code_page = code_page: {
-         if (std.ascii.eqlIgnoreCase("DEFAULT", num_str)) {
-             break :code_page self.default_code_page;
-         }
-
-         // The Win32 compiler behaves fairly strangely around maxInt(u32):
-         // - If the overflowed u32 wraps and becomes a known code page ID, then
-         //   it will error/warn with "Codepage not valid: ignored" (depending on /w)
-         // - If the overflowed u32 wraps and does not become a known code page ID,
-         //   then it will error with 'constant too big' and 'Codepage not integer'
-         //
-         // Instead of that, we just have a separate error specifically for overflow.
-         const num = parseCodePageNum(num_str) catch |err| switch (err) {
-             error.InvalidCharacter => return error.CodePagePragmaNotInteger,
-             error.Overflow => return error.CodePagePragmaOverflow,
-         };
-
-         // Anything that starts with 0 but does not resolve to 0 is treated as invalid, e.g. 01252
-         if (num_str[0] == '0' and num != 0) {
-             return error.CodePagePragmaInvalidCodePage;
-         }
-         // Anything that resolves to 0 is treated as 'not an integer' by the Win32 implementation.
-         else if (num == 0) {
-             return error.CodePagePragmaNotInteger;
-         }
-         // Anything above u16 max is not going to be found since our CodePage enum is backed by a u16.
-         if (num > std.math.maxInt(u16)) {
-             return error.CodePagePragmaInvalidCodePage;
-         }
-
-         break :code_page code_pages.CodePage.getByIdentifierEnsureSupported(@intCast(num)) catch |err| switch (err) {
-             error.InvalidCodePage => return error.CodePagePragmaInvalidCodePage,
-             error.UnsupportedCodePage => return error.CodePagePragmaUnsupportedCodePage,
-         };
-     };
-
-     // https://learn.microsoft.com/en-us/windows/win32/menurc/pragma-directives
-     // > This pragma is not supported in an included resource file (.rc)
-     //
-     // Even though the Win32 behavior is to just ignore such directives silently,
-     // this is an error in the lexer to allow for emitting warnings/errors when
-     // such directives are found if that's wanted. The intention is for the lexer
-     // to still be able to work correctly after this error is returned.
-     if (self.source_mappings) |source_mappings| {
-         if (!source_mappings.isRootFile(token.line_number)) {
-             return error.CodePagePragmaInIncludedFile;
-         }
-     }
-
-     self.seen_pragma_code_pages +|= 1;
-     self.current_code_page = code_page;
- }
-
- /// Returns `bytes` with every leading ASCII whitespace character
- /// (as defined by `std.ascii.isWhitespace`) removed.
- fn skipLeadingAsciiWhitespace(bytes: []const u8) []const u8 {
-     var rest = bytes;
-     while (rest.len > 0 and std.ascii.isWhitespace(rest[0])) {
-         rest = rest[1..];
-     }
-     return rest;
- }
-
- /// Parses `str` as an unsigned base-10 integer made up solely of ASCII
- /// digits (no sign, no separators). An empty `str` yields 0.
- ///
- /// Returns `error.InvalidCharacter` for any non-digit byte and
- /// `error.Overflow` if the value does not fit in a `u32`.
- fn parseCodePageNum(str: []const u8) !u32 {
-     var total: u32 = 0;
-     var i: usize = 0;
-     while (i < str.len) : (i += 1) {
-         const digit = try std.fmt.charToDigit(str[i], 10);
-         // While the running total is still zero there is nothing to scale.
-         const scaled = if (total == 0) total else try std.math.mul(u32, total, 10);
-         total = try std.math.add(u32, scaled, digit);
-     }
-     return total;
- }
-
- /// Converts a `LexError` into the matching `ErrorDetails`, attaching the
- /// token stored in `error_context_token`.
- ///
- /// `error_context_token` must be non-null when this is called.
- /// `error.StringLiteralTooLong` additionally carries the configured
- /// `max_string_literal_codepoints` as its extra number.
- pub fn getErrorDetails(self: Self, lex_err: LexError) ErrorDetails {
-     const context_token = self.error_context_token.?;
-     const kind: ErrorDetails.Error = switch (lex_err) {
-         // The only error that needs extra data, so it builds its result directly.
-         error.StringLiteralTooLong => return .{
-             .err = .string_literal_too_long,
-             .token = context_token,
-             .extra = .{ .number = self.max_string_literal_codepoints },
-         },
-         error.UnfinishedStringLiteral => .unfinished_string_literal,
-         error.InvalidNumberWithExponent => .invalid_number_with_exponent,
-         error.InvalidDigitCharacterInNumberLiteral => .invalid_digit_character_in_number_literal,
-         error.IllegalByte => .illegal_byte,
-         error.IllegalByteOutsideStringLiterals => .illegal_byte_outside_string_literals,
-         error.IllegalCodepointOutsideStringLiterals => .illegal_codepoint_outside_string_literals,
-         error.IllegalByteOrderMark => .illegal_byte_order_mark,
-         error.IllegalPrivateUseCharacter => .illegal_private_use_character,
-         error.FoundCStyleEscapedQuote => .found_c_style_escaped_quote,
-         error.CodePagePragmaMissingLeftParen => .code_page_pragma_missing_left_paren,
-         error.CodePagePragmaMissingRightParen => .code_page_pragma_missing_right_paren,
-         error.CodePagePragmaInvalidCodePage => .code_page_pragma_invalid_code_page,
-         error.CodePagePragmaNotInteger => .code_page_pragma_not_integer,
-         error.CodePagePragmaOverflow => .code_page_pragma_overflow,
-         error.CodePagePragmaUnsupportedCodePage => .code_page_pragma_unsupported_code_page,
-         error.CodePagePragmaInIncludedFile => .code_page_pragma_in_included_file,
-     };
-     return .{
-         .err = kind,
-         .token = context_token,
-     };
- }
-};
-
-/// Test helper: lexes `source` with `nextNormal` and checks that the token
-/// ids come out exactly as listed in `expected_tokens`, followed by `.eof`.
-fn testLexNormal(source: []const u8, expected_tokens: []const Token.Id) !void {
-    var lexer = Lexer.init(source, .{});
-    if (dumpTokensDuringTests) {
-        std.debug.print("\n----------------------\n{s}\n----------------------\n", .{lexer.buffer});
-    }
-    var i: usize = 0;
-    while (i < expected_tokens.len) : (i += 1) {
-        const token = try lexer.nextNormal();
-        if (dumpTokensDuringTests) lexer.dump(&token);
-        try std.testing.expectEqual(expected_tokens[i], token.id);
-    }
-    // Nothing may be left over once all expected tokens have been consumed.
-    const trailing_token = try lexer.nextNormal();
-    try std.testing.expectEqual(Token.Id.eof, trailing_token.id);
-}
-
-/// Test helper: asserts that `actual` is the error `expected`, and then
-/// prints `actual` when token dumping is enabled for tests.
-fn expectLexError(expected: LexError, actual: anytype) !void {
-    try std.testing.expectError(expected, actual);
-    if (dumpTokensDuringTests) {
-        std.debug.print("{!}\n", .{actual});
-    }
-}
-
-test "normal: numbers" {
-    // Each source is expected to lex to exactly the listed token ids.
-    const cases = [_]struct { source: []const u8, ids: []const Token.Id }{
-        .{ .source = "1", .ids = &.{.number} },
-        .{ .source = "-1", .ids = &.{.number} },
-        .{ .source = "- 1", .ids = &.{ .number, .number } },
-        .{ .source = "-a", .ids = &.{.number} },
-    };
-    for (cases) |case| {
-        try testLexNormal(case.source, case.ids);
-    }
-}
-
-test "normal: string literals" {
-    const cases = [_]struct { source: []const u8, ids: []const Token.Id }{
-        // an empty string literal
-        .{ .source = "\"\"", .ids = &.{.quoted_ascii_string} },
-        // "" is an escaped ", so this is still a single string literal
-        .{ .source = "\" \"\" \"", .ids = &.{.quoted_ascii_string} },
-    };
-    for (cases) |case| {
-        try testLexNormal(case.source, case.ids);
-    }
-}
-
-test "superscript chars and code pages" {
-    const helpers = struct {
-        /// Lexes and returns only the first token of `source`.
-        fn lexFirst(source: []const u8, default_code_page: CodePage, comptime lex_method: Lexer.LexMethod) LexError!Token {
-            var lexer = Lexer.init(source, .{ .default_code_page = default_code_page });
-            return lexer.next(lex_method);
-        }
-    };
-    const utf8_source = "²";
-    const windows1252_source = "\xB2";
-
-    // Windows-1252 bytes lexed as Windows-1252: rejected as an invalid digit.
-    try std.testing.expectError(
-        error.InvalidDigitCharacterInNumberLiteral,
-        helpers.lexFirst(windows1252_source, .windows1252, .normal),
-    );
-
-    // UTF-8 bytes lexed as Windows-1252: lexes as a two-byte literal.
-    const utf8_as_windows1252 = try helpers.lexFirst(utf8_source, .windows1252, .normal);
-    try std.testing.expectEqual(Token{
-        .id = .literal,
-        .start = 0,
-        .end = 2,
-        .line_number = 1,
-    }, utf8_as_windows1252);
-
-    // UTF-8 bytes lexed as UTF-8: rejected as an invalid digit.
-    try std.testing.expectError(
-        error.InvalidDigitCharacterInNumberLiteral,
-        helpers.lexFirst(utf8_source, .utf8, .normal),
-    );
-
-    // Windows-1252 bytes lexed as UTF-8: lexes as a one-byte literal.
-    const windows1252_as_utf8 = try helpers.lexFirst(windows1252_source, .utf8, .normal);
-    try std.testing.expectEqual(Token{
-        .id = .literal,
-        .start = 0,
-        .end = 1,
-        .line_number = 1,
-    }, windows1252_as_utf8);
-}
diff --git a/src/resinator/literals.zig b/src/resinator/literals.zig
@@ -1,911 +0,0 @@
-const std = @import("std");
-const code_pages = @import("code_pages.zig");
-const CodePage = code_pages.CodePage;
-const windows1252 = @import("windows1252.zig");
-const ErrorDetails = @import("errors.zig").ErrorDetails;
-const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
-const Token = @import("lex.zig").Token;
-
-/// Reports whether `str` is acceptable as a number literal for data values.
-/// rc is maximally liberal here: only the first byte is inspected, and it
-/// just has to be a digit, `-`, or `~`. An empty `str` is never valid.
-pub fn isValidNumberDataLiteral(str: []const u8) bool {
-    if (str.len == 0) return false;
-    return switch (str[0]) {
-        '0'...'9', '-', '~' => true,
-        else => false,
-    };
-}
-
-/// A run of source bytes together with the code page used to decode them.
-pub const SourceBytes = struct {
- /// The raw bytes. When passed to `IterativeStringParser.init`, this is the
- /// full string literal including its surrounding quotes (and `L` prefix, if any).
- slice: []const u8,
- /// The code page that `slice` is decoded with.
- code_page: CodePage,
-};
-
-/// The kind of a quoted string literal: `ascii` for `""` and `wide` for `L""`.
-pub const StringType = enum { ascii, wide };
-
-/// Valid escapes:
-/// "" -> "
-/// \a, \A => 0x08 (not 0x07 like in C)
-/// \n => 0x0A
-/// \r => 0x0D
-/// \t, \T => 0x09
-/// \\ => \
-/// \nnn => byte with numeric value given by nnn interpreted as octal
-/// (wraps on overflow, number of digits can be 1-3 for ASCII strings
-/// and 1-7 for wide strings)
-/// \xhh => byte with numeric value given by hh interpreted as hex
-/// (number of digits can be 0-2 for ASCII strings and 0-4 for
-/// wide strings)
-/// \<\r+> => \
-/// \<[\r\n\t ]+> => <nothing>
-///
-/// Special cases:
-/// <\t> => 1-8 spaces, dependent on columns in the source rc file itself
-/// <\r> => <nothing>
-/// <\n+><\w+?\n?> => <space><\n>
-///
-/// Special, especially weird case:
-/// \"" => "
-/// NOTE: This leads to footguns because the preprocessor can start parsing things
-/// out-of-sync with the RC compiler, expanding macros within string literals, etc.
-/// This parse function handles this case the same as the Windows RC compiler, but
-/// \" within a string literal is treated as an error by the lexer, so the relevant
-/// branches should never actually be hit during this function.
-///
-/// Yields the parsed codepoints of a quoted string literal one at a time via `next`.
-pub const IterativeStringParser = struct {
- /// The contents of the string literal, without the surrounding quotes
- /// or the `L` prefix (both are stripped in `init`).
- source: []const u8,
- /// The code page used to decode `source`.
- code_page: CodePage,
- /// The type of the string inferred by the prefix (L"" or "")
- /// This is what matters for things like the maximum digits in an
- /// escape sequence, whether or not invalid escape sequences are skipped, etc.
- declared_string_type: StringType,
- /// A codepoint that must be returned by the next call before any more of
- /// `source` is read (used for the `\n` of a collapsed `<space><\n>`).
- pending_codepoint: ?u21 = null,
- /// The number of spaces still to be returned for an expanded tab.
- num_pending_spaces: u8 = 0,
- /// Byte offset of the next unread codepoint within `source`.
- index: usize = 0,
- /// Column tracker used to compute how many spaces a tab expands to.
- column: usize = 0,
- /// When non-null, warnings/notes are appended here while parsing.
- diagnostics: ?DiagnosticsContext = null,
- /// Set once the tab-converted-to-spaces warning has been emitted, so it
- /// is only reported once per string.
- seen_tab: bool = false,
-
- /// States of the parsing state machine in `nextUnchecked`.
- const State = enum {
- // not within any escape or special sequence
- normal,
- // a `"` was seen; the next codepoint is required to be `"` as well
- quote,
- // a `\n` was seen; following whitespace is being collapsed
- newline,
- // a `\` was seen
- escaped,
- // a `\` followed by one or more `\r` was seen
- escaped_cr,
- // a `\` followed by a newline was seen; following whitespace is being skipped
- escaped_newlines,
- // within a `\nnn` octal escape
- escaped_octal,
- // within a `\xhh` hex escape
- escaped_hex,
- };
-
- /// Creates a parser for the string literal in `bytes.slice`, which must
- /// include the surrounding double quotes and, for wide strings, the
- /// leading `L`/`l`. The starting column is `options.start_column` advanced
- /// past the removed opening quote (and `L`).
- pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
- const declared_string_type: StringType = switch (bytes.slice[0]) {
- 'L', 'l' => .wide,
- else => .ascii,
- };
- var source = bytes.slice[1 .. bytes.slice.len - 1]; // remove ""
- var column = options.start_column + 1; // for the removed "
- if (declared_string_type == .wide) {
- source = source[1..]; // remove L
- column += 1; // for the removed L
- }
- return .{
- .source = source,
- .code_page = bytes.code_page,
- .declared_string_type = declared_string_type,
- .column = column,
- .diagnostics = options.diagnostics,
- };
- }
-
- /// A single parsed codepoint. `from_escaped_integer` is true when the
- /// value came from an octal or hex escape sequence.
- pub const ParsedCodepoint = struct {
- codepoint: u21,
- from_escaped_integer: bool = false,
- };
-
- /// Returns the next parsed codepoint, or null when the string is exhausted.
- /// If `diagnostics` is set, a warning and a note are appended for certain
- /// codepoints that did not come from an escaped integer (see the switch
- /// below for the list). Can only fail with an allocation error from
- /// appending diagnostics.
- pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
- const result = try self.nextUnchecked();
- if (self.diagnostics != null and result != null and !result.?.from_escaped_integer) {
- switch (result.?.codepoint) {
- 0x900, 0xA00, 0xA0D, 0x2000, 0xFFFE, 0xD00 => {
- // 0xD00 gets the 'skip' diagnostic, the others the 'byte swap' one
- const err: ErrorDetails.Error = if (result.?.codepoint == 0xD00)
- .rc_would_miscompile_codepoint_skip
- else
- .rc_would_miscompile_codepoint_byte_swap;
- try self.diagnostics.?.diagnostics.append(ErrorDetails{
- .err = err,
- .type = .warning,
- .token = self.diagnostics.?.token,
- .extra = .{ .number = result.?.codepoint },
- });
- try self.diagnostics.?.diagnostics.append(ErrorDetails{
- .err = err,
- .type = .note,
- .token = self.diagnostics.?.token,
- .print_source_line = false,
- .extra = .{ .number = result.?.codepoint },
- });
- },
- else => {},
- }
- }
- return result;
- }
-
- /// Returns the next parsed codepoint, or null when the string is exhausted,
- /// without the codepoint-specific diagnostics added by `next`. The
- /// tab-converted-to-spaces warning is still emitted here when `diagnostics`
- /// is set.
- pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
- // Output queued up by a previous call takes priority over reading more source.
- if (self.num_pending_spaces > 0) {
- // Ensure that we don't get into this predicament so we can ensure that
- // the order of processing any pending stuff doesn't matter
- std.debug.assert(self.pending_codepoint == null);
- self.num_pending_spaces -= 1;
- return .{ .codepoint = ' ' };
- }
- if (self.pending_codepoint) |pending_codepoint| {
- self.pending_codepoint = null;
- return .{ .codepoint = pending_codepoint };
- }
- if (self.index >= self.source.len) return null;
-
- var state: State = .normal;
- // accumulated value of the octal/hex escape currently being parsed
- var string_escape_n: u16 = 0;
- // number of digits consumed so far for the current octal/hex escape
- var string_escape_i: u8 = 0;
- const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
- .ascii => 3,
- .wide => 7,
- };
- const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
- .ascii => 2,
- .wide => 4,
- };
-
- while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : (self.index += codepoint.byte_len) {
- const c = codepoint.value;
- var backtrack = false;
- // Runs at the end of every iteration, including when returning from
- // within the loop body. When backtracking, the index is moved back by
- // the current codepoint's length so that the codepoint is processed
- // again; otherwise the column is advanced past the codepoint.
- defer {
- if (backtrack) {
- self.index -= codepoint.byte_len;
- } else {
- if (c == '\t') {
- self.column += columnsUntilTabStop(self.column, 8);
- } else {
- self.column += codepoint.byte_len;
- }
- }
- }
- switch (state) {
- .normal => switch (c) {
- '\\' => state = .escaped,
- '"' => state = .quote,
- '\r' => {},
- '\n' => state = .newline,
- '\t' => {
- // Only warn about a tab getting converted to spaces once per string
- if (self.diagnostics != null and !self.seen_tab) {
- try self.diagnostics.?.diagnostics.append(ErrorDetails{
- .err = .tab_converted_to_spaces,
- .type = .warning,
- .token = self.diagnostics.?.token,
- });
- try self.diagnostics.?.diagnostics.append(ErrorDetails{
- .err = .tab_converted_to_spaces,
- .type = .note,
- .token = self.diagnostics.?.token,
- .print_source_line = false,
- });
- self.seen_tab = true;
- }
- // One space is returned now; the rest are queued as pending spaces.
- const cols = columnsUntilTabStop(self.column, 8);
- self.num_pending_spaces = @intCast(cols - 1);
- self.index += codepoint.byte_len;
- return .{ .codepoint = ' ' };
- },
- else => {
- // Returning skips the loop's continue expression, so the index
- // is advanced manually before each return inside the loop.
- self.index += codepoint.byte_len;
- return .{ .codepoint = c };
- },
- },
- .quote => switch (c) {
- '"' => {
- // "" => "
- self.index += codepoint.byte_len;
- return .{ .codepoint = '"' };
- },
- else => unreachable, // this is a bug in the lexer
- },
- .newline => switch (c) {
- '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
- else => {
- // backtrack so that we handle the current char properly
- backtrack = true;
- // <space><newline>
- self.index += codepoint.byte_len;
- self.pending_codepoint = '\n';
- return .{ .codepoint = ' ' };
- },
- },
- .escaped => switch (c) {
- '\r' => state = .escaped_cr,
- '\n' => state = .escaped_newlines,
- '0'...'7' => {
- string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
- string_escape_i = 1;
- state = .escaped_octal;
- },
- 'x', 'X' => {
- string_escape_n = 0;
- string_escape_i = 0;
- state = .escaped_hex;
- },
- else => {
- switch (c) {
- 'a', 'A' => {
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\x08' };
- }, // might be a bug in RC, but matches its behavior
- 'n' => {
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\n' };
- },
- 'r' => {
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\r' };
- },
- 't', 'T' => {
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\t' };
- },
- '\\' => {
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\\' };
- },
- '"' => {
- // \" is a special case that doesn't get the \ included,
- backtrack = true;
- },
- else => switch (self.declared_string_type) {
- .wide => {}, // invalid escape sequences are skipped in wide strings
- .ascii => {
- // backtrack so that we handle the current char properly
- backtrack = true;
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\\' };
- },
- },
- }
- state = .normal;
- },
- },
- .escaped_cr => switch (c) {
- '\r' => {},
- '\n' => state = .escaped_newlines,
- else => {
- // backtrack so that we handle the current char properly
- backtrack = true;
- self.index += codepoint.byte_len;
- return .{ .codepoint = '\\' };
- },
- },
- .escaped_newlines => switch (c) {
- '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
- else => {
- // backtrack so that we handle the current char properly
- backtrack = true;
- state = .normal;
- },
- },
- .escaped_octal => switch (c) {
- '0'...'7' => {
- // wrapping arithmetic: octal escapes wrap on overflow
- string_escape_n *%= 8;
- string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
- string_escape_i += 1;
- if (string_escape_i == max_octal_escape_digits) {
- const escaped_value = switch (self.declared_string_type) {
- .ascii => @as(u8, @truncate(string_escape_n)),
- .wide => string_escape_n,
- };
- self.index += codepoint.byte_len;
- return .{ .codepoint = escaped_value, .from_escaped_integer = true };
- }
- },
- else => {
- // backtrack so that we handle the current char properly
- backtrack = true;
- // write out whatever byte we have parsed so far
- const escaped_value = switch (self.declared_string_type) {
- .ascii => @as(u8, @truncate(string_escape_n)),
- .wide => string_escape_n,
- };
- self.index += codepoint.byte_len;
- return .{ .codepoint = escaped_value, .from_escaped_integer = true };
- },
- },
- .escaped_hex => switch (c) {
- '0'...'9', 'a'...'f', 'A'...'F' => {
- string_escape_n *= 16;
- string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
- string_escape_i += 1;
- if (string_escape_i == max_hex_escape_digits) {
- const escaped_value = switch (self.declared_string_type) {
- .ascii => @as(u8, @truncate(string_escape_n)),
- .wide => string_escape_n,
- };
- self.index += codepoint.byte_len;
- return .{ .codepoint = escaped_value, .from_escaped_integer = true };
- }
- },
- else => {
- // backtrack so that we handle the current char properly
- backtrack = true;
- // write out whatever byte we have parsed so far
- // (even with 0 actual digits, \x alone parses to 0)
- const escaped_value = switch (self.declared_string_type) {
- .ascii => @as(u8, @truncate(string_escape_n)),
- .wide => string_escape_n,
- };
- self.index += codepoint.byte_len;
- return .{ .codepoint = escaped_value, .from_escaped_integer = true };
- },
- },
- }
- }
-
- // The source ran out while in `state`; flush whatever that state still owes.
- switch (state) {
- .normal, .escaped_newlines => {},
- .newline => {
- // <space><newline>
- self.pending_codepoint = '\n';
- return .{ .codepoint = ' ' };
- },
- .escaped, .escaped_cr => return .{ .codepoint = '\\' },
- .escaped_octal, .escaped_hex => {
- const escaped_value = switch (self.declared_string_type) {
- .ascii => @as(u8, @truncate(string_escape_n)),
- .wide => string_escape_n,
- };
- return .{ .codepoint = escaped_value, .from_escaped_integer = true };
- },
- .quote => unreachable, // this is a bug in the lexer
- }
-
- return null;
- }
-};
-
-/// Options for parsing a quoted string literal.
-pub const StringParseOptions = struct {
- /// Column of the start of the string literal; `IterativeStringParser.init`
- /// uses it as the base for the parser's column tracking.
- start_column: usize = 0,
- /// When non-null, warnings/notes found while parsing are appended to it.
- diagnostics: ?DiagnosticsContext = null,
- /// Code page used when encoding the output of ASCII string literals.
- /// `parseQuotedString` only handles `.windows1252` and `.utf8`.
- output_code_page: CodePage = .windows1252,
-};
-
-/// Parses the quoted string literal in `bytes` (quotes and any `L` prefix
-/// included) and returns its contents as newly allocated memory owned by
-/// the caller: bytes for `.ascii`, 0-terminated little-endian UTF-16 code
-/// units for `.wide`.
-///
-/// Values from octal/hex escapes are stored as-is. For `.ascii`, all other
-/// codepoints are encoded according to `options.output_code_page`; for
-/// `.wide` they are encoded as UTF-16.
-pub fn parseQuotedString(
-    comptime literal_type: StringType,
-    allocator: std.mem.Allocator,
-    bytes: SourceBytes,
-    options: StringParseOptions,
-) !(switch (literal_type) {
-    .ascii => []u8,
-    .wide => [:0]u16,
-}) {
-    const Unit = switch (literal_type) {
-        .ascii => u8,
-        .wide => u16,
-    };
-    std.debug.assert(bytes.slice.len >= 2); // must at least have 2 double quote chars
-
-    var out = try std.ArrayList(Unit).initCapacity(allocator, bytes.slice.len);
-    errdefer out.deinit();
-
-    var parser = IterativeStringParser.init(bytes, options);
-
-    while (try parser.next()) |parsed| {
-        const cp = parsed.codepoint;
-        if (parsed.from_escaped_integer) {
-            // Escaped integers bypass any code page conversion.
-            try out.append(std.mem.nativeToLittle(Unit, @intCast(cp)));
-            continue;
-        }
-        switch (literal_type) {
-            .ascii => switch (options.output_code_page) {
-                .windows1252 => {
-                    if (windows1252.bestFitFromCodepoint(cp)) |best_fit| {
-                        try out.append(best_fit);
-                    } else {
-                        // No best fit: one '?' for invalid codepoints and codepoints
-                        // below 0x10000, two for everything else.
-                        const replacement: []const u8 = if (cp < 0x10000 or cp == code_pages.Codepoint.invalid) "?" else "??";
-                        try out.appendSlice(replacement);
-                    }
-                },
-                .utf8 => {
-                    const to_encode = if (cp == code_pages.Codepoint.invalid) '�' else cp;
-                    var encoded: [4]u8 = undefined;
-                    const encoded_len = std.unicode.utf8Encode(to_encode, &encoded) catch unreachable;
-                    try out.appendSlice(encoded[0..encoded_len]);
-                },
-                else => unreachable, // Unsupported code page
-            },
-            .wide => {
-                if (cp == code_pages.Codepoint.invalid) {
-                    try out.append(std.mem.nativeToLittle(u16, '�'));
-                } else if (cp < 0x10000) {
-                    const unit: u16 = @intCast(cp);
-                    try out.append(std.mem.nativeToLittle(u16, unit));
-                } else {
-                    // Encode as a UTF-16 surrogate pair.
-                    const high_surrogate = @as(u16, @intCast((cp - 0x10000) >> 10)) + 0xD800;
-                    const low_surrogate = @as(u16, @intCast(cp & 0x3FF)) + 0xDC00;
-                    try out.append(std.mem.nativeToLittle(u16, high_surrogate));
-                    try out.append(std.mem.nativeToLittle(u16, low_surrogate));
-                }
-            },
-        }
-    }
-
-    if (literal_type == .ascii) {
-        return out.toOwnedSlice();
-    } else {
-        return out.toOwnedSliceSentinel(0);
-    }
-}
-
-pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
- std.debug.assert(bytes.slice.len >= 2); // ""
- return parseQuotedString(.ascii, allocator, bytes, options);
-}
-
-pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
- std.debug.assert(bytes.slice.len >= 3); // L""
- return parseQuotedString(.wide, allocator, bytes, options);
-}
-
-pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
- std.debug.assert(bytes.slice.len >= 2); // ""
- return parseQuotedString(.wide, allocator, bytes, options);
-}
-
-pub fn parseQuotedStringAsAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
- std.debug.assert(bytes.slice.len >= 2); // ""
- return parseQuotedString(.ascii, allocator, bytes, options);
-}
-
-test "parse quoted ascii string" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const arena = arena_allocator.allocator();
-
- try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"hello"
- ,
- .code_page = .windows1252,
- }, .{}));
- // hex with 0 digits
- try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\x"
- ,
- .code_page = .windows1252,
- }, .{}));
- // hex max of 2 digits
- try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\XfFf"
- ,
- .code_page = .windows1252,
- }, .{}));
- // octal with invalid octal digit
- try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\19"
- ,
- .code_page = .windows1252,
- }, .{}));
- // escaped quotes
- try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{
- .slice =
- \\" "" "
- ,
- .code_page = .windows1252,
- }, .{}));
- // backslash right before escaped quotes
- try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\"""
- ,
- .code_page = .windows1252,
- }, .{}));
- // octal overflow
- try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\401"
- ,
- .code_page = .windows1252,
- }, .{}));
- // escapes
- try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\a\n\r\t\\"
- ,
- .code_page = .windows1252,
- }, .{}));
- // uppercase escapes
- try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\A\N\R\T\\"
- ,
- .code_page = .windows1252,
- }, .{}));
- // backslash on its own
- try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\"
- ,
- .code_page = .windows1252,
- }, .{}));
- // unrecognized escapes
- try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{
- .slice =
- \\"\b"
- ,
- .code_page = .windows1252,
- }, .{}));
- // escaped carriage returns
- try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 },
- .{},
- ));
- // escaped newlines
- try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 },
- .{},
- ));
- // escaped CRLF pairs
- try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 },
- .{},
- ));
- // escaped newlines with other whitespace
- try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\\\n \t\r\n \r\t\n \t\"", .code_page = .windows1252 },
- .{},
- ));
- // literal tab characters get converted to spaces (dependent on source file columns)
- try std.testing.expectEqualSlices(u8, " ", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\t\"", .code_page = .windows1252 },
- .{},
- ));
- try std.testing.expectEqualSlices(u8, "abc ", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"abc\t\"", .code_page = .windows1252 },
- .{},
- ));
- try std.testing.expectEqualSlices(u8, "abcdefg ", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 },
- .{},
- ));
- try std.testing.expectEqualSlices(u8, "\\ ", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\\\t\"", .code_page = .windows1252 },
- .{},
- ));
- // literal CR's get dropped
- try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 },
- .{},
- ));
- // contiguous newlines and whitespace get collapsed to <space><newline>
- try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\n\r\r \r\n \t \"", .code_page = .windows1252 },
- .{},
- ));
-}
-
-test "parse quoted ascii string with utf8 code page" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const arena = arena_allocator.allocator();
-
- try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\"", .code_page = .utf8 },
- .{},
- ));
- // Codepoints that don't have a Windows-1252 representation get converted to ?
- try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"кириллица\"", .code_page = .utf8 },
- .{},
- ));
- // Codepoints that have a best fit mapping get converted accordingly,
- // these are box drawing codepoints
- try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"┌─┐\"", .code_page = .utf8 },
- .{},
- ));
- // Invalid UTF-8 gets converted to ? depending on well-formedness
- try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
- .{},
- ));
- // Codepoints that would require a UTF-16 surrogate pair get converted to ??
- try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
- .{},
- ));
-
- // Output code page changes how invalid UTF-8 gets converted, since it
- // now encodes the result as UTF-8 so it can write replacement characters.
- try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
- .{ .output_code_page = .utf8 },
- ));
- try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString(
- arena,
- .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
- .{ .output_code_page = .utf8 },
- ));
-}
-
-test "parse quoted wide string" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const arena = arena_allocator.allocator();
-
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("hello"), try parseQuotedWideString(arena, .{
- .slice =
- \\L"hello"
- ,
- .code_page = .windows1252,
- }, .{}));
- // hex with 0 digits
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{
- .slice =
- \\L"\x"
- ,
- .code_page = .windows1252,
- }, .{}));
- // hex max of 4 digits
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0xFFFF), std.mem.nativeToLittle(u16, 'f') }, try parseQuotedWideString(arena, .{
- .slice =
- \\L"\XfFfFf"
- ,
- .code_page = .windows1252,
- }, .{}));
- // octal max of 7 digits
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x9493), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '3') }, try parseQuotedWideString(arena, .{
- .slice =
- \\L"\111222333"
- ,
- .code_page = .windows1252,
- }, .{}));
- // octal overflow
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0xFF01)}, try parseQuotedWideString(arena, .{
- .slice =
- \\L"\777401"
- ,
- .code_page = .windows1252,
- }, .{}));
- // literal tab characters get converted to spaces (dependent on source file columns)
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg "), try parseQuotedWideString(
- arena,
- .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 },
- .{},
- ));
- // Windows-1252 conversion
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString(
- arena,
- .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 },
- .{},
- ));
- // Invalid escape sequences are skipped
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString(
- arena,
- .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
- .{},
- ));
-}
-
-test "parse quoted wide string with utf8 code page" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const arena = arena_allocator.allocator();
-
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString(
- arena,
- .{ .slice = "L\"\"", .code_page = .utf8 },
- .{},
- ));
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString(
- arena,
- .{ .slice = "L\"кириллица\"", .code_page = .utf8 },
- .{},
- ));
- // Invalid UTF-8 gets converted to � depending on well-formedness
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString(
- arena,
- .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
- .{},
- ));
-}
-
-test "parse quoted ascii string as wide string" {
- var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena_allocator.deinit();
- const arena = arena_allocator.allocator();
-
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString(
- arena,
- .{ .slice = "\"кириллица\"", .code_page = .utf8 },
- .{},
- ));
- // Whether or not invalid escapes are skipped is still determined by the L prefix
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString(
- arena,
- .{ .slice = "\"\\H\"", .code_page = .windows1252 },
- .{},
- ));
- try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString(
- arena,
- .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
- .{},
- ));
- // Maximum escape sequence value is also determined by the L prefix
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x12), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '4') }, try parseQuotedStringAsWideString(
- arena,
- .{ .slice = "\"\\x1234\"", .code_page = .windows1252 },
- .{},
- ));
- try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0x1234)}, try parseQuotedStringAsWideString(
- arena,
- .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 },
- .{},
- ));
-}
-
-pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
- // 0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4
- // 5 => 3, 6 => 2, 7 => 1, 8 => 8
- return tab_columns - (column % tab_columns);
-}
-
-pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
- return switch (c) {
- '\t' => columnsUntilTabStop(cur_column, tab_columns),
- else => 1,
- };
-}
-
-pub const Number = struct {
- value: u32,
- is_long: bool = false,
-
- pub fn asWord(self: Number) u16 {
- return @truncate(self.value);
- }
-
- pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
- const result = switch (operator_char) {
- '-' => lhs.value -% rhs.value,
- '+' => lhs.value +% rhs.value,
- '|' => lhs.value | rhs.value,
- '&' => lhs.value & rhs.value,
- else => unreachable, // invalid operator, this would be a lexer/parser bug
- };
- return .{
- .value = result,
- .is_long = lhs.is_long or rhs.is_long,
- };
- }
-};
-
-/// Assumes that number literals normally rejected by RC's preprocessor
-/// are similarly rejected before being parsed.
-///
-/// Relevant RC preprocessor errors:
-/// RC2021: expected exponent value, not '<digit>'
-/// example that is rejected: 1e1
-/// example that is accepted: 1ea
-/// (this function will parse the two examples above the same)
-pub fn parseNumberLiteral(bytes: SourceBytes) Number {
- std.debug.assert(bytes.slice.len > 0);
- var result = Number{ .value = 0, .is_long = false };
- var radix: u8 = 10;
- var buf = bytes.slice;
-
- const Prefix = enum { none, minus, complement };
- var prefix: Prefix = .none;
- switch (buf[0]) {
- '-' => {
- prefix = .minus;
- buf = buf[1..];
- },
- '~' => {
- prefix = .complement;
- buf = buf[1..];
- },
- else => {},
- }
-
- if (buf.len > 2 and buf[0] == '0') {
- switch (buf[1]) {
- 'o' => { // octal radix prefix is case-sensitive
- radix = 8;
- buf = buf[2..];
- },
- 'x', 'X' => {
- radix = 16;
- buf = buf[2..];
- },
- else => {},
- }
- }
-
- var i: usize = 0;
- while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) {
- const c = codepoint.value;
- if (c == 'L' or c == 'l') {
- result.is_long = true;
- break;
- }
- const digit = switch (c) {
- // On invalid digit for the radix, just stop parsing but don't fail
- 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch break,
- else => break,
- };
-
- if (result.value != 0) {
- result.value *%= radix;
- }
- result.value +%= digit;
- }
-
- switch (prefix) {
- .none => {},
- .minus => result.value = 0 -% result.value,
- .complement => result.value = ~result.value,
- }
-
- return result;
-}
-
-test "parse number literal" {
- try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 }));
-
- // can handle any length of number, wraps on overflow appropriately
- const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 });
- try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow);
- try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord());
-
- try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 }));
-
- try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 }));
-
- try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 }));
- try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 }));
-
- // anything after L is ignored
- try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 }));
-}
diff --git a/src/resinator/parse.zig b/src/resinator/parse.zig
@@ -1,1883 +0,0 @@
-const std = @import("std");
-const Lexer = @import("lex.zig").Lexer;
-const Token = @import("lex.zig").Token;
-const Node = @import("ast.zig").Node;
-const Tree = @import("ast.zig").Tree;
-const CodePageLookup = @import("ast.zig").CodePageLookup;
-const Resource = @import("rc.zig").Resource;
-const Allocator = std.mem.Allocator;
-const ErrorDetails = @import("errors.zig").ErrorDetails;
-const Diagnostics = @import("errors.zig").Diagnostics;
-const SourceBytes = @import("literals.zig").SourceBytes;
-const Compiler = @import("compile.zig").Compiler;
-const rc = @import("rc.zig");
-const res = @import("res.zig");
-
-// TODO: Make these configurable?
-pub const max_nested_menu_level: u32 = 512;
-pub const max_nested_version_level: u32 = 512;
-pub const max_nested_expression_level: u32 = 200;
-
-pub const Parser = struct {
- const Self = @This();
-
- lexer: *Lexer,
- /// values that need to be initialized per-parse
- state: Parser.State = undefined,
- options: Parser.Options,
-
- pub const Error = error{ParseError} || Allocator.Error;
-
- pub const Options = struct {
- warn_instead_of_error_on_invalid_code_page: bool = false,
- };
-
- pub fn init(lexer: *Lexer, options: Options) Parser {
- return Parser{
- .lexer = lexer,
- .options = options,
- };
- }
-
- pub const State = struct {
- token: Token,
- lookahead_lexer: Lexer,
- allocator: Allocator,
- arena: Allocator,
- diagnostics: *Diagnostics,
- input_code_page_lookup: CodePageLookup,
- output_code_page_lookup: CodePageLookup,
- };
-
- pub fn parse(self: *Self, allocator: Allocator, diagnostics: *Diagnostics) Error!*Tree {
- var arena = std.heap.ArenaAllocator.init(allocator);
- errdefer arena.deinit();
-
- self.state = Parser.State{
- .token = undefined,
- .lookahead_lexer = undefined,
- .allocator = allocator,
- .arena = arena.allocator(),
- .diagnostics = diagnostics,
- .input_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page),
- .output_code_page_lookup = CodePageLookup.init(arena.allocator(), self.lexer.default_code_page),
- };
-
- const parsed_root = try self.parseRoot();
-
- const tree = try self.state.arena.create(Tree);
- tree.* = .{
- .node = parsed_root,
- .input_code_pages = self.state.input_code_page_lookup,
- .output_code_pages = self.state.output_code_page_lookup,
- .source = self.lexer.buffer,
- .arena = arena.state,
- .allocator = allocator,
- };
- return tree;
- }
-
- fn parseRoot(self: *Self) Error!*Node {
- var statements = std.ArrayList(*Node).init(self.state.allocator);
- defer statements.deinit();
-
- try self.parseStatements(&statements);
- try self.check(.eof);
-
- const node = try self.state.arena.create(Node.Root);
- node.* = .{
- .body = try self.state.arena.dupe(*Node, statements.items),
- };
- return &node.base;
- }
-
- fn parseStatements(self: *Self, statements: *std.ArrayList(*Node)) Error!void {
- while (true) {
- try self.nextToken(.whitespace_delimiter_only);
- if (self.state.token.id == .eof) break;
- // The Win32 compiler will sometimes try to recover from errors
- // and then restart parsing afterwards. We don't ever do this
- // because it almost always leads to unhelpful error messages
- // (usually it will end up with bogus things like 'file
- // not found: {')
- const statement = try self.parseStatement();
- try statements.append(statement);
- }
- }
-
- /// Expects the current token to be the token before possible common resource attributes.
- /// After return, the current token will be the token immediately before the end of the
- /// common resource attributes (if any). If there are no common resource attributes, the
- /// current token is unchanged.
- /// The returned slice is allocated by the parser's arena
- fn parseCommonResourceAttributes(self: *Self) ![]Token {
- var common_resource_attributes = std.ArrayListUnmanaged(Token){};
- while (true) {
- const maybe_common_resource_attribute = try self.lookaheadToken(.normal);
- if (maybe_common_resource_attribute.id == .literal and rc.CommonResourceAttributes.map.has(maybe_common_resource_attribute.slice(self.lexer.buffer))) {
- try common_resource_attributes.append(self.state.arena, maybe_common_resource_attribute);
- self.nextToken(.normal) catch unreachable;
- } else {
- break;
- }
- }
- return common_resource_attributes.toOwnedSlice(self.state.arena);
- }
-
- /// Expects the current token to have already been dealt with, and that the
- /// optional statements will potentially start on the next token.
- /// After return, the current token will be the token immediately before the end of the
- /// optional statements (if any). If there are no optional statements, the
- /// current token is unchanged.
- /// The returned slice is allocated by the parser's arena
- fn parseOptionalStatements(self: *Self, resource: Resource) ![]*Node {
- var optional_statements = std.ArrayListUnmanaged(*Node){};
- while (true) {
- const lookahead_token = try self.lookaheadToken(.normal);
- if (lookahead_token.id != .literal) break;
- const slice = lookahead_token.slice(self.lexer.buffer);
- const optional_statement_type = rc.OptionalStatements.map.get(slice) orelse switch (resource) {
- .dialog, .dialogex => rc.OptionalStatements.dialog_map.get(slice) orelse break,
- else => break,
- };
- self.nextToken(.normal) catch unreachable;
- switch (optional_statement_type) {
- .language => {
- const language = try self.parseLanguageStatement();
- try optional_statements.append(self.state.arena, language);
- },
- // Number only
- .version, .characteristics, .style, .exstyle => {
- const identifier = self.state.token;
- const value = try self.parseExpression(.{
- .can_contain_not_expressions = optional_statement_type == .style or optional_statement_type == .exstyle,
- .allowed_types = .{ .number = true },
- });
- const node = try self.state.arena.create(Node.SimpleStatement);
- node.* = .{
- .identifier = identifier,
- .value = value,
- };
- try optional_statements.append(self.state.arena, &node.base);
- },
- // String only
- .caption => {
- const identifier = self.state.token;
- try self.nextToken(.normal);
- const value = self.state.token;
- if (!value.isStringLiteral()) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = value,
- .extra = .{ .expected_types = .{
- .string_literal = true,
- } },
- });
- }
- // TODO: Wrapping this in a Node.Literal is superfluous but necessary
- // to put it in a SimpleStatement
- const value_node = try self.state.arena.create(Node.Literal);
- value_node.* = .{
- .token = value,
- };
- const node = try self.state.arena.create(Node.SimpleStatement);
- node.* = .{
- .identifier = identifier,
- .value = &value_node.base,
- };
- try optional_statements.append(self.state.arena, &node.base);
- },
- // String or number
- .class => {
- const identifier = self.state.token;
- const value = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
- const node = try self.state.arena.create(Node.SimpleStatement);
- node.* = .{
- .identifier = identifier,
- .value = value,
- };
- try optional_statements.append(self.state.arena, &node.base);
- },
- // Special case
- .menu => {
- const identifier = self.state.token;
- try self.nextToken(.whitespace_delimiter_only);
- try self.check(.literal);
- // TODO: Wrapping this in a Node.Literal is superfluous but necessary
- // to put it in a SimpleStatement
- const value_node = try self.state.arena.create(Node.Literal);
- value_node.* = .{
- .token = self.state.token,
- };
- const node = try self.state.arena.create(Node.SimpleStatement);
- node.* = .{
- .identifier = identifier,
- .value = &value_node.base,
- };
- try optional_statements.append(self.state.arena, &node.base);
- },
- .font => {
- const identifier = self.state.token;
- const point_size = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- // The comma between point_size and typeface is both optional and
- // there can be any number of them
- try self.skipAnyCommas();
-
- try self.nextToken(.normal);
- const typeface = self.state.token;
- if (!typeface.isStringLiteral()) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = typeface,
- .extra = .{ .expected_types = .{
- .string_literal = true,
- } },
- });
- }
-
- const ExSpecificValues = struct {
- weight: ?*Node = null,
- italic: ?*Node = null,
- char_set: ?*Node = null,
- };
- var ex_specific = ExSpecificValues{};
- ex_specific: {
- var optional_param_parser = OptionalParamParser{ .parser = self };
- switch (resource) {
- .dialogex => {
- {
- ex_specific.weight = try optional_param_parser.parse(.{});
- if (optional_param_parser.finished) break :ex_specific;
- }
- {
- if (!(try self.parseOptionalToken(.comma))) break :ex_specific;
- ex_specific.italic = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- }
- {
- ex_specific.char_set = try optional_param_parser.parse(.{});
- if (optional_param_parser.finished) break :ex_specific;
- }
- },
- .dialog => {},
- else => unreachable, // only DIALOG and DIALOGEX have FONT optional-statements
- }
- }
-
- const node = try self.state.arena.create(Node.FontStatement);
- node.* = .{
- .identifier = identifier,
- .point_size = point_size,
- .typeface = typeface,
- .weight = ex_specific.weight,
- .italic = ex_specific.italic,
- .char_set = ex_specific.char_set,
- };
- try optional_statements.append(self.state.arena, &node.base);
- },
- }
- }
- return optional_statements.toOwnedSlice(self.state.arena);
- }
-
- /// Expects the current token to be the first token of the statement.
- fn parseStatement(self: *Self) Error!*Node {
- const first_token = self.state.token;
- std.debug.assert(first_token.id == .literal);
-
- if (rc.TopLevelKeywords.map.get(first_token.slice(self.lexer.buffer))) |keyword| switch (keyword) {
- .language => {
- const language_statement = try self.parseLanguageStatement();
- return language_statement;
- },
- .version, .characteristics => {
- const identifier = self.state.token;
- const value = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- const node = try self.state.arena.create(Node.SimpleStatement);
- node.* = .{
- .identifier = identifier,
- .value = value,
- };
- return &node.base;
- },
- .stringtable => {
- // common resource attributes must all be contiguous and come before optional-statements
- const common_resource_attributes = try self.parseCommonResourceAttributes();
- const optional_statements = try self.parseOptionalStatements(.stringtable);
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var strings = std.ArrayList(*Node).init(self.state.allocator);
- defer strings.deinit();
- while (true) {
- const maybe_end_token = try self.lookaheadToken(.normal);
- switch (maybe_end_token.id) {
- .end => {
- self.nextToken(.normal) catch unreachable;
- break;
- },
- .eof => {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .unfinished_string_table_block,
- .token = maybe_end_token,
- });
- },
- else => {},
- }
- const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- const comma_token: ?Token = if (try self.parseOptionalToken(.comma)) self.state.token else null;
-
- try self.nextToken(.normal);
- if (self.state.token.id != .quoted_ascii_string and self.state.token.id != .quoted_wide_string) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = self.state.token,
- .extra = .{ .expected_types = .{ .string_literal = true } },
- });
- }
-
- const string_node = try self.state.arena.create(Node.StringTableString);
- string_node.* = .{
- .id = id_expression,
- .maybe_comma = comma_token,
- .string = self.state.token,
- };
- try strings.append(&string_node.base);
- }
-
- if (strings.items.len == 0) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_token, // TODO: probably a more specific error message
- .token = self.state.token,
- .extra = .{ .expected = .number },
- });
- }
-
- const end_token = self.state.token;
- try self.check(.end);
-
- const node = try self.state.arena.create(Node.StringTable);
- node.* = .{
- .type = first_token,
- .common_resource_attributes = common_resource_attributes,
- .optional_statements = optional_statements,
- .begin_token = begin_token,
- .strings = try self.state.arena.dupe(*Node, strings.items),
- .end_token = end_token,
- };
- return &node.base;
- },
- };
-
- // The Win32 RC compiler allows for a 'dangling' literal at the end of a file
- // (as long as it's not a valid top-level keyword), and there is actually an
- // .rc file with a such a dangling literal in the Windows-classic-samples set
- // of projects. So, we have special compatibility for this particular case.
- const maybe_eof = try self.lookaheadToken(.whitespace_delimiter_only);
- if (maybe_eof.id == .eof) {
- // TODO: emit warning
- var context = try self.state.arena.alloc(Token, 2);
- context[0] = first_token;
- context[1] = maybe_eof;
- const invalid_node = try self.state.arena.create(Node.Invalid);
- invalid_node.* = .{
- .context = context,
- };
- return &invalid_node.base;
- }
-
- const id_token = first_token;
- const id_code_page = self.lexer.current_code_page;
- try self.nextToken(.whitespace_delimiter_only);
- const resource = try self.checkResource();
- const type_token = self.state.token;
-
- if (resource == .string_num) {
- try self.addErrorDetails(.{
- .err = .string_resource_as_numeric_type,
- .token = type_token,
- });
- return self.addErrorDetailsAndFail(.{
- .err = .string_resource_as_numeric_type,
- .token = type_token,
- .type = .note,
- .print_source_line = false,
- });
- }
-
- if (resource == .font) {
- const id_bytes = SourceBytes{
- .slice = id_token.slice(self.lexer.buffer),
- .code_page = id_code_page,
- };
- const maybe_ordinal = res.NameOrOrdinal.maybeOrdinalFromString(id_bytes);
- if (maybe_ordinal == null) {
- const would_be_win32_rc_ordinal = res.NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes);
- if (would_be_win32_rc_ordinal) |win32_rc_ordinal| {
- try self.addErrorDetails(ErrorDetails{
- .err = .id_must_be_ordinal,
- .token = id_token,
- .extra = .{ .resource = resource },
- });
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .win32_non_ascii_ordinal,
- .token = id_token,
- .type = .note,
- .print_source_line = false,
- .extra = .{ .number = win32_rc_ordinal.ordinal },
- });
- } else {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .id_must_be_ordinal,
- .token = id_token,
- .extra = .{ .resource = resource },
- });
- }
- }
- }
-
- switch (resource) {
- .accelerators => {
- // common resource attributes must all be contiguous and come before optional-statements
- const common_resource_attributes = try self.parseCommonResourceAttributes();
- const optional_statements = try self.parseOptionalStatements(resource);
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var accelerators = std.ArrayListUnmanaged(*Node){};
-
- while (true) {
- const lookahead = try self.lookaheadToken(.normal);
- switch (lookahead.id) {
- .end, .eof => {
- self.nextToken(.normal) catch unreachable;
- break;
- },
- else => {},
- }
- const event = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
-
- try self.nextToken(.normal);
- try self.check(.comma);
-
- const idvalue = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- var type_and_options = std.ArrayListUnmanaged(Token){};
- while (true) {
- if (!(try self.parseOptionalToken(.comma))) break;
-
- try self.nextToken(.normal);
- if (!rc.AcceleratorTypeAndOptions.map.has(self.tokenSlice())) {
- return self.addErrorDetailsAndFail(.{
- .err = .expected_something_else,
- .token = self.state.token,
- .extra = .{ .expected_types = .{
- .accelerator_type_or_option = true,
- } },
- });
- }
- try type_and_options.append(self.state.arena, self.state.token);
- }
-
- const node = try self.state.arena.create(Node.Accelerator);
- node.* = .{
- .event = event,
- .idvalue = idvalue,
- .type_and_options = try type_and_options.toOwnedSlice(self.state.arena),
- };
- try accelerators.append(self.state.arena, &node.base);
- }
-
- const end_token = self.state.token;
- try self.check(.end);
-
- const node = try self.state.arena.create(Node.Accelerators);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .optional_statements = optional_statements,
- .begin_token = begin_token,
- .accelerators = try accelerators.toOwnedSlice(self.state.arena),
- .end_token = end_token,
- };
- return &node.base;
- },
- .dialog, .dialogex => {
- // common resource attributes must all be contiguous and come before optional-statements
- const common_resource_attributes = try self.parseCommonResourceAttributes();
-
- const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- _ = try self.parseOptionalToken(.comma);
-
- const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- _ = try self.parseOptionalToken(.comma);
-
- const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- _ = try self.parseOptionalToken(.comma);
-
- const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- var optional_param_parser = OptionalParamParser{ .parser = self };
- const help_id: ?*Node = try optional_param_parser.parse(.{});
-
- const optional_statements = try self.parseOptionalStatements(resource);
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var controls = std.ArrayListUnmanaged(*Node){};
- defer controls.deinit(self.state.allocator);
- while (try self.parseControlStatement(resource)) |control_node| {
- // The number of controls must fit in a u16 in order for it to
- // be able to be written into the relevant field in the .res data.
- if (controls.items.len >= std.math.maxInt(u16)) {
- try self.addErrorDetails(.{
- .err = .too_many_dialog_controls,
- .token = id_token,
- .extra = .{ .resource = resource },
- });
- return self.addErrorDetailsAndFail(.{
- .err = .too_many_dialog_controls,
- .type = .note,
- .token = control_node.getFirstToken(),
- .token_span_end = control_node.getLastToken(),
- .extra = .{ .resource = resource },
- });
- }
-
- try controls.append(self.state.allocator, control_node);
- }
-
- try self.nextToken(.normal);
- const end_token = self.state.token;
- try self.check(.end);
-
- const node = try self.state.arena.create(Node.Dialog);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .x = x,
- .y = y,
- .width = width,
- .height = height,
- .help_id = help_id,
- .optional_statements = optional_statements,
- .begin_token = begin_token,
- .controls = try self.state.arena.dupe(*Node, controls.items),
- .end_token = end_token,
- };
- return &node.base;
- },
- .toolbar => {
- // common resource attributes must all be contiguous and come before optional-statements
- const common_resource_attributes = try self.parseCommonResourceAttributes();
-
- const button_width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- try self.nextToken(.normal);
- try self.check(.comma);
-
- const button_height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var buttons = std.ArrayListUnmanaged(*Node){};
- while (try self.parseToolbarButtonStatement()) |button_node| {
- try buttons.append(self.state.arena, button_node);
- }
-
- try self.nextToken(.normal);
- const end_token = self.state.token;
- try self.check(.end);
-
- const node = try self.state.arena.create(Node.Toolbar);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .button_width = button_width,
- .button_height = button_height,
- .begin_token = begin_token,
- .buttons = try buttons.toOwnedSlice(self.state.arena),
- .end_token = end_token,
- };
- return &node.base;
- },
- .menu, .menuex => {
- // common resource attributes must all be contiguous and come before optional-statements
- const common_resource_attributes = try self.parseCommonResourceAttributes();
- // help id is optional but must come between common resource attributes and optional-statements
- var help_id: ?*Node = null;
- // Note: No comma is allowed before or after help_id of MENUEX and help_id is not
- // a possible field of MENU.
- if (resource == .menuex and try self.lookaheadCouldBeNumberExpression(.not_disallowed)) {
- help_id = try self.parseExpression(.{
- .is_known_to_be_number_expression = true,
- });
- }
- const optional_statements = try self.parseOptionalStatements(.stringtable);
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var items = std.ArrayListUnmanaged(*Node){};
- defer items.deinit(self.state.allocator);
- while (try self.parseMenuItemStatement(resource, id_token, 1)) |item_node| {
- try items.append(self.state.allocator, item_node);
- }
-
- try self.nextToken(.normal);
- const end_token = self.state.token;
- try self.check(.end);
-
- if (items.items.len == 0) {
- return self.addErrorDetailsAndFail(.{
- .err = .empty_menu_not_allowed,
- .token = type_token,
- });
- }
-
- const node = try self.state.arena.create(Node.Menu);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .optional_statements = optional_statements,
- .help_id = help_id,
- .begin_token = begin_token,
- .items = try self.state.arena.dupe(*Node, items.items),
- .end_token = end_token,
- };
- return &node.base;
- },
- .versioninfo => {
- // common resource attributes must all be contiguous and come before optional-statements
- const common_resource_attributes = try self.parseCommonResourceAttributes();
-
- var fixed_info = std.ArrayListUnmanaged(*Node){};
- while (try self.parseVersionStatement()) |version_statement| {
- try fixed_info.append(self.state.arena, version_statement);
- }
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var block_statements = std.ArrayListUnmanaged(*Node){};
- while (try self.parseVersionBlockOrValue(id_token, 1)) |block_node| {
- try block_statements.append(self.state.arena, block_node);
- }
-
- try self.nextToken(.normal);
- const end_token = self.state.token;
- try self.check(.end);
-
- const node = try self.state.arena.create(Node.VersionInfo);
- node.* = .{
- .id = id_token,
- .versioninfo = type_token,
- .common_resource_attributes = common_resource_attributes,
- .fixed_info = try fixed_info.toOwnedSlice(self.state.arena),
- .begin_token = begin_token,
- .block_statements = try block_statements.toOwnedSlice(self.state.arena),
- .end_token = end_token,
- };
- return &node.base;
- },
- .dlginclude => {
- const common_resource_attributes = try self.parseCommonResourceAttributes();
-
- const filename_expression = try self.parseExpression(.{
- .allowed_types = .{ .string = true },
- });
-
- const node = try self.state.arena.create(Node.ResourceExternal);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .filename = filename_expression,
- };
- return &node.base;
- },
- .stringtable => {
- return self.addErrorDetailsAndFail(.{
- .err = .name_or_id_not_allowed,
- .token = id_token,
- .extra = .{ .resource = resource },
- });
- },
- // Just try everything as a 'generic' resource (raw data or external file)
- // TODO: More fine-grained switch cases as necessary
- else => {
- const common_resource_attributes = try self.parseCommonResourceAttributes();
-
- const maybe_begin = try self.lookaheadToken(.normal);
- if (maybe_begin.id == .begin) {
- self.nextToken(.normal) catch unreachable;
-
- if (!resource.canUseRawData()) {
- try self.addErrorDetails(ErrorDetails{
- .err = .resource_type_cant_use_raw_data,
- .token = maybe_begin,
- .extra = .{ .resource = resource },
- });
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .resource_type_cant_use_raw_data,
- .type = .note,
- .print_source_line = false,
- .token = maybe_begin,
- });
- }
-
- const raw_data = try self.parseRawDataBlock();
- const end_token = self.state.token;
-
- const node = try self.state.arena.create(Node.ResourceRawData);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .begin_token = maybe_begin,
- .raw_data = raw_data,
- .end_token = end_token,
- };
- return &node.base;
- }
-
- const filename_expression = try self.parseExpression(.{
- // Don't tell the user that numbers are accepted since we error on
- // number expressions and regular number literals are treated as unquoted
- // literals rather than numbers, so from the users perspective
- // numbers aren't really allowed.
- .expected_types_override = .{
- .literal = true,
- .string_literal = true,
- },
- });
-
- const node = try self.state.arena.create(Node.ResourceExternal);
- node.* = .{
- .id = id_token,
- .type = type_token,
- .common_resource_attributes = common_resource_attributes,
- .filename = filename_expression,
- };
- return &node.base;
- },
- }
- }
-
- /// Parses the contents of a raw data block: a sequence of number and/or string
- /// expressions, optionally separated by commas.
- /// Expects the current token to be a begin token.
- /// After return, the current token will be the end token.
- /// The returned slice is a copy made with the parser's arena allocator.
- fn parseRawDataBlock(self: *Self) Error![]*Node {
-     // Scratch list; only the final copy made below outlives this function.
-     var elements = std.ArrayList(*Node).init(self.state.allocator);
-     defer elements.deinit();
-
-     while (true) {
-         const peeked = try self.lookaheadToken(.normal);
-
-         // Running out of input before the end token is an error.
-         if (peeked.id == .eof) {
-             return self.addErrorDetailsAndFail(ErrorDetails{
-                 .err = .unfinished_raw_data_block,
-                 .token = peeked,
-             });
-         }
-
-         // The end token terminates the block; make it the current token.
-         if (peeked.id == .end) {
-             self.nextToken(.normal) catch unreachable;
-             break;
-         }
-
-         if (peeked.id == .comma) {
-             // A comma is only tolerated once at least one value has been parsed.
-             if (elements.items.len == 0) {
-                 return self.addErrorDetailsAndFail(ErrorDetails{
-                     .err = .expected_something_else,
-                     .token = peeked,
-                     .extra = .{ .expected_types = .{
-                         .number = true,
-                         .number_expression = true,
-                         .string_literal = true,
-                     } },
-                 });
-             }
-             // Any other comma is simply skipped.
-             self.nextToken(.normal) catch unreachable;
-             continue;
-         }
-
-         const element = try self.parseExpression(.{ .allowed_types = .{ .number = true, .string = true } });
-         try elements.append(element);
-
-         if (!element.isNumberExpression()) continue;
-
-         const following = try self.lookaheadToken(.normal);
-         if (following.id != .close_paren) continue;
-
-         // <number expression>) is an error
-         return self.addErrorDetailsAndFail(ErrorDetails{
-             .err = .expected_token,
-             .token = following,
-             .extra = .{ .expected = .operator },
-         });
-     }
-
-     return try self.state.arena.dupe(*Node, elements.items);
- }
-
- /// Parses one control statement into a `Node.ControlStatement`, or returns null if the next token is not a key of `rc.Control.map`. `resource` gates the DIALOGEX-only help id and extra data block.
- /// Expects the current token to be handled, and that the control statement will begin on the next token.
- /// After return, the current token will be the token immediately before the end of the
- /// control statement (or unchanged if the function returns null).
- fn parseControlStatement(self: *Self, resource: Resource) Error!?*Node {
- const control_token = try self.lookaheadToken(.normal);
- const control = rc.Control.map.get(control_token.slice(self.lexer.buffer)) orelse return null;
- self.nextToken(.normal) catch unreachable; // the lookahead above succeeded, so advancing is asserted not to fail
-
- try self.skipAnyCommas();
-
- var text: ?Token = null;
- if (control.hasTextParam()) {
- try self.nextToken(.normal);
- switch (self.state.token.id) {
- .quoted_ascii_string, .quoted_wide_string, .number => {
- text = self.state.token;
- },
- else => {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = self.state.token,
- .extra = .{ .expected_types = .{
- .number = true,
- .string_literal = true,
- } },
- });
- },
- }
- try self.skipAnyCommas();
- }
-
- const id = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- try self.skipAnyCommas();
-
- var class: ?*Node = null;
- var style: ?*Node = null;
- if (control == .control) { // only the `.control` kind takes class and style before the coordinates
- class = try self.parseExpression(.{});
- if (class.?.id == .literal) {
- const class_literal = @fieldParentPtr(Node.Literal, "base", class.?);
- const is_invalid_control_class = class_literal.token.id == .literal and !rc.ControlClass.map.has(class_literal.token.slice(self.lexer.buffer)); // an unquoted literal must be a key of rc.ControlClass.map
- if (is_invalid_control_class) {
- return self.addErrorDetailsAndFail(.{
- .err = .expected_something_else,
- .token = self.state.token,
- .extra = .{ .expected_types = .{
- .control_class = true,
- } },
- });
- }
- }
- try self.skipAnyCommas();
- style = try self.parseExpression(.{
- .can_contain_not_expressions = true,
- .allowed_types = .{ .number = true },
- });
- // If there is no comma after the style parameter, the Win32 RC compiler
- // could misinterpret the statement and end up skipping over at least one token
- // that should have been interpreted as the next parameter (x). For example:
- // CONTROL "text", 1, BUTTON, 15 30, 1, 2, 3, 4
- // the `15` is the style parameter, but in the Win32 implementation the `30`
- // is completely ignored (i.e. the `1, 2, 3, 4` are `x`, `y`, `w`, `h`).
- // If a comma is added after the `15`, then `30` gets interpreted (correctly)
- // as the `x` value.
- //
- // Instead of emulating this behavior, we just warn about the potential for
- // weird behavior in the Win32 implementation whenever there isn't a comma after
- // the style parameter.
- const lookahead_token = try self.lookaheadToken(.normal);
- if (lookahead_token.id != .comma and lookahead_token.id != .eof) {
- try self.addErrorDetails(.{
- .err = .rc_could_miscompile_control_params,
- .type = .warning,
- .token = lookahead_token,
- });
- try self.addErrorDetails(.{
- .err = .rc_could_miscompile_control_params,
- .type = .note,
- .token = style.?.getFirstToken(),
- .token_span_end = style.?.getLastToken(),
- });
- }
- try self.skipAnyCommas();
- }
-
- const x = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- _ = try self.parseOptionalToken(.comma); // at most one comma is consumed between coordinates
- const y = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- _ = try self.parseOptionalToken(.comma);
- const width = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
- _ = try self.parseOptionalToken(.comma);
- const height = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- var optional_param_parser = OptionalParamParser{ .parser = self };
- if (control != .control) { // every other control kind takes style as the first optional trailing parameter
- style = try optional_param_parser.parse(.{ .not_expression_allowed = true });
- }
-
- const exstyle: ?*Node = try optional_param_parser.parse(.{ .not_expression_allowed = true });
- const help_id: ?*Node = switch (resource) {
- .dialogex => try optional_param_parser.parse(.{}), // help id is only parsed for .dialogex
- else => null,
- };
-
- var extra_data: []*Node = &[_]*Node{};
- var extra_data_begin: ?Token = null;
- var extra_data_end: ?Token = null;
- // extra data is DIALOGEX-only
- if (resource == .dialogex and try self.parseOptionalToken(.begin)) {
- extra_data_begin = self.state.token;
- extra_data = try self.parseRawDataBlock();
- extra_data_end = self.state.token;
- }
-
- const node = try self.state.arena.create(Node.ControlStatement);
- node.* = .{
- .type = control_token,
- .text = text,
- .class = class,
- .id = id,
- .x = x,
- .y = y,
- .width = width,
- .height = height,
- .style = style,
- .exstyle = exstyle,
- .help_id = help_id,
- .extra_data_begin = extra_data_begin,
- .extra_data = extra_data,
- .extra_data_end = extra_data_end,
- };
- return &node.base;
- }
-
- /// Parses a single toolbar entry. Returns null, leaving the current token untouched,
- /// when the next token is not a key of `rc.ToolbarButton.map`.
- /// A separator yields a `Node.Literal` holding the keyword token; a button yields a
- /// `Node.SimpleStatement` pairing the keyword token with its number expression.
- fn parseToolbarButtonStatement(self: *Self) Error!?*Node {
-     const peeked = try self.lookaheadToken(.normal);
-     const keyword_text = peeked.slice(self.lexer.buffer);
-     const kind = rc.ToolbarButton.map.get(keyword_text) orelse return null;
-
-     // The keyword was recognized, so make it the current token.
-     self.nextToken(.normal) catch unreachable;
-
-     switch (kind) {
-         .button => {
-             const id_expression = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
-             const statement = try self.state.arena.create(Node.SimpleStatement);
-             statement.* = .{
-                 .identifier = peeked,
-                 .value = id_expression,
-             };
-             return &statement.base;
-         },
-         .separator => {
-             const literal = try self.state.arena.create(Node.Literal);
-             literal.* = .{
-                 .token = peeked,
-             };
-             return &literal.base;
-         },
-     }
- }
-
- /// Parses one menuitem/popup statement of a `.menu` or `.menuex` resource (recursing with `nesting_level + 1` for nested items); returns null if the next token is not a key of `rc.MenuItem.map`.
- /// Expects the current token to be handled, and that the menuitem/popup statement will begin on the next token. `top_level_menu_id_token` is only used in the error reported when `nesting_level` exceeds `max_nested_menu_level`.
- /// After return, the current token will be the token immediately before the end of the
- /// menuitem statement (or unchanged if the function returns null).
- fn parseMenuItemStatement(self: *Self, resource: Resource, top_level_menu_id_token: Token, nesting_level: u32) Error!?*Node {
- const menuitem_token = try self.lookaheadToken(.normal);
- const menuitem = rc.MenuItem.map.get(menuitem_token.slice(self.lexer.buffer)) orelse return null;
- self.nextToken(.normal) catch unreachable; // the lookahead above succeeded, so advancing is asserted not to fail
-
- if (nesting_level > max_nested_menu_level) {
- try self.addErrorDetails(.{
- .err = .nested_resource_level_exceeds_max,
- .token = top_level_menu_id_token,
- .extra = .{ .resource = resource },
- });
- return self.addErrorDetailsAndFail(.{
- .err = .nested_resource_level_exceeds_max,
- .type = .note,
- .token = menuitem_token,
- .extra = .{ .resource = resource },
- });
- }
-
- switch (resource) {
- .menu => switch (menuitem) {
- .menuitem => {
- try self.nextToken(.normal);
- if (rc.MenuItem.isSeparator(self.state.token.slice(self.lexer.buffer))) {
- const separator_token = self.state.token;
- // There can be any number of trailing commas after SEPARATOR
- try self.skipAnyCommas();
- const node = try self.state.arena.create(Node.MenuItemSeparator);
- node.* = .{
- .menuitem = menuitem_token,
- .separator = separator_token,
- };
- return &node.base;
- } else {
- const text = self.state.token;
- if (!text.isStringLiteral()) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = text,
- .extra = .{ .expected_types = .{
- .string_literal = true,
- } },
- });
- }
- try self.skipAnyCommas();
-
- const result = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- _ = try self.parseOptionalToken(.comma);
-
- var options = std.ArrayListUnmanaged(Token){};
- while (true) { // collect tokens for as long as they are keys of rc.MenuItem.Option.map
- const option_token = try self.lookaheadToken(.normal);
- if (!rc.MenuItem.Option.map.has(option_token.slice(self.lexer.buffer))) {
- break;
- }
- self.nextToken(.normal) catch unreachable;
- try options.append(self.state.arena, option_token);
- try self.skipAnyCommas();
- }
-
- const node = try self.state.arena.create(Node.MenuItem);
- node.* = .{
- .menuitem = menuitem_token,
- .text = text,
- .result = result,
- .option_list = try options.toOwnedSlice(self.state.arena),
- };
- return &node.base;
- }
- },
- .popup => {
- try self.nextToken(.normal);
- const text = self.state.token;
- if (!text.isStringLiteral()) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = text,
- .extra = .{ .expected_types = .{
- .string_literal = true,
- } },
- });
- }
- try self.skipAnyCommas();
-
- var options = std.ArrayListUnmanaged(Token){};
- while (true) { // collect tokens for as long as they are keys of rc.MenuItem.Option.map
- const option_token = try self.lookaheadToken(.normal);
- if (!rc.MenuItem.Option.map.has(option_token.slice(self.lexer.buffer))) {
- break;
- }
- self.nextToken(.normal) catch unreachable;
- try options.append(self.state.arena, option_token);
- try self.skipAnyCommas();
- }
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var items = std.ArrayListUnmanaged(*Node){};
- while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |item_node| {
- try items.append(self.state.arena, item_node);
- }
-
- try self.nextToken(.normal);
- const end_token = self.state.token;
- try self.check(.end);
-
- if (items.items.len == 0) { // checked only after the end token has been validated
- return self.addErrorDetailsAndFail(.{
- .err = .empty_menu_not_allowed,
- .token = menuitem_token,
- });
- }
-
- const node = try self.state.arena.create(Node.Popup);
- node.* = .{
- .popup = menuitem_token,
- .text = text,
- .option_list = try options.toOwnedSlice(self.state.arena),
- .begin_token = begin_token,
- .items = try items.toOwnedSlice(self.state.arena),
- .end_token = end_token,
- };
- return &node.base;
- },
- },
- .menuex => {
- try self.nextToken(.normal);
- const text = self.state.token;
- if (!text.isStringLiteral()) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = text,
- .extra = .{ .expected_types = .{
- .string_literal = true,
- } },
- });
- }
-
- var param_parser = OptionalParamParser{ .parser = self };
- const id = try param_parser.parse(.{}); // each of these may be null (omitted or empty parameter)
- const item_type = try param_parser.parse(.{});
- const state = try param_parser.parse(.{});
-
- if (menuitem == .menuitem) {
- // trailing comma is allowed, skip it
- _ = try self.parseOptionalToken(.comma);
-
- const node = try self.state.arena.create(Node.MenuItemEx);
- node.* = .{
- .menuitem = menuitem_token,
- .text = text,
- .id = id,
- .type = item_type,
- .state = state,
- };
- return &node.base;
- }
-
- const help_id = try param_parser.parse(.{}); // reached only when the keyword is not .menuitem
-
- // trailing comma is allowed, skip it
- _ = try self.parseOptionalToken(.comma);
-
- try self.nextToken(.normal);
- const begin_token = self.state.token;
- try self.check(.begin);
-
- var items = std.ArrayListUnmanaged(*Node){};
- while (try self.parseMenuItemStatement(resource, top_level_menu_id_token, nesting_level + 1)) |item_node| {
- try items.append(self.state.arena, item_node);
- }
-
- try self.nextToken(.normal);
- const end_token = self.state.token;
- try self.check(.end);
-
- if (items.items.len == 0) { // checked only after the end token has been validated
- return self.addErrorDetailsAndFail(.{
- .err = .empty_menu_not_allowed,
- .token = menuitem_token,
- });
- }
-
- const node = try self.state.arena.create(Node.PopupEx);
- node.* = .{
- .popup = menuitem_token,
- .text = text,
- .id = id,
- .type = item_type,
- .state = state,
- .help_id = help_id,
- .begin_token = begin_token,
- .items = try items.toOwnedSlice(self.state.arena),
- .end_token = end_token,
- };
- return &node.base;
- },
- else => unreachable, // any resource other than .menu/.menuex is asserted to never reach this function
- }
- @compileError("unreachable"); // NOTE(review): presumably a compile-time guard that control never falls out of the switch above - confirm
- }
-
- /// Helper for parsing a run of optional, comma-prefixed number parameters.
- /// Each call to `parse` handles one parameter. As soon as a parameter is not
- /// introduced by a comma, the parser marks itself finished and every later
- /// call returns null without looking at any tokens.
- pub const OptionalParamParser = struct {
-     /// Set once a parameter was not preceded by a comma.
-     finished: bool = false,
-     /// The parser whose token stream is consumed.
-     parser: *Self,
-
-     pub const Options = struct {
-         /// Whether the parameter may contain NOT expressions.
-         not_expression_allowed: bool = false,
-     };
-
-     /// Parses the next optional parameter.
-     /// Returns null when the run has finished or when the parameter is present but
-     /// 'empty' (a comma that is not followed by something that could start a number
-     /// expression); otherwise returns the parsed number expression.
-     pub fn parse(self: *OptionalParamParser, options: OptionalParamParser.Options) Error!?*Node {
-         if (self.finished) return null;
-
-         const saw_comma = try self.parser.parseOptionalToken(.comma);
-         if (!saw_comma) {
-             self.finished = true;
-             return null;
-         }
-
-         // 'Empty' values are allowed, so only parse an expression when the
-         // upcoming token could actually be part of a number expression.
-         const has_value = try self.parser.lookaheadCouldBeNumberExpression(
-             if (options.not_expression_allowed) .not_allowed else .not_disallowed,
-         );
-         if (!has_value) return null;
-
-         return try self.parser.parseExpression(.{
-             .allowed_types = .{ .number = true },
-             .can_contain_not_expressions = options.not_expression_allowed,
-         });
-     }
- };
-
- /// Parses one fixed-info statement of a version resource. Returns null, leaving the
- /// current token unchanged, when the next token is not a key of `rc.VersionInfo.map`.
- /// Expects the current token to be handled, and that the version statement will
- /// begin on the next token.
- /// File/product version statements take between one and four comma-separated number
- /// expressions and produce a `Node.VersionStatement`; every other statement type takes
- /// a single number expression and produces a `Node.SimpleStatement`.
- fn parseVersionStatement(self: *Self) Error!?*Node {
-     const keyword_token = try self.lookaheadToken(.normal);
-     const keyword_text = keyword_token.slice(self.lexer.buffer);
-     const kind = rc.VersionInfo.map.get(keyword_text) orelse return null;
-     self.nextToken(.normal) catch unreachable;
-
-     const takes_version_parts = switch (kind) {
-         .file_version, .product_version => true,
-         else => false,
-     };
-
-     if (!takes_version_parts) {
-         const operand = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
-         const statement = try self.state.arena.create(Node.SimpleStatement);
-         statement.* = .{
-             .identifier = keyword_token,
-             .value = operand,
-         };
-         return &statement.base;
-     }
-
-     // At most four parts are read; a comma is only consumed when there is
-     // still room for another part.
-     var storage: [4]*Node = undefined;
-     var count: usize = 0;
-     while (true) {
-         storage[count] = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-         count += 1;
-
-         if (count == storage.len) break;
-         if (!(try self.parseOptionalToken(.comma))) break;
-     }
-
-     const version_node = try self.state.arena.create(Node.VersionStatement);
-     version_node.* = .{
-         .type = keyword_token,
-         .parts = try self.state.arena.dupe(*Node, storage[0..count]),
-     };
-     return &version_node.base;
- }
-
- /// Parses one BLOCK or VALUE statement of a version resource, recursing into the
- /// children of a BLOCK with `nesting_level + 1`. Returns null, leaving the current
- /// token unchanged, when the next token is not a key of `rc.VersionBlock.map`.
- /// Expects the current token to be handled, and that the version BLOCK/VALUE will
- /// begin on the next token.
- /// `top_level_version_id_token` is only used in the error reported when
- /// `nesting_level` exceeds `max_nested_version_level`.
- fn parseVersionBlockOrValue(self: *Self, top_level_version_id_token: Token, nesting_level: u32) Error!?*Node {
-     const statement_token = try self.lookaheadToken(.normal);
-     const statement_kind = rc.VersionBlock.map.get(statement_token.slice(self.lexer.buffer)) orelse return null;
-     self.nextToken(.normal) catch unreachable;
-
-     if (nesting_level > max_nested_version_level) {
-         try self.addErrorDetails(.{
-             .err = .nested_resource_level_exceeds_max,
-             .token = top_level_version_id_token,
-             .extra = .{ .resource = .versioninfo },
-         });
-         return self.addErrorDetailsAndFail(.{
-             .err = .nested_resource_level_exceeds_max,
-             .type = .note,
-             .token = statement_token,
-             .extra = .{ .resource = .versioninfo },
-         });
-     }
-
-     // The key must be a string literal.
-     try self.nextToken(.normal);
-     const key_token = self.state.token;
-     if (!key_token.isStringLiteral()) {
-         return self.addErrorDetailsAndFail(.{
-             .err = .expected_something_else,
-             .token = key_token,
-             .extra = .{ .expected_types = .{
-                 .string_literal = true,
-             } },
-         });
-     }
-
-     // Whether a comma followed the key is remembered so that a potential
-     // miscompilation can be detected when the comma is omitted and the first
-     // value is a quoted string.
-     const comma_after_key = try self.parseOptionalToken(.comma);
-     try self.skipAnyCommas();
-
-     const value_list = try self.parseBlockValuesList(comma_after_key);
-
-     switch (statement_kind) {
-         .value => {
-             const value_node = try self.state.arena.create(Node.BlockValue);
-             value_node.* = .{
-                 .identifier = statement_token,
-                 .key = key_token,
-                 .values = value_list,
-             };
-             return &value_node.base;
-         },
-         .block => {
-             try self.nextToken(.normal);
-             const open_token = self.state.token;
-             try self.check(.begin);
-
-             var nested = std.ArrayListUnmanaged(*Node){};
-             while (try self.parseVersionBlockOrValue(top_level_version_id_token, nesting_level + 1)) |child| {
-                 try nested.append(self.state.arena, child);
-             }
-
-             try self.nextToken(.normal);
-             const close_token = self.state.token;
-             try self.check(.end);
-
-             const block_node = try self.state.arena.create(Node.Block);
-             block_node.* = .{
-                 .identifier = statement_token,
-                 .key = key_token,
-                 .values = value_list,
-                 .begin_token = open_token,
-                 .children = try nested.toOwnedSlice(self.state.arena),
-                 .end_token = close_token,
-             };
-             return &block_node.base;
-         },
-     }
- }
-
- /// Parses the list of values that follows the key of a version BLOCK/VALUE statement
- /// and returns them as `Node.BlockValueValue` nodes allocated in the arena.
- /// Parsing stops at the first lookahead token that cannot start a value.
- /// `had_comma_before_first_value` tells whether a comma separated the key from the
- /// first value; it is only used to decide whether to emit the padding warning.
- /// Two Win32 RC miscompilation warnings (each paired with a note) may be emitted;
- /// neither makes parsing fail.
- fn parseBlockValuesList(self: *Self, had_comma_before_first_value: bool) Error![]*Node {
-     var collected = std.ArrayListUnmanaged(*Node){};
-     var any_number = false;
-     var earliest_string: ?*Node = null;
-
-     while (true) {
-         const peeked = try self.lookaheadToken(.normal);
-         const starts_value = switch (peeked.id) {
-             .operator,
-             .number,
-             .open_paren,
-             .quoted_ascii_string,
-             .quoted_wide_string,
-             => true,
-             else => false,
-         };
-         if (!starts_value) break;
-
-         const expression = try self.parseExpression(.{});
-
-         if (expression.isNumberExpression()) {
-             any_number = true;
-         } else if (earliest_string == null) {
-             std.debug.assert(expression.isStringLiteral());
-             earliest_string = expression;
-         }
-
-         const comma_followed = try self.parseOptionalToken(.comma);
-         try self.skipAnyCommas();
-
-         const wrapper = try self.state.arena.create(Node.BlockValueValue);
-         wrapper.* = .{
-             .expression = expression,
-             .trailing_comma = comma_followed,
-         };
-         try collected.append(self.state.arena, &wrapper.base);
-     }
-
-     if (any_number) {
-         if (earliest_string) |string_node| {
-             // The Win32 RC compiler counts strings as UTF-16 code units (including
-             // the null-terminator) but numbers as their byte lengths. When a single
-             // value mixes both, it marks the value as binary while reporting a
-             // length that mixes bytes and code units, so readers that treat the
-             // length as a byte count will not read the full value. That behavior
-             // is not reproduced here, so a warning about the miscompilation is
-             // emitted instead.
-             const anchor = string_node.getFirstToken();
-             const span_start = collected.items[0].getFirstToken();
-             const span_end = collected.items[collected.items.len - 1].getLastToken();
-             try self.addErrorDetails(.{
-                 .err = .rc_would_miscompile_version_value_byte_count,
-                 .type = .warning,
-                 .token = anchor,
-                 .token_span_start = span_start,
-                 .token_span_end = span_end,
-             });
-             try self.addErrorDetails(.{
-                 .err = .rc_would_miscompile_version_value_byte_count,
-                 .type = .note,
-                 .token = anchor,
-                 .token_span_start = span_start,
-                 .token_span_end = span_end,
-                 .print_source_line = false,
-             });
-         }
-     }
-
-     if (!had_comma_before_first_value and collected.items.len > 0) {
-         const first_expression = collected.items[0].cast(.block_value_value).?.expression;
-         if (first_expression.isStringLiteral()) {
-             const string_token = first_expression.cast(.literal).?.token;
-             try self.addErrorDetails(.{
-                 .err = .rc_would_miscompile_version_value_padding,
-                 .type = .warning,
-                 .token = string_token,
-             });
-             try self.addErrorDetails(.{
-                 .err = .rc_would_miscompile_version_value_padding,
-                 .type = .note,
-                 .token = string_token,
-                 .print_source_line = false,
-             });
-         }
-     }
-
-     return collected.toOwnedSlice(self.state.arena);
- }
-
- fn numberExpressionContainsAnyLSuffixes(expression_node: *Node, source: []const u8, code_page_lookup: *const CodePageLookup) bool {
- // TODO: This could probably be done without evaluating the whole expression
- return Compiler.evaluateNumberExpression(expression_node, source, code_page_lookup).is_long;
- }
-
- /// Expects the current token to be a literal token that contains the string LANGUAGE
- fn parseLanguageStatement(self: *Self) Error!*Node {
- const language_token = self.state.token;
-
- const primary_language = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- try self.nextToken(.normal);
- try self.check(.comma);
-
- const sublanguage = try self.parseExpression(.{ .allowed_types = .{ .number = true } });
-
- // The Win32 RC compiler errors if either parameter contains any number with an L
- // suffix. Instead of that, we want to warn and then let the values get truncated.
- // The warning is done here to allow the compiler logic to not have to deal with this.
- if (numberExpressionContainsAnyLSuffixes(primary_language, self.lexer.buffer, &self.state.input_code_page_lookup)) {
- try self.addErrorDetails(.{
- .err = .rc_would_error_u16_with_l_suffix,
- .type = .warning,
- .token = primary_language.getFirstToken(),
- .token_span_end = primary_language.getLastToken(),
- .extra = .{ .statement_with_u16_param = .language },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_error_u16_with_l_suffix,
- .print_source_line = false,
- .type = .note,
- .token = primary_language.getFirstToken(),
- .token_span_end = primary_language.getLastToken(),
- .extra = .{ .statement_with_u16_param = .language },
- });
- }
- if (numberExpressionContainsAnyLSuffixes(sublanguage, self.lexer.buffer, &self.state.input_code_page_lookup)) {
- try self.addErrorDetails(.{
- .err = .rc_would_error_u16_with_l_suffix,
- .type = .warning,
- .token = sublanguage.getFirstToken(),
- .token_span_end = sublanguage.getLastToken(),
- .extra = .{ .statement_with_u16_param = .language },
- });
- try self.addErrorDetails(.{
- .err = .rc_would_error_u16_with_l_suffix,
- .print_source_line = false,
- .type = .note,
- .token = sublanguage.getFirstToken(),
- .token_span_end = sublanguage.getLastToken(),
- .extra = .{ .statement_with_u16_param = .language },
- });
- }
-
- const node = try self.state.arena.create(Node.LanguageStatement);
- node.* = .{
- .language_token = language_token,
- .primary_language_id = primary_language,
- .sublanguage_id = sublanguage,
- };
- return &node.base;
- }
-
- pub const ParseExpressionOptions = struct {
- is_known_to_be_number_expression: bool = false,
- can_contain_not_expressions: bool = false,
- nesting_context: NestingContext = .{},
- allowed_types: AllowedTypes = .{ .literal = true, .number = true, .string = true },
- expected_types_override: ?ErrorDetails.ExpectedTypes = null,
-
- pub const AllowedTypes = struct {
- literal: bool = false,
- number: bool = false,
- string: bool = false,
- };
-
- pub const NestingContext = struct {
- first_token: ?Token = null,
- last_token: ?Token = null,
- level: u32 = 0,
-
- /// Returns a new NestingContext with values modified appropriately for an increased nesting level
- fn incremented(ctx: NestingContext, first_token: Token, most_recent_token: Token) NestingContext {
- return .{
- .first_token = ctx.first_token orelse first_token,
- .last_token = most_recent_token,
- .level = ctx.level + 1,
- };
- }
- };
-
- pub fn toErrorDetails(options: ParseExpressionOptions, token: Token) ErrorDetails {
- // TODO: expected_types_override interaction with is_known_to_be_number_expression?
- const expected_types = options.expected_types_override orelse ErrorDetails.ExpectedTypes{
- .number = options.allowed_types.number,
- .number_expression = options.allowed_types.number,
- .string_literal = options.allowed_types.string and !options.is_known_to_be_number_expression,
- .literal = options.allowed_types.literal and !options.is_known_to_be_number_expression,
- };
- return ErrorDetails{
- .err = .expected_something_else,
- .token = token,
- .extra = .{ .expected_types = expected_types },
- };
- }
- };
-
- /// Returns true if the next lookahead token is a number or could be the start of a number expression.
- /// Only useful when looking for empty expressions in optional fields.
- fn lookaheadCouldBeNumberExpression(self: *Self, not_allowed: enum { not_allowed, not_disallowed }) Error!bool {
- var lookahead_token = try self.lookaheadToken(.normal);
- switch (lookahead_token.id) {
- .literal => if (not_allowed == .not_allowed) {
- return std.ascii.eqlIgnoreCase("NOT", lookahead_token.slice(self.lexer.buffer));
- } else return false,
- .number => return true,
- .open_paren => return true,
- .operator => {
- // + can be a unary operator, see parseExpression's handling of unary +
- const operator_char = lookahead_token.slice(self.lexer.buffer)[0];
- return operator_char == '+';
- },
- else => return false,
- }
- }
-
- fn parsePrimary(self: *Self, options: ParseExpressionOptions) Error!*Node {
- try self.nextToken(.normal);
- const first_token = self.state.token;
- var is_close_paren_expression = false;
- var is_unary_plus_expression = false;
- switch (self.state.token.id) {
- .quoted_ascii_string, .quoted_wide_string => {
- if (!options.allowed_types.string) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
- const node = try self.state.arena.create(Node.Literal);
- node.* = .{ .token = self.state.token };
- return &node.base;
- },
- .literal => {
- if (options.can_contain_not_expressions and std.ascii.eqlIgnoreCase("NOT", self.state.token.slice(self.lexer.buffer))) {
- const not_token = self.state.token;
- try self.nextToken(.normal);
- try self.check(.number);
- if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
- const node = try self.state.arena.create(Node.NotExpression);
- node.* = .{
- .not_token = not_token,
- .number_token = self.state.token,
- };
- return &node.base;
- }
- if (!options.allowed_types.literal) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
- const node = try self.state.arena.create(Node.Literal);
- node.* = .{ .token = self.state.token };
- return &node.base;
- },
- .number => {
- if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(self.state.token));
- const node = try self.state.arena.create(Node.Literal);
- node.* = .{ .token = self.state.token };
- return &node.base;
- },
- .open_paren => {
- const open_paren_token = self.state.token;
-
- const expression = try self.parseExpression(.{
- .is_known_to_be_number_expression = true,
- .can_contain_not_expressions = options.can_contain_not_expressions,
- .nesting_context = options.nesting_context.incremented(first_token, open_paren_token),
- .allowed_types = .{ .number = true },
- });
-
- try self.nextToken(.normal);
- // TODO: Add context to error about where the open paren is
- try self.check(.close_paren);
-
- if (!options.allowed_types.number) return self.addErrorDetailsAndFail(options.toErrorDetails(open_paren_token));
- const node = try self.state.arena.create(Node.GroupedExpression);
- node.* = .{
- .open_token = open_paren_token,
- .expression = expression,
- .close_token = self.state.token,
- };
- return &node.base;
- },
- .close_paren => {
- // Note: In the Win32 implementation, a single close paren
- // counts as a valid "expression", but only when its the first and
- // only token in the expression. Such an expression is then treated
- // as a 'skip this expression' instruction. For example:
- // 1 RCDATA { 1, ), ), ), 2 }
- // will be evaluated as if it were `1 RCDATA { 1, 2 }` and only
- // 0x0001 and 0x0002 will be written to the .res data.
- //
- // This behavior is not emulated because it almost certainly has
- // no valid use cases and only introduces edge cases that are
- // not worth the effort to track down and deal with. Instead,
- // we error but also add a note about the Win32 RC behavior if
- // this edge case is detected.
- if (!options.is_known_to_be_number_expression) {
- is_close_paren_expression = true;
- }
- },
- .operator => {
- // In the Win32 implementation, something akin to a unary +
- // is allowed but it doesn't behave exactly like a unary +.
- // Instead of emulating the Win32 behavior, we instead error
- // and add a note about unary plus not being allowed.
- //
- // This is done because unary + only works in some places,
- // and there's no real use-case for it since it's so limited
- // in how it can be used (e.g. +1 is accepted but (+1) will error)
- //
- // Even understanding when unary plus is allowed is difficult, so
- // we don't do any fancy detection of when the Win32 RC compiler would
- // allow a unary + and instead just output the note in all cases.
- //
- // Some examples of allowed expressions by the Win32 compiler:
- // +1
- // 0|+5
- // +1+2
- // +~-5
- // +(1)
- //
- // Some examples of disallowed expressions by the Win32 compiler:
- // (+1)
- // ++5
- //
- // TODO: Potentially re-evaluate and support the unary plus in a bug-for-bug
- // compatible way.
- const operator_char = self.state.token.slice(self.lexer.buffer)[0];
- if (operator_char == '+') {
- is_unary_plus_expression = true;
- }
- },
- else => {},
- }
-
- try self.addErrorDetails(options.toErrorDetails(self.state.token));
- if (is_close_paren_expression) {
- try self.addErrorDetails(ErrorDetails{
- .err = .close_paren_expression,
- .type = .note,
- .token = self.state.token,
- .print_source_line = false,
- });
- }
- if (is_unary_plus_expression) {
- try self.addErrorDetails(ErrorDetails{
- .err = .unary_plus_expression,
- .type = .note,
- .token = self.state.token,
- .print_source_line = false,
- });
- }
- return error.ParseError;
- }
-
- /// Expects the current token to have already been dealt with, and that the
- /// expression will start on the next token.
- /// After return, the current token will have been dealt with.
- fn parseExpression(self: *Self, options: ParseExpressionOptions) Error!*Node {
- if (options.nesting_context.level > max_nested_expression_level) {
- try self.addErrorDetails(.{
- .err = .nested_expression_level_exceeds_max,
- .token = options.nesting_context.first_token.?,
- });
- return self.addErrorDetailsAndFail(.{
- .err = .nested_expression_level_exceeds_max,
- .type = .note,
- .token = options.nesting_context.last_token.?,
- });
- }
- var expr: *Node = try self.parsePrimary(options);
- const first_token = expr.getFirstToken();
-
- // Non-number expressions can't have operators, so we can just return
- if (!expr.isNumberExpression()) return expr;
-
- while (try self.parseOptionalTokenAdvanced(.operator, .normal_expect_operator)) {
- const operator = self.state.token;
- const rhs_node = try self.parsePrimary(.{
- .is_known_to_be_number_expression = true,
- .can_contain_not_expressions = options.can_contain_not_expressions,
- .nesting_context = options.nesting_context.incremented(first_token, operator),
- .allowed_types = options.allowed_types,
- });
-
- if (!rhs_node.isNumberExpression()) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_something_else,
- .token = rhs_node.getFirstToken(),
- .token_span_end = rhs_node.getLastToken(),
- .extra = .{ .expected_types = .{
- .number = true,
- .number_expression = true,
- } },
- });
- }
-
- const node = try self.state.arena.create(Node.BinaryExpression);
- node.* = .{
- .left = expr,
- .operator = operator,
- .right = rhs_node,
- };
- expr = &node.base;
- }
-
- return expr;
- }
-
- /// Skips any amount of commas (including zero)
- /// In other words, it will skip the regex `,*`
- /// Assumes the token(s) should be parsed with `.normal` as the method.
- fn skipAnyCommas(self: *Self) !void {
- while (try self.parseOptionalToken(.comma)) {}
- }
-
- /// Advances the current token only if the token's id matches the specified `id`.
- /// Assumes the token should be parsed with `.normal` as the method.
- /// Returns true if the token matched, false otherwise.
- fn parseOptionalToken(self: *Self, id: Token.Id) Error!bool {
- return self.parseOptionalTokenAdvanced(id, .normal);
- }
-
- /// Advances the current token only if the token's id matches the specified `id`.
- /// Returns true if the token matched, false otherwise.
- fn parseOptionalTokenAdvanced(self: *Self, id: Token.Id, comptime method: Lexer.LexMethod) Error!bool {
- const maybe_token = try self.lookaheadToken(method);
- if (maybe_token.id != id) return false;
- self.nextToken(method) catch unreachable;
- return true;
- }
-
- fn addErrorDetails(self: *Self, details: ErrorDetails) Allocator.Error!void {
- try self.state.diagnostics.append(details);
- }
-
- fn addErrorDetailsAndFail(self: *Self, details: ErrorDetails) Error {
- try self.addErrorDetails(details);
- return error.ParseError;
- }
-
- fn nextToken(self: *Self, comptime method: Lexer.LexMethod) Error!void {
- self.state.token = token: while (true) {
- const token = self.lexer.next(method) catch |err| switch (err) {
- error.CodePagePragmaInIncludedFile => {
- // The Win32 RC compiler silently ignores such `#pragma code_point` directives,
- // but we want to both ignore them *and* emit a warning
- try self.addErrorDetails(.{
- .err = .code_page_pragma_in_included_file,
- .type = .warning,
- .token = self.lexer.error_context_token.?,
- });
- continue;
- },
- error.CodePagePragmaInvalidCodePage => {
- var details = self.lexer.getErrorDetails(err);
- if (!self.options.warn_instead_of_error_on_invalid_code_page) {
- return self.addErrorDetailsAndFail(details);
- }
- details.type = .warning;
- try self.addErrorDetails(details);
- continue;
- },
- error.InvalidDigitCharacterInNumberLiteral => {
- const details = self.lexer.getErrorDetails(err);
- try self.addErrorDetails(details);
- return self.addErrorDetailsAndFail(.{
- .err = details.err,
- .type = .note,
- .token = details.token,
- .print_source_line = false,
- });
- },
- else => return self.addErrorDetailsAndFail(self.lexer.getErrorDetails(err)),
- };
- break :token token;
- };
- // After every token, set the input code page for its line
- try self.state.input_code_page_lookup.setForToken(self.state.token, self.lexer.current_code_page);
- // But only set the output code page to the current code page if we are past the first code_page pragma in the file.
- // Otherwise, we want to fill the lookup using the default code page so that lookups still work for lines that
- // don't have an explicit output code page set.
- const output_code_page = if (self.lexer.seen_pragma_code_pages > 1) self.lexer.current_code_page else self.state.output_code_page_lookup.default_code_page;
- try self.state.output_code_page_lookup.setForToken(self.state.token, output_code_page);
- }
-
- fn lookaheadToken(self: *Self, comptime method: Lexer.LexMethod) Error!Token {
- self.state.lookahead_lexer = self.lexer.*;
- return token: while (true) {
- break :token self.state.lookahead_lexer.next(method) catch |err| switch (err) {
- // Ignore this error and get the next valid token, we'll deal with this
- // properly when getting the token for real
- error.CodePagePragmaInIncludedFile => continue,
- else => return self.addErrorDetailsAndFail(self.state.lookahead_lexer.getErrorDetails(err)),
- };
- };
- }
-
- fn tokenSlice(self: *Self) []const u8 {
- return self.state.token.slice(self.lexer.buffer);
- }
-
- /// Check that the current token is something that can be used as an ID
- fn checkId(self: *Self) !void {
- switch (self.state.token.id) {
- .literal => {},
- else => {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_token,
- .token = self.state.token,
- .extra = .{ .expected = .literal },
- });
- },
- }
- }
-
- fn check(self: *Self, expected_token_id: Token.Id) !void {
- if (self.state.token.id != expected_token_id) {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_token,
- .token = self.state.token,
- .extra = .{ .expected = expected_token_id },
- });
- }
- }
-
- fn checkResource(self: *Self) !Resource {
- switch (self.state.token.id) {
- .literal => return Resource.fromString(.{
- .slice = self.state.token.slice(self.lexer.buffer),
- .code_page = self.lexer.current_code_page,
- }),
- else => {
- return self.addErrorDetailsAndFail(ErrorDetails{
- .err = .expected_token,
- .token = self.state.token,
- .extra = .{ .expected = .literal },
- });
- },
- }
- }
-};
diff --git a/src/resinator/preprocess.zig b/src/resinator/preprocess.zig
@@ -1,100 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const Allocator = std.mem.Allocator;
-const cli = @import("cli.zig");
-
-pub const IncludeArgs = struct {
- clang_target: ?[]const u8 = null,
- system_include_paths: []const []const u8,
- /// Should be set to `true` when -target has the GNU abi
- /// (either because `clang_target` has `-gnu` or `-target`
- /// is appended via other means and it has `-gnu`)
- needs_gnu_workaround: bool = false,
- nostdinc: bool = false,
-
- pub const IncludeAbi = enum {
- msvc,
- gnu,
- };
-};
-
-/// `arena` is used for temporary -D argument strings and the INCLUDE environment variable.
-/// The arena should be kept alive at least as long as `argv`.
-pub fn appendClangArgs(arena: Allocator, argv: *std.ArrayList([]const u8), options: cli.Options, include_args: IncludeArgs) !void {
- try argv.appendSlice(&[_][]const u8{
- "-E", // preprocessor only
- "--comments",
- "-fuse-line-directives", // #line <num> instead of # <num>
- // TODO: could use --trace-includes to give info about what's included from where
- "-xc", // output c
- // TODO: Turn this off, check the warnings, and convert the spaces back to NUL
- "-Werror=null-character", // error on null characters instead of converting them to spaces
- // TODO: could remove -Werror=null-character and instead parse warnings looking for 'warning: null character ignored'
- // since the only real problem is when clang doesn't preserve null characters
- //"-Werror=invalid-pp-token", // will error on unfinished string literals
- // TODO: could use -Werror instead
- "-fms-compatibility", // Allow things like "header.h" to be resolved relative to the 'root' .rc file, among other things
- // https://learn.microsoft.com/en-us/windows/win32/menurc/predefined-macros
- "-DRC_INVOKED",
- });
- for (options.extra_include_paths.items) |extra_include_path| {
- try argv.append("-I");
- try argv.append(extra_include_path);
- }
-
- if (include_args.nostdinc) {
- try argv.append("-nostdinc");
- }
- for (include_args.system_include_paths) |include_path| {
- try argv.append("-isystem");
- try argv.append(include_path);
- }
- if (include_args.clang_target) |target| {
- try argv.append("-target");
- try argv.append(target);
- }
- // Using -fms-compatibility and targeting the GNU abi interact in a strange way:
- // - Targeting the GNU abi stops _MSC_VER from being defined
- // - Passing -fms-compatibility stops __GNUC__ from being defined
- // Neither being defined is a problem for things like MinGW's vadefs.h,
- // which will fail during preprocessing if neither are defined.
- // So, when targeting the GNU abi, we need to force __GNUC__ to be defined.
- //
- // TODO: This is a workaround that should be removed if possible.
- if (include_args.needs_gnu_workaround) {
- // This is the same default gnuc version that Clang uses:
- // https://github.com/llvm/llvm-project/blob/4b5366c9512aa273a5272af1d833961e1ed156e7/clang/lib/Driver/ToolChains/Clang.cpp#L6738
- try argv.append("-fgnuc-version=4.2.1");
- }
-
- if (!options.ignore_include_env_var) {
- const INCLUDE = std.process.getEnvVarOwned(arena, "INCLUDE") catch "";
-
- // The only precedence here is llvm-rc which also uses the platform-specific
- // delimiter. There's no precedence set by `rc.exe` since it's Windows-only.
- const delimiter = switch (builtin.os.tag) {
- .windows => ';',
- else => ':',
- };
- var it = std.mem.tokenizeScalar(u8, INCLUDE, delimiter);
- while (it.next()) |include_path| {
- try argv.append("-isystem");
- try argv.append(include_path);
- }
- }
-
- var symbol_it = options.symbols.iterator();
- while (symbol_it.next()) |entry| {
- switch (entry.value_ptr.*) {
- .define => |value| {
- try argv.append("-D");
- const define_arg = try std.fmt.allocPrint(arena, "{s}={s}", .{ entry.key_ptr.*, value });
- try argv.append(define_arg);
- },
- .undefine => {
- try argv.append("-U");
- try argv.append(entry.key_ptr.*);
- },
- }
- }
-}
diff --git a/src/resinator/res.zig b/src/resinator/res.zig
@@ -1,1107 +0,0 @@
-const std = @import("std");
-const rc = @import("rc.zig");
-const Resource = rc.Resource;
-const CommonResourceAttributes = rc.CommonResourceAttributes;
-const Allocator = std.mem.Allocator;
-const windows1252 = @import("windows1252.zig");
-const CodePage = @import("code_pages.zig").CodePage;
-const literals = @import("literals.zig");
-const SourceBytes = literals.SourceBytes;
-const Codepoint = @import("code_pages.zig").Codepoint;
-const lang = @import("lang.zig");
-const isNonAsciiDigit = @import("utils.zig").isNonAsciiDigit;
-
-/// https://learn.microsoft.com/en-us/windows/win32/menurc/resource-types
-pub const RT = enum(u8) {
- ACCELERATOR = 9,
- ANICURSOR = 21,
- ANIICON = 22,
- BITMAP = 2,
- CURSOR = 1,
- DIALOG = 5,
- DLGINCLUDE = 17,
- DLGINIT = 240,
- FONT = 8,
- FONTDIR = 7,
- GROUP_CURSOR = 1 + 11, // CURSOR + 11
- GROUP_ICON = 3 + 11, // ICON + 11
- HTML = 23,
- ICON = 3,
- MANIFEST = 24,
- MENU = 4,
- MESSAGETABLE = 11,
- PLUGPLAY = 19,
- RCDATA = 10,
- STRING = 6,
- TOOLBAR = 241,
- VERSION = 16,
- VXD = 20,
- _,
-
- /// Returns null if the resource type is user-defined
- /// Asserts that the resource is not `stringtable`
- pub fn fromResource(resource: Resource) ?RT {
- return switch (resource) {
- .accelerators => .ACCELERATOR,
- .bitmap => .BITMAP,
- .cursor => .GROUP_CURSOR,
- .dialog => .DIALOG,
- .dialogex => .DIALOG,
- .dlginclude => .DLGINCLUDE,
- .dlginit => .DLGINIT,
- .font => .FONT,
- .html => .HTML,
- .icon => .GROUP_ICON,
- .menu => .MENU,
- .menuex => .MENU,
- .messagetable => .MESSAGETABLE,
- .plugplay => .PLUGPLAY,
- .rcdata => .RCDATA,
- .stringtable => unreachable,
- .toolbar => .TOOLBAR,
- .user_defined => null,
- .versioninfo => .VERSION,
- .vxd => .VXD,
-
- .cursor_num => .CURSOR,
- .icon_num => .ICON,
- .string_num => .STRING,
- .anicursor_num => .ANICURSOR,
- .aniicon_num => .ANIICON,
- .fontdir_num => .FONTDIR,
- .manifest_num => .MANIFEST,
- };
- }
-};
-
-/// https://learn.microsoft.com/en-us/windows/win32/menurc/common-resource-attributes
-/// https://learn.microsoft.com/en-us/windows/win32/menurc/resourceheader
-pub const MemoryFlags = packed struct(u16) {
- value: u16,
-
- pub const MOVEABLE: u16 = 0x10;
- // TODO: SHARED and PURE seem to be the same thing? Testing seems to confirm this but
- // would like to find mention of it somewhere.
- pub const SHARED: u16 = 0x20;
- pub const PURE: u16 = 0x20;
- pub const PRELOAD: u16 = 0x40;
- pub const DISCARDABLE: u16 = 0x1000;
-
- /// Note: The defaults can have combinations that are not possible to specify within
- /// an .rc file, as the .rc attributes imply other values (i.e. specifying
- /// DISCARDABLE always implies MOVEABLE and PURE/SHARED, and yet RT_ICON
- /// has a default of only MOVEABLE | DISCARDABLE).
- pub fn defaults(predefined_resource_type: ?RT) MemoryFlags {
- if (predefined_resource_type == null) {
- return MemoryFlags{ .value = MOVEABLE | SHARED };
- } else {
- return switch (predefined_resource_type.?) {
- // zig fmt: off
- .RCDATA, .BITMAP, .HTML, .MANIFEST,
- .ACCELERATOR, .VERSION, .MESSAGETABLE,
- .DLGINIT, .TOOLBAR, .PLUGPLAY,
- .VXD, => MemoryFlags{ .value = MOVEABLE | SHARED },
-
- .GROUP_ICON, .GROUP_CURSOR,
- .STRING, .FONT, .DIALOG, .MENU,
- .DLGINCLUDE, => MemoryFlags{ .value = MOVEABLE | SHARED | DISCARDABLE },
-
- .ICON, .CURSOR, .ANIICON, .ANICURSOR => MemoryFlags{ .value = MOVEABLE | DISCARDABLE },
- .FONTDIR => MemoryFlags{ .value = MOVEABLE | PRELOAD },
- // zig fmt: on
- // Same as predefined_resource_type == null
- _ => return MemoryFlags{ .value = MOVEABLE | SHARED },
- };
- }
- }
-
- pub fn set(self: *MemoryFlags, attribute: CommonResourceAttributes) void {
- switch (attribute) {
- .preload => self.value |= PRELOAD,
- .loadoncall => self.value &= ~PRELOAD,
- .moveable => self.value |= MOVEABLE,
- .fixed => self.value &= ~(MOVEABLE | DISCARDABLE),
- .shared => self.value |= SHARED,
- .nonshared => self.value &= ~(SHARED | DISCARDABLE),
- .pure => self.value |= PURE,
- .impure => self.value &= ~(PURE | DISCARDABLE),
- .discardable => self.value |= DISCARDABLE | MOVEABLE | PURE,
- }
- }
-
- pub fn setGroup(self: *MemoryFlags, attribute: CommonResourceAttributes, implied_shared_or_pure: bool) void {
- switch (attribute) {
- .preload => {
- self.value |= PRELOAD;
- if (implied_shared_or_pure) self.value &= ~SHARED;
- },
- .loadoncall => {
- self.value &= ~PRELOAD;
- if (implied_shared_or_pure) self.value |= SHARED;
- },
- else => self.set(attribute),
- }
- }
-};
-
-/// https://learn.microsoft.com/en-us/windows/win32/intl/language-identifiers
-pub const Language = packed struct(u16) {
- // Note: This is the default no matter what locale the current system is set to,
- // e.g. even if the system's locale is en-GB, en-US will still be the
- // default language for resources in the Win32 rc compiler.
- primary_language_id: u10 = lang.LANG_ENGLISH,
- sublanguage_id: u6 = lang.SUBLANG_ENGLISH_US,
-
- /// Default language ID as a u16
- pub const default: u16 = (Language{}).asInt();
-
- pub fn fromInt(int: u16) Language {
- return @bitCast(int);
- }
-
- pub fn asInt(self: Language) u16 {
- return @bitCast(self);
- }
-};
-
-/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-dlgitemtemplate#remarks
-pub const ControlClass = enum(u16) {
- button = 0x80,
- edit = 0x81,
- static = 0x82,
- listbox = 0x83,
- scrollbar = 0x84,
- combobox = 0x85,
-
- pub fn fromControl(control: rc.Control) ?ControlClass {
- return switch (control) {
- // zig fmt: off
- .auto3state, .autocheckbox, .autoradiobutton,
- .checkbox, .defpushbutton, .groupbox, .pushbox,
- .pushbutton, .radiobutton, .state3, .userbutton => .button,
- // zig fmt: on
- .combobox => .combobox,
- .control => null,
- .ctext, .icon, .ltext, .rtext => .static,
- .edittext, .hedit, .iedit => .edit,
- .listbox => .listbox,
- .scrollbar => .scrollbar,
- };
- }
-
- pub fn getImpliedStyle(control: rc.Control) u32 {
- var style = WS.CHILD | WS.VISIBLE;
- switch (control) {
- .auto3state => style |= BS.AUTO3STATE | WS.TABSTOP,
- .autocheckbox => style |= BS.AUTOCHECKBOX | WS.TABSTOP,
- .autoradiobutton => style |= BS.AUTORADIOBUTTON,
- .checkbox => style |= BS.CHECKBOX | WS.TABSTOP,
- .combobox => {},
- .control => {},
- .ctext => style |= SS.CENTER | WS.GROUP,
- .defpushbutton => style |= BS.DEFPUSHBUTTON | WS.TABSTOP,
- .edittext, .hedit, .iedit => style |= WS.TABSTOP | WS.BORDER,
- .groupbox => style |= BS.GROUPBOX,
- .icon => style |= SS.ICON,
- .listbox => style |= LBS.NOTIFY | WS.BORDER,
- .ltext => style |= WS.GROUP,
- .pushbox => style |= BS.PUSHBOX | WS.TABSTOP,
- .pushbutton => style |= WS.TABSTOP,
- .radiobutton => style |= BS.RADIOBUTTON,
- .rtext => style |= SS.RIGHT | WS.GROUP,
- .scrollbar => {},
- .state3 => style |= BS.@"3STATE" | WS.TABSTOP,
- .userbutton => style |= BS.USERBUTTON | WS.TABSTOP,
- }
- return style;
- }
-};
-
-pub const NameOrOrdinal = union(enum) {
- // UTF-16 LE
- name: [:0]const u16,
- ordinal: u16,
-
- pub fn deinit(self: NameOrOrdinal, allocator: Allocator) void {
- switch (self) {
- .name => |name| {
- allocator.free(name);
- },
- .ordinal => {},
- }
- }
-
- /// Returns the full length of the amount of bytes that would be written by `write`
- /// (e.g. for an ordinal it will return the length including the 0xFFFF indicator)
- pub fn byteLen(self: NameOrOrdinal) usize {
- switch (self) {
- .name => |name| {
- // + 1 for 0-terminated
- return (name.len + 1) * @sizeOf(u16);
- },
- .ordinal => return 4,
- }
- }
-
- pub fn write(self: NameOrOrdinal, writer: anytype) !void {
- switch (self) {
- .name => |name| {
- try writer.writeAll(std.mem.sliceAsBytes(name[0 .. name.len + 1]));
- },
- .ordinal => |ordinal| {
- try writer.writeInt(u16, 0xffff, .little);
- try writer.writeInt(u16, ordinal, .little);
- },
- }
- }
-
- pub fn writeEmpty(writer: anytype) !void {
- try writer.writeInt(u16, 0, .little);
- }
-
- pub fn fromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal {
- if (maybeOrdinalFromString(bytes)) |ordinal| {
- return ordinal;
- }
- return nameFromString(allocator, bytes);
- }
-
- pub fn nameFromString(allocator: Allocator, bytes: SourceBytes) !NameOrOrdinal {
- // Names have a limit of 256 UTF-16 code units + null terminator
- var buf = try std.ArrayList(u16).initCapacity(allocator, @min(257, bytes.slice.len));
- errdefer buf.deinit();
-
- var i: usize = 0;
- while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) {
- if (buf.items.len == 256) break;
-
- const c = codepoint.value;
- if (c == Codepoint.invalid) {
- try buf.append(std.mem.nativeToLittle(u16, '�'));
- } else if (c < 0x7F) {
- // ASCII chars in names are always converted to uppercase
- try buf.append(std.mem.nativeToLittle(u16, std.ascii.toUpper(@intCast(c))));
- } else if (c < 0x10000) {
- const short: u16 = @intCast(c);
- try buf.append(std.mem.nativeToLittle(u16, short));
- } else {
- const high = @as(u16, @intCast((c - 0x10000) >> 10)) + 0xD800;
- try buf.append(std.mem.nativeToLittle(u16, high));
-
- // Note: This can cut-off in the middle of a UTF-16 surrogate pair,
- // i.e. it can make the string end with an unpaired high surrogate
- if (buf.items.len == 256) break;
-
- const low = @as(u16, @intCast(c & 0x3FF)) + 0xDC00;
- try buf.append(std.mem.nativeToLittle(u16, low));
- }
- }
-
- return NameOrOrdinal{ .name = try buf.toOwnedSliceSentinel(0) };
- }
-
- /// Returns `null` if the bytes do not form a valid number.
- /// Does not allow non-ASCII digits (which the Win32 RC compiler does allow
- /// in base 10 numbers, see `maybeNonAsciiOrdinalFromString`).
- pub fn maybeOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal {
- var buf = bytes.slice;
- var radix: u8 = 10;
- if (buf.len > 2 and buf[0] == '0') {
- switch (buf[1]) {
- '0'...'9' => {},
- 'x', 'X' => {
- radix = 16;
- buf = buf[2..];
- // only the first 4 hex digits matter, anything else is ignored
- // i.e. 0x12345 is treated as if it were 0x1234
- buf.len = @min(buf.len, 4);
- },
- else => return null,
- }
- }
-
- var i: usize = 0;
- var result: u16 = 0;
- while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) {
- const c = codepoint.value;
- const digit: u8 = switch (c) {
- 0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch switch (radix) {
- 10 => return null,
- // non-hex-digits are treated as a terminator rather than invalidating
- // the number (note: if there are no valid hex digits then the result
- // will be zero which is not treated as a valid number)
- 16 => break,
- else => unreachable,
- },
- else => if (radix == 10) return null else break,
- };
-
- if (result != 0) {
- result *%= radix;
- }
- result +%= digit;
- }
-
- // Anything that resolves to zero is not interpretted as a number
- if (result == 0) return null;
- return NameOrOrdinal{ .ordinal = result };
- }
-
- /// The Win32 RC compiler uses `iswdigit` for digit detection for base 10
- /// numbers, which means that non-ASCII digits are 'accepted' but handled
- /// in a totally unintuitive manner, leading to arbitrary results.
- ///
- /// This function will return the value that such an ordinal 'would' have
- /// if it was run through the Win32 RC compiler. This allows us to disallow
- /// non-ASCII digits in number literals but still detect when the Win32
- /// RC compiler would have allowed them, so that a proper warning/error
- /// can be emitted.
- pub fn maybeNonAsciiOrdinalFromString(bytes: SourceBytes) ?NameOrOrdinal {
- const buf = bytes.slice;
- const radix = 10;
- if (buf.len > 2 and buf[0] == '0') {
- switch (buf[1]) {
- // We only care about base 10 numbers here
- 'x', 'X' => return null,
- else => {},
- }
- }
-
- var i: usize = 0;
- var result: u16 = 0;
- while (bytes.code_page.codepointAt(i, buf)) |codepoint| : (i += codepoint.byte_len) {
- const c = codepoint.value;
- const digit: u16 = digit: {
- const is_digit = (c >= '0' and c <= '9') or isNonAsciiDigit(c);
- if (!is_digit) return null;
- break :digit @intCast(c - '0');
- };
-
- if (result != 0) {
- result *%= radix;
- }
- result +%= digit;
- }
-
- // Anything that resolves to zero is not interpretted as a number
- if (result == 0) return null;
- return NameOrOrdinal{ .ordinal = result };
- }
-
- pub fn predefinedResourceType(self: NameOrOrdinal) ?RT {
- switch (self) {
- .ordinal => |ordinal| {
- if (ordinal >= 256) return null;
- switch (@as(RT, @enumFromInt(ordinal))) {
- .ACCELERATOR,
- .ANICURSOR,
- .ANIICON,
- .BITMAP,
- .CURSOR,
- .DIALOG,
- .DLGINCLUDE,
- .DLGINIT,
- .FONT,
- .FONTDIR,
- .GROUP_CURSOR,
- .GROUP_ICON,
- .HTML,
- .ICON,
- .MANIFEST,
- .MENU,
- .MESSAGETABLE,
- .PLUGPLAY,
- .RCDATA,
- .STRING,
- .TOOLBAR,
- .VERSION,
- .VXD,
- => |rt| return rt,
- _ => return null,
- }
- },
- .name => return null,
- }
- }
-};
-
-fn expectNameOrOrdinal(expected: NameOrOrdinal, actual: NameOrOrdinal) !void {
- switch (expected) {
- .name => {
- if (actual != .name) return error.TestExpectedEqual;
- try std.testing.expectEqualSlices(u16, expected.name, actual.name);
- },
- .ordinal => {
- if (actual != .ordinal) return error.TestExpectedEqual;
- try std.testing.expectEqual(expected.ordinal, actual.ordinal);
- },
- }
-}
-
-test "NameOrOrdinal" {
- var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena.deinit();
-
- const allocator = arena.allocator();
-
- // zero is treated as a string
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0", .code_page = .windows1252 }),
- );
- // any non-digit byte invalidates the number
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1A") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "1a", .code_page = .windows1252 }),
- );
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1ÿ") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "1\xff", .code_page = .windows1252 }),
- );
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1€") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "1€", .code_page = .utf8 }),
- );
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("1�") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "1\x80", .code_page = .utf8 }),
- );
- // same with overflow that resolves to 0
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("65536") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "65536", .code_page = .windows1252 }),
- );
- // hex zero is also treated as a string
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0X0") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0x0", .code_page = .windows1252 }),
- );
- // hex numbers work
- try expectNameOrOrdinal(
- NameOrOrdinal{ .ordinal = 0x100 },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0x100", .code_page = .windows1252 }),
- );
- // only the first 4 hex digits matter
- try expectNameOrOrdinal(
- NameOrOrdinal{ .ordinal = 0x1234 },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0X12345", .code_page = .windows1252 }),
- );
- // octal is not supported so it gets treated as a string
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("0O1234") },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0o1234", .code_page = .windows1252 }),
- );
- // overflow wraps
- try expectNameOrOrdinal(
- NameOrOrdinal{ .ordinal = @truncate(65635) },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "65635", .code_page = .windows1252 }),
- );
- // non-hex-digits in a hex literal are treated as a terminator
- try expectNameOrOrdinal(
- NameOrOrdinal{ .ordinal = 0x4 },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0x4n", .code_page = .windows1252 }),
- );
- try expectNameOrOrdinal(
- NameOrOrdinal{ .ordinal = 0xFA },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "0xFAZ92348", .code_page = .windows1252 }),
- );
- // 0 at the start is allowed
- try expectNameOrOrdinal(
- NameOrOrdinal{ .ordinal = 50 },
- try NameOrOrdinal.fromString(allocator, .{ .slice = "050", .code_page = .windows1252 }),
- );
- // limit of 256 UTF-16 code units, can cut off between a surrogate pair
- {
- var expected = blk: {
- // the input before the 𐐷 character, but uppercased
- const expected_u8_bytes = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528QFFL7SHNSIETG0QKLR1UYPBTUV1PMFQRRA0VJDG354GQEDJMUPGPP1W1EXVNTZVEIZ6K3IPQM1AWGEYALMEODYVEZGOD3MFMGEY8FNR4JUETTB1PZDEWSNDRGZUA8SNXP3NGO";
- var buf: [256:0]u16 = undefined;
- for (expected_u8_bytes, 0..) |byte, i| {
- buf[i] = std.mem.nativeToLittle(u16, byte);
- }
- // surrogate pair that is now orphaned
- buf[255] = std.mem.nativeToLittle(u16, 0xD801);
- break :blk buf;
- };
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = &expected },
- try NameOrOrdinal.fromString(allocator, .{
- .slice = "00614982008907933748980730280674788429543776231864944218790698304852300002973622122844631429099469274282385299397783838528qffL7ShnSIETg0qkLr1UYpbtuv1PMFQRRa0VjDG354GQedJmUPgpp1w1ExVnTzVEiz6K3iPqM1AWGeYALmeODyvEZGOD3MfmGey8fnR4jUeTtB1PzdeWsNDrGzuA8Snxp3NGO𐐷",
- .code_page = .utf8,
- }),
- );
- }
-}
-
-test "NameOrOrdinal code page awareness" {
- var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
- defer arena.deinit();
-
- const allocator = arena.allocator();
-
- try expectNameOrOrdinal(
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("��𐐷") },
- try NameOrOrdinal.fromString(allocator, .{
- .slice = "\xF0\x80\x80𐐷",
- .code_page = .utf8,
- }),
- );
- try expectNameOrOrdinal(
- // The UTF-8 representation of 𐐷 is 0xF0 0x90 0x90 0xB7. In order to provide valid
- // UTF-8 to utf8ToUtf16LeStringLiteral, it uses the UTF-8 representation of the codepoint
- // <U+0x90> which is 0xC2 0x90. The code units in the expected UTF-16 string are:
- // { 0x00F0, 0x20AC, 0x20AC, 0x00F0, 0x0090, 0x0090, 0x00B7 }
- NameOrOrdinal{ .name = std.unicode.utf8ToUtf16LeStringLiteral("ð€€ð\xC2\x90\xC2\x90·") },
- try NameOrOrdinal.fromString(allocator, .{
- .slice = "\xF0\x80\x80𐐷",
- .code_page = .windows1252,
- }),
- );
-}
-
-/// https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-accel#members
-/// https://devblogs.microsoft.com/oldnewthing/20070316-00/?p=27593
-pub const AcceleratorModifiers = struct {
- value: u8 = 0,
- explicit_ascii_or_virtkey: bool = false,
-
- pub const ASCII = 0;
- pub const VIRTKEY = 1;
- pub const NOINVERT = 1 << 1;
- pub const SHIFT = 1 << 2;
- pub const CONTROL = 1 << 3;
- pub const ALT = 1 << 4;
- /// Marker for the last accelerator in an accelerator table
- pub const last_accelerator_in_table = 1 << 7;
-
- pub fn apply(self: *AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) void {
- if (modifier == .ascii or modifier == .virtkey) self.explicit_ascii_or_virtkey = true;
- self.value |= modifierValue(modifier);
- }
-
- pub fn isSet(self: AcceleratorModifiers, modifier: rc.AcceleratorTypeAndOptions) bool {
- // ASCII is set whenever VIRTKEY is not
- if (modifier == .ascii) return self.value & modifierValue(.virtkey) == 0;
- return self.value & modifierValue(modifier) != 0;
- }
-
- fn modifierValue(modifier: rc.AcceleratorTypeAndOptions) u8 {
- return switch (modifier) {
- .ascii => ASCII,
- .virtkey => VIRTKEY,
- .noinvert => NOINVERT,
- .shift => SHIFT,
- .control => CONTROL,
- .alt => ALT,
- };
- }
-
- pub fn markLast(self: *AcceleratorModifiers) void {
- self.value |= last_accelerator_in_table;
- }
-};
-
-const AcceleratorKeyCodepointTranslator = struct {
- string_type: literals.StringType,
-
- pub fn translate(self: @This(), maybe_parsed: ?literals.IterativeStringParser.ParsedCodepoint) ?u21 {
- const parsed = maybe_parsed orelse return null;
- if (parsed.codepoint == Codepoint.invalid) return 0xFFFD;
- if (parsed.from_escaped_integer and self.string_type == .ascii) {
- return windows1252.toCodepoint(@intCast(parsed.codepoint));
- }
- return parsed.codepoint;
- }
-};
-
-pub const ParseAcceleratorKeyStringError = error{ EmptyAccelerator, AcceleratorTooLong, InvalidControlCharacter, ControlCharacterOutOfRange };
-
-/// Expects bytes to be the full bytes of a string literal token (e.g. including the "" or L"").
-pub fn parseAcceleratorKeyString(bytes: SourceBytes, is_virt: bool, options: literals.StringParseOptions) (ParseAcceleratorKeyStringError || Allocator.Error)!u16 {
- if (bytes.slice.len == 0) {
- return error.EmptyAccelerator;
- }
-
- var parser = literals.IterativeStringParser.init(bytes, options);
- var translator = AcceleratorKeyCodepointTranslator{ .string_type = parser.declared_string_type };
-
- const first_codepoint = translator.translate(try parser.next()) orelse return error.EmptyAccelerator;
- // 0 is treated as a terminator, so this is equivalent to an empty string
- if (first_codepoint == 0) return error.EmptyAccelerator;
-
- if (first_codepoint == '^') {
- // Note: Emitting this warning unconditonally whenever ^ is the first character
- // matches the Win32 RC behavior, but it's questionable whether or not
- // the warning should be emitted for ^^ since that results in the ASCII
- // character ^ being written to the .res.
- if (is_virt and options.diagnostics != null) {
- try options.diagnostics.?.diagnostics.append(.{
- .err = .ascii_character_not_equivalent_to_virtual_key_code,
- .type = .warning,
- .token = options.diagnostics.?.token,
- });
- }
-
- const c = translator.translate(try parser.next()) orelse return error.InvalidControlCharacter;
- switch (c) {
- '^' => return '^', // special case
- 'a'...'z', 'A'...'Z' => return std.ascii.toUpper(@intCast(c)) - 0x40,
- // Note: The Windows RC compiler allows more than just A-Z, but what it allows
- // seems to be tied to some sort of Unicode-aware 'is character' function or something.
- // The full list of codepoints that trigger an out-of-range error can be found here:
- // https://gist.github.com/squeek502/2e9d0a4728a83eed074ad9785a209fd0
- // For codepoints >= 0x80 that don't trigger the error, the Windows RC compiler takes the
- // codepoint and does the `- 0x40` transformation as if it were A-Z which couldn't lead
- // to anything useable, so there's no point in emulating that behavior--erroring for
- // all non-[a-zA-Z] makes much more sense and is what was probably intended by the
- // Windows RC compiler.
- else => return error.ControlCharacterOutOfRange,
- }
- @compileError("this should be unreachable");
- }
-
- const second_codepoint = translator.translate(try parser.next());
-
- var result: u32 = initial_value: {
- if (first_codepoint >= 0x10000) {
- if (second_codepoint != null and second_codepoint.? != 0) return error.AcceleratorTooLong;
- // No idea why it works this way, but this seems to match the Windows RC
- // behavior for codepoints >= 0x10000
- const low = @as(u16, @intCast(first_codepoint & 0x3FF)) + 0xDC00;
- const extra = (first_codepoint - 0x10000) / 0x400;
- break :initial_value low + extra * 0x100;
- }
- break :initial_value first_codepoint;
- };
-
- // 0 is treated as a terminator
- if (second_codepoint != null and second_codepoint.? == 0) return @truncate(result);
-
- const third_codepoint = translator.translate(try parser.next());
- // 0 is treated as a terminator, so a 0 in the third position is fine but
- // anything else is too many codepoints for an accelerator
- if (third_codepoint != null and third_codepoint.? != 0) return error.AcceleratorTooLong;
-
- if (second_codepoint) |c| {
- if (c >= 0x10000) return error.AcceleratorTooLong;
- result <<= 8;
- result += c;
- } else if (is_virt) {
- switch (result) {
- 'a'...'z' => result -= 0x20, // toUpper
- else => {},
- }
- }
- return @truncate(result);
-}
-
-test "accelerator keys" {
- try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString(
- .{ .slice = "\"^a\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 1), try parseAcceleratorKeyString(
- .{ .slice = "\"^A\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 26), try parseAcceleratorKeyString(
- .{ .slice = "\"^Z\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, '^'), try parseAcceleratorKeyString(
- .{ .slice = "\"^^\"", .code_page = .windows1252 },
- false,
- .{},
- ));
-
- try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString(
- .{ .slice = "\"a\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0x6162), try parseAcceleratorKeyString(
- .{ .slice = "\"ab\"", .code_page = .windows1252 },
- false,
- .{},
- ));
-
- try std.testing.expectEqual(@as(u16, 'C'), try parseAcceleratorKeyString(
- .{ .slice = "\"c\"", .code_page = .windows1252 },
- true,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0x6363), try parseAcceleratorKeyString(
- .{ .slice = "\"cc\"", .code_page = .windows1252 },
- true,
- .{},
- ));
-
- // \x00 or any escape that evaluates to zero acts as a terminator, everything past it
- // is ignored
- try std.testing.expectEqual(@as(u16, 'a'), try parseAcceleratorKeyString(
- .{ .slice = "\"a\\0bcdef\"", .code_page = .windows1252 },
- false,
- .{},
- ));
-
- // \x80 is € in Windows-1252, which is Unicode codepoint 20AC
- try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString(
- .{ .slice = "\"\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- // This depends on the code page, though, with codepage 65001, \x80
- // on its own is invalid UTF-8 so it gets converted to the replacement character
- try std.testing.expectEqual(@as(u16, 0xFFFD), try parseAcceleratorKeyString(
- .{ .slice = "\"\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString(
- .{ .slice = "\"\x80\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- // This also behaves the same with escaped characters
- try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString(
- .{ .slice = "\"\\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- // Even with utf8 code page
- try std.testing.expectEqual(@as(u16, 0x20AC), try parseAcceleratorKeyString(
- .{ .slice = "\"\\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString(
- .{ .slice = "\"\\x80\\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- // Wide string with the actual characters behaves like the ASCII string version
- try std.testing.expectEqual(@as(u16, 0xCCAC), try parseAcceleratorKeyString(
- .{ .slice = "L\"\x80\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- // But wide string with escapes behaves differently
- try std.testing.expectEqual(@as(u16, 0x8080), try parseAcceleratorKeyString(
- .{ .slice = "L\"\\x80\\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- // and invalid escapes within wide strings get skipped
- try std.testing.expectEqual(@as(u16, 'z'), try parseAcceleratorKeyString(
- .{ .slice = "L\"\\Hz\"", .code_page = .windows1252 },
- false,
- .{},
- ));
-
- // any non-A-Z codepoints are illegal
- try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString(
- .{ .slice = "\"^\x83\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString(
- .{ .slice = "\"^1\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectError(error.InvalidControlCharacter, parseAcceleratorKeyString(
- .{ .slice = "\"^\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectError(error.EmptyAccelerator, parseAcceleratorKeyString(
- .{ .slice = "\"\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString(
- .{ .slice = "\"hello\"", .code_page = .windows1252 },
- false,
- .{},
- ));
- try std.testing.expectError(error.ControlCharacterOutOfRange, parseAcceleratorKeyString(
- .{ .slice = "\"^\x80\"", .code_page = .windows1252 },
- false,
- .{},
- ));
-
- // Invalid UTF-8 gets converted to 0xFFFD, multiple invalids get shifted and added together
- // The behavior is the same for ascii and wide strings
- try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString(
- .{ .slice = "\"\x80\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0xFCFD), try parseAcceleratorKeyString(
- .{ .slice = "L\"\x80\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
-
- // Codepoints >= 0x10000
- try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString(
- .{ .slice = "\"\xF0\x90\x84\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0xDD00), try parseAcceleratorKeyString(
- .{ .slice = "L\"\xF0\x90\x84\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
- try std.testing.expectEqual(@as(u16, 0x9C01), try parseAcceleratorKeyString(
- .{ .slice = "\"\xF4\x80\x80\x81\"", .code_page = .utf8 },
- false,
- .{},
- ));
- // anything before or after a codepoint >= 0x10000 causes an error
- try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString(
- .{ .slice = "\"a\xF0\x90\x80\x80\"", .code_page = .utf8 },
- false,
- .{},
- ));
- try std.testing.expectError(error.AcceleratorTooLong, parseAcceleratorKeyString(
- .{ .slice = "\"\xF0\x90\x80\x80a\"", .code_page = .utf8 },
- false,
- .{},
- ));
-}
-
-pub const ForcedOrdinal = struct {
- pub fn fromBytes(bytes: SourceBytes) u16 {
- var i: usize = 0;
- var result: u21 = 0;
- while (bytes.code_page.codepointAt(i, bytes.slice)) |codepoint| : (i += codepoint.byte_len) {
- const c = switch (codepoint.value) {
- // Codepoints that would need a surrogate pair in UTF-16 are
- // broken up into their UTF-16 code units and each code unit
- // is interpreted as a digit.
- 0x10000...0x10FFFF => {
- const high = @as(u16, @intCast((codepoint.value - 0x10000) >> 10)) + 0xD800;
- if (result != 0) result *%= 10;
- result +%= high -% '0';
-
- const low = @as(u16, @intCast(codepoint.value & 0x3FF)) + 0xDC00;
- if (result != 0) result *%= 10;
- result +%= low -% '0';
- continue;
- },
- Codepoint.invalid => 0xFFFD,
- else => codepoint.value,
- };
- if (result != 0) result *%= 10;
- result +%= c -% '0';
- }
- return @truncate(result);
- }
-
- pub fn fromUtf16Le(utf16: [:0]const u16) u16 {
- var result: u16 = 0;
- for (utf16) |code_unit| {
- if (result != 0) result *%= 10;
- result +%= std.mem.littleToNative(u16, code_unit) -% '0';
- }
- return result;
- }
-};
-
-test "forced ordinal" {
- try std.testing.expectEqual(@as(u16, 3200), ForcedOrdinal.fromBytes(.{ .slice = "3200", .code_page = .windows1252 }));
- try std.testing.expectEqual(@as(u16, 0x33), ForcedOrdinal.fromBytes(.{ .slice = "1+1", .code_page = .windows1252 }));
- try std.testing.expectEqual(@as(u16, 65531), ForcedOrdinal.fromBytes(.{ .slice = "1!", .code_page = .windows1252 }));
-
- try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0\x8C", .code_page = .windows1252 }));
- try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromBytes(.{ .slice = "0Œ", .code_page = .utf8 }));
-
- // invalid UTF-8 gets converted to 0xFFFD (replacement char) and then interpreted as a digit
- try std.testing.expectEqual(@as(u16, 0xFFCD), ForcedOrdinal.fromBytes(.{ .slice = "0\x81", .code_page = .utf8 }));
- // codepoints >= 0x10000
- try std.testing.expectEqual(@as(u16, 0x49F2), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10002}", .code_page = .utf8 }));
- try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromBytes(.{ .slice = "0\u{10100}", .code_page = .utf8 }));
-
- // From UTF-16
- try std.testing.expectEqual(@as(u16, 0x122), ForcedOrdinal.fromUtf16Le(&[_:0]u16{ std.mem.nativeToLittle(u16, '0'), std.mem.nativeToLittle(u16, 'Œ') }));
- try std.testing.expectEqual(@as(u16, 0x4AF0), ForcedOrdinal.fromUtf16Le(std.unicode.utf8ToUtf16LeStringLiteral("0\u{10100}")));
-}
-
-/// https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo
-pub const FixedFileInfo = struct {
- file_version: Version = .{},
- product_version: Version = .{},
- file_flags_mask: u32 = 0,
- file_flags: u32 = 0,
- file_os: u32 = 0,
- file_type: u32 = 0,
- file_subtype: u32 = 0,
- file_date: Version = .{}, // TODO: I think this is always all zeroes?
-
- pub const signature = 0xFEEF04BD;
- // Note: This corresponds to a version of 1.0
- pub const version = 0x00010000;
-
- pub const byte_len = 0x34;
- pub const key = std.unicode.utf8ToUtf16LeStringLiteral("VS_VERSION_INFO");
-
- pub const Version = struct {
- parts: [4]u16 = [_]u16{0} ** 4,
-
- pub fn mostSignificantCombinedParts(self: Version) u32 {
- return (@as(u32, self.parts[0]) << 16) + self.parts[1];
- }
-
- pub fn leastSignificantCombinedParts(self: Version) u32 {
- return (@as(u32, self.parts[2]) << 16) + self.parts[3];
- }
- };
-
- pub fn write(self: FixedFileInfo, writer: anytype) !void {
- try writer.writeInt(u32, signature, .little);
- try writer.writeInt(u32, version, .little);
- try writer.writeInt(u32, self.file_version.mostSignificantCombinedParts(), .little);
- try writer.writeInt(u32, self.file_version.leastSignificantCombinedParts(), .little);
- try writer.writeInt(u32, self.product_version.mostSignificantCombinedParts(), .little);
- try writer.writeInt(u32, self.product_version.leastSignificantCombinedParts(), .little);
- try writer.writeInt(u32, self.file_flags_mask, .little);
- try writer.writeInt(u32, self.file_flags, .little);
- try writer.writeInt(u32, self.file_os, .little);
- try writer.writeInt(u32, self.file_type, .little);
- try writer.writeInt(u32, self.file_subtype, .little);
- try writer.writeInt(u32, self.file_date.mostSignificantCombinedParts(), .little);
- try writer.writeInt(u32, self.file_date.leastSignificantCombinedParts(), .little);
- }
-};
-
-test "FixedFileInfo.Version" {
- const version = FixedFileInfo.Version{
- .parts = .{ 1, 2, 3, 4 },
- };
- try std.testing.expectEqual(@as(u32, 0x00010002), version.mostSignificantCombinedParts());
- try std.testing.expectEqual(@as(u32, 0x00030004), version.leastSignificantCombinedParts());
-}
-
-pub const VersionNode = struct {
- pub const type_string: u16 = 1;
- pub const type_binary: u16 = 0;
-};
-
-pub const MenuItemFlags = struct {
- value: u16 = 0,
-
- pub fn apply(self: *MenuItemFlags, option: rc.MenuItem.Option) void {
- self.value |= optionValue(option);
- }
-
- pub fn isSet(self: MenuItemFlags, option: rc.MenuItem.Option) bool {
- return self.value & optionValue(option) != 0;
- }
-
- fn optionValue(option: rc.MenuItem.Option) u16 {
- return @intCast(switch (option) {
- .checked => MF.CHECKED,
- .grayed => MF.GRAYED,
- .help => MF.HELP,
- .inactive => MF.DISABLED,
- .menubarbreak => MF.MENUBARBREAK,
- .menubreak => MF.MENUBREAK,
- });
- }
-
- pub fn markLast(self: *MenuItemFlags) void {
- self.value |= @intCast(MF.END);
- }
-};
-
-/// Menu Flags from WinUser.h
-/// This is not complete, it only contains what is needed
-pub const MF = struct {
- pub const GRAYED: u32 = 0x00000001;
- pub const DISABLED: u32 = 0x00000002;
- pub const CHECKED: u32 = 0x00000008;
- pub const POPUP: u32 = 0x00000010;
- pub const MENUBARBREAK: u32 = 0x00000020;
- pub const MENUBREAK: u32 = 0x00000040;
- pub const HELP: u32 = 0x00004000;
- pub const END: u32 = 0x00000080;
-};
-
-/// Window Styles from WinUser.h
-pub const WS = struct {
- pub const OVERLAPPED: u32 = 0x00000000;
- pub const POPUP: u32 = 0x80000000;
- pub const CHILD: u32 = 0x40000000;
- pub const MINIMIZE: u32 = 0x20000000;
- pub const VISIBLE: u32 = 0x10000000;
- pub const DISABLED: u32 = 0x08000000;
- pub const CLIPSIBLINGS: u32 = 0x04000000;
- pub const CLIPCHILDREN: u32 = 0x02000000;
- pub const MAXIMIZE: u32 = 0x01000000;
- pub const CAPTION: u32 = BORDER | DLGFRAME;
- pub const BORDER: u32 = 0x00800000;
- pub const DLGFRAME: u32 = 0x00400000;
- pub const VSCROLL: u32 = 0x00200000;
- pub const HSCROLL: u32 = 0x00100000;
- pub const SYSMENU: u32 = 0x00080000;
- pub const THICKFRAME: u32 = 0x00040000;
- pub const GROUP: u32 = 0x00020000;
- pub const TABSTOP: u32 = 0x00010000;
-
- pub const MINIMIZEBOX: u32 = 0x00020000;
- pub const MAXIMIZEBOX: u32 = 0x00010000;
-
- pub const TILED: u32 = OVERLAPPED;
- pub const ICONIC: u32 = MINIMIZE;
- pub const SIZEBOX: u32 = THICKFRAME;
- pub const TILEDWINDOW: u32 = OVERLAPPEDWINDOW;
-
- // Common Window Styles
- pub const OVERLAPPEDWINDOW: u32 = OVERLAPPED | CAPTION | SYSMENU | THICKFRAME | MINIMIZEBOX | MAXIMIZEBOX;
- pub const POPUPWINDOW: u32 = POPUP | BORDER | SYSMENU;
- pub const CHILDWINDOW: u32 = CHILD;
-};
-
-/// Dialog Box Template Styles from WinUser.h
-pub const DS = struct {
- pub const SETFONT: u32 = 0x40;
-};
-
-/// Button Control Styles from WinUser.h
-/// This is not complete, it only contains what is needed
-pub const BS = struct {
- pub const PUSHBUTTON: u32 = 0x00000000;
- pub const DEFPUSHBUTTON: u32 = 0x00000001;
- pub const CHECKBOX: u32 = 0x00000002;
- pub const AUTOCHECKBOX: u32 = 0x00000003;
- pub const RADIOBUTTON: u32 = 0x00000004;
- pub const @"3STATE": u32 = 0x00000005;
- pub const AUTO3STATE: u32 = 0x00000006;
- pub const GROUPBOX: u32 = 0x00000007;
- pub const USERBUTTON: u32 = 0x00000008;
- pub const AUTORADIOBUTTON: u32 = 0x00000009;
- pub const PUSHBOX: u32 = 0x0000000A;
- pub const OWNERDRAW: u32 = 0x0000000B;
- pub const TYPEMASK: u32 = 0x0000000F;
- pub const LEFTTEXT: u32 = 0x00000020;
-};
-
-/// Static Control Constants from WinUser.h
-/// This is not complete, it only contains what is needed
-pub const SS = struct {
- pub const LEFT: u32 = 0x00000000;
- pub const CENTER: u32 = 0x00000001;
- pub const RIGHT: u32 = 0x00000002;
- pub const ICON: u32 = 0x00000003;
-};
-
-/// Listbox Styles from WinUser.h
-/// This is not complete, it only contains what is needed
-pub const LBS = struct {
- pub const NOTIFY: u32 = 0x0001;
-};
diff --git a/src/resinator/source_mapping.zig b/src/resinator/source_mapping.zig
@@ -1,687 +0,0 @@
-const std = @import("std");
-const Allocator = std.mem.Allocator;
-const UncheckedSliceWriter = @import("utils.zig").UncheckedSliceWriter;
-const parseQuotedAsciiString = @import("literals.zig").parseQuotedAsciiString;
-const lex = @import("lex.zig");
-
-pub const ParseLineCommandsResult = struct {
- result: []u8,
- mappings: SourceMappings,
-};
-
-const CurrentMapping = struct {
- line_num: usize = 1,
- filename: std.ArrayListUnmanaged(u8) = .{},
- pending: bool = true,
- ignore_contents: bool = false,
-};
-
-pub const ParseAndRemoveLineCommandsOptions = struct {
- initial_filename: ?[]const u8 = null,
-};
-
-/// Parses and removes #line commands as well as all source code that is within a file
-/// with .c or .h extensions.
-///
-/// > RC treats files with the .c and .h extensions in a special manner. It
-/// > assumes that a file with one of these extensions does not contain
-/// > resources. If a file has the .c or .h file name extension, RC ignores all
-/// > lines in the file except the preprocessor directives. Therefore, to
-/// > include a file that contains resources in another resource script, give
-/// > the file to be included an extension other than .c or .h.
-/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
-///
-/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
-/// between the lines and their corresponding lines in their original files.
-///
-/// `buf` must be at least as long as `source`
-/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
-///
-/// If `options.initial_filename` is provided, that filename is guaranteed to be
-/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
-pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
- var parse_result = ParseLineCommandsResult{
- .result = undefined,
- .mappings = .{},
- };
- errdefer parse_result.mappings.deinit(allocator);
-
- var current_mapping: CurrentMapping = .{};
- defer current_mapping.filename.deinit(allocator);
-
- if (options.initial_filename) |initial_filename| {
- try current_mapping.filename.appendSlice(allocator, initial_filename);
- parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
- }
-
- std.debug.assert(buf.len >= source.len);
- var result = UncheckedSliceWriter{ .slice = buf };
- const State = enum {
- line_start,
- preprocessor,
- non_preprocessor,
- };
- var state: State = .line_start;
- var index: usize = 0;
- var pending_start: ?usize = null;
- var preprocessor_start: usize = 0;
- var line_number: usize = 1;
- while (index < source.len) : (index += 1) {
- const c = source[index];
- switch (state) {
- .line_start => switch (c) {
- '#' => {
- preprocessor_start = index;
- state = .preprocessor;
- if (pending_start == null) {
- pending_start = index;
- }
- },
- '\r', '\n' => {
- const is_crlf = formsLineEndingPair(source, c, index + 1);
- try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
- if (!current_mapping.ignore_contents) {
- result.write(c);
- if (is_crlf) result.write(source[index + 1]);
- line_number += 1;
- }
- if (is_crlf) index += 1;
- pending_start = null;
- },
- ' ', '\t', '\x0b', '\x0c' => {
- if (pending_start == null) {
- pending_start = index;
- }
- },
- else => {
- state = .non_preprocessor;
- if (pending_start != null) {
- if (!current_mapping.ignore_contents) {
- result.writeSlice(source[pending_start.? .. index + 1]);
- }
- pending_start = null;
- continue;
- }
- if (!current_mapping.ignore_contents) {
- result.write(c);
- }
- },
- },
- .preprocessor => switch (c) {
- '\r', '\n' => {
- // Now that we have the full line we can decide what to do with it
- const preprocessor_str = source[preprocessor_start..index];
- const is_crlf = formsLineEndingPair(source, c, index + 1);
- if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
- try handleLineCommand(allocator, preprocessor_str, &current_mapping);
- } else {
- try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
- if (!current_mapping.ignore_contents) {
- const line_ending_len: usize = if (is_crlf) 2 else 1;
- result.writeSlice(source[pending_start.? .. index + line_ending_len]);
- line_number += 1;
- }
- }
- if (is_crlf) index += 1;
- state = .line_start;
- pending_start = null;
- },
- else => {},
- },
- .non_preprocessor => switch (c) {
- '\r', '\n' => {
- const is_crlf = formsLineEndingPair(source, c, index + 1);
- try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
- if (!current_mapping.ignore_contents) {
- result.write(c);
- if (is_crlf) result.write(source[index + 1]);
- line_number += 1;
- }
- if (is_crlf) index += 1;
- state = .line_start;
- pending_start = null;
- },
- else => {
- if (!current_mapping.ignore_contents) {
- result.write(c);
- }
- },
- },
- }
- } else {
- switch (state) {
- .line_start => {},
- .non_preprocessor => {
- try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
- },
- .preprocessor => {
- // Now that we have the full line we can decide what to do with it
- const preprocessor_str = source[preprocessor_start..index];
- if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
- try handleLineCommand(allocator, preprocessor_str, &current_mapping);
- } else {
- try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
- if (!current_mapping.ignore_contents) {
- result.writeSlice(source[pending_start.?..index]);
- }
- }
- },
- }
- }
-
- parse_result.result = result.getWritten();
-
- // Remove whitespace from the end of the result. This avoids issues when the
- // preprocessor adds a newline to the end of the file, since then the
- // post-preprocessed source could have more lines than the corresponding input source and
- // the inserted line can't be mapped to any lines in the original file.
- // There's no way that whitespace at the end of a file can affect the parsing
- // of the RC script so this is okay to do unconditionally.
- // TODO: There might be a better way around this
- while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
- parse_result.result.len -= 1;
- }
-
- // If there have been no line mappings at all, then we're dealing with an empty file.
- // In this case, we want to fake a line mapping just so that we return something
- // that is useable in the same way that a non-empty mapping would be.
- if (parse_result.mappings.mapping.items.len == 0) {
- try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
- }
-
- return parse_result;
-}
-
-/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
-pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
- if (next_index >= source.len) return false;
-
- const next_ending = source[next_index];
- if (next_ending != '\r' and next_ending != '\n') return false;
-
- // can't be \n\n or \r\r
- if (line_ending == next_ending) return false;
-
- return true;
-}
-
-pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
- const filename_offset = try mapping.files.put(allocator, current_mapping.filename.items);
-
- try mapping.set(allocator, post_processed_line_number, .{
- .start_line = current_mapping.line_num,
- .end_line = current_mapping.line_num,
- .filename_offset = filename_offset,
- });
-
- current_mapping.line_num += 1;
- current_mapping.pending = false;
-}
-
-// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
-pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{OutOfMemory}!void {
- // TODO: Are there other whitespace characters that should be included?
- var tokenizer = std.mem.tokenize(u8, line_command, " \t");
- const line_directive = tokenizer.next() orelse return; // #line
- if (!std.mem.eql(u8, line_directive, "#line")) return;
- const linenum_str = tokenizer.next() orelse return;
- const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return;
-
- var filename_literal = tokenizer.rest();
- while (filename_literal.len > 0 and std.ascii.isWhitespace(filename_literal[filename_literal.len - 1])) {
- filename_literal.len -= 1;
- }
- if (filename_literal.len < 2) return;
- const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"';
- if (!is_quoted) return;
- const filename = parseFilename(allocator, filename_literal[1 .. filename_literal.len - 1]) catch |err| switch (err) {
- error.OutOfMemory => |e| return e,
- else => return,
- };
- defer allocator.free(filename);
-
- // \x00 bytes in the filename is incompatible with how StringTable works
- if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return;
-
- current_mapping.line_num = linenum;
- current_mapping.filename.clearRetainingCapacity();
- try current_mapping.filename.appendSlice(allocator, filename);
- current_mapping.pending = true;
- current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
-}
-
-pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
- const buf = try allocator.alloc(u8, source.len);
- errdefer allocator.free(buf);
- var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
- result.result = try allocator.realloc(buf, result.result.len);
- return result;
-}
-
-/// C-style string parsing with a few caveats:
-/// - The str cannot contain newlines or carriage returns
-/// - Hex and octal escape are limited to u8
-/// - No handling/support for L, u, or U prefixed strings
-/// - The start and end double quotes should be omitted from the `str`
-/// - Other than the above, does not assume any validity of the strings (i.e. there
-/// may be unescaped double quotes within the str) and will return error.InvalidString
-/// on any problems found.
-///
-/// The result is a UTF-8 encoded string.
-fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
- const State = enum {
- string,
- escape,
- escape_hex,
- escape_octal,
- escape_u,
- };
-
- var filename = try std.ArrayList(u8).initCapacity(allocator, str.len);
- errdefer filename.deinit();
- var state: State = .string;
- var index: usize = 0;
- var escape_len: usize = undefined;
- var escape_val: u64 = undefined;
- var escape_expected_len: u8 = undefined;
- while (index < str.len) : (index += 1) {
- const c = str[index];
- switch (state) {
- .string => switch (c) {
- '\\' => state = .escape,
- '"' => return error.InvalidString,
- else => filename.appendAssumeCapacity(c),
- },
- .escape => switch (c) {
- '\'', '"', '\\', '?', 'n', 'r', 't', 'a', 'b', 'e', 'f', 'v' => {
- const escaped_c = switch (c) {
- '\'', '"', '\\', '?' => c,
- 'n' => '\n',
- 'r' => '\r',
- 't' => '\t',
- 'a' => '\x07',
- 'b' => '\x08',
- 'e' => '\x1b', // non-standard
- 'f' => '\x0c',
- 'v' => '\x0b',
- else => unreachable,
- };
- filename.appendAssumeCapacity(escaped_c);
- state = .string;
- },
- 'x' => {
- escape_val = 0;
- escape_len = 0;
- state = .escape_hex;
- },
- '0'...'7' => {
- escape_val = std.fmt.charToDigit(c, 8) catch unreachable;
- escape_len = 1;
- state = .escape_octal;
- },
- 'u' => {
- escape_val = 0;
- escape_len = 0;
- state = .escape_u;
- escape_expected_len = 4;
- },
- 'U' => {
- escape_val = 0;
- escape_len = 0;
- state = .escape_u;
- escape_expected_len = 8;
- },
- else => return error.InvalidString,
- },
- .escape_hex => switch (c) {
- '0'...'9', 'a'...'f', 'A'...'F' => {
- const digit = std.fmt.charToDigit(c, 16) catch unreachable;
- if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString;
- escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString;
- escape_len += 1;
- },
- else => {
- if (escape_len == 0) return error.InvalidString;
- filename.appendAssumeCapacity(@intCast(escape_val));
- state = .string;
- index -= 1; // reconsume
- },
- },
- .escape_octal => switch (c) {
- '0'...'7' => {
- const digit = std.fmt.charToDigit(c, 8) catch unreachable;
- if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString;
- escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString;
- escape_len += 1;
- if (escape_len == 3) {
- filename.appendAssumeCapacity(@intCast(escape_val));
- state = .string;
- }
- },
- else => {
- if (escape_len == 0) return error.InvalidString;
- filename.appendAssumeCapacity(@intCast(escape_val));
- state = .string;
- index -= 1; // reconsume
- },
- },
- .escape_u => switch (c) {
- '0'...'9', 'a'...'f', 'A'...'F' => {
- const digit = std.fmt.charToDigit(c, 16) catch unreachable;
- if (escape_val != 0) escape_val = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString;
- escape_val = std.math.add(u21, @as(u21, @intCast(escape_val)), digit) catch return error.InvalidString;
- escape_len += 1;
- if (escape_len == escape_expected_len) {
- var buf: [4]u8 = undefined;
- const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &buf) catch return error.InvalidString;
- filename.appendSliceAssumeCapacity(buf[0..utf8_len]);
- state = .string;
- }
- },
- // Requires escape_expected_len valid hex digits
- else => return error.InvalidString,
- },
- }
- } else {
- switch (state) {
- .string => {},
- .escape, .escape_u => return error.InvalidString,
- .escape_hex => {
- if (escape_len == 0) return error.InvalidString;
- filename.appendAssumeCapacity(@intCast(escape_val));
- },
- .escape_octal => {
- filename.appendAssumeCapacity(@intCast(escape_val));
- },
- }
- }
-
- return filename.toOwnedSlice();
-}
-
-fn testParseFilename(expected: []const u8, input: []const u8) !void {
- const parsed = try parseFilename(std.testing.allocator, input);
- defer std.testing.allocator.free(parsed);
-
- return std.testing.expectEqualSlices(u8, expected, parsed);
-}
-
-test parseFilename {
- try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
- try testParseFilename("\xABz\x53", "\\xABz\\123");
- try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\""));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\"));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u"));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U"));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x"));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xZZ"));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF"));
- try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777"));
-}
-
-pub const SourceMappings = struct {
- /// line number -> span where the index is (line number - 1)
- mapping: std.ArrayListUnmanaged(SourceSpan) = .{},
- files: StringTable = .{},
- /// The default assumes that the first filename added is the root file.
- /// The value should be set to the correct offset if that assumption does not hold.
- root_filename_offset: u32 = 0,
-
- pub const SourceSpan = struct {
- start_line: usize,
- end_line: usize,
- filename_offset: u32,
- };
-
- pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
- self.files.deinit(allocator);
- self.mapping.deinit(allocator);
- }
-
- pub fn set(self: *SourceMappings, allocator: Allocator, line_num: usize, span: SourceSpan) !void {
- const ptr = try self.expandAndGet(allocator, line_num);
- ptr.* = span;
- }
-
- pub fn has(self: SourceMappings, line_num: usize) bool {
- return self.mapping.items.len >= line_num;
- }
-
- /// Note: `line_num` is 1-indexed
- pub fn get(self: SourceMappings, line_num: usize) SourceSpan {
- return self.mapping.items[line_num - 1];
- }
-
- pub fn getPtr(self: SourceMappings, line_num: usize) *SourceSpan {
- return &self.mapping.items[line_num - 1];
- }
-
- /// Expands the number of lines in the mapping to include the requested
- /// line number (if necessary) and returns a pointer to the value at that
- /// line number.
- ///
- /// Note: `line_num` is 1-indexed
- pub fn expandAndGet(self: *SourceMappings, allocator: Allocator, line_num: usize) !*SourceSpan {
- try self.mapping.resize(allocator, line_num);
- return &self.mapping.items[line_num - 1];
- }
-
- pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) void {
- std.debug.assert(num_following_lines_to_collapse > 0);
-
- var span_to_collapse_into = self.getPtr(line_num);
- const last_collapsed_span = self.get(line_num + num_following_lines_to_collapse);
- span_to_collapse_into.end_line = last_collapsed_span.end_line;
-
- const after_collapsed_start = line_num + num_following_lines_to_collapse;
- const new_num_lines = self.mapping.items.len - num_following_lines_to_collapse;
- std.mem.copyForwards(SourceSpan, self.mapping.items[line_num..new_num_lines], self.mapping.items[after_collapsed_start..]);
-
- self.mapping.items.len = new_num_lines;
- }
-
- /// Returns true if the line is from the main/root file (i.e. not a file that has been
- /// `#include`d).
- pub fn isRootFile(self: *SourceMappings, line_num: usize) bool {
- const line_mapping = self.get(line_num);
- if (line_mapping.filename_offset == self.root_filename_offset) return true;
- return false;
- }
-};
-
-test "SourceMappings collapse" {
- const allocator = std.testing.allocator;
-
- var mappings = SourceMappings{};
- defer mappings.deinit(allocator);
- const filename_offset = try mappings.files.put(allocator, "test.rc");
-
- try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = filename_offset });
- try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 3, .filename_offset = filename_offset });
- try mappings.set(allocator, 3, .{ .start_line = 4, .end_line = 4, .filename_offset = filename_offset });
- try mappings.set(allocator, 4, .{ .start_line = 5, .end_line = 5, .filename_offset = filename_offset });
-
- mappings.collapse(1, 2);
-
- try std.testing.expectEqual(@as(usize, 2), mappings.mapping.items.len);
- try std.testing.expectEqual(@as(usize, 4), mappings.mapping.items[0].end_line);
- try std.testing.expectEqual(@as(usize, 5), mappings.mapping.items[1].end_line);
-}
-
-/// Same thing as StringTable in Zig's src/Wasm.zig
-pub const StringTable = struct {
- data: std.ArrayListUnmanaged(u8) = .{},
- map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .{},
-
- pub fn deinit(self: *StringTable, allocator: Allocator) void {
- self.data.deinit(allocator);
- self.map.deinit(allocator);
- }
-
- pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
- const result = try self.map.getOrPutContextAdapted(
- allocator,
- value,
- std.hash_map.StringIndexAdapter{ .bytes = &self.data },
- .{ .bytes = &self.data },
- );
- if (result.found_existing) {
- return result.key_ptr.*;
- }
-
- try self.data.ensureUnusedCapacity(allocator, value.len + 1);
- const offset: u32 = @intCast(self.data.items.len);
-
- self.data.appendSliceAssumeCapacity(value);
- self.data.appendAssumeCapacity(0);
-
- result.key_ptr.* = offset;
-
- return offset;
- }
-
- pub fn get(self: StringTable, offset: u32) []const u8 {
- std.debug.assert(offset < self.data.items.len);
- return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
- }
-
- pub fn getOffset(self: *StringTable, value: []const u8) ?u32 {
- return self.map.getKeyAdapted(
- value,
- std.hash_map.StringIndexAdapter{ .bytes = &self.data },
- );
- }
-};
-
-const ExpectedSourceSpan = struct {
- start_line: usize,
- end_line: usize,
- filename: []const u8,
-};
-
-fn testParseAndRemoveLineCommands(
- expected: []const u8,
- comptime expected_spans: []const ExpectedSourceSpan,
- source: []const u8,
- options: ParseAndRemoveLineCommandsOptions,
-) !void {
- var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options);
- defer std.testing.allocator.free(results.result);
- defer results.mappings.deinit(std.testing.allocator);
-
- try std.testing.expectEqualStrings(expected, results.result);
-
- expectEqualMappings(expected_spans, results.mappings) catch |err| {
- std.debug.print("\nexpected mappings:\n", .{});
- for (expected_spans, 0..) |span, i| {
- const line_num = i + 1;
- std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line });
- }
- std.debug.print("\nactual mappings:\n", .{});
- for (results.mappings.mapping.items, 0..) |span, i| {
- const line_num = i + 1;
- const filename = results.mappings.files.get(span.filename_offset);
- std.debug.print("{}: {s}:{}-{}\n", .{ line_num, filename, span.start_line, span.end_line });
- }
- std.debug.print("\n", .{});
- return err;
- };
-}
-
-fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
- try std.testing.expectEqual(expected_spans.len, mappings.mapping.items.len);
- for (expected_spans, 0..) |expected_span, i| {
- const line_num = i + 1;
- const span = mappings.get(line_num);
- const filename = mappings.files.get(span.filename_offset);
- try std.testing.expectEqual(expected_span.start_line, span.start_line);
- try std.testing.expectEqual(expected_span.end_line, span.end_line);
- try std.testing.expectEqualStrings(expected_span.filename, filename);
- }
-}
-
-test "basic" {
- try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
- .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
- }, "#line 1 \"blah.rc\"", .{});
-}
-
-test "only removes line commands" {
- try testParseAndRemoveLineCommands(
- \\#pragma code_page(65001)
- , &[_]ExpectedSourceSpan{
- .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
- },
- \\#line 1 "blah.rc"
- \\#pragma code_page(65001)
- , .{});
-}
-
-test "whitespace and line endings" {
- try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
- .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
- }, "#line \t 1 \t \"blah.rc\"\r\n", .{});
-}
-
-test "example" {
- try testParseAndRemoveLineCommands(
- \\
- \\included RCDATA {"hello"}
- , &[_]ExpectedSourceSpan{
- .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" },
- .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" },
- },
- \\#line 1 "rcdata.rc"
- \\#line 1 "<built-in>"
- \\#line 1 "<built-in>"
- \\#line 355 "<built-in>"
- \\#line 1 "<command line>"
- \\#line 1 "<built-in>"
- \\#line 1 "rcdata.rc"
- \\#line 1 "./header.h"
- \\
- \\
- \\2 RCDATA {"blah"}
- \\
- \\
- \\#line 1 "./included.rc"
- \\
- \\included RCDATA {"hello"}
- \\#line 7 "./header.h"
- \\#line 1 "rcdata.rc"
- , .{});
-}
-
-test "CRLF and other line endings" {
- try testParseAndRemoveLineCommands(
- "hello\r\n#pragma code_page(65001)\r\nworld",
- &[_]ExpectedSourceSpan{
- .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" },
- .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" },
- .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" },
- },
- "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n",
- .{},
- );
-}
-
-test "no line commands" {
- try testParseAndRemoveLineCommands(
- \\1 RCDATA {"blah"}
- \\2 RCDATA {"blah"}
- , &[_]ExpectedSourceSpan{
- .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
- .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" },
- },
- \\1 RCDATA {"blah"}
- \\2 RCDATA {"blah"}
- , .{ .initial_filename = "blah.rc" });
-}
-
-test "in place" {
- var mut_source = "#line 1 \"blah.rc\"".*;
- var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{});
- defer result.mappings.deinit(std.testing.allocator);
- try std.testing.expectEqualStrings("", result.result);
-}
diff --git a/src/resinator/utils.zig b/src/resinator/utils.zig
@@ -1,112 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-
-/// Like std.io.FixedBufferStream but does no bounds checking
-pub const UncheckedSliceWriter = struct {
- const Self = @This();
-
- pos: usize = 0,
- slice: []u8,
-
- pub fn write(self: *Self, char: u8) void {
- self.slice[self.pos] = char;
- self.pos += 1;
- }
-
- pub fn writeSlice(self: *Self, slice: []const u8) void {
- for (slice) |c| {
- self.write(c);
- }
- }
-
- pub fn getWritten(self: Self) []u8 {
- return self.slice[0..self.pos];
- }
-};
-
-/// Cross-platform 'std.fs.Dir.openFile' wrapper that will always return IsDir if
-/// a directory is attempted to be opened.
-/// TODO: Remove once https://github.com/ziglang/zig/issues/5732 is addressed.
-pub fn openFileNotDir(cwd: std.fs.Dir, path: []const u8, flags: std.fs.File.OpenFlags) std.fs.File.OpenError!std.fs.File {
- const file = try cwd.openFile(path, flags);
- errdefer file.close();
- // https://github.com/ziglang/zig/issues/5732
- if (builtin.os.tag != .windows) {
- const stat = try file.stat();
-
- if (stat.kind == .directory)
- return error.IsDir;
- }
- return file;
-}
-
-/// Emulates the Windows implementation of `iswdigit`, but only returns true
-/// for the non-ASCII digits that `iswdigit` on Windows would return true for.
-pub fn isNonAsciiDigit(c: u21) bool {
- return switch (c) {
- '²',
- '³',
- '¹',
- '\u{660}'...'\u{669}',
- '\u{6F0}'...'\u{6F9}',
- '\u{7C0}'...'\u{7C9}',
- '\u{966}'...'\u{96F}',
- '\u{9E6}'...'\u{9EF}',
- '\u{A66}'...'\u{A6F}',
- '\u{AE6}'...'\u{AEF}',
- '\u{B66}'...'\u{B6F}',
- '\u{BE6}'...'\u{BEF}',
- '\u{C66}'...'\u{C6F}',
- '\u{CE6}'...'\u{CEF}',
- '\u{D66}'...'\u{D6F}',
- '\u{E50}'...'\u{E59}',
- '\u{ED0}'...'\u{ED9}',
- '\u{F20}'...'\u{F29}',
- '\u{1040}'...'\u{1049}',
- '\u{1090}'...'\u{1099}',
- '\u{17E0}'...'\u{17E9}',
- '\u{1810}'...'\u{1819}',
- '\u{1946}'...'\u{194F}',
- '\u{19D0}'...'\u{19D9}',
- '\u{1B50}'...'\u{1B59}',
- '\u{1BB0}'...'\u{1BB9}',
- '\u{1C40}'...'\u{1C49}',
- '\u{1C50}'...'\u{1C59}',
- '\u{A620}'...'\u{A629}',
- '\u{A8D0}'...'\u{A8D9}',
- '\u{A900}'...'\u{A909}',
- '\u{AA50}'...'\u{AA59}',
- '\u{FF10}'...'\u{FF19}',
- => true,
- else => false,
- };
-}
-
-/// Used for generic colored errors/warnings/notes, more context-specific error messages
-/// are handled elsewhere.
-pub fn renderErrorMessage(writer: anytype, config: std.io.tty.Config, msg_type: enum { err, warning, note }, comptime format: []const u8, args: anytype) !void {
- switch (msg_type) {
- .err => {
- try config.setColor(writer, .bold);
- try config.setColor(writer, .red);
- try writer.writeAll("error: ");
- },
- .warning => {
- try config.setColor(writer, .bold);
- try config.setColor(writer, .yellow);
- try writer.writeAll("warning: ");
- },
- .note => {
- try config.setColor(writer, .reset);
- try config.setColor(writer, .cyan);
- try writer.writeAll("note: ");
- },
- }
- try config.setColor(writer, .reset);
- if (msg_type == .err) {
- try config.setColor(writer, .bold);
- }
- try writer.print(format, args);
- try writer.writeByte('\n');
- try config.setColor(writer, .reset);
-}