zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

literals.zig (49670B) - Raw


      1 const std = @import("std");
      2 const code_pages = @import("code_pages.zig");
      3 const SupportedCodePage = code_pages.SupportedCodePage;
      4 const windows1252 = @import("windows1252.zig");
      5 const ErrorDetails = @import("errors.zig").ErrorDetails;
      6 const DiagnosticsContext = @import("errors.zig").DiagnosticsContext;
      7 const Token = @import("lex.zig").Token;
      8 
      9 /// Reports whether `str` can be a number literal for data values: rc only requires
     10 /// a non-empty string whose first byte is a digit, `-`, or `~`; the rest is unchecked.
     11 pub fn isValidNumberDataLiteral(str: []const u8) bool {
     12     if (str.len < 1) return false;
     13     return switch (str[0]) {
     14         '0'...'9', '-', '~' => true,
     15         else => false,
     16     };
     17 }
     18 
        /// A slice of source bytes paired with the code page used to decode it.
     19 pub const SourceBytes = struct {
            /// The raw bytes. The string parsing functions in this file expect the complete
            /// literal here, including the surrounding quotes and any `L`/`l` prefix.
     20     slice: []const u8,
            /// The code page the string parser uses to decode codepoints out of `slice`.
     21     code_page: SupportedCodePage,
     22 };
     23 
        /// The kind of a string literal: `ascii` for a plain `""` literal, `wide` for an
        /// `L""` literal (`IterativeStringParser.init` treats an `l` prefix the same way).
     24 pub const StringType = enum { ascii, wide };
     25 
     26 /// Iterates over the contents of a quoted string literal, returning one parsed
        /// codepoint per call to `next` and applying the escape and whitespace rules below.
        ///
        /// Valid escapes:
     27 ///  "" -> "
     28 ///  \a, \A => 0x08 (not 0x07 like in C)
     29 ///  \n => 0x0A
     30 ///  \r => 0x0D
     31 ///  \t, \T => 0x09
     32 ///  \\ => \
     33 ///  \nnn => byte with numeric value given by nnn interpreted as octal
     34 ///          (wraps on overflow, number of digits can be 1-3 for ASCII strings
     35 ///          and 1-7 for wide strings)
     36 ///  \xhh => byte with numeric value given by hh interpreted as hex
     37 ///          (number of digits can be 0-2 for ASCII strings and 0-4 for
     38 ///          wide strings)
     39 ///  \<\r+> => \
     40 ///  \<[\r\n\t ]+> => <nothing>
     41 ///
     42 /// Special cases:
     43 ///  <\t> => 1-8 spaces, dependent on columns in the source rc file itself
     44 ///  <\r> => <nothing>
     45 ///  <\n+><\w+?\n?> => <space><\n>
     46 ///
     47 /// Special, especially weird case:
     48 ///  \"" => "
     49 /// NOTE: This leads to footguns because the preprocessor can start parsing things
     50 ///       out-of-sync with the RC compiler, expanding macros within string literals, etc.
     51 ///       This parse function handles this case the same as the Windows RC compiler, but
     52 ///       \" within a string literal is treated as an error by the lexer, so the relevant
     53 ///       branches should never actually be hit during this function.
     54 pub const IterativeStringParser = struct {
            /// Contents of the literal with the surrounding quotes (and any `L` prefix) removed.
     55     source: []const u8,
            /// Code page used to decode codepoints out of `source`.
     56     code_page: SupportedCodePage,
     57     /// The type of the string inferred by the prefix (L"" or "")
     58     /// This is what matters for things like the maximum digits in an
     59     /// escape sequence, whether or not invalid escape sequences are skipped, etc.
     60     declared_string_type: StringType,
            /// Codepoint to return on the next call before reading more of `source`
            /// (the `\n` of a <space><\n> pair).
     61     pending_codepoint: ?u21 = null,
            /// Number of spaces still to be returned for a tab that is being expanded.
     62     num_pending_spaces: u8 = 0,
            /// Byte offset into `source` of the next codepoint to read.
     63     index: usize = 0,
            /// Column of the next codepoint to read; used to find the next tab stop.
     64     column: usize = 0,
            /// When non-null, warnings and notes are appended to it while parsing.
     65     diagnostics: ?DiagnosticsContext = null,
            /// Whether the tab_converted_to_spaces warning was already emitted for this string.
     66     seen_tab: bool = false,
     67
            /// States of the state machine that `nextUnchecked` runs over `source`.
     68     const State = enum {
                /// Not inside an escape sequence or newline handling.
     69         normal,
                /// A `"` was seen; the only valid continuation is a second `"`.
     70         quote,
                /// A `\n` was seen; the whitespace that follows is being skipped.
     71         newline,
                /// A `\` was seen.
     72         escaped,
                /// A `\` followed by one or more `\r` was seen.
     73         escaped_cr,
                /// A `\` followed by a `\n` was seen; the whitespace that follows is being skipped.
     74         escaped_newlines,
                /// The digits of an octal escape are being accumulated.
     75         escaped_octal,
                /// The digits of a hex escape are being accumulated.
     76         escaped_hex,
     77     };
     78
            /// Creates a parser for the literal in `bytes.slice`, which must include the
            /// surrounding quotes (and the `L`/`l` prefix for a wide literal).
            /// Only `start_column` and `diagnostics` are read from `options`.
     79     pub fn init(bytes: SourceBytes, options: StringParseOptions) IterativeStringParser {
     80         const declared_string_type: StringType = switch (bytes.slice[0]) {
     81             'L', 'l' => .wide,
     82             else => .ascii,
     83         };
     84         var source = bytes.slice[1 .. bytes.slice.len - 1]; // drop the first and last byte: both quotes, or (wide) the L and the closing quote
     85         var column = options.start_column + 1; // for the removed "
     86         if (declared_string_type == .wide) {
     87             source = source[1..]; // drop one more leading byte: with the L already gone, this is the opening quote
     88             column += 1; // for the removed L
     89         }
     90         return .{
     91             .source = source,
     92             .code_page = bytes.code_page,
     93             .declared_string_type = declared_string_type,
     94             .column = column,
     95             .diagnostics = options.diagnostics,
     96         };
     97     }
     98
            /// One codepoint returned by `next` / `nextUnchecked`, plus flags describing
            /// how it was produced.
     99     pub const ParsedCodepoint = struct {
                /// The parsed value. Callers in this file compare it against
                /// `code_pages.Codepoint.invalid` before encoding it.
    100         codepoint: u21,
    101         /// Note: If this is true, `codepoint` will have an effective maximum value
    102         /// of 0xFFFF, as `codepoint` is calculated using wrapping arithmetic on a u16.
    103         /// If the value needs to be truncated to a smaller integer (e.g. for ASCII string
    104         /// literals), then that must be done by the caller.
    105         from_escaped_integer: bool = false,
    106         /// Denotes that the codepoint is:
    107         /// - Escaped (has a \ in front of it), and
    108         /// - Has a value >= U+10000, meaning it would be encoded as a surrogate
    109         ///   pair in UTF-16, and
    110         /// - Is part of a wide string literal
    111         ///
    112         /// Normally in wide string literals, invalid escapes are omitted
    113         /// during parsing (the codepoints are not returned at all during
    114         /// the `next` call), but this is a special case in which the
    115         /// escape only applies to the high surrogate pair of the codepoint.
    116         ///
    117         /// TODO: Maybe just return the low surrogate codepoint by itself in this case.
    118         escaped_surrogate_pair: bool = false,
    119     };
    120
            /// Returns the next parsed codepoint, or null once the string is exhausted.
            /// When `diagnostics` is set and the codepoint did not come from an escaped
            /// integer, a warning is appended for 0x0900, 0x0A00, 0x0A0D, 0x2000 and 0x0D00,
            /// and a warning plus a note for 0xFFFE and 0xFFFF (the rc_would_miscompile_*
            /// errors). The only possible error comes from appending to the diagnostics.
    121     pub fn next(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
    122         const result = try self.nextUnchecked();
    123         if (self.diagnostics != null and result != null and !result.?.from_escaped_integer) {
    124             switch (result.?.codepoint) {
    125                 0x0900, 0x0A00, 0x0A0D, 0x2000, 0x0D00 => {
    126                     const err: ErrorDetails.Error = if (result.?.codepoint == 0xD00)
    127                         .rc_would_miscompile_codepoint_skip
    128                     else
    129                         .rc_would_miscompile_codepoint_whitespace;
    130                     try self.diagnostics.?.diagnostics.append(ErrorDetails{
    131                         .err = err,
    132                         .type = .warning,
    133                         .code_page = self.code_page,
    134                         .token = self.diagnostics.?.token,
    135                         .extra = .{ .number = result.?.codepoint },
    136                     });
    137                 },
    138                 0xFFFE, 0xFFFF => {
    139                     try self.diagnostics.?.diagnostics.append(ErrorDetails{
    140                         .err = .rc_would_miscompile_codepoint_bom,
    141                         .type = .warning,
    142                         .code_page = self.code_page,
    143                         .token = self.diagnostics.?.token,
    144                         .extra = .{ .number = result.?.codepoint },
    145                     });
    146                     try self.diagnostics.?.diagnostics.append(ErrorDetails{
    147                         .err = .rc_would_miscompile_codepoint_bom,
    148                         .type = .note,
    149                         .code_page = self.code_page,
    150                         .token = self.diagnostics.?.token,
    151                         .print_source_line = false,
    152                         .extra = .{ .number = result.?.codepoint },
    153                     });
    154                 },
    155                 else => {},
    156             }
    157         }
    158         return result;
    159     }
    160
            /// Same as `next`, but without the rc_would_miscompile_* codepoint checks.
            /// Pending spaces (from tab expansion) and a pending codepoint are returned before
            /// any more of `source` is read. Returns null once the string is exhausted.
            /// The tab_converted_to_spaces warning and note are still appended here (once per
            /// string) when `diagnostics` is set.
    161     pub fn nextUnchecked(self: *IterativeStringParser) std.mem.Allocator.Error!?ParsedCodepoint {
    162         if (self.num_pending_spaces > 0) {
    163             // Ensure that we don't get into this predicament so we can ensure that
    164             // the order of processing any pending stuff doesn't matter
    165             std.debug.assert(self.pending_codepoint == null);
    166             self.num_pending_spaces -= 1;
    167             return .{ .codepoint = ' ' };
    168         }
    169         if (self.pending_codepoint) |pending_codepoint| {
    170             self.pending_codepoint = null;
    171             return .{ .codepoint = pending_codepoint };
    172         }
    173         if (self.index >= self.source.len) return null;
    174
    175         var state: State = .normal;
                // Accumulated value and digit count of the octal/hex escape being parsed.
    176         var string_escape_n: u16 = 0;
    177         var string_escape_i: u8 = 0;
    178         const max_octal_escape_digits: u8 = switch (self.declared_string_type) {
    179             .ascii => 3,
    180             .wide => 7,
    181         };
    182         const max_hex_escape_digits: u8 = switch (self.declared_string_type) {
    183             .ascii => 2,
    184             .wide => 4,
    185         };
    186
                // Assigned at the top of every iteration. When set to true, the current
                // codepoint is left unconsumed: neither `index` nor `column` is advanced.
    187         var backtrack: bool = undefined;
                // The continue expression below only runs when an iteration finishes without
                // returning, which is why the returning branches that consume the current
                // codepoint advance `self.index` themselves.
    188         while (self.code_page.codepointAt(self.index, self.source)) |codepoint| : ({
    189             if (!backtrack) self.index += codepoint.byte_len;
    190         }) {
    191             backtrack = false;
    192             const c = codepoint.value;
                    // Column bookkeeping; as a defer it also runs when the iteration returns.
    193             defer {
    194                 if (!backtrack) {
    195                     if (c == '\t') {
    196                         self.column += columnsUntilTabStop(self.column, 8);
    197                     } else {
    198                         self.column += codepoint.byte_len;
    199                     }
    200                 }
    201             }
    202             switch (state) {
    203                 .normal => switch (c) {
    204                     '\\' => state = .escaped,
    205                     '"' => state = .quote,
    206                     '\r' => {},
    207                     '\n' => state = .newline,
    208                     '\t' => {
    209                         // Only warn about a tab getting converted to spaces once per string
    210                         if (self.diagnostics != null and !self.seen_tab) {
    211                             try self.diagnostics.?.diagnostics.append(ErrorDetails{
    212                                 .err = .tab_converted_to_spaces,
    213                                 .type = .warning,
    214                                 .code_page = self.code_page,
    215                                 .token = self.diagnostics.?.token,
    216                             });
    217                             try self.diagnostics.?.diagnostics.append(ErrorDetails{
    218                                 .err = .tab_converted_to_spaces,
    219                                 .type = .note,
    220                                 .code_page = self.code_page,
    221                                 .token = self.diagnostics.?.token,
    222                                 .print_source_line = false,
    223                             });
    224                             self.seen_tab = true;
    225                         }
                                // The first space is returned now; the rest are handed out by later calls.
    226                         const cols = columnsUntilTabStop(self.column, 8);
    227                         self.num_pending_spaces = @intCast(cols - 1);
    228                         self.index += codepoint.byte_len;
    229                         return .{ .codepoint = ' ' };
    230                     },
    231                     else => {
    232                         self.index += codepoint.byte_len;
    233                         return .{ .codepoint = c };
    234                     },
    235                 },
    236                 .quote => switch (c) {
    237                     '"' => {
    238                         // "" => "
    239                         self.index += codepoint.byte_len;
    240                         return .{ .codepoint = '"' };
    241                     },
    242                     else => unreachable, // this is a bug in the lexer
    243                 },
    244                 .newline => switch (c) {
    245                     '\r', ' ', '\t', '\n', '\x0b', '\x0c', '\xa0' => {},
    246                     else => {
    247                         // we intentionally avoid incrementing self.index
    248                         // to handle the current char in the next call,
    249                         // and we set backtrack so column count is handled correctly
    250                         backtrack = true;
    251
    252                         // <space><newline>
    253                         self.pending_codepoint = '\n';
    254                         return .{ .codepoint = ' ' };
    255                     },
    256                 },
    257                 .escaped => switch (c) {
    258                     '\r' => state = .escaped_cr,
    259                     '\n' => state = .escaped_newlines,
    260                     '0'...'7' => {
    261                         string_escape_n = std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
    262                         string_escape_i = 1;
    263                         state = .escaped_octal;
    264                     },
    265                     'x', 'X' => {
    266                         string_escape_n = 0;
    267                         string_escape_i = 0;
    268                         state = .escaped_hex;
    269                     },
    270                     else => {
    271                         switch (c) {
    272                             'a', 'A' => {
    273                                 self.index += codepoint.byte_len;
    274                                 // might be a bug in RC, but matches its behavior
    275                                 return .{ .codepoint = '\x08' };
    276                             },
    277                             'n' => {
    278                                 self.index += codepoint.byte_len;
    279                                 return .{ .codepoint = '\n' };
    280                             },
    281                             'r' => {
    282                                 self.index += codepoint.byte_len;
    283                                 return .{ .codepoint = '\r' };
    284                             },
    285                             't', 'T' => {
    286                                 self.index += codepoint.byte_len;
    287                                 return .{ .codepoint = '\t' };
    288                             },
    289                             '\\' => {
    290                                 self.index += codepoint.byte_len;
    291                                 return .{ .codepoint = '\\' };
    292                             },
    293                             '"' => {
    294                                 // \" is a special case that doesn't get the \ included,
    295                                 backtrack = true;
    296                             },
    297                             else => switch (self.declared_string_type) {
    298                                 .wide => {
    299                                     // All invalid escape sequences are skipped in wide strings,
    300                                     // but there is a special case around \<tab> where the \
    301                                     // is skipped but the tab character is processed.
    302                                     // It's actually a bit weirder than that, though, since
    303                                     // the preprocessor is the one that does the <tab> -> spaces
    304                                     // conversion, so it goes something like this:
    305                                     //
    306                                     // Before preprocessing: L"\<tab>"
    307                                     // After preprocessing:  L"\     "
    308                                     //
    309                                     // So the parser only sees an escaped space character followed
    310                                     // by some other number of spaces >= 0.
    311                                     //
    312                                     // However, our preprocessor keeps tab characters intact, so we emulate
    313                                     // the above behavior by skipping the \ and then outputting one less
    314                                     // space than normal for the <tab> character.
    315                                     if (c == '\t') {
    316                                         // Only warn about a tab getting converted to spaces once per string
    317                                         if (self.diagnostics != null and !self.seen_tab) {
    318                                             try self.diagnostics.?.diagnostics.append(ErrorDetails{
    319                                                 .err = .tab_converted_to_spaces,
    320                                                 .type = .warning,
    321                                                 .code_page = self.code_page,
    322                                                 .token = self.diagnostics.?.token,
    323                                             });
    324                                             try self.diagnostics.?.diagnostics.append(ErrorDetails{
    325                                                 .err = .tab_converted_to_spaces,
    326                                                 .type = .note,
    327                                                 .code_page = self.code_page,
    328                                                 .token = self.diagnostics.?.token,
    329                                                 .print_source_line = false,
    330                                             });
    331                                             self.seen_tab = true;
    332                                         }
    333
    334                                         const cols = columnsUntilTabStop(self.column, 8);
    335                                         // If the tab character would only be converted to a single space,
    336                                         // then we can just skip both the \ and the <tab> and move on.
    337                                         if (cols > 1) {
    338                                             self.num_pending_spaces = @intCast(cols - 2);
    339                                             self.index += codepoint.byte_len;
    340                                             return .{ .codepoint = ' ' };
    341                                         }
    342                                     }
    343                                     // There's a second special case when the codepoint would be encoded
    344                                     // as a surrogate pair in UTF-16, as the escape 'applies' to the
    345                                     // high surrogate pair only in this instance. This is a side-effect
    346                                     // of the Win32 RC compiler preprocessor outputting UTF-16 and the
    347                                     // compiler itself seemingly working on code units instead of code points
    348                                     // in this particular instance.
    349                                     //
    350                                     // We emulate this behavior by emitting the codepoint, but with a marker
    351                                     // that indicates that it needs to be handled specially.
    352                                     if (c >= 0x10000 and c != code_pages.Codepoint.invalid) {
    353                                         self.index += codepoint.byte_len;
    354                                         return .{ .codepoint = c, .escaped_surrogate_pair = true };
    355                                     }
    356                                 },
    357                                 .ascii => {
    358                                     // we intentionally avoid incrementing self.index
    359                                     // to handle the current char in the next call,
    360                                     // and we set backtrack so column count is handled correctly
    361                                     backtrack = true;
    362                                     return .{ .codepoint = '\\' };
    363                                 },
    364                             },
    365                         }
                                // Reached for \" (with backtrack set) and for skipped wide escapes.
    366                         state = .normal;
    367                     },
    368                 },
    369                 .escaped_cr => switch (c) {
    370                     '\r' => {},
    371                     '\n' => state = .escaped_newlines,
    372                     else => {
    373                         // we intentionally avoid incrementing self.index
    374                         // to handle the current char in the next call,
    375                         // and we set backtrack so column count is handled correctly
    376                         backtrack = true;
    377                         return .{ .codepoint = '\\' };
    378                     },
    379                 },
    380                 .escaped_newlines => switch (c) {
    381                     '\r', '\n', '\t', ' ', '\x0b', '\x0c', '\xa0' => {},
    382                     else => {
    383                         // backtrack so that we handle the current char properly
    384                         backtrack = true;
    385                         state = .normal;
    386                     },
    387                 },
    388                 .escaped_octal => switch (c) {
    389                     '0'...'7' => {
    390                         // Note: We use wrapping arithmetic on a u16 here since there's been no observed
    391                         // string parsing scenario where an escaped integer with a value >= the u16
    392                         // max is interpreted as anything but the truncated u16 value.
    393                         string_escape_n *%= 8;
    394                         string_escape_n +%= std.fmt.charToDigit(@intCast(c), 8) catch unreachable;
    395                         string_escape_i += 1;
    396                         if (string_escape_i == max_octal_escape_digits) {
    397                             self.index += codepoint.byte_len;
    398                             return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
    399                         }
    400                     },
    401                     else => {
    402                         // we intentionally avoid incrementing self.index
    403                         // to handle the current char in the next call,
    404                         // and we set backtrack so column count is handled correctly
    405                         backtrack = true;
    406
    407                         // write out whatever byte we have parsed so far
    408                         return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
    409                     },
    410                 },
    411                 .escaped_hex => switch (c) {
    412                     '0'...'9', 'a'...'f', 'A'...'F' => {
                                // At most 4 hex digits are accumulated, so the value always fits in the u16.
    413                         string_escape_n *= 16;
    414                         string_escape_n += std.fmt.charToDigit(@intCast(c), 16) catch unreachable;
    415                         string_escape_i += 1;
    416                         if (string_escape_i == max_hex_escape_digits) {
    417                             self.index += codepoint.byte_len;
    418                             return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
    419                         }
    420                     },
    421                     else => {
    422                         // we intentionally avoid incrementing self.index
    423                         // to handle the current char in the next call,
    424                         // and we set backtrack so column count is handled correctly
    425                         backtrack = true;
    426
    427                         // write out whatever byte we have parsed so far
    428                         // (even with 0 actual digits, \x alone parses to 0)
    429                         const escaped_value = string_escape_n;
    430                         return .{ .codepoint = escaped_value, .from_escaped_integer = true };
    431                     },
    432                 },
    433             }
    434         }
    435
                // The input ended; return whatever the state reached so far still owes the caller.
    436         switch (state) {
    437             .normal, .escaped_newlines => {},
    438             .newline => {
    439                 // <space><newline>
    440                 self.pending_codepoint = '\n';
    441                 return .{ .codepoint = ' ' };
    442             },
    443             .escaped, .escaped_cr => return .{ .codepoint = '\\' },
    444             .escaped_octal, .escaped_hex => {
    445                 return .{ .codepoint = string_escape_n, .from_escaped_integer = true };
    446             },
    447             .quote => unreachable, // this is a bug in the lexer
    448         }
    449
    450         return null;
    451     }
    452 };
    453 
        /// Options shared by the string parsing functions in this file.
    454 pub const StringParseOptions = struct {
            /// Column of the first byte of the literal. `IterativeStringParser.init` adds 1
            /// for the opening quote (and 1 more for an `L` prefix); the resulting column
            /// feeds the tab stop calculation.
    455     start_column: usize = 0,
            /// When non-null, warnings and notes found during parsing are appended to it.
    456     diagnostics: ?DiagnosticsContext = null,
            /// Code page of the output. It selects how ASCII string output is encoded and how
            /// escaped integers are interpreted when an ASCII literal is parsed as a wide string.
    457     output_code_page: SupportedCodePage,
    458 };
    459 
    460 /// Parses a quoted string literal into a newly allocated buffer owned by the caller.
    461 /// `.ascii` output is encoded for `options.output_code_page`. `.wide` output is
    462 /// little-endian UTF-16 with a 0 terminator and requires an `L""` literal (asserted).
    463 pub fn parseQuotedString(
    464     comptime literal_type: StringType,
    465     allocator: std.mem.Allocator,
    466     bytes: SourceBytes,
    467     options: StringParseOptions,
    468 ) !(switch (literal_type) {
    469     .ascii => []u8,
    470     .wide => [:0]u16,
    471 }) {
    472     const Unit = if (literal_type == .wide) u16 else u8;
    473     std.debug.assert(bytes.slice.len >= 2); // the two double quote chars are the minimum
    474     var out = try std.array_list.Managed(Unit).initCapacity(allocator, bytes.slice.len);
    475     errdefer out.deinit();
    476
    477     var parser = IterativeStringParser.init(bytes, options);
    478     while (try parser.next()) |parsed| {
    479         const cp = parsed.codepoint;
    480         switch (literal_type) {
    481             .ascii => switch (options.output_code_page) {
    482                 .windows1252 => {
    483                     if (parsed.from_escaped_integer) {
    484                         try out.append(@truncate(cp));
    485                         continue;
    486                     }
    487                     if (windows1252.bestFitFromCodepoint(cp)) |mapped| {
    488                         try out.append(mapped);
    489                         continue;
    490                     }
    491                     // No best fit exists: emit one `?`, or two for codepoints >= 0x10000.
    492                     const single = cp < 0x10000 or cp == code_pages.Codepoint.invalid;
    493                     try out.appendSlice(if (single) "?" else "??");
    494                 },
    495                 .utf8 => {
    496                     // An escaped integer is truncated to a byte and must be ASCII to be kept.
    497                     const value: u21 = if (parsed.from_escaped_integer) @as(Unit, @truncate(cp)) else cp;
    498                     const replace = (parsed.from_escaped_integer and value > 0x7F) or cp == code_pages.Codepoint.invalid;
    499                     var encoded: [4]u8 = undefined;
    500                     const encoded_len = std.unicode.utf8Encode(if (replace) '�' else value, &encoded) catch unreachable;
    501                     try out.appendSlice(encoded[0..encoded_len]);
    502                 },
    503             },
    504             .wide => {
    505                 // Other literal types are parsed as wide by parseQuotedStringAsWideString.
    506                 std.debug.assert(parser.declared_string_type == .wide);
    507                 const single_unit: ?u16 = if (parsed.from_escaped_integer)
    508                     @as(u16, @truncate(cp))
    509                 else if (cp == code_pages.Codepoint.invalid)
    510                     '�'
    511                 else if (cp < 0x10000)
    512                     @as(u16, @intCast(cp))
    513                 else
    514                     null;
    515                 if (single_unit) |unit| {
    516                     try out.append(std.mem.nativeToLittle(u16, unit));
    517                     continue;
    518                 }
    519                 // UTF-16 surrogate pair; the high half is left out for an escaped pair.
    520                 if (!parsed.escaped_surrogate_pair) {
    521                     const high_surrogate = @as(u16, @intCast((cp - 0x10000) >> 10)) + 0xD800;
    522                     try out.append(std.mem.nativeToLittle(u16, high_surrogate));
    523                 }
    524                 const low_surrogate = @as(u16, @intCast(cp & 0x3FF)) + 0xDC00;
    525                 try out.append(std.mem.nativeToLittle(u16, low_surrogate));
    526             },
    527         }
    528     }
    529     switch (literal_type) {
    530         .wide => return out.toOwnedSliceSentinel(0),
    531         .ascii => return out.toOwnedSlice(),
    532     }
    533 }
    534 
        /// Parses a quoted string literal into bytes; a thin wrapper around
        /// `parseQuotedString(.ascii, ...)`. Asserts that `bytes.slice` is at least as long
        /// as an empty literal. The result is allocated with `allocator`.
    535 pub fn parseQuotedAsciiString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![]u8 {
    536     std.debug.assert(bytes.slice.len >= 2); // ""
    537     return parseQuotedString(.ascii, allocator, bytes, options);
    538 }
    539 
        /// Parses an `L""` string literal into a 0-terminated slice of u16 code units; a thin
        /// wrapper around `parseQuotedString(.wide, ...)`. Asserts that `bytes.slice` is at
        /// least as long as an empty wide literal. The result is allocated with `allocator`.
    540 pub fn parseQuotedWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
    541     std.debug.assert(bytes.slice.len >= 3); // L""
    542     return parseQuotedString(.wide, allocator, bytes, options);
    543 }
    544 
    545 /// Parses a quoted string literal of either type into a 0-terminated wide string
    546 /// (little-endian UTF-16) owned by the caller.
    547 /// A literal declared as wide (L"" or l"") is handed to `parseQuotedWideString` unchanged.
    548 /// For an ASCII literal, only escaped integers get special treatment:
    549 /// - The escaped value is truncated to a u8
    550 /// - The u8 is reinterpreted as a byte from the *output* code page
    551 /// - The codepoint corresponding to that byte is emitted, or � if there is none
    552 /// With a UTF-8 code page, \x80 is looked at as a lone byte (even though it could
    553 /// start a longer sequence), so it counts as invalid and � is emitted.
    554 /// With a Windows-1252 code page, \x80 is € (U+20AC), so the UTF-16 encoding of
    555 /// U+20AC is emitted.
    556 pub fn parseQuotedStringAsWideString(allocator: std.mem.Allocator, bytes: SourceBytes, options: StringParseOptions) ![:0]u16 {
    557     std.debug.assert(bytes.slice.len >= 2); // ""
    558     switch (bytes.slice[0]) {
    559         'L', 'l' => return parseQuotedWideString(allocator, bytes, options),
    560         else => {},
    561     }
    562
    563     // From here on, the literal is known to be an ASCII string that is parsed into a wide string.
    564     // TODO: The logic below is similar to that in AcceleratorKeyCodepointTranslator, might be worth merging the two
    565     var out = try std.array_list.Managed(u16).initCapacity(allocator, bytes.slice.len);
    566     errdefer out.deinit();
    567
    568     var parser = IterativeStringParser.init(bytes, options);
    569     while (try parser.next()) |parsed| {
    570         const cp = parsed.codepoint;
    571         const single_unit: ?u16 = blk: {
    572             if (parsed.from_escaped_integer) {
    573                 std.debug.assert(cp != code_pages.Codepoint.invalid);
    574                 const raw_byte: u8 = @truncate(cp);
    575                 const interpreted: u16 = switch (options.output_code_page) {
    576                     .windows1252 => windows1252.toCodepoint(raw_byte),
    577                     .utf8 => if (raw_byte > 0x7F) '�' else raw_byte,
    578                 };
    579                 break :blk interpreted;
    580             }
    581             if (cp == code_pages.Codepoint.invalid) break :blk '�';
    582             if (cp < 0x10000) break :blk @as(u16, @intCast(cp));
    583             break :blk null;
    584         };
    585         if (single_unit) |unit| {
    586             try out.append(std.mem.nativeToLittle(u16, unit));
    587             continue;
    588         }
    589         // UTF-16 surrogate pair; the high half is left out for an escaped pair.
    590         if (!parsed.escaped_surrogate_pair) {
    591             const high_surrogate = @as(u16, @intCast((cp - 0x10000) >> 10)) + 0xD800;
    592             try out.append(std.mem.nativeToLittle(u16, high_surrogate));
    593         }
    594         const low_surrogate = @as(u16, @intCast(cp & 0x3FF)) + 0xDC00;
    595         try out.append(std.mem.nativeToLittle(u16, low_surrogate));
    596     }
    597     return out.toOwnedSliceSentinel(0);
    598 }
    599 
    600 test "parse quoted ascii string" {
            // Every case in this test uses Windows-1252 as both the source and the output code page.
            // An arena backs all allocations, so the parsed results are never freed individually.
    601     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    602     defer arena_allocator.deinit();
    603     const arena = arena_allocator.allocator();
    604 
    605     try std.testing.expectEqualSlices(u8, "hello", try parseQuotedAsciiString(arena, .{
    606         .slice =
    607         \\"hello"
    608         ,
    609         .code_page = .windows1252,
    610     }, .{
    611         .output_code_page = .windows1252,
    612     }));
    613     // hex with 0 digits
    614     try std.testing.expectEqualSlices(u8, "\x00", try parseQuotedAsciiString(arena, .{
    615         .slice =
    616         \\"\x"
    617         ,
    618         .code_page = .windows1252,
    619     }, .{
    620         .output_code_page = .windows1252,
    621     }));
    622     // hex max of 2 digits
    623     try std.testing.expectEqualSlices(u8, "\xFFf", try parseQuotedAsciiString(arena, .{
    624         .slice =
    625         \\"\XfFf"
    626         ,
    627         .code_page = .windows1252,
    628     }, .{
    629         .output_code_page = .windows1252,
    630     }));
    631     // octal with invalid octal digit
    632     try std.testing.expectEqualSlices(u8, "\x019", try parseQuotedAsciiString(arena, .{
    633         .slice =
    634         \\"\19"
    635         ,
    636         .code_page = .windows1252,
    637     }, .{
    638         .output_code_page = .windows1252,
    639     }));
    640     // escaped quotes
    641     try std.testing.expectEqualSlices(u8, " \" ", try parseQuotedAsciiString(arena, .{
    642         .slice =
    643         \\" "" "
    644         ,
    645         .code_page = .windows1252,
    646     }, .{
    647         .output_code_page = .windows1252,
    648     }));
    649     // backslash right before escaped quotes
    650     try std.testing.expectEqualSlices(u8, "\"", try parseQuotedAsciiString(arena, .{
    651         .slice =
    652         \\"\"""
    653         ,
    654         .code_page = .windows1252,
    655     }, .{
    656         .output_code_page = .windows1252,
    657     }));
    658     // octal overflow
    659     try std.testing.expectEqualSlices(u8, "\x01", try parseQuotedAsciiString(arena, .{
    660         .slice =
    661         \\"\401"
    662         ,
    663         .code_page = .windows1252,
    664     }, .{
    665         .output_code_page = .windows1252,
    666     }));
    667     // escapes
    668     try std.testing.expectEqualSlices(u8, "\x08\n\r\t\\", try parseQuotedAsciiString(arena, .{
    669         .slice =
    670         \\"\a\n\r\t\\"
    671         ,
    672         .code_page = .windows1252,
    673     }, .{
    674         .output_code_page = .windows1252,
    675     }));
    676     // uppercase escapes
            // (only \A and \T are recognized in uppercase; \N and \R are kept verbatim)
    677     try std.testing.expectEqualSlices(u8, "\x08\\N\\R\t\\", try parseQuotedAsciiString(arena, .{
    678         .slice =
    679         \\"\A\N\R\T\\"
    680         ,
    681         .code_page = .windows1252,
    682     }, .{
    683         .output_code_page = .windows1252,
    684     }));
    685     // backslash on its own
    686     try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(arena, .{
    687         .slice =
    688         \\"\"
    689         ,
    690         .code_page = .windows1252,
    691     }, .{
    692         .output_code_page = .windows1252,
    693     }));
    694     // unrecognized escapes
    695     try std.testing.expectEqualSlices(u8, "\\b", try parseQuotedAsciiString(arena, .{
    696         .slice =
    697         \\"\b"
    698         ,
    699         .code_page = .windows1252,
    700     }, .{
    701         .output_code_page = .windows1252,
    702     }));
    703     // escaped carriage returns
    704     try std.testing.expectEqualSlices(u8, "\\", try parseQuotedAsciiString(
    705         arena,
    706         .{ .slice = "\"\\\r\r\r\r\r\"", .code_page = .windows1252 },
    707         .{ .output_code_page = .windows1252 },
    708     ));
    709     // escaped newlines
    710     try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
    711         arena,
    712         .{ .slice = "\"\\\n\n\n\n\n\"", .code_page = .windows1252 },
    713         .{ .output_code_page = .windows1252 },
    714     ));
    715     // escaped CRLF pairs
    716     try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
    717         arena,
    718         .{ .slice = "\"\\\r\n\r\n\r\n\r\n\r\n\"", .code_page = .windows1252 },
    719         .{ .output_code_page = .windows1252 },
    720     ));
    721     // escaped newlines with other whitespace
    722     try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
    723         arena,
    724         .{ .slice = "\"\\\n    \t\r\n \r\t\n  \t\"", .code_page = .windows1252 },
    725         .{ .output_code_page = .windows1252 },
    726     ));
    727     // literal tab characters get converted to spaces (dependent on source file columns)
            // NOTE(review): the expected space counts below are consistent with 8-column tab
            // stops measured from the opening quote; confirm against the parser.
    728     try std.testing.expectEqualSlices(u8, "       ", try parseQuotedAsciiString(
    729         arena,
    730         .{ .slice = "\"\t\"", .code_page = .windows1252 },
    731         .{ .output_code_page = .windows1252 },
    732     ));
    733     try std.testing.expectEqualSlices(u8, "abc    ", try parseQuotedAsciiString(
    734         arena,
    735         .{ .slice = "\"abc\t\"", .code_page = .windows1252 },
    736         .{ .output_code_page = .windows1252 },
    737     ));
    738     try std.testing.expectEqualSlices(u8, "abcdefg        ", try parseQuotedAsciiString(
    739         arena,
    740         .{ .slice = "\"abcdefg\t\"", .code_page = .windows1252 },
    741         .{ .output_code_page = .windows1252 },
    742     ));
    743     try std.testing.expectEqualSlices(u8, "\\      ", try parseQuotedAsciiString(
    744         arena,
    745         .{ .slice = "\"\\\t\"", .code_page = .windows1252 },
    746         .{ .output_code_page = .windows1252 },
    747     ));
    748     // literal CR's get dropped
    749     try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
    750         arena,
    751         .{ .slice = "\"\r\r\r\r\r\"", .code_page = .windows1252 },
    752         .{ .output_code_page = .windows1252 },
    753     ));
    754     // contiguous newlines and whitespace get collapsed to <space><newline>
    755     try std.testing.expectEqualSlices(u8, " \n", try parseQuotedAsciiString(
    756         arena,
    757         .{ .slice = "\"\n\r\r  \r\n \t  \"", .code_page = .windows1252 },
    758         .{ .output_code_page = .windows1252 },
    759     ));
    760 }
    761 
    762 test "parse quoted ascii string with utf8 code page" {
            // The source bytes are always decoded as UTF-8. The first group of cases targets
            // Windows-1252 output; the second group (after the blank line) targets UTF-8 output.
    763     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    764     defer arena_allocator.deinit();
    765     const arena = arena_allocator.allocator();
    766 
    767     try std.testing.expectEqualSlices(u8, "", try parseQuotedAsciiString(
    768         arena,
    769         .{ .slice = "\"\"", .code_page = .utf8 },
    770         .{ .output_code_page = .windows1252 },
    771     ));
    772     // Codepoints that don't have a Windows-1252 representation get converted to ?
    773     try std.testing.expectEqualSlices(u8, "?????????", try parseQuotedAsciiString(
    774         arena,
    775         .{ .slice = "\"кириллица\"", .code_page = .utf8 },
    776         .{ .output_code_page = .windows1252 },
    777     ));
    778     // Codepoints that have a best fit mapping get converted accordingly,
    779     // these are box drawing codepoints
    780     try std.testing.expectEqualSlices(u8, "\x2b\x2d\x2b", try parseQuotedAsciiString(
    781         arena,
    782         .{ .slice = "\"┌─┐\"", .code_page = .utf8 },
    783         .{ .output_code_page = .windows1252 },
    784     ));
    785     // Invalid UTF-8 gets converted to ? depending on well-formedness
            // (five invalid source bytes yield four '?' here)
    786     try std.testing.expectEqualSlices(u8, "????", try parseQuotedAsciiString(
    787         arena,
    788         .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
    789         .{ .output_code_page = .windows1252 },
    790     ));
    791     // Codepoints that would require a UTF-16 surrogate pair get converted to ??
    792     try std.testing.expectEqualSlices(u8, "??", try parseQuotedAsciiString(
    793         arena,
    794         .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
    795         .{ .output_code_page = .windows1252 },
    796     ));
    797 
    798     // Output code page changes how invalid UTF-8 gets converted, since it
    799     // now encodes the result as UTF-8 so it can write replacement characters.
    800     try std.testing.expectEqualSlices(u8, "����", try parseQuotedAsciiString(
    801         arena,
    802         .{ .slice = "\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
    803         .{ .output_code_page = .utf8 },
    804     ));
            // A valid 4-byte sequence passes through unchanged when the output is UTF-8.
    805     try std.testing.expectEqualSlices(u8, "\xF2\xAF\xBA\xB4", try parseQuotedAsciiString(
    806         arena,
    807         .{ .slice = "\"\xF2\xAF\xBA\xB4\"", .code_page = .utf8 },
    808         .{ .output_code_page = .utf8 },
    809     ));
    810 
    811     // This used to cause integer overflow when reconsuming the 4-byte long codepoint
    812     // after the escaped CRLF pair.
    813     try std.testing.expectEqualSlices(u8, "\u{10348}", try parseQuotedAsciiString(
    814         arena,
    815         .{ .slice = "\"\\\r\n\u{10348}\"", .code_page = .utf8 },
    816         .{ .output_code_page = .utf8 },
    817     ));
    818 }
    819 
    820 test "parse quoted string with different input/output code pages" {
            // Windows-1252 source with UTF-8 output. The literal holds one raw 0x80 byte followed
            // by five escaped integers (\x8a, \600, \612, \540, \577). The raw byte becomes €,
            // the escapes that land above 0x7F become �, and \540 / \577 end up as 0x60 / 0x7F.
    821     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    822     defer arena_allocator.deinit();
    823     const arena = arena_allocator.allocator();
    824 
    825     try std.testing.expectEqualSlices(u8, "€���\x60\x7F", try parseQuotedAsciiString(
    826         arena,
    827         .{ .slice = "\"\x80\\x8a\\600\\612\\540\\577\"", .code_page = .windows1252 },
    828         .{ .output_code_page = .utf8 },
    829     ));
    830 }
    831 
    832 test "parse quoted wide string" {
            // Every case uses Windows-1252 for both code pages. Expected code units are written
            // in little-endian form (via nativeToLittle / utf8ToUtf16LeStringLiteral).
    833     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    834     defer arena_allocator.deinit();
    835     const arena = arena_allocator.allocator();
    836 
    837     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("hello"), try parseQuotedWideString(arena, .{
    838         .slice =
    839         \\L"hello"
    840         ,
    841         .code_page = .windows1252,
    842     }, .{
    843         .output_code_page = .windows1252,
    844     }));
    845     // hex with 0 digits
    846     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{0x0}, try parseQuotedWideString(arena, .{
    847         .slice =
    848         \\L"\x"
    849         ,
    850         .code_page = .windows1252,
    851     }, .{
    852         .output_code_page = .windows1252,
    853     }));
    854     // hex max of 4 digits
    855     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0xFFFF), std.mem.nativeToLittle(u16, 'f') }, try parseQuotedWideString(arena, .{
    856         .slice =
    857         \\L"\XfFfFf"
    858         ,
    859         .code_page = .windows1252,
    860     }, .{
    861         .output_code_page = .windows1252,
    862     }));
    863     // octal max of 7 digits
            // (the first seven digits form the escape; the trailing "33" stays literal text)
    864     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x9493), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '3') }, try parseQuotedWideString(arena, .{
    865         .slice =
    866         \\L"\111222333"
    867         ,
    868         .code_page = .windows1252,
    869     }, .{
    870         .output_code_page = .windows1252,
    871     }));
    872     // octal overflow
    873     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0xFF01)}, try parseQuotedWideString(arena, .{
    874         .slice =
    875         \\L"\777401"
    876         ,
    877         .code_page = .windows1252,
    878     }, .{
    879         .output_code_page = .windows1252,
    880     }));
    881     // literal tab characters get converted to spaces (dependent on source file columns)
    882     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("abcdefg       "), try parseQuotedWideString(
    883         arena,
    884         .{ .slice = "L\"abcdefg\t\"", .code_page = .windows1252 },
    885         .{ .output_code_page = .windows1252 },
    886     ));
    887     // Windows-1252 conversion
    888     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("ðð€€€"), try parseQuotedWideString(
    889         arena,
    890         .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .windows1252 },
    891         .{ .output_code_page = .windows1252 },
    892     ));
    893     // Invalid escape sequences are skipped
    894     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedWideString(
    895         arena,
    896         .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
    897         .{ .output_code_page = .windows1252 },
    898     ));
    899 }
    900 
    901 test "parse quoted wide string with utf8 code page" {
            // UTF-8 source parsed as a wide (L"") string; the output code page is Windows-1252
            // in every case.
    902     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    903     defer arena_allocator.deinit();
    904     const arena = arena_allocator.allocator();
    905 
    906     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{}, try parseQuotedWideString(
    907         arena,
    908         .{ .slice = "L\"\"", .code_page = .utf8 },
    909         .{ .output_code_page = .windows1252 },
    910     ));
            // Valid UTF-8 text is expected to come out as the same text encoded as UTF-16.
    911     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedWideString(
    912         arena,
    913         .{ .slice = "L\"кириллица\"", .code_page = .utf8 },
    914         .{ .output_code_page = .windows1252 },
    915     ));
    916     // Invalid UTF-8 gets converted to � depending on well-formedness
    917     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("����"), try parseQuotedWideString(
    918         arena,
    919         .{ .slice = "L\"\xf0\xf0\x80\x80\x80\"", .code_page = .utf8 },
    920         .{ .output_code_page = .windows1252 },
    921     ));
    922 }
    923 
    924 test "parse quoted ascii string as wide string" {
            // parseQuotedStringAsWideString accepts both unprefixed and L-prefixed literals;
            // the paired cases below contrast how the two forms are treated.
    925     var arena_allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
    926     defer arena_allocator.deinit();
    927     const arena = arena_allocator.allocator();
    928 
    929     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("кириллица"), try parseQuotedStringAsWideString(
    930         arena,
    931         .{ .slice = "\"кириллица\"", .code_page = .utf8 },
    932         .{ .output_code_page = .windows1252 },
    933     ));
    934     // Whether or not invalid escapes are skipped is still determined by the L prefix
    935     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral("\\H"), try parseQuotedStringAsWideString(
    936         arena,
    937         .{ .slice = "\"\\H\"", .code_page = .windows1252 },
    938         .{ .output_code_page = .windows1252 },
    939     ));
    940     try std.testing.expectEqualSentinel(u16, 0, std.unicode.utf8ToUtf16LeStringLiteral(""), try parseQuotedStringAsWideString(
    941         arena,
    942         .{ .slice = "L\"\\H\"", .code_page = .windows1252 },
    943         .{ .output_code_page = .windows1252 },
    944     ));
    945     // Maximum escape sequence value is also determined by the L prefix
            // (unprefixed: \x12 followed by literal "34"; L-prefixed: the single unit 0x1234)
    946     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{ std.mem.nativeToLittle(u16, 0x12), std.mem.nativeToLittle(u16, '3'), std.mem.nativeToLittle(u16, '4') }, try parseQuotedStringAsWideString(
    947         arena,
    948         .{ .slice = "\"\\x1234\"", .code_page = .windows1252 },
    949         .{ .output_code_page = .windows1252 },
    950     ));
    951     try std.testing.expectEqualSentinel(u16, 0, &[_:0]u16{std.mem.nativeToLittle(u16, 0x1234)}, try parseQuotedStringAsWideString(
    952         arena,
    953         .{ .slice = "L\"\\x1234\"", .code_page = .windows1252 },
    954         .{ .output_code_page = .windows1252 },
    955     ));
    956 }
    957 
    /// Returns the number of columns between `column` and the next tab stop, where
    /// tab stops occur every `tab_columns` columns. The result is always in the
    /// range 1..=tab_columns: a column that already sits on a tab stop advances a
    /// full `tab_columns`.
    ///
    /// With `tab_columns = 8`:
    ///   0 => 8, 1 => 7, 2 => 6, 3 => 5, 4 => 4,
    ///   5 => 3, 6 => 2, 7 => 1, 8 => 8
    pub fn columnsUntilTabStop(column: usize, tab_columns: usize) usize {
        const columns_past_previous_stop = column % tab_columns;
        return tab_columns - columns_past_previous_stop;
    }
    963 
    /// Returns how many columns the byte `c` occupies when it appears at `cur_column`.
    /// A tab stretches to the next tab stop (see `columnsUntilTabStop`); every other
    /// byte counts as exactly one column.
    pub fn columnWidth(cur_column: usize, c: u8, tab_columns: usize) usize {
        if (c != '\t') return 1;
        return columnsUntilTabStop(cur_column, tab_columns);
    }
    970 
    /// A parsed numeric value: a 32-bit payload plus a flag recording whether the
    /// number is marked as long.
    pub const Number = struct {
        value: u32,
        is_long: bool = false,

        /// Returns the low 16 bits of `value`; the upper bits are discarded.
        pub fn asWord(self: Number) u16 {
            const low_word: u16 = @truncate(self.value);
            return low_word;
        }

        /// Applies the binary operator `operator_char` (one of `-`, `+`, `|`, `&`) to
        /// the two values. Addition and subtraction wrap on overflow. The result is
        /// long if either operand is long.
        pub fn evaluateOperator(lhs: Number, operator_char: u8, rhs: Number) Number {
            const a = lhs.value;
            const b = rhs.value;
            return .{
                .value = switch (operator_char) {
                    '-' => a -% b,
                    '+' => a +% b,
                    '|' => a | b,
                    '&' => a & b,
                    else => unreachable, // invalid operator, this would be a lexer/parser bug
                },
                .is_long = lhs.is_long or rhs.is_long,
            };
        }
    };
    993 
    /// Parses a number literal into a `Number`. This function never fails: parsing
    /// simply stops at the first byte that cannot belong to the number.
    ///
    /// Accepted shape, in order:
    ///  - an optional `-` (negate) or `~` (bitwise complement) prefix
    ///  - an optional radix prefix: `0o` (octal, lowercase `o` only) or `0x`/`0X` (hex),
    ///    recognized only when more than two bytes remain
    ///  - digits in that radix (decimal by default), accumulated with wrapping
    ///    32-bit arithmetic
    ///  - an optional `L`/`l`, which marks the number as long and ends parsing
    ///
    /// Assumes that number literals normally rejected by RC's preprocessor
    /// are similarly rejected before being parsed.
    ///
    /// Relevant RC preprocessor errors:
    ///  RC2021: expected exponent value, not '<digit>'
    ///   example that is rejected: 1e1
    ///   example that is accepted: 1ea
    ///   (this function will parse the two examples above the same)
    pub fn parseNumberLiteral(bytes: SourceBytes) Number {
        std.debug.assert(bytes.slice.len > 0);

        const Prefix = enum { none, minus, complement };
        const prefix: Prefix = switch (bytes.slice[0]) {
            '-' => .minus,
            '~' => .complement,
            else => .none,
        };
        // Skip the one-byte prefix, if there was one.
        var digits = if (prefix == .none) bytes.slice else bytes.slice[1..];

        var radix: u8 = 10;
        if (digits.len > 2 and digits[0] == '0') {
            const prefixed_radix: ?u8 = switch (digits[1]) {
                'o' => 8, // octal radix prefix is case-sensitive
                'x', 'X' => 16,
                else => null,
            };
            if (prefixed_radix) |r| {
                radix = r;
                digits = digits[2..];
            }
        }

        var value: u32 = 0;
        var is_long = false;
        var offset: usize = 0;
        while (bytes.code_page.codepointAt(offset, digits)) |codepoint| : (offset += codepoint.byte_len) {
            const c = codepoint.value;
            if (c == 'L' or c == 'l') {
                is_long = true;
                break;
            }
            const digit = switch (c) {
                // On invalid digit for the radix, just stop parsing but don't fail
                0x00...0x7F => std.fmt.charToDigit(@intCast(c), radix) catch break,
                else => break,
            };

            // Wrapping on purpose: literals of any length are accepted.
            value = value *% radix +% digit;
        }

        return .{
            .value = switch (prefix) {
                .none => value,
                .minus => 0 -% value,
                .complement => ~value,
            },
            .is_long = is_long,
        };
    }
   1063 
   test "parse number literal" {
       // Plain decimal values, the long suffix, trailing garbage, and wrapping at 2^32.
       try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1L", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "1l", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 1, .is_long = false }, parseNumberLiteral(.{ .slice = "1garbageL", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 4294967295, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967295", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "4294967296", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 1, .is_long = true }, parseNumberLiteral(.{ .slice = "4294967297L", .code_page = .windows1252 }));

       // can handle any length of number, wraps on overflow appropriately
       const big_overflow = parseNumberLiteral(.{ .slice = "1000000000000000000000000000000000000000000000000000000000000000000000000000000090000000001", .code_page = .windows1252 });
       try std.testing.expectEqual(Number{ .value = 4100654081, .is_long = false }, big_overflow);
       try std.testing.expectEqual(@as(u16, 1025), big_overflow.asWord());

       // hex: upper/lower case digits combined with upper/lower case long suffix
       try std.testing.expectEqual(Number{ .value = 0x20, .is_long = false }, parseNumberLiteral(.{ .slice = "0x20", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2AL", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2al", .code_page = .windows1252 }));

       // octal: only the lowercase `0o` prefix is recognized, and an invalid digit ends parsing
       try std.testing.expectEqual(Number{ .value = 0o20, .is_long = false }, parseNumberLiteral(.{ .slice = "0o20", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0o20, .is_long = true }, parseNumberLiteral(.{ .slice = "0o20L", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0o2, .is_long = false }, parseNumberLiteral(.{ .slice = "0o29", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0, .is_long = false }, parseNumberLiteral(.{ .slice = "0O29", .code_page = .windows1252 }));

       // `-` negates and `~` complements the parsed (already wrapped) value
       try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = false }, parseNumberLiteral(.{ .slice = "-1", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = false }, parseNumberLiteral(.{ .slice = "~1", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0xFFFFFFFF, .is_long = true }, parseNumberLiteral(.{ .slice = "-4294967297L", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0xFFFFFFFE, .is_long = true }, parseNumberLiteral(.{ .slice = "~4294967297L", .code_page = .windows1252 }));
       try std.testing.expectEqual(Number{ .value = 0xFFFFFFFD, .is_long = false }, parseNumberLiteral(.{ .slice = "-0X3", .code_page = .windows1252 }));

       // anything after L is ignored
       try std.testing.expectEqual(Number{ .value = 0x2A, .is_long = true }, parseNumberLiteral(.{ .slice = "0x2aL5", .code_page = .windows1252 }));
   }