zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob c8f33dbf (71354B) - Raw


      1 // SPDX-License-Identifier: MIT
      2 // Copyright (c) 2015-2020 Zig Contributors
      3 // This file is part of [zig](https://ziglang.org/), which is MIT licensed.
      4 // The MIT license requires this copyright notice to be included in all copies
      5 // and substantial portions of the software.
      6 const std = @import("../std.zig");
      7 const mem = std.mem;
      8 
      9 pub const Token = struct {
     10     id: Id,
     11     loc: Loc,
     12 
     13     pub const Loc = struct {
     14         start: usize,
     15         end: usize,
     16     };
     17 
     18     pub const keywords = std.ComptimeStringMap(Id, .{
     19         .{ "align", .Keyword_align },
     20         .{ "allowzero", .Keyword_allowzero },
     21         .{ "and", .Keyword_and },
     22         .{ "anyframe", .Keyword_anyframe },
     23         .{ "anytype", .Keyword_anytype },
     24         .{ "asm", .Keyword_asm },
     25         .{ "async", .Keyword_async },
     26         .{ "await", .Keyword_await },
     27         .{ "break", .Keyword_break },
     28         .{ "callconv", .Keyword_callconv },
     29         .{ "catch", .Keyword_catch },
     30         .{ "comptime", .Keyword_comptime },
     31         .{ "const", .Keyword_const },
     32         .{ "continue", .Keyword_continue },
     33         .{ "defer", .Keyword_defer },
     34         .{ "else", .Keyword_else },
     35         .{ "enum", .Keyword_enum },
     36         .{ "errdefer", .Keyword_errdefer },
     37         .{ "error", .Keyword_error },
     38         .{ "export", .Keyword_export },
     39         .{ "extern", .Keyword_extern },
     40         .{ "false", .Keyword_false },
     41         .{ "fn", .Keyword_fn },
     42         .{ "for", .Keyword_for },
     43         .{ "if", .Keyword_if },
     44         .{ "inline", .Keyword_inline },
     45         .{ "noalias", .Keyword_noalias },
     46         .{ "noasync", .Keyword_nosuspend }, // TODO: remove this
     47         .{ "noinline", .Keyword_noinline },
     48         .{ "nosuspend", .Keyword_nosuspend },
     49         .{ "null", .Keyword_null },
     50         .{ "opaque", .Keyword_opaque },
     51         .{ "or", .Keyword_or },
     52         .{ "orelse", .Keyword_orelse },
     53         .{ "packed", .Keyword_packed },
     54         .{ "pub", .Keyword_pub },
     55         .{ "resume", .Keyword_resume },
     56         .{ "return", .Keyword_return },
     57         .{ "linksection", .Keyword_linksection },
     58         .{ "struct", .Keyword_struct },
     59         .{ "suspend", .Keyword_suspend },
     60         .{ "switch", .Keyword_switch },
     61         .{ "test", .Keyword_test },
     62         .{ "threadlocal", .Keyword_threadlocal },
     63         .{ "true", .Keyword_true },
     64         .{ "try", .Keyword_try },
     65         .{ "undefined", .Keyword_undefined },
     66         .{ "union", .Keyword_union },
     67         .{ "unreachable", .Keyword_unreachable },
     68         .{ "usingnamespace", .Keyword_usingnamespace },
     69         .{ "var", .Keyword_var },
     70         .{ "volatile", .Keyword_volatile },
     71         .{ "while", .Keyword_while },
     72     });
     73 
     74     pub fn getKeyword(bytes: []const u8) ?Id {
     75         return keywords.get(bytes);
     76     }
     77 
     78     pub const Id = enum {
     79         Invalid,
     80         Invalid_ampersands,
     81         Identifier,
     82         StringLiteral,
     83         MultilineStringLiteralLine,
     84         CharLiteral,
     85         Eof,
     86         Builtin,
     87         Bang,
     88         Pipe,
     89         PipePipe,
     90         PipeEqual,
     91         Equal,
     92         EqualEqual,
     93         EqualAngleBracketRight,
     94         BangEqual,
     95         LParen,
     96         RParen,
     97         Semicolon,
     98         Percent,
     99         PercentEqual,
    100         LBrace,
    101         RBrace,
    102         LBracket,
    103         RBracket,
    104         Period,
    105         PeriodAsterisk,
    106         Ellipsis2,
    107         Ellipsis3,
    108         Caret,
    109         CaretEqual,
    110         Plus,
    111         PlusPlus,
    112         PlusEqual,
    113         PlusPercent,
    114         PlusPercentEqual,
    115         Minus,
    116         MinusEqual,
    117         MinusPercent,
    118         MinusPercentEqual,
    119         Asterisk,
    120         AsteriskEqual,
    121         AsteriskAsterisk,
    122         AsteriskPercent,
    123         AsteriskPercentEqual,
    124         Arrow,
    125         Colon,
    126         Slash,
    127         SlashEqual,
    128         Comma,
    129         Ampersand,
    130         AmpersandEqual,
    131         QuestionMark,
    132         AngleBracketLeft,
    133         AngleBracketLeftEqual,
    134         AngleBracketAngleBracketLeft,
    135         AngleBracketAngleBracketLeftEqual,
    136         AngleBracketRight,
    137         AngleBracketRightEqual,
    138         AngleBracketAngleBracketRight,
    139         AngleBracketAngleBracketRightEqual,
    140         Tilde,
    141         IntegerLiteral,
    142         FloatLiteral,
    143         LineComment,
    144         DocComment,
    145         ContainerDocComment,
    146         ShebangLine,
    147         Keyword_align,
    148         Keyword_allowzero,
    149         Keyword_and,
    150         Keyword_anyframe,
    151         Keyword_anytype,
    152         Keyword_asm,
    153         Keyword_async,
    154         Keyword_await,
    155         Keyword_break,
    156         Keyword_callconv,
    157         Keyword_catch,
    158         Keyword_comptime,
    159         Keyword_const,
    160         Keyword_continue,
    161         Keyword_defer,
    162         Keyword_else,
    163         Keyword_enum,
    164         Keyword_errdefer,
    165         Keyword_error,
    166         Keyword_export,
    167         Keyword_extern,
    168         Keyword_false,
    169         Keyword_fn,
    170         Keyword_for,
    171         Keyword_if,
    172         Keyword_inline,
    173         Keyword_noalias,
    174         Keyword_noinline,
    175         Keyword_nosuspend,
    176         Keyword_null,
    177         Keyword_opaque,
    178         Keyword_or,
    179         Keyword_orelse,
    180         Keyword_packed,
    181         Keyword_pub,
    182         Keyword_resume,
    183         Keyword_return,
    184         Keyword_linksection,
    185         Keyword_struct,
    186         Keyword_suspend,
    187         Keyword_switch,
    188         Keyword_test,
    189         Keyword_threadlocal,
    190         Keyword_true,
    191         Keyword_try,
    192         Keyword_undefined,
    193         Keyword_union,
    194         Keyword_unreachable,
    195         Keyword_usingnamespace,
    196         Keyword_var,
    197         Keyword_volatile,
    198         Keyword_while,
    199 
    200         pub fn symbol(id: Id) []const u8 {
    201             return switch (id) {
    202                 .Invalid => "Invalid",
    203                 .Invalid_ampersands => "&&",
    204                 .Identifier => "Identifier",
    205                 .StringLiteral => "StringLiteral",
    206                 .MultilineStringLiteralLine => "MultilineStringLiteralLine",
    207                 .CharLiteral => "CharLiteral",
    208                 .Eof => "Eof",
    209                 .Builtin => "Builtin",
    210                 .IntegerLiteral => "IntegerLiteral",
    211                 .FloatLiteral => "FloatLiteral",
    212                 .LineComment => "LineComment",
    213                 .DocComment => "DocComment",
    214                 .ContainerDocComment => "ContainerDocComment",
    215                 .ShebangLine => "ShebangLine",
    216 
    217                 .Bang => "!",
    218                 .Pipe => "|",
    219                 .PipePipe => "||",
    220                 .PipeEqual => "|=",
    221                 .Equal => "=",
    222                 .EqualEqual => "==",
    223                 .EqualAngleBracketRight => "=>",
    224                 .BangEqual => "!=",
    225                 .LParen => "(",
    226                 .RParen => ")",
    227                 .Semicolon => ";",
    228                 .Percent => "%",
    229                 .PercentEqual => "%=",
    230                 .LBrace => "{",
    231                 .RBrace => "}",
    232                 .LBracket => "[",
    233                 .RBracket => "]",
    234                 .Period => ".",
    235                 .PeriodAsterisk => ".*",
    236                 .Ellipsis2 => "..",
    237                 .Ellipsis3 => "...",
    238                 .Caret => "^",
    239                 .CaretEqual => "^=",
    240                 .Plus => "+",
    241                 .PlusPlus => "++",
    242                 .PlusEqual => "+=",
    243                 .PlusPercent => "+%",
    244                 .PlusPercentEqual => "+%=",
    245                 .Minus => "-",
    246                 .MinusEqual => "-=",
    247                 .MinusPercent => "-%",
    248                 .MinusPercentEqual => "-%=",
    249                 .Asterisk => "*",
    250                 .AsteriskEqual => "*=",
    251                 .AsteriskAsterisk => "**",
    252                 .AsteriskPercent => "*%",
    253                 .AsteriskPercentEqual => "*%=",
    254                 .Arrow => "->",
    255                 .Colon => ":",
    256                 .Slash => "/",
    257                 .SlashEqual => "/=",
    258                 .Comma => ",",
    259                 .Ampersand => "&",
    260                 .AmpersandEqual => "&=",
    261                 .QuestionMark => "?",
    262                 .AngleBracketLeft => "<",
    263                 .AngleBracketLeftEqual => "<=",
    264                 .AngleBracketAngleBracketLeft => "<<",
    265                 .AngleBracketAngleBracketLeftEqual => "<<=",
    266                 .AngleBracketRight => ">",
    267                 .AngleBracketRightEqual => ">=",
    268                 .AngleBracketAngleBracketRight => ">>",
    269                 .AngleBracketAngleBracketRightEqual => ">>=",
    270                 .Tilde => "~",
    271                 .Keyword_align => "align",
    272                 .Keyword_allowzero => "allowzero",
    273                 .Keyword_and => "and",
    274                 .Keyword_anyframe => "anyframe",
    275                 .Keyword_anytype => "anytype",
    276                 .Keyword_asm => "asm",
    277                 .Keyword_async => "async",
    278                 .Keyword_await => "await",
    279                 .Keyword_break => "break",
    280                 .Keyword_callconv => "callconv",
    281                 .Keyword_catch => "catch",
    282                 .Keyword_comptime => "comptime",
    283                 .Keyword_const => "const",
    284                 .Keyword_continue => "continue",
    285                 .Keyword_defer => "defer",
    286                 .Keyword_else => "else",
    287                 .Keyword_enum => "enum",
    288                 .Keyword_errdefer => "errdefer",
    289                 .Keyword_error => "error",
    290                 .Keyword_export => "export",
    291                 .Keyword_extern => "extern",
    292                 .Keyword_false => "false",
    293                 .Keyword_fn => "fn",
    294                 .Keyword_for => "for",
    295                 .Keyword_if => "if",
    296                 .Keyword_inline => "inline",
    297                 .Keyword_noalias => "noalias",
    298                 .Keyword_noinline => "noinline",
    299                 .Keyword_nosuspend => "nosuspend",
    300                 .Keyword_null => "null",
    301                 .Keyword_opaque => "opaque",
    302                 .Keyword_or => "or",
    303                 .Keyword_orelse => "orelse",
    304                 .Keyword_packed => "packed",
    305                 .Keyword_pub => "pub",
    306                 .Keyword_resume => "resume",
    307                 .Keyword_return => "return",
    308                 .Keyword_linksection => "linksection",
    309                 .Keyword_struct => "struct",
    310                 .Keyword_suspend => "suspend",
    311                 .Keyword_switch => "switch",
    312                 .Keyword_test => "test",
    313                 .Keyword_threadlocal => "threadlocal",
    314                 .Keyword_true => "true",
    315                 .Keyword_try => "try",
    316                 .Keyword_undefined => "undefined",
    317                 .Keyword_union => "union",
    318                 .Keyword_unreachable => "unreachable",
    319                 .Keyword_usingnamespace => "usingnamespace",
    320                 .Keyword_var => "var",
    321                 .Keyword_volatile => "volatile",
    322                 .Keyword_while => "while",
    323             };
    324         }
    325     };
    326 };
    327 
    328 pub const Tokenizer = struct {
    329     buffer: []const u8,
    330     index: usize,
    331     pending_invalid_token: ?Token,
    332 
    /// For debugging purposes: prints the tag name of `token.id` followed by
    /// the quoted slice of `self.buffer` that the token covers, using
    /// `std.debug.warn`.
    pub fn dump(self: *Tokenizer, token: *const Token) void {
        // The offsets live in `token.loc`; `Token` itself only has the
        // fields `id` and `loc`, so `token.start` / `token.end` do not exist.
        const text = self.buffer[token.loc.start..token.loc.end];
        // `{s}` states explicitly that both arguments are byte strings.
        std.debug.warn("{s} \"{s}\"\n", .{ @tagName(token.id), text });
    }
    337 
    /// Creates a tokenizer over `buffer` with no pending invalid token.
    /// Scanning starts at index 0, or just past a leading UTF-8 byte order
    /// mark when `buffer` begins with one.
    pub fn init(buffer: []const u8) Tokenizer {
        const utf8_bom = "\xEF\xBB\xBF";
        const first_index: usize = if (mem.startsWith(u8, buffer, utf8_bom)) utf8_bom.len else 0;
        return .{
            .buffer = buffer,
            .index = first_index,
            .pending_invalid_token = null,
        };
    }
    347 
    /// States of the scanning state machine run by `next`. Every call to
    /// `next` begins in `.start` and switches state as it consumes bytes.
    /// Comments below describe only the transitions visible near this
    /// declaration; states without a note are handled further down in `next`.
    const State = enum {
        // Initial state of every `next` call; also skips whitespace.
        start,
        // Entered from `.start` on an ASCII letter or `_`.
        identifier,
        // Entered from `.saw_at_sign` when `@` is not followed by `"`.
        builtin,
        // Inside a `"`-delimited literal; also entered for `@"..."`.
        string_literal,
        // Just after a `\` inside a string literal.
        string_literal_backslash,
        // Entered from `.backslash` when a second `\` follows.
        multiline_string_literal_line,
        // Just after the opening `'` of a character literal.
        char_literal,
        // Just after a `\` inside a character literal.
        char_literal_backslash,
        // After `\x`; moves to `.char_literal_end` once two hex digits are seen.
        char_literal_hex_escape,
        // After `\u`; a `{` must follow.
        char_literal_unicode_escape_saw_u,
        // Inside `\u{`; consumes hex digits until `}`.
        char_literal_unicode_escape,
        // Malformed `\u` escape; keeps consuming alphanumerics and `}` so the
        // whole escape becomes a single invalid token.
        char_literal_unicode_invalid,
        // After a multi-byte UTF-8 lead byte inside a character literal.
        char_literal_unicode,
        // The character is complete; only the closing `'` is accepted.
        char_literal_end,
        // After a single `\` seen in `.start`.
        backslash,
        // Operator-prefix states: entered from `.start` on `=`, `!`, `|`, `-`,
        // `*` and `/`. `.asterisk_percent` follows `*%`; `.minus_percent` is
        // presumably analogous (handled outside this view).
        equal,
        bang,
        pipe,
        minus,
        minus_percent,
        asterisk,
        asterisk_percent,
        slash,
        // Comment states (handled outside this view).
        line_comment_start,
        line_comment,
        doc_comment_start,
        doc_comment,
        container_doc_comment,
        // Entered from `.start` on a leading `0`.
        zero,
        // Entered from `.start` on `1`...`9`.
        int_literal_dec,
        // Remaining numeric-literal states (handled outside this view).
        int_literal_dec_no_underscore,
        int_literal_bin,
        int_literal_bin_no_underscore,
        int_literal_oct,
        int_literal_oct_no_underscore,
        int_literal_hex,
        int_literal_hex_no_underscore,
        num_dot_dec,
        num_dot_hex,
        float_fraction_dec,
        float_fraction_dec_no_underscore,
        float_fraction_hex,
        float_fraction_hex_no_underscore,
        float_exponent_unsigned,
        float_exponent_num,
        float_exponent_num_no_underscore,
        // More operator-prefix states: entered from `.start` on `&`, `^`, `%`,
        // `+`, `<`, `>` and `.`. `.plus_percent` follows `+%`; the doubled
        // angle-bracket states and `.period_2` are handled outside this view.
        ampersand,
        caret,
        percent,
        plus,
        plus_percent,
        angle_bracket_left,
        angle_bracket_angle_bracket_left,
        angle_bracket_right,
        angle_bracket_angle_bracket_right,
        period,
        period_2,
        // Just after `@`: `"` starts a quoted identifier, anything else a builtin.
        saw_at_sign,
    };
    408 
    /// Reports whether `char` is an ASCII letter, an ASCII digit or `_`.
    fn isIdentifierChar(char: u8) bool {
        return switch (char) {
            'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
            else => false,
        };
    }
    412 
    413     pub fn next(self: *Tokenizer) Token {
    414         if (self.pending_invalid_token) |token| {
    415             self.pending_invalid_token = null;
    416             return token;
    417         }
    418         const start_index = self.index;
    419         var state: State = .start;
    420         var result = Token{
    421             .id = .Eof,
    422             .loc = .{
    423                 .start = self.index,
    424                 .end = undefined,
    425             },
    426         };
    427         var seen_escape_digits: usize = undefined;
    428         var remaining_code_units: usize = undefined;
    429         while (self.index < self.buffer.len) : (self.index += 1) {
    430             const c = self.buffer[self.index];
    431             switch (state) {
    432                 .start => switch (c) {
    433                     ' ', '\n', '\t', '\r' => {
    434                         result.loc.start = self.index + 1;
    435                     },
    436                     '"' => {
    437                         state = .string_literal;
    438                         result.id = .StringLiteral;
    439                     },
    440                     '\'' => {
    441                         state = .char_literal;
    442                     },
    443                     'a'...'z', 'A'...'Z', '_' => {
    444                         state = .identifier;
    445                         result.id = .Identifier;
    446                     },
    447                     '@' => {
    448                         state = .saw_at_sign;
    449                     },
    450                     '=' => {
    451                         state = .equal;
    452                     },
    453                     '!' => {
    454                         state = .bang;
    455                     },
    456                     '|' => {
    457                         state = .pipe;
    458                     },
    459                     '(' => {
    460                         result.id = .LParen;
    461                         self.index += 1;
    462                         break;
    463                     },
    464                     ')' => {
    465                         result.id = .RParen;
    466                         self.index += 1;
    467                         break;
    468                     },
    469                     '[' => {
    470                         result.id = .LBracket;
    471                         self.index += 1;
    472                         break;
    473                     },
    474                     ']' => {
    475                         result.id = .RBracket;
    476                         self.index += 1;
    477                         break;
    478                     },
    479                     ';' => {
    480                         result.id = .Semicolon;
    481                         self.index += 1;
    482                         break;
    483                     },
    484                     ',' => {
    485                         result.id = .Comma;
    486                         self.index += 1;
    487                         break;
    488                     },
    489                     '?' => {
    490                         result.id = .QuestionMark;
    491                         self.index += 1;
    492                         break;
    493                     },
    494                     ':' => {
    495                         result.id = .Colon;
    496                         self.index += 1;
    497                         break;
    498                     },
    499                     '%' => {
    500                         state = .percent;
    501                     },
    502                     '*' => {
    503                         state = .asterisk;
    504                     },
    505                     '+' => {
    506                         state = .plus;
    507                     },
    508                     '<' => {
    509                         state = .angle_bracket_left;
    510                     },
    511                     '>' => {
    512                         state = .angle_bracket_right;
    513                     },
    514                     '^' => {
    515                         state = .caret;
    516                     },
    517                     '\\' => {
    518                         state = .backslash;
    519                         result.id = .MultilineStringLiteralLine;
    520                     },
    521                     '{' => {
    522                         result.id = .LBrace;
    523                         self.index += 1;
    524                         break;
    525                     },
    526                     '}' => {
    527                         result.id = .RBrace;
    528                         self.index += 1;
    529                         break;
    530                     },
    531                     '~' => {
    532                         result.id = .Tilde;
    533                         self.index += 1;
    534                         break;
    535                     },
    536                     '.' => {
    537                         state = .period;
    538                     },
    539                     '-' => {
    540                         state = .minus;
    541                     },
    542                     '/' => {
    543                         state = .slash;
    544                     },
    545                     '&' => {
    546                         state = .ampersand;
    547                     },
    548                     '0' => {
    549                         state = .zero;
    550                         result.id = .IntegerLiteral;
    551                     },
    552                     '1'...'9' => {
    553                         state = .int_literal_dec;
    554                         result.id = .IntegerLiteral;
    555                     },
    556                     else => {
    557                         result.id = .Invalid;
    558                         self.index += 1;
    559                         break;
    560                     },
    561                 },
    562 
    563                 .saw_at_sign => switch (c) {
    564                     '"' => {
    565                         result.id = .Identifier;
    566                         state = .string_literal;
    567                     },
    568                     else => {
    569                         // reinterpret as a builtin
    570                         self.index -= 1;
    571                         state = .builtin;
    572                         result.id = .Builtin;
    573                     },
    574                 },
    575 
    576                 .ampersand => switch (c) {
    577                     '&' => {
    578                         result.id = .Invalid_ampersands;
    579                         self.index += 1;
    580                         break;
    581                     },
    582                     '=' => {
    583                         result.id = .AmpersandEqual;
    584                         self.index += 1;
    585                         break;
    586                     },
    587                     else => {
    588                         result.id = .Ampersand;
    589                         break;
    590                     },
    591                 },
    592 
    593                 .asterisk => switch (c) {
    594                     '=' => {
    595                         result.id = .AsteriskEqual;
    596                         self.index += 1;
    597                         break;
    598                     },
    599                     '*' => {
    600                         result.id = .AsteriskAsterisk;
    601                         self.index += 1;
    602                         break;
    603                     },
    604                     '%' => {
    605                         state = .asterisk_percent;
    606                     },
    607                     else => {
    608                         result.id = .Asterisk;
    609                         break;
    610                     },
    611                 },
    612 
    613                 .asterisk_percent => switch (c) {
    614                     '=' => {
    615                         result.id = .AsteriskPercentEqual;
    616                         self.index += 1;
    617                         break;
    618                     },
    619                     else => {
    620                         result.id = .AsteriskPercent;
    621                         break;
    622                     },
    623                 },
    624 
    625                 .percent => switch (c) {
    626                     '=' => {
    627                         result.id = .PercentEqual;
    628                         self.index += 1;
    629                         break;
    630                     },
    631                     else => {
    632                         result.id = .Percent;
    633                         break;
    634                     },
    635                 },
    636 
    637                 .plus => switch (c) {
    638                     '=' => {
    639                         result.id = .PlusEqual;
    640                         self.index += 1;
    641                         break;
    642                     },
    643                     '+' => {
    644                         result.id = .PlusPlus;
    645                         self.index += 1;
    646                         break;
    647                     },
    648                     '%' => {
    649                         state = .plus_percent;
    650                     },
    651                     else => {
    652                         result.id = .Plus;
    653                         break;
    654                     },
    655                 },
    656 
    657                 .plus_percent => switch (c) {
    658                     '=' => {
    659                         result.id = .PlusPercentEqual;
    660                         self.index += 1;
    661                         break;
    662                     },
    663                     else => {
    664                         result.id = .PlusPercent;
    665                         break;
    666                     },
    667                 },
    668 
    669                 .caret => switch (c) {
    670                     '=' => {
    671                         result.id = .CaretEqual;
    672                         self.index += 1;
    673                         break;
    674                     },
    675                     else => {
    676                         result.id = .Caret;
    677                         break;
    678                     },
    679                 },
    680 
    681                 .identifier => switch (c) {
    682                     'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
    683                     else => {
    684                         if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
    685                             result.id = id;
    686                         }
    687                         break;
    688                     },
    689                 },
    690                 .builtin => switch (c) {
    691                     'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
    692                     else => break,
    693                 },
    694                 .backslash => switch (c) {
    695                     '\\' => {
    696                         state = .multiline_string_literal_line;
    697                     },
    698                     else => break,
    699                 },
    700                 .string_literal => switch (c) {
    701                     '\\' => {
    702                         state = .string_literal_backslash;
    703                     },
    704                     '"' => {
    705                         self.index += 1;
    706                         break;
    707                     },
    708                     '\n', '\r' => break, // Look for this error later.
    709                     else => self.checkLiteralCharacter(),
    710                 },
    711 
    712                 .string_literal_backslash => switch (c) {
    713                     '\n', '\r' => break, // Look for this error later.
    714                     else => {
    715                         state = .string_literal;
    716                     },
    717                 },
    718 
    719                 .char_literal => switch (c) {
    720                     '\\' => {
    721                         state = .char_literal_backslash;
    722                     },
    723                     '\'', 0x80...0xbf, 0xf8...0xff => {
    724                         result.id = .Invalid;
    725                         break;
    726                     },
    727                     0xc0...0xdf => { // 110xxxxx
    728                         remaining_code_units = 1;
    729                         state = .char_literal_unicode;
    730                     },
    731                     0xe0...0xef => { // 1110xxxx
    732                         remaining_code_units = 2;
    733                         state = .char_literal_unicode;
    734                     },
    735                     0xf0...0xf7 => { // 11110xxx
    736                         remaining_code_units = 3;
    737                         state = .char_literal_unicode;
    738                     },
    739                     else => {
    740                         state = .char_literal_end;
    741                     },
    742                 },
    743 
    744                 .char_literal_backslash => switch (c) {
    745                     '\n' => {
    746                         result.id = .Invalid;
    747                         break;
    748                     },
    749                     'x' => {
    750                         state = .char_literal_hex_escape;
    751                         seen_escape_digits = 0;
    752                     },
    753                     'u' => {
    754                         state = .char_literal_unicode_escape_saw_u;
    755                     },
    756                     else => {
    757                         state = .char_literal_end;
    758                     },
    759                 },
    760 
    761                 .char_literal_hex_escape => switch (c) {
    762                     '0'...'9', 'a'...'f', 'A'...'F' => {
    763                         seen_escape_digits += 1;
    764                         if (seen_escape_digits == 2) {
    765                             state = .char_literal_end;
    766                         }
    767                     },
    768                     else => {
    769                         result.id = .Invalid;
    770                         break;
    771                     },
    772                 },
    773 
    774                 .char_literal_unicode_escape_saw_u => switch (c) {
    775                     '{' => {
    776                         state = .char_literal_unicode_escape;
    777                         seen_escape_digits = 0;
    778                     },
    779                     else => {
    780                         result.id = .Invalid;
    781                         state = .char_literal_unicode_invalid;
    782                     },
    783                 },
    784 
    785                 .char_literal_unicode_escape => switch (c) {
    786                     '0'...'9', 'a'...'f', 'A'...'F' => {
    787                         seen_escape_digits += 1;
    788                     },
    789                     '}' => {
    790                         if (seen_escape_digits == 0) {
    791                             result.id = .Invalid;
    792                             state = .char_literal_unicode_invalid;
    793                         } else {
    794                             state = .char_literal_end;
    795                         }
    796                     },
    797                     else => {
    798                         result.id = .Invalid;
    799                         state = .char_literal_unicode_invalid;
    800                     },
    801                 },
    802 
    803                 .char_literal_unicode_invalid => switch (c) {
    804                     // Keep consuming characters until an obvious stopping point.
    805                     // This consolidates e.g. `u{0ab1Q}` into a single invalid token
    806                     // instead of creating the tokens `u{0ab1`, `Q`, `}`
    807                     '0'...'9', 'a'...'z', 'A'...'Z', '}' => {},
    808                     else => break,
    809                 },
    810 
    811                 .char_literal_end => switch (c) {
    812                     '\'' => {
    813                         result.id = .CharLiteral;
    814                         self.index += 1;
    815                         break;
    816                     },
    817                     else => {
    818                         result.id = .Invalid;
    819                         break;
    820                     },
    821                 },
    822 
    823                 .char_literal_unicode => switch (c) {
    824                     0x80...0xbf => {
    825                         remaining_code_units -= 1;
    826                         if (remaining_code_units == 0) {
    827                             state = .char_literal_end;
    828                         }
    829                     },
    830                     else => {
    831                         result.id = .Invalid;
    832                         break;
    833                     },
    834                 },
    835 
    836                 .multiline_string_literal_line => switch (c) {
    837                     '\n' => {
    838                         self.index += 1;
    839                         break;
    840                     },
    841                     '\t' => {},
    842                     else => self.checkLiteralCharacter(),
    843                 },
    844 
    845                 .bang => switch (c) {
    846                     '=' => {
    847                         result.id = .BangEqual;
    848                         self.index += 1;
    849                         break;
    850                     },
    851                     else => {
    852                         result.id = .Bang;
    853                         break;
    854                     },
    855                 },
    856 
    857                 .pipe => switch (c) {
    858                     '=' => {
    859                         result.id = .PipeEqual;
    860                         self.index += 1;
    861                         break;
    862                     },
    863                     '|' => {
    864                         result.id = .PipePipe;
    865                         self.index += 1;
    866                         break;
    867                     },
    868                     else => {
    869                         result.id = .Pipe;
    870                         break;
    871                     },
    872                 },
    873 
    874                 .equal => switch (c) {
    875                     '=' => {
    876                         result.id = .EqualEqual;
    877                         self.index += 1;
    878                         break;
    879                     },
    880                     '>' => {
    881                         result.id = .EqualAngleBracketRight;
    882                         self.index += 1;
    883                         break;
    884                     },
    885                     else => {
    886                         result.id = .Equal;
    887                         break;
    888                     },
    889                 },
    890 
    891                 .minus => switch (c) {
    892                     '>' => {
    893                         result.id = .Arrow;
    894                         self.index += 1;
    895                         break;
    896                     },
    897                     '=' => {
    898                         result.id = .MinusEqual;
    899                         self.index += 1;
    900                         break;
    901                     },
    902                     '%' => {
    903                         state = .minus_percent;
    904                     },
    905                     else => {
    906                         result.id = .Minus;
    907                         break;
    908                     },
    909                 },
    910 
    911                 .minus_percent => switch (c) {
    912                     '=' => {
    913                         result.id = .MinusPercentEqual;
    914                         self.index += 1;
    915                         break;
    916                     },
    917                     else => {
    918                         result.id = .MinusPercent;
    919                         break;
    920                     },
    921                 },
    922 
    923                 .angle_bracket_left => switch (c) {
    924                     '<' => {
    925                         state = .angle_bracket_angle_bracket_left;
    926                     },
    927                     '=' => {
    928                         result.id = .AngleBracketLeftEqual;
    929                         self.index += 1;
    930                         break;
    931                     },
    932                     else => {
    933                         result.id = .AngleBracketLeft;
    934                         break;
    935                     },
    936                 },
    937 
    938                 .angle_bracket_angle_bracket_left => switch (c) {
    939                     '=' => {
    940                         result.id = .AngleBracketAngleBracketLeftEqual;
    941                         self.index += 1;
    942                         break;
    943                     },
    944                     else => {
    945                         result.id = .AngleBracketAngleBracketLeft;
    946                         break;
    947                     },
    948                 },
    949 
    950                 .angle_bracket_right => switch (c) {
    951                     '>' => {
    952                         state = .angle_bracket_angle_bracket_right;
    953                     },
    954                     '=' => {
    955                         result.id = .AngleBracketRightEqual;
    956                         self.index += 1;
    957                         break;
    958                     },
    959                     else => {
    960                         result.id = .AngleBracketRight;
    961                         break;
    962                     },
    963                 },
    964 
    965                 .angle_bracket_angle_bracket_right => switch (c) {
    966                     '=' => {
    967                         result.id = .AngleBracketAngleBracketRightEqual;
    968                         self.index += 1;
    969                         break;
    970                     },
    971                     else => {
    972                         result.id = .AngleBracketAngleBracketRight;
    973                         break;
    974                     },
    975                 },
    976 
    977                 .period => switch (c) {
    978                     '.' => {
    979                         state = .period_2;
    980                     },
    981                     '*' => {
    982                         result.id = .PeriodAsterisk;
    983                         self.index += 1;
    984                         break;
    985                     },
    986                     else => {
    987                         result.id = .Period;
    988                         break;
    989                     },
    990                 },
    991 
    992                 .period_2 => switch (c) {
    993                     '.' => {
    994                         result.id = .Ellipsis3;
    995                         self.index += 1;
    996                         break;
    997                     },
    998                     else => {
    999                         result.id = .Ellipsis2;
   1000                         break;
   1001                     },
   1002                 },
   1003 
   1004                 .slash => switch (c) {
   1005                     '/' => {
   1006                         state = .line_comment_start;
   1007                         result.id = .LineComment;
   1008                     },
   1009                     '=' => {
   1010                         result.id = .SlashEqual;
   1011                         self.index += 1;
   1012                         break;
   1013                     },
   1014                     else => {
   1015                         result.id = .Slash;
   1016                         break;
   1017                     },
   1018                 },
   1019                 .line_comment_start => switch (c) {
   1020                     '/' => {
   1021                         state = .doc_comment_start;
   1022                     },
   1023                     '!' => {
   1024                         result.id = .ContainerDocComment;
   1025                         state = .container_doc_comment;
   1026                     },
   1027                     '\n' => break,
   1028                     '\t', '\r' => state = .line_comment,
   1029                     else => {
   1030                         state = .line_comment;
   1031                         self.checkLiteralCharacter();
   1032                     },
   1033                 },
   1034                 .doc_comment_start => switch (c) {
   1035                     '/' => {
   1036                         state = .line_comment;
   1037                     },
   1038                     '\n' => {
   1039                         result.id = .DocComment;
   1040                         break;
   1041                     },
   1042                     '\t', '\r' => {
   1043                         state = .doc_comment;
   1044                         result.id = .DocComment;
   1045                     },
   1046                     else => {
   1047                         state = .doc_comment;
   1048                         result.id = .DocComment;
   1049                         self.checkLiteralCharacter();
   1050                     },
   1051                 },
   1052                 .line_comment, .doc_comment, .container_doc_comment => switch (c) {
   1053                     '\n' => break,
   1054                     '\t', '\r' => {},
   1055                     else => self.checkLiteralCharacter(),
   1056                 },
   1057                 .zero => switch (c) {
   1058                     'b' => {
   1059                         state = .int_literal_bin_no_underscore;
   1060                     },
   1061                     'o' => {
   1062                         state = .int_literal_oct_no_underscore;
   1063                     },
   1064                     'x' => {
   1065                         state = .int_literal_hex_no_underscore;
   1066                     },
   1067                     '0'...'9', '_', '.', 'e', 'E' => {
   1068                         // reinterpret as a decimal number
   1069                         self.index -= 1;
   1070                         state = .int_literal_dec;
   1071                     },
   1072                     else => {
   1073                         if (isIdentifierChar(c)) {
   1074                             result.id = .Invalid;
   1075                         }
   1076                         break;
   1077                     },
   1078                 },
   1079                 .int_literal_bin_no_underscore => switch (c) {
   1080                     '0'...'1' => {
   1081                         state = .int_literal_bin;
   1082                     },
   1083                     else => {
   1084                         result.id = .Invalid;
   1085                         break;
   1086                     },
   1087                 },
   1088                 .int_literal_bin => switch (c) {
   1089                     '_' => {
   1090                         state = .int_literal_bin_no_underscore;
   1091                     },
   1092                     '0'...'1' => {},
   1093                     else => {
   1094                         if (isIdentifierChar(c)) {
   1095                             result.id = .Invalid;
   1096                         }
   1097                         break;
   1098                     },
   1099                 },
   1100                 .int_literal_oct_no_underscore => switch (c) {
   1101                     '0'...'7' => {
   1102                         state = .int_literal_oct;
   1103                     },
   1104                     else => {
   1105                         result.id = .Invalid;
   1106                         break;
   1107                     },
   1108                 },
   1109                 .int_literal_oct => switch (c) {
   1110                     '_' => {
   1111                         state = .int_literal_oct_no_underscore;
   1112                     },
   1113                     '0'...'7' => {},
   1114                     else => {
   1115                         if (isIdentifierChar(c)) {
   1116                             result.id = .Invalid;
   1117                         }
   1118                         break;
   1119                     },
   1120                 },
   1121                 .int_literal_dec_no_underscore => switch (c) {
   1122                     '0'...'9' => {
   1123                         state = .int_literal_dec;
   1124                     },
   1125                     else => {
   1126                         result.id = .Invalid;
   1127                         break;
   1128                     },
   1129                 },
   1130                 .int_literal_dec => switch (c) {
   1131                     '_' => {
   1132                         state = .int_literal_dec_no_underscore;
   1133                     },
   1134                     '.' => {
   1135                         state = .num_dot_dec;
   1136                         result.id = .FloatLiteral;
   1137                     },
   1138                     'e', 'E' => {
   1139                         state = .float_exponent_unsigned;
   1140                         result.id = .FloatLiteral;
   1141                     },
   1142                     '0'...'9' => {},
   1143                     else => {
   1144                         if (isIdentifierChar(c)) {
   1145                             result.id = .Invalid;
   1146                         }
   1147                         break;
   1148                     },
   1149                 },
   1150                 .int_literal_hex_no_underscore => switch (c) {
   1151                     '0'...'9', 'a'...'f', 'A'...'F' => {
   1152                         state = .int_literal_hex;
   1153                     },
   1154                     else => {
   1155                         result.id = .Invalid;
   1156                         break;
   1157                     },
   1158                 },
   1159                 .int_literal_hex => switch (c) {
   1160                     '_' => {
   1161                         state = .int_literal_hex_no_underscore;
   1162                     },
   1163                     '.' => {
   1164                         state = .num_dot_hex;
   1165                         result.id = .FloatLiteral;
   1166                     },
   1167                     'p', 'P' => {
   1168                         state = .float_exponent_unsigned;
   1169                         result.id = .FloatLiteral;
   1170                     },
   1171                     '0'...'9', 'a'...'f', 'A'...'F' => {},
   1172                     else => {
   1173                         if (isIdentifierChar(c)) {
   1174                             result.id = .Invalid;
   1175                         }
   1176                         break;
   1177                     },
   1178                 },
   1179                 .num_dot_dec => switch (c) {
   1180                     '.' => {
   1181                         result.id = .IntegerLiteral;
   1182                         self.index -= 1;
   1183                         state = .start;
   1184                         break;
   1185                     },
   1186                     'e', 'E' => {
   1187                         state = .float_exponent_unsigned;
   1188                     },
   1189                     '0'...'9' => {
   1190                         state = .float_fraction_dec;
   1191                     },
   1192                     else => {
   1193                         if (isIdentifierChar(c)) {
   1194                             result.id = .Invalid;
   1195                         }
   1196                         break;
   1197                     },
   1198                 },
   1199                 .num_dot_hex => switch (c) {
   1200                     '.' => {
   1201                         result.id = .IntegerLiteral;
   1202                         self.index -= 1;
   1203                         state = .start;
   1204                         break;
   1205                     },
   1206                     'p', 'P' => {
   1207                         state = .float_exponent_unsigned;
   1208                     },
   1209                     '0'...'9', 'a'...'f', 'A'...'F' => {
   1210                         result.id = .FloatLiteral;
   1211                         state = .float_fraction_hex;
   1212                     },
   1213                     else => {
   1214                         if (isIdentifierChar(c)) {
   1215                             result.id = .Invalid;
   1216                         }
   1217                         break;
   1218                     },
   1219                 },
   1220                 .float_fraction_dec_no_underscore => switch (c) {
   1221                     '0'...'9' => {
   1222                         state = .float_fraction_dec;
   1223                     },
   1224                     else => {
   1225                         result.id = .Invalid;
   1226                         break;
   1227                     },
   1228                 },
   1229                 .float_fraction_dec => switch (c) {
   1230                     '_' => {
   1231                         state = .float_fraction_dec_no_underscore;
   1232                     },
   1233                     'e', 'E' => {
   1234                         state = .float_exponent_unsigned;
   1235                     },
   1236                     '0'...'9' => {},
   1237                     else => {
   1238                         if (isIdentifierChar(c)) {
   1239                             result.id = .Invalid;
   1240                         }
   1241                         break;
   1242                     },
   1243                 },
   1244                 .float_fraction_hex_no_underscore => switch (c) {
   1245                     '0'...'9', 'a'...'f', 'A'...'F' => {
   1246                         state = .float_fraction_hex;
   1247                     },
   1248                     else => {
   1249                         result.id = .Invalid;
   1250                         break;
   1251                     },
   1252                 },
   1253                 .float_fraction_hex => switch (c) {
   1254                     '_' => {
   1255                         state = .float_fraction_hex_no_underscore;
   1256                     },
   1257                     'p', 'P' => {
   1258                         state = .float_exponent_unsigned;
   1259                     },
   1260                     '0'...'9', 'a'...'f', 'A'...'F' => {},
   1261                     else => {
   1262                         if (isIdentifierChar(c)) {
   1263                             result.id = .Invalid;
   1264                         }
   1265                         break;
   1266                     },
   1267                 },
   1268                 .float_exponent_unsigned => switch (c) {
   1269                     '+', '-' => {
   1270                         state = .float_exponent_num_no_underscore;
   1271                     },
   1272                     else => {
   1273                         // reinterpret as a normal exponent number
   1274                         self.index -= 1;
   1275                         state = .float_exponent_num_no_underscore;
   1276                     },
   1277                 },
   1278                 .float_exponent_num_no_underscore => switch (c) {
   1279                     '0'...'9' => {
   1280                         state = .float_exponent_num;
   1281                     },
   1282                     else => {
   1283                         result.id = .Invalid;
   1284                         break;
   1285                     },
   1286                 },
   1287                 .float_exponent_num => switch (c) {
   1288                     '_' => {
   1289                         state = .float_exponent_num_no_underscore;
   1290                     },
   1291                     '0'...'9' => {},
   1292                     else => {
   1293                         if (isIdentifierChar(c)) {
   1294                             result.id = .Invalid;
   1295                         }
   1296                         break;
   1297                     },
   1298                 },
   1299             }
   1300         } else if (self.index == self.buffer.len) {
   1301             switch (state) {
   1302                 .start,
   1303                 .int_literal_dec,
   1304                 .int_literal_bin,
   1305                 .int_literal_oct,
   1306                 .int_literal_hex,
   1307                 .num_dot_dec,
   1308                 .num_dot_hex,
   1309                 .float_fraction_dec,
   1310                 .float_fraction_hex,
   1311                 .float_exponent_num,
   1312                 .string_literal, // find this error later
   1313                 .multiline_string_literal_line,
   1314                 .builtin,
   1315                 => {},
   1316 
   1317                 .identifier => {
   1318                     if (Token.getKeyword(self.buffer[result.loc.start..self.index])) |id| {
   1319                         result.id = id;
   1320                     }
   1321                 },
   1322                 .line_comment, .line_comment_start => {
   1323                     result.id = .LineComment;
   1324                 },
   1325                 .doc_comment, .doc_comment_start => {
   1326                     result.id = .DocComment;
   1327                 },
   1328                 .container_doc_comment => {
   1329                     result.id = .ContainerDocComment;
   1330                 },
   1331 
   1332                 .int_literal_dec_no_underscore,
   1333                 .int_literal_bin_no_underscore,
   1334                 .int_literal_oct_no_underscore,
   1335                 .int_literal_hex_no_underscore,
   1336                 .float_fraction_dec_no_underscore,
   1337                 .float_fraction_hex_no_underscore,
   1338                 .float_exponent_num_no_underscore,
   1339                 .float_exponent_unsigned,
   1340                 .saw_at_sign,
   1341                 .backslash,
   1342                 .char_literal,
   1343                 .char_literal_backslash,
   1344                 .char_literal_hex_escape,
   1345                 .char_literal_unicode_escape_saw_u,
   1346                 .char_literal_unicode_escape,
   1347                 .char_literal_unicode_invalid,
   1348                 .char_literal_end,
   1349                 .char_literal_unicode,
   1350                 .string_literal_backslash,
   1351                 => {
   1352                     result.id = .Invalid;
   1353                 },
   1354 
   1355                 .equal => {
   1356                     result.id = .Equal;
   1357                 },
   1358                 .bang => {
   1359                     result.id = .Bang;
   1360                 },
   1361                 .minus => {
   1362                     result.id = .Minus;
   1363                 },
   1364                 .slash => {
   1365                     result.id = .Slash;
   1366                 },
   1367                 .zero => {
   1368                     result.id = .IntegerLiteral;
   1369                 },
   1370                 .ampersand => {
   1371                     result.id = .Ampersand;
   1372                 },
   1373                 .period => {
   1374                     result.id = .Period;
   1375                 },
   1376                 .period_2 => {
   1377                     result.id = .Ellipsis2;
   1378                 },
   1379                 .pipe => {
   1380                     result.id = .Pipe;
   1381                 },
   1382                 .angle_bracket_angle_bracket_right => {
   1383                     result.id = .AngleBracketAngleBracketRight;
   1384                 },
   1385                 .angle_bracket_right => {
   1386                     result.id = .AngleBracketRight;
   1387                 },
   1388                 .angle_bracket_angle_bracket_left => {
   1389                     result.id = .AngleBracketAngleBracketLeft;
   1390                 },
   1391                 .angle_bracket_left => {
   1392                     result.id = .AngleBracketLeft;
   1393                 },
   1394                 .plus_percent => {
   1395                     result.id = .PlusPercent;
   1396                 },
   1397                 .plus => {
   1398                     result.id = .Plus;
   1399                 },
   1400                 .percent => {
   1401                     result.id = .Percent;
   1402                 },
   1403                 .caret => {
   1404                     result.id = .Caret;
   1405                 },
   1406                 .asterisk_percent => {
   1407                     result.id = .AsteriskPercent;
   1408                 },
   1409                 .asterisk => {
   1410                     result.id = .Asterisk;
   1411                 },
   1412                 .minus_percent => {
   1413                     result.id = .MinusPercent;
   1414                 },
   1415             }
   1416         }
   1417 
   1418         if (result.id == .Eof) {
   1419             if (self.pending_invalid_token) |token| {
   1420                 self.pending_invalid_token = null;
   1421                 return token;
   1422             }
   1423         }
   1424 
   1425         result.loc.end = self.index;
   1426         return result;
   1427     }
   1428 
   1429     fn checkLiteralCharacter(self: *Tokenizer) void {
   1430         if (self.pending_invalid_token != null) return;
   1431         const invalid_length = self.getInvalidCharacterLength();
   1432         if (invalid_length == 0) return;
   1433         self.pending_invalid_token = .{
   1434             .id = .Invalid,
   1435             .loc = .{
   1436                 .start = self.index,
   1437                 .end = self.index + invalid_length,
   1438             },
   1439         };
   1440     }
   1441 
   1442     fn getInvalidCharacterLength(self: *Tokenizer) u3 {
   1443         const c0 = self.buffer[self.index];
   1444         if (c0 < 0x80) {
   1445             if (c0 < 0x20 or c0 == 0x7f) {
   1446                 // ascii control codes are never allowed
   1447                 // (note that \n was checked before we got here)
   1448                 return 1;
   1449             }
   1450             // looks fine to me.
   1451             return 0;
   1452         } else {
   1453             // check utf8-encoded character.
   1454             const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
   1455             if (self.index + length > self.buffer.len) {
   1456                 return @intCast(u3, self.buffer.len - self.index);
   1457             }
   1458             const bytes = self.buffer[self.index .. self.index + length];
   1459             switch (length) {
   1460                 2 => {
   1461                     const value = std.unicode.utf8Decode2(bytes) catch return length;
   1462                     if (value == 0x85) return length; // U+0085 (NEL)
   1463                 },
   1464                 3 => {
   1465                     const value = std.unicode.utf8Decode3(bytes) catch return length;
   1466                     if (value == 0x2028) return length; // U+2028 (LS)
   1467                     if (value == 0x2029) return length; // U+2029 (PS)
   1468                 },
   1469                 4 => {
   1470                     _ = std.unicode.utf8Decode4(bytes) catch return length;
   1471                 },
   1472                 else => unreachable,
   1473             }
   1474             self.index += length - 1;
   1475             return 0;
   1476         }
   1477     }
   1478 };
   1479 
   1480 test "tokenizer" {
   1481     testTokenize("test", &[_]Token.Id{.Keyword_test});
   1482 }
   1483 
   1484 test "tokenizer - unknown length pointer and then c pointer" {
   1485     testTokenize(
   1486         \\[*]u8
   1487         \\[*c]u8
   1488     , &[_]Token.Id{
   1489         .LBracket,
   1490         .Asterisk,
   1491         .RBracket,
   1492         .Identifier,
   1493         .LBracket,
   1494         .Asterisk,
   1495         .Identifier,
   1496         .RBracket,
   1497         .Identifier,
   1498     });
   1499 }
   1500 
   1501 test "tokenizer - char literal with hex escape" {
   1502     testTokenize(
   1503         \\'\x1b'
   1504     , &[_]Token.Id{.CharLiteral});
   1505     testTokenize(
   1506         \\'\x1'
   1507     , &[_]Token.Id{ .Invalid, .Invalid });
   1508 }
   1509 
   // Table of `\u{...}` escapes inside char/string literals together with the
   // token ids the tokenizer is expected to produce for each input.
   test "tokenizer - char literal with unicode escapes" {
       const Case = struct {
           source: []const u8,
           expected: []const Token.Id,
       };

       const char_literal = [_]Token.Id{.CharLiteral};
       const one_invalid = [_]Token.Id{.Invalid};
       const two_invalid = [_]Token.Id{ .Invalid, .Invalid };

       const cases = [_]Case{
           // Valid unicode escapes
           .{ .source = "'\\u{3}'", .expected = &char_literal },
           .{ .source = "'\\u{01}'", .expected = &char_literal },
           .{ .source = "'\\u{2a}'", .expected = &char_literal },
           .{ .source = "'\\u{3f9}'", .expected = &char_literal },
           .{ .source = "'\\u{6E09aBc1523}'", .expected = &char_literal },
           .{ .source = "\"\\u{440}\"", .expected = &[_]Token.Id{.StringLiteral} },

           // Invalid unicode escapes
           .{ .source = "'\\u'", .expected = &one_invalid },
           .{ .source = "'\\u{{'", .expected = &two_invalid },
           .{ .source = "'\\u{}'", .expected = &two_invalid },
           .{ .source = "'\\u{s}'", .expected = &two_invalid },
           .{ .source = "'\\u{2z}'", .expected = &two_invalid },
           .{ .source = "'\\u{4a'", .expected = &one_invalid },

           // Old-style unicode literals (no braces) are no longer accepted
           .{ .source = "'\\u0333'", .expected = &two_invalid },
           .{ .source = "'\\U0333'", .expected = &[_]Token.Id{ .Invalid, .IntegerLiteral, .Invalid } },
       };

       for (cases) |case| {
           testTokenize(case.source, case.expected);
       }
   }
   1559 
   // A multi-byte UTF-8 code point written directly between quotes is one CharLiteral.
   test "tokenizer - char literal with unicode code point" {
       const expected = [_]Token.Id{.CharLiteral};
       testTokenize("'💩'", &expected);
   }
   1565 
   // Decimal float with a signed `e` exponent, used as the right side of an assignment.
   test "tokenizer - float literal e exponent" {
       const source = "a = 4.94065645841246544177e-324;\n";
       const expected = [_]Token.Id{ .Identifier, .Equal, .FloatLiteral, .Semicolon };
       testTokenize(source, &expected);
   }
   1574 
   // Hexadecimal float with a signed `p` exponent, used as the right side of an assignment.
   test "tokenizer - float literal p exponent" {
       const source = "a = 0x1.a827999fcef32p+1022;\n";
       const expected = [_]Token.Id{ .Identifier, .Equal, .FloatLiteral, .Semicolon };
       testTokenize(source, &expected);
   }
   1583 
   // The simplest char literal: a single ASCII character between single quotes.
   test "tokenizer - chars" {
       const expected = [_]Token.Id{.CharLiteral};
       testTokenize("'c'", &expected);
   }
   1587 
   // Stray characters and unterminated char literals each yield one Invalid token;
   // an empty char literal `''` yields two.
   test "tokenizer - invalid token characters" {
       const single_invalid = [_][]const u8{ "#", "`", "'c", "'" };
       for (single_invalid) |source| {
           testTokenize(source, &[_]Token.Id{.Invalid});
       }

       testTokenize("''", &[_]Token.Id{ .Invalid, .Invalid });
   }
   1595 
   // Control bytes (0x00, 0x1f, 0x7f) inside a string literal or a line comment:
   // the enclosing token is still reported, followed by an Invalid token.
   test "tokenizer - invalid literal/comment characters" {
       testTokenize("\"\x00\"", &[_]Token.Id{ .StringLiteral, .Invalid });

       const bad_comments = [_][]const u8{ "//\x00", "//\x1f", "//\x7f" };
       for (bad_comments) |source| {
           testTokenize(source, &[_]Token.Id{ .LineComment, .Invalid });
       }
   }
   1614 
   // Well-formed multi-byte UTF-8 (U+0080 and U+10FFFF) is accepted inside a line comment.
   test "tokenizer - utf8" {
       const sources = [_][]const u8{
           "//\xc2\x80",
           "//\xf4\x8f\xbf\xbf",
       };
       for (sources) |source| {
           testTokenize(source, &[_]Token.Id{.LineComment});
       }
   }
   1619 
   // Malformed UTF-8 byte sequences inside a line comment: every input must
   // produce the LineComment token followed by an Invalid token.
   test "tokenizer - invalid utf8" {
       const sources = [_][]const u8{
           "//\x80",
           "//\xbf",
           "//\xf8",
           "//\xff",
           "//\xc2\xc0",
           "//\xe0",
           "//\xf0",
           "//\xf0\x90\x80\xc0",
       };
       const expected = [_]Token.Id{ .LineComment, .Invalid };

       for (sources) |source| {
           testTokenize(source, &expected);
       }
   }
   1654 
   // The unicode newline characters U+0085, U+2028 and U+2029 are rejected inside
   // a line comment, while the code points immediately around them are accepted.
   test "tokenizer - illegal unicode codepoints" {
       const Case = struct {
           source: []const u8,
           expected: []const Token.Id,
       };

       const accepted = [_]Token.Id{.LineComment};
       const rejected = [_]Token.Id{ .LineComment, .Invalid };

       const cases = [_]Case{
           .{ .source = "//\xc2\x84", .expected = &accepted },
           .{ .source = "//\xc2\x85", .expected = &rejected }, // U+0085
           .{ .source = "//\xc2\x86", .expected = &accepted },
           .{ .source = "//\xe2\x80\xa7", .expected = &accepted },
           .{ .source = "//\xe2\x80\xa8", .expected = &rejected }, // U+2028
           .{ .source = "//\xe2\x80\xa9", .expected = &rejected }, // U+2029
           .{ .source = "//\xe2\x80\xaa", .expected = &accepted },
       };

       for (cases) |case| {
           testTokenize(case.source, case.expected);
       }
   }
   1674 
   // `@"if"` lexes as an Identifier (not the keyword) and `@import` as a Builtin.
   test "tokenizer - string identifier and builtin fns" {
       const source =
           \\const @"if" = @import("std");
       ;
       const expected = [_]Token.Id{
           .Keyword_const,
           .Identifier,
           .Equal,
           .Builtin,
           .LParen,
           .StringLiteral,
           .RParen,
           .Semicolon,
       };
       testTokenize(source, &expected);
   }
   1689 
   // A multiline string literal line (`\\foo<TAB>bar`) may contain a raw tab byte.
   // The input is spelled with escapes here so the tab is visible in the test source.
   test "tokenizer - multiline string literal with literal tab" {
       const source = "\\\\foo\tbar";
       const expected = [_]Token.Id{.MultilineStringLiteralLine};
       testTokenize(source, &expected);
   }
   1697 
   // Raw tab bytes are allowed in every comment flavour, and a tab right after the
   // comment introducer does not change which kind of comment it is.
   test "tokenizer - comments with literal tab" {
       // One comment per line; the last line has no trailing newline.
       const source = "//foo\tbar\n" ++
           "//!foo\tbar\n" ++
           "///foo\tbar\n" ++
           "//\tfoo\n" ++
           "///\tfoo\n" ++
           "///\t/foo";
       const expected = [_]Token.Id{
           .LineComment,
           .ContainerDocComment,
           .DocComment,
           .LineComment,
           .DocComment,
           .DocComment,
       };
       testTokenize(source, &expected);
   }
   1715 
   // `||=` is split into the `||` token followed by a separate `=` token.
   test "tokenizer - pipe and then invalid" {
       const expected = [_]Token.Id{ .PipePipe, .Equal };
       testTokenize("||=", &expected);
   }
   1722 
   // Distinguishes `//` (line comment), `///` (doc comment), `////` (line comment
   // again) and `//!` (container doc comment).
   test "tokenizer - line comment and doc comment" {
       const Case = struct {
           source: []const u8,
           expected: Token.Id,
       };

       const cases = [_]Case{
           .{ .source = "//", .expected = .LineComment },
           .{ .source = "// a / b", .expected = .LineComment },
           .{ .source = "// /", .expected = .LineComment },
           .{ .source = "/// a", .expected = .DocComment },
           .{ .source = "///", .expected = .DocComment },
           .{ .source = "////", .expected = .LineComment },
           .{ .source = "//!", .expected = .ContainerDocComment },
           .{ .source = "//!!", .expected = .ContainerDocComment },
       };

       for (cases) |case| {
           testTokenize(case.source, &[_]Token.Id{case.expected});
       }
   }
   1733 
   // A line comment ends at the newline; the identifier on the following line is
   // tokenized normally.
   test "tokenizer - line comment followed by identifier" {
       const source =
           \\    Unexpected,
           \\    // another
           \\    Another,
       ;
       const expected = [_]Token.Id{
           .Identifier,
           .Comma,
           .LineComment,
           .Identifier,
           .Comma,
       };
       testTokenize(source, &expected);
   }
   1747 
   // A leading UTF-8 byte order mark (EF BB BF) produces no token of its own.
   test "tokenizer - UTF-8 BOM is recognized and skipped" {
       const source = "\xEF\xBB\xBFa;\n";
       const expected = [_]Token.Id{ .Identifier, .Semicolon };
       testTokenize(source, &expected);
   }
   1754 
   // `.*=` must lex as `.*` followed by `=`.
   test "correctly parse pointer assignment" {
       const source = "b.*=3;\n";
       const expected = [_]Token.Id{
           .Identifier,
           .PeriodAsterisk,
           .Equal,
           .IntegerLiteral,
           .Semicolon,
       };
       testTokenize(source, &expected);
   }
   1764 
   // `...` directly after a literal is an Ellipsis3 token; the dots are not
   // absorbed into the preceding number as a fractional part.
   test "tokenizer - range literals" {
       const int_range = [_]Token.Id{ .IntegerLiteral, .Ellipsis3, .IntegerLiteral };
       const char_range = [_]Token.Id{ .CharLiteral, .Ellipsis3, .CharLiteral };

       testTokenize("0...9", &int_range);
       testTokenize("'0'...'9'", &char_range);

       const prefixed = [_][]const u8{ "0x00...0x09", "0b00...0b11", "0o00...0o11" };
       for (prefixed) |source| {
           testTokenize(source, &int_range);
       }
   }
   1772 
   // Table of decimal integer/float literal inputs (digit separators, exponents and
   // malformed variants) with the token ids expected for each.
   test "tokenizer - number literals decimal" {
       const Case = struct {
           source: []const u8,
           expected: []const Token.Id,
       };

       const integer = [_]Token.Id{.IntegerLiteral};
       const float = [_]Token.Id{.FloatLiteral};
       const invalid = [_]Token.Id{.Invalid};
       const invalid_ident = [_]Token.Id{ .Invalid, .Identifier };
       const invalid_comma = [_]Token.Id{ .Invalid, .Comma };

       const cases = [_]Case{
           // Single digits and trailing garbage
           .{ .source = "0", .expected = &integer },
           .{ .source = "1", .expected = &integer },
           .{ .source = "2", .expected = &integer },
           .{ .source = "3", .expected = &integer },
           .{ .source = "4", .expected = &integer },
           .{ .source = "5", .expected = &integer },
           .{ .source = "6", .expected = &integer },
           .{ .source = "7", .expected = &integer },
           .{ .source = "8", .expected = &integer },
           .{ .source = "9", .expected = &integer },
           .{ .source = "1..", .expected = &[_]Token.Id{ .IntegerLiteral, .Ellipsis2 } },
           .{ .source = "0a", .expected = &invalid_ident },
           .{ .source = "9b", .expected = &invalid_ident },
           .{ .source = "1z", .expected = &invalid_ident },
           .{ .source = "1z_1", .expected = &invalid_ident },
           .{ .source = "9z3", .expected = &invalid_ident },

           // Leading zeros and well-placed digit separators
           .{ .source = "0_0", .expected = &integer },
           .{ .source = "0001", .expected = &integer },
           .{ .source = "01234567890", .expected = &integer },
           .{ .source = "012_345_6789_0", .expected = &integer },
           .{ .source = "0_1_2_3_4_5_6_7_8_9_0", .expected = &integer },

           // Misplaced digit separators
           .{ .source = "00_", .expected = &invalid },
           .{ .source = "0_0_", .expected = &invalid },
           .{ .source = "0__0", .expected = &invalid_ident },
           .{ .source = "0_0f", .expected = &invalid_ident },
           .{ .source = "0_0_f", .expected = &invalid_ident },
           .{ .source = "0_0_f_00", .expected = &invalid_ident },
           .{ .source = "1_,", .expected = &invalid_comma },

           // Well-formed floats
           .{ .source = "1.", .expected = &float },
           .{ .source = "0.0", .expected = &float },
           .{ .source = "1.0", .expected = &float },
           .{ .source = "10.0", .expected = &float },
           .{ .source = "0e0", .expected = &float },
           .{ .source = "1e0", .expected = &float },
           .{ .source = "1e100", .expected = &float },
           .{ .source = "1.e100", .expected = &float },
           .{ .source = "1.0e100", .expected = &float },
           .{ .source = "1.0e+100", .expected = &float },
           .{ .source = "1.0e-100", .expected = &float },
           .{ .source = "1_0_0_0.0_0_0_0_0_1e1_0_0_0", .expected = &float },
           .{ .source = "1.+", .expected = &[_]Token.Id{ .FloatLiteral, .Plus } },

           // Malformed floats
           .{ .source = "1e", .expected = &invalid },
           .{ .source = "1.0e1f0", .expected = &invalid_ident },
           .{ .source = "1.0p100", .expected = &invalid_ident },
           .{ .source = "1.0p-100", .expected = &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral } },
           .{ .source = "1.0p1f0", .expected = &invalid_ident },
           .{ .source = "1.0_,", .expected = &invalid_comma },
           .{ .source = "1_.0", .expected = &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral } },
           .{ .source = "1._", .expected = &invalid_ident },
           .{ .source = "1.a", .expected = &invalid_ident },
           .{ .source = "1.z", .expected = &invalid_ident },
           .{ .source = "1._0", .expected = &invalid_ident },
           .{ .source = "1._+", .expected = &[_]Token.Id{ .Invalid, .Identifier, .Plus } },
           .{ .source = "1._e", .expected = &invalid_ident },
           .{ .source = "1.0e", .expected = &invalid },
           .{ .source = "1.0e,", .expected = &invalid_comma },
           .{ .source = "1.0e_", .expected = &invalid_ident },
           .{ .source = "1.0e+_", .expected = &invalid_ident },
           .{ .source = "1.0e-_", .expected = &invalid_ident },
           .{ .source = "1.0e0_+", .expected = &[_]Token.Id{ .Invalid, .Plus } },
       };

       for (cases) |case| {
           testTokenize(case.source, case.expected);
       }
   }
   1839 
   // Table of `0b` literal inputs (valid digits, out-of-range digits, separators,
   // and float-like suffixes) with the token ids expected for each.
   test "tokenizer - number literals binary" {
       const Case = struct {
           source: []const u8,
           expected: []const Token.Id,
       };

       const integer = [_]Token.Id{.IntegerLiteral};
       const invalid = [_]Token.Id{.Invalid};
       const invalid_int = [_]Token.Id{ .Invalid, .IntegerLiteral };
       const invalid_ident = [_]Token.Id{ .Invalid, .Identifier };

       const cases = [_]Case{
           // One character after the prefix
           .{ .source = "0b0", .expected = &integer },
           .{ .source = "0b1", .expected = &integer },
           .{ .source = "0b2", .expected = &invalid_int },
           .{ .source = "0b3", .expected = &invalid_int },
           .{ .source = "0b4", .expected = &invalid_int },
           .{ .source = "0b5", .expected = &invalid_int },
           .{ .source = "0b6", .expected = &invalid_int },
           .{ .source = "0b7", .expected = &invalid_int },
           .{ .source = "0b8", .expected = &invalid_int },
           .{ .source = "0b9", .expected = &invalid_int },
           .{ .source = "0ba", .expected = &invalid_ident },
           .{ .source = "0bb", .expected = &invalid_ident },
           .{ .source = "0bc", .expected = &invalid_ident },
           .{ .source = "0bd", .expected = &invalid_ident },
           .{ .source = "0be", .expected = &invalid_ident },
           .{ .source = "0bf", .expected = &invalid_ident },
           .{ .source = "0bz", .expected = &invalid_ident },

           // Longer literals; a period never starts a binary fraction
           .{ .source = "0b0000_0000", .expected = &integer },
           .{ .source = "0b1111_1111", .expected = &integer },
           .{ .source = "0b10_10_10_10", .expected = &integer },
           .{ .source = "0b0_1_0_1_0_1_0_1", .expected = &integer },
           .{ .source = "0b1.", .expected = &[_]Token.Id{ .IntegerLiteral, .Period } },
           .{ .source = "0b1.0", .expected = &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral } },

           // Wrong prefix case, misplaced separators, exponent suffixes
           .{ .source = "0B0", .expected = &invalid_ident },
           .{ .source = "0b_", .expected = &invalid_ident },
           .{ .source = "0b_0", .expected = &invalid_ident },
           .{ .source = "0b1_", .expected = &invalid },
           .{ .source = "0b0__1", .expected = &invalid_ident },
           .{ .source = "0b0_1_", .expected = &invalid },
           .{ .source = "0b1e", .expected = &invalid_ident },
           .{ .source = "0b1p", .expected = &invalid_ident },
           .{ .source = "0b1e0", .expected = &invalid_ident },
           .{ .source = "0b1p0", .expected = &invalid_ident },
           .{ .source = "0b1_,", .expected = &[_]Token.Id{ .Invalid, .Comma } },
       };

       for (cases) |case| {
           testTokenize(case.source, case.expected);
       }
   }
   1878 
   // Table of `0o` literal inputs (valid digits, out-of-range digits, separators,
   // and float-like suffixes) with the token ids expected for each.
   test "tokenizer - number literals octal" {
       const Case = struct {
           source: []const u8,
           expected: []const Token.Id,
       };

       const integer = [_]Token.Id{.IntegerLiteral};
       const invalid = [_]Token.Id{.Invalid};
       const invalid_int = [_]Token.Id{ .Invalid, .IntegerLiteral };
       const invalid_ident = [_]Token.Id{ .Invalid, .Identifier };

       const cases = [_]Case{
           // One character after the prefix
           .{ .source = "0o0", .expected = &integer },
           .{ .source = "0o1", .expected = &integer },
           .{ .source = "0o2", .expected = &integer },
           .{ .source = "0o3", .expected = &integer },
           .{ .source = "0o4", .expected = &integer },
           .{ .source = "0o5", .expected = &integer },
           .{ .source = "0o6", .expected = &integer },
           .{ .source = "0o7", .expected = &integer },
           .{ .source = "0o8", .expected = &invalid_int },
           .{ .source = "0o9", .expected = &invalid_int },
           .{ .source = "0oa", .expected = &invalid_ident },
           .{ .source = "0ob", .expected = &invalid_ident },
           .{ .source = "0oc", .expected = &invalid_ident },
           .{ .source = "0od", .expected = &invalid_ident },
           .{ .source = "0oe", .expected = &invalid_ident },
           .{ .source = "0of", .expected = &invalid_ident },
           .{ .source = "0oz", .expected = &invalid_ident },

           // Longer literals; a period never starts an octal fraction
           .{ .source = "0o01234567", .expected = &integer },
           .{ .source = "0o0123_4567", .expected = &integer },
           .{ .source = "0o01_23_45_67", .expected = &integer },
           .{ .source = "0o0_1_2_3_4_5_6_7", .expected = &integer },
           .{ .source = "0o7.", .expected = &[_]Token.Id{ .IntegerLiteral, .Period } },
           .{ .source = "0o7.0", .expected = &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral } },

           // Wrong prefix case, misplaced separators, exponent suffixes
           .{ .source = "0O0", .expected = &invalid_ident },
           .{ .source = "0o_", .expected = &invalid_ident },
           .{ .source = "0o_0", .expected = &invalid_ident },
           .{ .source = "0o1_", .expected = &invalid },
           .{ .source = "0o0__1", .expected = &invalid_ident },
           .{ .source = "0o0_1_", .expected = &invalid },
           .{ .source = "0o1e", .expected = &invalid_ident },
           .{ .source = "0o1p", .expected = &invalid_ident },
           .{ .source = "0o1e0", .expected = &invalid_ident },
           .{ .source = "0o1p0", .expected = &invalid_ident },
           .{ .source = "0o_,", .expected = &[_]Token.Id{ .Invalid, .Identifier, .Comma } },
       };

       for (cases) |case| {
           testTokenize(case.source, case.expected);
       }
   }
   1917 
   // Hexadecimal integer and float literals: every digit in both cases, digit
   // separators, `p` exponents, and the malformed variants of each.
   // Fixes relative to the previous version: the test name was misspelled
   // ("hexadeciaml"), and the final case listed `.Eof` explicitly even though
   // testTokenize already checks for Eof after the expected tokens.
   test "tokenizer - number literals hexadecimal" {
       // Single hex digits, lower and upper case
       testTokenize("0x0", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x1", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x2", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x3", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x4", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x5", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x6", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x7", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x8", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x9", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xa", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xb", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xc", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xd", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xe", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xf", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xA", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xB", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xC", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xD", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xE", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0xF", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier });

       // Longer integers with well-placed digit separators
       testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral});

       // Wrong prefix case and misplaced digit separators
       testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x1_", &[_]Token.Id{.Invalid});
       testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0_1_", &[_]Token.Id{.Invalid});
       testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma });

       // Short hex floats
       testTokenize("0x1.", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xF.", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral });

       // Longer hex floats with separators and signed exponents
       testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral});
       testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral});

       // `e` is a hex digit here, not an exponent marker
       testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral});
       testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral});

       // Malformed hex floats
       testTokenize("0x1p", &[_]Token.Id{.Invalid});
       testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0.p", &[_]Token.Id{.Invalid});
       testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral });
       testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral });
       testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0.0_", &[_]Token.Id{.Invalid});
       testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier });
       testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier });
       testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier });
       // The trailing Eof is verified by testTokenize itself, as in every other case.
       testTokenize("0x0.0p0_", &[_]Token.Id{.Invalid});
   }
   2003 
   /// Tokenizes `source` and checks the resulting token ids against `expected_tokens`.
   ///
   /// One token is pulled from a fresh `Tokenizer` for every entry of
   /// `expected_tokens`, in order; after the last expected entry the next token
   /// must be `.Eof`. Any mismatch panics with a message that names the offending
   /// input and the byte range of the unexpected token, so a failure can be traced
   /// back to the exact call among the many in this file.
   ///
   /// `source` is written to the panic message as raw bytes, so inputs containing
   /// control characters or invalid UTF-8 appear unescaped.
   fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
       var tokenizer = Tokenizer.init(source);
       for (expected_tokens) |expected_token_id| {
           const token = tokenizer.next();
           if (token.id != expected_token_id) {
               // `{s}` is the explicit string specifier; plain `{}` on a byte slice is ambiguous.
               std.debug.panic("tokenizing \"{s}\": expected {s}, found {s} at bytes {}..{}\n", .{
                   source,
                   @tagName(expected_token_id),
                   @tagName(token.id),
                   token.loc.start,
                   token.loc.end,
               });
           }
       }
       // Nothing may follow the expected tokens.
       const last_token = tokenizer.next();
       if (last_token.id != .Eof) {
           std.debug.panic("tokenizing \"{s}\": expected Eof after {} tokens, found {s} at bytes {}..{}\n", .{
               source,
               expected_tokens.len,
               @tagName(last_token.id),
               last_token.loc.start,
               last_token.loc.end,
           });
       }
   }