zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

Tokenizer.zig (70343B) - Raw


      1 const std = @import("std");
      2 const assert = std.debug.assert;
      3 const Compilation = @import("Compilation.zig");
      4 const Source = @import("Source.zig");
      5 const LangOpts = @import("LangOpts.zig");
      6 
      7 pub const Token = struct {
      8     id: Id,
      9     source: Source.Id,
     10     start: u32 = 0,
     11     end: u32 = 0,
     12     line: u32 = 0,
     13 
     14     pub const Id = enum(u8) {
     15         invalid,
     16         nl,
     17         whitespace,
     18         eof,
     19         /// identifier containing solely basic character set characters
     20         identifier,
     21         /// identifier with at least one extended character
     22         extended_identifier,
     23 
     24         // string literals with prefixes
     25         string_literal,
     26         string_literal_utf_16,
     27         string_literal_utf_8,
     28         string_literal_utf_32,
     29         string_literal_wide,
     30 
     31         /// Any string literal with an embedded newline or EOF
     32         /// Always a parser error; by default just a warning from preprocessor
     33         unterminated_string_literal,
     34 
     35         // <foobar> only generated by preprocessor
     36         macro_string,
     37 
     38         // char literals with prefixes
     39         char_literal,
     40         char_literal_utf_8,
     41         char_literal_utf_16,
     42         char_literal_utf_32,
     43         char_literal_wide,
     44 
     45         /// Any character literal with nothing inside the quotes
     46         /// Always a parser error; by default just a warning from preprocessor
     47         empty_char_literal,
     48 
     49         /// Any character literal with an embedded newline or EOF
     50         /// Always a parser error; by default just a warning from preprocessor
     51         unterminated_char_literal,
     52 
     53         /// `/* */` style comment without a closing `*/` before EOF
     54         unterminated_comment,
     55 
     56         /// Integer literal tokens generated by preprocessor.
     57         one,
     58         zero,
     59 
     60         bang,
     61         bang_equal,
     62         pipe,
     63         pipe_pipe,
     64         pipe_equal,
     65         equal,
     66         equal_equal,
     67         l_paren,
     68         r_paren,
     69         l_brace,
     70         r_brace,
     71         l_bracket,
     72         r_bracket,
     73         period,
     74         ellipsis,
     75         caret,
     76         caret_equal,
     77         plus,
     78         plus_plus,
     79         plus_equal,
     80         minus,
     81         minus_minus,
     82         minus_equal,
     83         asterisk,
     84         asterisk_equal,
     85         percent,
     86         percent_equal,
     87         arrow,
     88         colon,
     89         colon_colon,
     90         semicolon,
     91         slash,
     92         slash_equal,
     93         comma,
     94         ampersand,
     95         ampersand_ampersand,
     96         ampersand_equal,
     97         question_mark,
     98         angle_bracket_left,
     99         angle_bracket_left_equal,
    100         angle_bracket_angle_bracket_left,
    101         angle_bracket_angle_bracket_left_equal,
    102         angle_bracket_right,
    103         angle_bracket_right_equal,
    104         angle_bracket_angle_bracket_right,
    105         angle_bracket_angle_bracket_right_equal,
    106         tilde,
    107         hash,
    108         hash_hash,
    109 
    110         /// Special token to speed up preprocessing, `loc.end` will be an index to the param list.
    111         macro_param,
    112         /// Special token to signal that the argument must be replaced without expansion (e.g. in concatenation)
    113         macro_param_no_expand,
    114         /// Special token to speed up preprocessing, `loc.end` will be an index to the param list.
    115         stringify_param,
    116         /// Same as stringify_param, but for var args
    117         stringify_va_args,
    118         /// Special macro whitespace, always equal to a single space
    119         macro_ws,
    120         /// Special token for implementing __has_attribute
    121         macro_param_has_attribute,
    122         /// Special token for implementing __has_c_attribute
    123         macro_param_has_c_attribute,
    124         /// Special token for implementing __has_declspec_attribute
    125         macro_param_has_declspec_attribute,
    126         /// Special token for implementing __has_warning
    127         macro_param_has_warning,
    128         /// Special token for implementing __has_feature
    129         macro_param_has_feature,
    130         /// Special token for implementing __has_extension
    131         macro_param_has_extension,
    132         /// Special token for implementing __has_builtin
    133         macro_param_has_builtin,
    134         /// Special token for implementing __has_include
    135         macro_param_has_include,
    136         /// Special token for implementing __has_include_next
    137         macro_param_has_include_next,
    138         /// Special token for implementing __has_embed
    139         macro_param_has_embed,
    140         /// Special token for implementing __is_identifier
    141         macro_param_is_identifier,
    142         /// Special token for implementing __FILE__
    143         macro_file,
    144         /// Special token for implementing __LINE__
    145         macro_line,
    146         /// Special token for implementing __COUNTER__
    147         macro_counter,
    148         /// Special token for implementing _Pragma
    149         macro_param_pragma_operator,
    150 
    151         /// Special identifier for implementing __func__
    152         macro_func,
    153         /// Special identifier for implementing __FUNCTION__
    154         macro_function,
    155         /// Special identifier for implementing __PRETTY_FUNCTION__
    156         macro_pretty_func,
    157 
    158         keyword_auto,
    159         keyword_auto_type,
    160         keyword_break,
    161         keyword_case,
    162         keyword_char,
    163         keyword_const,
    164         keyword_continue,
    165         keyword_default,
    166         keyword_do,
    167         keyword_double,
    168         keyword_else,
    169         keyword_enum,
    170         keyword_extern,
    171         keyword_float,
    172         keyword_for,
    173         keyword_goto,
    174         keyword_if,
    175         keyword_int,
    176         keyword_long,
    177         keyword_register,
    178         keyword_return,
    179         keyword_short,
    180         keyword_signed,
    181         keyword_signed1,
    182         keyword_signed2,
    183         keyword_sizeof,
    184         keyword_static,
    185         keyword_struct,
    186         keyword_switch,
    187         keyword_typedef,
    188         keyword_typeof1,
    189         keyword_typeof2,
    190         keyword_union,
    191         keyword_unsigned,
    192         keyword_void,
    193         keyword_volatile,
    194         keyword_while,
    195 
    196         // ISO C99
    197         keyword_bool,
    198         keyword_complex,
    199         keyword_imaginary,
    200         keyword_inline,
    201         keyword_restrict,
    202 
    203         // ISO C11
    204         keyword_alignas,
    205         keyword_alignof,
    206         keyword_atomic,
    207         keyword_generic,
    208         keyword_noreturn,
    209         keyword_static_assert,
    210         keyword_thread_local,
    211 
    212         // ISO C23
    213         keyword_bit_int,
    214         keyword_c23_alignas,
    215         keyword_c23_alignof,
    216         keyword_c23_bool,
    217         keyword_c23_static_assert,
    218         keyword_c23_thread_local,
    219         keyword_constexpr,
    220         keyword_true,
    221         keyword_false,
    222         keyword_nullptr,
    223         keyword_typeof_unqual,
    224 
    225         // Preprocessor directives
    226         keyword_include,
    227         keyword_include_next,
    228         keyword_embed,
    229         keyword_define,
    230         keyword_defined,
    231         keyword_undef,
    232         keyword_ifdef,
    233         keyword_ifndef,
    234         keyword_elif,
    235         keyword_elifdef,
    236         keyword_elifndef,
    237         keyword_endif,
    238         keyword_error,
    239         keyword_warning,
    240         keyword_pragma,
    241         keyword_line,
    242         keyword_va_args,
    243         keyword_va_opt,
    244 
    245         // gcc keywords
    246         keyword_const1,
    247         keyword_const2,
    248         keyword_inline1,
    249         keyword_inline2,
    250         keyword_volatile1,
    251         keyword_volatile2,
    252         keyword_restrict1,
    253         keyword_restrict2,
    254         keyword_alignof1,
    255         keyword_alignof2,
    256         keyword_typeof,
    257         keyword_attribute1,
    258         keyword_attribute2,
    259         keyword_extension,
    260         keyword_asm,
    261         keyword_asm1,
    262         keyword_asm2,
    263         /// _Float128
    264         keyword_float128_1,
    265         /// __float128
    266         keyword_float128_2,
    267         keyword_int128,
    268         keyword_imag1,
    269         keyword_imag2,
    270         keyword_real1,
    271         keyword_real2,
    272         keyword_float16,
    273 
    274         // clang keywords
    275         keyword_fp16,
    276 
    277         // ms keywords
    278         keyword_declspec,
    279         keyword_int64,
    280         keyword_int64_2,
    281         keyword_int32,
    282         keyword_int32_2,
    283         keyword_int16,
    284         keyword_int16_2,
    285         keyword_int8,
    286         keyword_int8_2,
    287         keyword_stdcall,
    288         keyword_stdcall2,
    289         keyword_thiscall,
    290         keyword_thiscall2,
    291         keyword_vectorcall,
    292         keyword_vectorcall2,
    293 
    294         // builtins that require special parsing
    295         builtin_choose_expr,
    296         builtin_va_arg,
    297         builtin_offsetof,
    298         builtin_bitoffsetof,
    299         builtin_types_compatible_p,
    300 
    301         /// Generated by #embed directive
    302         /// Decimal value with no prefix or suffix
    303         embed_byte,
    304 
    305         /// preprocessor number
    306         /// An optional period, followed by a digit 0-9, followed by any number of letters
    307         /// digits, underscores, periods, and exponents (e+, e-, E+, E-, p+, p-, P+, P-)
    308         pp_num,
    309 
    310         /// preprocessor placemarker token
    311         /// generated if `##` is used with a zero-token argument
    312         /// removed after substitution, so the parser should never see this
    313         /// See C99 6.10.3.3.2
    314         placemarker,
    315 
    316         /// Virtual linemarker token output from preprocessor to indicate start of a new include
    317         include_start,
    318 
    319         /// Virtual linemarker token output from preprocessor to indicate resuming a file after
    320         /// completion of the preceding #include
    321         include_resume,
    322 
    323         /// A comment token if asked to preserve comments.
    324         comment,
    325 
    /// Report whether `id` is identifier-like: a plain or extended identifier,
    /// one of the `__func__`-style names, a keyword of any supported dialect,
    /// a preprocessor directive name, or a specially parsed builtin.
    /// Every other id yields `false`.
    pub fn isMacroIdentifier(id: Id) bool {
        return switch (id) {
            // Ordinary identifiers.
            .identifier,
            .extended_identifier,
            => true,

            // __func__, __FUNCTION__, __PRETTY_FUNCTION__
            .macro_func,
            .macro_function,
            .macro_pretty_func,
            => true,

            // Base keywords, including the __signed / __typeof spellings.
            .keyword_auto,
            .keyword_auto_type,
            .keyword_break,
            .keyword_case,
            .keyword_char,
            .keyword_const,
            .keyword_continue,
            .keyword_default,
            .keyword_do,
            .keyword_double,
            .keyword_else,
            .keyword_enum,
            .keyword_extern,
            .keyword_float,
            .keyword_for,
            .keyword_goto,
            .keyword_if,
            .keyword_int,
            .keyword_long,
            .keyword_register,
            .keyword_return,
            .keyword_short,
            .keyword_signed,
            .keyword_signed1,
            .keyword_signed2,
            .keyword_sizeof,
            .keyword_static,
            .keyword_struct,
            .keyword_switch,
            .keyword_typedef,
            .keyword_typeof1,
            .keyword_typeof2,
            .keyword_union,
            .keyword_unsigned,
            .keyword_void,
            .keyword_volatile,
            .keyword_while,
            => true,

            // ISO C99
            .keyword_bool,
            .keyword_complex,
            .keyword_imaginary,
            .keyword_inline,
            .keyword_restrict,
            => true,

            // ISO C11
            .keyword_alignas,
            .keyword_alignof,
            .keyword_atomic,
            .keyword_generic,
            .keyword_noreturn,
            .keyword_static_assert,
            .keyword_thread_local,
            => true,

            // ISO C23
            .keyword_bit_int,
            .keyword_c23_alignas,
            .keyword_c23_alignof,
            .keyword_c23_bool,
            .keyword_c23_static_assert,
            .keyword_c23_thread_local,
            .keyword_constexpr,
            .keyword_true,
            .keyword_false,
            .keyword_nullptr,
            .keyword_typeof_unqual,
            => true,

            // Preprocessor directives
            .keyword_include,
            .keyword_include_next,
            .keyword_embed,
            .keyword_define,
            .keyword_defined,
            .keyword_undef,
            .keyword_ifdef,
            .keyword_ifndef,
            .keyword_elif,
            .keyword_elifdef,
            .keyword_elifndef,
            .keyword_endif,
            .keyword_error,
            .keyword_warning,
            .keyword_pragma,
            .keyword_line,
            .keyword_va_args,
            .keyword_va_opt,
            => true,

            // gcc keywords
            .keyword_const1,
            .keyword_const2,
            .keyword_inline1,
            .keyword_inline2,
            .keyword_volatile1,
            .keyword_volatile2,
            .keyword_restrict1,
            .keyword_restrict2,
            .keyword_alignof1,
            .keyword_alignof2,
            .keyword_typeof,
            .keyword_attribute1,
            .keyword_attribute2,
            .keyword_extension,
            .keyword_asm,
            .keyword_asm1,
            .keyword_asm2,
            .keyword_float128_1,
            .keyword_float128_2,
            .keyword_int128,
            .keyword_imag1,
            .keyword_imag2,
            .keyword_real1,
            .keyword_real2,
            .keyword_float16,
            => true,

            // clang keywords
            .keyword_fp16 => true,

            // ms keywords
            .keyword_declspec,
            .keyword_int64,
            .keyword_int64_2,
            .keyword_int32,
            .keyword_int32_2,
            .keyword_int16,
            .keyword_int16_2,
            .keyword_int8,
            .keyword_int8_2,
            .keyword_stdcall,
            .keyword_stdcall2,
            .keyword_thiscall,
            .keyword_thiscall2,
            .keyword_vectorcall,
            .keyword_vectorcall2,
            => true,

            // builtins that require special parsing
            .builtin_choose_expr,
            .builtin_va_arg,
            .builtin_offsetof,
            .builtin_bitoffsetof,
            .builtin_types_compatible_p,
            => true,

            else => false,
        };
    }
    462 
    /// Rewrite preprocessor-only keywords to `.identifier`, in place.
    ///
    /// Directive names plus `keyword_va_args` / `keyword_va_opt` are always
    /// rewritten. `keyword_defined` is rewritten only when
    /// `defined_to_identifier` is true, i.e. when we are *not* in an #if or
    /// #elif expression. Every other id is left unchanged.
    pub fn simplifyMacroKeywordExtra(id: *Id, defined_to_identifier: bool) void {
        const becomes_identifier = switch (id.*) {
            // The caller decides whether `defined` keeps its keyword id.
            .keyword_defined => defined_to_identifier,

            .keyword_include,
            .keyword_include_next,
            .keyword_embed,
            .keyword_define,
            .keyword_undef,
            .keyword_ifdef,
            .keyword_ifndef,
            .keyword_elif,
            .keyword_elifdef,
            .keyword_elifndef,
            .keyword_endif,
            .keyword_error,
            .keyword_warning,
            .keyword_pragma,
            .keyword_line,
            .keyword_va_args,
            .keyword_va_opt,
            => true,

            else => false,
        };
        if (becomes_identifier) id.* = .identifier;
    }
    492 
    /// Rewrite preprocessor-only keywords to `.identifier`, in place, while
    /// leaving `keyword_defined` as it is (passes `false` for
    /// `defined_to_identifier`).
    pub fn simplifyMacroKeyword(id: *Id) void {
        Id.simplifyMacroKeywordExtra(id, false);
    }
    496 
    /// Fixed spelling of a token id, when it has one.
    ///
    /// Returns `null` for identifiers, string and character literals (including
    /// the unterminated/empty variants), `macro_string`, `whitespace`, `pp_num`,
    /// `embed_byte`, comments and `invalid`. Returns an empty string for `nl`,
    /// `eof`, `placemarker` and the special macro tokens, a single space for
    /// `macro_ws`, and the exact punctuator or keyword text for everything else.
    /// `include_start` and `include_resume` must not be passed.
    pub fn lexeme(id: Id) ?[]const u8 {
        return switch (id) {
            // Virtual linemarkers are never expected here.
            .include_start, .include_resume => unreachable,

            // No single spelling is recorded for these ids.
            .invalid,
            .whitespace,
            .identifier,
            .extended_identifier,
            .string_literal,
            .string_literal_utf_16,
            .string_literal_utf_8,
            .string_literal_utf_32,
            .string_literal_wide,
            .unterminated_string_literal,
            .macro_string,
            .char_literal,
            .char_literal_utf_8,
            .char_literal_utf_16,
            .char_literal_utf_32,
            .char_literal_wide,
            .empty_char_literal,
            .unterminated_char_literal,
            .unterminated_comment,
            .comment,
            .pp_num,
            .embed_byte,
            => null,

            // Ids that map to the empty string.
            .nl,
            .eof,
            .placemarker,
            .macro_param,
            .macro_param_no_expand,
            .stringify_param,
            .stringify_va_args,
            .macro_param_has_attribute,
            .macro_param_has_c_attribute,
            .macro_param_has_declspec_attribute,
            .macro_param_has_warning,
            .macro_param_has_feature,
            .macro_param_has_extension,
            .macro_param_has_builtin,
            .macro_param_has_include,
            .macro_param_has_include_next,
            .macro_param_has_embed,
            .macro_param_is_identifier,
            .macro_param_pragma_operator,
            .macro_file,
            .macro_line,
            .macro_counter,
            => "",

            .macro_ws => " ",

            // Integer literal tokens generated by the preprocessor.
            .one => "1",
            .zero => "0",

            // Function-name identifiers.
            .macro_func => "__func__",
            .macro_function => "__FUNCTION__",
            .macro_pretty_func => "__PRETTY_FUNCTION__",

            // Brackets.
            .l_paren => "(",
            .r_paren => ")",
            .l_brace => "{",
            .r_brace => "}",
            .l_bracket => "[",
            .r_bracket => "]",

            // Separators and member access.
            .period => ".",
            .ellipsis => "...",
            .arrow => "->",
            .colon => ":",
            .colon_colon => "::",
            .semicolon => ";",
            .comma => ",",
            .question_mark => "?",

            // The + - * / % families.
            .plus => "+",
            .plus_plus => "++",
            .plus_equal => "+=",
            .minus => "-",
            .minus_minus => "--",
            .minus_equal => "-=",
            .asterisk => "*",
            .asterisk_equal => "*=",
            .slash => "/",
            .slash_equal => "/=",
            .percent => "%",
            .percent_equal => "%=",

            // The ! ~ | & ^ << >> families.
            .bang => "!",
            .tilde => "~",
            .pipe => "|",
            .pipe_pipe => "||",
            .pipe_equal => "|=",
            .ampersand => "&",
            .ampersand_ampersand => "&&",
            .ampersand_equal => "&=",
            .caret => "^",
            .caret_equal => "^=",
            .angle_bracket_angle_bracket_left => "<<",
            .angle_bracket_angle_bracket_left_equal => "<<=",
            .angle_bracket_angle_bracket_right => ">>",
            .angle_bracket_angle_bracket_right_equal => ">>=",

            // Assignment and comparison.
            .equal => "=",
            .equal_equal => "==",
            .bang_equal => "!=",
            .angle_bracket_left => "<",
            .angle_bracket_left_equal => "<=",
            .angle_bracket_right => ">",
            .angle_bracket_right_equal => ">=",

            // # and ##
            .hash => "#",
            .hash_hash => "##",

            // Base keywords.
            .keyword_auto => "auto",
            .keyword_auto_type => "__auto_type",
            .keyword_break => "break",
            .keyword_case => "case",
            .keyword_char => "char",
            .keyword_const => "const",
            .keyword_continue => "continue",
            .keyword_default => "default",
            .keyword_do => "do",
            .keyword_double => "double",
            .keyword_else => "else",
            .keyword_enum => "enum",
            .keyword_extern => "extern",
            .keyword_float => "float",
            .keyword_for => "for",
            .keyword_goto => "goto",
            .keyword_if => "if",
            .keyword_int => "int",
            .keyword_long => "long",
            .keyword_register => "register",
            .keyword_return => "return",
            .keyword_short => "short",
            .keyword_signed => "signed",
            .keyword_signed1 => "__signed",
            .keyword_signed2 => "__signed__",
            .keyword_sizeof => "sizeof",
            .keyword_static => "static",
            .keyword_struct => "struct",
            .keyword_switch => "switch",
            .keyword_typedef => "typedef",
            .keyword_typeof1 => "__typeof",
            .keyword_typeof2 => "__typeof__",
            .keyword_union => "union",
            .keyword_unsigned => "unsigned",
            .keyword_void => "void",
            .keyword_volatile => "volatile",
            .keyword_while => "while",

            // ISO C99
            .keyword_bool => "_Bool",
            .keyword_complex => "_Complex",
            .keyword_imaginary => "_Imaginary",
            .keyword_inline => "inline",
            .keyword_restrict => "restrict",

            // ISO C11
            .keyword_alignas => "_Alignas",
            .keyword_alignof => "_Alignof",
            .keyword_atomic => "_Atomic",
            .keyword_generic => "_Generic",
            .keyword_noreturn => "_Noreturn",
            .keyword_static_assert => "_Static_assert",
            .keyword_thread_local => "_Thread_local",

            // ISO C23
            .keyword_bit_int => "_BitInt",
            .keyword_c23_alignas => "alignas",
            .keyword_c23_alignof => "alignof",
            .keyword_c23_bool => "bool",
            .keyword_c23_static_assert => "static_assert",
            .keyword_c23_thread_local => "thread_local",
            .keyword_constexpr => "constexpr",
            .keyword_true => "true",
            .keyword_false => "false",
            .keyword_nullptr => "nullptr",
            .keyword_typeof_unqual => "typeof_unqual",

            // Preprocessor directives
            .keyword_include => "include",
            .keyword_include_next => "include_next",
            .keyword_embed => "embed",
            .keyword_define => "define",
            .keyword_defined => "defined",
            .keyword_undef => "undef",
            .keyword_ifdef => "ifdef",
            .keyword_ifndef => "ifndef",
            .keyword_elif => "elif",
            .keyword_elifdef => "elifdef",
            .keyword_elifndef => "elifndef",
            .keyword_endif => "endif",
            .keyword_error => "error",
            .keyword_warning => "warning",
            .keyword_pragma => "pragma",
            .keyword_line => "line",
            .keyword_va_args => "__VA_ARGS__",
            .keyword_va_opt => "__VA_OPT__",

            // gcc keywords
            .keyword_const1 => "__const",
            .keyword_const2 => "__const__",
            .keyword_inline1 => "__inline",
            .keyword_inline2 => "__inline__",
            .keyword_volatile1 => "__volatile",
            .keyword_volatile2 => "__volatile__",
            .keyword_restrict1 => "__restrict",
            .keyword_restrict2 => "__restrict__",
            .keyword_alignof1 => "__alignof",
            .keyword_alignof2 => "__alignof__",
            .keyword_typeof => "typeof",
            .keyword_attribute1 => "__attribute",
            .keyword_attribute2 => "__attribute__",
            .keyword_extension => "__extension__",
            .keyword_asm => "asm",
            .keyword_asm1 => "__asm",
            .keyword_asm2 => "__asm__",
            .keyword_float128_1 => "_Float128",
            .keyword_float128_2 => "__float128",
            .keyword_int128 => "__int128",
            .keyword_imag1 => "__imag",
            .keyword_imag2 => "__imag__",
            .keyword_real1 => "__real",
            .keyword_real2 => "__real__",
            .keyword_float16 => "_Float16",

            // clang keywords
            .keyword_fp16 => "__fp16",

            // ms keywords
            .keyword_declspec => "__declspec",
            .keyword_int64 => "__int64",
            .keyword_int64_2 => "_int64",
            .keyword_int32 => "__int32",
            .keyword_int32_2 => "_int32",
            .keyword_int16 => "__int16",
            .keyword_int16_2 => "_int16",
            .keyword_int8 => "__int8",
            .keyword_int8_2 => "_int8",
            .keyword_stdcall => "__stdcall",
            .keyword_stdcall2 => "_stdcall",
            .keyword_thiscall => "__thiscall",
            .keyword_thiscall2 => "_thiscall",
            .keyword_vectorcall => "__vectorcall",
            .keyword_vectorcall2 => "_vectorcall",

            // builtins that require special parsing
            .builtin_choose_expr => "__builtin_choose_expr",
            .builtin_va_arg => "__builtin_va_arg",
            .builtin_offsetof => "__builtin_offsetof",
            .builtin_bitoffsetof => "__builtin_bitoffsetof",
            .builtin_types_compatible_p => "__builtin_types_compatible_p",
        };
    }
    735 
    /// Describe a token id in words.
    ///
    /// Identifier-like ids, string literals, character literals and numeric
    /// ids map to a generic lowercase phrase ("an identifier", "a number", ...);
    /// `.invalid` maps to "invalid bytes"; every other id falls back to its
    /// fixed `lexeme()`.
    ///
    /// `.macro_string` must never be passed. The fallback unwraps the optional
    /// returned by `lexeme()`, so ids that have no phrase here and for which
    /// `lexeme()` yields `null` (`.whitespace`, `.comment`,
    /// `.unterminated_comment`) are not valid inputs either, and
    /// `.include_start` / `.include_resume` reach the `unreachable` inside
    /// `lexeme()`.
    pub fn symbol(id: Id) []const u8 {
        return switch (id) {
            .macro_string => unreachable,
            .invalid => "invalid bytes",
            .identifier,
            .extended_identifier,
            .macro_func,
            .macro_function,
            .macro_pretty_func,
            .builtin_choose_expr,
            .builtin_va_arg,
            .builtin_offsetof,
            .builtin_bitoffsetof,
            .builtin_types_compatible_p,
            => "an identifier",
            .string_literal,
            .string_literal_utf_16,
            .string_literal_utf_8,
            .string_literal_utf_32,
            .string_literal_wide,
            .unterminated_string_literal,
            => "a string literal",
            .char_literal,
            .char_literal_utf_8,
            .char_literal_utf_16,
            .char_literal_utf_32,
            .char_literal_wide,
            .unterminated_char_literal,
            .empty_char_literal,
            => "a character literal",
            // Lowercase, consistent with the sibling phrases above.
            .pp_num, .embed_byte => "a number",
            else => id.lexeme().?,
        };
    }
    770 
    771         /// tokens that can start an expression parsed by Preprocessor.expr
    772         /// Note that eof, r_paren, and string literals cannot actually start a
    773         /// preprocessor expression, but we include them here so that a nicer
    774         /// error message can be generated by the parser.
                ///
                /// Returns `true` for exactly the ids listed in the switch and `false`
                /// for every other id.
    775         pub fn validPreprocessorExprStart(id: Id) bool {
    776             return switch (id) {
                        // Not real expression starts; accepted only so the parser can
                        // produce a better diagnostic (see the doc comment).
    777                 .eof,
    778                 .r_paren,
    779                 .string_literal,
    780                 .string_literal_utf_16,
    781                 .string_literal_utf_8,
    782                 .string_literal_utf_32,
    783                 .string_literal_wide,
    784 
                        // Character literals, `(`, the unary operators, identifiers,
                        // `defined`, numbers and the boolean keywords.
    785                 .char_literal,
    786                 .char_literal_utf_8,
    787                 .char_literal_utf_16,
    788                 .char_literal_utf_32,
    789                 .char_literal_wide,
    790                 .l_paren,
    791                 .plus,
    792                 .minus,
    793                 .tilde,
    794                 .bang,
    795                 .identifier,
    796                 .extended_identifier,
    797                 .keyword_defined,
    798                 .one,
    799                 .zero,
    800                 .pp_num,
    801                 .keyword_true,
    802                 .keyword_false,
    803                 => true,
    804                 else => false,
    805             };
    806         }
    807 
    808         pub fn allowsDigraphs(id: Id, langopts: LangOpts) bool {
                    // True only when `id` is one of the bracket, brace or hash ids listed
                    // below AND `langopts.hasDigraphs()` is true; false in every other case.
                    // The tokenizer emits some of these ids for digraph spellings
                    // (e.g. `<:` -> `.l_bracket`, `%>` -> `.r_brace`) under the same check.
    809             return switch (id) {
    810                 .l_bracket,
    811                 .r_bracket,
    812                 .l_brace,
    813                 .r_brace,
    814                 .hash,
    815                 .hash_hash,
    816                 => langopts.hasDigraphs(),
    817                 else => false,
    818             };
    819         }
    820 
    821         pub fn canOpenGCCAsmStmt(id: Id) bool {
                    // True for every spelling of `volatile` and `inline` (plain, `__x` and
                    // `__x__`), for `goto`, and for `(`; false for any other id.
                    // NOTE(review): presumably these are the tokens allowed right after the
                    // asm keyword of a GCC-style asm statement; confirm against the parser.
    822             return switch (id) {
    823                 .keyword_volatile, .keyword_volatile1, .keyword_volatile2, .keyword_inline, .keyword_inline1, .keyword_inline2, .keyword_goto, .l_paren => true,
    824                 else => false,
    825             };
    826         }
    827 
    828         pub fn isStringLiteral(id: Id) bool {
                    // True for the plain string literal id and its four prefixed variants.
                    // Any other id, including `.unterminated_string_literal` and
                    // `.macro_string`, yields false.
    829             return switch (id) {
    830                 .string_literal, .string_literal_utf_16, .string_literal_utf_8, .string_literal_utf_32, .string_literal_wide => true,
    831                 else => false,
    832             };
    833         }
    834     };
    835 
    836     /// double underscore and underscore + capital letter identifiers
    837     /// belong to the implementation namespace, so we always convert them
    838     /// to keywords.
            ///
            /// Looks `str` up in `all_kws` and returns `.identifier` when it is not
            /// listed. A listed spelling is returned as its keyword id unless one of
            /// the checks in the switch below rejects it for the given `langopts`, in
            /// which case `.identifier` is returned instead. Note that `__declspec`
            /// and the `__intN` spellings are gated as well, despite their leading
            /// double underscore.
    839     pub fn getTokenId(langopts: LangOpts, str: []const u8) Token.Id {
    840         const kw = all_kws.get(str) orelse return .identifier;
    841         const standard = langopts.standard;
    842         return switch (kw) {
                    // Plain spellings that are keywords only under GNU mode and/or a
                    // new enough standard.
    843             .keyword_inline => if (standard.isGNU() or standard.atLeast(.c99)) kw else .identifier,
    844             .keyword_restrict => if (standard.atLeast(.c99)) kw else .identifier,
    845             .keyword_typeof => if (standard.isGNU() or standard.atLeast(.c23)) kw else .identifier,
    846             .keyword_asm => if (standard.isGNU()) kw else .identifier,
                    // `__declspec` is a keyword only when `langopts.declspec_attrs` is set.
    847             .keyword_declspec => if (langopts.declspec_attrs) kw else .identifier,
    848 
                    // These require at least C23.
    849             .keyword_c23_alignas,
    850             .keyword_c23_alignof,
    851             .keyword_c23_bool,
    852             .keyword_c23_static_assert,
    853             .keyword_c23_thread_local,
    854             .keyword_constexpr,
    855             .keyword_true,
    856             .keyword_false,
    857             .keyword_nullptr,
    858             .keyword_typeof_unqual,
    859             .keyword_elifdef,
    860             .keyword_elifndef,
    861             => if (standard.atLeast(.c23)) kw else .identifier,
    862 
                    // These require `langopts.ms_extensions`: all `__intN`/`_intN`
                    // spellings and the single-underscore calling-convention spellings.
    863             .keyword_int64,
    864             .keyword_int64_2,
    865             .keyword_int32,
    866             .keyword_int32_2,
    867             .keyword_int16,
    868             .keyword_int16_2,
    869             .keyword_int8,
    870             .keyword_int8_2,
    871             .keyword_stdcall2,
    872             .keyword_thiscall2,
    873             .keyword_vectorcall2,
    874             => if (langopts.ms_extensions) kw else .identifier,
                    // Every other entry of `all_kws` is returned unconditionally.
    875             else => kw,
    876         };
    877     }
    878 
    879     const all_kws = std.StaticStringMap(Id).initComptime(.{
                // Comptime-initialized table from spelling to token id. `getTokenId`
                // looks identifiers up here and then decides whether the hit is
                // active for the current language options.
    880         .{ "auto", .keyword_auto },
    881         .{ "break", .keyword_break },
    882         .{ "case", .keyword_case },
    883         .{ "char", .keyword_char },
    884         .{ "const", .keyword_const },
    885         .{ "continue", .keyword_continue },
    886         .{ "default", .keyword_default },
    887         .{ "do", .keyword_do },
    888         .{ "double", .keyword_double },
    889         .{ "else", .keyword_else },
    890         .{ "enum", .keyword_enum },
    891         .{ "extern", .keyword_extern },
    892         .{ "float", .keyword_float },
    893         .{ "for", .keyword_for },
    894         .{ "goto", .keyword_goto },
    895         .{ "if", .keyword_if },
    896         .{ "int", .keyword_int },
    897         .{ "long", .keyword_long },
    898         .{ "register", .keyword_register },
    899         .{ "return", .keyword_return },
    900         .{ "short", .keyword_short },
    901         .{ "signed", .keyword_signed },
    902         .{ "__signed", .keyword_signed1 },
    903         .{ "__signed__", .keyword_signed2 },
    904         .{ "sizeof", .keyword_sizeof },
    905         .{ "static", .keyword_static },
    906         .{ "struct", .keyword_struct },
    907         .{ "switch", .keyword_switch },
    908         .{ "typedef", .keyword_typedef },
    909         .{ "union", .keyword_union },
    910         .{ "unsigned", .keyword_unsigned },
    911         .{ "void", .keyword_void },
    912         .{ "volatile", .keyword_volatile },
    913         .{ "while", .keyword_while },
    914         .{ "__typeof__", .keyword_typeof2 },
    915         .{ "__typeof", .keyword_typeof1 },
    916 
    917         // ISO C99
    918         .{ "_Bool", .keyword_bool },
    919         .{ "_Complex", .keyword_complex },
    920         .{ "_Imaginary", .keyword_imaginary },
    921         .{ "inline", .keyword_inline },
    922         .{ "restrict", .keyword_restrict },
    923 
    924         // ISO C11
    925         .{ "_Alignas", .keyword_alignas },
    926         .{ "_Alignof", .keyword_alignof },
    927         .{ "_Atomic", .keyword_atomic },
    928         .{ "_Generic", .keyword_generic },
    929         .{ "_Noreturn", .keyword_noreturn },
    930         .{ "_Static_assert", .keyword_static_assert },
    931         .{ "_Thread_local", .keyword_thread_local },
    932 
    933         // ISO C23
    934         .{ "_BitInt", .keyword_bit_int },
    935         .{ "alignas", .keyword_c23_alignas },
    936         .{ "alignof", .keyword_c23_alignof },
    937         .{ "bool", .keyword_c23_bool },
    938         .{ "static_assert", .keyword_c23_static_assert },
    939         .{ "thread_local", .keyword_c23_thread_local },
    940         .{ "constexpr", .keyword_constexpr },
    941         .{ "true", .keyword_true },
    942         .{ "false", .keyword_false },
    943         .{ "nullptr", .keyword_nullptr },
    944         .{ "typeof_unqual", .keyword_typeof_unqual },
    945 
    946         // Preprocessor directives
    947         .{ "include", .keyword_include },
    948         .{ "include_next", .keyword_include_next },
    949         .{ "embed", .keyword_embed },
    950         .{ "define", .keyword_define },
    951         .{ "defined", .keyword_defined },
    952         .{ "undef", .keyword_undef },
    953         .{ "ifdef", .keyword_ifdef },
    954         .{ "ifndef", .keyword_ifndef },
    955         .{ "elif", .keyword_elif },
    956         .{ "elifdef", .keyword_elifdef },
    957         .{ "elifndef", .keyword_elifndef },
    958         .{ "endif", .keyword_endif },
    959         .{ "error", .keyword_error },
    960         .{ "warning", .keyword_warning },
    961         .{ "pragma", .keyword_pragma },
    962         .{ "line", .keyword_line },
                // Variadic-macro identifiers and predefined function-name identifiers
                // (not directives, but grouped with them here).
    963         .{ "__VA_ARGS__", .keyword_va_args },
    964         .{ "__VA_OPT__", .keyword_va_opt },
    965         .{ "__func__", .macro_func },
    966         .{ "__FUNCTION__", .macro_function },
    967         .{ "__PRETTY_FUNCTION__", .macro_pretty_func },
    968 
    969         // gcc keywords
    970         .{ "__auto_type", .keyword_auto_type },
    971         .{ "__const", .keyword_const1 },
    972         .{ "__const__", .keyword_const2 },
    973         .{ "__inline", .keyword_inline1 },
    974         .{ "__inline__", .keyword_inline2 },
    975         .{ "__volatile", .keyword_volatile1 },
    976         .{ "__volatile__", .keyword_volatile2 },
    977         .{ "__restrict", .keyword_restrict1 },
    978         .{ "__restrict__", .keyword_restrict2 },
    979         .{ "__alignof", .keyword_alignof1 },
    980         .{ "__alignof__", .keyword_alignof2 },
    981         .{ "typeof", .keyword_typeof },
    982         .{ "__attribute", .keyword_attribute1 },
    983         .{ "__attribute__", .keyword_attribute2 },
    984         .{ "__extension__", .keyword_extension },
    985         .{ "asm", .keyword_asm },
    986         .{ "__asm", .keyword_asm1 },
    987         .{ "__asm__", .keyword_asm2 },
    988         .{ "_Float128", .keyword_float128_1 },
    989         .{ "__float128", .keyword_float128_2 },
    990         .{ "__int128", .keyword_int128 },
    991         .{ "__imag", .keyword_imag1 },
    992         .{ "__imag__", .keyword_imag2 },
    993         .{ "__real", .keyword_real1 },
    994         .{ "__real__", .keyword_real2 },
    995         .{ "_Float16", .keyword_float16 },
    996 
    997         // clang keywords
    998         .{ "__fp16", .keyword_fp16 },
    999 
   1000         // ms keywords
   1001         .{ "__declspec", .keyword_declspec },
   1002         .{ "__int64", .keyword_int64 },
   1003         .{ "_int64", .keyword_int64_2 },
   1004         .{ "__int32", .keyword_int32 },
   1005         .{ "_int32", .keyword_int32_2 },
   1006         .{ "__int16", .keyword_int16 },
   1007         .{ "_int16", .keyword_int16_2 },
   1008         .{ "__int8", .keyword_int8 },
   1009         .{ "_int8", .keyword_int8_2 },
   1010         .{ "__stdcall", .keyword_stdcall },
   1011         .{ "_stdcall", .keyword_stdcall2 },
   1012         .{ "__thiscall", .keyword_thiscall },
   1013         .{ "_thiscall", .keyword_thiscall2 },
   1014         .{ "__vectorcall", .keyword_vectorcall },
   1015         .{ "_vectorcall", .keyword_vectorcall2 },
   1016 
   1017         // builtins that require special parsing
   1018         .{ "__builtin_choose_expr", .builtin_choose_expr },
   1019         .{ "__builtin_va_arg", .builtin_va_arg },
   1020         .{ "__builtin_offsetof", .builtin_offsetof },
   1021         .{ "__builtin_bitoffsetof", .builtin_bitoffsetof },
   1022         .{ "__builtin_types_compatible_p", .builtin_types_compatible_p },
   1023     });
   1024 };
   1025 
   1026 const Tokenizer = @This();
   1027 
        /// Bytes being tokenized; `next` reads them one at a time.
   1028 buf: []const u8,
        /// Offset into `buf` of the next byte `next` will examine.
   1029 index: u32 = 0,
        /// Id of the source `buf` belongs to.
        /// NOTE(review): presumably copied into each returned token; confirm in `next`.
   1030 source: Source.Id,
        /// Language options consulted while tokenizing (e.g. digraph support,
        /// `$` in identifiers, MS extensions, the selected standard).
   1031 langopts: LangOpts,
        /// Current line number; starts at 1 and is incremented when `next`
        /// produces an `.nl` token.
   1032 line: u32 = 1,
   1033 
   1034 pub fn next(self: *Tokenizer) Token {
   1035     var state: enum {
   1036         start,
   1037         whitespace,
   1038         u,
   1039         u8,
   1040         U,
   1041         L,
   1042         string_literal,
   1043         char_literal_start,
   1044         char_literal,
   1045         char_escape_sequence,
   1046         string_escape_sequence,
   1047         identifier,
   1048         extended_identifier,
   1049         equal,
   1050         bang,
   1051         pipe,
   1052         colon,
   1053         percent,
   1054         asterisk,
   1055         plus,
   1056         angle_bracket_left,
   1057         angle_bracket_angle_bracket_left,
   1058         angle_bracket_right,
   1059         angle_bracket_angle_bracket_right,
   1060         caret,
   1061         period,
   1062         period2,
   1063         minus,
   1064         slash,
   1065         ampersand,
   1066         hash,
   1067         hash_digraph,
   1068         hash_hash_digraph_partial,
   1069         line_comment,
   1070         multi_line_comment,
   1071         multi_line_comment_asterisk,
   1072         multi_line_comment_done,
   1073         pp_num,
   1074         pp_num_exponent,
   1075         pp_num_digit_separator,
   1076     } = .start;
   1077 
   1078     var start = self.index;
   1079     var id: Token.Id = .eof;
   1080 
   1081     while (self.index < self.buf.len) : (self.index += 1) {
   1082         const c = self.buf[self.index];
   1083         switch (state) {
   1084             .start => switch (c) {
   1085                 '\n' => {
   1086                     id = .nl;
   1087                     self.index += 1;
   1088                     self.line += 1;
   1089                     break;
   1090                 },
   1091                 '"' => {
   1092                     id = .string_literal;
   1093                     state = .string_literal;
   1094                 },
   1095                 '\'' => {
   1096                     id = .char_literal;
   1097                     state = .char_literal_start;
   1098                 },
   1099                 'u' => state = .u,
   1100                 'U' => state = .U,
   1101                 'L' => state = .L,
   1102                 'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .identifier,
   1103                 '=' => state = .equal,
   1104                 '!' => state = .bang,
   1105                 '|' => state = .pipe,
   1106                 '(' => {
   1107                     id = .l_paren;
   1108                     self.index += 1;
   1109                     break;
   1110                 },
   1111                 ')' => {
   1112                     id = .r_paren;
   1113                     self.index += 1;
   1114                     break;
   1115                 },
   1116                 '[' => {
   1117                     id = .l_bracket;
   1118                     self.index += 1;
   1119                     break;
   1120                 },
   1121                 ']' => {
   1122                     id = .r_bracket;
   1123                     self.index += 1;
   1124                     break;
   1125                 },
   1126                 ';' => {
   1127                     id = .semicolon;
   1128                     self.index += 1;
   1129                     break;
   1130                 },
   1131                 ',' => {
   1132                     id = .comma;
   1133                     self.index += 1;
   1134                     break;
   1135                 },
   1136                 '?' => {
   1137                     id = .question_mark;
   1138                     self.index += 1;
   1139                     break;
   1140                 },
   1141                 ':' => state = .colon,
   1142                 '%' => state = .percent,
   1143                 '*' => state = .asterisk,
   1144                 '+' => state = .plus,
   1145                 '<' => state = .angle_bracket_left,
   1146                 '>' => state = .angle_bracket_right,
   1147                 '^' => state = .caret,
   1148                 '{' => {
   1149                     id = .l_brace;
   1150                     self.index += 1;
   1151                     break;
   1152                 },
   1153                 '}' => {
   1154                     id = .r_brace;
   1155                     self.index += 1;
   1156                     break;
   1157                 },
   1158                 '~' => {
   1159                     id = .tilde;
   1160                     self.index += 1;
   1161                     break;
   1162                 },
   1163                 '.' => state = .period,
   1164                 '-' => state = .minus,
   1165                 '/' => state = .slash,
   1166                 '&' => state = .ampersand,
   1167                 '#' => state = .hash,
   1168                 '0'...'9' => state = .pp_num,
   1169                 '\t', '\x0B', '\x0C', ' ' => state = .whitespace,
   1170                 '$' => if (self.langopts.dollars_in_identifiers) {
   1171                     state = .extended_identifier;
   1172                 } else {
   1173                     id = .invalid;
   1174                     self.index += 1;
   1175                     break;
   1176                 },
   1177                 0x1A => if (self.langopts.ms_extensions) {
   1178                     id = .eof;
   1179                     break;
   1180                 } else {
   1181                     id = .invalid;
   1182                     self.index += 1;
   1183                     break;
   1184                 },
   1185                 0x80...0xFF => state = .extended_identifier,
   1186                 else => {
   1187                     id = .invalid;
   1188                     self.index += 1;
   1189                     break;
   1190                 },
   1191             },
   1192             .whitespace => switch (c) {
   1193                 '\t', '\x0B', '\x0C', ' ' => {},
   1194                 else => {
   1195                     id = .whitespace;
   1196                     break;
   1197                 },
   1198             },
   1199             .u => switch (c) {
   1200                 '8' => {
   1201                     state = .u8;
   1202                 },
   1203                 '\'' => {
   1204                     id = .char_literal_utf_16;
   1205                     state = .char_literal_start;
   1206                 },
   1207                 '\"' => {
   1208                     id = .string_literal_utf_16;
   1209                     state = .string_literal;
   1210                 },
   1211                 else => {
   1212                     self.index -= 1;
   1213                     state = .identifier;
   1214                 },
   1215             },
   1216             .u8 => switch (c) {
   1217                 '\"' => {
   1218                     id = .string_literal_utf_8;
   1219                     state = .string_literal;
   1220                 },
   1221                 '\'' => {
   1222                     id = .char_literal_utf_8;
   1223                     state = .char_literal_start;
   1224                 },
   1225                 else => {
   1226                     self.index -= 1;
   1227                     state = .identifier;
   1228                 },
   1229             },
   1230             .U => switch (c) {
   1231                 '\'' => {
   1232                     id = .char_literal_utf_32;
   1233                     state = .char_literal_start;
   1234                 },
   1235                 '\"' => {
   1236                     id = .string_literal_utf_32;
   1237                     state = .string_literal;
   1238                 },
   1239                 else => {
   1240                     self.index -= 1;
   1241                     state = .identifier;
   1242                 },
   1243             },
   1244             .L => switch (c) {
   1245                 '\'' => {
   1246                     id = .char_literal_wide;
   1247                     state = .char_literal_start;
   1248                 },
   1249                 '\"' => {
   1250                     id = .string_literal_wide;
   1251                     state = .string_literal;
   1252                 },
   1253                 else => {
   1254                     self.index -= 1;
   1255                     state = .identifier;
   1256                 },
   1257             },
   1258             .string_literal => switch (c) {
   1259                 '\\' => {
   1260                     state = .string_escape_sequence;
   1261                 },
   1262                 '"' => {
   1263                     self.index += 1;
   1264                     break;
   1265                 },
   1266                 '\n' => {
   1267                     id = .unterminated_string_literal;
   1268                     break;
   1269                 },
   1270                 '\r' => unreachable,
   1271                 else => {},
   1272             },
   1273             .char_literal_start => switch (c) {
   1274                 '\\' => {
   1275                     state = .char_escape_sequence;
   1276                 },
   1277                 '\'' => {
   1278                     id = .empty_char_literal;
   1279                     self.index += 1;
   1280                     break;
   1281                 },
   1282                 '\n' => {
   1283                     id = .unterminated_char_literal;
   1284                     break;
   1285                 },
   1286                 else => {
   1287                     state = .char_literal;
   1288                 },
   1289             },
   1290             .char_literal => switch (c) {
   1291                 '\\' => {
   1292                     state = .char_escape_sequence;
   1293                 },
   1294                 '\'' => {
   1295                     self.index += 1;
   1296                     break;
   1297                 },
   1298                 '\n' => {
   1299                     id = .unterminated_char_literal;
   1300                     break;
   1301                 },
   1302                 else => {},
   1303             },
   1304             .char_escape_sequence => switch (c) {
   1305                 '\r', '\n' => {
   1306                     id = .unterminated_char_literal;
   1307                     break;
   1308                 },
   1309                 else => state = .char_literal,
   1310             },
   1311             .string_escape_sequence => switch (c) {
   1312                 '\r', '\n' => {
   1313                     id = .unterminated_string_literal;
   1314                     break;
   1315                 },
   1316                 else => state = .string_literal,
   1317             },
   1318             .identifier, .extended_identifier => switch (c) {
   1319                 'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
   1320                 '$' => if (self.langopts.dollars_in_identifiers) {
   1321                     state = .extended_identifier;
   1322                 } else {
   1323                     id = if (state == .identifier) Token.getTokenId(self.langopts, self.buf[start..self.index]) else .extended_identifier;
   1324                     break;
   1325                 },
   1326                 0x80...0xFF => state = .extended_identifier,
   1327                 else => {
   1328                     id = if (state == .identifier) Token.getTokenId(self.langopts, self.buf[start..self.index]) else .extended_identifier;
   1329                     break;
   1330                 },
   1331             },
   1332             .equal => switch (c) {
   1333                 '=' => {
   1334                     id = .equal_equal;
   1335                     self.index += 1;
   1336                     break;
   1337                 },
   1338                 else => {
   1339                     id = .equal;
   1340                     break;
   1341                 },
   1342             },
   1343             .bang => switch (c) {
   1344                 '=' => {
   1345                     id = .bang_equal;
   1346                     self.index += 1;
   1347                     break;
   1348                 },
   1349                 else => {
   1350                     id = .bang;
   1351                     break;
   1352                 },
   1353             },
   1354             .pipe => switch (c) {
   1355                 '=' => {
   1356                     id = .pipe_equal;
   1357                     self.index += 1;
   1358                     break;
   1359                 },
   1360                 '|' => {
   1361                     id = .pipe_pipe;
   1362                     self.index += 1;
   1363                     break;
   1364                 },
   1365                 else => {
   1366                     id = .pipe;
   1367                     break;
   1368                 },
   1369             },
   1370             .colon => switch (c) {
   1371                 '>' => {
   1372                     if (self.langopts.hasDigraphs()) {
   1373                         id = .r_bracket;
   1374                         self.index += 1;
   1375                     } else {
   1376                         id = .colon;
   1377                     }
   1378                     break;
   1379                 },
   1380                 ':' => {
   1381                     if (self.langopts.standard.atLeast(.c23)) {
   1382                         id = .colon_colon;
   1383                         self.index += 1;
   1384                         break;
   1385                     } else {
   1386                         id = .colon;
   1387                         break;
   1388                     }
   1389                 },
   1390                 else => {
   1391                     id = .colon;
   1392                     break;
   1393                 },
   1394             },
   1395             .percent => switch (c) {
   1396                 '=' => {
   1397                     id = .percent_equal;
   1398                     self.index += 1;
   1399                     break;
   1400                 },
   1401                 '>' => {
   1402                     if (self.langopts.hasDigraphs()) {
   1403                         id = .r_brace;
   1404                         self.index += 1;
   1405                     } else {
   1406                         id = .percent;
   1407                     }
   1408                     break;
   1409                 },
   1410                 ':' => {
   1411                     if (self.langopts.hasDigraphs()) {
   1412                         state = .hash_digraph;
   1413                     } else {
   1414                         id = .percent;
   1415                         break;
   1416                     }
   1417                 },
   1418                 else => {
   1419                     id = .percent;
   1420                     break;
   1421                 },
   1422             },
   1423             .asterisk => switch (c) {
   1424                 '=' => {
   1425                     id = .asterisk_equal;
   1426                     self.index += 1;
   1427                     break;
   1428                 },
   1429                 else => {
   1430                     id = .asterisk;
   1431                     break;
   1432                 },
   1433             },
   1434             .plus => switch (c) {
   1435                 '=' => {
   1436                     id = .plus_equal;
   1437                     self.index += 1;
   1438                     break;
   1439                 },
   1440                 '+' => {
   1441                     id = .plus_plus;
   1442                     self.index += 1;
   1443                     break;
   1444                 },
   1445                 else => {
   1446                     id = .plus;
   1447                     break;
   1448                 },
   1449             },
   1450             .angle_bracket_left => switch (c) {
   1451                 '<' => state = .angle_bracket_angle_bracket_left,
   1452                 '=' => {
   1453                     id = .angle_bracket_left_equal;
   1454                     self.index += 1;
   1455                     break;
   1456                 },
   1457                 ':' => {
   1458                     if (self.langopts.hasDigraphs()) {
   1459                         id = .l_bracket;
   1460                         self.index += 1;
   1461                     } else {
   1462                         id = .angle_bracket_left;
   1463                     }
   1464                     break;
   1465                 },
   1466                 '%' => {
   1467                     if (self.langopts.hasDigraphs()) {
   1468                         id = .l_brace;
   1469                         self.index += 1;
   1470                     } else {
   1471                         id = .angle_bracket_left;
   1472                     }
   1473                     break;
   1474                 },
   1475                 else => {
   1476                     id = .angle_bracket_left;
   1477                     break;
   1478                 },
   1479             },
   1480             .angle_bracket_angle_bracket_left => switch (c) {
   1481                 '=' => {
   1482                     id = .angle_bracket_angle_bracket_left_equal;
   1483                     self.index += 1;
   1484                     break;
   1485                 },
   1486                 else => {
   1487                     id = .angle_bracket_angle_bracket_left;
   1488                     break;
   1489                 },
   1490             },
   1491             .angle_bracket_right => switch (c) {
   1492                 '>' => state = .angle_bracket_angle_bracket_right,
   1493                 '=' => {
   1494                     id = .angle_bracket_right_equal;
   1495                     self.index += 1;
   1496                     break;
   1497                 },
   1498                 else => {
   1499                     id = .angle_bracket_right;
   1500                     break;
   1501                 },
   1502             },
   1503             .angle_bracket_angle_bracket_right => switch (c) {
   1504                 '=' => {
   1505                     id = .angle_bracket_angle_bracket_right_equal;
   1506                     self.index += 1;
   1507                     break;
   1508                 },
   1509                 else => {
   1510                     id = .angle_bracket_angle_bracket_right;
   1511                     break;
   1512                 },
   1513             },
   1514             .caret => switch (c) {
   1515                 '=' => {
   1516                     id = .caret_equal;
   1517                     self.index += 1;
   1518                     break;
   1519                 },
   1520                 else => {
   1521                     id = .caret;
   1522                     break;
   1523                 },
   1524             },
   1525             .period => switch (c) {
   1526                 '.' => state = .period2,
   1527                 '0'...'9' => state = .pp_num,
   1528                 else => {
   1529                     id = .period;
   1530                     break;
   1531                 },
   1532             },
   1533             .period2 => switch (c) {
   1534                 '.' => {
   1535                     id = .ellipsis;
   1536                     self.index += 1;
   1537                     break;
   1538                 },
   1539                 else => {
   1540                     id = .period;
   1541                     self.index -= 1;
   1542                     break;
   1543                 },
   1544             },
   1545             .minus => switch (c) {
   1546                 '>' => {
   1547                     id = .arrow;
   1548                     self.index += 1;
   1549                     break;
   1550                 },
   1551                 '=' => {
   1552                     id = .minus_equal;
   1553                     self.index += 1;
   1554                     break;
   1555                 },
   1556                 '-' => {
   1557                     id = .minus_minus;
   1558                     self.index += 1;
   1559                     break;
   1560                 },
   1561                 else => {
   1562                     id = .minus;
   1563                     break;
   1564                 },
   1565             },
   1566             .ampersand => switch (c) {
   1567                 '&' => {
   1568                     id = .ampersand_ampersand;
   1569                     self.index += 1;
   1570                     break;
   1571                 },
   1572                 '=' => {
   1573                     id = .ampersand_equal;
   1574                     self.index += 1;
   1575                     break;
   1576                 },
   1577                 else => {
   1578                     id = .ampersand;
   1579                     break;
   1580                 },
   1581             },
   1582             .hash => switch (c) {
   1583                 '#' => {
   1584                     id = .hash_hash;
   1585                     self.index += 1;
   1586                     break;
   1587                 },
   1588                 else => {
   1589                     id = .hash;
   1590                     break;
   1591                 },
   1592             },
   1593             .hash_digraph => switch (c) {
   1594                 '%' => state = .hash_hash_digraph_partial,
   1595                 else => {
   1596                     id = .hash;
   1597                     break;
   1598                 },
   1599             },
   1600             .hash_hash_digraph_partial => switch (c) {
   1601                 ':' => {
   1602                     id = .hash_hash;
   1603                     self.index += 1;
   1604                     break;
   1605                 },
   1606                 else => {
   1607                     id = .hash;
   1608                     self.index -= 1; // re-tokenize the percent
   1609                     break;
   1610                 },
   1611             },
   1612             .slash => switch (c) {
   1613                 '/' => state = .line_comment,
   1614                 '*' => state = .multi_line_comment,
   1615                 '=' => {
   1616                     id = .slash_equal;
   1617                     self.index += 1;
   1618                     break;
   1619                 },
   1620                 else => {
   1621                     id = .slash;
   1622                     break;
   1623                 },
   1624             },
   1625             .line_comment => switch (c) {
   1626                 '\n' => {
   1627                     if (self.langopts.preserve_comments) {
   1628                         id = .comment;
   1629                         break;
   1630                     }
   1631                     self.index -= 1;
   1632                     state = .start;
   1633                 },
   1634                 else => {},
   1635             },
   1636             .multi_line_comment => switch (c) {
   1637                 '*' => state = .multi_line_comment_asterisk,
   1638                 '\n' => self.line += 1,
   1639                 else => {},
   1640             },
   1641             .multi_line_comment_asterisk => switch (c) {
   1642                 '/' => {
   1643                     if (self.langopts.preserve_comments) {
   1644                         self.index += 1;
   1645                         id = .comment;
   1646                         break;
   1647                     }
   1648                     state = .multi_line_comment_done;
   1649                 },
   1650                 '\n' => {
   1651                     self.line += 1;
   1652                     state = .multi_line_comment;
   1653                 },
   1654                 '*' => {},
   1655                 else => state = .multi_line_comment,
   1656             },
   1657             .multi_line_comment_done => switch (c) {
   1658                 '\n' => {
   1659                     start = self.index;
   1660                     id = .nl;
   1661                     self.index += 1;
   1662                     self.line += 1;
   1663                     break;
   1664                 },
   1665                 '\r' => unreachable,
   1666                 '\t', '\x0B', '\x0C', ' ' => {
   1667                     start = self.index;
   1668                     state = .whitespace;
   1669                 },
   1670                 else => {
   1671                     id = .whitespace;
   1672                     break;
   1673                 },
   1674             },
   1675             .pp_num => switch (c) {
   1676                 'a'...'d',
   1677                 'A'...'D',
   1678                 'f'...'o',
   1679                 'F'...'O',
   1680                 'q'...'z',
   1681                 'Q'...'Z',
   1682                 '0'...'9',
   1683                 '_',
   1684                 '.',
   1685                 => {},
   1686                 'e', 'E', 'p', 'P' => state = .pp_num_exponent,
   1687                 '\'' => if (self.langopts.standard.atLeast(.c23)) {
   1688                     state = .pp_num_digit_separator;
   1689                 } else {
   1690                     id = .pp_num;
   1691                     break;
   1692                 },
   1693                 else => {
   1694                     id = .pp_num;
   1695                     break;
   1696                 },
   1697             },
   1698             .pp_num_digit_separator => switch (c) {
   1699                 'a'...'d',
   1700                 'A'...'D',
   1701                 'f'...'o',
   1702                 'F'...'O',
   1703                 'q'...'z',
   1704                 'Q'...'Z',
   1705                 '0'...'9',
   1706                 '_',
   1707                 => state = .pp_num,
   1708                 else => {
   1709                     self.index -= 1;
   1710                     id = .pp_num;
   1711                     break;
   1712                 },
   1713             },
   1714             .pp_num_exponent => switch (c) {
   1715                 'a'...'o',
   1716                 'q'...'z',
   1717                 'A'...'O',
   1718                 'Q'...'Z',
   1719                 '0'...'9',
   1720                 '_',
   1721                 '.',
   1722                 '+',
   1723                 '-',
   1724                 => state = .pp_num,
   1725                 'p', 'P' => {},
   1726                 else => {
   1727                     id = .pp_num;
   1728                     break;
   1729                 },
   1730             },
   1731         }
   1732     } else if (self.index == self.buf.len) {
   1733         switch (state) {
   1734             .start, .line_comment => {},
   1735             .u, .u8, .U, .L, .identifier => id = Token.getTokenId(self.langopts, self.buf[start..self.index]),
   1736             .extended_identifier => id = .extended_identifier,
   1737 
   1738             .period2 => {
   1739                 self.index -= 1;
   1740                 id = .period;
   1741             },
   1742 
   1743             .multi_line_comment,
   1744             .multi_line_comment_asterisk,
   1745             => id = .unterminated_comment,
   1746 
   1747             .char_escape_sequence, .char_literal, .char_literal_start => id = .unterminated_char_literal,
   1748             .string_escape_sequence, .string_literal => id = .unterminated_string_literal,
   1749 
   1750             .whitespace => id = .whitespace,
   1751             .multi_line_comment_done => id = .whitespace,
   1752 
   1753             .equal => id = .equal,
   1754             .bang => id = .bang,
   1755             .minus => id = .minus,
   1756             .slash => id = .slash,
   1757             .ampersand => id = .ampersand,
   1758             .hash => id = .hash,
   1759             .period => id = .period,
   1760             .pipe => id = .pipe,
   1761             .angle_bracket_angle_bracket_right => id = .angle_bracket_angle_bracket_right,
   1762             .angle_bracket_right => id = .angle_bracket_right,
   1763             .angle_bracket_angle_bracket_left => id = .angle_bracket_angle_bracket_left,
   1764             .angle_bracket_left => id = .angle_bracket_left,
   1765             .plus => id = .plus,
   1766             .colon => id = .colon,
   1767             .percent => id = .percent,
   1768             .caret => id = .caret,
   1769             .asterisk => id = .asterisk,
   1770             .hash_digraph => id = .hash,
   1771             .hash_hash_digraph_partial => {
   1772                 id = .hash;
   1773                 self.index -= 1; // re-tokenize the percent
   1774             },
   1775             .pp_num, .pp_num_exponent, .pp_num_digit_separator => id = .pp_num,
   1776         }
   1777     }
   1778 
   1779     return .{
   1780         .id = id,
   1781         .start = start,
   1782         .end = self.index,
   1783         .line = self.line,
   1784         .source = self.source,
   1785     };
   1786 }
   1787 
   1788 pub fn nextNoWS(self: *Tokenizer) Token {
   1789     var tok = self.next();
   1790     while (tok.id == .whitespace or tok.id == .comment) tok = self.next();
   1791     return tok;
   1792 }
   1793 
   1794 pub fn nextNoWSComments(self: *Tokenizer) Token {
   1795     var tok = self.next();
   1796     while (tok.id == .whitespace) tok = self.next();
   1797     return tok;
   1798 }
   1799 
   1800 /// Try to tokenize a '::' even if not supported by the current language standard.
   1801 pub fn colonColon(self: *Tokenizer) Token {
   1802     var tok = self.nextNoWS();
   1803     if (tok.id == .colon and self.index < self.buf.len and self.buf[self.index] == ':') {
   1804         self.index += 1;
   1805         tok.id = .colon_colon;
   1806     }
   1807     return tok;
   1808 }
   1809 
   1810 test "operators" {
   1811     try expectTokens(
   1812         \\ ! != | || |= = ==
   1813         \\ ( ) { } [ ] . .. ...
   1814         \\ ^ ^= + ++ += - -- -=
   1815         \\ * *= % %= -> : ; / /=
   1816         \\ , & && &= ? < <= <<
   1817         \\  <<= > >= >> >>= ~ # ##
   1818         \\
   1819     , &.{
   1820         .bang,
   1821         .bang_equal,
   1822         .pipe,
   1823         .pipe_pipe,
   1824         .pipe_equal,
   1825         .equal,
   1826         .equal_equal,
   1827         .nl,
   1828         .l_paren,
   1829         .r_paren,
   1830         .l_brace,
   1831         .r_brace,
   1832         .l_bracket,
   1833         .r_bracket,
   1834         .period,
   1835         .period,
   1836         .period,
   1837         .ellipsis,
   1838         .nl,
   1839         .caret,
   1840         .caret_equal,
   1841         .plus,
   1842         .plus_plus,
   1843         .plus_equal,
   1844         .minus,
   1845         .minus_minus,
   1846         .minus_equal,
   1847         .nl,
   1848         .asterisk,
   1849         .asterisk_equal,
   1850         .percent,
   1851         .percent_equal,
   1852         .arrow,
   1853         .colon,
   1854         .semicolon,
   1855         .slash,
   1856         .slash_equal,
   1857         .nl,
   1858         .comma,
   1859         .ampersand,
   1860         .ampersand_ampersand,
   1861         .ampersand_equal,
   1862         .question_mark,
   1863         .angle_bracket_left,
   1864         .angle_bracket_left_equal,
   1865         .angle_bracket_angle_bracket_left,
   1866         .nl,
   1867         .angle_bracket_angle_bracket_left_equal,
   1868         .angle_bracket_right,
   1869         .angle_bracket_right_equal,
   1870         .angle_bracket_angle_bracket_right,
   1871         .angle_bracket_angle_bracket_right_equal,
   1872         .tilde,
   1873         .hash,
   1874         .hash_hash,
   1875         .nl,
   1876     });
   1877 }
   1878 
   1879 test "keywords" {
   1880     try expectTokens(
   1881         \\auto __auto_type break case char const continue default do
   1882         \\double else enum extern float for goto if int 
   1883         \\long register return short signed sizeof static 
   1884         \\struct switch typedef union unsigned void volatile 
   1885         \\while _Bool _Complex _Imaginary inline restrict _Alignas 
   1886         \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local 
   1887         \\__attribute __attribute__
   1888         \\
   1889     , &.{
   1890         .keyword_auto,
   1891         .keyword_auto_type,
   1892         .keyword_break,
   1893         .keyword_case,
   1894         .keyword_char,
   1895         .keyword_const,
   1896         .keyword_continue,
   1897         .keyword_default,
   1898         .keyword_do,
   1899         .nl,
   1900         .keyword_double,
   1901         .keyword_else,
   1902         .keyword_enum,
   1903         .keyword_extern,
   1904         .keyword_float,
   1905         .keyword_for,
   1906         .keyword_goto,
   1907         .keyword_if,
   1908         .keyword_int,
   1909         .nl,
   1910         .keyword_long,
   1911         .keyword_register,
   1912         .keyword_return,
   1913         .keyword_short,
   1914         .keyword_signed,
   1915         .keyword_sizeof,
   1916         .keyword_static,
   1917         .nl,
   1918         .keyword_struct,
   1919         .keyword_switch,
   1920         .keyword_typedef,
   1921         .keyword_union,
   1922         .keyword_unsigned,
   1923         .keyword_void,
   1924         .keyword_volatile,
   1925         .nl,
   1926         .keyword_while,
   1927         .keyword_bool,
   1928         .keyword_complex,
   1929         .keyword_imaginary,
   1930         .keyword_inline,
   1931         .keyword_restrict,
   1932         .keyword_alignas,
   1933         .nl,
   1934         .keyword_alignof,
   1935         .keyword_atomic,
   1936         .keyword_generic,
   1937         .keyword_noreturn,
   1938         .keyword_static_assert,
   1939         .keyword_thread_local,
   1940         .nl,
   1941         .keyword_attribute1,
   1942         .keyword_attribute2,
   1943         .nl,
   1944     });
   1945 }
   1946 
   1947 test "preprocessor keywords" {
   1948     try expectTokens(
   1949         \\#include
   1950         \\#include_next
   1951         \\#embed
   1952         \\#define
   1953         \\#ifdef
   1954         \\#ifndef
   1955         \\#error
   1956         \\#pragma
   1957         \\
   1958     , &.{
   1959         .hash,
   1960         .keyword_include,
   1961         .nl,
   1962         .hash,
   1963         .keyword_include_next,
   1964         .nl,
   1965         .hash,
   1966         .keyword_embed,
   1967         .nl,
   1968         .hash,
   1969         .keyword_define,
   1970         .nl,
   1971         .hash,
   1972         .keyword_ifdef,
   1973         .nl,
   1974         .hash,
   1975         .keyword_ifndef,
   1976         .nl,
   1977         .hash,
   1978         .keyword_error,
   1979         .nl,
   1980         .hash,
   1981         .keyword_pragma,
   1982         .nl,
   1983     });
   1984 }
   1985 
   1986 test "line continuation" {
   1987     try expectTokens(
   1988         \\#define foo \
   1989         \\  bar
   1990         \\"foo\
   1991         \\ bar"
   1992         \\#define "foo"
   1993         \\ "bar"
   1994         \\#define "foo" \
   1995         \\ "bar"
   1996     , &.{
   1997         .hash,
   1998         .keyword_define,
   1999         .identifier,
   2000         .identifier,
   2001         .nl,
   2002         .string_literal,
   2003         .nl,
   2004         .hash,
   2005         .keyword_define,
   2006         .string_literal,
   2007         .nl,
   2008         .string_literal,
   2009         .nl,
   2010         .hash,
   2011         .keyword_define,
   2012         .string_literal,
   2013         .string_literal,
   2014     });
   2015 }
   2016 
   2017 test "string prefix" {
   2018     try expectTokens(
   2019         \\"foo"
   2020         \\u"foo"
   2021         \\u8"foo"
   2022         \\U"foo"
   2023         \\L"foo"
   2024         \\'foo'
   2025         \\u8'A'
   2026         \\u'foo'
   2027         \\U'foo'
   2028         \\L'foo'
   2029         \\
   2030     , &.{
   2031         .string_literal,
   2032         .nl,
   2033         .string_literal_utf_16,
   2034         .nl,
   2035         .string_literal_utf_8,
   2036         .nl,
   2037         .string_literal_utf_32,
   2038         .nl,
   2039         .string_literal_wide,
   2040         .nl,
   2041         .char_literal,
   2042         .nl,
   2043         .char_literal_utf_8,
   2044         .nl,
   2045         .char_literal_utf_16,
   2046         .nl,
   2047         .char_literal_utf_32,
   2048         .nl,
   2049         .char_literal_wide,
   2050         .nl,
   2051     });
   2052 }
   2053 
   2054 test "num suffixes" {
   2055     try expectTokens(
   2056         \\ 1.0f 1.0L 1.0 .0 1. 0x1p0f 0X1p0
   2057         \\ 0l 0lu 0ll 0llu 0
   2058         \\ 1u 1ul 1ull 1
   2059         \\ 1.0i 1.0I
   2060         \\ 1.0if 1.0If 1.0fi 1.0fI
   2061         \\ 1.0il 1.0Il 1.0li 1.0lI
   2062         \\
   2063     , &.{
   2064         .pp_num,
   2065         .pp_num,
   2066         .pp_num,
   2067         .pp_num,
   2068         .pp_num,
   2069         .pp_num,
   2070         .pp_num,
   2071         .nl,
   2072         .pp_num,
   2073         .pp_num,
   2074         .pp_num,
   2075         .pp_num,
   2076         .pp_num,
   2077         .nl,
   2078         .pp_num,
   2079         .pp_num,
   2080         .pp_num,
   2081         .pp_num,
   2082         .nl,
   2083         .pp_num,
   2084         .pp_num,
   2085         .nl,
   2086         .pp_num,
   2087         .pp_num,
   2088         .pp_num,
   2089         .pp_num,
   2090         .nl,
   2091         .pp_num,
   2092         .pp_num,
   2093         .pp_num,
   2094         .pp_num,
   2095         .nl,
   2096     });
   2097 }
   2098 
   2099 test "comments" {
   2100     try expectTokens(
   2101         \\//foo
   2102         \\#foo
   2103     , &.{
   2104         .nl,
   2105         .hash,
   2106         .identifier,
   2107     });
   2108 }
   2109 
   2110 test "extended identifiers" {
   2111     try expectTokens("π“ͺ𝓻𝓸𝓬𝓬", &.{.extended_identifier});
   2112     try expectTokens("uπ“ͺ𝓻𝓸𝓬𝓬", &.{.extended_identifier});
   2113     try expectTokens("u8π“ͺ𝓻𝓸𝓬𝓬", &.{.extended_identifier});
   2114     try expectTokens("Uπ“ͺ𝓻𝓸𝓬𝓬", &.{.extended_identifier});
   2115     try expectTokens("Lπ“ͺ𝓻𝓸𝓬𝓬", &.{.extended_identifier});
   2116     try expectTokens("1β„’", &.{ .pp_num, .extended_identifier });
   2117     try expectTokens("1.β„’", &.{ .pp_num, .extended_identifier });
   2118     try expectTokens("..β„’", &.{ .period, .period, .extended_identifier });
   2119     try expectTokens("0β„’", &.{ .pp_num, .extended_identifier });
   2120     try expectTokens("0b\u{E0000}", &.{ .pp_num, .extended_identifier });
   2121     try expectTokens("0b0\u{E0000}", &.{ .pp_num, .extended_identifier });
   2122     try expectTokens("01\u{E0000}", &.{ .pp_num, .extended_identifier });
   2123     try expectTokens("010\u{E0000}", &.{ .pp_num, .extended_identifier });
   2124     try expectTokens("0x\u{E0000}", &.{ .pp_num, .extended_identifier });
   2125     try expectTokens("0x0\u{E0000}", &.{ .pp_num, .extended_identifier });
   2126     try expectTokens("\"\\0\u{E0000}\"", &.{.string_literal});
   2127     try expectTokens("\"\\x\u{E0000}\"", &.{.string_literal});
   2128     try expectTokens("\"\\u\u{E0000}\"", &.{.string_literal});
   2129     try expectTokens("1e\u{E0000}", &.{ .pp_num, .extended_identifier });
   2130     try expectTokens("1e1\u{E0000}", &.{ .pp_num, .extended_identifier });
   2131 }
   2132 
   2133 test "digraphs" {
   2134     try expectTokens("%:<::><%%>%:%:", &.{ .hash, .l_bracket, .r_bracket, .l_brace, .r_brace, .hash_hash });
   2135     try expectTokens("\"%:<::><%%>%:%:\"", &.{.string_literal});
   2136     try expectTokens("%:%42 %:%", &.{ .hash, .percent, .pp_num, .hash, .percent });
   2137 }
   2138 
   2139 test "C23 keywords" {
   2140     try expectTokensExtra("true false alignas alignof bool static_assert thread_local nullptr typeof_unqual", &.{
   2141         .keyword_true,
   2142         .keyword_false,
   2143         .keyword_c23_alignas,
   2144         .keyword_c23_alignof,
   2145         .keyword_c23_bool,
   2146         .keyword_c23_static_assert,
   2147         .keyword_c23_thread_local,
   2148         .keyword_nullptr,
   2149         .keyword_typeof_unqual,
   2150     }, .c23);
   2151 }
   2152 
   2153 test "Tokenizer fuzz test" {
   2154     var comp = Compilation.init(std.testing.allocator, std.fs.cwd());
   2155     defer comp.deinit();
   2156 
   2157     const input_bytes = std.testing.fuzzInput(.{});
   2158     if (input_bytes.len == 0) return;
   2159 
   2160     const source = try comp.addSourceFromBuffer("fuzz.c", input_bytes);
   2161 
   2162     var tokenizer: Tokenizer = .{
   2163         .buf = source.buf,
   2164         .source = source.id,
   2165         .langopts = comp.langopts,
   2166     };
   2167     while (true) {
   2168         const prev_index = tokenizer.index;
   2169         const tok = tokenizer.next();
   2170         if (tok.id == .eof) break;
   2171         try std.testing.expect(prev_index < tokenizer.index); // ensure that the tokenizer always makes progress
   2172     }
   2173 }
   2174 
   2175 fn expectTokensExtra(contents: []const u8, expected_tokens: []const Token.Id, standard: ?LangOpts.Standard) !void {
   2176     var comp = Compilation.init(std.testing.allocator, std.fs.cwd());
   2177     defer comp.deinit();
   2178     if (standard) |provided| {
   2179         comp.langopts.standard = provided;
   2180     }
   2181     const source = try comp.addSourceFromBuffer("path", contents);
   2182     var tokenizer = Tokenizer{
   2183         .buf = source.buf,
   2184         .source = source.id,
   2185         .langopts = comp.langopts,
   2186     };
   2187     var i: usize = 0;
   2188     while (i < expected_tokens.len) {
   2189         const token = tokenizer.next();
   2190         if (token.id == .whitespace) continue;
   2191         const expected_token_id = expected_tokens[i];
   2192         i += 1;
   2193         if (!std.meta.eql(token.id, expected_token_id)) {
   2194             std.debug.print("expected {s}, found {s}\n", .{ @tagName(expected_token_id), @tagName(token.id) });
   2195             return error.TokensDoNotEqual;
   2196         }
   2197     }
   2198     const last_token = tokenizer.next();
   2199     try std.testing.expect(last_token.id == .eof);
   2200 }
   2201 
   2202 fn expectTokens(contents: []const u8, expected_tokens: []const Token.Id) !void {
   2203     return expectTokensExtra(contents, expected_tokens, null);
   2204 }