zig0

My attempts at bootstrapping Zig in C.
Log | Files | Refs | README | LICENSE

tokenizer.h (9620B) - Raw


      1 #ifndef _ZIG0_TOKENIZER_H__
      2 #define _ZIG0_TOKENIZER_H__
      3 
      4 #include <stdbool.h>
      5 #include <stdint.h>
      6 
      7 #define TOKENIZER_FOREACH_TAG_ENUM(TAG)                    \
      8     TAG(TOKEN_INVALID)                                     \
      9     TAG(TOKEN_INVALID_PERIODASTERISKS)                     \
     10     TAG(TOKEN_IDENTIFIER)                                  \
     11     TAG(TOKEN_STRING_LITERAL)                              \
     12     TAG(TOKEN_MULTILINE_STRING_LITERAL_LINE)               \
     13     TAG(TOKEN_CHAR_LITERAL)                                \
     14     TAG(TOKEN_EOF)                                         \
     15     TAG(TOKEN_BUILTIN)                                     \
     16     TAG(TOKEN_BANG)                                        \
     17     TAG(TOKEN_PIPE)                                        \
     18     TAG(TOKEN_PIPE_PIPE)                                   \
     19     TAG(TOKEN_PIPE_EQUAL)                                  \
     20     TAG(TOKEN_EQUAL)                                       \
     21     TAG(TOKEN_EQUAL_EQUAL)                                 \
     22     TAG(TOKEN_EQUAL_ANGLE_BRACKET_RIGHT)                   \
     23     TAG(TOKEN_BANG_EQUAL)                                  \
     24     TAG(TOKEN_L_PAREN)                                     \
     25     TAG(TOKEN_R_PAREN)                                     \
     26     TAG(TOKEN_SEMICOLON)                                   \
     27     TAG(TOKEN_PERCENT)                                     \
     28     TAG(TOKEN_PERCENT_EQUAL)                               \
     29     TAG(TOKEN_L_BRACE)                                     \
     30     TAG(TOKEN_R_BRACE)                                     \
     31     TAG(TOKEN_L_BRACKET)                                   \
     32     TAG(TOKEN_R_BRACKET)                                   \
     33     TAG(TOKEN_PERIOD)                                      \
     34     TAG(TOKEN_PERIOD_ASTERISK)                             \
     35     TAG(TOKEN_ELLIPSIS2)                                   \
     36     TAG(TOKEN_ELLIPSIS3)                                   \
     37     TAG(TOKEN_CARET)                                       \
     38     TAG(TOKEN_CARET_EQUAL)                                 \
     39     TAG(TOKEN_PLUS)                                        \
     40     TAG(TOKEN_PLUS_PLUS)                                   \
     41     TAG(TOKEN_PLUS_EQUAL)                                  \
     42     TAG(TOKEN_PLUS_PERCENT)                                \
     43     TAG(TOKEN_PLUS_PERCENT_EQUAL)                          \
     44     TAG(TOKEN_PLUS_PIPE)                                   \
     45     TAG(TOKEN_PLUS_PIPE_EQUAL)                             \
     46     TAG(TOKEN_MINUS)                                       \
     47     TAG(TOKEN_MINUS_EQUAL)                                 \
     48     TAG(TOKEN_MINUS_PERCENT)                               \
     49     TAG(TOKEN_MINUS_PERCENT_EQUAL)                         \
     50     TAG(TOKEN_MINUS_PIPE)                                  \
     51     TAG(TOKEN_MINUS_PIPE_EQUAL)                            \
     52     TAG(TOKEN_ASTERISK)                                    \
     53     TAG(TOKEN_ASTERISK_EQUAL)                              \
     54     TAG(TOKEN_ASTERISK_ASTERISK)                           \
     55     TAG(TOKEN_ASTERISK_PERCENT)                            \
     56     TAG(TOKEN_ASTERISK_PERCENT_EQUAL)                      \
     57     TAG(TOKEN_ASTERISK_PIPE)                               \
     58     TAG(TOKEN_ASTERISK_PIPE_EQUAL)                         \
     59     TAG(TOKEN_ARROW)                                       \
     60     TAG(TOKEN_COLON)                                       \
     61     TAG(TOKEN_SLASH)                                       \
     62     TAG(TOKEN_SLASH_EQUAL)                                 \
     63     TAG(TOKEN_COMMA)                                       \
     64     TAG(TOKEN_AMPERSAND)                                   \
     65     TAG(TOKEN_AMPERSAND_EQUAL)                             \
     66     TAG(TOKEN_QUESTION_MARK)                               \
     67     TAG(TOKEN_ANGLE_BRACKET_LEFT)                          \
     68     TAG(TOKEN_ANGLE_BRACKET_LEFT_EQUAL)                    \
     69     TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT)            \
     70     TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL)      \
     71     TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE)       \
     72     TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL) \
     73     TAG(TOKEN_ANGLE_BRACKET_RIGHT)                         \
     74     TAG(TOKEN_ANGLE_BRACKET_RIGHT_EQUAL)                   \
     75     TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT)           \
     76     TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL)     \
     77     TAG(TOKEN_TILDE)                                       \
     78     TAG(TOKEN_NUMBER_LITERAL)                              \
     79     TAG(TOKEN_DOC_COMMENT)                                 \
     80     TAG(TOKEN_CONTAINER_DOC_COMMENT)                       \
     81     TAG(TOKEN_KEYWORD_ADDRSPACE)                           \
     82     TAG(TOKEN_KEYWORD_ALIGN)                               \
     83     TAG(TOKEN_KEYWORD_ALLOWZERO)                           \
     84     TAG(TOKEN_KEYWORD_AND)                                 \
     85     TAG(TOKEN_KEYWORD_ANYFRAME)                            \
     86     TAG(TOKEN_KEYWORD_ANYTYPE)                             \
     87     TAG(TOKEN_KEYWORD_ASM)                                 \
     88     TAG(TOKEN_KEYWORD_BREAK)                               \
     89     TAG(TOKEN_KEYWORD_CALLCONV)                            \
     90     TAG(TOKEN_KEYWORD_CATCH)                               \
     91     TAG(TOKEN_KEYWORD_COMPTIME)                            \
     92     TAG(TOKEN_KEYWORD_CONST)                               \
     93     TAG(TOKEN_KEYWORD_CONTINUE)                            \
     94     TAG(TOKEN_KEYWORD_DEFER)                               \
     95     TAG(TOKEN_KEYWORD_ELSE)                                \
     96     TAG(TOKEN_KEYWORD_ENUM)                                \
     97     TAG(TOKEN_KEYWORD_ERRDEFER)                            \
     98     TAG(TOKEN_KEYWORD_ERROR)                               \
     99     TAG(TOKEN_KEYWORD_EXPORT)                              \
    100     TAG(TOKEN_KEYWORD_EXTERN)                              \
    101     TAG(TOKEN_KEYWORD_FN)                                  \
    102     TAG(TOKEN_KEYWORD_FOR)                                 \
    103     TAG(TOKEN_KEYWORD_IF)                                  \
    104     TAG(TOKEN_KEYWORD_INLINE)                              \
    105     TAG(TOKEN_KEYWORD_NOALIAS)                             \
    106     TAG(TOKEN_KEYWORD_NOINLINE)                            \
    107     TAG(TOKEN_KEYWORD_NOSUSPEND)                           \
    108     TAG(TOKEN_KEYWORD_OPAQUE)                              \
    109     TAG(TOKEN_KEYWORD_OR)                                  \
    110     TAG(TOKEN_KEYWORD_ORELSE)                              \
    111     TAG(TOKEN_KEYWORD_PACKED)                              \
    112     TAG(TOKEN_KEYWORD_PUB)                                 \
    113     TAG(TOKEN_KEYWORD_RESUME)                              \
    114     TAG(TOKEN_KEYWORD_RETURN)                              \
    115     TAG(TOKEN_KEYWORD_LINKSECTION)                         \
    116     TAG(TOKEN_KEYWORD_STRUCT)                              \
    117     TAG(TOKEN_KEYWORD_SUSPEND)                             \
    118     TAG(TOKEN_KEYWORD_SWITCH)                              \
    119     TAG(TOKEN_KEYWORD_TEST)                                \
    120     TAG(TOKEN_KEYWORD_THREADLOCAL)                         \
    121     TAG(TOKEN_KEYWORD_TRY)                                 \
    122     TAG(TOKEN_KEYWORD_UNION)                               \
    123     TAG(TOKEN_KEYWORD_UNREACHABLE)                         \
    124     TAG(TOKEN_KEYWORD_VAR)                                 \
    125     TAG(TOKEN_KEYWORD_VOLATILE)                            \
    126     TAG(TOKEN_KEYWORD_WHILE)
    127 
    128 #define TOKENIZER_GENERATE_ENUM(ENUM) ENUM,
    129 #define TOKENIZER_GENERATE_CASE(ENUM) \
    130     case ENUM:                        \
    131         return #ENUM;
    132 
    133 // First define the enum
    134 typedef enum {
    135     TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_ENUM)
    136 } TokenizerTag;
    137 
    138 const char* tokenizerGetTagString(TokenizerTag tag);
    139 
    140 typedef enum {
    141     TOKENIZER_STATE_START,
    142     TOKENIZER_STATE_EXPECT_NEWLINE,
    143     TOKENIZER_STATE_IDENTIFIER,
    144     TOKENIZER_STATE_BUILTIN,
    145     TOKENIZER_STATE_STRING_LITERAL,
    146     TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
    147     TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
    148     TOKENIZER_STATE_CHAR_LITERAL,
    149     TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
    150     TOKENIZER_STATE_BACKSLASH,
    151     TOKENIZER_STATE_EQUAL,
    152     TOKENIZER_STATE_BANG,
    153     TOKENIZER_STATE_PIPE,
    154     TOKENIZER_STATE_MINUS,
    155     TOKENIZER_STATE_MINUS_PERCENT,
    156     TOKENIZER_STATE_MINUS_PIPE,
    157     TOKENIZER_STATE_ASTERISK,
    158     TOKENIZER_STATE_ASTERISK_PERCENT,
    159     TOKENIZER_STATE_ASTERISK_PIPE,
    160     TOKENIZER_STATE_SLASH,
    161     TOKENIZER_STATE_LINE_COMMENT_START,
    162     TOKENIZER_STATE_LINE_COMMENT,
    163     TOKENIZER_STATE_DOC_COMMENT_START,
    164     TOKENIZER_STATE_DOC_COMMENT,
    165     TOKENIZER_STATE_INT,
    166     TOKENIZER_STATE_INT_EXPONENT,
    167     TOKENIZER_STATE_INT_PERIOD,
    168     TOKENIZER_STATE_FLOAT,
    169     TOKENIZER_STATE_FLOAT_EXPONENT,
    170     TOKENIZER_STATE_AMPERSAND,
    171     TOKENIZER_STATE_CARET,
    172     TOKENIZER_STATE_PERCENT,
    173     TOKENIZER_STATE_PLUS,
    174     TOKENIZER_STATE_PLUS_PERCENT,
    175     TOKENIZER_STATE_PLUS_PIPE,
    176     TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
    177     TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    178     TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    179     TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
    180     TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    181     TOKENIZER_STATE_PERIOD,
    182     TOKENIZER_STATE_PERIOD_2,
    183     TOKENIZER_STATE_PERIOD_ASTERISK,
    184     TOKENIZER_STATE_SAW_AT_SIGN,
    185     TOKENIZER_STATE_INVALID,
    186 } TokenizerState;
    187 
    188 typedef struct {
    189     TokenizerTag tag;
    190     struct {
    191         uint32_t start, end;
    192     } loc;
    193 } TokenizerToken;
    194 
    195 typedef struct {
    196     const char* buffer;
    197     const uint32_t buffer_len;
    198     uint32_t index;
    199 } Tokenizer;
    200 
    201 Tokenizer tokenizerInit(const char* buffer, uint32_t len);
    202 TokenizerToken tokenizerNext(Tokenizer* self);
    203 
    204 #endif