tokenizer.h (9620B) - Raw
// Public interface of the tokenizer: token tags, state-machine states, the
// token and tokenizer structs, and the entry points that operate on them.
//
// The include guard deliberately avoids a leading underscore followed by an
// uppercase letter and any double underscore: such identifiers are reserved
// for the C implementation.
#ifndef ZIG0_TOKENIZER_H
#define ZIG0_TOKENIZER_H

#include <stdbool.h>
#include <stdint.h>

// X-macro listing every token tag exactly once, in enumerator order.
// Invoke it with a one-argument macro TAG; TAG is expanded once per tag name.
// The position of a name in this list determines its numeric value in
// TokenizerTag, so reordering entries changes those values.
#define TOKENIZER_FOREACH_TAG_ENUM(TAG)                          \
    TAG(TOKEN_INVALID)                                           \
    TAG(TOKEN_INVALID_PERIODASTERISKS)                           \
    TAG(TOKEN_IDENTIFIER)                                        \
    TAG(TOKEN_STRING_LITERAL)                                    \
    TAG(TOKEN_MULTILINE_STRING_LITERAL_LINE)                     \
    TAG(TOKEN_CHAR_LITERAL)                                      \
    TAG(TOKEN_EOF)                                               \
    TAG(TOKEN_BUILTIN)                                           \
    TAG(TOKEN_BANG)                                              \
    TAG(TOKEN_PIPE)                                              \
    TAG(TOKEN_PIPE_PIPE)                                         \
    TAG(TOKEN_PIPE_EQUAL)                                        \
    TAG(TOKEN_EQUAL)                                             \
    TAG(TOKEN_EQUAL_EQUAL)                                       \
    TAG(TOKEN_EQUAL_ANGLE_BRACKET_RIGHT)                         \
    TAG(TOKEN_BANG_EQUAL)                                        \
    TAG(TOKEN_L_PAREN)                                           \
    TAG(TOKEN_R_PAREN)                                           \
    TAG(TOKEN_SEMICOLON)                                         \
    TAG(TOKEN_PERCENT)                                           \
    TAG(TOKEN_PERCENT_EQUAL)                                     \
    TAG(TOKEN_L_BRACE)                                           \
    TAG(TOKEN_R_BRACE)                                           \
    TAG(TOKEN_L_BRACKET)                                         \
    TAG(TOKEN_R_BRACKET)                                         \
    TAG(TOKEN_PERIOD)                                            \
    TAG(TOKEN_PERIOD_ASTERISK)                                   \
    TAG(TOKEN_ELLIPSIS2)                                         \
    TAG(TOKEN_ELLIPSIS3)                                         \
    TAG(TOKEN_CARET)                                             \
    TAG(TOKEN_CARET_EQUAL)                                       \
    TAG(TOKEN_PLUS)                                              \
    TAG(TOKEN_PLUS_PLUS)                                         \
    TAG(TOKEN_PLUS_EQUAL)                                        \
    TAG(TOKEN_PLUS_PERCENT)                                      \
    TAG(TOKEN_PLUS_PERCENT_EQUAL)                                \
    TAG(TOKEN_PLUS_PIPE)                                         \
    TAG(TOKEN_PLUS_PIPE_EQUAL)                                   \
    TAG(TOKEN_MINUS)                                             \
    TAG(TOKEN_MINUS_EQUAL)                                       \
    TAG(TOKEN_MINUS_PERCENT)                                     \
    TAG(TOKEN_MINUS_PERCENT_EQUAL)                               \
    TAG(TOKEN_MINUS_PIPE)                                        \
    TAG(TOKEN_MINUS_PIPE_EQUAL)                                  \
    TAG(TOKEN_ASTERISK)                                          \
    TAG(TOKEN_ASTERISK_EQUAL)                                    \
    TAG(TOKEN_ASTERISK_ASTERISK)                                 \
    TAG(TOKEN_ASTERISK_PERCENT)                                  \
    TAG(TOKEN_ASTERISK_PERCENT_EQUAL)                            \
    TAG(TOKEN_ASTERISK_PIPE)                                     \
    TAG(TOKEN_ASTERISK_PIPE_EQUAL)                               \
    TAG(TOKEN_ARROW)                                             \
    TAG(TOKEN_COLON)                                             \
    TAG(TOKEN_SLASH)                                             \
    TAG(TOKEN_SLASH_EQUAL)                                       \
    TAG(TOKEN_COMMA)                                             \
    TAG(TOKEN_AMPERSAND)                                         \
    TAG(TOKEN_AMPERSAND_EQUAL)                                   \
    TAG(TOKEN_QUESTION_MARK)                                     \
    TAG(TOKEN_ANGLE_BRACKET_LEFT)                                \
    TAG(TOKEN_ANGLE_BRACKET_LEFT_EQUAL)                          \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT)                  \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL)            \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE)             \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL)       \
    TAG(TOKEN_ANGLE_BRACKET_RIGHT)                               \
    TAG(TOKEN_ANGLE_BRACKET_RIGHT_EQUAL)                         \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT)                 \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL)           \
    TAG(TOKEN_TILDE)                                             \
    TAG(TOKEN_NUMBER_LITERAL)                                    \
    TAG(TOKEN_DOC_COMMENT)                                       \
    TAG(TOKEN_CONTAINER_DOC_COMMENT)                             \
    TAG(TOKEN_KEYWORD_ADDRSPACE)                                 \
    TAG(TOKEN_KEYWORD_ALIGN)                                     \
    TAG(TOKEN_KEYWORD_ALLOWZERO)                                 \
    TAG(TOKEN_KEYWORD_AND)                                       \
    TAG(TOKEN_KEYWORD_ANYFRAME)                                  \
    TAG(TOKEN_KEYWORD_ANYTYPE)                                   \
    TAG(TOKEN_KEYWORD_ASM)                                       \
    TAG(TOKEN_KEYWORD_BREAK)                                     \
    TAG(TOKEN_KEYWORD_CALLCONV)                                  \
    TAG(TOKEN_KEYWORD_CATCH)                                     \
    TAG(TOKEN_KEYWORD_COMPTIME)                                  \
    TAG(TOKEN_KEYWORD_CONST)                                     \
    TAG(TOKEN_KEYWORD_CONTINUE)                                  \
    TAG(TOKEN_KEYWORD_DEFER)                                     \
    TAG(TOKEN_KEYWORD_ELSE)                                      \
    TAG(TOKEN_KEYWORD_ENUM)                                      \
    TAG(TOKEN_KEYWORD_ERRDEFER)                                  \
    TAG(TOKEN_KEYWORD_ERROR)                                     \
    TAG(TOKEN_KEYWORD_EXPORT)                                    \
    TAG(TOKEN_KEYWORD_EXTERN)                                    \
    TAG(TOKEN_KEYWORD_FN)                                        \
    TAG(TOKEN_KEYWORD_FOR)                                       \
    TAG(TOKEN_KEYWORD_IF)                                        \
    TAG(TOKEN_KEYWORD_INLINE)                                    \
    TAG(TOKEN_KEYWORD_NOALIAS)                                   \
    TAG(TOKEN_KEYWORD_NOINLINE)                                  \
    TAG(TOKEN_KEYWORD_NOSUSPEND)                                 \
    TAG(TOKEN_KEYWORD_OPAQUE)                                    \
    TAG(TOKEN_KEYWORD_OR)                                        \
    TAG(TOKEN_KEYWORD_ORELSE)                                    \
    TAG(TOKEN_KEYWORD_PACKED)                                    \
    TAG(TOKEN_KEYWORD_PUB)                                       \
    TAG(TOKEN_KEYWORD_RESUME)                                    \
    TAG(TOKEN_KEYWORD_RETURN)                                    \
    TAG(TOKEN_KEYWORD_LINKSECTION)                               \
    TAG(TOKEN_KEYWORD_STRUCT)                                    \
    TAG(TOKEN_KEYWORD_SUSPEND)                                   \
    TAG(TOKEN_KEYWORD_SWITCH)                                    \
    TAG(TOKEN_KEYWORD_TEST)                                      \
    TAG(TOKEN_KEYWORD_THREADLOCAL)                               \
    TAG(TOKEN_KEYWORD_TRY)                                       \
    TAG(TOKEN_KEYWORD_UNION)                                     \
    TAG(TOKEN_KEYWORD_UNREACHABLE)                               \
    TAG(TOKEN_KEYWORD_VAR)                                       \
    TAG(TOKEN_KEYWORD_VOLATILE)                                  \
    TAG(TOKEN_KEYWORD_WHILE)

// Expander for TOKENIZER_FOREACH_TAG_ENUM: emits `NAME,` so the list becomes
// the body of an enum definition.
#define TOKENIZER_GENERATE_ENUM(ENUM) ENUM,

// Expander for TOKENIZER_FOREACH_TAG_ENUM: emits a `case` label that returns
// the tag's own name as a string literal. Only valid inside a `switch` in a
// function returning a string (presumably the implementation of
// tokenizerGetTagString, which is not part of this header).
#define TOKENIZER_GENERATE_CASE(ENUM) \
    case ENUM:                        \
        return #ENUM;

// Token tag enum, generated from the X-macro list above. Enumerators take the
// values 0, 1, 2, ... in list order (TOKEN_INVALID is 0).
typedef enum {
    TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_ENUM)
} TokenizerTag;

// Returns a string for `tag`. The implementation lives elsewhere; given
// TOKENIZER_GENERATE_CASE it presumably returns the enumerator's name
// (e.g. "TOKEN_EOF") -- confirm against the definition.
const char* tokenizerGetTagString(TokenizerTag tag);

// States of the tokenizer's state machine. They are declared here but only
// consumed by the implementation (presumably tokenizerNext).
typedef enum {
    TOKENIZER_STATE_START,
    TOKENIZER_STATE_EXPECT_NEWLINE,
    TOKENIZER_STATE_IDENTIFIER,
    TOKENIZER_STATE_BUILTIN,
    TOKENIZER_STATE_STRING_LITERAL,
    TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
    TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
    TOKENIZER_STATE_CHAR_LITERAL,
    TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
    TOKENIZER_STATE_BACKSLASH,
    TOKENIZER_STATE_EQUAL,
    TOKENIZER_STATE_BANG,
    TOKENIZER_STATE_PIPE,
    TOKENIZER_STATE_MINUS,
    TOKENIZER_STATE_MINUS_PERCENT,
    TOKENIZER_STATE_MINUS_PIPE,
    TOKENIZER_STATE_ASTERISK,
    TOKENIZER_STATE_ASTERISK_PERCENT,
    TOKENIZER_STATE_ASTERISK_PIPE,
    TOKENIZER_STATE_SLASH,
    TOKENIZER_STATE_LINE_COMMENT_START,
    TOKENIZER_STATE_LINE_COMMENT,
    TOKENIZER_STATE_DOC_COMMENT_START,
    TOKENIZER_STATE_DOC_COMMENT,
    TOKENIZER_STATE_INT,
    TOKENIZER_STATE_INT_EXPONENT,
    TOKENIZER_STATE_INT_PERIOD,
    TOKENIZER_STATE_FLOAT,
    TOKENIZER_STATE_FLOAT_EXPONENT,
    TOKENIZER_STATE_AMPERSAND,
    TOKENIZER_STATE_CARET,
    TOKENIZER_STATE_PERCENT,
    TOKENIZER_STATE_PLUS,
    TOKENIZER_STATE_PLUS_PERCENT,
    TOKENIZER_STATE_PLUS_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_PERIOD,
    TOKENIZER_STATE_PERIOD_2,
    TOKENIZER_STATE_PERIOD_ASTERISK,
    TOKENIZER_STATE_SAW_AT_SIGN,
    TOKENIZER_STATE_INVALID,
} TokenizerState;

// A single token: its tag plus a [start, end] pair locating it.
// NOTE(review): start/end are presumably byte offsets into Tokenizer.buffer
// with `end` exclusive -- confirm against the implementation.
typedef struct {
    TokenizerTag tag;
    struct {
        uint32_t start, end;
    } loc;
} TokenizerToken;

// Tokenizer cursor over a caller-provided character buffer.
// `buffer_len` is const-qualified, so a Tokenizer can be initialized (for
// example from the return value of tokenizerInit) but cannot be assigned to
// as a whole afterwards.
typedef struct {
    const char* buffer;
    const uint32_t buffer_len;
    uint32_t index;
} Tokenizer;

// Creates a Tokenizer for `buffer` of length `len`. The definition is not
// part of this header; the struct stores a pointer, so presumably the buffer
// is borrowed and must outlive the tokenizer -- confirm.
Tokenizer tokenizerInit(const char* buffer, uint32_t len);

// Produces the next token from `self`. The definition is not part of this
// header; presumably it advances self->index and yields TOKEN_EOF at the end
// of the buffer -- confirm.
TokenizerToken tokenizerNext(Tokenizer* self);

#endif