/*
 * Tokenizer interface: token tags, tokenizer states, and the token /
 * tokenizer structs together with the functions that operate on them.
 *
 * NOTE(review): the guard name begins with an underscore followed by an
 * uppercase letter and contains "__"; such identifiers are reserved for the
 * implementation. Kept unchanged here so nothing that tests the guard breaks.
 */
#ifndef _ZIG1_TOKENIZER_H__
#define _ZIG1_TOKENIZER_H__

/* NOTE(review): <stdbool.h> is assumed for the include whose target was
 * missing; nothing in this header uses bool -- confirm against consumers. */
#include <stdbool.h>
/* uint32_t, used by TokenizerToken and Tokenizer. */
#include <stdint.h>

/*
 * X-macro listing every token tag exactly once. TAG is invoked with each
 * enumerator name, in declaration order; the order determines the numeric
 * value of each TokenizerTag enumerator.
 */
#define TOKENIZER_FOREACH_TAG_ENUM(TAG) \
    TAG(TOKENIZER_TAG_INVALID) \
    TAG(TOKENIZER_TAG_INVALID_PERIODASTERISKS) \
    TAG(TOKENIZER_TAG_IDENTIFIER) \
    TAG(TOKENIZER_TAG_STRING_LITERAL) \
    TAG(TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE) \
    TAG(TOKENIZER_TAG_CHAR_LITERAL) \
    TAG(TOKENIZER_TAG_EOF) \
    TAG(TOKENIZER_TAG_BUILTIN) \
    TAG(TOKENIZER_TAG_BANG) \
    TAG(TOKENIZER_TAG_PIPE) \
    TAG(TOKENIZER_TAG_PIPE_PIPE) \
    TAG(TOKENIZER_TAG_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_EQUAL) \
    TAG(TOKENIZER_TAG_EQUAL_EQUAL) \
    TAG(TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT) \
    TAG(TOKENIZER_TAG_BANG_EQUAL) \
    TAG(TOKENIZER_TAG_L_PAREN) \
    TAG(TOKENIZER_TAG_R_PAREN) \
    TAG(TOKENIZER_TAG_SEMICOLON) \
    TAG(TOKENIZER_TAG_PERCENT) \
    TAG(TOKENIZER_TAG_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_L_BRACE) \
    TAG(TOKENIZER_TAG_R_BRACE) \
    TAG(TOKENIZER_TAG_L_BRACKET) \
    TAG(TOKENIZER_TAG_R_BRACKET) \
    TAG(TOKENIZER_TAG_PERIOD) \
    TAG(TOKENIZER_TAG_PERIOD_ASTERISK) \
    TAG(TOKENIZER_TAG_ELLIPSIS2) \
    TAG(TOKENIZER_TAG_ELLIPSIS3) \
    TAG(TOKENIZER_TAG_CARET) \
    TAG(TOKENIZER_TAG_CARET_EQUAL) \
    TAG(TOKENIZER_TAG_PLUS) \
    TAG(TOKENIZER_TAG_PLUS_PLUS) \
    TAG(TOKENIZER_TAG_PLUS_EQUAL) \
    TAG(TOKENIZER_TAG_PLUS_PERCENT) \
    TAG(TOKENIZER_TAG_PLUS_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_PLUS_PIPE) \
    TAG(TOKENIZER_TAG_PLUS_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_MINUS) \
    TAG(TOKENIZER_TAG_MINUS_EQUAL) \
    TAG(TOKENIZER_TAG_MINUS_PERCENT) \
    TAG(TOKENIZER_TAG_MINUS_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_MINUS_PIPE) \
    TAG(TOKENIZER_TAG_MINUS_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_ASTERISK) \
    TAG(TOKENIZER_TAG_ASTERISK_EQUAL) \
    TAG(TOKENIZER_TAG_ASTERISK_ASTERISK) \
    TAG(TOKENIZER_TAG_ASTERISK_PERCENT) \
    TAG(TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_ASTERISK_PIPE) \
    TAG(TOKENIZER_TAG_ASTERISK_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_ARROW) \
    TAG(TOKENIZER_TAG_COLON) \
    TAG(TOKENIZER_TAG_SLASH) \
    TAG(TOKENIZER_TAG_SLASH_EQUAL) \
    TAG(TOKENIZER_TAG_COMMA) \
    TAG(TOKENIZER_TAG_AMPERSAND) \
    TAG(TOKENIZER_TAG_AMPERSAND_EQUAL) \
    TAG(TOKENIZER_TAG_QUESTION_MARK) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_LEFT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_RIGHT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL) \
    TAG(TOKENIZER_TAG_TILDE) \
    TAG(TOKENIZER_TAG_NUMBER_LITERAL) \
    TAG(TOKENIZER_TAG_DOC_COMMENT) \
    TAG(TOKENIZER_TAG_CONTAINER_DOC_COMMENT) \
    TAG(TOKENIZER_TAG_KEYWORD_ADDRSPACE) \
    TAG(TOKENIZER_TAG_KEYWORD_ALIGN) \
    TAG(TOKENIZER_TAG_KEYWORD_ALLOWZERO) \
    TAG(TOKENIZER_TAG_KEYWORD_AND) \
    TAG(TOKENIZER_TAG_KEYWORD_ANYFRAME) \
    TAG(TOKENIZER_TAG_KEYWORD_ANYTYPE) \
    TAG(TOKENIZER_TAG_KEYWORD_ASM) \
    TAG(TOKENIZER_TAG_KEYWORD_ASYNC) \
    TAG(TOKENIZER_TAG_KEYWORD_AWAIT) \
    TAG(TOKENIZER_TAG_KEYWORD_BREAK) \
    TAG(TOKENIZER_TAG_KEYWORD_CALLCONV) \
    TAG(TOKENIZER_TAG_KEYWORD_CATCH) \
    TAG(TOKENIZER_TAG_KEYWORD_COMPTIME) \
    TAG(TOKENIZER_TAG_KEYWORD_CONST) \
    TAG(TOKENIZER_TAG_KEYWORD_CONTINUE) \
    TAG(TOKENIZER_TAG_KEYWORD_DEFER) \
    TAG(TOKENIZER_TAG_KEYWORD_ELSE) \
    TAG(TOKENIZER_TAG_KEYWORD_ENUM) \
    TAG(TOKENIZER_TAG_KEYWORD_ERRDEFER) \
    TAG(TOKENIZER_TAG_KEYWORD_ERROR) \
    TAG(TOKENIZER_TAG_KEYWORD_EXPORT) \
    TAG(TOKENIZER_TAG_KEYWORD_EXTERN) \
    TAG(TOKENIZER_TAG_KEYWORD_FN) \
    TAG(TOKENIZER_TAG_KEYWORD_FOR) \
    TAG(TOKENIZER_TAG_KEYWORD_IF) \
    TAG(TOKENIZER_TAG_KEYWORD_INLINE) \
    TAG(TOKENIZER_TAG_KEYWORD_NOALIAS) \
    TAG(TOKENIZER_TAG_KEYWORD_NOINLINE) \
    TAG(TOKENIZER_TAG_KEYWORD_NOSUSPEND) \
    TAG(TOKENIZER_TAG_KEYWORD_OPAQUE) \
    TAG(TOKENIZER_TAG_KEYWORD_OR) \
    TAG(TOKENIZER_TAG_KEYWORD_ORELSE) \
    TAG(TOKENIZER_TAG_KEYWORD_PACKED) \
    TAG(TOKENIZER_TAG_KEYWORD_PUB) \
    TAG(TOKENIZER_TAG_KEYWORD_RESUME) \
    TAG(TOKENIZER_TAG_KEYWORD_RETURN) \
    TAG(TOKENIZER_TAG_KEYWORD_LINKSECTION) \
    TAG(TOKENIZER_TAG_KEYWORD_STRUCT) \
    TAG(TOKENIZER_TAG_KEYWORD_SUSPEND) \
    TAG(TOKENIZER_TAG_KEYWORD_SWITCH) \
    TAG(TOKENIZER_TAG_KEYWORD_TEST) \
    TAG(TOKENIZER_TAG_KEYWORD_THREADLOCAL) \
    TAG(TOKENIZER_TAG_KEYWORD_TRY) \
    TAG(TOKENIZER_TAG_KEYWORD_UNION) \
    TAG(TOKENIZER_TAG_KEYWORD_UNREACHABLE) \
    TAG(TOKENIZER_TAG_KEYWORD_USINGNAMESPACE) \
    TAG(TOKENIZER_TAG_KEYWORD_VAR) \
    TAG(TOKENIZER_TAG_KEYWORD_VOLATILE) \
    TAG(TOKENIZER_TAG_KEYWORD_WHILE)

/* Expands one tag name into an enumerator followed by a comma. */
#define TOKENIZER_GENERATE_ENUM(ENUM) ENUM,
/* Expands one tag name into a switch case returning the name as a string. */
#define TOKENIZER_GENERATE_CASE(ENUM) case ENUM: return #ENUM;

/* Kind of a token; enumerators come from TOKENIZER_FOREACH_TAG_ENUM, so
 * TOKENIZER_TAG_INVALID is 0 and values increase in list order. */
typedef enum {
    TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_ENUM)
} TokenizerTag;

/*
 * Returns a string for the given tag.
 * NOTE(review): the definition is not in this header; presumably it is built
 * from TOKENIZER_GENERATE_CASE and yields the enumerator's name -- confirm,
 * including what is returned for an out-of-range value.
 */
const char* tokenizerGetTagString(TokenizerTag tag);

/* Tokenizer state identifiers.
 * NOTE(review): presumably the states of the scanner behind tokenizerNext;
 * the transitions live in the implementation file -- confirm there. */
typedef enum {
    TOKENIZER_STATE_START,
    TOKENIZER_STATE_EXPECT_NEWLINE,
    TOKENIZER_STATE_IDENTIFIER,
    TOKENIZER_STATE_BUILTIN,
    TOKENIZER_STATE_STRING_LITERAL,
    TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
    TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
    TOKENIZER_STATE_CHAR_LITERAL,
    TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
    TOKENIZER_STATE_BACKSLASH,
    TOKENIZER_STATE_EQUAL,
    TOKENIZER_STATE_BANG,
    TOKENIZER_STATE_PIPE,
    TOKENIZER_STATE_MINUS,
    TOKENIZER_STATE_MINUS_PERCENT,
    TOKENIZER_STATE_MINUS_PIPE,
    TOKENIZER_STATE_ASTERISK,
    TOKENIZER_STATE_ASTERISK_PERCENT,
    TOKENIZER_STATE_ASTERISK_PIPE,
    TOKENIZER_STATE_SLASH,
    TOKENIZER_STATE_LINE_COMMENT_START,
    TOKENIZER_STATE_LINE_COMMENT,
    TOKENIZER_STATE_DOC_COMMENT_START,
    TOKENIZER_STATE_DOC_COMMENT,
    TOKENIZER_STATE_INT,
    TOKENIZER_STATE_INT_EXPONENT,
    TOKENIZER_STATE_INT_PERIOD,
    TOKENIZER_STATE_FLOAT,
    TOKENIZER_STATE_FLOAT_EXPONENT,
    TOKENIZER_STATE_AMPERSAND,
    TOKENIZER_STATE_CARET,
    TOKENIZER_STATE_PERCENT,
    TOKENIZER_STATE_PLUS,
    TOKENIZER_STATE_PLUS_PERCENT,
    TOKENIZER_STATE_PLUS_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_PERIOD,
    TOKENIZER_STATE_PERIOD_2,
    TOKENIZER_STATE_PERIOD_ASTERISK,
    TOKENIZER_STATE_SAW_AT_SIGN,
    TOKENIZER_STATE_INVALID,
} TokenizerState;

/* One token: its tag plus a start/end location pair.
 * NOTE(review): start/end are presumably byte offsets into Tokenizer.buffer
 * with `end` exclusive -- confirm against the implementation. */
typedef struct {
    TokenizerTag tag;
    struct {
        uint32_t start, end;
    } loc;
} TokenizerToken;

/* Tokenizer handle: an input buffer, its length, and an index into it.
 * buffer_len is const-qualified, so a Tokenizer can be initialized but not
 * assigned to as a whole afterwards. */
typedef struct {
    const char* buffer;
    const uint32_t buffer_len;
    uint32_t index; /* presumably the current scan position -- confirm */
} Tokenizer;

/* Creates a Tokenizer for `buffer` of length `len`.
 * NOTE(review): whether the buffer must be NUL-terminated and who owns it is
 * decided by the implementation -- confirm. */
Tokenizer tokenizerInit(const char* buffer, uint32_t len);

/* Produces the next token from `self`.
 * NOTE(review): presumably advances self->index and yields TOKENIZER_TAG_EOF
 * at end of input -- confirm against the implementation. */
TokenizerToken tokenizerNext(Tokenizer* self);

#endif