/* Tokenizer interface: token tags, tokenizer states, and the tokenizer API. */
#ifndef _ZIG0_TOKENIZER_H__
#define _ZIG0_TOKENIZER_H__

#include <stdbool.h>
#include <stdint.h>

/*
 * X-macro listing every token tag exactly once.
 *
 * TAG is a function-like macro that is applied to each tag name in turn, with
 * no separator emitted between applications. The order of this list is
 * significant: TokenizerTag is generated from it, so each tag's numeric value
 * is its position here (TOKEN_INVALID is 0). Append or reorder with care.
 */
#define TOKENIZER_FOREACH_TAG_ENUM(TAG)                        \
    TAG(TOKEN_INVALID)                                         \
    TAG(TOKEN_INVALID_PERIODASTERISKS)                         \
    TAG(TOKEN_IDENTIFIER)                                      \
    TAG(TOKEN_STRING_LITERAL)                                  \
    TAG(TOKEN_MULTILINE_STRING_LITERAL_LINE)                   \
    TAG(TOKEN_CHAR_LITERAL)                                    \
    TAG(TOKEN_EOF)                                             \
    TAG(TOKEN_BUILTIN)                                         \
    TAG(TOKEN_BANG)                                            \
    TAG(TOKEN_PIPE)                                            \
    TAG(TOKEN_PIPE_PIPE)                                       \
    TAG(TOKEN_PIPE_EQUAL)                                      \
    TAG(TOKEN_EQUAL)                                           \
    TAG(TOKEN_EQUAL_EQUAL)                                     \
    TAG(TOKEN_EQUAL_ANGLE_BRACKET_RIGHT)                       \
    TAG(TOKEN_BANG_EQUAL)                                      \
    TAG(TOKEN_L_PAREN)                                         \
    TAG(TOKEN_R_PAREN)                                         \
    TAG(TOKEN_SEMICOLON)                                       \
    TAG(TOKEN_PERCENT)                                         \
    TAG(TOKEN_PERCENT_EQUAL)                                   \
    TAG(TOKEN_L_BRACE)                                         \
    TAG(TOKEN_R_BRACE)                                         \
    TAG(TOKEN_L_BRACKET)                                       \
    TAG(TOKEN_R_BRACKET)                                       \
    TAG(TOKEN_PERIOD)                                          \
    TAG(TOKEN_PERIOD_ASTERISK)                                 \
    TAG(TOKEN_ELLIPSIS2)                                       \
    TAG(TOKEN_ELLIPSIS3)                                       \
    TAG(TOKEN_CARET)                                           \
    TAG(TOKEN_CARET_EQUAL)                                     \
    TAG(TOKEN_PLUS)                                            \
    TAG(TOKEN_PLUS_PLUS)                                       \
    TAG(TOKEN_PLUS_EQUAL)                                      \
    TAG(TOKEN_PLUS_PERCENT)                                    \
    TAG(TOKEN_PLUS_PERCENT_EQUAL)                              \
    TAG(TOKEN_PLUS_PIPE)                                       \
    TAG(TOKEN_PLUS_PIPE_EQUAL)                                 \
    TAG(TOKEN_MINUS)                                           \
    TAG(TOKEN_MINUS_EQUAL)                                     \
    TAG(TOKEN_MINUS_PERCENT)                                   \
    TAG(TOKEN_MINUS_PERCENT_EQUAL)                             \
    TAG(TOKEN_MINUS_PIPE)                                      \
    TAG(TOKEN_MINUS_PIPE_EQUAL)                                \
    TAG(TOKEN_ASTERISK)                                        \
    TAG(TOKEN_ASTERISK_EQUAL)                                  \
    TAG(TOKEN_ASTERISK_ASTERISK)                               \
    TAG(TOKEN_ASTERISK_PERCENT)                                \
    TAG(TOKEN_ASTERISK_PERCENT_EQUAL)                          \
    TAG(TOKEN_ASTERISK_PIPE)                                   \
    TAG(TOKEN_ASTERISK_PIPE_EQUAL)                             \
    TAG(TOKEN_ARROW)                                           \
    TAG(TOKEN_COLON)                                           \
    TAG(TOKEN_SLASH)                                           \
    TAG(TOKEN_SLASH_EQUAL)                                     \
    TAG(TOKEN_COMMA)                                           \
    TAG(TOKEN_AMPERSAND)                                       \
    TAG(TOKEN_AMPERSAND_EQUAL)                                 \
    TAG(TOKEN_QUESTION_MARK)                                   \
    TAG(TOKEN_ANGLE_BRACKET_LEFT)                              \
    TAG(TOKEN_ANGLE_BRACKET_LEFT_EQUAL)                        \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT)                \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL)          \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE)           \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL)     \
    TAG(TOKEN_ANGLE_BRACKET_RIGHT)                             \
    TAG(TOKEN_ANGLE_BRACKET_RIGHT_EQUAL)                       \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT)               \
    TAG(TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL)         \
    TAG(TOKEN_TILDE)                                           \
    TAG(TOKEN_NUMBER_LITERAL)                                  \
    TAG(TOKEN_DOC_COMMENT)                                     \
    TAG(TOKEN_CONTAINER_DOC_COMMENT)                           \
    TAG(TOKEN_KEYWORD_ADDRSPACE)                               \
    TAG(TOKEN_KEYWORD_ALIGN)                                   \
    TAG(TOKEN_KEYWORD_ALLOWZERO)                               \
    TAG(TOKEN_KEYWORD_AND)                                     \
    TAG(TOKEN_KEYWORD_ANYFRAME)                                \
    TAG(TOKEN_KEYWORD_ANYTYPE)                                 \
    TAG(TOKEN_KEYWORD_ASM)                                     \
    TAG(TOKEN_KEYWORD_ASYNC)                                   \
    TAG(TOKEN_KEYWORD_AWAIT)                                   \
    TAG(TOKEN_KEYWORD_BREAK)                                   \
    TAG(TOKEN_KEYWORD_CALLCONV)                                \
    TAG(TOKEN_KEYWORD_CATCH)                                   \
    TAG(TOKEN_KEYWORD_COMPTIME)                                \
    TAG(TOKEN_KEYWORD_CONST)                                   \
    TAG(TOKEN_KEYWORD_CONTINUE)                                \
    TAG(TOKEN_KEYWORD_DEFER)                                   \
    TAG(TOKEN_KEYWORD_ELSE)                                    \
    TAG(TOKEN_KEYWORD_ENUM)                                    \
    TAG(TOKEN_KEYWORD_ERRDEFER)                                \
    TAG(TOKEN_KEYWORD_ERROR)                                   \
    TAG(TOKEN_KEYWORD_EXPORT)                                  \
    TAG(TOKEN_KEYWORD_EXTERN)                                  \
    TAG(TOKEN_KEYWORD_FN)                                      \
    TAG(TOKEN_KEYWORD_FOR)                                     \
    TAG(TOKEN_KEYWORD_IF)                                      \
    TAG(TOKEN_KEYWORD_INLINE)                                  \
    TAG(TOKEN_KEYWORD_NOALIAS)                                 \
    TAG(TOKEN_KEYWORD_NOINLINE)                                \
    TAG(TOKEN_KEYWORD_NOSUSPEND)                               \
    TAG(TOKEN_KEYWORD_OPAQUE)                                  \
    TAG(TOKEN_KEYWORD_OR)                                      \
    TAG(TOKEN_KEYWORD_ORELSE)                                  \
    TAG(TOKEN_KEYWORD_PACKED)                                  \
    TAG(TOKEN_KEYWORD_PUB)                                     \
    TAG(TOKEN_KEYWORD_RESUME)                                  \
    TAG(TOKEN_KEYWORD_RETURN)                                  \
    TAG(TOKEN_KEYWORD_LINKSECTION)                             \
    TAG(TOKEN_KEYWORD_STRUCT)                                  \
    TAG(TOKEN_KEYWORD_SUSPEND)                                 \
    TAG(TOKEN_KEYWORD_SWITCH)                                  \
    TAG(TOKEN_KEYWORD_TEST)                                    \
    TAG(TOKEN_KEYWORD_THREADLOCAL)                             \
    TAG(TOKEN_KEYWORD_TRY)                                     \
    TAG(TOKEN_KEYWORD_UNION)                                   \
    TAG(TOKEN_KEYWORD_UNREACHABLE)                             \
    TAG(TOKEN_KEYWORD_USINGNAMESPACE)                          \
    TAG(TOKEN_KEYWORD_VAR)                                     \
    TAG(TOKEN_KEYWORD_VOLATILE)                                \
    TAG(TOKEN_KEYWORD_WHILE)

/*
 * Generator for TOKENIZER_FOREACH_TAG_ENUM: emits its argument followed by a
 * comma, i.e. one enumerator entry per tag.
 */
#define TOKENIZER_GENERATE_ENUM(ENUM) ENUM,
/*
 * Generator for TOKENIZER_FOREACH_TAG_ENUM: emits a `case` label for the tag
 * that returns the tag's own spelling as a string literal (via `#ENUM`).
 * Only usable inside a `switch` in a function returning `const char*`; the
 * expansion ends with `;`, so no separator is needed between applications.
 */
#define TOKENIZER_GENERATE_CASE(ENUM) \
    case ENUM:                        \
        return #ENUM;

// First define the enum
|
|
typedef enum {
|
|
TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_ENUM)
|
|
} TokenizerTag;
|
|
|
|
/*
 * Returns a string for `tag`.
 * NOTE(review): the definition is not in this header; presumably it is built
 * from TOKENIZER_GENERATE_CASE and returns the enumerator's spelling (e.g.
 * "TOKEN_EOF") -- confirm, including the result for out-of-range values.
 */
const char* tokenizerGetTagString(TokenizerTag tag);

/*
 * Tokenizer states. Enumerators take consecutive values starting with
 * TOKENIZER_STATE_START = 0.
 * NOTE(review): presumably these drive the state machine inside
 * tokenizerNext(); the implementation is not visible from this header.
 */
typedef enum {
    TOKENIZER_STATE_START,
    TOKENIZER_STATE_EXPECT_NEWLINE,
    TOKENIZER_STATE_IDENTIFIER,
    TOKENIZER_STATE_BUILTIN,
    TOKENIZER_STATE_STRING_LITERAL,
    TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
    TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
    TOKENIZER_STATE_CHAR_LITERAL,
    TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
    TOKENIZER_STATE_BACKSLASH,
    TOKENIZER_STATE_EQUAL,
    TOKENIZER_STATE_BANG,
    TOKENIZER_STATE_PIPE,
    TOKENIZER_STATE_MINUS,
    TOKENIZER_STATE_MINUS_PERCENT,
    TOKENIZER_STATE_MINUS_PIPE,
    TOKENIZER_STATE_ASTERISK,
    TOKENIZER_STATE_ASTERISK_PERCENT,
    TOKENIZER_STATE_ASTERISK_PIPE,
    TOKENIZER_STATE_SLASH,
    TOKENIZER_STATE_LINE_COMMENT_START,
    TOKENIZER_STATE_LINE_COMMENT,
    TOKENIZER_STATE_DOC_COMMENT_START,
    TOKENIZER_STATE_DOC_COMMENT,
    TOKENIZER_STATE_INT,
    TOKENIZER_STATE_INT_EXPONENT,
    TOKENIZER_STATE_INT_PERIOD,
    TOKENIZER_STATE_FLOAT,
    TOKENIZER_STATE_FLOAT_EXPONENT,
    TOKENIZER_STATE_AMPERSAND,
    TOKENIZER_STATE_CARET,
    TOKENIZER_STATE_PERCENT,
    TOKENIZER_STATE_PLUS,
    TOKENIZER_STATE_PLUS_PERCENT,
    TOKENIZER_STATE_PLUS_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_PERIOD,
    TOKENIZER_STATE_PERIOD_2,
    TOKENIZER_STATE_PERIOD_ASTERISK,
    TOKENIZER_STATE_SAW_AT_SIGN,
    TOKENIZER_STATE_INVALID,
} TokenizerState;

typedef struct {
|
|
TokenizerTag tag;
|
|
struct {
|
|
uint32_t start, end;
|
|
} loc;
|
|
} TokenizerToken;
|
|
|
|
/*
 * Tokenizer cursor over a caller-supplied character buffer.
 *
 * `buffer_len` is deliberately not const-qualified: a const member would make
 * the whole struct a non-modifiable lvalue, so something as simple as
 * `t = tokenizerInit(buf, len);` would not compile. Callers should still
 * treat it as read-only after initialization.
 */
typedef struct {
    const char* buffer;  /* input text; the struct only stores the pointer */
    uint32_t buffer_len; /* length supplied for `buffer` */
    uint32_t index;      /* current position within `buffer` */
} Tokenizer;

/*
 * Builds a Tokenizer for `buffer` with length `len` and returns it by value.
 * NOTE(review): presumably `buffer` is not copied and must outlive the
 * returned Tokenizer, and `len` is its size in bytes -- confirm in the
 * implementation.
 */
Tokenizer tokenizerInit(const char* buffer, uint32_t len);
/*
 * Produces a token from `self` and returns it by value. `self` is taken by
 * non-const pointer, so the call may update the tokenizer.
 * NOTE(review): presumably advances `self->index` and yields TOKEN_EOF once
 * the buffer is exhausted -- confirm in the implementation.
 */
TokenizerToken tokenizerNext(Tokenizer* self);

#endif /* _ZIG0_TOKENIZER_H__ */