/* zig0/tokenizer.h */

/*
 * Include guard. The macro name deliberately has no leading underscore and no
 * double underscore: identifiers of those forms are reserved for the C
 * implementation.
 */
#ifndef ZIG1_TOKENIZER_H
#define ZIG1_TOKENIZER_H
#include <stdbool.h>
#include <stdint.h>
/*
 * X-macro listing every token tag. TAG is invoked once per tag name, in the
 * order written below. TokenizerTag is generated by expanding this list with
 * TOKENIZER_GENERATE_ENUM, so the order here fixes the enumerators' numeric
 * values.
 *
 * Only block comments are safe inside the list: a line comment would absorb
 * the line continuation that follows it.
 */
#define TOKENIZER_FOREACH_TAG_ENUM(TAG) \
    TAG(TOKENIZER_TAG_INVALID) \
    TAG(TOKENIZER_TAG_INVALID_PERIODASTERISKS) \
    TAG(TOKENIZER_TAG_IDENTIFIER) \
    TAG(TOKENIZER_TAG_STRING_LITERAL) \
    TAG(TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE) \
    TAG(TOKENIZER_TAG_CHAR_LITERAL) \
    TAG(TOKENIZER_TAG_EOF) \
    TAG(TOKENIZER_TAG_BUILTIN) \
    TAG(TOKENIZER_TAG_BANG) \
    TAG(TOKENIZER_TAG_PIPE) \
    TAG(TOKENIZER_TAG_PIPE_PIPE) \
    TAG(TOKENIZER_TAG_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_EQUAL) \
    TAG(TOKENIZER_TAG_EQUAL_EQUAL) \
    TAG(TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT) \
    TAG(TOKENIZER_TAG_BANG_EQUAL) \
    TAG(TOKENIZER_TAG_L_PAREN) \
    TAG(TOKENIZER_TAG_R_PAREN) \
    TAG(TOKENIZER_TAG_SEMICOLON) \
    TAG(TOKENIZER_TAG_PERCENT) \
    TAG(TOKENIZER_TAG_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_L_BRACE) \
    TAG(TOKENIZER_TAG_R_BRACE) \
    TAG(TOKENIZER_TAG_L_BRACKET) \
    TAG(TOKENIZER_TAG_R_BRACKET) \
    TAG(TOKENIZER_TAG_PERIOD) \
    TAG(TOKENIZER_TAG_PERIOD_ASTERISK) \
    TAG(TOKENIZER_TAG_ELLIPSIS2) \
    TAG(TOKENIZER_TAG_ELLIPSIS3) \
    TAG(TOKENIZER_TAG_CARET) \
    TAG(TOKENIZER_TAG_CARET_EQUAL) \
    TAG(TOKENIZER_TAG_PLUS) \
    TAG(TOKENIZER_TAG_PLUS_PLUS) \
    TAG(TOKENIZER_TAG_PLUS_EQUAL) \
    TAG(TOKENIZER_TAG_PLUS_PERCENT) \
    TAG(TOKENIZER_TAG_PLUS_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_PLUS_PIPE) \
    TAG(TOKENIZER_TAG_PLUS_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_MINUS) \
    TAG(TOKENIZER_TAG_MINUS_EQUAL) \
    TAG(TOKENIZER_TAG_MINUS_PERCENT) \
    TAG(TOKENIZER_TAG_MINUS_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_MINUS_PIPE) \
    TAG(TOKENIZER_TAG_MINUS_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_ASTERISK) \
    TAG(TOKENIZER_TAG_ASTERISK_EQUAL) \
    TAG(TOKENIZER_TAG_ASTERISK_ASTERISK) \
    TAG(TOKENIZER_TAG_ASTERISK_PERCENT) \
    TAG(TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL) \
    TAG(TOKENIZER_TAG_ASTERISK_PIPE) \
    TAG(TOKENIZER_TAG_ASTERISK_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_ARROW) \
    TAG(TOKENIZER_TAG_COLON) \
    TAG(TOKENIZER_TAG_SLASH) \
    TAG(TOKENIZER_TAG_SLASH_EQUAL) \
    TAG(TOKENIZER_TAG_COMMA) \
    TAG(TOKENIZER_TAG_AMPERSAND) \
    TAG(TOKENIZER_TAG_AMPERSAND_EQUAL) \
    TAG(TOKENIZER_TAG_QUESTION_MARK) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_LEFT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_RIGHT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT) \
    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL) \
    TAG(TOKENIZER_TAG_TILDE) \
    TAG(TOKENIZER_TAG_NUMBER_LITERAL) \
    TAG(TOKENIZER_TAG_DOC_COMMENT) \
    TAG(TOKENIZER_TAG_CONTAINER_DOC_COMMENT) \
    TAG(TOKENIZER_TAG_KEYWORD_ADDRSPACE) \
    TAG(TOKENIZER_TAG_KEYWORD_ALIGN) \
    TAG(TOKENIZER_TAG_KEYWORD_ALLOWZERO) \
    TAG(TOKENIZER_TAG_KEYWORD_AND) \
    TAG(TOKENIZER_TAG_KEYWORD_ANYFRAME) \
    TAG(TOKENIZER_TAG_KEYWORD_ANYTYPE) \
    TAG(TOKENIZER_TAG_KEYWORD_ASM) \
    TAG(TOKENIZER_TAG_KEYWORD_ASYNC) \
    TAG(TOKENIZER_TAG_KEYWORD_AWAIT) \
    TAG(TOKENIZER_TAG_KEYWORD_BREAK) \
    TAG(TOKENIZER_TAG_KEYWORD_CALLCONV) \
    TAG(TOKENIZER_TAG_KEYWORD_CATCH) \
    TAG(TOKENIZER_TAG_KEYWORD_COMPTIME) \
    TAG(TOKENIZER_TAG_KEYWORD_CONST) \
    TAG(TOKENIZER_TAG_KEYWORD_CONTINUE) \
    TAG(TOKENIZER_TAG_KEYWORD_DEFER) \
    TAG(TOKENIZER_TAG_KEYWORD_ELSE) \
    TAG(TOKENIZER_TAG_KEYWORD_ENUM) \
    TAG(TOKENIZER_TAG_KEYWORD_ERRDEFER) \
    TAG(TOKENIZER_TAG_KEYWORD_ERROR) \
    TAG(TOKENIZER_TAG_KEYWORD_EXPORT) \
    TAG(TOKENIZER_TAG_KEYWORD_EXTERN) \
    TAG(TOKENIZER_TAG_KEYWORD_FN) \
    TAG(TOKENIZER_TAG_KEYWORD_FOR) \
    TAG(TOKENIZER_TAG_KEYWORD_IF) \
    TAG(TOKENIZER_TAG_KEYWORD_INLINE) \
    TAG(TOKENIZER_TAG_KEYWORD_NOALIAS) \
    TAG(TOKENIZER_TAG_KEYWORD_NOINLINE) \
    TAG(TOKENIZER_TAG_KEYWORD_NOSUSPEND) \
    TAG(TOKENIZER_TAG_KEYWORD_OPAQUE) \
    TAG(TOKENIZER_TAG_KEYWORD_OR) \
    TAG(TOKENIZER_TAG_KEYWORD_ORELSE) \
    TAG(TOKENIZER_TAG_KEYWORD_PACKED) \
    TAG(TOKENIZER_TAG_KEYWORD_PUB) \
    TAG(TOKENIZER_TAG_KEYWORD_RESUME) \
    TAG(TOKENIZER_TAG_KEYWORD_RETURN) \
    TAG(TOKENIZER_TAG_KEYWORD_LINKSECTION) \
    TAG(TOKENIZER_TAG_KEYWORD_STRUCT) \
    TAG(TOKENIZER_TAG_KEYWORD_SUSPEND) \
    TAG(TOKENIZER_TAG_KEYWORD_SWITCH) \
    TAG(TOKENIZER_TAG_KEYWORD_TEST) \
    TAG(TOKENIZER_TAG_KEYWORD_THREADLOCAL) \
    TAG(TOKENIZER_TAG_KEYWORD_TRY) \
    TAG(TOKENIZER_TAG_KEYWORD_UNION) \
    TAG(TOKENIZER_TAG_KEYWORD_UNREACHABLE) \
    TAG(TOKENIZER_TAG_KEYWORD_USINGNAMESPACE) \
    TAG(TOKENIZER_TAG_KEYWORD_VAR) \
    TAG(TOKENIZER_TAG_KEYWORD_VOLATILE) \
    TAG(TOKENIZER_TAG_KEYWORD_WHILE)
/* Generator for an X-macro list: emits NAME as an enumerator plus its comma. */
#define TOKENIZER_GENERATE_ENUM(NAME) NAME,
/*
 * Generator for an X-macro list: emits a switch case for NAME that returns
 * NAME spelled out as a string literal (via the # stringizing operator).
 * Must be expanded inside a switch in a function returning a string pointer.
 */
#define TOKENIZER_GENERATE_CASE(NAME) case NAME: return #NAME;
/*
 * Token tag enumeration. The enumerators are produced by expanding
 * TOKENIZER_FOREACH_TAG_ENUM with TOKENIZER_GENERATE_ENUM, so they follow the
 * order of that list, starting at 0.
 */
typedef enum {
    TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_ENUM)
} TokenizerTag;
/**
 * Returns a string for the given token tag.
 *
 * @param tag  tag to look up
 * @return pointer to a constant string
 *
 * NOTE(review): the definition is not in this header. Presumably it is built
 * from TOKENIZER_GENERATE_CASE and returns the enumerator's own name; confirm
 * that, and what is returned for a value outside the enum.
 */
const char* tokenizerGetTagString(TokenizerTag tag);
/*
 * Tokenizer states, numbered consecutively from 0 in the order listed.
 *
 * NOTE(review): presumably these are the states of the state machine driven
 * by tokenizerNext; the implementation is not visible in this header.
 */
typedef enum {
    TOKENIZER_STATE_START = 0,
    TOKENIZER_STATE_EXPECT_NEWLINE,
    TOKENIZER_STATE_IDENTIFIER,
    TOKENIZER_STATE_BUILTIN,
    TOKENIZER_STATE_STRING_LITERAL,
    TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
    TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
    TOKENIZER_STATE_CHAR_LITERAL,
    TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
    TOKENIZER_STATE_BACKSLASH,
    TOKENIZER_STATE_EQUAL,
    TOKENIZER_STATE_BANG,
    TOKENIZER_STATE_PIPE,
    TOKENIZER_STATE_MINUS,
    TOKENIZER_STATE_MINUS_PERCENT,
    TOKENIZER_STATE_MINUS_PIPE,
    TOKENIZER_STATE_ASTERISK,
    TOKENIZER_STATE_ASTERISK_PERCENT,
    TOKENIZER_STATE_ASTERISK_PIPE,
    TOKENIZER_STATE_SLASH,
    TOKENIZER_STATE_LINE_COMMENT_START,
    TOKENIZER_STATE_LINE_COMMENT,
    TOKENIZER_STATE_DOC_COMMENT_START,
    TOKENIZER_STATE_DOC_COMMENT,
    TOKENIZER_STATE_INT,
    TOKENIZER_STATE_INT_EXPONENT,
    TOKENIZER_STATE_INT_PERIOD,
    TOKENIZER_STATE_FLOAT,
    TOKENIZER_STATE_FLOAT_EXPONENT,
    TOKENIZER_STATE_AMPERSAND,
    TOKENIZER_STATE_CARET,
    TOKENIZER_STATE_PERCENT,
    TOKENIZER_STATE_PLUS,
    TOKENIZER_STATE_PLUS_PERCENT,
    TOKENIZER_STATE_PLUS_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    TOKENIZER_STATE_PERIOD,
    TOKENIZER_STATE_PERIOD_2,
    TOKENIZER_STATE_PERIOD_ASTERISK,
    TOKENIZER_STATE_SAW_AT_SIGN,
    TOKENIZER_STATE_INVALID,
} TokenizerState;
/*
 * A single token: its tag plus a location expressed as a start/end pair.
 *
 * NOTE(review): start and end are presumably byte offsets into
 * Tokenizer.buffer; whether end is inclusive is not visible in this header.
 * Confirm against the implementation.
 */
typedef struct {
    TokenizerTag tag;
    struct {
        uint32_t start;
        uint32_t end;
    } loc;
} TokenizerToken;
/*
 * Tokenizer handle: a read-only character buffer, its length, and an index.
 *
 * Because buffer_len is const, a Tokenizer can be initialized (for example
 * from the value returned by tokenizerInit) but cannot be assigned to
 * afterwards.
 */
typedef struct {
    const char* buffer;         /* characters are not modified through this pointer */
    const uint32_t buffer_len;  /* fixed at initialization */
    uint32_t index;             /* NOTE(review): presumably the current position in buffer; confirm */
} Tokenizer;
/**
 * Creates a Tokenizer for the given buffer, returned by value.
 *
 * @param buffer  characters to tokenize; not modified through this pointer
 * @param len     length supplied alongside buffer
 * @return a Tokenizer value
 *
 * NOTE(review): the definition is not in this header. Presumably buffer and
 * len are stored in the result without copying (so buffer must outlive the
 * Tokenizer) and index starts at 0; confirm against the implementation.
 */
Tokenizer tokenizerInit(const char* buffer, uint32_t len);
/**
 * Produces a token from the tokenizer, returned by value.
 *
 * @param self  tokenizer to read from; passed as a non-const pointer, so the
 *              call may update it
 * @return the token's tag and location
 *
 * NOTE(review): the definition is not in this header. Presumably each call
 * advances self->index and TOKENIZER_TAG_EOF is returned once the buffer is
 * exhausted; confirm against the implementation.
 */
TokenizerToken tokenizerNext(Tokenizer* self);
#endif