Files
zig0/tokenizer.h
2024-12-13 09:39:02 +02:00

197 lines
5.4 KiB
C

/*
 * Include guard.
 *
 * Identifiers that contain a double underscore (or start with an underscore
 * followed by an uppercase letter) are reserved for the C implementation, so
 * the guard macro uses a plain project-prefixed name.
 */
#ifndef ZIG1_TOKENIZER_H
#define ZIG1_TOKENIZER_H
#include <stdbool.h>
#include <stdint.h>
/**
 * Kind of a token; stored in the `tag` field of `token`.
 *
 * The enumerators rely on implicit, sequential numbering starting at 0, so
 * inserting or reordering entries changes every following value.
 */
typedef enum {
    /* Invalid input, identifiers, string/char literals, end of file. */
    TOKEN_TAG_INVALID = 0,
    TOKEN_TAG_INVALID_PERIODASTERISKS,
    TOKEN_TAG_IDENTIFIER,
    TOKEN_TAG_STRING_LITERAL,
    TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE,
    TOKEN_TAG_CHAR_LITERAL,
    TOKEN_TAG_EOF,
    TOKEN_TAG_BUILTIN,

    /* Operators and punctuation. */
    TOKEN_TAG_BANG,
    TOKEN_TAG_PIPE,
    TOKEN_TAG_PIPE_PIPE,
    TOKEN_TAG_PIPE_EQUAL,
    TOKEN_TAG_EQUAL,
    TOKEN_TAG_EQUAL_EQUAL,
    TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT,
    TOKEN_TAG_BANG_EQUAL,
    TOKEN_TAG_L_PAREN,
    TOKEN_TAG_R_PAREN,
    TOKEN_TAG_SEMICOLON,
    TOKEN_TAG_PERCENT,
    TOKEN_TAG_PERCENT_EQUAL,
    TOKEN_TAG_L_BRACE,
    TOKEN_TAG_R_BRACE,
    TOKEN_TAG_L_BRACKET,
    TOKEN_TAG_R_BRACKET,
    TOKEN_TAG_PERIOD,
    TOKEN_TAG_PERIOD_ASTERISK,
    TOKEN_TAG_ELLIPSIS2,
    TOKEN_TAG_ELLIPSIS3,
    TOKEN_TAG_CARET,
    TOKEN_TAG_CARET_EQUAL,
    TOKEN_TAG_PLUS,
    TOKEN_TAG_PLUS_PLUS,
    TOKEN_TAG_PLUS_EQUAL,
    TOKEN_TAG_PLUS_PERCENT,
    TOKEN_TAG_PLUS_PERCENT_EQUAL,
    TOKEN_TAG_PLUS_PIPE,
    TOKEN_TAG_PLUS_PIPE_EQUAL,
    TOKEN_TAG_MINUS,
    TOKEN_TAG_MINUS_EQUAL,
    TOKEN_TAG_MINUS_PERCENT,
    TOKEN_TAG_MINUS_PERCENT_EQUAL,
    TOKEN_TAG_MINUS_PIPE,
    TOKEN_TAG_MINUS_PIPE_EQUAL,
    TOKEN_TAG_ASTERISK,
    TOKEN_TAG_ASTERISK_EQUAL,
    TOKEN_TAG_ASTERISK_ASTERISK,
    TOKEN_TAG_ASTERISK_PERCENT,
    TOKEN_TAG_ASTERISK_PERCENT_EQUAL,
    TOKEN_TAG_ASTERISK_PIPE,
    TOKEN_TAG_ASTERISK_PIPE_EQUAL,
    TOKEN_TAG_ARROW,
    TOKEN_TAG_COLON,
    TOKEN_TAG_SLASH,
    TOKEN_TAG_SLASH_EQUAL,
    TOKEN_TAG_COMMA,
    TOKEN_TAG_AMPERSAND,
    TOKEN_TAG_AMPERSAND_EQUAL,
    TOKEN_TAG_QUESTION_MARK,
    TOKEN_TAG_ANGLE_BRACKET_LEFT,
    TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL,
    TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL,
    TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL,
    TOKEN_TAG_ANGLE_BRACKET_RIGHT,
    TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL,
    TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL,
    TOKEN_TAG_TILDE,

    /* Number literals and doc comments. */
    TOKEN_TAG_NUMBER_LITERAL,
    TOKEN_TAG_DOC_COMMENT,
    TOKEN_TAG_CONTAINER_DOC_COMMENT,

    /* Keywords. */
    TOKEN_TAG_KEYWORD_ADDRSPACE,
    TOKEN_TAG_KEYWORD_ALIGN,
    TOKEN_TAG_KEYWORD_ALLOWZERO,
    TOKEN_TAG_KEYWORD_AND,
    TOKEN_TAG_KEYWORD_ANYFRAME,
    TOKEN_TAG_KEYWORD_ANYTYPE,
    TOKEN_TAG_KEYWORD_ASM,
    TOKEN_TAG_KEYWORD_ASYNC,
    TOKEN_TAG_KEYWORD_AWAIT,
    TOKEN_TAG_KEYWORD_BREAK,
    TOKEN_TAG_KEYWORD_CALLCONV,
    TOKEN_TAG_KEYWORD_CATCH,
    TOKEN_TAG_KEYWORD_COMPTIME,
    TOKEN_TAG_KEYWORD_CONST,
    TOKEN_TAG_KEYWORD_CONTINUE,
    TOKEN_TAG_KEYWORD_DEFER,
    TOKEN_TAG_KEYWORD_ELSE,
    TOKEN_TAG_KEYWORD_ENUM,
    TOKEN_TAG_KEYWORD_ERRDEFER,
    TOKEN_TAG_KEYWORD_ERROR,
    TOKEN_TAG_KEYWORD_EXPORT,
    TOKEN_TAG_KEYWORD_EXTERN,
    TOKEN_TAG_KEYWORD_FN,
    TOKEN_TAG_KEYWORD_FOR,
    TOKEN_TAG_KEYWORD_IF,
    TOKEN_TAG_KEYWORD_INLINE,
    TOKEN_TAG_KEYWORD_NOALIAS,
    TOKEN_TAG_KEYWORD_NOINLINE,
    TOKEN_TAG_KEYWORD_NOSUSPEND,
    TOKEN_TAG_KEYWORD_OPAQUE,
    TOKEN_TAG_KEYWORD_OR,
    TOKEN_TAG_KEYWORD_ORELSE,
    TOKEN_TAG_KEYWORD_PACKED,
    TOKEN_TAG_KEYWORD_PUB,
    TOKEN_TAG_KEYWORD_RESUME,
    TOKEN_TAG_KEYWORD_RETURN,
    TOKEN_TAG_KEYWORD_LINKSECTION,
    TOKEN_TAG_KEYWORD_STRUCT,
    TOKEN_TAG_KEYWORD_SUSPEND,
    TOKEN_TAG_KEYWORD_SWITCH,
    TOKEN_TAG_KEYWORD_TEST,
    TOKEN_TAG_KEYWORD_THREADLOCAL,
    TOKEN_TAG_KEYWORD_TRY,
    TOKEN_TAG_KEYWORD_UNION,
    TOKEN_TAG_KEYWORD_UNREACHABLE,
    TOKEN_TAG_KEYWORD_USINGNAMESPACE,
    TOKEN_TAG_KEYWORD_VAR,
    TOKEN_TAG_KEYWORD_VOLATILE,
    TOKEN_TAG_KEYWORD_WHILE,
} token_tag;
/**
 * Scanner states.
 *
 * NOTE(review): presumably the states of the state machine driven by
 * tokenizer_next(); nothing in this header uses the type, so confirm against
 * the implementation file.
 *
 * Values are implicit and sequential starting at 0; keep the order stable.
 */
typedef enum {
    TOKEN_STATE_START = 0,
    TOKEN_STATE_EXPECT_NEWLINE,

    /* Identifiers and builtins. */
    TOKEN_STATE_IDENTIFIER,
    TOKEN_STATE_BUILTIN,

    /* String and character literals. */
    TOKEN_STATE_STRING_LITERAL,
    TOKEN_STATE_STRING_LITERAL_BACKSLASH,
    TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE,
    TOKEN_STATE_CHAR_LITERAL,
    TOKEN_STATE_CHAR_LITERAL_BACKSLASH,
    TOKEN_STATE_BACKSLASH,

    /* Operator prefixes. */
    TOKEN_STATE_EQUAL,
    TOKEN_STATE_BANG,
    TOKEN_STATE_PIPE,
    TOKEN_STATE_MINUS,
    TOKEN_STATE_MINUS_PERCENT,
    TOKEN_STATE_MINUS_PIPE,
    TOKEN_STATE_ASTERISK,
    TOKEN_STATE_ASTERISK_PERCENT,
    TOKEN_STATE_ASTERISK_PIPE,
    TOKEN_STATE_SLASH,

    /* Comments. */
    TOKEN_STATE_LINE_COMMENT_START,
    TOKEN_STATE_LINE_COMMENT,
    TOKEN_STATE_DOC_COMMENT_START,
    TOKEN_STATE_DOC_COMMENT,

    /* Numeric literals. */
    TOKEN_STATE_INT,
    TOKEN_STATE_INT_EXPONENT,
    TOKEN_STATE_INT_PERIOD,
    TOKEN_STATE_FLOAT,
    TOKEN_STATE_FLOAT_EXPONENT,

    /* More operator prefixes. */
    TOKEN_STATE_AMPERSAND,
    TOKEN_STATE_CARET,
    TOKEN_STATE_PERCENT,
    TOKEN_STATE_PLUS,
    TOKEN_STATE_PLUS_PERCENT,
    TOKEN_STATE_PLUS_PIPE,
    TOKEN_STATE_ANGLE_BRACKET_LEFT,
    TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
    TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
    TOKEN_STATE_ANGLE_BRACKET_RIGHT,
    TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
    TOKEN_STATE_PERIOD,
    TOKEN_STATE_PERIOD_2,
    TOKEN_STATE_PERIOD_ASTERISK,
    TOKEN_STATE_SAW_AT_SIGN,

    TOKEN_STATE_INVALID,
} token_state;
/**
 * A single token: its kind plus a location range.
 *
 * NOTE(review): loc.start / loc.end are presumably byte offsets into the
 * tokenizer's source buffer (with `end` likely exclusive) -- confirm against
 * the implementation of tokenizer_next().
 */
typedef struct {
    token_tag tag; /* kind of token */
    struct {
        uint32_t start;
        uint32_t end;
    } loc;
} token;
/**
 * Tokenizer state: a read-only view of the input plus a cursor.
 *
 * The buffer is held by pointer only; this header shows no copy being made,
 * so the caller presumably must keep it alive while the tokenizer is in use
 * (confirm against tokenizer_init()).
 */
typedef struct {
    const char *buffer;  /* input text, not modified through this pointer */
    uint32_t buffer_len; /* length associated with `buffer` */
    uint32_t index;      /* cursor; presumably the current offset into `buffer` */
} tokenizer;
/**
 * Create a tokenizer for the given input.
 *
 * The definition lives outside this header. NOTE(review): presumably it
 * stores `buffer`/`len` in the returned struct and starts at index 0 --
 * confirm against the implementation.
 *
 * @param buffer  input text to tokenize
 * @param len     length associated with `buffer`
 * @return        a tokenizer value (returned by value)
 */
tokenizer tokenizer_init(const char *buffer, uint32_t len);
/**
 * Produce a token from the tokenizer.
 *
 * The definition lives outside this header. NOTE(review): presumably it scans
 * from self->index, advances it past the returned token, and yields
 * TOKEN_TAG_EOF once the input is exhausted -- confirm against the
 * implementation.
 *
 * @param self  tokenizer to read from; passed by non-const pointer, so it may
 *              be modified
 * @return      the token, by value
 */
token tokenizer_next(tokenizer *self);
#endif