#include #include #include #include #include "tokenizer.h" typedef struct { const char* keyword; TokenizerTag tag; } KeywordMap; const char* tokenizerGetTagString(TokenizerTag tag) { switch (tag) { TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_CASE) default: return "UNKNOWN"; } } const KeywordMap keywords[] = { { "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE }, { "align", TOKENIZER_TAG_KEYWORD_ALIGN }, { "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO }, { "and", TOKENIZER_TAG_KEYWORD_AND }, { "anyframe", TOKENIZER_TAG_KEYWORD_ANYFRAME }, { "anytype", TOKENIZER_TAG_KEYWORD_ANYTYPE }, { "asm", TOKENIZER_TAG_KEYWORD_ASM }, { "async", TOKENIZER_TAG_KEYWORD_ASYNC }, { "await", TOKENIZER_TAG_KEYWORD_AWAIT }, { "break", TOKENIZER_TAG_KEYWORD_BREAK }, { "callconv", TOKENIZER_TAG_KEYWORD_CALLCONV }, { "catch", TOKENIZER_TAG_KEYWORD_CATCH }, { "comptime", TOKENIZER_TAG_KEYWORD_COMPTIME }, { "const", TOKENIZER_TAG_KEYWORD_CONST }, { "continue", TOKENIZER_TAG_KEYWORD_CONTINUE }, { "defer", TOKENIZER_TAG_KEYWORD_DEFER }, { "else", TOKENIZER_TAG_KEYWORD_ELSE }, { "enum", TOKENIZER_TAG_KEYWORD_ENUM }, { "errdefer", TOKENIZER_TAG_KEYWORD_ERRDEFER }, { "error", TOKENIZER_TAG_KEYWORD_ERROR }, { "export", TOKENIZER_TAG_KEYWORD_EXPORT }, { "extern", TOKENIZER_TAG_KEYWORD_EXTERN }, { "fn", TOKENIZER_TAG_KEYWORD_FN }, { "for", TOKENIZER_TAG_KEYWORD_FOR }, { "if", TOKENIZER_TAG_KEYWORD_IF }, { "inline", TOKENIZER_TAG_KEYWORD_INLINE }, { "linksection", TOKENIZER_TAG_KEYWORD_LINKSECTION }, { "noalias", TOKENIZER_TAG_KEYWORD_NOALIAS }, { "noinline", TOKENIZER_TAG_KEYWORD_NOINLINE }, { "nosuspend", TOKENIZER_TAG_KEYWORD_NOSUSPEND }, { "opaque", TOKENIZER_TAG_KEYWORD_OPAQUE }, { "or", TOKENIZER_TAG_KEYWORD_OR }, { "orelse", TOKENIZER_TAG_KEYWORD_ORELSE }, { "packed", TOKENIZER_TAG_KEYWORD_PACKED }, { "pub", TOKENIZER_TAG_KEYWORD_PUB }, { "resume", TOKENIZER_TAG_KEYWORD_RESUME }, { "return", TOKENIZER_TAG_KEYWORD_RETURN }, { "struct", TOKENIZER_TAG_KEYWORD_STRUCT }, { "suspend", TOKENIZER_TAG_KEYWORD_SUSPEND }, { "switch", TOKENIZER_TAG_KEYWORD_SWITCH }, { "test", TOKENIZER_TAG_KEYWORD_TEST }, { "threadlocal", TOKENIZER_TAG_KEYWORD_THREADLOCAL }, { "try", TOKENIZER_TAG_KEYWORD_TRY }, { "union", TOKENIZER_TAG_KEYWORD_UNION }, { "unreachable", TOKENIZER_TAG_KEYWORD_UNREACHABLE }, { "usingnamespace", TOKENIZER_TAG_KEYWORD_USINGNAMESPACE }, { "var", TOKENIZER_TAG_KEYWORD_VAR }, { "volatile", TOKENIZER_TAG_KEYWORD_VOLATILE }, { "while", TOKENIZER_TAG_KEYWORD_WHILE } }; // TODO binary search static TokenizerTag getKeyword(const char* bytes, const uint32_t len) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) { size_t klen = strlen(keywords[i].keyword); size_t minlen = klen < len ? klen : len; int cmp = strncmp(bytes, keywords[i].keyword, minlen); if (cmp == 0) { if (len == klen) { return keywords[i].tag; } else { return TOKENIZER_TAG_INVALID; } } else if (cmp < 0) { return TOKENIZER_TAG_INVALID; } } return TOKENIZER_TAG_INVALID; } Tokenizer tokenizerInit(const char* buffer, const uint32_t len) { return (Tokenizer) { .buffer = buffer, .buffer_len = len, .index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0, }; } TokenizerToken tokenizerNext(Tokenizer* self) { TokenizerToken result = (TokenizerToken) { .tag = TOKENIZER_TAG_INVALID, .loc = { .start = 0, }, }; TokenizerState state = TOKENIZER_STATE_START; state: switch (state) { case TOKENIZER_STATE_START: switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { return (TokenizerToken) { .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, .end = self->index, } }; } else { state = TOKENIZER_STATE_INVALID; goto state; } case ' ': case '\n': case '\t': case '\r': self->index++; result.loc.start = self->index; goto state; case '"': result.tag = TOKENIZER_TAG_STRING_LITERAL; state = TOKENIZER_STATE_STRING_LITERAL; goto state; case '\'': result.tag = TOKENIZER_TAG_CHAR_LITERAL; state = TOKENIZER_STATE_CHAR_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': result.tag = TOKENIZER_TAG_IDENTIFIER; state = TOKENIZER_STATE_IDENTIFIER; goto state; case '@': state = TOKENIZER_STATE_SAW_AT_SIGN; goto state; case '=': state = TOKENIZER_STATE_EQUAL; goto state; case '!': state = TOKENIZER_STATE_BANG; goto state; case '|': state = TOKENIZER_STATE_PIPE; goto state; case '(': result.tag = TOKENIZER_TAG_L_PAREN; self->index++; break; case ')': result.tag = TOKENIZER_TAG_R_PAREN; self->index++; break; case '[': result.tag = TOKENIZER_TAG_L_BRACKET; self->index++; break; case ']': result.tag = TOKENIZER_TAG_R_BRACKET; self->index++; break; case ';': result.tag = TOKENIZER_TAG_SEMICOLON; self->index++; break; case ',': result.tag = TOKENIZER_TAG_COMMA; self->index++; break; case '?': result.tag = TOKENIZER_TAG_QUESTION_MARK; self->index++; break; case ':': result.tag = TOKENIZER_TAG_COLON; self->index++; break; case '%': state = TOKENIZER_STATE_PERCENT; goto state; case '*': state = TOKENIZER_STATE_ASTERISK; goto state; case '+': state = TOKENIZER_STATE_PLUS; goto state; case '<': state = TOKENIZER_STATE_ANGLE_BRACKET_LEFT; goto state; case '>': state = TOKENIZER_STATE_ANGLE_BRACKET_RIGHT; goto state; case '^': state = TOKENIZER_STATE_CARET; goto state; case '\\': result.tag = TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE; state = TOKENIZER_STATE_BACKSLASH; goto state; case '{': result.tag = TOKENIZER_TAG_L_BRACE; self->index++; break; case '}': result.tag = TOKENIZER_TAG_R_BRACE; self->index++; break; case '~': result.tag = TOKENIZER_TAG_TILDE; self->index++; break; case '.': state = TOKENIZER_STATE_PERIOD; goto state; case '-': state = TOKENIZER_STATE_MINUS; goto state; case '/': state = TOKENIZER_STATE_SLASH; goto state; case '&': state = TOKENIZER_STATE_AMPERSAND; goto state; case '0' ... '9': result.tag = TOKENIZER_TAG_NUMBER_LITERAL; self->index++; state = TOKENIZER_STATE_INT; goto state; default: state = TOKENIZER_STATE_INVALID; goto state; }; break; case TOKENIZER_STATE_EXPECT_NEWLINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { result.tag = TOKENIZER_TAG_INVALID; } else { state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': self->index++; result.loc.start = self->index; state = TOKENIZER_STATE_START; goto state; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_INVALID: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { result.tag = TOKENIZER_TAG_INVALID; } else { state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': result.tag = TOKENIZER_TAG_INVALID; break; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_SAW_AT_SIGN: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKENIZER_TAG_INVALID; break; case '"': result.tag = TOKENIZER_TAG_IDENTIFIER; state = TOKENIZER_STATE_STRING_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': result.tag = TOKENIZER_TAG_BUILTIN; state = TOKENIZER_STATE_BUILTIN; goto state; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_AMPERSAND: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_AMPERSAND_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_AMPERSAND; break; } break; case TOKENIZER_STATE_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_ASTERISK_EQUAL; self->index++; break; case '*': result.tag = TOKENIZER_TAG_ASTERISK_ASTERISK; self->index++; break; case '%': state = TOKENIZER_STATE_ASTERISK_PERCENT; goto state; case '|': state = TOKENIZER_STATE_ASTERISK_PIPE; goto state; default: result.tag = TOKENIZER_TAG_ASTERISK; break; } break; case TOKENIZER_STATE_ASTERISK_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_ASTERISK_PERCENT; break; } break; case TOKENIZER_STATE_ASTERISK_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_ASTERISK_PIPE_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_ASTERISK_PIPE; break; } break; case TOKENIZER_STATE_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_PERCENT; break; } break; case TOKENIZER_STATE_PLUS: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_PLUS_EQUAL; self->index++; break; case '+': result.tag = TOKENIZER_TAG_PLUS_PLUS; self->index++; break; case '%': state = TOKENIZER_STATE_PLUS_PERCENT; goto state; case '|': state = TOKENIZER_STATE_PLUS_PIPE; goto state; default: result.tag = TOKENIZER_TAG_PLUS; break; } break; case TOKENIZER_STATE_PLUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_PLUS_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_PLUS_PERCENT; break; } break; case TOKENIZER_STATE_PLUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_PLUS_PIPE_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_PLUS_PIPE; break; } break; case TOKENIZER_STATE_CARET: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_CARET_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_CARET; break; } break; case TOKENIZER_STATE_IDENTIFIER: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': state = TOKENIZER_STATE_IDENTIFIER; goto state; default:; // Once we're at C23, this semicolon can be removed. const char* start = self->buffer + result.loc.start; uint32_t len = self->index - result.loc.start; TokenizerTag tag = getKeyword(start, len); if (tag != TOKENIZER_TAG_INVALID) { result.tag = tag; } } break; case TOKENIZER_STATE_BUILTIN: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': state = TOKENIZER_STATE_BUILTIN; goto state; break; } break; case TOKENIZER_STATE_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: result.tag = TOKENIZER_TAG_INVALID; break; case '\\': state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; case '\n': result.tag = TOKENIZER_TAG_INVALID; break; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_STRING_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { result.tag = TOKENIZER_TAG_INVALID; } break; case '\n': result.tag = TOKENIZER_TAG_INVALID; break; case '\\': state = TOKENIZER_STATE_STRING_LITERAL_BACKSLASH; goto state; case '"': self->index++; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_STRING_LITERAL; goto state; } break; case TOKENIZER_STATE_STRING_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKENIZER_TAG_INVALID; break; default: state = TOKENIZER_STATE_STRING_LITERAL; goto state; } break; case TOKENIZER_STATE_CHAR_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { result.tag = TOKENIZER_TAG_INVALID; } break; case '\n': result.tag = TOKENIZER_TAG_INVALID; break; case '\\': state = TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH; goto state; case '\'': self->index++; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_CHAR_LITERAL; goto state; } break; case TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { result.tag = TOKENIZER_TAG_INVALID; } break; case '\n': result.tag = TOKENIZER_TAG_INVALID; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_CHAR_LITERAL; goto state; } break; case TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': break; case '\r': if (self->buffer[self->index + 1] != '\n') { state = TOKENIZER_STATE_INVALID; goto state; } break; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; } break; case TOKENIZER_STATE_BANG: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_BANG_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_BANG; break; } break; case TOKENIZER_STATE_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_PIPE_EQUAL; self->index++; break; case '|': result.tag = TOKENIZER_TAG_PIPE_PIPE; self->index++; break; default: result.tag = TOKENIZER_TAG_PIPE; break; } break; case TOKENIZER_STATE_EQUAL: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_EQUAL_EQUAL; self->index++; break; case '>': result.tag = TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT; self->index++; break; default: result.tag = TOKENIZER_TAG_EQUAL; break; } break; case TOKENIZER_STATE_MINUS: self->index++; switch (self->buffer[self->index]) { case '>': result.tag = TOKENIZER_TAG_ARROW; self->index++; break; case '=': result.tag = TOKENIZER_TAG_MINUS_EQUAL; self->index++; break; case '%': state = TOKENIZER_STATE_MINUS_PERCENT; goto state; case '|': state = TOKENIZER_STATE_MINUS_PIPE; goto state; default: result.tag = TOKENIZER_TAG_MINUS; break; } break; case TOKENIZER_STATE_MINUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_MINUS_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_MINUS_PERCENT; break; } break; case TOKENIZER_STATE_MINUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_MINUS_PIPE_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_MINUS_PIPE; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '<': state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; goto state; case '=': result.tag = TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_ANGLE_BRACKET_LEFT; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; case '|': state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; goto state; default: result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '>': state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; goto state; case '=': result.tag = TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_ANGLE_BRACKET_RIGHT; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; break; } break; case TOKENIZER_STATE_PERIOD: self->index++; switch (self->buffer[self->index]) { case '.': state = TOKENIZER_STATE_PERIOD_2; goto state; case '*': state = TOKENIZER_STATE_PERIOD_ASTERISK; goto state; default: result.tag = TOKENIZER_TAG_PERIOD; break; } break; case TOKENIZER_STATE_PERIOD_2: self->index++; switch (self->buffer[self->index]) { case '.': result.tag = TOKENIZER_TAG_ELLIPSIS3; self->index++; break; default: result.tag = TOKENIZER_TAG_ELLIPSIS2; break; } break; case TOKENIZER_STATE_PERIOD_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '*': result.tag = TOKENIZER_TAG_INVALID_PERIODASTERISKS; break; default: result.tag = TOKENIZER_TAG_PERIOD_ASTERISK; break; } break; case TOKENIZER_STATE_SLASH: self->index++; switch (self->buffer[self->index]) { case '/': state = TOKENIZER_STATE_LINE_COMMENT_START; goto state; case '=': result.tag = TOKENIZER_TAG_SLASH_EQUAL; self->index++; break; default: result.tag = TOKENIZER_TAG_SLASH; break; } break; case TOKENIZER_STATE_LINE_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { return (TokenizerToken) { .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, .end = self->index } }; } break; case '!': result.tag = TOKENIZER_TAG_CONTAINER_DOC_COMMENT; state = TOKENIZER_STATE_DOC_COMMENT; goto state; case '\n': self->index++; result.loc.start = self->index; state = TOKENIZER_STATE_START; goto state; case '/': state = TOKENIZER_STATE_DOC_COMMENT_START; goto state; case '\r': state = TOKENIZER_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_LINE_COMMENT; goto state; } break; case TOKENIZER_STATE_DOC_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKENIZER_TAG_DOC_COMMENT; break; case '\r': if (self->buffer[self->index + 1] == '\n') { result.tag = TOKENIZER_TAG_DOC_COMMENT; } else { state = TOKENIZER_STATE_INVALID; goto state; } break; case '/': state = TOKENIZER_STATE_LINE_COMMENT; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: result.tag = TOKENIZER_TAG_DOC_COMMENT; state = TOKENIZER_STATE_DOC_COMMENT; goto state; } break; case TOKENIZER_STATE_LINE_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { return (TokenizerToken) { .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, .end = self->index } }; } break; case '\n': self->index++; result.loc.start = self->index; state = TOKENIZER_STATE_START; goto state; case '\r': state = TOKENIZER_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_LINE_COMMENT; goto state; } break; case TOKENIZER_STATE_DOC_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': break; case '\r': if (self->buffer[self->index + 1] != '\n') { state = TOKENIZER_STATE_INVALID; goto state; } break; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_DOC_COMMENT; goto state; } break; case TOKENIZER_STATE_INT: switch (self->buffer[self->index]) { case '.': state = TOKENIZER_STATE_INT_PERIOD; goto state; case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKENIZER_STATE_INT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKENIZER_STATE_INT_EXPONENT; goto state; default: break; } break; case TOKENIZER_STATE_INT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; default: state = TOKENIZER_STATE_INT; goto state; } break; case TOKENIZER_STATE_INT_PERIOD: self->index++; switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKENIZER_STATE_FLOAT_EXPONENT; goto state; default: self->index--; break; } break; case TOKENIZER_STATE_FLOAT: switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKENIZER_STATE_FLOAT_EXPONENT; goto state; default: break; } break; case TOKENIZER_STATE_FLOAT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; default: state = TOKENIZER_STATE_FLOAT; goto state; } break; } result.loc.end = self->index; return result; }