#include #include #include #include #include "tokenizer.h" typedef struct { const char* keyword; TokenizerTag tag; } KeywordMap; const char* tokenizerGetTagString(TokenizerTag tag) { switch (tag) { TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_CASE) default: return "UNKNOWN"; } } // clang-format off const KeywordMap keywords[] = { { "addrspace", TOKEN_KEYWORD_ADDRSPACE }, { "align", TOKEN_KEYWORD_ALIGN }, { "allowzero", TOKEN_KEYWORD_ALLOWZERO }, { "and", TOKEN_KEYWORD_AND }, { "anyframe", TOKEN_KEYWORD_ANYFRAME }, { "anytype", TOKEN_KEYWORD_ANYTYPE }, { "asm", TOKEN_KEYWORD_ASM }, { "async", TOKEN_KEYWORD_ASYNC }, { "await", TOKEN_KEYWORD_AWAIT }, { "break", TOKEN_KEYWORD_BREAK }, { "callconv", TOKEN_KEYWORD_CALLCONV }, { "catch", TOKEN_KEYWORD_CATCH }, { "comptime", TOKEN_KEYWORD_COMPTIME }, { "const", TOKEN_KEYWORD_CONST }, { "continue", TOKEN_KEYWORD_CONTINUE }, { "defer", TOKEN_KEYWORD_DEFER }, { "else", TOKEN_KEYWORD_ELSE }, { "enum", TOKEN_KEYWORD_ENUM }, { "errdefer", TOKEN_KEYWORD_ERRDEFER }, { "error", TOKEN_KEYWORD_ERROR }, { "export", TOKEN_KEYWORD_EXPORT }, { "extern", TOKEN_KEYWORD_EXTERN }, { "fn", TOKEN_KEYWORD_FN }, { "for", TOKEN_KEYWORD_FOR }, { "if", TOKEN_KEYWORD_IF }, { "inline", TOKEN_KEYWORD_INLINE }, { "linksection", TOKEN_KEYWORD_LINKSECTION }, { "noalias", TOKEN_KEYWORD_NOALIAS }, { "noinline", TOKEN_KEYWORD_NOINLINE }, { "nosuspend", TOKEN_KEYWORD_NOSUSPEND }, { "opaque", TOKEN_KEYWORD_OPAQUE }, { "or", TOKEN_KEYWORD_OR }, { "orelse", TOKEN_KEYWORD_ORELSE }, { "packed", TOKEN_KEYWORD_PACKED }, { "pub", TOKEN_KEYWORD_PUB }, { "resume", TOKEN_KEYWORD_RESUME }, { "return", TOKEN_KEYWORD_RETURN }, { "struct", TOKEN_KEYWORD_STRUCT }, { "suspend", TOKEN_KEYWORD_SUSPEND }, { "switch", TOKEN_KEYWORD_SWITCH }, { "test", TOKEN_KEYWORD_TEST }, { "threadlocal", TOKEN_KEYWORD_THREADLOCAL }, { "try", TOKEN_KEYWORD_TRY }, { "union", TOKEN_KEYWORD_UNION }, { "unreachable", TOKEN_KEYWORD_UNREACHABLE }, { "usingnamespace", TOKEN_KEYWORD_USINGNAMESPACE }, { "var", TOKEN_KEYWORD_VAR }, { "volatile", TOKEN_KEYWORD_VOLATILE }, { "while", TOKEN_KEYWORD_WHILE } }; // clang-format on // TODO binary search static TokenizerTag getKeyword(const char* bytes, const uint32_t len) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) { size_t klen = strlen(keywords[i].keyword); size_t minlen = klen < len ? klen : len; int cmp = strncmp(bytes, keywords[i].keyword, minlen); if (cmp == 0) { if (len == klen) { return keywords[i].tag; } else { return TOKEN_INVALID; } } else if (cmp < 0) { return TOKEN_INVALID; } } return TOKEN_INVALID; } Tokenizer tokenizerInit(const char* buffer, const uint32_t len) { return (Tokenizer) { .buffer = buffer, .buffer_len = len, .index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0, }; } TokenizerToken tokenizerNext(Tokenizer* self) { TokenizerToken result = (TokenizerToken) { .tag = TOKEN_INVALID, .loc = { .start = 0, }, }; TokenizerState state = TOKENIZER_STATE_START; state: switch (state) { case TOKENIZER_STATE_START: switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { return (TokenizerToken) { .tag = TOKEN_EOF, .loc = { .start = self->index, .end = self->index, } }; } else { state = TOKENIZER_STATE_INVALID; goto state; } case ' ': case '\n': case '\t': case '\r': self->index++; result.loc.start = self->index; goto state; case '"': result.tag = TOKEN_STRING_LITERAL; state = TOKENIZER_STATE_STRING_LITERAL; goto state; case '\'': result.tag = TOKEN_CHAR_LITERAL; state = TOKENIZER_STATE_CHAR_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': result.tag = TOKEN_IDENTIFIER; state = TOKENIZER_STATE_IDENTIFIER; goto state; case '@': state = TOKENIZER_STATE_SAW_AT_SIGN; goto state; case '=': state = TOKENIZER_STATE_EQUAL; goto state; case '!': state = TOKENIZER_STATE_BANG; goto state; case '|': state = TOKENIZER_STATE_PIPE; goto state; case '(': result.tag = TOKEN_L_PAREN; self->index++; break; case ')': result.tag = TOKEN_R_PAREN; self->index++; break; case '[': result.tag = TOKEN_L_BRACKET; self->index++; break; case ']': result.tag = TOKEN_R_BRACKET; self->index++; break; case ';': result.tag = TOKEN_SEMICOLON; self->index++; break; case ',': result.tag = TOKEN_COMMA; self->index++; break; case '?': result.tag = TOKEN_QUESTION_MARK; self->index++; break; case ':': result.tag = TOKEN_COLON; self->index++; break; case '%': state = TOKENIZER_STATE_PERCENT; goto state; case '*': state = TOKENIZER_STATE_ASTERISK; goto state; case '+': state = TOKENIZER_STATE_PLUS; goto state; case '<': state = TOKENIZER_STATE_ANGLE_BRACKET_LEFT; goto state; case '>': state = TOKENIZER_STATE_ANGLE_BRACKET_RIGHT; goto state; case '^': state = TOKENIZER_STATE_CARET; goto state; case '\\': result.tag = TOKEN_MULTILINE_STRING_LITERAL_LINE; state = TOKENIZER_STATE_BACKSLASH; goto state; case '{': result.tag = TOKEN_L_BRACE; self->index++; break; case '}': result.tag = TOKEN_R_BRACE; self->index++; break; case '~': result.tag = TOKEN_TILDE; self->index++; break; case '.': state = TOKENIZER_STATE_PERIOD; goto state; case '-': state = TOKENIZER_STATE_MINUS; goto state; case '/': state = TOKENIZER_STATE_SLASH; goto state; case '&': state = TOKENIZER_STATE_AMPERSAND; goto state; case '0' ... '9': result.tag = TOKEN_NUMBER_LITERAL; self->index++; state = TOKENIZER_STATE_INT; goto state; default: state = TOKENIZER_STATE_INVALID; goto state; }; break; case TOKENIZER_STATE_EXPECT_NEWLINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { result.tag = TOKEN_INVALID; } else { state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': self->index++; result.loc.start = self->index; state = TOKENIZER_STATE_START; goto state; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_INVALID: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { result.tag = TOKEN_INVALID; } else { state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': result.tag = TOKEN_INVALID; break; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_SAW_AT_SIGN: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKEN_INVALID; break; case '"': result.tag = TOKEN_IDENTIFIER; state = TOKENIZER_STATE_STRING_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': result.tag = TOKEN_BUILTIN; state = TOKENIZER_STATE_BUILTIN; goto state; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_AMPERSAND: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_AMPERSAND_EQUAL; self->index++; break; default: result.tag = TOKEN_AMPERSAND; break; } break; case TOKENIZER_STATE_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_ASTERISK_EQUAL; self->index++; break; case '*': result.tag = TOKEN_ASTERISK_ASTERISK; self->index++; break; case '%': state = TOKENIZER_STATE_ASTERISK_PERCENT; goto state; case '|': state = TOKENIZER_STATE_ASTERISK_PIPE; goto state; default: result.tag = TOKEN_ASTERISK; break; } break; case TOKENIZER_STATE_ASTERISK_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_ASTERISK_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_ASTERISK_PERCENT; break; } break; case TOKENIZER_STATE_ASTERISK_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_ASTERISK_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_ASTERISK_PIPE; break; } break; case TOKENIZER_STATE_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_PERCENT; break; } break; case TOKENIZER_STATE_PLUS: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_PLUS_EQUAL; self->index++; break; case '+': result.tag = TOKEN_PLUS_PLUS; self->index++; break; case '%': state = TOKENIZER_STATE_PLUS_PERCENT; goto state; case '|': state = TOKENIZER_STATE_PLUS_PIPE; goto state; default: result.tag = TOKEN_PLUS; break; } break; case TOKENIZER_STATE_PLUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_PLUS_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_PLUS_PERCENT; break; } break; case TOKENIZER_STATE_PLUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_PLUS_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_PLUS_PIPE; break; } break; case TOKENIZER_STATE_CARET: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_CARET_EQUAL; self->index++; break; default: result.tag = TOKEN_CARET; break; } break; case TOKENIZER_STATE_IDENTIFIER: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': state = TOKENIZER_STATE_IDENTIFIER; goto state; default:; // Once we're at C23, this semicolon can be removed. const char* start = self->buffer + result.loc.start; uint32_t len = self->index - result.loc.start; TokenizerTag tag = getKeyword(start, len); if (tag != TOKEN_INVALID) result.tag = tag; } break; case TOKENIZER_STATE_BUILTIN: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': state = TOKENIZER_STATE_BUILTIN; goto state; break; } break; case TOKENIZER_STATE_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: result.tag = TOKEN_INVALID; break; case '\\': state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; case '\n': result.tag = TOKEN_INVALID; break; default: state = TOKENIZER_STATE_INVALID; goto state; } break; case TOKENIZER_STATE_STRING_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { result.tag = TOKEN_INVALID; } break; case '\n': result.tag = TOKEN_INVALID; break; case '\\': state = TOKENIZER_STATE_STRING_LITERAL_BACKSLASH; goto state; case '"': self->index++; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_STRING_LITERAL; goto state; } break; case TOKENIZER_STATE_STRING_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKEN_INVALID; break; default: state = TOKENIZER_STATE_STRING_LITERAL; goto state; } break; case TOKENIZER_STATE_CHAR_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { result.tag = TOKEN_INVALID; } break; case '\n': result.tag = TOKEN_INVALID; break; case '\\': state = TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH; goto state; case '\'': self->index++; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_CHAR_LITERAL; goto state; } break; case TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { result.tag = TOKEN_INVALID; } break; case '\n': result.tag = TOKEN_INVALID; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_CHAR_LITERAL; goto state; } break; case TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': break; case '\r': if (self->buffer[self->index + 1] != '\n') { state = TOKENIZER_STATE_INVALID; goto state; } break; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; } break; case TOKENIZER_STATE_BANG: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_BANG_EQUAL; self->index++; break; default: result.tag = TOKEN_BANG; break; } break; case TOKENIZER_STATE_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_PIPE_EQUAL; self->index++; break; case '|': result.tag = TOKEN_PIPE_PIPE; self->index++; break; default: result.tag = TOKEN_PIPE; break; } break; case TOKENIZER_STATE_EQUAL: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_EQUAL_EQUAL; self->index++; break; case '>': result.tag = TOKEN_EQUAL_ANGLE_BRACKET_RIGHT; self->index++; break; default: result.tag = TOKEN_EQUAL; break; } break; case TOKENIZER_STATE_MINUS: self->index++; switch (self->buffer[self->index]) { case '>': result.tag = TOKEN_ARROW; self->index++; break; case '=': result.tag = TOKEN_MINUS_EQUAL; self->index++; break; case '%': state = TOKENIZER_STATE_MINUS_PERCENT; goto state; case '|': state = TOKENIZER_STATE_MINUS_PIPE; goto state; default: result.tag = TOKEN_MINUS; break; } break; case TOKENIZER_STATE_MINUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_MINUS_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_MINUS_PERCENT; break; } break; case TOKENIZER_STATE_MINUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_MINUS_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_MINUS_PIPE; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '<': state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; goto state; case '=': result.tag = TOKEN_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; default: result.tag = TOKEN_ANGLE_BRACKET_LEFT; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; case '|': state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; goto state; default: result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '>': state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; goto state; case '=': result.tag = TOKEN_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: result.tag = TOKEN_ANGLE_BRACKET_RIGHT; break; } break; case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: result.tag = TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; break; } break; case TOKENIZER_STATE_PERIOD: self->index++; switch (self->buffer[self->index]) { case '.': state = TOKENIZER_STATE_PERIOD_2; goto state; case '*': state = TOKENIZER_STATE_PERIOD_ASTERISK; goto state; default: result.tag = TOKEN_PERIOD; break; } break; case TOKENIZER_STATE_PERIOD_2: self->index++; switch (self->buffer[self->index]) { case '.': result.tag = TOKEN_ELLIPSIS3; self->index++; break; default: result.tag = TOKEN_ELLIPSIS2; break; } break; case TOKENIZER_STATE_PERIOD_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '*': result.tag = TOKEN_INVALID_PERIODASTERISKS; break; default: result.tag = TOKEN_PERIOD_ASTERISK; break; } break; case TOKENIZER_STATE_SLASH: self->index++; switch (self->buffer[self->index]) { case '/': state = TOKENIZER_STATE_LINE_COMMENT_START; goto state; case '=': result.tag = TOKEN_SLASH_EQUAL; self->index++; break; default: result.tag = TOKEN_SLASH; break; } break; case TOKENIZER_STATE_LINE_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { return (TokenizerToken) { .tag = TOKEN_EOF, .loc = { .start = self->index, .end = self->index, } }; } break; case '!': result.tag = TOKEN_CONTAINER_DOC_COMMENT; state = TOKENIZER_STATE_DOC_COMMENT; goto state; case '\n': self->index++; result.loc.start = self->index; state = TOKENIZER_STATE_START; goto state; case '/': state = TOKENIZER_STATE_DOC_COMMENT_START; goto state; case '\r': state = TOKENIZER_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_LINE_COMMENT; goto state; } break; case TOKENIZER_STATE_DOC_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKEN_DOC_COMMENT; break; case '\r': if (self->buffer[self->index + 1] == '\n') { result.tag = TOKEN_DOC_COMMENT; } else { state = TOKENIZER_STATE_INVALID; goto state; } break; case '/': state = TOKENIZER_STATE_LINE_COMMENT; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: result.tag = TOKEN_DOC_COMMENT; state = TOKENIZER_STATE_DOC_COMMENT; goto state; } break; case TOKENIZER_STATE_LINE_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKENIZER_STATE_INVALID; goto state; } else { return (TokenizerToken) { .tag = TOKEN_EOF, .loc = { .start = self->index, .end = self->index, } }; } break; case '\n': self->index++; result.loc.start = self->index; state = TOKENIZER_STATE_START; goto state; case '\r': state = TOKENIZER_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_LINE_COMMENT; goto state; } break; case TOKENIZER_STATE_DOC_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': break; case '\r': if (self->buffer[self->index + 1] != '\n') { state = TOKENIZER_STATE_INVALID; goto state; } break; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKENIZER_STATE_INVALID; goto state; default: state = TOKENIZER_STATE_DOC_COMMENT; goto state; } break; case TOKENIZER_STATE_INT: switch (self->buffer[self->index]) { case '.': state = TOKENIZER_STATE_INT_PERIOD; goto state; case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKENIZER_STATE_INT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKENIZER_STATE_INT_EXPONENT; goto state; default: break; } break; case TOKENIZER_STATE_INT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; default: state = TOKENIZER_STATE_INT; goto state; } break; case TOKENIZER_STATE_INT_PERIOD: self->index++; switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKENIZER_STATE_FLOAT_EXPONENT; goto state; default: self->index--; break; } break; case TOKENIZER_STATE_FLOAT: switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKENIZER_STATE_FLOAT_EXPONENT; goto state; default: break; } break; case TOKENIZER_STATE_FLOAT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; state = TOKENIZER_STATE_FLOAT; goto state; default: state = TOKENIZER_STATE_FLOAT; goto state; } break; } result.loc.end = self->index; return result; }