// tokenizer for zig d48611ba67c7871cb348f28a01b89d8771170dd8 #include #include #include #include #include "tokenizer.h" typedef struct { const char* keyword; token_tag tag; } keyword_map; const keyword_map keywords[] = { { "addrspace", TOKEN_TAG_KEYWORD_ADDRSPACE }, { "align", TOKEN_TAG_KEYWORD_ALIGN }, { "allowzero", TOKEN_TAG_KEYWORD_ALLOWZERO }, { "and", TOKEN_TAG_KEYWORD_AND }, { "anyframe", TOKEN_TAG_KEYWORD_ANYFRAME }, { "anytype", TOKEN_TAG_KEYWORD_ANYTYPE }, { "asm", TOKEN_TAG_KEYWORD_ASM }, { "async", TOKEN_TAG_KEYWORD_ASYNC }, { "await", TOKEN_TAG_KEYWORD_AWAIT }, { "break", TOKEN_TAG_KEYWORD_BREAK }, { "callconv", TOKEN_TAG_KEYWORD_CALLCONV }, { "catch", TOKEN_TAG_KEYWORD_CATCH }, { "comptime", TOKEN_TAG_KEYWORD_COMPTIME }, { "const", TOKEN_TAG_KEYWORD_CONST }, { "continue", TOKEN_TAG_KEYWORD_CONTINUE }, { "defer", TOKEN_TAG_KEYWORD_DEFER }, { "else", TOKEN_TAG_KEYWORD_ELSE }, { "enum", TOKEN_TAG_KEYWORD_ENUM }, { "errdefer", TOKEN_TAG_KEYWORD_ERRDEFER }, { "error", TOKEN_TAG_KEYWORD_ERROR }, { "export", TOKEN_TAG_KEYWORD_EXPORT }, { "extern", TOKEN_TAG_KEYWORD_EXTERN }, { "fn", TOKEN_TAG_KEYWORD_FN }, { "for", TOKEN_TAG_KEYWORD_FOR }, { "if", TOKEN_TAG_KEYWORD_IF }, { "inline", TOKEN_TAG_KEYWORD_INLINE }, { "linksection", TOKEN_TAG_KEYWORD_LINKSECTION }, { "noalias", TOKEN_TAG_KEYWORD_NOALIAS }, { "noinline", TOKEN_TAG_KEYWORD_NOINLINE }, { "nosuspend", TOKEN_TAG_KEYWORD_NOSUSPEND }, { "opaque", TOKEN_TAG_KEYWORD_OPAQUE }, { "or", TOKEN_TAG_KEYWORD_OR }, { "orelse", TOKEN_TAG_KEYWORD_ORELSE }, { "packed", TOKEN_TAG_KEYWORD_PACKED }, { "pub", TOKEN_TAG_KEYWORD_PUB }, { "resume", TOKEN_TAG_KEYWORD_RESUME }, { "return", TOKEN_TAG_KEYWORD_RETURN }, { "struct", TOKEN_TAG_KEYWORD_STRUCT }, { "suspend", TOKEN_TAG_KEYWORD_SUSPEND }, { "switch", TOKEN_TAG_KEYWORD_SWITCH }, { "test", TOKEN_TAG_KEYWORD_TEST }, { "threadlocal", TOKEN_TAG_KEYWORD_THREADLOCAL }, { "try", TOKEN_TAG_KEYWORD_TRY }, { "union", TOKEN_TAG_KEYWORD_UNION }, { "unreachable", TOKEN_TAG_KEYWORD_UNREACHABLE }, { "usingnamespace", TOKEN_TAG_KEYWORD_USINGNAMESPACE }, { "var", TOKEN_TAG_KEYWORD_VAR }, { "volatile", TOKEN_TAG_KEYWORD_VOLATILE }, { "while", TOKEN_TAG_KEYWORD_WHILE } }; // TODO binary search static token_tag get_keyword(const char* bytes, uint32_t len) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) { size_t klen = strlen(keywords[i].keyword); size_t minlen = klen < len ? klen : len; int cmp = strncmp(bytes, keywords[i].keyword, minlen); if (cmp == 0) { if (len == klen) { return keywords[i].tag; } else { return TOKEN_TAG_INVALID; } } else if (cmp < 0) { return TOKEN_TAG_INVALID; } continue; } return TOKEN_TAG_INVALID; } tokenizer tokenizer_init(const char* buffer, uint32_t len) { return (tokenizer) { .buffer = buffer, .buffer_len = len, .index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0, }; } token tokenizer_next(tokenizer* self) { token result = (token) { .tag = TOKEN_TAG_INVALID, .loc = { .start = 0, }, }; token_state state = TOKEN_STATE_START; state: switch (state) { case TOKEN_STATE_START: switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { return (token) { .tag = TOKEN_TAG_EOF, .loc = { .start = self->index, .end = self->index, } }; } else { state = TOKEN_STATE_INVALID; goto state; } case ' ': case '\n': case '\t': case '\r': self->index++; result.loc.start = self->index; goto state; case '"': result.tag = TOKEN_TAG_STRING_LITERAL; state = TOKEN_STATE_STRING_LITERAL; goto state; case '\'': result.tag = TOKEN_TAG_CHAR_LITERAL; state = TOKEN_STATE_CHAR_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': result.tag = TOKEN_TAG_IDENTIFIER; state = TOKEN_STATE_IDENTIFIER; goto state; case '@': state = TOKEN_STATE_SAW_AT_SIGN; goto state; case '=': state = TOKEN_STATE_EQUAL; goto state; case '!': state = TOKEN_STATE_BANG; goto state; case '|': state = TOKEN_STATE_PIPE; goto state; case '(': result.tag = TOKEN_TAG_L_PAREN; self->index++; break; case ')': result.tag = TOKEN_TAG_R_PAREN; self->index++; break; case '[': result.tag = TOKEN_TAG_L_BRACKET; self->index++; break; case ']': result.tag = TOKEN_TAG_R_BRACKET; self->index++; break; case ';': result.tag = TOKEN_TAG_SEMICOLON; self->index++; break; case ',': result.tag = TOKEN_TAG_COMMA; self->index++; break; case '?': result.tag = TOKEN_TAG_QUESTION_MARK; self->index++; break; case ':': result.tag = TOKEN_TAG_COLON; self->index++; break; case '%': state = TOKEN_STATE_PERCENT; goto state; case '*': state = TOKEN_STATE_ASTERISK; goto state; case '+': state = TOKEN_STATE_PLUS; goto state; case '<': state = TOKEN_STATE_ANGLE_BRACKET_LEFT; goto state; case '>': state = TOKEN_STATE_ANGLE_BRACKET_RIGHT; goto state; case '^': state = TOKEN_STATE_CARET; goto state; case '\\': result.tag = TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE; state = TOKEN_STATE_BACKSLASH; goto state; case '{': result.tag = TOKEN_TAG_L_BRACE; self->index++; break; case '}': result.tag = TOKEN_TAG_R_BRACE; self->index++; break; case '~': result.tag = TOKEN_TAG_TILDE; self->index++; break; case '.': state = TOKEN_STATE_PERIOD; goto state; case '-': state = TOKEN_STATE_MINUS; goto state; case '/': state = TOKEN_STATE_SLASH; goto state; case '&': state = TOKEN_STATE_AMPERSAND; goto state; case '0' ... '9': result.tag = TOKEN_TAG_NUMBER_LITERAL; self->index++; state = TOKEN_STATE_INT; goto state; default: state = TOKEN_STATE_INVALID; goto state; }; break; case TOKEN_STATE_EXPECT_NEWLINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { result.tag = TOKEN_TAG_INVALID; } else { state = TOKEN_STATE_INVALID; goto state; } break; case '\n': self->index++; result.loc.start = self->index; state = TOKEN_STATE_START; goto state; default: state = TOKEN_STATE_INVALID; goto state; } break; case TOKEN_STATE_INVALID: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { result.tag = TOKEN_TAG_INVALID; } else { state = TOKEN_STATE_INVALID; goto state; } break; case '\n': result.tag = TOKEN_TAG_INVALID; break; default: state = TOKEN_STATE_INVALID; goto state; } break; case TOKEN_STATE_SAW_AT_SIGN: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKEN_TAG_INVALID; break; case '"': result.tag = TOKEN_TAG_IDENTIFIER; state = TOKEN_STATE_STRING_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': result.tag = TOKEN_TAG_BUILTIN; state = TOKEN_STATE_BUILTIN; goto state; default: state = TOKEN_STATE_INVALID; goto state; } break; case TOKEN_STATE_AMPERSAND: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_AMPERSAND_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_AMPERSAND; break; } break; case TOKEN_STATE_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_ASTERISK_EQUAL; self->index++; break; case '*': result.tag = TOKEN_TAG_ASTERISK_ASTERISK; self->index++; break; case '%': state = TOKEN_STATE_ASTERISK_PERCENT; goto state; case '|': state = TOKEN_STATE_ASTERISK_PIPE; goto state; default: result.tag = TOKEN_TAG_ASTERISK; break; } break; case TOKEN_STATE_ASTERISK_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_ASTERISK_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_ASTERISK_PERCENT; break; } break; case TOKEN_STATE_ASTERISK_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_ASTERISK_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_ASTERISK_PIPE; break; } break; case TOKEN_STATE_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_PERCENT; break; } break; case TOKEN_STATE_PLUS: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_PLUS_EQUAL; self->index++; break; case '+': result.tag = TOKEN_TAG_PLUS_PLUS; self->index++; break; case '%': state = TOKEN_STATE_PLUS_PERCENT; goto state; case '|': state = TOKEN_STATE_PLUS_PIPE; goto state; default: result.tag = TOKEN_TAG_PLUS; break; } break; case TOKEN_STATE_PLUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_PLUS_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_PLUS_PERCENT; break; } break; case TOKEN_STATE_PLUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_PLUS_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_PLUS_PIPE; break; } break; case TOKEN_STATE_CARET: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_CARET_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_CARET; break; } break; case TOKEN_STATE_IDENTIFIER: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': state = TOKEN_STATE_IDENTIFIER; goto state; default:; // Once we're at C23, this semicolon can be removed. const char* start = self->buffer + result.loc.start; uint32_t len = self->index - result.loc.start; token_tag tag = get_keyword(start, len); if (tag != TOKEN_TAG_INVALID) { result.tag = tag; } } break; case TOKEN_STATE_BUILTIN: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': state = TOKEN_STATE_BUILTIN; goto state; break; } break; case TOKEN_STATE_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: result.tag = TOKEN_TAG_INVALID; break; case '\\': state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; case '\n': result.tag = TOKEN_TAG_INVALID; break; default: state = TOKEN_STATE_INVALID; goto state; } break; case TOKEN_STATE_STRING_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKEN_STATE_INVALID; goto state; } else { result.tag = TOKEN_TAG_INVALID; } break; case '\n': result.tag = TOKEN_TAG_INVALID; break; case '\\': state = TOKEN_STATE_STRING_LITERAL_BACKSLASH; goto state; case '"': self->index++; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_STRING_LITERAL; goto state; } break; case TOKEN_STATE_STRING_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKEN_TAG_INVALID; break; default: state = TOKEN_STATE_STRING_LITERAL; goto state; } break; case TOKEN_STATE_CHAR_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKEN_STATE_INVALID; goto state; } else { result.tag = TOKEN_TAG_INVALID; } break; case '\n': result.tag = TOKEN_TAG_INVALID; break; case '\\': state = TOKEN_STATE_CHAR_LITERAL_BACKSLASH; goto state; case '\'': self->index++; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_CHAR_LITERAL; goto state; } break; case TOKEN_STATE_CHAR_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKEN_STATE_INVALID; goto state; } else { result.tag = TOKEN_TAG_INVALID; } break; case '\n': result.tag = TOKEN_TAG_INVALID; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_CHAR_LITERAL; goto state; } break; case TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKEN_STATE_INVALID; goto state; } break; case '\n': break; case '\r': if (self->buffer[self->index + 1] != '\n') { state = TOKEN_STATE_INVALID; goto state; } break; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; } break; case TOKEN_STATE_BANG: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_BANG_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_BANG; break; } break; case TOKEN_STATE_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_PIPE_EQUAL; self->index++; break; case '|': result.tag = TOKEN_TAG_PIPE_PIPE; self->index++; break; default: result.tag = TOKEN_TAG_PIPE; break; } break; case TOKEN_STATE_EQUAL: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_EQUAL_EQUAL; self->index++; break; case '>': result.tag = TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT; self->index++; break; default: result.tag = TOKEN_TAG_EQUAL; break; } break; case TOKEN_STATE_MINUS: self->index++; switch (self->buffer[self->index]) { case '>': result.tag = TOKEN_TAG_ARROW; self->index++; break; case '=': result.tag = TOKEN_TAG_MINUS_EQUAL; self->index++; break; case '%': state = TOKEN_STATE_MINUS_PERCENT; goto state; case '|': state = TOKEN_STATE_MINUS_PIPE; goto state; default: result.tag = TOKEN_TAG_MINUS; break; } break; case TOKEN_STATE_MINUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_MINUS_PERCENT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_MINUS_PERCENT; break; } break; case TOKEN_STATE_MINUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_MINUS_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_MINUS_PIPE; break; } break; case TOKEN_STATE_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '<': state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; goto state; case '=': result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT; break; } break; case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; case '|': state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; goto state; default: result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; break; } break; case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; break; } break; case TOKEN_STATE_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '>': state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; goto state; case '=': result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT; break; } break; case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '=': result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; break; } break; case TOKEN_STATE_PERIOD: self->index++; switch (self->buffer[self->index]) { case '.': state = TOKEN_STATE_PERIOD_2; goto state; case '*': state = TOKEN_STATE_PERIOD_ASTERISK; goto state; default: result.tag = TOKEN_TAG_PERIOD; break; } break; case TOKEN_STATE_PERIOD_2: self->index++; switch (self->buffer[self->index]) { case '.': result.tag = TOKEN_TAG_ELLIPSIS3; self->index++; break; default: result.tag = TOKEN_TAG_ELLIPSIS2; break; } break; case TOKEN_STATE_PERIOD_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '*': result.tag = TOKEN_TAG_INVALID_PERIODASTERISKS; break; default: result.tag = TOKEN_TAG_PERIOD_ASTERISK; break; } break; case TOKEN_STATE_SLASH: self->index++; switch (self->buffer[self->index]) { case '/': state = TOKEN_STATE_LINE_COMMENT_START; goto state; case '=': result.tag = TOKEN_TAG_SLASH_EQUAL; self->index++; break; default: result.tag = TOKEN_TAG_SLASH; break; } break; case TOKEN_STATE_LINE_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKEN_STATE_INVALID; goto state; } else { return (token) { .tag = TOKEN_TAG_EOF, .loc = { .start = self->index, .end = self->index } }; } break; case '!': result.tag = TOKEN_TAG_CONTAINER_DOC_COMMENT; state = TOKEN_STATE_DOC_COMMENT; goto state; case '\n': self->index++; result.loc.start = self->index; state = TOKEN_STATE_START; goto state; case '/': state = TOKEN_STATE_DOC_COMMENT_START; goto state; case '\r': state = TOKEN_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_LINE_COMMENT; goto state; } break; case TOKEN_STATE_DOC_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': result.tag = TOKEN_TAG_DOC_COMMENT; break; case '\r': if (self->buffer[self->index + 1] == '\n') { result.tag = TOKEN_TAG_DOC_COMMENT; } else { state = TOKEN_STATE_INVALID; goto state; } break; case '/': state = TOKEN_STATE_LINE_COMMENT; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: result.tag = TOKEN_TAG_DOC_COMMENT; state = TOKEN_STATE_DOC_COMMENT; goto state; } break; case TOKEN_STATE_LINE_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { state = TOKEN_STATE_INVALID; goto state; } else { return (token) { .tag = TOKEN_TAG_EOF, .loc = { .start = self->index, .end = self->index } }; } break; case '\n': self->index++; result.loc.start = self->index; state = TOKEN_STATE_START; goto state; case '\r': state = TOKEN_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_LINE_COMMENT; goto state; } break; case TOKEN_STATE_DOC_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': break; case '\r': if (self->buffer[self->index + 1] != '\n') { state = TOKEN_STATE_INVALID; goto state; } break; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: state = TOKEN_STATE_INVALID; goto state; default: state = TOKEN_STATE_DOC_COMMENT; goto state; } break; case TOKEN_STATE_INT: switch (self->buffer[self->index]) { case '.': state = TOKEN_STATE_INT_PERIOD; goto state; case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKEN_STATE_INT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKEN_STATE_INT_EXPONENT; goto state; default: break; } break; case TOKEN_STATE_INT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; state = TOKEN_STATE_FLOAT; goto state; default: state = TOKEN_STATE_INT; goto state; } break; case TOKEN_STATE_INT_PERIOD: self->index++; switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKEN_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKEN_STATE_FLOAT_EXPONENT; goto state; default: self->index--; break; } break; case TOKEN_STATE_FLOAT: switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': case 'f' ... 'o': case 'q' ... 'z': case 'A' ... 'D': case 'F' ... 'O': case 'Q' ... 'Z': case '0' ... '9': self->index++; state = TOKEN_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': state = TOKEN_STATE_FLOAT_EXPONENT; goto state; default: break; } break; case TOKEN_STATE_FLOAT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; state = TOKEN_STATE_FLOAT; goto state; default: state = TOKEN_STATE_FLOAT; goto state; } break; } result.loc.end = self->index; return result; }