Files
zig0/tokenizer.c
2024-12-13 09:39:02 +02:00

1098 lines
29 KiB
C

// tokenizer for zig d48611ba67c7871cb348f28a01b89d8771170dd8
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "tokenizer.h"
// One row of the keyword lookup table: a keyword's spelling paired with the
// token tag reported when an identifier matches that spelling exactly.
typedef struct {
    const char* keyword; // NUL-terminated spelling (measured with strlen)
    token_tag tag; // tag returned for an exact match
} keyword_map;
const keyword_map keywords[] = {
{ "addrspace", TOKEN_TAG_KEYWORD_ADDRSPACE },
{ "align", TOKEN_TAG_KEYWORD_ALIGN },
{ "allowzero", TOKEN_TAG_KEYWORD_ALLOWZERO },
{ "and", TOKEN_TAG_KEYWORD_AND },
{ "anyframe", TOKEN_TAG_KEYWORD_ANYFRAME },
{ "anytype", TOKEN_TAG_KEYWORD_ANYTYPE },
{ "asm", TOKEN_TAG_KEYWORD_ASM },
{ "async", TOKEN_TAG_KEYWORD_ASYNC },
{ "await", TOKEN_TAG_KEYWORD_AWAIT },
{ "break", TOKEN_TAG_KEYWORD_BREAK },
{ "callconv", TOKEN_TAG_KEYWORD_CALLCONV },
{ "catch", TOKEN_TAG_KEYWORD_CATCH },
{ "comptime", TOKEN_TAG_KEYWORD_COMPTIME },
{ "const", TOKEN_TAG_KEYWORD_CONST },
{ "continue", TOKEN_TAG_KEYWORD_CONTINUE },
{ "defer", TOKEN_TAG_KEYWORD_DEFER },
{ "else", TOKEN_TAG_KEYWORD_ELSE },
{ "enum", TOKEN_TAG_KEYWORD_ENUM },
{ "errdefer", TOKEN_TAG_KEYWORD_ERRDEFER },
{ "error", TOKEN_TAG_KEYWORD_ERROR },
{ "export", TOKEN_TAG_KEYWORD_EXPORT },
{ "extern", TOKEN_TAG_KEYWORD_EXTERN },
{ "fn", TOKEN_TAG_KEYWORD_FN },
{ "for", TOKEN_TAG_KEYWORD_FOR },
{ "if", TOKEN_TAG_KEYWORD_IF },
{ "inline", TOKEN_TAG_KEYWORD_INLINE },
{ "linksection", TOKEN_TAG_KEYWORD_LINKSECTION },
{ "noalias", TOKEN_TAG_KEYWORD_NOALIAS },
{ "noinline", TOKEN_TAG_KEYWORD_NOINLINE },
{ "nosuspend", TOKEN_TAG_KEYWORD_NOSUSPEND },
{ "opaque", TOKEN_TAG_KEYWORD_OPAQUE },
{ "or", TOKEN_TAG_KEYWORD_OR },
{ "orelse", TOKEN_TAG_KEYWORD_ORELSE },
{ "packed", TOKEN_TAG_KEYWORD_PACKED },
{ "pub", TOKEN_TAG_KEYWORD_PUB },
{ "resume", TOKEN_TAG_KEYWORD_RESUME },
{ "return", TOKEN_TAG_KEYWORD_RETURN },
{ "struct", TOKEN_TAG_KEYWORD_STRUCT },
{ "suspend", TOKEN_TAG_KEYWORD_SUSPEND },
{ "switch", TOKEN_TAG_KEYWORD_SWITCH },
{ "test", TOKEN_TAG_KEYWORD_TEST },
{ "threadlocal", TOKEN_TAG_KEYWORD_THREADLOCAL },
{ "try", TOKEN_TAG_KEYWORD_TRY },
{ "union", TOKEN_TAG_KEYWORD_UNION },
{ "unreachable", TOKEN_TAG_KEYWORD_UNREACHABLE },
{ "usingnamespace", TOKEN_TAG_KEYWORD_USINGNAMESPACE },
{ "var", TOKEN_TAG_KEYWORD_VAR },
{ "volatile", TOKEN_TAG_KEYWORD_VOLATILE },
{ "while", TOKEN_TAG_KEYWORD_WHILE }
};
// Looks up the identifier bytes[0 .. len) in the sorted `keywords` table.
//
// bytes: start of the identifier; at most `len` bytes are compared.
// len:   identifier length in bytes.
//
// Returns the matching keyword's tag, or TOKEN_TAG_INVALID when the
// identifier is not a keyword.
//
// TODO binary search
static token_tag get_keyword(const char* bytes, uint32_t len)
{
    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t i = 0; i < count; i++) {
        const char* keyword = keywords[i].keyword;
        const size_t klen = strlen(keyword);
        const size_t minlen = klen < len ? klen : len;
        const int cmp = strncmp(bytes, keyword, minlen);

        if (cmp < 0) {
            // The table is sorted, so every remaining entry is greater still.
            return TOKEN_TAG_INVALID;
        }
        if (cmp > 0) {
            continue;
        }

        // The common prefix matches; the lengths decide.
        if (len == klen) {
            return keywords[i].tag;
        }
        if (len < klen) {
            // The identifier is a proper prefix of this keyword, so it sorts
            // before this entry and all later ones.
            return TOKEN_TAG_INVALID;
        }
        // len > klen: this keyword is a proper prefix of the identifier
        // (e.g. "or" vs "orelse"). A later, longer entry may still match, so
        // keep scanning instead of giving up.
    }
    return TOKEN_TAG_INVALID;
}
tokenizer tokenizer_init(const char* buffer, uint32_t len)
{
return (tokenizer) {
.buffer = buffer,
.buffer_len = len,
.index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0,
};
}
// Scans the next token starting at self->index and advances self->index
// past it.
//
// The scanner is a state machine: each `case TOKEN_STATE_*` inspects the
// current byte, and either finishes the token (`break` out to the common
// epilogue that records loc.end) or selects another state and jumps back to
// the `state:` label.
//
// Behavior visible to callers:
//  - Whitespace and plain `//` line comments are skipped; the token's
//    loc.start is moved past them.
//  - End of input is recognised by a 0 byte at index == buffer_len, so the
//    buffer must carry a 0 byte at buffer[buffer_len]. A 0 byte anywhere
//    else is treated as invalid input.
//  - At end of input a TOKEN_TAG_EOF token with start == end is returned.
//  - TOKEN_STATE_INVALID extends the token up to (not including) the next
//    '\n', or to the end of the buffer, and tags it TOKEN_TAG_INVALID.
//
// Returns the scanned token; loc.start/loc.end are byte offsets into
// self->buffer with loc.end exclusive.
token tokenizer_next(tokenizer* self)
{
    token result = (token) {
        .tag = TOKEN_TAG_INVALID,
        .loc = {
            // The token begins wherever the previous call stopped. States
            // that skip whitespace or comments move this forward.
            .start = self->index,
        },
    };
    token_state state = TOKEN_STATE_START;

state:
    switch (state) {
    case TOKEN_STATE_START:
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index == self->buffer_len) {
                return (token) {
                    .tag = TOKEN_TAG_EOF,
                    .loc = {
                        .start = self->index,
                        .end = self->index,
                    }
                };
            } else {
                // Embedded 0 byte before the end of the buffer.
                state = TOKEN_STATE_INVALID;
                goto state;
            }
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            // Skip whitespace and restart with a fresh token start.
            self->index++;
            result.loc.start = self->index;
            goto state;
        case '"':
            result.tag = TOKEN_TAG_STRING_LITERAL;
            state = TOKEN_STATE_STRING_LITERAL;
            goto state;
        case '\'':
            result.tag = TOKEN_TAG_CHAR_LITERAL;
            state = TOKEN_STATE_CHAR_LITERAL;
            goto state;
        case 'a' ... 'z':
        case 'A' ... 'Z':
        case '_':
            result.tag = TOKEN_TAG_IDENTIFIER;
            state = TOKEN_STATE_IDENTIFIER;
            goto state;
        case '@':
            state = TOKEN_STATE_SAW_AT_SIGN;
            goto state;
        case '=':
            state = TOKEN_STATE_EQUAL;
            goto state;
        case '!':
            state = TOKEN_STATE_BANG;
            goto state;
        case '|':
            state = TOKEN_STATE_PIPE;
            goto state;
        case '(':
            result.tag = TOKEN_TAG_L_PAREN;
            self->index++;
            break;
        case ')':
            result.tag = TOKEN_TAG_R_PAREN;
            self->index++;
            break;
        case '[':
            result.tag = TOKEN_TAG_L_BRACKET;
            self->index++;
            break;
        case ']':
            result.tag = TOKEN_TAG_R_BRACKET;
            self->index++;
            break;
        case ';':
            result.tag = TOKEN_TAG_SEMICOLON;
            self->index++;
            break;
        case ',':
            result.tag = TOKEN_TAG_COMMA;
            self->index++;
            break;
        case '?':
            result.tag = TOKEN_TAG_QUESTION_MARK;
            self->index++;
            break;
        case ':':
            result.tag = TOKEN_TAG_COLON;
            self->index++;
            break;
        case '%':
            state = TOKEN_STATE_PERCENT;
            goto state;
        case '*':
            state = TOKEN_STATE_ASTERISK;
            goto state;
        case '+':
            state = TOKEN_STATE_PLUS;
            goto state;
        case '<':
            state = TOKEN_STATE_ANGLE_BRACKET_LEFT;
            goto state;
        case '>':
            state = TOKEN_STATE_ANGLE_BRACKET_RIGHT;
            goto state;
        case '^':
            state = TOKEN_STATE_CARET;
            goto state;
        case '\\':
            result.tag = TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE;
            state = TOKEN_STATE_BACKSLASH;
            goto state;
        case '{':
            result.tag = TOKEN_TAG_L_BRACE;
            self->index++;
            break;
        case '}':
            result.tag = TOKEN_TAG_R_BRACE;
            self->index++;
            break;
        case '~':
            result.tag = TOKEN_TAG_TILDE;
            self->index++;
            break;
        case '.':
            state = TOKEN_STATE_PERIOD;
            goto state;
        case '-':
            state = TOKEN_STATE_MINUS;
            goto state;
        case '/':
            state = TOKEN_STATE_SLASH;
            goto state;
        case '&':
            state = TOKEN_STATE_AMPERSAND;
            goto state;
        case '0' ... '9':
            result.tag = TOKEN_TAG_NUMBER_LITERAL;
            self->index++;
            state = TOKEN_STATE_INT;
            goto state;
        default:
            state = TOKEN_STATE_INVALID;
            goto state;
        }
        break;

    // Entered on '\r' inside a line comment: only "\r\n" is accepted.
    case TOKEN_STATE_EXPECT_NEWLINE:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index == self->buffer_len) {
                result.tag = TOKEN_TAG_INVALID;
            } else {
                state = TOKEN_STATE_INVALID;
                goto state;
            }
            break;
        case '\n':
            self->index++;
            result.loc.start = self->index;
            state = TOKEN_STATE_START;
            goto state;
        default:
            state = TOKEN_STATE_INVALID;
            goto state;
        }
        break;

    // Consume bytes until the next '\n' (left unconsumed) or end of buffer.
    case TOKEN_STATE_INVALID:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index == self->buffer_len) {
                result.tag = TOKEN_TAG_INVALID;
            } else {
                state = TOKEN_STATE_INVALID;
                goto state;
            }
            break;
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        default:
            state = TOKEN_STATE_INVALID;
            goto state;
        }
        break;

    // '@' starts either a quoted identifier (@"...") or a builtin (@name).
    case TOKEN_STATE_SAW_AT_SIGN:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        case '"':
            result.tag = TOKEN_TAG_IDENTIFIER;
            state = TOKEN_STATE_STRING_LITERAL;
            goto state;
        case 'a' ... 'z':
        case 'A' ... 'Z':
        case '_':
            result.tag = TOKEN_TAG_BUILTIN;
            state = TOKEN_STATE_BUILTIN;
            goto state;
        default:
            state = TOKEN_STATE_INVALID;
            goto state;
        }
        break;

    case TOKEN_STATE_AMPERSAND:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_AMPERSAND_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_AMPERSAND;
            break;
        }
        break;

    case TOKEN_STATE_ASTERISK:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_ASTERISK_EQUAL;
            self->index++;
            break;
        case '*':
            result.tag = TOKEN_TAG_ASTERISK_ASTERISK;
            self->index++;
            break;
        case '%':
            state = TOKEN_STATE_ASTERISK_PERCENT;
            goto state;
        case '|':
            state = TOKEN_STATE_ASTERISK_PIPE;
            goto state;
        default:
            result.tag = TOKEN_TAG_ASTERISK;
            break;
        }
        break;

    case TOKEN_STATE_ASTERISK_PERCENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_ASTERISK_PERCENT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ASTERISK_PERCENT;
            break;
        }
        break;

    case TOKEN_STATE_ASTERISK_PIPE:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_ASTERISK_PIPE_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ASTERISK_PIPE;
            break;
        }
        break;

    case TOKEN_STATE_PERCENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_PERCENT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_PERCENT;
            break;
        }
        break;

    case TOKEN_STATE_PLUS:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_PLUS_EQUAL;
            self->index++;
            break;
        case '+':
            result.tag = TOKEN_TAG_PLUS_PLUS;
            self->index++;
            break;
        case '%':
            state = TOKEN_STATE_PLUS_PERCENT;
            goto state;
        case '|':
            state = TOKEN_STATE_PLUS_PIPE;
            goto state;
        default:
            result.tag = TOKEN_TAG_PLUS;
            break;
        }
        break;

    case TOKEN_STATE_PLUS_PERCENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_PLUS_PERCENT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_PLUS_PERCENT;
            break;
        }
        break;

    case TOKEN_STATE_PLUS_PIPE:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_PLUS_PIPE_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_PLUS_PIPE;
            break;
        }
        break;

    case TOKEN_STATE_CARET:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_CARET_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_CARET;
            break;
        }
        break;

    case TOKEN_STATE_IDENTIFIER:
        self->index++;
        switch (self->buffer[self->index]) {
        case 'a' ... 'z':
        case 'A' ... 'Z':
        case '_':
        case '0' ... '9':
            state = TOKEN_STATE_IDENTIFIER;
            goto state;
        default: {
            // End of the identifier: promote it to a keyword tag if its
            // text [loc.start, index) is in the keyword table.
            const char* start = self->buffer + result.loc.start;
            uint32_t len = self->index - result.loc.start;
            token_tag tag = get_keyword(start, len);
            if (tag != TOKEN_TAG_INVALID) {
                result.tag = tag;
            }
            break;
        }
        }
        break;

    case TOKEN_STATE_BUILTIN:
        self->index++;
        switch (self->buffer[self->index]) {
        case 'a' ... 'z':
        case 'A' ... 'Z':
        case '_':
        case '0' ... '9':
            state = TOKEN_STATE_BUILTIN;
            goto state;
        default:
            // Any other byte ends the builtin name.
            break;
        }
        break;

    // A single '\' must be followed by a second one ("\\" line).
    case TOKEN_STATE_BACKSLASH:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            result.tag = TOKEN_TAG_INVALID;
            break;
        case '\\':
            state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE;
            goto state;
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        default:
            state = TOKEN_STATE_INVALID;
            goto state;
        }
        break;

    case TOKEN_STATE_STRING_LITERAL:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index != self->buffer_len) {
                state = TOKEN_STATE_INVALID;
                goto state;
            } else {
                result.tag = TOKEN_TAG_INVALID;
            }
            break;
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        case '\\':
            state = TOKEN_STATE_STRING_LITERAL_BACKSLASH;
            goto state;
        case '"':
            // Closing quote: include it in the token.
            self->index++;
            break;
        case 0x01 ... 0x09:
        case 0x0b ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_STRING_LITERAL;
            goto state;
        }
        break;

    case TOKEN_STATE_STRING_LITERAL_BACKSLASH:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        default:
            state = TOKEN_STATE_STRING_LITERAL;
            goto state;
        }
        break;

    case TOKEN_STATE_CHAR_LITERAL:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index != self->buffer_len) {
                state = TOKEN_STATE_INVALID;
                goto state;
            } else {
                result.tag = TOKEN_TAG_INVALID;
            }
            break;
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        case '\\':
            state = TOKEN_STATE_CHAR_LITERAL_BACKSLASH;
            goto state;
        case '\'':
            // Closing quote: include it in the token.
            self->index++;
            break;
        case 0x01 ... 0x09:
        case 0x0b ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_CHAR_LITERAL;
            goto state;
        }
        break;

    case TOKEN_STATE_CHAR_LITERAL_BACKSLASH:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index != self->buffer_len) {
                state = TOKEN_STATE_INVALID;
                goto state;
            } else {
                result.tag = TOKEN_TAG_INVALID;
            }
            break;
        case '\n':
            result.tag = TOKEN_TAG_INVALID;
            break;
        case 0x01 ... 0x09:
        case 0x0b ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_CHAR_LITERAL;
            goto state;
        }
        break;

    // Body of a "\\" line; ends at '\n', "\r\n" or end of buffer, none of
    // which are consumed.
    case TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index != self->buffer_len) {
                state = TOKEN_STATE_INVALID;
                goto state;
            }
            break;
        case '\n':
            break;
        case '\r':
            if (self->buffer[self->index + 1] != '\n') {
                state = TOKEN_STATE_INVALID;
                goto state;
            }
            break;
        case 0x01 ... 0x09:
        case 0x0b ... 0x0c:
        case 0x0e ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE;
            goto state;
        }
        break;

    case TOKEN_STATE_BANG:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_BANG_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_BANG;
            break;
        }
        break;

    case TOKEN_STATE_PIPE:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_PIPE_EQUAL;
            self->index++;
            break;
        case '|':
            result.tag = TOKEN_TAG_PIPE_PIPE;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_PIPE;
            break;
        }
        break;

    case TOKEN_STATE_EQUAL:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_EQUAL_EQUAL;
            self->index++;
            break;
        case '>':
            result.tag = TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_EQUAL;
            break;
        }
        break;

    case TOKEN_STATE_MINUS:
        self->index++;
        switch (self->buffer[self->index]) {
        case '>':
            result.tag = TOKEN_TAG_ARROW;
            self->index++;
            break;
        case '=':
            result.tag = TOKEN_TAG_MINUS_EQUAL;
            self->index++;
            break;
        case '%':
            state = TOKEN_STATE_MINUS_PERCENT;
            goto state;
        case '|':
            state = TOKEN_STATE_MINUS_PIPE;
            goto state;
        default:
            result.tag = TOKEN_TAG_MINUS;
            break;
        }
        break;

    case TOKEN_STATE_MINUS_PERCENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_MINUS_PERCENT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_MINUS_PERCENT;
            break;
        }
        break;

    case TOKEN_STATE_MINUS_PIPE:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_MINUS_PIPE_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_MINUS_PIPE;
            break;
        }
        break;

    case TOKEN_STATE_ANGLE_BRACKET_LEFT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '<':
            state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT;
            goto state;
        case '=':
            result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT;
            break;
        }
        break;

    case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL;
            self->index++;
            break;
        case '|':
            state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE;
            goto state;
        default:
            result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT;
            break;
        }
        break;

    case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE;
            break;
        }
        break;

    case TOKEN_STATE_ANGLE_BRACKET_RIGHT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '>':
            state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT;
            goto state;
        case '=':
            result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT;
            break;
        }
        break;

    case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '=':
            result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT;
            break;
        }
        break;

    case TOKEN_STATE_PERIOD:
        self->index++;
        switch (self->buffer[self->index]) {
        case '.':
            state = TOKEN_STATE_PERIOD_2;
            goto state;
        case '*':
            state = TOKEN_STATE_PERIOD_ASTERISK;
            goto state;
        default:
            result.tag = TOKEN_TAG_PERIOD;
            break;
        }
        break;

    case TOKEN_STATE_PERIOD_2:
        self->index++;
        switch (self->buffer[self->index]) {
        case '.':
            result.tag = TOKEN_TAG_ELLIPSIS3;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_ELLIPSIS2;
            break;
        }
        break;

    case TOKEN_STATE_PERIOD_ASTERISK:
        self->index++;
        switch (self->buffer[self->index]) {
        case '*':
            // ".**": the second '*' is not consumed.
            result.tag = TOKEN_TAG_INVALID_PERIODASTERISKS;
            break;
        default:
            result.tag = TOKEN_TAG_PERIOD_ASTERISK;
            break;
        }
        break;

    case TOKEN_STATE_SLASH:
        self->index++;
        switch (self->buffer[self->index]) {
        case '/':
            state = TOKEN_STATE_LINE_COMMENT_START;
            goto state;
        case '=':
            result.tag = TOKEN_TAG_SLASH_EQUAL;
            self->index++;
            break;
        default:
            result.tag = TOKEN_TAG_SLASH;
            break;
        }
        break;

    // Just after "//": decide between "//!", "///" and a plain comment.
    case TOKEN_STATE_LINE_COMMENT_START:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index != self->buffer_len) {
                state = TOKEN_STATE_INVALID;
                goto state;
            } else {
                return (token) {
                    .tag = TOKEN_TAG_EOF,
                    .loc = {
                        .start = self->index,
                        .end = self->index,
                    }
                };
            }
        case '!':
            result.tag = TOKEN_TAG_CONTAINER_DOC_COMMENT;
            state = TOKEN_STATE_DOC_COMMENT;
            goto state;
        case '\n':
            // Empty comment: skip it and scan the next line.
            self->index++;
            result.loc.start = self->index;
            state = TOKEN_STATE_START;
            goto state;
        case '/':
            state = TOKEN_STATE_DOC_COMMENT_START;
            goto state;
        case '\r':
            state = TOKEN_STATE_EXPECT_NEWLINE;
            goto state;
        case 0x01 ... 0x09:
        case 0x0b ... 0x0c:
        case 0x0e ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_LINE_COMMENT;
            goto state;
        }
        break;

    // Just after "///": a fourth '/' turns it back into a plain comment.
    case TOKEN_STATE_DOC_COMMENT_START:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
        case '\n':
            result.tag = TOKEN_TAG_DOC_COMMENT;
            break;
        case '\r':
            if (self->buffer[self->index + 1] == '\n') {
                result.tag = TOKEN_TAG_DOC_COMMENT;
            } else {
                state = TOKEN_STATE_INVALID;
                goto state;
            }
            break;
        case '/':
            state = TOKEN_STATE_LINE_COMMENT;
            goto state;
        case 0x01 ... 0x09:
        case 0x0b ... 0x0c:
        case 0x0e ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            result.tag = TOKEN_TAG_DOC_COMMENT;
            state = TOKEN_STATE_DOC_COMMENT;
            goto state;
        }
        break;

    // Plain comment body: skipped entirely, no token is produced for it.
    case TOKEN_STATE_LINE_COMMENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
            if (self->index != self->buffer_len) {
                state = TOKEN_STATE_INVALID;
                goto state;
            } else {
                return (token) {
                    .tag = TOKEN_TAG_EOF,
                    .loc = {
                        .start = self->index,
                        .end = self->index,
                    }
                };
            }
        case '\n':
            self->index++;
            result.loc.start = self->index;
            state = TOKEN_STATE_START;
            goto state;
        case '\r':
            state = TOKEN_STATE_EXPECT_NEWLINE;
            goto state;
        case 0x01 ... 0x09:
        case 0x0b ... 0x0c:
        case 0x0e ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_LINE_COMMENT;
            goto state;
        }
        break;

    // Doc comment body; the tag was chosen by the state that led here.
    case TOKEN_STATE_DOC_COMMENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case 0:
        case '\n':
            break;
        case '\r':
            if (self->buffer[self->index + 1] != '\n') {
                state = TOKEN_STATE_INVALID;
                goto state;
            }
            break;
        case 0x01 ... 0x09:
        case 0x0b ... 0x0c:
        case 0x0e ... 0x1f:
        case 0x7f:
            state = TOKEN_STATE_INVALID;
            goto state;
        default:
            state = TOKEN_STATE_DOC_COMMENT;
            goto state;
        }
        break;

    // Number states examine the current byte first and advance afterwards,
    // unlike most other states, which advance on entry.
    case TOKEN_STATE_INT:
        switch (self->buffer[self->index]) {
        case '.':
            state = TOKEN_STATE_INT_PERIOD;
            goto state;
        case '_':
        case 'a' ... 'd':
        case 'f' ... 'o':
        case 'q' ... 'z':
        case 'A' ... 'D':
        case 'F' ... 'O':
        case 'Q' ... 'Z':
        case '0' ... '9':
            self->index++;
            state = TOKEN_STATE_INT;
            goto state;
        case 'e':
        case 'E':
        case 'p':
        case 'P':
            state = TOKEN_STATE_INT_EXPONENT;
            goto state;
        default:
            break;
        }
        break;

    case TOKEN_STATE_INT_EXPONENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '-':
        case '+':
            self->index++;
            state = TOKEN_STATE_FLOAT;
            goto state;
        default:
            state = TOKEN_STATE_INT;
            goto state;
        }
        break;

    case TOKEN_STATE_INT_PERIOD:
        self->index++;
        switch (self->buffer[self->index]) {
        case '_':
        case 'a' ... 'd':
        case 'f' ... 'o':
        case 'q' ... 'z':
        case 'A' ... 'D':
        case 'F' ... 'O':
        case 'Q' ... 'Z':
        case '0' ... '9':
            self->index++;
            state = TOKEN_STATE_FLOAT;
            goto state;
        case 'e':
        case 'E':
        case 'p':
        case 'P':
            state = TOKEN_STATE_FLOAT_EXPONENT;
            goto state;
        default:
            // The '.' is not part of the number; back up so it is scanned
            // as its own token.
            self->index--;
            break;
        }
        break;

    case TOKEN_STATE_FLOAT:
        switch (self->buffer[self->index]) {
        case '_':
        case 'a' ... 'd':
        case 'f' ... 'o':
        case 'q' ... 'z':
        case 'A' ... 'D':
        case 'F' ... 'O':
        case 'Q' ... 'Z':
        case '0' ... '9':
            self->index++;
            state = TOKEN_STATE_FLOAT;
            goto state;
        case 'e':
        case 'E':
        case 'p':
        case 'P':
            state = TOKEN_STATE_FLOAT_EXPONENT;
            goto state;
        default:
            break;
        }
        break;

    case TOKEN_STATE_FLOAT_EXPONENT:
        self->index++;
        switch (self->buffer[self->index]) {
        case '-':
        case '+':
            self->index++;
            state = TOKEN_STATE_FLOAT;
            goto state;
        default:
            state = TOKEN_STATE_FLOAT;
            goto state;
        }
        break;
    }

    result.loc.end = self->index;
    return result;
}