making parser
This commit is contained in:
24
tokenizer.c
24
tokenizer.c
@@ -7,10 +7,10 @@
|
||||
|
||||
typedef struct {
|
||||
const char* keyword;
|
||||
tokenizer_tag tag;
|
||||
} keyword_map;
|
||||
tokenizerTag tag;
|
||||
} keywordMap;
|
||||
|
||||
const keyword_map keywords[] = {
|
||||
const keywordMap keywords[] = {
|
||||
{ "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
|
||||
{ "align", TOKENIZER_TAG_KEYWORD_ALIGN },
|
||||
{ "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
|
||||
@@ -63,9 +63,9 @@ const keyword_map keywords[] = {
|
||||
};
|
||||
|
||||
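// TODO: replace the linear scan below with a binary search (assumes keywords[] stays sorted alphabetically -- confirm before relying on it)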
|
||||
static tokenizer_tag get_keyword(const char* bytes, const uint32_t len)
|
||||
static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
|
||||
{
|
||||
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) {
|
||||
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keywordMap); i++) {
|
||||
size_t klen = strlen(keywords[i].keyword);
|
||||
size_t minlen = klen < len ? klen : len;
|
||||
int cmp = strncmp(bytes, keywords[i].keyword, minlen);
|
||||
@@ -91,16 +91,16 @@ tokenizer tokenizer_init(const char* buffer, const uint32_t len)
|
||||
};
|
||||
}
|
||||
|
||||
tokenizer_token tokenizer_next(tokenizer* self)
|
||||
tokenizerToken tokenizer_next(tokenizer* self)
|
||||
{
|
||||
tokenizer_token result = (tokenizer_token) {
|
||||
tokenizerToken result = (tokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_INVALID,
|
||||
.loc = {
|
||||
.start = 0,
|
||||
},
|
||||
};
|
||||
|
||||
tokenizer_state state = TOKENIZER_STATE_START;
|
||||
tokenizerState state = TOKENIZER_STATE_START;
|
||||
|
||||
state:
|
||||
switch (state) {
|
||||
@@ -108,7 +108,7 @@ state:
|
||||
switch (self->buffer[self->index]) {
|
||||
case 0:
|
||||
if (self->index == self->buffer_len) {
|
||||
return (tokenizer_token) {
|
||||
return (tokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_EOF,
|
||||
.loc = {
|
||||
.start = self->index,
|
||||
@@ -455,7 +455,7 @@ state:
|
||||
default:; // Once we're at C23, this semicolon can be removed.
|
||||
const char* start = self->buffer + result.loc.start;
|
||||
uint32_t len = self->index - result.loc.start;
|
||||
tokenizer_tag tag = get_keyword(start, len);
|
||||
tokenizerTag tag = get_keyword(start, len);
|
||||
if (tag != TOKENIZER_TAG_INVALID) {
|
||||
result.tag = tag;
|
||||
}
|
||||
@@ -856,7 +856,7 @@ state:
|
||||
state = TOKENIZER_STATE_INVALID;
|
||||
goto state;
|
||||
} else {
|
||||
return (tokenizer_token) {
|
||||
return (tokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_EOF,
|
||||
.loc = {
|
||||
.start = self->index,
|
||||
@@ -930,7 +930,7 @@ state:
|
||||
state = TOKENIZER_STATE_INVALID;
|
||||
goto state;
|
||||
} else {
|
||||
return (tokenizer_token) {
|
||||
return (tokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_EOF,
|
||||
.loc = {
|
||||
.start = self->index,
|
||||
|
||||
Reference in New Issue
Block a user