commit a9c4b5c7ab
parent db35aa7722
2024-12-22 22:40:01 +02:00
7 changed files with 253 additions and 205 deletions

ast.c (108 changed lines)

@@ -4,85 +4,77 @@
 #include <stdlib.h>
 #include "ast.h"
 #include "common.h"
 #include "parser.h"
 #define N 1024

-ast astParse(const char* source, const uint32_t len)
+Ast astParse(const char* source, const uint32_t len)
 {
     uint32_t estimated_token_count = len / 8;
-    TokenizerTag* token_tags = NULL;
-    AstIndex* token_starts = NULL;
-    AstNodeTag* nodes_tags = NULL;
-    AstTokenIndex* main_tokens = NULL;
-    AstData* nodes_datas = NULL;
-    AstNodeIndex* extra_data_arr = NULL;
-    AstNodeIndex* scratch_arr = NULL;
-    if (!(token_tags = calloc(estimated_token_count, sizeof(TokenizerTag))))
-        exit(1);
-    if (!(token_starts = calloc(estimated_token_count, sizeof(AstIndex))))
-        exit(1);
+    // Initialize token list
+    AstTokenList tokens = {
+        .len = 0,
+        .cap = estimated_token_count,
+        .tags = SLICE_INIT(TokenizerTag, estimated_token_count),
+        .starts = SLICE_INIT(AstIndex, estimated_token_count)
+    };

     // Tokenize
     Tokenizer tok = tokenizerInit(source, len);
-    uint32_t tokens_len = 0;
-    for (; tokens_len <= estimated_token_count; tokens_len++) {
-        if (tokens_len == estimated_token_count) {
+    while (true) {
+        if (tokens.len >= tokens.cap) {
             fprintf(stderr, "too many tokens, bump estimated_token_count\n");
             exit(1);
         }
         TokenizerToken token = tokenizerNext(&tok);
-        token_tags[tokens_len] = token.tag;
-        token_starts[tokens_len] = token.loc.start;
+        tokens.tags[tokens.len] = token.tag;
+        tokens.starts[tokens.len] = token.loc.start;
+        tokens.len++;
         if (token.tag == TOKENIZER_TAG_EOF)
             break;
     }

-    uint32_t estimated_node_count = (tokens_len + 2) / 2;
-    if (!(nodes_tags = calloc(estimated_node_count, sizeof(AstNodeTag))))
-        exit(1);
-    if (!(main_tokens = calloc(estimated_node_count, sizeof(AstTokenIndex))))
-        exit(1);
-    if (!(nodes_datas = calloc(estimated_node_count, sizeof(AstData))))
-        exit(1);
-    if (!(extra_data_arr = calloc(N, sizeof(AstNodeIndex))))
-        exit(1);
-    if (!(scratch_arr = calloc(N, sizeof(AstNodeIndex))))
-        exit(1);
-    Parser p = (Parser) {
-        .source = source,
-        .source_len = len,
-        .token_tags = token_tags,
-        .token_starts = token_starts,
-        .tokens_len = tokens_len,
-        .tok_i = 0,
-        .nodes = (AstNodeList) {
-            .len = 0,
-            .cap = estimated_node_count,
-            .tags = nodes_tags,
-            .main_tokens = main_tokens,
-            .datas = nodes_datas,
-        },
-        .extra_data = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
-        .scratch = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
+    // Initialize node list
+    uint32_t estimated_node_count = (tokens.len + 2) / 2;
+    AstNodeList nodes = {
+        .len = 0,
+        .cap = estimated_node_count,
+        .tags = SLICE_INIT(AstNodeTag, estimated_node_count),
+        .main_tokens = SLICE_INIT(AstTokenIndex, estimated_node_count),
+        .datas = SLICE_INIT(AstData, estimated_node_count)
     };
+    // Initialize parser
+    Parser p = {
+        .source = source,
+        .source_len = len,
+        .token_tags = tokens.tags,
+        .token_starts = tokens.starts,
+        .tokens_len = tokens.len,
+        .tok_i = 0,
+        .nodes = nodes,
+        .extra_data = {
+            .len = 0,
+            .cap = N,
+            .arr = SLICE_INIT(AstNodeIndex, N) },
+        .scratch = { .len = 0, .cap = N, .arr = SLICE_INIT(AstNodeIndex, N) }
+    };

     parseRoot(&p);
-    free(scratch_arr);
+    free(p.scratch.arr); // scratch is only needed while parsing; the Ast keeps the rest

-    return (ast) {
+    return (Ast) {
         .source = source,
-        .tokens.tags = token_tags,
-        .tokens.starts = token_starts,
         .source_len = len,
+        .tokens = tokens,
         .nodes = p.nodes,
-        .extra_data = p.extra_data.arr,
-        .extra_data_len = p.extra_data.len,
+        .extra_data = {
+            .len = p.extra_data.len,
+            .cap = p.extra_data.cap,
+            .arr = p.extra_data.arr,
+        },
     };
 }
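
The new code depends on a SLICE_INIT macro and on AstTokenList/AstNodeList, none of which are defined in this file (they presumably live in common.h and the tokenizer/AST headers). As a reading aid, here is a minimal sketch of shapes consistent with how astParse uses them; the macro body, the sliceAlloc helper, and the placeholder element typedefs are assumptions for illustration, not the repository's actual definitions.

#include <stdint.h>
#include <stdlib.h>

// Placeholder element types so the sketch is self-contained; the real
// ones come from the tokenizer and AST headers.
typedef uint8_t TokenizerTag;
typedef uint32_t AstIndex;
typedef uint8_t AstNodeTag;
typedef uint32_t AstTokenIndex;
typedef struct { uint32_t lhs, rhs; } AstData;

// Zeroed allocation that exits on failure, folding up the repeated
// calloc-and-exit(1) pattern the old code spelled out by hand.
static void* sliceAlloc(size_t n, size_t size)
{
    void* p = calloc(n, size);
    if (!p)
        exit(1);
    return p;
}

// One plausible shape for SLICE_INIT(T, n): a typed, zeroed array of n T's.
#define SLICE_INIT(T, n) ((T*)sliceAlloc((n), sizeof(T)))

// Struct-of-arrays lists, as implied by the field accesses in the diff:
// tokens.tags[tokens.len], tokens.starts[tokens.len], nodes.main_tokens, ...
typedef struct {
    uint32_t len;
    uint32_t cap;
    TokenizerTag* tags;
    AstIndex* starts;
} AstTokenList;

typedef struct {
    uint32_t len;
    uint32_t cap;
    AstNodeTag* tags;
    AstTokenIndex* main_tokens;
    AstData* datas;
} AstNodeList;

The struct-of-arrays layout keeps each per-token and per-node attribute in its own dense array, which is cache-friendly when a pass touches only one attribute (for example, scanning tags), and the explicit len/cap pair is what lets the tokenize loop detect overflow of the len / 8 estimate.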
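
A hypothetical caller, to make the ownership implied by the return statement concrete: astParse hands back the token and node slices plus extra_data, so the caller releases them. The input string and the direct free() calls are assumptions (they presume SLICE_INIT allocates with calloc as sketched above, and that no astDeinit-style helper exists); the field names are the ones the diff populates.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ast.h"

int main(void)
{
    const char* src = "..."; // any input the tokenizer accepts
    Ast tree = astParse(src, (uint32_t)strlen(src));
    printf("%u tokens, %u nodes, %u extra data entries\n",
           tree.tokens.len, tree.nodes.len, tree.extra_data.len);

    // Release everything the Ast owns, slice by slice; the source
    // string stays with the caller, since astParse never copies it.
    free(tree.tokens.tags);
    free(tree.tokens.starts);
    free(tree.nodes.tags);
    free(tree.nodes.main_tokens);
    free(tree.nodes.datas);
    free(tree.extra_data.arr);
    return 0;
}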