rename types

This commit is contained in:
2024-12-22 22:31:16 +02:00
parent 228b215259
commit db35aa7722
10 changed files with 112 additions and 127 deletions

72
ast.c
View File

@@ -8,74 +8,74 @@
#define N 1024
// Tokenizes `source` (`len` bytes) into parallel tag/start arrays, allocates the node,
// extra-data and scratch buffers, wraps them in a parser value, runs the root parse and
// returns an ast built from the parser's state.
// This listing carries both spellings of every renamed line: the lowercase/snake_case form
// (with `goto err`) followed by the CamelCase form (with `exit(1)`).
ast ast_parse(const char* source, const uint32_t len, int* err)
ast astParse(const char* source, const uint32_t len)
{
// Token capacity is a heuristic: one slot per 8 bytes of source.
// NOTE(review): for len < 8 this is 0; calloc(0, ...) is allowed to return NULL, which the
// checks below treat as an allocation failure — confirm short inputs are handled.
uint32_t estimated_token_count = len / 8;
tokenizerTag* token_tags = NULL;
astIndex* token_starts = NULL;
astNodeTag* nodes_tags = NULL;
astTokenIndex* main_tokens = NULL;
astData* nodes_datas = NULL;
astNodeIndex* extra_data_arr = NULL;
astNodeIndex* scratch_arr = NULL;
TokenizerTag* token_tags = NULL;
AstIndex* token_starts = NULL;
AstNodeTag* nodes_tags = NULL;
AstTokenIndex* main_tokens = NULL;
AstData* nodes_datas = NULL;
AstNodeIndex* extra_data_arr = NULL;
AstNodeIndex* scratch_arr = NULL;
if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizerTag))))
goto err;
if (!(token_tags = calloc(estimated_token_count, sizeof(TokenizerTag))))
exit(1);
if (!(token_starts = calloc(estimated_token_count, sizeof(astIndex))))
goto err;
if (!(token_starts = calloc(estimated_token_count, sizeof(AstIndex))))
exit(1);
tokenizer tok = tokenizer_init(source, len);
Tokenizer tok = tokenizerInit(source, len);
uint32_t tokens_len = 0;
// Records the tag and start offset of each token returned by the tokenizer.
// NOTE(review): no branch in this loop stops when the tokenizer reports TOKENIZER_TAG_EOF;
// as shown, the only way out is the "too many tokens" path once tokens_len reaches the
// estimate — confirm whether an EOF break is missing.
for (; tokens_len <= estimated_token_count; tokens_len++) {
if (tokens_len == estimated_token_count) {
fprintf(stderr, "too many tokens, bump estimated_token_count\n");
goto err;
exit(1);
}
tokenizerToken token = tokenizer_next(&tok);
TokenizerToken token = tokenizerNext(&tok);
token_tags[tokens_len] = token.tag;
token_starts[tokens_len] = token.loc.start;
}
// Node capacity is derived from the token count; always at least 1.
uint32_t estimated_node_count = (tokens_len + 2) / 2;
if (!(nodes_tags = calloc(estimated_node_count, sizeof(astNodeTag))))
goto err;
if (!(nodes_tags = calloc(estimated_node_count, sizeof(AstNodeTag))))
exit(1);
if (!(main_tokens = calloc(estimated_node_count, sizeof(astTokenIndex))))
goto err;
if (!(main_tokens = calloc(estimated_node_count, sizeof(AstTokenIndex))))
exit(1);
if (!(nodes_datas = calloc(estimated_node_count, sizeof(astData))))
goto err;
if (!(nodes_datas = calloc(estimated_node_count, sizeof(AstData))))
exit(1);
// extra_data and scratch are fixed-capacity buffers of N entries each.
if (!(extra_data_arr = calloc(N, sizeof(astNodeIndex))))
goto err;
if (!(extra_data_arr = calloc(N, sizeof(AstNodeIndex))))
exit(1);
if (!(scratch_arr = calloc(N, sizeof(astNodeIndex))))
goto err;
if (!(scratch_arr = calloc(N, sizeof(AstNodeIndex))))
exit(1);
parser p = (parser) {
Parser p = (Parser) {
.source = source,
.source_len = len,
.token_tags = token_tags,
.token_starts = token_starts,
.tokens_len = tokens_len,
.tok_i = 0,
.nodes = (astNodeList) {
.nodes = (AstNodeList) {
.len = 0,
.cap = estimated_node_count,
.tags = nodes_tags,
.main_tokens = main_tokens,
.datas = nodes_datas,
},
.extra_data = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
.scratch = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
.extra_data = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
.scratch = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
};
// NOTE(review): p.scratch.arr still holds this pointer after the free below and the root
// parse runs afterwards; verify nothing reached from the parse touches p->scratch.arr.
free(scratch_arr);
parse_root(&p);
parseRoot(&p);
// The return value of the root parse is not inspected.
return (ast) {
.source = source,
@@ -85,16 +85,4 @@ ast ast_parse(const char* source, const uint32_t len, int* err)
.extra_data = p.extra_data.arr,
.extra_data_len = p.extra_data.len,
};
// Failure path used by the `goto err` lines: releases every buffer (pointers that were never
// allocated are still NULL, and free(NULL) is a no-op), sets *err to 1 and returns an empty ast.
err:
free(token_tags);
free(token_starts);
free(nodes_tags);
free(main_tokens);
free(nodes_datas);
free(extra_data_arr);
free(scratch_arr);
*err = 1;
return (ast) {};
}

30
ast.h
View File

@@ -479,23 +479,23 @@ typedef enum {
AST_NODE_TAG_ERROR_VALUE,
/// `lhs!rhs`. main_token is the `!`.
AST_NODE_TAG_ERROR_UNION,
} astNodeTag;
} AstNodeTag;
typedef int32_t astTokenIndex;
typedef uint32_t astNodeIndex;
typedef uint32_t astIndex;
typedef int32_t AstTokenIndex;
typedef uint32_t AstNodeIndex;
typedef uint32_t AstIndex;
typedef struct {
astIndex lhs, rhs;
} astData;
AstIndex lhs, rhs;
} AstData;
typedef struct {
uint32_t len;
uint32_t cap;
astNodeTag* tags;
astTokenIndex* main_tokens;
astData* datas;
} astNodeList;
AstNodeTag* tags;
AstTokenIndex* main_tokens;
AstData* datas;
} AstNodeList;
typedef struct {
const char* source;
@@ -503,16 +503,16 @@ typedef struct {
struct {
uint32_t len;
tokenizerTag* tags;
astIndex* starts;
TokenizerTag* tags;
AstIndex* starts;
} tokens;
astNodeList nodes;
AstNodeList nodes;
astNodeIndex* extra_data;
AstNodeIndex* extra_data;
uint32_t extra_data_len;
} ast;
ast ast_parse(const char* source, uint32_t len, int* err);
ast astParse(const char* source, uint32_t len);
#endif

View File

@@ -96,6 +96,7 @@ pub fn build(b: *std.Build) !void {
cppcheck.addArgs(&.{
"--quiet",
"--error-exitcode=1",
"--check-level=exhaustive",
"--enable=all",
"--suppress=missingIncludeSystem",
"--suppress=checkersReport",

6
main.c
View File

@@ -2,8 +2,8 @@
#include <stdio.h>
#include <stdlib.h>
int zig1_run(char* program, char** msg);
int zig1_run_file(char* fname, char** msg);
// Entry points implemented in zig1.c. The string arguments are pointer-to-const so these
// declarations are compatible with the definitions there.
// Result codes: 0 = program terminated successfully, 1 = panic (message in *msg),
// 2 = interpreter error (message in *msg), 3 = abnormal error reported on stderr.
// The caller frees *msg for codes 1 and 2.
int zig1Run(const char* program, char** msg);
int zig1RunFile(const char* fname, char** msg);
static void usage(const char* argv0)
{
@@ -18,7 +18,7 @@ int main(int argc, char** argv)
}
char* msg;
switch (zig1_run_file(argv[1], &msg)) {
switch (zig1RunFile(argv[1], &msg)) {
case 0:
return 0;
break;

View File

@@ -1,12 +1,13 @@
#include <stdio.h>
#include <stdlib.h>
#include "parser.h"
typedef struct {
uint32_t len;
astNodeIndex lhs, rhs;
AstNodeIndex lhs, rhs;
bool trailing;
} members;
} Members;
typedef struct {
enum {
@@ -17,24 +18,31 @@ typedef struct {
union {
uint32_t end;
} payload;
} field_state;
} FieldState;
// Two definitions are overlaid in this span and share the braces: the snake_case
// `next_token` (its signature and the `return ++p->tok_i;` line) and the CamelCase
// `parseRoot` (everything else).
static astTokenIndex next_token(parser* p)
// parseRoot appends a node tagged AST_NODE_TAG_ROOT to p->nodes and returns 0.
int parseRoot(Parser* p)
{
return ++p->tok_i;
p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
// NOTE(review): nodes.len was already incremented by the line above, so this store lands
// in the slot after the root tag instead of the root's own slot — confirm whether index
// len - 1 was intended (with cap == 1 this index is past the end of main_tokens).
p->nodes.main_tokens[p->nodes.len] = 0;
// members root_members = parseContainerMembers(p);
return 0;
}
static astTokenIndex eat_token(parser* p, tokenizerTag tag)
// Returns the index of the current token and then moves the cursor one token forward.
static AstTokenIndex nextToken(Parser* p)
{
    const AstTokenIndex current = p->tok_i;
    p->tok_i = current + 1;
    return current;
}
// If the tag of the token at p->tok_i equals `tag`, advances via the next-token helper and
// returns that helper's result; otherwise leaves the cursor untouched and returns -1.
// Both spellings of the return statement (snake_case helper, then CamelCase helper) are shown.
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag)
{
return (p->token_tags[p->tok_i] == tag) ? next_token(p) : -1;
return (p->token_tags[p->tok_i] == tag) ? nextToken(p) : -1;
}
static members parse_container_members(parser* p)
static Members parseContainerMembers(Parser* p)
{
const uint32_t scratch_top = p->scratch.len;
members res = (members) {};
Members res = (Members) {};
// ast_token_index last_field;
while (eat_token(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
;
// bool trailing = false;
@@ -166,25 +174,15 @@ static members parse_container_members(parser* p)
case TOKENIZER_TAG_KEYWORD_WHILE:;
const char* str = tokenizerTagString[p->token_tags[p->tok_i]];
fprintf(stderr, "keyword %s not implemented\n", str);
goto cleanup;
exit(1);
case TOKENIZER_TAG_KEYWORD_PUB:
p->tok_i++;
// AstNodeIndex top_level_decl = expectTopLevelDecl(*p);
break;
// TODO do work
}
}
cleanup:
p->scratch.len = scratch_top;
return res;
}
// Appends the root node to p->nodes: tag AST_NODE_TAG_ROOT with main token 0.
// Returns 0.
int parse_root(parser* p)
{
    // Claim one slot and write both parallel arrays at that same index; indexing
    // main_tokens with the already-incremented length would hit the following slot.
    const uint32_t root = p->nodes.len++;
    p->nodes.tags[root] = AST_NODE_TAG_ROOT;
    p->nodes.main_tokens[root] = 0;
    // members root_members = parse_container_members(p);
    return 0;
}

View File

@@ -9,24 +9,24 @@
typedef struct {
uint32_t len;
uint32_t cap;
astNodeIndex* arr;
} parserNodeIndexSlice;
AstNodeIndex* arr;
} ParserNodeIndexSlice;
typedef struct {
const char* source;
const uint32_t source_len;
tokenizerTag* token_tags;
astIndex* token_starts;
TokenizerTag* token_tags;
AstIndex* token_starts;
uint32_t tokens_len;
astTokenIndex tok_i;
AstTokenIndex tok_i;
astNodeList nodes;
parserNodeIndexSlice extra_data;
parserNodeIndexSlice scratch;
} parser;
AstNodeList nodes;
ParserNodeIndexSlice extra_data;
ParserNodeIndexSlice scratch;
} Parser;
int parse_root(parser*);
int parseRoot(Parser*);
#endif

View File

@@ -7,10 +7,10 @@
typedef struct {
const char* keyword;
tokenizerTag tag;
} keywordMap;
TokenizerTag tag;
} KeywordMap;
const keywordMap keywords[] = {
const KeywordMap keywords[] = {
{ "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
{ "align", TOKENIZER_TAG_KEYWORD_ALIGN },
{ "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
@@ -63,9 +63,9 @@ const keywordMap keywords[] = {
};
// TODO binary search
static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
{
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keywordMap); i++) {
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) {
size_t klen = strlen(keywords[i].keyword);
size_t minlen = klen < len ? klen : len;
int cmp = strncmp(bytes, keywords[i].keyword, minlen);
@@ -82,25 +82,25 @@ static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
return TOKENIZER_TAG_INVALID;
}
// Builds a tokenizer value over `buffer` / `len`. The starting index is 3 when the buffer
// is at least 3 bytes long and begins with the bytes EF BB BF (the UTF-8 byte order mark),
// otherwise 0. Both spellings of the signature and of the compound-literal type are shown.
tokenizer tokenizer_init(const char* buffer, const uint32_t len)
Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
{
return (tokenizer) {
return (Tokenizer) {
.buffer = buffer,
.buffer_len = len,
.index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0,
};
}
tokenizerToken tokenizer_next(tokenizer* self)
TokenizerToken tokenizerNext(Tokenizer* self)
{
tokenizerToken result = (tokenizerToken) {
TokenizerToken result = (TokenizerToken) {
.tag = TOKENIZER_TAG_INVALID,
.loc = {
.start = 0,
},
};
tokenizerState state = TOKENIZER_STATE_START;
TokenizerState state = TOKENIZER_STATE_START;
state:
switch (state) {
@@ -108,7 +108,7 @@ state:
switch (self->buffer[self->index]) {
case 0:
if (self->index == self->buffer_len) {
return (tokenizerToken) {
return (TokenizerToken) {
.tag = TOKENIZER_TAG_EOF,
.loc = {
.start = self->index,
@@ -455,7 +455,7 @@ state:
default:; // Once we're at C23, this semicolon can be removed.
const char* start = self->buffer + result.loc.start;
uint32_t len = self->index - result.loc.start;
tokenizerTag tag = get_keyword(start, len);
TokenizerTag tag = getKeyword(start, len);
if (tag != TOKENIZER_TAG_INVALID) {
result.tag = tag;
}
@@ -856,7 +856,7 @@ state:
state = TOKENIZER_STATE_INVALID;
goto state;
} else {
return (tokenizerToken) {
return (TokenizerToken) {
.tag = TOKENIZER_TAG_EOF,
.loc = {
.start = self->index,
@@ -930,7 +930,7 @@ state:
state = TOKENIZER_STATE_INVALID;
goto state;
} else {
return (tokenizerToken) {
return (TokenizerToken) {
.tag = TOKENIZER_TAG_EOF,
.loc = {
.start = self->index,

View File

@@ -133,7 +133,7 @@
typedef enum {
FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM)
} tokenizerTag;
} TokenizerTag;
static const char *tokenizerTagString[] = {
FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING)
@@ -185,22 +185,22 @@ typedef enum {
TOKENIZER_STATE_PERIOD_ASTERISK,
TOKENIZER_STATE_SAW_AT_SIGN,
TOKENIZER_STATE_INVALID,
} tokenizerState;
} TokenizerState;
typedef struct {
tokenizerTag tag;
TokenizerTag tag;
struct {
uint32_t start, end;
} loc;
} tokenizerToken;
} TokenizerToken;
typedef struct {
const char* buffer;
const uint32_t buffer_len;
uint32_t index;
} tokenizer;
} Tokenizer;
tokenizer tokenizer_init(const char* buffer, uint32_t len);
tokenizerToken tokenizer_next(tokenizer* self);
Tokenizer tokenizerInit(const char* buffer, uint32_t len);
TokenizerToken tokenizerNext(Tokenizer* self);
#endif

View File

@@ -138,6 +138,15 @@ fn zigToken(token: c_uint) Token.Tag {
// Copy-pasted from lib/std/zig/tokenizer.zig
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
// Do the C thing
var ctokenizer = c.tokenizerInit(source.ptr, @intCast(source.len));
for (expected_token_tags) |expected_token_tag| {
const token = c.tokenizerNext(&ctokenizer);
try std.testing.expectEqual(expected_token_tag, zigToken(token.tag));
}
const last_token = c.tokenizerNext(&ctokenizer);
try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag));
// uncomment when Zig source and compiler get in sync (e.g. with 0.14)
//var tokenizer = Tokenizer.init(source);
//for (expected_token_tags) |expected_token_tag| {
@@ -149,17 +158,6 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
//// recovered by opinionated means outside the scope of this implementation.
//const last_token = tokenizer.next();
//try std.testing.expectEqual(Token.Tag.eof, last_token.tag);
//try std.testing.expectEqual(source.len, last_token.loc.start);
//try std.testing.expectEqual(source.len, last_token.loc.end);
// Do the C thing
var ctokenizer = c.tokenizer_init(source.ptr, @intCast(source.len));
for (expected_token_tags) |expected_token_tag| {
const token = c.tokenizer_next(&ctokenizer);
try std.testing.expectEqual(expected_token_tag, zigToken(token.tag));
}
const last_token = c.tokenizer_next(&ctokenizer);
try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag));
try std.testing.expectEqual(source.len, last_token.loc.start);
try std.testing.expectEqual(source.len, last_token.loc.end);
}

6
zig1.c
View File

@@ -6,7 +6,7 @@
// - code = 0: program successfully terminated.
// - code = 1: panicked, panic message in msg. Caller should free msg.
// - code = 2: interpreter error, error in msg. Caller should free msg.
int zig1_run(const char* program, char** msg)
int zig1Run(const char* program, char** msg)
{
(void)program;
(void)msg;
@@ -15,7 +15,7 @@ int zig1_run(const char* program, char** msg)
// API: run and:
// code = 3: abnormal error, expect something in stderr.
int zig1_run_file(const char* fname, char** msg)
int zig1RunFile(const char* fname, char** msg)
{
FILE* f = fopen(fname, "r");
if (f == NULL) {
@@ -53,7 +53,7 @@ int zig1_run_file(const char* fname, char** msg)
fclose(f);
program[fsize] = 0;
int code = zig1_run(program, msg);
int code = zig1Run(program, msg);
free(program);
return code;
}