rename types

commit db35aa7722
parent 228b215259
2024-12-22 22:31:16 +02:00
10 changed files with 112 additions and 127 deletions

ast.c

@@ -8,74 +8,74 @@
 #define N 1024
-ast ast_parse(const char* source, const uint32_t len, int* err)
+ast astParse(const char* source, const uint32_t len)
 {
     uint32_t estimated_token_count = len / 8;
-    tokenizerTag* token_tags = NULL;
-    astIndex* token_starts = NULL;
-    astNodeTag* nodes_tags = NULL;
-    astTokenIndex* main_tokens = NULL;
-    astData* nodes_datas = NULL;
-    astNodeIndex* extra_data_arr = NULL;
-    astNodeIndex* scratch_arr = NULL;
-    if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizerTag))))
-        goto err;
-    if (!(token_starts = calloc(estimated_token_count, sizeof(astIndex))))
-        goto err;
-    tokenizer tok = tokenizer_init(source, len);
+    TokenizerTag* token_tags = NULL;
+    AstIndex* token_starts = NULL;
+    AstNodeTag* nodes_tags = NULL;
+    AstTokenIndex* main_tokens = NULL;
+    AstData* nodes_datas = NULL;
+    AstNodeIndex* extra_data_arr = NULL;
+    AstNodeIndex* scratch_arr = NULL;
+    if (!(token_tags = calloc(estimated_token_count, sizeof(TokenizerTag))))
+        exit(1);
+    if (!(token_starts = calloc(estimated_token_count, sizeof(AstIndex))))
+        exit(1);
+    Tokenizer tok = tokenizerInit(source, len);
     uint32_t tokens_len = 0;
     for (; tokens_len <= estimated_token_count; tokens_len++) {
         if (tokens_len == estimated_token_count) {
             fprintf(stderr, "too many tokens, bump estimated_token_count\n");
-            goto err;
+            exit(1);
         }
-        tokenizerToken token = tokenizer_next(&tok);
+        TokenizerToken token = tokenizerNext(&tok);
         token_tags[tokens_len] = token.tag;
         token_starts[tokens_len] = token.loc.start;
     }
     uint32_t estimated_node_count = (tokens_len + 2) / 2;
-    if (!(nodes_tags = calloc(estimated_node_count, sizeof(astNodeTag))))
-        goto err;
-    if (!(main_tokens = calloc(estimated_node_count, sizeof(astTokenIndex))))
-        goto err;
-    if (!(nodes_datas = calloc(estimated_node_count, sizeof(astData))))
-        goto err;
-    if (!(extra_data_arr = calloc(N, sizeof(astNodeIndex))))
-        goto err;
-    if (!(scratch_arr = calloc(N, sizeof(astNodeIndex))))
-        goto err;
-    parser p = (parser) {
+    if (!(nodes_tags = calloc(estimated_node_count, sizeof(AstNodeTag))))
+        exit(1);
+    if (!(main_tokens = calloc(estimated_node_count, sizeof(AstTokenIndex))))
+        exit(1);
+    if (!(nodes_datas = calloc(estimated_node_count, sizeof(AstData))))
+        exit(1);
+    if (!(extra_data_arr = calloc(N, sizeof(AstNodeIndex))))
+        exit(1);
+    if (!(scratch_arr = calloc(N, sizeof(AstNodeIndex))))
+        exit(1);
+    Parser p = (Parser) {
         .source = source,
         .source_len = len,
         .token_tags = token_tags,
         .token_starts = token_starts,
         .tokens_len = tokens_len,
         .tok_i = 0,
-        .nodes = (astNodeList) {
+        .nodes = (AstNodeList) {
             .len = 0,
             .cap = estimated_node_count,
             .tags = nodes_tags,
             .main_tokens = main_tokens,
             .datas = nodes_datas,
         },
-        .extra_data = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
-        .scratch = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
+        .extra_data = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
+        .scratch = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
     };
     free(scratch_arr);
-    parse_root(&p);
+    parseRoot(&p);
     return (ast) {
         .source = source,
@@ -85,16 +85,4 @@ ast ast_parse(const char* source, const uint32_t len, int* err)
         .extra_data = p.extra_data.arr,
         .extra_data_len = p.extra_data.len,
     };
-err:
-    free(token_tags);
-    free(token_starts);
-    free(nodes_tags);
-    free(main_tokens);
-    free(nodes_datas);
-    free(extra_data_arr);
-    free(scratch_arr);
-    *err = 1;
-    return (ast) {};
 }
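Aside (not part of the commit): with the `int* err` out-parameter gone, astParse reports allocation failure by exiting, so a caller needs no error plumbing. A minimal hypothetical smoke test, assuming only the ast.h declarations shown below:

#include <string.h>
#include "ast.h"

/* astSmokeTest is an illustrative name, not in the tree. */
static int astSmokeTest(void)
{
    const char* src = "pub fn main() void {}";
    ast tree = astParse(src, (uint32_t)strlen(src));
    /* node 0 is always the root, so a non-empty node list means success */
    return tree.nodes.len > 0 ? 0 : 1;
}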

ast.h

@@ -479,23 +479,23 @@ typedef enum {
     AST_NODE_TAG_ERROR_VALUE,
     /// `lhs!rhs`. main_token is the `!`.
     AST_NODE_TAG_ERROR_UNION,
-} astNodeTag;
-typedef int32_t astTokenIndex;
-typedef uint32_t astNodeIndex;
-typedef uint32_t astIndex;
+} AstNodeTag;
+typedef int32_t AstTokenIndex;
+typedef uint32_t AstNodeIndex;
+typedef uint32_t AstIndex;
 typedef struct {
-    astIndex lhs, rhs;
-} astData;
+    AstIndex lhs, rhs;
+} AstData;
 typedef struct {
     uint32_t len;
     uint32_t cap;
-    astNodeTag* tags;
-    astTokenIndex* main_tokens;
-    astData* datas;
-} astNodeList;
+    AstNodeTag* tags;
+    AstTokenIndex* main_tokens;
+    AstData* datas;
+} AstNodeList;
 typedef struct {
     const char* source;
@@ -503,16 +503,16 @@ typedef struct {
     struct {
         uint32_t len;
-        tokenizerTag* tags;
-        astIndex* starts;
+        TokenizerTag* tags;
+        AstIndex* starts;
     } tokens;
-    astNodeList nodes;
-    astNodeIndex* extra_data;
+    AstNodeList nodes;
+    AstNodeIndex* extra_data;
     uint32_t extra_data_len;
 } ast;
-ast ast_parse(const char* source, uint32_t len, int* err);
+ast astParse(const char* source, uint32_t len);
 #endif
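The renamed typedefs keep the tree in parallel arrays: AstNodeList is a struct-of-arrays in the style of Zig's std.zig.Ast, with tags, main_tokens, and datas all indexed by the same AstNodeIndex. A hedged sketch of reading one node through the declarations above (astDumpNode is hypothetical and assumes <stdio.h>):

static void astDumpNode(const ast* tree, AstNodeIndex i)
{
    /* one node = the same index into three parallel arrays */
    AstNodeTag tag = tree->nodes.tags[i];
    AstTokenIndex main_token = tree->nodes.main_tokens[i];
    AstData data = tree->nodes.datas[i];
    /* main_token indexes the token arrays; starts maps back into source */
    AstIndex src_start = tree->tokens.starts[main_token];
    fprintf(stderr, "node %u: tag=%d tok=%d src@%u lhs=%u rhs=%u\n",
        i, (int)tag, main_token, src_start, data.lhs, data.rhs);
}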


@@ -96,6 +96,7 @@ pub fn build(b: *std.Build) !void {
     cppcheck.addArgs(&.{
         "--quiet",
         "--error-exitcode=1",
+        "--check-level=exhaustive",
         "--enable=all",
         "--suppress=missingIncludeSystem",
         "--suppress=checkersReport",

main.c

@@ -2,8 +2,8 @@
 #include <stdio.h>
 #include <stdlib.h>
-int zig1_run(char* program, char** msg);
-int zig1_run_file(char* fname, char** msg);
+int zig1Run(char* program, char** msg);
+int zig1RunFile(char* fname, char** msg);
 static void usage(const char* argv0)
 {
@@ -18,7 +18,7 @@ int main(int argc, char** argv)
     }
     char* msg;
-    switch (zig1_run_file(argv[1], &msg)) {
+    switch (zig1RunFile(argv[1], &msg)) {
     case 0:
         return 0;
         break;


@@ -1,12 +1,13 @@
 #include <stdio.h>
+#include <stdlib.h>
 #include "parser.h"
 typedef struct {
     uint32_t len;
-    astNodeIndex lhs, rhs;
+    AstNodeIndex lhs, rhs;
     bool trailing;
-} members;
+} Members;
 typedef struct {
     enum {
@@ -17,24 +18,31 @@ typedef struct {
     union {
         uint32_t end;
     } payload;
-} field_state;
-static astTokenIndex next_token(parser* p)
+} FieldState;
+int parseRoot(Parser* p)
 {
-    return ++p->tok_i;
+    p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
+    p->nodes.main_tokens[p->nodes.len] = 0;
+    // members root_members = parseContainerMembers(p);
+    return 0;
 }
-static astTokenIndex eat_token(parser* p, tokenizerTag tag)
+static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
+static AstTokenIndex eatToken(Parser* p, TokenizerTag tag)
 {
-    return (p->token_tags[p->tok_i] == tag) ? next_token(p) : -1;
+    return (p->token_tags[p->tok_i] == tag) ? nextToken(p) : -1;
 }
-static members parse_container_members(parser* p)
+static Members parseContainerMembers(Parser* p)
 {
     const uint32_t scratch_top = p->scratch.len;
-    members res = (members) {};
+    Members res = (Members) {};
     // ast_token_index last_field;
-    while (eat_token(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
+    while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
         ;
     // bool trailing = false;
@@ -166,25 +174,15 @@ static members parse_container_members(parser* p)
         case TOKENIZER_TAG_KEYWORD_WHILE:;
             const char* str = tokenizerTagString[p->token_tags[p->tok_i]];
             fprintf(stderr, "keyword %s not implemented\n", str);
-            goto cleanup;
+            exit(1);
         case TOKENIZER_TAG_KEYWORD_PUB:
             p->tok_i++;
+            // AstNodeIndex top_level_decl = expectTopLevelDecl(*p);
             break;
             // TODO do work
         }
     }
-cleanup:
     p->scratch.len = scratch_top;
     return res;
 }
-int parse_root(parser* p)
-{
-    p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
-    p->nodes.main_tokens[p->nodes.len] = 0;
-    // members root_members = parse_container_members(p);
-    return 0;
-}
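parseContainerMembers saves scratch.len on entry and restores it before returning; the old cleanup label collapsed into that single restore. For orientation, a hypothetical push helper in the same fixed-capacity, exit-on-overflow style as the rest of this file (scratchPush is not in the tree):

#include <stdio.h>
#include <stdlib.h>
#include "parser.h"

/* Nested parse functions would push temporary node indices here;
 * each caller truncates back to the scratch_top it saved on entry,
 * so one backing array is shared by the whole descent. */
static void scratchPush(Parser* p, AstNodeIndex idx)
{
    if (p->scratch.len == p->scratch.cap) {
        fprintf(stderr, "scratch full, bump N\n");
        exit(1);
    }
    p->scratch.arr[p->scratch.len++] = idx;
}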


@@ -9,24 +9,24 @@
 typedef struct {
     uint32_t len;
     uint32_t cap;
-    astNodeIndex* arr;
-} parserNodeIndexSlice;
+    AstNodeIndex* arr;
+} ParserNodeIndexSlice;
 typedef struct {
     const char* source;
     const uint32_t source_len;
-    tokenizerTag* token_tags;
-    astIndex* token_starts;
+    TokenizerTag* token_tags;
+    AstIndex* token_starts;
     uint32_t tokens_len;
-    astTokenIndex tok_i;
-    astNodeList nodes;
-    parserNodeIndexSlice extra_data;
-    parserNodeIndexSlice scratch;
-} parser;
-int parse_root(parser*);
+    AstTokenIndex tok_i;
+    AstNodeList nodes;
+    ParserNodeIndexSlice extra_data;
+    ParserNodeIndexSlice scratch;
+} Parser;
+int parseRoot(Parser*);
 #endif


@@ -7,10 +7,10 @@
 typedef struct {
     const char* keyword;
-    tokenizerTag tag;
-} keywordMap;
-const keywordMap keywords[] = {
+    TokenizerTag tag;
+} KeywordMap;
+const KeywordMap keywords[] = {
     { "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
     { "align", TOKENIZER_TAG_KEYWORD_ALIGN },
     { "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
@@ -63,9 +63,9 @@ const keywordMap keywords[] = {
 };
 // TODO binary search
-static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
+static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
 {
-    for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keywordMap); i++) {
+    for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) {
         size_t klen = strlen(keywords[i].keyword);
         size_t minlen = klen < len ? klen : len;
         int cmp = strncmp(bytes, keywords[i].keyword, minlen);
@@ -82,25 +82,25 @@ static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
     return TOKENIZER_TAG_INVALID;
 }
-tokenizer tokenizer_init(const char* buffer, const uint32_t len)
+Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
 {
-    return (tokenizer) {
+    return (Tokenizer) {
         .buffer = buffer,
         .buffer_len = len,
         .index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0,
     };
 }
-tokenizerToken tokenizer_next(tokenizer* self)
+TokenizerToken tokenizerNext(Tokenizer* self)
 {
-    tokenizerToken result = (tokenizerToken) {
+    TokenizerToken result = (TokenizerToken) {
         .tag = TOKENIZER_TAG_INVALID,
         .loc = {
             .start = 0,
         },
     };
-    tokenizerState state = TOKENIZER_STATE_START;
+    TokenizerState state = TOKENIZER_STATE_START;
 state:
     switch (state) {
@@ -108,7 +108,7 @@ state:
         switch (self->buffer[self->index]) {
         case 0:
             if (self->index == self->buffer_len) {
-                return (tokenizerToken) {
+                return (TokenizerToken) {
                     .tag = TOKENIZER_TAG_EOF,
                     .loc = {
                         .start = self->index,
@@ -455,7 +455,7 @@ state:
         default:; // Once we're at C23, this semicolon can be removed.
             const char* start = self->buffer + result.loc.start;
             uint32_t len = self->index - result.loc.start;
-            tokenizerTag tag = get_keyword(start, len);
+            TokenizerTag tag = getKeyword(start, len);
             if (tag != TOKENIZER_TAG_INVALID) {
                 result.tag = tag;
             }
@@ -856,7 +856,7 @@ state:
             state = TOKENIZER_STATE_INVALID;
             goto state;
         } else {
-            return (tokenizerToken) {
+            return (TokenizerToken) {
                 .tag = TOKENIZER_TAG_EOF,
                 .loc = {
                     .start = self->index,
@@ -930,7 +930,7 @@ state:
             state = TOKENIZER_STATE_INVALID;
             goto state;
         } else {
-            return (tokenizerToken) {
+            return (TokenizerToken) {
                 .tag = TOKENIZER_TAG_EOF,
                 .loc = {
                     .start = self->index,
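The `// TODO binary search` note above getKeyword survives the rename unchanged. Since keywords[] is kept sorted, a binary-search variant could reuse the same prefix-then-length comparison; a hypothetical sketch (getKeywordBsearch is not in the tree, and relies on this file's existing <string.h> calls):

static TokenizerTag getKeywordBsearch(const char* bytes, const uint32_t len)
{
    size_t lo = 0;
    size_t hi = sizeof(keywords) / sizeof(KeywordMap);
    while (lo < hi) {
        const size_t mid = lo + (hi - lo) / 2;
        const char* kw = keywords[mid].keyword;
        const size_t klen = strlen(kw);
        const size_t minlen = klen < len ? klen : len;
        int cmp = strncmp(bytes, kw, minlen);
        if (cmp == 0 && klen != len)
            cmp = (len < klen) ? -1 : 1; /* one is a prefix; shorter sorts first */
        if (cmp == 0)
            return keywords[mid].tag;
        if (cmp < 0)
            hi = mid; /* keyword is in the lower half */
        else
            lo = mid + 1; /* keyword is in the upper half */
    }
    return TOKENIZER_TAG_INVALID;
}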


@@ -133,7 +133,7 @@
 typedef enum {
     FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM)
-} tokenizerTag;
+} TokenizerTag;
 static const char *tokenizerTagString[] = {
     FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING)
@@ -185,22 +185,22 @@ typedef enum {
     TOKENIZER_STATE_PERIOD_ASTERISK,
     TOKENIZER_STATE_SAW_AT_SIGN,
     TOKENIZER_STATE_INVALID,
-} tokenizerState;
+} TokenizerState;
 typedef struct {
-    tokenizerTag tag;
+    TokenizerTag tag;
     struct {
         uint32_t start, end;
     } loc;
-} tokenizerToken;
+} TokenizerToken;
 typedef struct {
     const char* buffer;
     const uint32_t buffer_len;
     uint32_t index;
-} tokenizer;
-tokenizer tokenizer_init(const char* buffer, uint32_t len);
-tokenizerToken tokenizer_next(tokenizer* self);
+} Tokenizer;
+Tokenizer tokenizerInit(const char* buffer, uint32_t len);
+TokenizerToken tokenizerNext(Tokenizer* self);
 #endif


@@ -138,6 +138,15 @@ fn zigToken(token: c_uint) Token.Tag {
 // Copy-pasted from lib/std/zig/tokenizer.zig
 fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
+    // Do the C thing
+    var ctokenizer = c.tokenizerInit(source.ptr, @intCast(source.len));
+    for (expected_token_tags) |expected_token_tag| {
+        const token = c.tokenizerNext(&ctokenizer);
+        try std.testing.expectEqual(expected_token_tag, zigToken(token.tag));
+    }
+    const last_token = c.tokenizerNext(&ctokenizer);
+    try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag));
     // uncomment when Zig source and compiler get in sync (e.g. with 0.14)
     //var tokenizer = Tokenizer.init(source);
     //for (expected_token_tags) |expected_token_tag| {
@@ -149,17 +158,6 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
     //// recovered by opinionated means outside the scope of this implementation.
     //const last_token = tokenizer.next();
     //try std.testing.expectEqual(Token.Tag.eof, last_token.tag);
-    //try std.testing.expectEqual(source.len, last_token.loc.start);
-    //try std.testing.expectEqual(source.len, last_token.loc.end);
-    // Do the C thing
-    var ctokenizer = c.tokenizer_init(source.ptr, @intCast(source.len));
-    for (expected_token_tags) |expected_token_tag| {
-        const token = c.tokenizer_next(&ctokenizer);
-        try std.testing.expectEqual(expected_token_tag, zigToken(token.tag));
-    }
-    const last_token = c.tokenizer_next(&ctokenizer);
-    try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag));
     try std.testing.expectEqual(source.len, last_token.loc.start);
     try std.testing.expectEqual(source.len, last_token.loc.end);
 }

zig1.c

@@ -6,7 +6,7 @@
 // - code = 0: program successfully terminated.
 // - code = 1: panicked, panic message in msg. Caller should free msg.
 // - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1_run(const char* program, char** msg)
+int zig1Run(const char* program, char** msg)
 {
     (void)program;
     (void)msg;
@@ -15,7 +15,7 @@ int zig1_run(const char* program, char** msg)
 // API: run and:
 // code = 3: abnormal error, expect something in stderr.
-int zig1_run_file(const char* fname, char** msg)
+int zig1RunFile(const char* fname, char** msg)
 {
     FILE* f = fopen(fname, "r");
     if (f == NULL) {
@@ -53,7 +53,7 @@ int zig1_run_file(const char* fname, char** msg)
     fclose(f);
     program[fsize] = 0;
-    int code = zig1_run(program, msg);
+    int code = zig1Run(program, msg);
     free(program);
     return code;
 }
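The header comments above are the whole contract of zig1Run/zig1RunFile: 0 means success, 1 and 2 hand the caller an owned message, 3 means the details already went to stderr. main.c's switch is the real consumer; a hedged standalone sketch of the same handling (runAndReport is hypothetical):

#include <stdio.h>
#include <stdlib.h>

int zig1RunFile(const char* fname, char** msg);

static int runAndReport(const char* fname)
{
    char* msg = NULL;
    int code = zig1RunFile(fname, &msg);
    if (code == 1 || code == 2) {
        /* codes 1 and 2 transfer ownership of msg to the caller */
        fprintf(stderr, "%s: %s\n", code == 1 ? "panic" : "error", msg);
        free(msg);
    }
    return code;
}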