diff --git a/ast.c b/ast.c index 80fb9d1..2ae8bf7 100644 --- a/ast.c +++ b/ast.c @@ -8,74 +8,74 @@ #define N 1024 -ast ast_parse(const char* source, const uint32_t len, int* err) +ast astParse(const char* source, const uint32_t len) { uint32_t estimated_token_count = len / 8; - tokenizerTag* token_tags = NULL; - astIndex* token_starts = NULL; - astNodeTag* nodes_tags = NULL; - astTokenIndex* main_tokens = NULL; - astData* nodes_datas = NULL; - astNodeIndex* extra_data_arr = NULL; - astNodeIndex* scratch_arr = NULL; + TokenizerTag* token_tags = NULL; + AstIndex* token_starts = NULL; + AstNodeTag* nodes_tags = NULL; + AstTokenIndex* main_tokens = NULL; + AstData* nodes_datas = NULL; + AstNodeIndex* extra_data_arr = NULL; + AstNodeIndex* scratch_arr = NULL; - if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizerTag)))) - goto err; + if (!(token_tags = calloc(estimated_token_count, sizeof(TokenizerTag)))) + exit(1); - if (!(token_starts = calloc(estimated_token_count, sizeof(astIndex)))) - goto err; + if (!(token_starts = calloc(estimated_token_count, sizeof(AstIndex)))) + exit(1); - tokenizer tok = tokenizer_init(source, len); + Tokenizer tok = tokenizerInit(source, len); uint32_t tokens_len = 0; for (; tokens_len <= estimated_token_count; tokens_len++) { if (tokens_len == estimated_token_count) { fprintf(stderr, "too many tokens, bump estimated_token_count\n"); - goto err; + exit(1); } - tokenizerToken token = tokenizer_next(&tok); + TokenizerToken token = tokenizerNext(&tok); token_tags[tokens_len] = token.tag; token_starts[tokens_len] = token.loc.start; } uint32_t estimated_node_count = (tokens_len + 2) / 2; - if (!(nodes_tags = calloc(estimated_node_count, sizeof(astNodeTag)))) - goto err; + if (!(nodes_tags = calloc(estimated_node_count, sizeof(AstNodeTag)))) + exit(1); - if (!(main_tokens = calloc(estimated_node_count, sizeof(astTokenIndex)))) - goto err; + if (!(main_tokens = calloc(estimated_node_count, sizeof(AstTokenIndex)))) + exit(1); - if (!(nodes_datas = calloc(estimated_node_count, sizeof(astData)))) - goto err; + if (!(nodes_datas = calloc(estimated_node_count, sizeof(AstData)))) + exit(1); - if (!(extra_data_arr = calloc(N, sizeof(astNodeIndex)))) - goto err; + if (!(extra_data_arr = calloc(N, sizeof(AstNodeIndex)))) + exit(1); - if (!(scratch_arr = calloc(N, sizeof(astNodeIndex)))) - goto err; + if (!(scratch_arr = calloc(N, sizeof(AstNodeIndex)))) + exit(1); - parser p = (parser) { + Parser p = (Parser) { .source = source, .source_len = len, .token_tags = token_tags, .token_starts = token_starts, .tokens_len = tokens_len, .tok_i = 0, - .nodes = (astNodeList) { + .nodes = (AstNodeList) { .len = 0, .cap = estimated_node_count, .tags = nodes_tags, .main_tokens = main_tokens, .datas = nodes_datas, }, - .extra_data = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr }, - .scratch = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr }, + .extra_data = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr }, + .scratch = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr }, }; free(scratch_arr); - parse_root(&p); + parseRoot(&p); return (ast) { .source = source, @@ -85,16 +85,4 @@ ast ast_parse(const char* source, const uint32_t len, int* err) .extra_data = p.extra_data.arr, .extra_data_len = p.extra_data.len, }; - -err: - free(token_tags); - free(token_starts); - free(nodes_tags); - free(main_tokens); - free(nodes_datas); - free(extra_data_arr); - free(scratch_arr); - - *err = 1; - return (ast) {}; } diff --git a/ast.h b/ast.h index 7b96778..ec9d16f 100644 --- a/ast.h +++ b/ast.h @@ -479,23 +479,23 @@ typedef enum { AST_NODE_TAG_ERROR_VALUE, /// `lhs!rhs`. main_token is the `!`. AST_NODE_TAG_ERROR_UNION, -} astNodeTag; +} AstNodeTag; -typedef int32_t astTokenIndex; -typedef uint32_t astNodeIndex; -typedef uint32_t astIndex; +typedef int32_t AstTokenIndex; +typedef uint32_t AstNodeIndex; +typedef uint32_t AstIndex; typedef struct { - astIndex lhs, rhs; -} astData; + AstIndex lhs, rhs; +} AstData; typedef struct { uint32_t len; uint32_t cap; - astNodeTag* tags; - astTokenIndex* main_tokens; - astData* datas; -} astNodeList; + AstNodeTag* tags; + AstTokenIndex* main_tokens; + AstData* datas; +} AstNodeList; typedef struct { const char* source; @@ -503,16 +503,16 @@ typedef struct { struct { uint32_t len; - tokenizerTag* tags; - astIndex* starts; + TokenizerTag* tags; + AstIndex* starts; } tokens; - astNodeList nodes; + AstNodeList nodes; - astNodeIndex* extra_data; + AstNodeIndex* extra_data; uint32_t extra_data_len; } ast; -ast ast_parse(const char* source, uint32_t len, int* err); +ast astParse(const char* source, uint32_t len); #endif diff --git a/build.zig b/build.zig index 3f23427..df07f7b 100644 --- a/build.zig +++ b/build.zig @@ -96,6 +96,7 @@ pub fn build(b: *std.Build) !void { cppcheck.addArgs(&.{ "--quiet", "--error-exitcode=1", + "--check-level=exhaustive", "--enable=all", "--suppress=missingIncludeSystem", "--suppress=checkersReport", diff --git a/main.c b/main.c index 5c933f5..329308b 100644 --- a/main.c +++ b/main.c @@ -2,8 +2,8 @@ #include #include -int zig1_run(char* program, char** msg); -int zig1_run_file(char* fname, char** msg); +int zig1Run(char* program, char** msg); +int zig1RunFile(char* fname, char** msg); static void usage(const char* argv0) { @@ -18,7 +18,7 @@ int main(int argc, char** argv) } char* msg; - switch (zig1_run_file(argv[1], &msg)) { + switch (zig1RunFile(argv[1], &msg)) { case 0: return 0; break; diff --git a/parser.c b/parser.c index aef725d..02f98e2 100644 --- a/parser.c +++ b/parser.c @@ -1,12 +1,13 @@ #include +#include #include "parser.h" typedef struct { uint32_t len; - astNodeIndex lhs, rhs; + AstNodeIndex lhs, rhs; bool trailing; -} members; +} Members; typedef struct { enum { @@ -17,24 +18,31 @@ typedef struct { union { uint32_t end; } payload; -} field_state; +} FieldState; -static astTokenIndex next_token(parser* p) +int parseRoot(Parser* p) { - return ++p->tok_i; + p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT; + p->nodes.main_tokens[p->nodes.len] = 0; + + // members root_members = parseContainerMembers(p); + + return 0; } -static astTokenIndex eat_token(parser* p, tokenizerTag tag) +static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; } + +static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) { - return (p->token_tags[p->tok_i] == tag) ? next_token(p) : -1; + return (p->token_tags[p->tok_i] == tag) ? nextToken(p) : -1; } -static members parse_container_members(parser* p) +static Members parseContainerMembers(Parser* p) { const uint32_t scratch_top = p->scratch.len; - members res = (members) {}; + Members res = (Members) {}; // ast_token_index last_field; - while (eat_token(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1) + while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1) ; // bool trailing = false; @@ -166,25 +174,15 @@ static members parse_container_members(parser* p) case TOKENIZER_TAG_KEYWORD_WHILE:; const char* str = tokenizerTagString[p->token_tags[p->tok_i]]; fprintf(stderr, "keyword %s not implemented\n", str); - goto cleanup; + exit(1); case TOKENIZER_TAG_KEYWORD_PUB: p->tok_i++; + // AstNodeIndex top_level_decl = expectTopLevelDecl(*p); break; // TODO do work } } -cleanup: p->scratch.len = scratch_top; return res; } - -int parse_root(parser* p) -{ - p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT; - p->nodes.main_tokens[p->nodes.len] = 0; - - // members root_members = parse_container_members(p); - - return 0; -} diff --git a/parser.h b/parser.h index 9532bf6..577b0b1 100644 --- a/parser.h +++ b/parser.h @@ -9,24 +9,24 @@ typedef struct { uint32_t len; uint32_t cap; - astNodeIndex* arr; -} parserNodeIndexSlice; + AstNodeIndex* arr; +} ParserNodeIndexSlice; typedef struct { const char* source; const uint32_t source_len; - tokenizerTag* token_tags; - astIndex* token_starts; + TokenizerTag* token_tags; + AstIndex* token_starts; uint32_t tokens_len; - astTokenIndex tok_i; + AstTokenIndex tok_i; - astNodeList nodes; - parserNodeIndexSlice extra_data; - parserNodeIndexSlice scratch; -} parser; + AstNodeList nodes; + ParserNodeIndexSlice extra_data; + ParserNodeIndexSlice scratch; +} Parser; -int parse_root(parser*); +int parseRoot(Parser*); #endif diff --git a/tokenizer.c b/tokenizer.c index 9852e53..0f9d7f3 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -7,10 +7,10 @@ typedef struct { const char* keyword; - tokenizerTag tag; -} keywordMap; + TokenizerTag tag; +} KeywordMap; -const keywordMap keywords[] = { +const KeywordMap keywords[] = { { "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE }, { "align", TOKENIZER_TAG_KEYWORD_ALIGN }, { "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO }, @@ -63,9 +63,9 @@ const keywordMap keywords[] = { }; // TODO binary search -static tokenizerTag get_keyword(const char* bytes, const uint32_t len) +static TokenizerTag getKeyword(const char* bytes, const uint32_t len) { - for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keywordMap); i++) { + for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) { size_t klen = strlen(keywords[i].keyword); size_t minlen = klen < len ? klen : len; int cmp = strncmp(bytes, keywords[i].keyword, minlen); @@ -82,25 +82,25 @@ static tokenizerTag get_keyword(const char* bytes, const uint32_t len) return TOKENIZER_TAG_INVALID; } -tokenizer tokenizer_init(const char* buffer, const uint32_t len) +Tokenizer tokenizerInit(const char* buffer, const uint32_t len) { - return (tokenizer) { + return (Tokenizer) { .buffer = buffer, .buffer_len = len, .index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0, }; } -tokenizerToken tokenizer_next(tokenizer* self) +TokenizerToken tokenizerNext(Tokenizer* self) { - tokenizerToken result = (tokenizerToken) { + TokenizerToken result = (TokenizerToken) { .tag = TOKENIZER_TAG_INVALID, .loc = { .start = 0, }, }; - tokenizerState state = TOKENIZER_STATE_START; + TokenizerState state = TOKENIZER_STATE_START; state: switch (state) { @@ -108,7 +108,7 @@ state: switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { - return (tokenizerToken) { + return (TokenizerToken) { .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, @@ -455,7 +455,7 @@ state: default:; // Once we're at C23, this semicolon can be removed. const char* start = self->buffer + result.loc.start; uint32_t len = self->index - result.loc.start; - tokenizerTag tag = get_keyword(start, len); + TokenizerTag tag = getKeyword(start, len); if (tag != TOKENIZER_TAG_INVALID) { result.tag = tag; } @@ -856,7 +856,7 @@ state: state = TOKENIZER_STATE_INVALID; goto state; } else { - return (tokenizerToken) { + return (TokenizerToken) { .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, @@ -930,7 +930,7 @@ state: state = TOKENIZER_STATE_INVALID; goto state; } else { - return (tokenizerToken) { + return (TokenizerToken) { .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, diff --git a/tokenizer.h b/tokenizer.h index 2b04e30..b9b108b 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -133,7 +133,7 @@ typedef enum { FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM) -} tokenizerTag; +} TokenizerTag; static const char *tokenizerTagString[] = { FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING) @@ -185,22 +185,22 @@ typedef enum { TOKENIZER_STATE_PERIOD_ASTERISK, TOKENIZER_STATE_SAW_AT_SIGN, TOKENIZER_STATE_INVALID, -} tokenizerState; +} TokenizerState; typedef struct { - tokenizerTag tag; + TokenizerTag tag; struct { uint32_t start, end; } loc; -} tokenizerToken; +} TokenizerToken; typedef struct { const char* buffer; const uint32_t buffer_len; uint32_t index; -} tokenizer; +} Tokenizer; -tokenizer tokenizer_init(const char* buffer, uint32_t len); -tokenizerToken tokenizer_next(tokenizer* self); +Tokenizer tokenizerInit(const char* buffer, uint32_t len); +TokenizerToken tokenizerNext(Tokenizer* self); #endif diff --git a/tokenizer_test.zig b/tokenizer_test.zig index e36920a..f571ce0 100644 --- a/tokenizer_test.zig +++ b/tokenizer_test.zig @@ -138,6 +138,15 @@ fn zigToken(token: c_uint) Token.Tag { // Copy-pasted from lib/std/zig/tokenizer.zig fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void { + // Do the C thing + var ctokenizer = c.tokenizerInit(source.ptr, @intCast(source.len)); + for (expected_token_tags) |expected_token_tag| { + const token = c.tokenizerNext(&ctokenizer); + try std.testing.expectEqual(expected_token_tag, zigToken(token.tag)); + } + const last_token = c.tokenizerNext(&ctokenizer); + try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag)); + // uncomment when Zig source and compiler get in sync (e.g. with 0.14) //var tokenizer = Tokenizer.init(source); //for (expected_token_tags) |expected_token_tag| { @@ -149,17 +158,6 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v //// recovered by opinionated means outside the scope of this implementation. //const last_token = tokenizer.next(); //try std.testing.expectEqual(Token.Tag.eof, last_token.tag); - //try std.testing.expectEqual(source.len, last_token.loc.start); - //try std.testing.expectEqual(source.len, last_token.loc.end); - - // Do the C thing - var ctokenizer = c.tokenizer_init(source.ptr, @intCast(source.len)); - for (expected_token_tags) |expected_token_tag| { - const token = c.tokenizer_next(&ctokenizer); - try std.testing.expectEqual(expected_token_tag, zigToken(token.tag)); - } - const last_token = c.tokenizer_next(&ctokenizer); - try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag)); try std.testing.expectEqual(source.len, last_token.loc.start); try std.testing.expectEqual(source.len, last_token.loc.end); } diff --git a/zig1.c b/zig1.c index 93ba67f..1557145 100644 --- a/zig1.c +++ b/zig1.c @@ -6,7 +6,7 @@ // - code = 0: program successfully terminated. // - code = 1: panicked, panic message in msg. Caller should free msg. // - code = 2: interpreter error, error in msg. Caller should free msg. -int zig1_run(const char* program, char** msg) +int zig1Run(const char* program, char** msg) { (void)program; (void)msg; @@ -15,7 +15,7 @@ int zig1_run(const char* program, char** msg) // API: run and: // code = 3: abnormal error, expect something in stderr. -int zig1_run_file(const char* fname, char** msg) +int zig1RunFile(const char* fname, char** msg) { FILE* f = fopen(fname, "r"); if (f == NULL) { @@ -53,7 +53,7 @@ int zig1_run_file(const char* fname, char** msg) fclose(f); program[fsize] = 0; - int code = zig1_run(program, msg); + int code = zig1Run(program, msg); free(program); return code; }