diff --git a/ast.c b/ast.c index f94ddb7..f8bd3cd 100644 --- a/ast.c +++ b/ast.c @@ -2,37 +2,97 @@ #include #include #include -#include #include "ast.h" +#include "parse.h" -int ast_parse(const char* source, uint32_t len, ast *result) { +ast ast_parse(const char* source, const uint32_t len, int* err) +{ uint32_t estimated_token_count = len / 8; - tokenizer_tag* tokens_tag = calloc(estimated_token_count, sizeof(tokenizer_tag)); - if (tokens_tag == NULL) { - perror("calloc"); - return 1; - } - ast_index* tokens_start = calloc(estimated_token_count, sizeof(ast_index)); - if (tokens_start == NULL) { - free(tokens_tag); - perror("calloc"); - return 1; - } + tokenizer_tag* token_tags = NULL; + ast_index* token_starts = NULL; + ast_node_tag* nodes_tags = NULL; + ast_token_index* main_tokens = NULL; + ast_data* nodes_datas = NULL; + ast_node_index* extra_data_arr = NULL; + ast_node_index* scratch_arr = NULL; - tokenizer tokenizer = tokenizer_init(source, len); - for (uint32_t i = 0; i <= estimated_token_count; i++) { - if (i == estimated_token_count) { + if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizer_tag)))) + goto err; + + if (!(token_starts = calloc(estimated_token_count, sizeof(ast_index)))) + goto err; + + tokenizer tok = tokenizer_init(source, len); + uint32_t tokens_len = 0; + for (; tokens_len <= estimated_token_count; tokens_len++) { + if (tokens_len == estimated_token_count) { fprintf(stderr, "too many tokens, bump estimated_token_count\n"); - return 1; + goto err; } - tokenizer_token token = tokenizer_next(&tokenizer); - tokens_tag[i] = token.tag; - tokens_start[i] = token.loc.start; + tokenizer_token token = tokenizer_next(&tok); + token_tags[tokens_len] = token.tag; + token_starts[tokens_len] = token.loc.start; } - /* TODO parser */ + uint32_t estimated_node_count = (tokens_len + 2) / 2; - return 0; + if (!(nodes_tags = calloc(estimated_node_count, sizeof(ast_node_tag)))) + goto err; + + if (!(main_tokens = calloc(estimated_node_count, sizeof(ast_token_index)))) + goto err; + + if (!(nodes_datas = calloc(estimated_node_count, sizeof(ast_data)))) + goto err; + + if (!(extra_data_arr = calloc(16, sizeof(ast_token_index)))) + goto err; + + if (!(scratch_arr = calloc(16, sizeof(ast_token_index)))) + goto err; + + parser p = (parser) { + .source = source, + .source_len = len, + .token_tags = token_tags, + .token_starts = token_starts, + .tokens_len = tokens_len, + .tok_i = 0, + .nodes = (ast_node_list) { + .len = 0, + .cap = estimated_node_count, + .tags = nodes_tags, + .main_tokens = main_tokens, + .datas = nodes_datas, + }, + .extra_data = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = extra_data_arr }, + .scratch = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = scratch_arr }, + }; + + free(scratch_arr); + + // TODO work + + return (ast) { + .source = source, + .tokens.tags = token_tags, + .tokens.starts = token_starts, + .nodes = p.nodes, + .extra_data = p.extra_data.arr, + .extra_data_len = p.extra_data.len, + }; + +err: + free(token_tags); + free(token_starts); + free(nodes_tags); + free(main_tokens); + free(nodes_datas); + free(extra_data_arr); + free(scratch_arr); + + *err = 1; + return (ast) {}; } diff --git a/ast.h b/ast.h index 443e5f8..728d274 100644 --- a/ast.h +++ b/ast.h @@ -486,26 +486,33 @@ typedef uint32_t ast_node_index; typedef uint32_t ast_index; typedef struct { - ast_node_tag tag; - ast_token_index main_token; - struct { - ast_index lhs, rhs; - } data; -} ast_node; + ast_index lhs, rhs; +} ast_data; + +typedef struct { + uint32_t len; + uint32_t cap; + ast_node_tag* tags; + ast_token_index* main_tokens; + ast_data* datas; +} ast_node_list; typedef struct { const char* source; - uint32_t source_len; + const uint32_t source_len; - tokenizer_tag* tokens_tag; - ast_index* tokens_start; - uint32_t tokens_len; + struct { + uint32_t len; + tokenizer_tag* tags; + ast_index* starts; + } tokens; + + ast_node_list nodes; - ast_node* nodes; - uint32_t nodes_len; ast_node_index* extra_data; + uint32_t extra_data_len; } ast; -int ast_parse(const char* source, uint32_t len, ast *result); +ast ast_parse(const char* source, uint32_t len, int* err); #endif diff --git a/build.zig b/build.zig index 8e36628..e1245b3 100644 --- a/build.zig +++ b/build.zig @@ -1,5 +1,8 @@ const std = @import("std"); +const c_lib = &[_][]const u8{ "tokenizer.c", "ast.c", "zig1.c" }; +const all_c_files = c_lib ++ &[_][]const u8{"main.c"}; + pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); @@ -9,8 +12,8 @@ pub fn build(b: *std.Build) void { .optimize = optimize, .target = target, }); - lib.addCSourceFile(.{ - .file = b.path("tokenizer.c"), + lib.addCSourceFiles(.{ + .files = c_lib, .flags = &[_][]const u8{ "-std=c11", "-Wall", @@ -35,13 +38,44 @@ pub fn build(b: *std.Build) void { lib.linkLibC(); const test_step = b.step("test", "Run unit tests"); - const test_exe = b.addTest(.{ .root_source_file = b.path("test_all.zig"), .optimize = optimize, }); test_exe.linkLibrary(lib); test_exe.addIncludePath(b.path(".")); - test_step.dependOn(&b.addRunArtifact(test_exe).step); + + const lint_step = b.step("lint", "Run linters"); + const clang_format = b.addSystemCommand(&.{"clang-format"}); + clang_format.addArgs(&.{ "--style=webkit", "-i" }); + for (all_c_files) |cfile| clang_format.addFileArg(b.path(cfile)); + lint_step.dependOn(&clang_format.step); + + const clang_analyze = b.addSystemCommand(&.{"clang"}); + clang_analyze.addArgs(&.{ + "--analyze", + "--analyzer-output", + "text", + "-Wno-unused-command-line-argument", + "-Werror", + }); + for (all_c_files) |cfile| clang_analyze.addFileArg(b.path(cfile)); + lint_step.dependOn(&clang_analyze.step); + + const gcc_analyze = b.addSystemCommand(&.{"gcc"}); + gcc_analyze.addArgs(&.{ "--analyzer", "-Werror", "-o", "/dev/null" }); + for (all_c_files) |cfile| gcc_analyze.addFileArg(b.path(cfile)); + lint_step.dependOn(&gcc_analyze.step); + + const cppcheck = b.addSystemCommand(&.{"cppcheck"}); + cppcheck.addArgs(&.{ + "--enable=all", + "--suppress=missingIncludeSystem", + "--suppress=checkersReport", + "--quiet", + "--suppress=unusedFunction", // TODO remove after plumbing is done + }); + for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile)); + lint_step.dependOn(&cppcheck.step); } diff --git a/main.c b/main.c index 7509364..5c933f5 100644 --- a/main.c +++ b/main.c @@ -5,7 +5,7 @@ int zig1_run(char* program, char** msg); int zig1_run_file(char* fname, char** msg); -static void usage(char* argv0) +static void usage(const char* argv0) { fprintf(stderr, "Usage: %s program.zig\n", argv0); } diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..765fa98 --- /dev/null +++ b/parse.h @@ -0,0 +1,30 @@ +#ifndef _ZIG1_PARSE_H__ +#define _ZIG1_PARSE_H__ + +#include +#include + +#include "ast.h" + +typedef struct { + uint32_t len; + uint32_t cap; + ast_node_index* arr; +} parser_node_index_slice; + +typedef struct { + const char* source; + const uint32_t source_len; + + tokenizer_tag* token_tags; + ast_index* token_starts; + uint32_t tokens_len; + + ast_token_index tok_i; + + ast_node_list nodes; + parser_node_index_slice extra_data; + parser_node_index_slice scratch; +} parser; + +#endif diff --git a/tokenizer.c b/tokenizer.c index 9e1bb41..a2df204 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -63,7 +63,7 @@ const keyword_map keywords[] = { }; // TODO binary search -static tokenizer_tag get_keyword(const char* bytes, uint32_t len) +static tokenizer_tag get_keyword(const char* bytes, const uint32_t len) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) { size_t klen = strlen(keywords[i].keyword); @@ -78,12 +78,11 @@ static tokenizer_tag get_keyword(const char* bytes, uint32_t len) } else if (cmp < 0) { return TOKENIZER_TAG_INVALID; } - continue; } return TOKENIZER_TAG_INVALID; } -tokenizer tokenizer_init(const char* buffer, uint32_t len) +tokenizer tokenizer_init(const char* buffer, const uint32_t len) { return (tokenizer) { .buffer = buffer, diff --git a/tokenizer.h b/tokenizer.h index 206ab50..d1dc242 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -186,7 +186,7 @@ typedef struct { typedef struct { const char* buffer; - uint32_t buffer_len; + const uint32_t buffer_len; uint32_t index; } tokenizer; diff --git a/zig1.c b/zig1.c index 614ba16..93ba67f 100644 --- a/zig1.c +++ b/zig1.c @@ -6,11 +6,16 @@ // - code = 0: program successfully terminated. // - code = 1: panicked, panic message in msg. Caller should free msg. // - code = 2: interpreter error, error in msg. Caller should free msg. -int zig1_run(char* program, char** msg) { return 0; } +int zig1_run(const char* program, char** msg) +{ + (void)program; + (void)msg; + return 0; +} // API: run and: // code = 3: abnormal error, expect something in stderr. -int zig1_run_file(char* fname, char** msg) +int zig1_run_file(const char* fname, char** msg) { FILE* f = fopen(fname, "r"); if (f == NULL) { @@ -18,12 +23,13 @@ int zig1_run_file(char* fname, char** msg) return 3; } fseek(f, 0, SEEK_END); - long fsize = ftell(f); - if (fsize == -1) { + long fsizel = ftell(f); + if (fsizel == -1) { perror("ftell"); fclose(f); return 3; } + unsigned long fsize = (unsigned long)fsizel; fseek(f, 0, SEEK_SET); char* program = malloc(fsize + 1);