commit 7361b6058dc13619a3a5b51f3c818ea5084c04b4 (tree)
parent c2915d2eaa7dfe79d219505ee9f750a195f86673
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Wed, 18 Dec 2024 22:34:22 +0200
linters, some ast headers
Diffstat:
| M | ast.c | | | 104 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------- |
| M | ast.h | | | 33 | ++++++++++++++++++++------------- |
| M | build.zig | | | 42 | ++++++++++++++++++++++++++++++++++++++---- |
| M | main.c | | | 2 | +- |
| A | parse.h | | | 30 | ++++++++++++++++++++++++++++++ |
| M | tokenizer.c | | | 5 | ++--- |
| M | tokenizer.h | | | 2 | +- |
| M | zig1.c | | | 14 | ++++++++++---- |
8 files changed, 184 insertions(+), 48 deletions(-)
diff --git a/ast.c b/ast.c
@@ -2,37 +2,97 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
#include "ast.h"
+#include "parse.h"
-int ast_parse(const char* source, uint32_t len, ast *result) {
+ast ast_parse(const char* source, const uint32_t len, int* err)
+{
uint32_t estimated_token_count = len / 8;
- tokenizer_tag* tokens_tag = calloc(estimated_token_count, sizeof(tokenizer_tag));
- if (tokens_tag == NULL) {
- perror("calloc");
- return 1;
- }
- ast_index* tokens_start = calloc(estimated_token_count, sizeof(ast_index));
- if (tokens_start == NULL) {
- free(tokens_tag);
- perror("calloc");
- return 1;
- }
+ tokenizer_tag* token_tags = NULL;
+ ast_index* token_starts = NULL;
+ ast_node_tag* nodes_tags = NULL;
+ ast_token_index* main_tokens = NULL;
+ ast_data* nodes_datas = NULL;
+ ast_node_index* extra_data_arr = NULL;
+ ast_node_index* scratch_arr = NULL;
+
+ if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizer_tag))))
+ goto err;
+
+ if (!(token_starts = calloc(estimated_token_count, sizeof(ast_index))))
+ goto err;
- tokenizer tokenizer = tokenizer_init(source, len);
- for (uint32_t i = 0; i <= estimated_token_count; i++) {
- if (i == estimated_token_count) {
+ tokenizer tok = tokenizer_init(source, len);
+ uint32_t tokens_len = 0;
+ for (; tokens_len <= estimated_token_count; tokens_len++) {
+ if (tokens_len == estimated_token_count) {
fprintf(stderr, "too many tokens, bump estimated_token_count\n");
- return 1;
+ goto err;
}
- tokenizer_token token = tokenizer_next(&tokenizer);
- tokens_tag[i] = token.tag;
- tokens_start[i] = token.loc.start;
+ tokenizer_token token = tokenizer_next(&tok);
+ token_tags[tokens_len] = token.tag;
+ token_starts[tokens_len] = token.loc.start;
}
- /* TODO parser */
+ uint32_t estimated_node_count = (tokens_len + 2) / 2;
+
+ if (!(nodes_tags = calloc(estimated_node_count, sizeof(ast_node_tag))))
+ goto err;
+
+ if (!(main_tokens = calloc(estimated_node_count, sizeof(ast_token_index))))
+ goto err;
+
+ if (!(nodes_datas = calloc(estimated_node_count, sizeof(ast_data))))
+ goto err;
+
+ if (!(extra_data_arr = calloc(16, sizeof(ast_token_index))))
+ goto err;
+
+ if (!(scratch_arr = calloc(16, sizeof(ast_token_index))))
+ goto err;
+
+ parser p = (parser) {
+ .source = source,
+ .source_len = len,
+ .token_tags = token_tags,
+ .token_starts = token_starts,
+ .tokens_len = tokens_len,
+ .tok_i = 0,
+ .nodes = (ast_node_list) {
+ .len = 0,
+ .cap = estimated_node_count,
+ .tags = nodes_tags,
+ .main_tokens = main_tokens,
+ .datas = nodes_datas,
+ },
+ .extra_data = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = extra_data_arr },
+ .scratch = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = scratch_arr },
+ };
+
+ free(scratch_arr);
+
+ // TODO work
+
+ return (ast) {
+ .source = source,
+ .tokens.tags = token_tags,
+ .tokens.starts = token_starts,
+ .nodes = p.nodes,
+ .extra_data = p.extra_data.arr,
+ .extra_data_len = p.extra_data.len,
+ };
+
+err:
+ free(token_tags);
+ free(token_starts);
+ free(nodes_tags);
+ free(main_tokens);
+ free(nodes_datas);
+ free(extra_data_arr);
+ free(scratch_arr);
- return 0;
+ *err = 1;
+ return (ast) {};
}
diff --git a/ast.h b/ast.h
@@ -486,26 +486,33 @@ typedef uint32_t ast_node_index;
typedef uint32_t ast_index;
typedef struct {
- ast_node_tag tag;
- ast_token_index main_token;
- struct {
- ast_index lhs, rhs;
- } data;
-} ast_node;
+ ast_index lhs, rhs;
+} ast_data;
+
+typedef struct {
+ uint32_t len;
+ uint32_t cap;
+ ast_node_tag* tags;
+ ast_token_index* main_tokens;
+ ast_data* datas;
+} ast_node_list;
typedef struct {
const char* source;
- uint32_t source_len;
+ const uint32_t source_len;
+
+ struct {
+ uint32_t len;
+ tokenizer_tag* tags;
+ ast_index* starts;
+ } tokens;
- tokenizer_tag* tokens_tag;
- ast_index* tokens_start;
- uint32_t tokens_len;
+ ast_node_list nodes;
- ast_node* nodes;
- uint32_t nodes_len;
ast_node_index* extra_data;
+ uint32_t extra_data_len;
} ast;
-int ast_parse(const char* source, uint32_t len, ast *result);
+ast ast_parse(const char* source, uint32_t len, int* err);
#endif
diff --git a/build.zig b/build.zig
@@ -1,5 +1,8 @@
const std = @import("std");
+const c_lib = &[_][]const u8{ "tokenizer.c", "ast.c", "zig1.c" };
+const all_c_files = c_lib ++ &[_][]const u8{"main.c"};
+
pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
@@ -9,8 +12,8 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
.target = target,
});
- lib.addCSourceFile(.{
- .file = b.path("tokenizer.c"),
+ lib.addCSourceFiles(.{
+ .files = c_lib,
.flags = &[_][]const u8{
"-std=c11",
"-Wall",
@@ -35,13 +38,44 @@ pub fn build(b: *std.Build) void {
lib.linkLibC();
const test_step = b.step("test", "Run unit tests");
-
const test_exe = b.addTest(.{
.root_source_file = b.path("test_all.zig"),
.optimize = optimize,
});
test_exe.linkLibrary(lib);
test_exe.addIncludePath(b.path("."));
-
test_step.dependOn(&b.addRunArtifact(test_exe).step);
+
+ const lint_step = b.step("lint", "Run linters");
+ const clang_format = b.addSystemCommand(&.{"clang-format"});
+ clang_format.addArgs(&.{ "--style=webkit", "-i" });
+ for (all_c_files) |cfile| clang_format.addFileArg(b.path(cfile));
+ lint_step.dependOn(&clang_format.step);
+
+ const clang_analyze = b.addSystemCommand(&.{"clang"});
+ clang_analyze.addArgs(&.{
+ "--analyze",
+ "--analyzer-output",
+ "text",
+ "-Wno-unused-command-line-argument",
+ "-Werror",
+ });
+ for (all_c_files) |cfile| clang_analyze.addFileArg(b.path(cfile));
+ lint_step.dependOn(&clang_analyze.step);
+
+ const gcc_analyze = b.addSystemCommand(&.{"gcc"});
+ gcc_analyze.addArgs(&.{ "--analyzer", "-Werror", "-o", "/dev/null" });
+ for (all_c_files) |cfile| gcc_analyze.addFileArg(b.path(cfile));
+ lint_step.dependOn(&gcc_analyze.step);
+
+ const cppcheck = b.addSystemCommand(&.{"cppcheck"});
+ cppcheck.addArgs(&.{
+ "--enable=all",
+ "--suppress=missingIncludeSystem",
+ "--suppress=checkersReport",
+ "--quiet",
+ "--suppress=unusedFunction", // TODO remove after plumbing is done
+ });
+ for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile));
+ lint_step.dependOn(&cppcheck.step);
}
diff --git a/main.c b/main.c
@@ -5,7 +5,7 @@
int zig1_run(char* program, char** msg);
int zig1_run_file(char* fname, char** msg);
-static void usage(char* argv0)
+static void usage(const char* argv0)
{
fprintf(stderr, "Usage: %s program.zig\n", argv0);
}
diff --git a/parse.h b/parse.h
@@ -0,0 +1,30 @@
+#ifndef _ZIG1_PARSE_H__
+#define _ZIG1_PARSE_H__
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "ast.h"
+
+typedef struct {
+ uint32_t len;
+ uint32_t cap;
+ ast_node_index* arr;
+} parser_node_index_slice;
+
+typedef struct {
+ const char* source;
+ const uint32_t source_len;
+
+ tokenizer_tag* token_tags;
+ ast_index* token_starts;
+ uint32_t tokens_len;
+
+ ast_token_index tok_i;
+
+ ast_node_list nodes;
+ parser_node_index_slice extra_data;
+ parser_node_index_slice scratch;
+} parser;
+
+#endif
diff --git a/tokenizer.c b/tokenizer.c
@@ -63,7 +63,7 @@ const keyword_map keywords[] = {
};
// TODO binary search
-static tokenizer_tag get_keyword(const char* bytes, uint32_t len)
+static tokenizer_tag get_keyword(const char* bytes, const uint32_t len)
{
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) {
size_t klen = strlen(keywords[i].keyword);
@@ -78,12 +78,11 @@ static tokenizer_tag get_keyword(const char* bytes, uint32_t len)
} else if (cmp < 0) {
return TOKENIZER_TAG_INVALID;
}
- continue;
}
return TOKENIZER_TAG_INVALID;
}
-tokenizer tokenizer_init(const char* buffer, uint32_t len)
+tokenizer tokenizer_init(const char* buffer, const uint32_t len)
{
return (tokenizer) {
.buffer = buffer,
diff --git a/tokenizer.h b/tokenizer.h
@@ -186,7 +186,7 @@ typedef struct {
typedef struct {
const char* buffer;
- uint32_t buffer_len;
+ const uint32_t buffer_len;
uint32_t index;
} tokenizer;
diff --git a/zig1.c b/zig1.c
@@ -6,11 +6,16 @@
// - code = 0: program successfully terminated.
// - code = 1: panicked, panic message in msg. Caller should free msg.
// - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1_run(char* program, char** msg) { return 0; }
+int zig1_run(const char* program, char** msg)
+{
+ (void)program;
+ (void)msg;
+ return 0;
+}
// API: run and:
// code = 3: abnormal error, expect something in stderr.
-int zig1_run_file(char* fname, char** msg)
+int zig1_run_file(const char* fname, char** msg)
{
FILE* f = fopen(fname, "r");
if (f == NULL) {
@@ -18,12 +23,13 @@ int zig1_run_file(char* fname, char** msg)
return 3;
}
fseek(f, 0, SEEK_END);
- long fsize = ftell(f);
- if (fsize == -1) {
+ long fsizel = ftell(f);
+ if (fsizel == -1) {
perror("ftell");
fclose(f);
return 3;
}
+ unsigned long fsize = (unsigned long)fsizel;
fseek(f, 0, SEEK_SET);
char* program = malloc(fsize + 1);