linters, some ast headers

2024-12-18 22:34:22 +02:00
parent c2915d2eaa
commit 7361b6058d
8 changed files with 184 additions and 48 deletions
--- a/ast.c
+++ b/ast.c
@@ -2,37 +2,97 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "ast.h"
 #include "parse.h"
-int ast_parse(const char* source, uint32_t len, ast *result) {
+// Tokenizes `source` (`len` bytes) and allocates the node, extra-data and
+// scratch storage for parsing; the parse itself is still a TODO.
+//
+// On success returns an `ast` that points at the token and node arrays
+// allocated here and, when `err` is non-NULL, stores 0 in `*err`.
+// On failure frees everything allocated so far, stores 1 in `*err` (when
+// `err` is non-NULL) and returns a zero-initialised `ast`.
+ast ast_parse(const char* source, const uint32_t len, int* err)
 {
    uint32_t estimated_token_count = len / 8;
-    tokenizer_tag* tokens_tag = calloc(estimated_token_count, sizeof(tokenizer_tag));
-    if (tokens_tag == NULL) {
-        perror("calloc");
-        return 1;
-    }
-    ast_index* tokens_start = calloc(estimated_token_count, sizeof(ast_index));
-    if (tokens_start == NULL) {
-        free(tokens_tag);
-        perror("calloc");
-        return 1;
-    }
-    tokenizer tokenizer = tokenizer_init(source, len);
-    for (uint32_t i = 0; i <= estimated_token_count; i++) {
-        if (i == estimated_token_count) {
+    // Every buffer starts out NULL so the `err` path can free all of them
+    // unconditionally, whichever allocation failed.
+    tokenizer_tag* token_tags = NULL;
+    ast_index* token_starts = NULL;
+    ast_node_tag* nodes_tags = NULL;
+    ast_token_index* main_tokens = NULL;
+    ast_data* nodes_datas = NULL;
+    ast_node_index* extra_data_arr = NULL;
+    ast_node_index* scratch_arr = NULL;
+
+    if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizer_tag))))
+        goto err;
+
+    if (!(token_starts = calloc(estimated_token_count, sizeof(ast_index))))
+        goto err;
+
+    tokenizer tok = tokenizer_init(source, len);
+    uint32_t tokens_len = 0;
+    // NOTE(review): nothing leaves this loop when the tokenizer reaches the
+    // end of the input, so every call currently ends on the "too many tokens"
+    // path below. It needs a `break` on the end-of-input tag -- confirm the
+    // tag name in tokenizer.h.
+    for (; tokens_len <= estimated_token_count; tokens_len++) {
+        if (tokens_len == estimated_token_count) {
            fprintf(stderr, "too many tokens, bump estimated_token_count\n");
-            return 1;
+            goto err;
        }
-        tokenizer_token token = tokenizer_next(&tokenizer);
-        tokens_tag[i] = token.tag;
-        tokens_start[i] = token.loc.start;
+        tokenizer_token token = tokenizer_next(&tok);
+        token_tags[tokens_len] = token.tag;
+        token_starts[tokens_len] = token.loc.start;
    }
-    /* TODO parser */
-    return 0;
+    uint32_t estimated_node_count = (tokens_len + 2) / 2;
+    if (!(nodes_tags = calloc(estimated_node_count, sizeof(ast_node_tag))))
+        goto err;
+    if (!(main_tokens = calloc(estimated_node_count, sizeof(ast_token_index))))
+        goto err;
+    if (!(nodes_datas = calloc(estimated_node_count, sizeof(ast_data))))
+        goto err;
+    // Size from the pointee so the element type cannot drift from the
+    // declaration (these are ast_node_index arrays, not ast_token_index).
+    if (!(extra_data_arr = calloc(16, sizeof(*extra_data_arr))))
+        goto err;
+    if (!(scratch_arr = calloc(16, sizeof(*scratch_arr))))
+        goto err;
+    parser p = (parser) {
+        .source = source,
+        .source_len = len,
+        .token_tags = token_tags,
+        .token_starts = token_starts,
+        .tokens_len = tokens_len,
+        .tok_i = 0,
+        .nodes = (ast_node_list) {
+            .len = 0,
+            .cap = estimated_node_count,
+            .tags = nodes_tags,
+            .main_tokens = main_tokens,
+            .datas = nodes_datas,
+        },
+        .extra_data = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = extra_data_arr },
+        .scratch = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = scratch_arr },
+    };
+    // The scratch buffer is not part of the returned ast; release it here.
+    free(scratch_arr);
+    // TODO work
+    if (err)
+        *err = 0;
+    return (ast) {
+        .source = source,
+        .source_len = len,
+        .tokens.len = tokens_len,
+        .tokens.tags = token_tags,
+        .tokens.starts = token_starts,
+        .nodes = p.nodes,
+        .extra_data = p.extra_data.arr,
+        .extra_data_len = p.extra_data.len,
+    };
+err:
+    free(token_tags);
+    free(token_starts);
+    free(nodes_tags);
+    free(main_tokens);
+    free(nodes_datas);
+    free(extra_data_arr);
+    free(scratch_arr);
+    if (err)
+        *err = 1;
+    // `{ 0 }` rather than `{}`: empty braces are not valid C11, and the
+    // library is compiled with -std=c11.
+    return (ast) { 0 };
 }
--- a/ast.h
+++ b/ast.h
@@ -486,26 +486,33 @@ typedef uint32_t ast_node_index;
 typedef uint32_t ast_index;
 typedef struct {
-    ast_node_tag tag;
-    ast_token_index main_token;
-    struct {
-        ast_index lhs, rhs;
-    } data;
-} ast_node;
+    ast_index lhs, rhs;
+} ast_data;
+
+// Node storage split into one array per field: `tags`, `main_tokens` and
+// `datas` are allocated with the same element count (`cap`); `len` is the
+// number of entries in use.
+typedef struct {
+    uint32_t len;
+    uint32_t cap;
+    ast_node_tag* tags;
+    ast_token_index* main_tokens;
+    ast_data* datas;
+} ast_node_list;
 typedef struct {
    const char* source;
-    uint32_t source_len;
-    tokenizer_tag* tokens_tag;
-    ast_index* tokens_start;
-    uint32_t tokens_len;
-    ast_node* nodes;
-    uint32_t nodes_len;
+    const uint32_t source_len;
+    // Token tags and start offsets, indexed in parallel; `len` entries each.
+    struct {
+        uint32_t len;
+        tokenizer_tag* tags;
+        ast_index* starts;
+    } tokens;
+    ast_node_list nodes;
    ast_node_index* extra_data;
    uint32_t extra_data_len;
 } ast;
-int ast_parse(const char* source, uint32_t len, ast *result);
+ast ast_parse(const char* source, uint32_t len, int* err);
 #endif
--- a/build.zig
+++ b/build.zig
@@ -1,5 +1,8 @@
 const std = @import("std");
 const c_lib = &[_][]const u8{ "tokenizer.c", "ast.c", "zig1.c" };
 const all_c_files = c_lib ++ &[_][]const u8{"main.c"};
 pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});
@@ -9,8 +12,8 @@ pub fn build(b: *std.Build) void {
        .optimize = optimize,
        .target = target,
    });
-    lib.addCSourceFile(.{
+    lib.addCSourceFiles(.{
-        .file = b.path("tokenizer.c"),
+        .files = c_lib,
        .flags = &[_][]const u8{
            "-std=c11",
            "-Wall",
@@ -35,13 +38,44 @@ pub fn build(b: *std.Build) void {
    lib.linkLibC();
    const test_step = b.step("test", "Run unit tests");
    const test_exe = b.addTest(.{
        .root_source_file = b.path("test_all.zig"),
        .optimize = optimize,
    });
    test_exe.linkLibrary(lib);
    test_exe.addIncludePath(b.path("."));
    test_step.dependOn(&b.addRunArtifact(test_exe).step);
    const lint_step = b.step("lint", "Run linters");
    const clang_format = b.addSystemCommand(&.{"clang-format"});
    clang_format.addArgs(&.{ "--style=webkit", "-i" });
    for (all_c_files) |cfile| clang_format.addFileArg(b.path(cfile));
    lint_step.dependOn(&clang_format.step);
    const clang_analyze = b.addSystemCommand(&.{"clang"});
    clang_analyze.addArgs(&.{
        "--analyze",
        "--analyzer-output",
        "text",
        "-Wno-unused-command-line-argument",
        "-Werror",
    });
    for (all_c_files) |cfile| clang_analyze.addFileArg(b.path(cfile));
    lint_step.dependOn(&clang_analyze.step);
    const gcc_analyze = b.addSystemCommand(&.{"gcc"});
    gcc_analyze.addArgs(&.{ "--analyzer", "-Werror", "-o", "/dev/null" });
    for (all_c_files) |cfile| gcc_analyze.addFileArg(b.path(cfile));
    lint_step.dependOn(&gcc_analyze.step);
    const cppcheck = b.addSystemCommand(&.{"cppcheck"});
    cppcheck.addArgs(&.{
        "--enable=all",
        "--suppress=missingIncludeSystem",
        "--suppress=checkersReport",
        "--quiet",
        "--suppress=unusedFunction", // TODO remove after plumbing is done
    });
    for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile));
    lint_step.dependOn(&cppcheck.step);
 }
--- a/main.c
+++ b/main.c
@@ -5,7 +5,7 @@
 int zig1_run(char* program, char** msg);
 int zig1_run_file(char* fname, char** msg);
-static void usage(char* argv0)
+static void usage(const char* argv0)
 {
    fprintf(stderr, "Usage: %s program.zig\n", argv0);
 }
--- a/parse.h
+++ b/parse.h
@@ -0,0 +1,30 @@
 #ifndef _ZIG1_PARSE_H__
 #define _ZIG1_PARSE_H__
 #include <stdbool.h>
 #include <stdint.h>
 #include "ast.h"
 typedef struct {
    uint32_t len;
    uint32_t cap;
    ast_node_index* arr;
 } parser_node_index_slice;
 typedef struct {
    const char* source;
    const uint32_t source_len;
    tokenizer_tag* token_tags;
    ast_index* token_starts;
    uint32_t tokens_len;
    ast_token_index tok_i;
    ast_node_list nodes;
    parser_node_index_slice extra_data;
    parser_node_index_slice scratch;
 } parser;
 #endif
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -63,7 +63,7 @@ const keyword_map keywords[] = {
 };
 // TODO binary search
-static tokenizer_tag get_keyword(const char* bytes, uint32_t len)
+static tokenizer_tag get_keyword(const char* bytes, const uint32_t len)
 {
    for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) {
        size_t klen = strlen(keywords[i].keyword);
@@ -78,12 +78,11 @@ static tokenizer_tag get_keyword(const char* bytes, uint32_t len)
        } else if (cmp < 0) {
            return TOKENIZER_TAG_INVALID;
        }
        continue;
    }
    return TOKENIZER_TAG_INVALID;
 }
-tokenizer tokenizer_init(const char* buffer, uint32_t len)
+tokenizer tokenizer_init(const char* buffer, const uint32_t len)
 {
    return (tokenizer) {
        .buffer = buffer,
--- a/tokenizer.h
+++ b/tokenizer.h
@@ -186,7 +186,7 @@ typedef struct {
 typedef struct {
    const char* buffer;
-    uint32_t buffer_len;
+    const uint32_t buffer_len; // const: cannot be reassigned once the struct is initialised
    uint32_t index;
 } tokenizer;
--- a/zig1.c
+++ b/zig1.c
@@ -6,11 +6,16 @@
 // - code = 0: program successfully terminated.
 // - code = 1: panicked, panic message in msg. Caller should free msg.
 // - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1_run(char* program, char** msg) { return 0; }
+// Stub: ignores both arguments and always returns 0.
+int zig1_run(const char* program, char** msg)
+{
+    // The casts mark the parameters as deliberately unused.
+    (void)program;
+    (void)msg;
+    return 0;
+}
 // API: run and:
 // code = 3: abnormal error, expect something in stderr.
-int zig1_run_file(char* fname, char** msg)
+int zig1_run_file(const char* fname, char** msg)
 {
    FILE* f = fopen(fname, "r");
    if (f == NULL) {
@@ -18,12 +23,13 @@ int zig1_run_file(char* fname, char** msg)
        return 3;
    }
    fseek(f, 0, SEEK_END);
-    long fsize = ftell(f);
+    long fsizel = ftell(f);
-    if (fsize == -1) {
+    if (fsizel == -1) {
        perror("ftell");
        fclose(f);
        return 3;
    }
    unsigned long fsize = (unsigned long)fsizel;
    fseek(f, 0, SEEK_SET);
    char* program = malloc(fsize + 1);