linters, some ast headers

This commit is contained in:
2024-12-18 22:34:22 +02:00
parent c2915d2eaa
commit 7361b6058d
8 changed files with 184 additions and 48 deletions

104
ast.c
View File

@@ -2,37 +2,97 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "ast.h" #include "ast.h"
#include "parse.h"
int ast_parse(const char* source, uint32_t len, ast *result) { ast ast_parse(const char* source, const uint32_t len, int* err)
{
uint32_t estimated_token_count = len / 8; uint32_t estimated_token_count = len / 8;
tokenizer_tag* tokens_tag = calloc(estimated_token_count, sizeof(tokenizer_tag)); tokenizer_tag* token_tags = NULL;
if (tokens_tag == NULL) { ast_index* token_starts = NULL;
perror("calloc"); ast_node_tag* nodes_tags = NULL;
return 1; ast_token_index* main_tokens = NULL;
} ast_data* nodes_datas = NULL;
ast_index* tokens_start = calloc(estimated_token_count, sizeof(ast_index)); ast_node_index* extra_data_arr = NULL;
if (tokens_start == NULL) { ast_node_index* scratch_arr = NULL;
free(tokens_tag);
perror("calloc");
return 1;
}
tokenizer tokenizer = tokenizer_init(source, len); if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizer_tag))))
for (uint32_t i = 0; i <= estimated_token_count; i++) { goto err;
if (i == estimated_token_count) {
if (!(token_starts = calloc(estimated_token_count, sizeof(ast_index))))
goto err;
tokenizer tok = tokenizer_init(source, len);
uint32_t tokens_len = 0;
for (; tokens_len <= estimated_token_count; tokens_len++) {
if (tokens_len == estimated_token_count) {
fprintf(stderr, "too many tokens, bump estimated_token_count\n"); fprintf(stderr, "too many tokens, bump estimated_token_count\n");
return 1; goto err;
} }
tokenizer_token token = tokenizer_next(&tokenizer); tokenizer_token token = tokenizer_next(&tok);
tokens_tag[i] = token.tag; token_tags[tokens_len] = token.tag;
tokens_start[i] = token.loc.start; token_starts[tokens_len] = token.loc.start;
} }
/* TODO parser */ uint32_t estimated_node_count = (tokens_len + 2) / 2;
return 0; if (!(nodes_tags = calloc(estimated_node_count, sizeof(ast_node_tag))))
goto err;
if (!(main_tokens = calloc(estimated_node_count, sizeof(ast_token_index))))
goto err;
if (!(nodes_datas = calloc(estimated_node_count, sizeof(ast_data))))
goto err;
if (!(extra_data_arr = calloc(16, sizeof(ast_token_index))))
goto err;
if (!(scratch_arr = calloc(16, sizeof(ast_token_index))))
goto err;
parser p = (parser) {
.source = source,
.source_len = len,
.token_tags = token_tags,
.token_starts = token_starts,
.tokens_len = tokens_len,
.tok_i = 0,
.nodes = (ast_node_list) {
.len = 0,
.cap = estimated_node_count,
.tags = nodes_tags,
.main_tokens = main_tokens,
.datas = nodes_datas,
},
.extra_data = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = extra_data_arr },
.scratch = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = scratch_arr },
};
free(scratch_arr);
// TODO work
return (ast) {
.source = source,
.tokens.tags = token_tags,
.tokens.starts = token_starts,
.nodes = p.nodes,
.extra_data = p.extra_data.arr,
.extra_data_len = p.extra_data.len,
};
err:
free(token_tags);
free(token_starts);
free(nodes_tags);
free(main_tokens);
free(nodes_datas);
free(extra_data_arr);
free(scratch_arr);
*err = 1;
return (ast) {};
} }

31
ast.h
View File

@@ -486,26 +486,33 @@ typedef uint32_t ast_node_index;
typedef uint32_t ast_index; typedef uint32_t ast_index;
typedef struct { typedef struct {
ast_node_tag tag;
ast_token_index main_token;
struct {
ast_index lhs, rhs; ast_index lhs, rhs;
} data; } ast_data;
} ast_node;
typedef struct {
uint32_t len;
uint32_t cap;
ast_node_tag* tags;
ast_token_index* main_tokens;
ast_data* datas;
} ast_node_list;
typedef struct { typedef struct {
const char* source; const char* source;
uint32_t source_len; const uint32_t source_len;
tokenizer_tag* tokens_tag; struct {
ast_index* tokens_start; uint32_t len;
uint32_t tokens_len; tokenizer_tag* tags;
ast_index* starts;
} tokens;
ast_node_list nodes;
ast_node* nodes;
uint32_t nodes_len;
ast_node_index* extra_data; ast_node_index* extra_data;
uint32_t extra_data_len;
} ast; } ast;
int ast_parse(const char* source, uint32_t len, ast *result); ast ast_parse(const char* source, uint32_t len, int* err);
#endif #endif

View File

@@ -1,5 +1,8 @@
const std = @import("std"); const std = @import("std");
const c_lib = &[_][]const u8{ "tokenizer.c", "ast.c", "zig1.c" };
const all_c_files = c_lib ++ &[_][]const u8{"main.c"};
pub fn build(b: *std.Build) void { pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{}); const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{}); const optimize = b.standardOptimizeOption(.{});
@@ -9,8 +12,8 @@ pub fn build(b: *std.Build) void {
.optimize = optimize, .optimize = optimize,
.target = target, .target = target,
}); });
lib.addCSourceFile(.{ lib.addCSourceFiles(.{
.file = b.path("tokenizer.c"), .files = c_lib,
.flags = &[_][]const u8{ .flags = &[_][]const u8{
"-std=c11", "-std=c11",
"-Wall", "-Wall",
@@ -35,13 +38,44 @@ pub fn build(b: *std.Build) void {
lib.linkLibC(); lib.linkLibC();
const test_step = b.step("test", "Run unit tests"); const test_step = b.step("test", "Run unit tests");
const test_exe = b.addTest(.{ const test_exe = b.addTest(.{
.root_source_file = b.path("test_all.zig"), .root_source_file = b.path("test_all.zig"),
.optimize = optimize, .optimize = optimize,
}); });
test_exe.linkLibrary(lib); test_exe.linkLibrary(lib);
test_exe.addIncludePath(b.path(".")); test_exe.addIncludePath(b.path("."));
test_step.dependOn(&b.addRunArtifact(test_exe).step); test_step.dependOn(&b.addRunArtifact(test_exe).step);
const lint_step = b.step("lint", "Run linters");
const clang_format = b.addSystemCommand(&.{"clang-format"});
clang_format.addArgs(&.{ "--style=webkit", "-i" });
for (all_c_files) |cfile| clang_format.addFileArg(b.path(cfile));
lint_step.dependOn(&clang_format.step);
const clang_analyze = b.addSystemCommand(&.{"clang"});
clang_analyze.addArgs(&.{
"--analyze",
"--analyzer-output",
"text",
"-Wno-unused-command-line-argument",
"-Werror",
});
for (all_c_files) |cfile| clang_analyze.addFileArg(b.path(cfile));
lint_step.dependOn(&clang_analyze.step);
const gcc_analyze = b.addSystemCommand(&.{"gcc"});
gcc_analyze.addArgs(&.{ "--analyzer", "-Werror", "-o", "/dev/null" });
for (all_c_files) |cfile| gcc_analyze.addFileArg(b.path(cfile));
lint_step.dependOn(&gcc_analyze.step);
const cppcheck = b.addSystemCommand(&.{"cppcheck"});
cppcheck.addArgs(&.{
"--enable=all",
"--suppress=missingIncludeSystem",
"--suppress=checkersReport",
"--quiet",
"--suppress=unusedFunction", // TODO remove after plumbing is done
});
for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile));
lint_step.dependOn(&cppcheck.step);
} }

2
main.c
View File

@@ -5,7 +5,7 @@
int zig1_run(char* program, char** msg); int zig1_run(char* program, char** msg);
int zig1_run_file(char* fname, char** msg); int zig1_run_file(char* fname, char** msg);
static void usage(char* argv0) static void usage(const char* argv0)
{ {
fprintf(stderr, "Usage: %s program.zig\n", argv0); fprintf(stderr, "Usage: %s program.zig\n", argv0);
} }

30
parse.h Normal file
View File

@@ -0,0 +1,30 @@
#ifndef _ZIG1_PARSE_H__
#define _ZIG1_PARSE_H__
#include <stdbool.h>
#include <stdint.h>
#include "ast.h"
/*
 * An array of ast_node_index values carried together with a length and a
 * capacity. ast_parse creates these with len = 0 and cap equal to the number
 * of elements it allocated for arr.
 */
typedef struct {
/* Presumably the number of entries of arr currently in use -- TODO confirm. */
uint32_t len;
/* Number of elements allocated for arr (as set up by ast_parse). */
uint32_t cap;
/* Element storage; allocated by ast_parse with calloc. */
ast_node_index* arr;
} parser_node_index_slice;
/*
 * Parser state: the source text, its tokens, the node storage being filled
 * and two index slices. ast_parse builds one of these after tokenizing.
 */
typedef struct {
/* Source text; ast_parse stores the pointer it was given, without copying. */
const char* source;
/* Length of source. const, so it can only be set when the struct is initialized. */
const uint32_t source_len;
/* Parallel arrays filled by ast_parse: tag and start offset of each token. */
tokenizer_tag* token_tags;
ast_index* token_starts;
/* Number of entries in token_tags / token_starts. */
uint32_t tokens_len;
/* Set to 0 by ast_parse; presumably the index of the current token -- TODO confirm. */
ast_token_index tok_i;
/* Node storage; ast_parse copies this into the returned ast. */
ast_node_list nodes;
/* ast_parse returns this slice's arr and len as the ast's extra_data. */
parser_node_index_slice extra_data;
/* Not part of the returned ast; ast_parse frees its arr before returning. */
parser_node_index_slice scratch;
} parser;
#endif

View File

@@ -63,7 +63,7 @@ const keyword_map keywords[] = {
}; };
// TODO binary search // TODO binary search
static tokenizer_tag get_keyword(const char* bytes, uint32_t len) static tokenizer_tag get_keyword(const char* bytes, const uint32_t len)
{ {
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) {
size_t klen = strlen(keywords[i].keyword); size_t klen = strlen(keywords[i].keyword);
@@ -78,12 +78,11 @@ static tokenizer_tag get_keyword(const char* bytes, uint32_t len)
} else if (cmp < 0) { } else if (cmp < 0) {
return TOKENIZER_TAG_INVALID; return TOKENIZER_TAG_INVALID;
} }
continue;
} }
return TOKENIZER_TAG_INVALID; return TOKENIZER_TAG_INVALID;
} }
tokenizer tokenizer_init(const char* buffer, uint32_t len) tokenizer tokenizer_init(const char* buffer, const uint32_t len)
{ {
return (tokenizer) { return (tokenizer) {
.buffer = buffer, .buffer = buffer,

View File

@@ -186,7 +186,7 @@ typedef struct {
typedef struct { typedef struct {
const char* buffer; const char* buffer;
uint32_t buffer_len; const uint32_t buffer_len;
uint32_t index; uint32_t index;
} tokenizer; } tokenizer;

14
zig1.c
View File

@@ -6,11 +6,16 @@
// - code = 0: program successfully terminated. // - code = 0: program successfully terminated.
// - code = 1: panicked, panic message in msg. Caller should free msg. // - code = 1: panicked, panic message in msg. Caller should free msg.
// - code = 2: interpreter error, error in msg. Caller should free msg. // - code = 2: interpreter error, error in msg. Caller should free msg.
int zig1_run(char* program, char** msg) { return 0; } int zig1_run(const char* program, char** msg)
{
(void)program;
(void)msg;
return 0;
}
// API: run and: // API: run and:
// code = 3: abnormal error, expect something in stderr. // code = 3: abnormal error, expect something in stderr.
int zig1_run_file(char* fname, char** msg) int zig1_run_file(const char* fname, char** msg)
{ {
FILE* f = fopen(fname, "r"); FILE* f = fopen(fname, "r");
if (f == NULL) { if (f == NULL) {
@@ -18,12 +23,13 @@ int zig1_run_file(char* fname, char** msg)
return 3; return 3;
} }
fseek(f, 0, SEEK_END); fseek(f, 0, SEEK_END);
long fsize = ftell(f); long fsizel = ftell(f);
if (fsize == -1) { if (fsizel == -1) {
perror("ftell"); perror("ftell");
fclose(f); fclose(f);
return 3; return 3;
} }
unsigned long fsize = (unsigned long)fsizel;
fseek(f, 0, SEEK_SET); fseek(f, 0, SEEK_SET);
char* program = malloc(fsize + 1); char* program = malloc(fsize + 1);