This commit is contained in:
2024-12-22 22:40:01 +02:00
parent db35aa7722
commit a9c4b5c7ab
7 changed files with 253 additions and 205 deletions

104
ast.c
View File

@@ -4,85 +4,77 @@
#include <stdlib.h> #include <stdlib.h>
#include "ast.h" #include "ast.h"
#include "common.h"
#include "parser.h" #include "parser.h"
#define N 1024 #define N 1024
ast astParse(const char* source, const uint32_t len) Ast astParse(const char* source, const uint32_t len)
{ {
uint32_t estimated_token_count = len / 8; uint32_t estimated_token_count = len / 8;
TokenizerTag* token_tags = NULL; // Initialize token list
AstIndex* token_starts = NULL; AstTokenList tokens = {
AstNodeTag* nodes_tags = NULL; .len = 0,
AstTokenIndex* main_tokens = NULL; .cap = estimated_token_count,
AstData* nodes_datas = NULL; .tags = SLICE_INIT(TokenizerTag, estimated_token_count),
AstNodeIndex* extra_data_arr = NULL; .starts = SLICE_INIT(AstIndex, estimated_token_count)
AstNodeIndex* scratch_arr = NULL; };
if (!(token_tags = calloc(estimated_token_count, sizeof(TokenizerTag))))
exit(1);
if (!(token_starts = calloc(estimated_token_count, sizeof(AstIndex))))
exit(1);
// Tokenize
Tokenizer tok = tokenizerInit(source, len); Tokenizer tok = tokenizerInit(source, len);
uint32_t tokens_len = 0; while (true) {
for (; tokens_len <= estimated_token_count; tokens_len++) { if (tokens.len >= tokens.cap) {
if (tokens_len == estimated_token_count) {
fprintf(stderr, "too many tokens, bump estimated_token_count\n"); fprintf(stderr, "too many tokens, bump estimated_token_count\n");
exit(1); exit(1);
} }
TokenizerToken token = tokenizerNext(&tok); TokenizerToken token = tokenizerNext(&tok);
token_tags[tokens_len] = token.tag; tokens.tags[tokens.len] = token.tag;
token_starts[tokens_len] = token.loc.start; tokens.starts[tokens.len] = token.loc.start;
tokens.len++;
if (token.tag == TOKENIZER_TAG_EOF)
break;
} }
uint32_t estimated_node_count = (tokens_len + 2) / 2; // Initialize node list
uint32_t estimated_node_count = (tokens.len + 2) / 2;
if (!(nodes_tags = calloc(estimated_node_count, sizeof(AstNodeTag)))) AstNodeList nodes = {
exit(1);
if (!(main_tokens = calloc(estimated_node_count, sizeof(AstTokenIndex))))
exit(1);
if (!(nodes_datas = calloc(estimated_node_count, sizeof(AstData))))
exit(1);
if (!(extra_data_arr = calloc(N, sizeof(AstNodeIndex))))
exit(1);
if (!(scratch_arr = calloc(N, sizeof(AstNodeIndex))))
exit(1);
Parser p = (Parser) {
.source = source,
.source_len = len,
.token_tags = token_tags,
.token_starts = token_starts,
.tokens_len = tokens_len,
.tok_i = 0,
.nodes = (AstNodeList) {
.len = 0, .len = 0,
.cap = estimated_node_count, .cap = estimated_node_count,
.tags = nodes_tags, .tags = SLICE_INIT(AstNodeTag, estimated_node_count),
.main_tokens = main_tokens, .main_tokens = SLICE_INIT(AstTokenIndex, estimated_node_count),
.datas = nodes_datas, .datas = SLICE_INIT(AstData, estimated_node_count)
},
.extra_data = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
.scratch = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
}; };
free(scratch_arr); // Initialize parser
Parser p = {
.source = source,
.source_len = len,
.token_tags = tokens.tags,
.token_starts = tokens.starts,
.tokens_len = tokens.len,
.tok_i = 0,
.nodes = nodes,
.extra_data = {
.len = 0,
.cap = N,
.arr = SLICE_INIT(AstNodeIndex, N) },
.scratch = { .len = 0, .cap = N, .arr = SLICE_INIT(AstNodeIndex, N) }
};
free(p.scratch.arr); // Parser takes ownership
parseRoot(&p); parseRoot(&p);
return (ast) { return (Ast) {
.source = source, .source = source,
.tokens.tags = token_tags, .source_len = len,
.tokens.starts = token_starts, .tokens = tokens,
.nodes = p.nodes, .nodes = p.nodes,
.extra_data = p.extra_data.arr, .extra_data = {
.extra_data_len = p.extra_data.len, .len = p.extra_data.len,
.cap = p.extra_data.cap,
.arr = p.extra_data.arr,
},
}; };
} }

34
ast.h
View File

@@ -486,7 +486,8 @@ typedef uint32_t AstNodeIndex;
typedef uint32_t AstIndex; typedef uint32_t AstIndex;
typedef struct { typedef struct {
AstIndex lhs, rhs; AstIndex lhs;
AstIndex rhs;
} AstData; } AstData;
typedef struct { typedef struct {
@@ -498,21 +499,34 @@ typedef struct {
} AstNodeList; } AstNodeList;
typedef struct { typedef struct {
const char* source;
const uint32_t source_len;
struct {
uint32_t len; uint32_t len;
uint32_t cap;
TokenizerTag* tags; TokenizerTag* tags;
AstIndex* starts; AstIndex* starts;
} tokens; } AstTokenList;
typedef struct {
uint32_t len;
uint32_t cap;
AstNodeIndex* arr;
} AstExtraData;
typedef struct {
const char* source;
uint32_t source_len;
AstTokenList tokens;
AstNodeList nodes; AstNodeList nodes;
AstExtraData extra_data;
} Ast;
AstNodeIndex* extra_data; Ast astParse(const char* source, uint32_t len);
uint32_t extra_data_len;
} ast;
ast astParse(const char* source, uint32_t len); // MultiArrayList
void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
void astNodeListAppend(AstNodeList* list, AstNodeTag tag,
AstTokenIndex main_token, AstData data);
void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
#endif #endif

View File

@@ -1,12 +1,20 @@
const std = @import("std"); const std = @import("std");
const headers = &[_][]const u8{
"common.h",
"ast.h",
"parser.h",
};
const c_lib_files = &[_][]const u8{ const c_lib_files = &[_][]const u8{
"tokenizer.c", "tokenizer.c",
"ast.c", "ast.c",
"zig1.c", "zig1.c",
"parser.c", "parser.c",
}; };
const all_c_files = c_lib_files ++ &[_][]const u8{"main.c"}; const all_c_files = c_lib_files ++ &[_][]const u8{"main.c"};
const cflags = &[_][]const u8{ const cflags = &[_][]const u8{
"-std=c11", "-std=c11",
"-Wall", "-Wall",
@@ -73,7 +81,7 @@ pub fn build(b: *std.Build) !void {
const lint_step = b.step("lint", "Run linters"); const lint_step = b.step("lint", "Run linters");
const clang_format = b.addSystemCommand(&.{"clang-format"}); const clang_format = b.addSystemCommand(&.{"clang-format"});
clang_format.addArgs(&.{ "--style=webkit", "-i" }); clang_format.addArgs(&.{ "--style=webkit", "-i" });
for (all_c_files) |cfile| clang_format.addFileArg(b.path(cfile)); for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
lint_step.dependOn(&clang_format.step); lint_step.dependOn(&clang_format.step);
const clang_analyze = b.addSystemCommand(&.{"clang"}); const clang_analyze = b.addSystemCommand(&.{"clang"});

30
common.h Normal file
View File

@@ -0,0 +1,30 @@
// common.h — growable-slice helper macros shared across the compiler.
//
// Policy: all allocation failures are fatal (exit(1)), matching the
// fail-fast OOM handling used elsewhere in this project (astParse, etc.).
// The macros use GNU statement expressions, so they require GCC or Clang.
#ifndef ZIG1_COMMON_H_
#define ZIG1_COMMON_H_

#include <stdint.h>
#include <stdlib.h>

// Allocate a zero-initialized array of `initial_cap` elements of `Type`.
// Evaluates to the new `Type*` pointer; exits on allocation failure.
// Internal locals carry a trailing underscore so they cannot shadow
// variables used inside the macro arguments (macro hygiene).
#define SLICE_INIT(Type, initial_cap) ({              \
    Type* slice_arr_ = calloc(initial_cap, sizeof(Type)); \
    if (!slice_arr_)                                  \
        exit(1);                                      \
    slice_arr_;                                       \
})

// Reallocate (slice)->arr to hold `new_cap` elements and update
// (slice)->cap.  `slice` must expose `arr` and `cap` members.
// `new_cap` is evaluated exactly once, before `arr`/`cap` are touched,
// so the argument may safely reference (slice)->cap.  Exits on OOM.
#define SLICE_RESIZE(slice, Type, new_cap) ({                                 \
    uint32_t slice_cap_ = (new_cap);                                          \
    Type* slice_new_arr_ = realloc((slice)->arr, slice_cap_ * sizeof(Type));  \
    if (!slice_new_arr_)                                                      \
        exit(1);                                                              \
    (slice)->arr = slice_new_arr_;                                            \
    (slice)->cap = slice_cap_;                                                \
})

// Ensure room for `additional` more elements past (slice)->len.
// Grows by doubling (amortized O(1) appends), or jumps straight to
// len + additional when doubling would not be enough.
#define SLICE_ENSURE_CAPACITY(slice, Type, additional) ({                     \
    if ((slice)->len + (additional) > (slice)->cap) {                         \
        uint32_t slice_want_ = (slice)->len + (additional);                   \
        uint32_t slice_doubled_ = (slice)->cap * 2;                           \
        SLICE_RESIZE(slice, Type,                                             \
            slice_doubled_ > slice_want_ ? slice_doubled_ : slice_want_);     \
    }                                                                         \
})

#endif // ZIG1_COMMON_H_

View File

@@ -3,12 +3,6 @@
#include "parser.h" #include "parser.h"
typedef struct {
uint32_t len;
AstNodeIndex lhs, rhs;
bool trailing;
} Members;
typedef struct { typedef struct {
enum { enum {
FIELD_STATE_NONE, FIELD_STATE_NONE,

View File

@@ -1,11 +1,12 @@
// parser.h
#ifndef _ZIG1_PARSE_H__ #ifndef _ZIG1_PARSE_H__
#define _ZIG1_PARSE_H__ #define _ZIG1_PARSE_H__
#include "ast.h"
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include "ast.h" // Standard slice
typedef struct { typedef struct {
uint32_t len; uint32_t len;
uint32_t cap; uint32_t cap;
@@ -13,8 +14,15 @@ typedef struct {
} ParserNodeIndexSlice; } ParserNodeIndexSlice;
typedef struct { typedef struct {
uint32_t len;
AstNodeIndex lhs;
AstNodeIndex rhs;
bool trailing;
} Members;
typedef struct Parser {
const char* source; const char* source;
const uint32_t source_len; uint32_t source_len;
TokenizerTag* token_tags; TokenizerTag* token_tags;
AstIndex* token_starts; AstIndex* token_starts;
@@ -27,6 +35,8 @@ typedef struct {
ParserNodeIndexSlice scratch; ParserNodeIndexSlice scratch;
} Parser; } Parser;
int parseRoot(Parser*); Parser* parserInit(const char* source, uint32_t len);
void parserDeinit(Parser* parser);
int parseRoot(Parser* parser);
#endif #endif

View File

@@ -126,7 +126,7 @@
TAG(TOKENIZER_TAG_KEYWORD_USINGNAMESPACE) \ TAG(TOKENIZER_TAG_KEYWORD_USINGNAMESPACE) \
TAG(TOKENIZER_TAG_KEYWORD_VAR) \ TAG(TOKENIZER_TAG_KEYWORD_VAR) \
TAG(TOKENIZER_TAG_KEYWORD_VOLATILE) \ TAG(TOKENIZER_TAG_KEYWORD_VOLATILE) \
TAG(TOKENIZER_TAG_KEYWORD_WHILE) \ TAG(TOKENIZER_TAG_KEYWORD_WHILE)
#define GENERATE_ENUM(ENUM) ENUM, #define GENERATE_ENUM(ENUM) ENUM,
#define GENERATE_STRING(STRING) #STRING, #define GENERATE_STRING(STRING) #STRING,
@@ -135,7 +135,7 @@ typedef enum {
FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM) FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM)
} TokenizerTag; } TokenizerTag;
static const char *tokenizerTagString[] = { static const char* tokenizerTagString[] = {
FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING) FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING)
}; };