rename types
This commit is contained in:
72
ast.c
72
ast.c
@@ -8,74 +8,74 @@
|
||||
|
||||
#define N 1024
|
||||
|
||||
ast ast_parse(const char* source, const uint32_t len, int* err)
|
||||
ast astParse(const char* source, const uint32_t len)
|
||||
{
|
||||
uint32_t estimated_token_count = len / 8;
|
||||
|
||||
tokenizerTag* token_tags = NULL;
|
||||
astIndex* token_starts = NULL;
|
||||
astNodeTag* nodes_tags = NULL;
|
||||
astTokenIndex* main_tokens = NULL;
|
||||
astData* nodes_datas = NULL;
|
||||
astNodeIndex* extra_data_arr = NULL;
|
||||
astNodeIndex* scratch_arr = NULL;
|
||||
TokenizerTag* token_tags = NULL;
|
||||
AstIndex* token_starts = NULL;
|
||||
AstNodeTag* nodes_tags = NULL;
|
||||
AstTokenIndex* main_tokens = NULL;
|
||||
AstData* nodes_datas = NULL;
|
||||
AstNodeIndex* extra_data_arr = NULL;
|
||||
AstNodeIndex* scratch_arr = NULL;
|
||||
|
||||
if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizerTag))))
|
||||
goto err;
|
||||
if (!(token_tags = calloc(estimated_token_count, sizeof(TokenizerTag))))
|
||||
exit(1);
|
||||
|
||||
if (!(token_starts = calloc(estimated_token_count, sizeof(astIndex))))
|
||||
goto err;
|
||||
if (!(token_starts = calloc(estimated_token_count, sizeof(AstIndex))))
|
||||
exit(1);
|
||||
|
||||
tokenizer tok = tokenizer_init(source, len);
|
||||
Tokenizer tok = tokenizerInit(source, len);
|
||||
uint32_t tokens_len = 0;
|
||||
for (; tokens_len <= estimated_token_count; tokens_len++) {
|
||||
if (tokens_len == estimated_token_count) {
|
||||
fprintf(stderr, "too many tokens, bump estimated_token_count\n");
|
||||
goto err;
|
||||
exit(1);
|
||||
}
|
||||
tokenizerToken token = tokenizer_next(&tok);
|
||||
TokenizerToken token = tokenizerNext(&tok);
|
||||
token_tags[tokens_len] = token.tag;
|
||||
token_starts[tokens_len] = token.loc.start;
|
||||
}
|
||||
|
||||
uint32_t estimated_node_count = (tokens_len + 2) / 2;
|
||||
|
||||
if (!(nodes_tags = calloc(estimated_node_count, sizeof(astNodeTag))))
|
||||
goto err;
|
||||
if (!(nodes_tags = calloc(estimated_node_count, sizeof(AstNodeTag))))
|
||||
exit(1);
|
||||
|
||||
if (!(main_tokens = calloc(estimated_node_count, sizeof(astTokenIndex))))
|
||||
goto err;
|
||||
if (!(main_tokens = calloc(estimated_node_count, sizeof(AstTokenIndex))))
|
||||
exit(1);
|
||||
|
||||
if (!(nodes_datas = calloc(estimated_node_count, sizeof(astData))))
|
||||
goto err;
|
||||
if (!(nodes_datas = calloc(estimated_node_count, sizeof(AstData))))
|
||||
exit(1);
|
||||
|
||||
if (!(extra_data_arr = calloc(N, sizeof(astNodeIndex))))
|
||||
goto err;
|
||||
if (!(extra_data_arr = calloc(N, sizeof(AstNodeIndex))))
|
||||
exit(1);
|
||||
|
||||
if (!(scratch_arr = calloc(N, sizeof(astNodeIndex))))
|
||||
goto err;
|
||||
if (!(scratch_arr = calloc(N, sizeof(AstNodeIndex))))
|
||||
exit(1);
|
||||
|
||||
parser p = (parser) {
|
||||
Parser p = (Parser) {
|
||||
.source = source,
|
||||
.source_len = len,
|
||||
.token_tags = token_tags,
|
||||
.token_starts = token_starts,
|
||||
.tokens_len = tokens_len,
|
||||
.tok_i = 0,
|
||||
.nodes = (astNodeList) {
|
||||
.nodes = (AstNodeList) {
|
||||
.len = 0,
|
||||
.cap = estimated_node_count,
|
||||
.tags = nodes_tags,
|
||||
.main_tokens = main_tokens,
|
||||
.datas = nodes_datas,
|
||||
},
|
||||
.extra_data = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
|
||||
.scratch = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
|
||||
.extra_data = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
|
||||
.scratch = (ParserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
|
||||
};
|
||||
|
||||
free(scratch_arr);
|
||||
|
||||
parse_root(&p);
|
||||
parseRoot(&p);
|
||||
|
||||
return (ast) {
|
||||
.source = source,
|
||||
@@ -85,16 +85,4 @@ ast ast_parse(const char* source, const uint32_t len, int* err)
|
||||
.extra_data = p.extra_data.arr,
|
||||
.extra_data_len = p.extra_data.len,
|
||||
};
|
||||
|
||||
err:
|
||||
free(token_tags);
|
||||
free(token_starts);
|
||||
free(nodes_tags);
|
||||
free(main_tokens);
|
||||
free(nodes_datas);
|
||||
free(extra_data_arr);
|
||||
free(scratch_arr);
|
||||
|
||||
*err = 1;
|
||||
return (ast) {};
|
||||
}
|
||||
|
||||
30
ast.h
30
ast.h
@@ -479,23 +479,23 @@ typedef enum {
|
||||
AST_NODE_TAG_ERROR_VALUE,
|
||||
/// `lhs!rhs`. main_token is the `!`.
|
||||
AST_NODE_TAG_ERROR_UNION,
|
||||
} astNodeTag;
|
||||
} AstNodeTag;
|
||||
|
||||
typedef int32_t astTokenIndex;
|
||||
typedef uint32_t astNodeIndex;
|
||||
typedef uint32_t astIndex;
|
||||
typedef int32_t AstTokenIndex;
|
||||
typedef uint32_t AstNodeIndex;
|
||||
typedef uint32_t AstIndex;
|
||||
|
||||
typedef struct {
|
||||
astIndex lhs, rhs;
|
||||
} astData;
|
||||
AstIndex lhs, rhs;
|
||||
} AstData;
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
astNodeTag* tags;
|
||||
astTokenIndex* main_tokens;
|
||||
astData* datas;
|
||||
} astNodeList;
|
||||
AstNodeTag* tags;
|
||||
AstTokenIndex* main_tokens;
|
||||
AstData* datas;
|
||||
} AstNodeList;
|
||||
|
||||
typedef struct {
|
||||
const char* source;
|
||||
@@ -503,16 +503,16 @@ typedef struct {
|
||||
|
||||
struct {
|
||||
uint32_t len;
|
||||
tokenizerTag* tags;
|
||||
astIndex* starts;
|
||||
TokenizerTag* tags;
|
||||
AstIndex* starts;
|
||||
} tokens;
|
||||
|
||||
astNodeList nodes;
|
||||
AstNodeList nodes;
|
||||
|
||||
astNodeIndex* extra_data;
|
||||
AstNodeIndex* extra_data;
|
||||
uint32_t extra_data_len;
|
||||
} ast;
|
||||
|
||||
ast ast_parse(const char* source, uint32_t len, int* err);
|
||||
ast astParse(const char* source, uint32_t len);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -96,6 +96,7 @@ pub fn build(b: *std.Build) !void {
|
||||
cppcheck.addArgs(&.{
|
||||
"--quiet",
|
||||
"--error-exitcode=1",
|
||||
"--check-level=exhaustive",
|
||||
"--enable=all",
|
||||
"--suppress=missingIncludeSystem",
|
||||
"--suppress=checkersReport",
|
||||
|
||||
6
main.c
6
main.c
@@ -2,8 +2,8 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int zig1_run(char* program, char** msg);
|
||||
int zig1_run_file(char* fname, char** msg);
|
||||
int zig1Run(char* program, char** msg);
|
||||
int zig1RunFile(char* fname, char** msg);
|
||||
|
||||
static void usage(const char* argv0)
|
||||
{
|
||||
@@ -18,7 +18,7 @@ int main(int argc, char** argv)
|
||||
}
|
||||
|
||||
char* msg;
|
||||
switch (zig1_run_file(argv[1], &msg)) {
|
||||
switch (zig1RunFile(argv[1], &msg)) {
|
||||
case 0:
|
||||
return 0;
|
||||
break;
|
||||
|
||||
42
parser.c
42
parser.c
@@ -1,12 +1,13 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "parser.h"
|
||||
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
astNodeIndex lhs, rhs;
|
||||
AstNodeIndex lhs, rhs;
|
||||
bool trailing;
|
||||
} members;
|
||||
} Members;
|
||||
|
||||
typedef struct {
|
||||
enum {
|
||||
@@ -17,24 +18,31 @@ typedef struct {
|
||||
union {
|
||||
uint32_t end;
|
||||
} payload;
|
||||
} field_state;
|
||||
} FieldState;
|
||||
|
||||
static astTokenIndex next_token(parser* p)
|
||||
int parseRoot(Parser* p)
|
||||
{
|
||||
return ++p->tok_i;
|
||||
p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
|
||||
p->nodes.main_tokens[p->nodes.len] = 0;
|
||||
|
||||
// members root_members = parseContainerMembers(p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static astTokenIndex eat_token(parser* p, tokenizerTag tag)
|
||||
static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
|
||||
|
||||
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag)
|
||||
{
|
||||
return (p->token_tags[p->tok_i] == tag) ? next_token(p) : -1;
|
||||
return (p->token_tags[p->tok_i] == tag) ? nextToken(p) : -1;
|
||||
}
|
||||
|
||||
static members parse_container_members(parser* p)
|
||||
static Members parseContainerMembers(Parser* p)
|
||||
{
|
||||
const uint32_t scratch_top = p->scratch.len;
|
||||
members res = (members) {};
|
||||
Members res = (Members) {};
|
||||
// ast_token_index last_field;
|
||||
while (eat_token(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
|
||||
while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
|
||||
;
|
||||
|
||||
// bool trailing = false;
|
||||
@@ -166,25 +174,15 @@ static members parse_container_members(parser* p)
|
||||
case TOKENIZER_TAG_KEYWORD_WHILE:;
|
||||
const char* str = tokenizerTagString[p->token_tags[p->tok_i]];
|
||||
fprintf(stderr, "keyword %s not implemented\n", str);
|
||||
goto cleanup;
|
||||
exit(1);
|
||||
case TOKENIZER_TAG_KEYWORD_PUB:
|
||||
p->tok_i++;
|
||||
// AstNodeIndex top_level_decl = expectTopLevelDecl(*p);
|
||||
break;
|
||||
// TODO do work
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
p->scratch.len = scratch_top;
|
||||
return res;
|
||||
}
|
||||
|
||||
int parse_root(parser* p)
|
||||
{
|
||||
p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
|
||||
p->nodes.main_tokens[p->nodes.len] = 0;
|
||||
|
||||
// members root_members = parse_container_members(p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
20
parser.h
20
parser.h
@@ -9,24 +9,24 @@
|
||||
typedef struct {
|
||||
uint32_t len;
|
||||
uint32_t cap;
|
||||
astNodeIndex* arr;
|
||||
} parserNodeIndexSlice;
|
||||
AstNodeIndex* arr;
|
||||
} ParserNodeIndexSlice;
|
||||
|
||||
typedef struct {
|
||||
const char* source;
|
||||
const uint32_t source_len;
|
||||
|
||||
tokenizerTag* token_tags;
|
||||
astIndex* token_starts;
|
||||
TokenizerTag* token_tags;
|
||||
AstIndex* token_starts;
|
||||
uint32_t tokens_len;
|
||||
|
||||
astTokenIndex tok_i;
|
||||
AstTokenIndex tok_i;
|
||||
|
||||
astNodeList nodes;
|
||||
parserNodeIndexSlice extra_data;
|
||||
parserNodeIndexSlice scratch;
|
||||
} parser;
|
||||
AstNodeList nodes;
|
||||
ParserNodeIndexSlice extra_data;
|
||||
ParserNodeIndexSlice scratch;
|
||||
} Parser;
|
||||
|
||||
int parse_root(parser*);
|
||||
int parseRoot(Parser*);
|
||||
|
||||
#endif
|
||||
|
||||
28
tokenizer.c
28
tokenizer.c
@@ -7,10 +7,10 @@
|
||||
|
||||
typedef struct {
|
||||
const char* keyword;
|
||||
tokenizerTag tag;
|
||||
} keywordMap;
|
||||
TokenizerTag tag;
|
||||
} KeywordMap;
|
||||
|
||||
const keywordMap keywords[] = {
|
||||
const KeywordMap keywords[] = {
|
||||
{ "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
|
||||
{ "align", TOKENIZER_TAG_KEYWORD_ALIGN },
|
||||
{ "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
|
||||
@@ -63,9 +63,9 @@ const keywordMap keywords[] = {
|
||||
};
|
||||
|
||||
// TODO binary search
|
||||
static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
|
||||
static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
|
||||
{
|
||||
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keywordMap); i++) {
|
||||
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) {
|
||||
size_t klen = strlen(keywords[i].keyword);
|
||||
size_t minlen = klen < len ? klen : len;
|
||||
int cmp = strncmp(bytes, keywords[i].keyword, minlen);
|
||||
@@ -82,25 +82,25 @@ static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
|
||||
return TOKENIZER_TAG_INVALID;
|
||||
}
|
||||
|
||||
tokenizer tokenizer_init(const char* buffer, const uint32_t len)
|
||||
Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
|
||||
{
|
||||
return (tokenizer) {
|
||||
return (Tokenizer) {
|
||||
.buffer = buffer,
|
||||
.buffer_len = len,
|
||||
.index = (len >= 3 && memcmp(buffer, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0,
|
||||
};
|
||||
}
|
||||
|
||||
tokenizerToken tokenizer_next(tokenizer* self)
|
||||
TokenizerToken tokenizerNext(Tokenizer* self)
|
||||
{
|
||||
tokenizerToken result = (tokenizerToken) {
|
||||
TokenizerToken result = (TokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_INVALID,
|
||||
.loc = {
|
||||
.start = 0,
|
||||
},
|
||||
};
|
||||
|
||||
tokenizerState state = TOKENIZER_STATE_START;
|
||||
TokenizerState state = TOKENIZER_STATE_START;
|
||||
|
||||
state:
|
||||
switch (state) {
|
||||
@@ -108,7 +108,7 @@ state:
|
||||
switch (self->buffer[self->index]) {
|
||||
case 0:
|
||||
if (self->index == self->buffer_len) {
|
||||
return (tokenizerToken) {
|
||||
return (TokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_EOF,
|
||||
.loc = {
|
||||
.start = self->index,
|
||||
@@ -455,7 +455,7 @@ state:
|
||||
default:; // Once we're at C23, this semicolon can be removed.
|
||||
const char* start = self->buffer + result.loc.start;
|
||||
uint32_t len = self->index - result.loc.start;
|
||||
tokenizerTag tag = get_keyword(start, len);
|
||||
TokenizerTag tag = getKeyword(start, len);
|
||||
if (tag != TOKENIZER_TAG_INVALID) {
|
||||
result.tag = tag;
|
||||
}
|
||||
@@ -856,7 +856,7 @@ state:
|
||||
state = TOKENIZER_STATE_INVALID;
|
||||
goto state;
|
||||
} else {
|
||||
return (tokenizerToken) {
|
||||
return (TokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_EOF,
|
||||
.loc = {
|
||||
.start = self->index,
|
||||
@@ -930,7 +930,7 @@ state:
|
||||
state = TOKENIZER_STATE_INVALID;
|
||||
goto state;
|
||||
} else {
|
||||
return (tokenizerToken) {
|
||||
return (TokenizerToken) {
|
||||
.tag = TOKENIZER_TAG_EOF,
|
||||
.loc = {
|
||||
.start = self->index,
|
||||
|
||||
14
tokenizer.h
14
tokenizer.h
@@ -133,7 +133,7 @@
|
||||
|
||||
typedef enum {
|
||||
FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM)
|
||||
} tokenizerTag;
|
||||
} TokenizerTag;
|
||||
|
||||
static const char *tokenizerTagString[] = {
|
||||
FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING)
|
||||
@@ -185,22 +185,22 @@ typedef enum {
|
||||
TOKENIZER_STATE_PERIOD_ASTERISK,
|
||||
TOKENIZER_STATE_SAW_AT_SIGN,
|
||||
TOKENIZER_STATE_INVALID,
|
||||
} tokenizerState;
|
||||
} TokenizerState;
|
||||
|
||||
typedef struct {
|
||||
tokenizerTag tag;
|
||||
TokenizerTag tag;
|
||||
struct {
|
||||
uint32_t start, end;
|
||||
} loc;
|
||||
} tokenizerToken;
|
||||
} TokenizerToken;
|
||||
|
||||
typedef struct {
|
||||
const char* buffer;
|
||||
const uint32_t buffer_len;
|
||||
uint32_t index;
|
||||
} tokenizer;
|
||||
} Tokenizer;
|
||||
|
||||
tokenizer tokenizer_init(const char* buffer, uint32_t len);
|
||||
tokenizerToken tokenizer_next(tokenizer* self);
|
||||
Tokenizer tokenizerInit(const char* buffer, uint32_t len);
|
||||
TokenizerToken tokenizerNext(Tokenizer* self);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -138,6 +138,15 @@ fn zigToken(token: c_uint) Token.Tag {
|
||||
|
||||
// Copy-pasted from lib/std/zig/tokenizer.zig
|
||||
fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !void {
|
||||
// Do the C thing
|
||||
var ctokenizer = c.tokenizerInit(source.ptr, @intCast(source.len));
|
||||
for (expected_token_tags) |expected_token_tag| {
|
||||
const token = c.tokenizerNext(&ctokenizer);
|
||||
try std.testing.expectEqual(expected_token_tag, zigToken(token.tag));
|
||||
}
|
||||
const last_token = c.tokenizerNext(&ctokenizer);
|
||||
try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag));
|
||||
|
||||
// uncomment when Zig source and compiler get in sync (e.g. with 0.14)
|
||||
//var tokenizer = Tokenizer.init(source);
|
||||
//for (expected_token_tags) |expected_token_tag| {
|
||||
@@ -149,17 +158,6 @@ fn testTokenize(source: [:0]const u8, expected_token_tags: []const Token.Tag) !v
|
||||
//// recovered by opinionated means outside the scope of this implementation.
|
||||
//const last_token = tokenizer.next();
|
||||
//try std.testing.expectEqual(Token.Tag.eof, last_token.tag);
|
||||
//try std.testing.expectEqual(source.len, last_token.loc.start);
|
||||
//try std.testing.expectEqual(source.len, last_token.loc.end);
|
||||
|
||||
// Do the C thing
|
||||
var ctokenizer = c.tokenizer_init(source.ptr, @intCast(source.len));
|
||||
for (expected_token_tags) |expected_token_tag| {
|
||||
const token = c.tokenizer_next(&ctokenizer);
|
||||
try std.testing.expectEqual(expected_token_tag, zigToken(token.tag));
|
||||
}
|
||||
const last_token = c.tokenizer_next(&ctokenizer);
|
||||
try std.testing.expectEqual(Token.Tag.eof, zigToken(last_token.tag));
|
||||
try std.testing.expectEqual(source.len, last_token.loc.start);
|
||||
try std.testing.expectEqual(source.len, last_token.loc.end);
|
||||
}
|
||||
|
||||
6
zig1.c
6
zig1.c
@@ -6,7 +6,7 @@
|
||||
// - code = 0: program successfully terminated.
|
||||
// - code = 1: panicked, panic message in msg. Caller should free msg.
|
||||
// - code = 2: interpreter error, error in msg. Caller should free msg.
|
||||
int zig1_run(const char* program, char** msg)
|
||||
int zig1Run(const char* program, char** msg)
|
||||
{
|
||||
(void)program;
|
||||
(void)msg;
|
||||
@@ -15,7 +15,7 @@ int zig1_run(const char* program, char** msg)
|
||||
|
||||
// API: run and:
|
||||
// code = 3: abnormal error, expect something in stderr.
|
||||
int zig1_run_file(const char* fname, char** msg)
|
||||
int zig1RunFile(const char* fname, char** msg)
|
||||
{
|
||||
FILE* f = fopen(fname, "r");
|
||||
if (f == NULL) {
|
||||
@@ -53,7 +53,7 @@ int zig1_run_file(const char* fname, char** msg)
|
||||
fclose(f);
|
||||
program[fsize] = 0;
|
||||
|
||||
int code = zig1_run(program, msg);
|
||||
int code = zig1Run(program, msg);
|
||||
free(program);
|
||||
return code;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user