From 6006a802e1dd2a3f06a5aa9db5e9b1e7bbee850c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?=
Date: Fri, 27 Dec 2024 12:34:08 +0200
Subject: [PATCH] making tcc happier

---
 .clang-format |   2 +
 ast.c         |  49 +----
 ast.h         |  21 +-
 build.zig     |   2 +-
 common.h      |  31 ++-
 main.c        |   6 +-
 parser.c      | 547 +++++++++++++++++++++++++++++++++-----------------
 parser.h      |  14 +-
 tokenizer.c   |  12 +-
 zig1.c        |   6 +-
 10 files changed, 416 insertions(+), 274 deletions(-)
 create mode 100644 .clang-format

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..d1078a7
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,2 @@
+BasedOnStyle: WebKit
+BreakBeforeBraces: Attach
diff --git a/ast.c b/ast.c
index 17dfe30..03fa262 100644
--- a/ast.c
+++ b/ast.c
@@ -9,45 +9,15 @@
 
 #define N 1024
 
-void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
-{
-    const uint32_t new_len = list->len + additional;
-    if (new_len <= list->cap) {
-        return;
-    }
-
-    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
-    list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
-    list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
-    list->datas = realloc(list->datas, new_cap * sizeof(AstData));
-    if (!list->tags || !list->main_tokens || !list->datas)
-        exit(1);
-    list->cap = new_cap;
-}
-
-AstNodeIndex astNodeListAppend(
-    AstNodeList* list,
-    AstNodeTag tag,
-    AstTokenIndex main_token,
-    AstData data)
-{
-    astNodeListEnsureCapacity(list, 1);
-    list->tags[list->len] = tag;
-    list->main_tokens[list->len] = main_token;
-    list->datas[list->len] = data;
-    return list->len++;
-}
-
-Ast astParse(const char* source, const uint32_t len)
-{
+Ast astParse(const char* source, const uint32_t len) {
     uint32_t estimated_token_count = len / 8;
 
     // Initialize token list
     AstTokenList tokens = {
         .len = 0,
         .cap = estimated_token_count,
-        .tags = SLICE_INIT(TokenizerTag, estimated_token_count),
-        .starts = SLICE_INIT(AstIndex, estimated_token_count)
+        .tags = ARR_INIT(TokenizerTag, estimated_token_count),
+        .starts = ARR_INIT(AstIndex, estimated_token_count)
     };
 
     // Tokenize
@@ -70,9 +40,9 @@
     AstNodeList nodes = {
         .len = 0,
         .cap = estimated_node_count,
-        .tags = SLICE_INIT(AstNodeTag, estimated_node_count),
-        .main_tokens = SLICE_INIT(AstTokenIndex, estimated_node_count),
-        .datas = SLICE_INIT(AstData, estimated_node_count)
+        .tags = ARR_INIT(AstNodeTag, estimated_node_count),
+        .main_tokens = ARR_INIT(AstTokenIndex, estimated_node_count),
+        .datas = ARR_INIT(AstData, estimated_node_count)
     };
 
     // Initialize parser
@@ -84,11 +54,8 @@
         .tokens_len = tokens.len,
         .tok_i = 0,
         .nodes = nodes,
-        .extra_data = {
-            .len = 0,
-            .cap = N,
-            .arr = SLICE_INIT(AstNodeIndex, N) },
-        .scratch = { .len = 0, .cap = N, .arr = SLICE_INIT(AstNodeIndex, N) }
+        .extra_data = SLICE_INIT(AstNodeIndex, N),
+        .scratch = SLICE_INIT(AstNodeIndex, N)
     };
 
     free(p.scratch.arr); // Parser takes ownership
diff --git a/ast.h b/ast.h
index fba2052..2f032f4 100644
--- a/ast.h
+++ b/ast.h
@@ -4,6 +4,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 
+#include "common.h"
 #include "tokenizer.h"
 
 typedef enum {
@@ -498,6 +499,12 @@
     AstData* datas;
 } AstNodeList;
 
+typedef struct {
+    AstNodeTag tag;
+    AstTokenIndex main_token;
+    AstData data;
+} AstNodeItem;
+
 typedef struct {
     uint32_t len;
     uint32_t cap;
@@ -505,18 +512,14 @@
     AstIndex* starts;
 } AstTokenList;
 
-typedef struct {
-    uint32_t len;
-    uint32_t cap;
-    AstNodeIndex* arr;
-} AstExtraData;
+typedef SLICE(AstNodeIndex) AstNodeIndexSlice;
 
 typedef struct {
     const char* source;
     uint32_t source_len;
     AstTokenList tokens;
     AstNodeList nodes;
-    AstExtraData extra_data;
+    AstNodeIndexSlice extra_data;
 } Ast;
 
 typedef struct AstPtrType {
@@ -596,11 +599,7 @@ typedef struct AstError {
 
 Ast astParse(const char* source, uint32_t len);
 
-// MultiArrayList
-void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
-void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
-
-AstNodeIndex astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstTokenIndex main_token, AstData data);
+AstNodeIndex astNodeListAppend(AstNodeList*, AstNodeItem);
 void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
 
 #endif
diff --git a/build.zig b/build.zig
index ddbc0a8..6448bf5 100644
--- a/build.zig
+++ b/build.zig
@@ -79,7 +79,7 @@ pub fn build(b: *std.Build) !void {
     const lint_step = b.step("lint", "Run linters");
 
     const clang_format = b.addSystemCommand(&.{"clang-format"});
-    clang_format.addArgs(&.{ "--style=webkit", "--verbose", "-Werror", "-i" });
+    clang_format.addArgs(&.{ "--verbose", "-Werror", "-i" });
     for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
 
     lint_step.dependOn(&clang_format.step);
diff --git a/common.h b/common.h
index ac7690f..4f410be 100644
--- a/common.h
+++ b/common.h
@@ -5,14 +5,28 @@
 #include <stdint.h>
 #include <stdlib.h>
 
-#define SLICE_INIT(Type, initial_cap) ({ \
+#define SLICE(Type) \
+    struct Type##Slice { \
+        uint32_t len; \
+        uint32_t cap; \
+        Type* arr; \
+    }
+
+#define ARR_INIT(Type, initial_cap) ({ \
     Type* arr = calloc(initial_cap, sizeof(Type)); \
     if (!arr) \
         exit(1); \
-    (__typeof__(Type*)) { arr }; \
+    arr; \
 })
 
-#define SLICE_RESIZE(slice, Type, new_cap) ({ \
+#define SLICE_INIT(Type, initial_cap) \
+    { \
+        .len = 0, \
+        .cap = (initial_cap), \
+        .arr = ARR_INIT(Type, initial_cap) \
+    }
+
+#define SLICE_RESIZE(Type, slice, new_cap) ({ \
     uint32_t cap = (new_cap); \
     Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
     if (!new_arr) \
@@ -21,12 +35,17 @@
     (slice)->cap = cap; \
 })
 
-#define SLICE_ENSURE_CAPACITY(slice, Type, additional) ({ \
+#define SLICE_ENSURE_CAPACITY(Type, slice, additional) ({ \
     if ((slice)->len + (additional) > (slice)->cap) { \
-        SLICE_RESIZE(slice, \
-            Type, \
+        SLICE_RESIZE(Type, \
+            slice, \
            ((slice)->cap * 2 > (slice)->len + (additional)) ? (slice)->cap * 2 : (slice)->len + (additional)); \
     } \
 })
 
+#define SLICE_APPEND(Type, slice, item) ({ \
+    SLICE_ENSURE_CAPACITY(Type, slice, 1); \
+    (slice)->arr[(slice)->len++] = (item); \
+})
+
 #endif
diff --git a/main.c b/main.c
index 329308b..9f3ea68 100644
--- a/main.c
+++ b/main.c
@@ -5,13 +5,11 @@
 int zig1Run(char* program, char** msg);
 int zig1RunFile(char* fname, char** msg);
 
-static void usage(const char* argv0)
-{
+static void usage(const char* argv0) {
     fprintf(stderr, "Usage: %s program.zig\n", argv0);
 }
 
-int main(int argc, char** argv)
-{
+int main(int argc, char** argv) {
     if (argc != 2) {
         usage(argv[0]);
         return 1;
diff --git a/parser.c b/parser.c
index 7ba9bb2..401ae63 100644
--- a/parser.c
+++ b/parser.c
@@ -1,7 +1,9 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "ast.h"
+#include "common.h"
 #include "parser.h"
 
 const AstNodeIndex null_node = 0;
@@ -22,8 +24,7 @@
 typedef struct {
     AstSubRange multi;
 } SmallSpan;
 
-void parseRoot(Parser* p)
-{
+void parseRoot(Parser* p) {
     p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
     p->nodes.main_tokens[p->nodes.len] = 0;
 
@@ -32,8 +33,7 @@
 static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
 
-static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
-{
+static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) {
     if (p->token_tags[p->tok_i] == tag) {
         if (ok != NULL)
             *ok = true;
@@ -41,30 +41,124 @@ static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
     } else {
         if (ok != NULL)
             *ok = false;
-        return (AstTokenIndex) {};
+        return 0;
     }
 }
 
-static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
-{
-    p->nodes.tags[i] = tag;
-    p->nodes.main_tokens[i] = main_token;
-    p->nodes.datas[i] = data;
+static void eatDocComments(Parser* p) {
+    bool ok;
+    while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) { }
+}
+
+static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
+    p->nodes.tags[i] = item.tag;
+    p->nodes.main_tokens[i] = item.main_token;
+    p->nodes.datas[i] = item.data;
     return i;
 }
 
+static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
+    const uint32_t new_len = list->len + additional;
+    if (new_len <= list->cap) {
+        return;
+    }
+
+    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
+    list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
+    list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
+    list->datas = realloc(list->datas, new_cap * sizeof(AstData));
+    if (!list->tags || !list->main_tokens || !list->datas)
+        exit(1);
+    list->cap = new_cap;
+}
+
+static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
+    astNodeListEnsureCapacity(nodes, 1);
+    nodes->tags[nodes->len] = item.tag;
+    nodes->main_tokens[nodes->len] = item.main_token;
+    nodes->datas[nodes->len] = item.data;
+    return nodes->len++;
+}
+
 static AstNodeIndex parseTypeExpr(Parser* p);
 
-static AstNodeIndex expectTypeExpr(Parser* p)
-{
+static AstNodeIndex expectTypeExpr(Parser* p) {
     const AstNodeIndex node = parseTypeExpr(p);
     if (node == 0)
         exit(1);
     return node;
 }
 
-static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
-{
+static AstNodeIndex parseByteAlign(Parser* p) {
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok);
+    if (!ok) {
+        return null_node;
+    }
+    fprintf(stderr, "parseByteAlign cannot parse alignment\n");
+    exit(1);
+    return 0; // tcc
+}
+
+typedef struct {
+    AstNodeIndex align_expr, value_expr;
+} NodeContainerField;
+
+static AstNodeIndex expectContainerField(Parser* p) {
+    eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, NULL);
+    const AstTokenIndex main_token = p->tok_i;
+    if (p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON)
+        p->tok_i += 2;
+
+    const AstNodeIndex type_expr = expectTypeExpr(p);
+    const AstNodeIndex align_expr = parseByteAlign(p);
+    const AstNodeIndex value_expr = 0;
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_EQUAL, &ok);
+    if (ok) {
+        fprintf(stderr, "expectContainerField does not support expr\n");
+        exit(1);
+    }
+
+    if (align_expr == 0) {
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_CONTAINER_FIELD_INIT,
+                .main_token = main_token,
+                .data = {
+                    .lhs = type_expr,
+                    .rhs = value_expr,
+                },
+            });
+    } else if (value_expr == 0) {
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
+                .main_token = main_token,
+                .data = {
+                    .lhs = type_expr,
+                    .rhs = align_expr,
+                },
+            });
+    } else {
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_CONTAINER_FIELD,
+                .main_token = main_token,
+                .data = {
+                    .lhs = type_expr,
+                    .rhs = addExtra(p, (NodeContainerField) {
+                        .align_expr = align_expr,
+                        .value_expr = value_expr,
+                    }) },
+            });
+    }
+}
+
+static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
     const TokenizerTag tok = p->token_tags[p->tok_i];
     switch (tok) {
     case TOKENIZER_TAG_CHAR_LITERAL:
@@ -86,18 +180,17 @@
     case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
         fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
         exit(1);
-        break;
     case TOKENIZER_TAG_IDENTIFIER:
         if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
             fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
             exit(1);
         }
-        return astNodeListAppend(
+        return addNode(
             &p->nodes,
-            AST_NODE_TAG_IDENTIFIER,
-            nextToken(p),
-            (AstData) {});
-        break;
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_IDENTIFIER,
+                .main_token = nextToken(p),
+                .data = {} });
     case TOKENIZER_TAG_KEYWORD_INLINE:
     case TOKENIZER_TAG_KEYWORD_FOR:
     case TOKENIZER_TAG_KEYWORD_WHILE:
@@ -106,29 +199,26 @@
     case TOKENIZER_TAG_L_PAREN:
         fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
%s\n", tokenizerGetTagString(tok)); exit(1); - break; default: return null_node; } } -static AstNodeIndex parseSuffixOp(Parser *p) { +static AstNodeIndex parseSuffixOp(Parser* p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { - case TOKENIZER_TAG_L_BRACKET: - case TOKENIZER_TAG_PERIOD_ASTERISK: - case TOKENIZER_TAG_INVALID_PERIODASTERISKS: - case TOKENIZER_TAG_PERIOD: - fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok)); - exit(1); - break; - default: - return null_node; + case TOKENIZER_TAG_L_BRACKET: + case TOKENIZER_TAG_PERIOD_ASTERISK: + case TOKENIZER_TAG_INVALID_PERIODASTERISKS: + case TOKENIZER_TAG_PERIOD: + fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok)); + exit(1); + default: + return null_node; } } -static AstNodeIndex parseSuffixExpr(Parser* p) -{ +static AstNodeIndex parseSuffixExpr(Parser* p) { bool ok; eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok); if (ok) { @@ -140,7 +230,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) if (res == 0) return res; - while(true) { + while (true) { const AstNodeIndex suffix_op = parseSuffixOp(p); if (suffix_op != 0) { res = suffix_op; @@ -155,8 +245,19 @@ static AstNodeIndex parseSuffixExpr(Parser* p) } } -static AstNodeIndex parseErrorUnionExpr(Parser* p) -{ +static AstNodeIndex expectToken(Parser* p, TokenizerTag tag, bool* ok) { + if (p->token_tags[p->tok_i] == tag) { + if (ok != NULL) + *ok = true; + return nextToken(p); + } else { + if (ok != NULL) + *ok = false; + return 0; + } +} + +static AstNodeIndex parseErrorUnionExpr(Parser* p) { const AstNodeIndex suffix_expr = parseSuffixExpr(p); if (suffix_expr == 0) return null_node; @@ -164,18 +265,18 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) const AstNodeIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok); if (!ok) return suffix_expr; - return astNodeListAppend( + return addNode( &p->nodes, - AST_NODE_TAG_ERROR_UNION, - bang, - (AstData) { - .lhs = suffix_expr, - .rhs = expectTypeExpr(p), - }); + (AstNodeItem) { + .tag = AST_NODE_TAG_ERROR_UNION, + .main_token = bang, + .data = { + .lhs = suffix_expr, + .rhs = expectTypeExpr(p), + } }); } -static AstNodeIndex parseTypeExpr(Parser* p) -{ +static AstNodeIndex parseTypeExpr(Parser* p) { const AstNodeIndex tok = p->token_tags[p->tok_i]; switch (tok) { case TOKENIZER_TAG_QUESTION_MARK: @@ -185,14 +286,12 @@ static AstNodeIndex parseTypeExpr(Parser* p) case TOKENIZER_TAG_L_BRACKET: fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok)); exit(1); - break; default: return parseErrorUnionExpr(p); } } -static SmallSpan parseParamDeclList(Parser* p) -{ +static SmallSpan parseParamDeclList(Parser* p) { // can only parse functions with no declarations bool ok; AstTokenIndex got_token = eatToken(p, TOKENIZER_TAG_L_PAREN, &ok); @@ -212,15 +311,13 @@ static SmallSpan parseParamDeclList(Parser* p) }; } -static uint32_t reserveNode(Parser* p, AstNodeTag tag) -{ +static uint32_t reserveNode(Parser* p, AstNodeTag tag) { astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1); p->nodes.tags[p->nodes.len - 1] = tag; return p->nodes.len - 1; } -static AstNodeIndex parseFnProto(Parser* p) -{ +static AstNodeIndex parseFnProto(Parser* p) { bool ok; AstNodeIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok); if (!ok) @@ -239,21 +336,146 @@ static AstNodeIndex parseFnProto(Parser* p) eatToken(p, TOKENIZER_TAG_BANG, NULL); const AstNodeIndex return_type_expr = parseTypeExpr(p); + return 0; } -static AstNodeIndex parseBlock(Parser *p) { 
+static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
+    bool ok;
+    if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) {
+        fprintf(stderr, "expectStatement: comptime keyword not yet supported\n");
+        exit(1);
+    }
+
+    const TokenizerTag tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
+    case TOKENIZER_TAG_KEYWORD_SUSPEND:
+    case TOKENIZER_TAG_KEYWORD_DEFER:
+    case TOKENIZER_TAG_KEYWORD_ERRDEFER:
+    case TOKENIZER_TAG_KEYWORD_IF:
+    case TOKENIZER_TAG_KEYWORD_ENUM:
+    case TOKENIZER_TAG_KEYWORD_STRUCT:
+    case TOKENIZER_TAG_KEYWORD_UNION:;
+        const char* tok_str = tokenizerGetTagString(tok);
+        fprintf(stderr, "expectStatement does not yet support keyword %s\n", tok_str);
+        exit(1);
+    default:;
+    }
+    // TODO continue
+    return 1;
+}
+
+typedef struct {
+    AstNodeIndexSlice* scratch;
+    uint32_t old_len;
+} CleanupScratch;
+static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; }
+
+static AstNodeIndex parseBlock(Parser* p) {
     bool ok;
     const AstNodeIndex lbrace = eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
     if (!ok)
         return null_node;
 
-    const uint32_t scratch_top = p->scratch.len;
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
+        .scratch = &p->scratch,
+        .old_len = p->scratch.len,
+    };
 
-cleanup:
+    while (1) {
+        if (p->token_tags[p->tok_i] == TOKENIZER_TAG_R_BRACE)
+            break;
+
+        // "const AstNodeIndex statement" once tinycc supports typeof_unqual (C23)
+        AstNodeIndex statement = expectStatement(p, true);
+        if (statement == 0)
+            break;
+        SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
+    }
+
+    expectToken(p, TOKENIZER_TAG_R_BRACE, NULL);
+    const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKENIZER_TAG_SEMICOLON);
+
+    switch (p->scratch.len - scratch_top.old_len) {
+    case 0:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = 0,
+                    .rhs = 0,
+                },
+            });
+    case 1:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = p->scratch.arr[scratch_top.old_len],
+                    .rhs = 0,
+                },
+            });
+    case 2:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = p->scratch.arr[scratch_top.old_len],
+                    .rhs = p->scratch.arr[scratch_top.old_len + 1],
+                },
+            });
+    default:;
+        const uint32_t extra = p->scratch.len - scratch_top.old_len;
+        SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, extra);
+        // Flush the statements into extra_data; lhs/rhs record the span.
+        const uint32_t span_start = p->extra_data.len;
+        memcpy(
+            &p->extra_data.arr[p->extra_data.len],
+            &p->scratch.arr[scratch_top.old_len],
+            sizeof(AstNodeIndex) * extra);
+        p->extra_data.len += extra;
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = span_start,
+                    .rhs = p->extra_data.len,
+                },
+            });
+    }
+
+    return 0;
 }
 
-static AstNodeIndex expectTopLevelDecl(Parser* p)
-{
+static AstNodeIndex parseVarDeclProto(Parser* p) {
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok);
+    if (!ok) {
+        eatToken(p, TOKENIZER_TAG_KEYWORD_VAR, &ok);
+        if (!ok)
+            return null_node;
+    }
+    fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
+    exit(1);
+    return 0; // tcc
+}
+
+static AstNodeIndex parseGlobalVarDecl(Parser* p) {
+    const AstNodeIndex var_decl = parseVarDeclProto(p);
+    if (var_decl == 0) {
+        return null_node;
+    }
+    fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
+    exit(1);
+    return 0; // tcc
+}
+
+static AstNodeIndex expectTopLevelDecl(Parser* p) {
     AstTokenIndex extern_export_inline_token = p->tok_i++;
     bool is_extern = false;
     bool expect_fn = false;
@@ -282,7 +504,6 @@
     case TOKENIZER_TAG_SEMICOLON:
         p->tok_i++;
         return fn_proto;
-        break;
     case TOKENIZER_TAG_L_BRACE:
         if (is_extern)
             exit(1);
@@ -292,9 +513,11 @@
         return setNode(
             p,
             fn_decl_index,
-            AST_NODE_TAG_FN_DECL,
-            p->nodes.main_tokens[fn_proto],
-            (AstData) { .lhs = fn_proto, .rhs = body_block });
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_FN_DECL,
+                .main_token = p->nodes.main_tokens[fn_proto],
+                .data = { .lhs = fn_proto, .rhs = body_block },
+            });
     default:
         exit(1); // Expected semicolon or left brace
     }
@@ -312,10 +535,10 @@
     // assuming the program is correct...
     fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
     exit(1);
+    return 0; // make tcc happy
 }
 
-static Members parseContainerMembers(Parser* p)
-{
+static Members parseContainerMembers(Parser* p) {
     const uint32_t scratch_top = p->scratch.len;
     Members res = (Members) {};
     // ast_token_index last_field;
@@ -323,143 +546,89 @@
     while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok)
         ;
 
-    // bool trailing = false;
-    while (1) {
-        // SKIP eat doc comments
+    FieldState field_state = { .tag = FIELD_STATE_NONE };
+
+    bool trailing = false;
+    AstNodeIndex top_level_decl;
+    while (1) {
+        eatDocComments(p);
         switch (p->token_tags[p->tok_i]) {
-        case TOKENIZER_TAG_INVALID:
-        case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
-        case TOKENIZER_TAG_IDENTIFIER:
-        case TOKENIZER_TAG_STRING_LITERAL:
-        case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
-        case TOKENIZER_TAG_CHAR_LITERAL:
-        case TOKENIZER_TAG_EOF:
-        case TOKENIZER_TAG_BUILTIN:
-        case TOKENIZER_TAG_BANG:
-        case TOKENIZER_TAG_PIPE:
-        case TOKENIZER_TAG_PIPE_PIPE:
-        case TOKENIZER_TAG_PIPE_EQUAL:
-        case TOKENIZER_TAG_EQUAL:
-        case TOKENIZER_TAG_EQUAL_EQUAL:
-        case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT:
-        case TOKENIZER_TAG_BANG_EQUAL:
-        case TOKENIZER_TAG_L_PAREN:
-        case TOKENIZER_TAG_R_PAREN:
-        case TOKENIZER_TAG_SEMICOLON:
-        case TOKENIZER_TAG_PERCENT:
-        case TOKENIZER_TAG_PERCENT_EQUAL:
-        case TOKENIZER_TAG_L_BRACE:
-        case TOKENIZER_TAG_R_BRACE:
-        case TOKENIZER_TAG_L_BRACKET:
-        case TOKENIZER_TAG_R_BRACKET:
-        case TOKENIZER_TAG_PERIOD:
-        case TOKENIZER_TAG_PERIOD_ASTERISK:
-        case TOKENIZER_TAG_ELLIPSIS2:
-        case TOKENIZER_TAG_ELLIPSIS3:
-        case TOKENIZER_TAG_CARET:
-        case TOKENIZER_TAG_CARET_EQUAL:
-        case TOKENIZER_TAG_PLUS:
-        case TOKENIZER_TAG_PLUS_PLUS:
-        case TOKENIZER_TAG_PLUS_EQUAL:
-        case TOKENIZER_TAG_PLUS_PERCENT:
-        case TOKENIZER_TAG_PLUS_PERCENT_EQUAL:
-        case TOKENIZER_TAG_PLUS_PIPE:
-        case TOKENIZER_TAG_PLUS_PIPE_EQUAL:
-        case TOKENIZER_TAG_MINUS:
-        case TOKENIZER_TAG_MINUS_EQUAL:
-        case TOKENIZER_TAG_MINUS_PERCENT:
-        case TOKENIZER_TAG_MINUS_PERCENT_EQUAL:
-        case TOKENIZER_TAG_MINUS_PIPE:
-        case TOKENIZER_TAG_MINUS_PIPE_EQUAL:
-        case TOKENIZER_TAG_ASTERISK:
-        case TOKENIZER_TAG_ASTERISK_EQUAL:
-        case TOKENIZER_TAG_ASTERISK_ASTERISK:
-        case TOKENIZER_TAG_ASTERISK_PERCENT:
-        case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL:
-        case TOKENIZER_TAG_ASTERISK_PIPE:
-        case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL:
-        case TOKENIZER_TAG_ARROW:
-        case TOKENIZER_TAG_COLON:
-        case TOKENIZER_TAG_SLASH:
-        case TOKENIZER_TAG_SLASH_EQUAL:
-        case TOKENIZER_TAG_COMMA:
-        case TOKENIZER_TAG_AMPERSAND:
-        case TOKENIZER_TAG_AMPERSAND_EQUAL:
-        case TOKENIZER_TAG_QUESTION_MARK:
-        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL:
-        case TOKENIZER_TAG_TILDE:
-        case TOKENIZER_TAG_NUMBER_LITERAL:
-        case TOKENIZER_TAG_DOC_COMMENT:
-        case TOKENIZER_TAG_CONTAINER_DOC_COMMENT:
-        case TOKENIZER_TAG_KEYWORD_ADDRSPACE:
-        case TOKENIZER_TAG_KEYWORD_ALIGN:
-        case TOKENIZER_TAG_KEYWORD_ALLOWZERO:
-        case TOKENIZER_TAG_KEYWORD_AND:
-        case TOKENIZER_TAG_KEYWORD_ANYFRAME:
-        case TOKENIZER_TAG_KEYWORD_ANYTYPE:
-        case TOKENIZER_TAG_KEYWORD_ASM:
-        case TOKENIZER_TAG_KEYWORD_ASYNC:
-        case TOKENIZER_TAG_KEYWORD_AWAIT:
-        case TOKENIZER_TAG_KEYWORD_BREAK:
-        case TOKENIZER_TAG_KEYWORD_CALLCONV:
-        case TOKENIZER_TAG_KEYWORD_CATCH:
-        case TOKENIZER_TAG_KEYWORD_COMPTIME:
-        case TOKENIZER_TAG_KEYWORD_CONST:
-        case TOKENIZER_TAG_KEYWORD_CONTINUE:
-        case TOKENIZER_TAG_KEYWORD_DEFER:
-        case TOKENIZER_TAG_KEYWORD_ELSE:
-        case TOKENIZER_TAG_KEYWORD_ENUM:
-        case TOKENIZER_TAG_KEYWORD_ERRDEFER:
-        case TOKENIZER_TAG_KEYWORD_ERROR:
-        case TOKENIZER_TAG_KEYWORD_EXPORT:
-        case TOKENIZER_TAG_KEYWORD_EXTERN:
-        case TOKENIZER_TAG_KEYWORD_FN:
-        case TOKENIZER_TAG_KEYWORD_FOR:
-        case TOKENIZER_TAG_KEYWORD_IF:
-        case TOKENIZER_TAG_KEYWORD_INLINE:
-        case TOKENIZER_TAG_KEYWORD_NOALIAS:
-        case TOKENIZER_TAG_KEYWORD_NOINLINE:
-        case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
-        case TOKENIZER_TAG_KEYWORD_OPAQUE:
-        case TOKENIZER_TAG_KEYWORD_OR:
-        case TOKENIZER_TAG_KEYWORD_ORELSE:
-        case TOKENIZER_TAG_KEYWORD_PACKED:
-        case TOKENIZER_TAG_KEYWORD_RESUME:
-        case TOKENIZER_TAG_KEYWORD_RETURN:
-        case TOKENIZER_TAG_KEYWORD_LINKSECTION:
-        case TOKENIZER_TAG_KEYWORD_STRUCT:
-        case TOKENIZER_TAG_KEYWORD_SUSPEND:
-        case TOKENIZER_TAG_KEYWORD_SWITCH:
-        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
-        case TOKENIZER_TAG_KEYWORD_TRY:
-        case TOKENIZER_TAG_KEYWORD_UNION:
-        case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
-        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
-        case TOKENIZER_TAG_KEYWORD_VAR:
-        case TOKENIZER_TAG_KEYWORD_VOLATILE:
-        case TOKENIZER_TAG_KEYWORD_WHILE:;
+        case TOKENIZER_TAG_KEYWORD_TEST:
+        case TOKENIZER_TAG_KEYWORD_COMPTIME:
+        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:;
             const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
implemented\n", str); + fprintf(stderr, "%s not implemented in parseContainerMembers\n", str); exit(1); case TOKENIZER_TAG_KEYWORD_PUB: p->tok_i++; - // AstNodeIndex top_level_decl = expectTopLevelDecl(*p); + top_level_decl = expectTopLevelDecl(p); + if (top_level_decl != 0) { + if (field_state.tag == FIELD_STATE_SEEN) { + field_state.tag = FIELD_STATE_END; + field_state.payload.end = top_level_decl; + } + SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); + } + trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON); break; - // TODO do work + + case TOKENIZER_TAG_KEYWORD_CONST: + case TOKENIZER_TAG_KEYWORD_VAR: + case TOKENIZER_TAG_KEYWORD_THREADLOCAL: + case TOKENIZER_TAG_KEYWORD_EXPORT: + case TOKENIZER_TAG_KEYWORD_EXTERN: + case TOKENIZER_TAG_KEYWORD_INLINE: + case TOKENIZER_TAG_KEYWORD_NOINLINE: + case TOKENIZER_TAG_KEYWORD_FN:; + top_level_decl = expectTopLevelDecl(p); + if (top_level_decl != 0) { + if (field_state.tag == FIELD_STATE_SEEN) { + field_state.tag = FIELD_STATE_END; + field_state.payload.end = top_level_decl; + } + SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); + } + trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON); + break; + case TOKENIZER_TAG_EOF: + case TOKENIZER_TAG_R_BRACE: + goto break_loop; + default:; + // skip parseCStyleContainer + + const AstNodeIndex identifier = p->tok_i; + const AstNodeIndex container_field = expectContainerField(p); + switch (field_state.tag) { + case FIELD_STATE_NONE: + field_state.tag = FIELD_STATE_SEEN; + break; + case FIELD_STATE_SEEN: + break; + case FIELD_STATE_END: + fprintf(stderr, "parseContainerMembers error condition\n"); + exit(1); + } + SLICE_APPEND(AstNodeIndex, &p->scratch, container_field); + switch (p->token_tags[p->tok_i]) { + case TOKENIZER_TAG_COMMA: + p->tok_i++; + trailing = true; + continue; + case TOKENIZER_TAG_R_BRACE: + case TOKENIZER_TAG_EOF: + trailing = false; + goto break_loop; + default: + continue; + } + + findNextContainerMember(p); + continue; } } +break_loop: p->scratch.len = scratch_top; return res; diff --git a/parser.h b/parser.h index 3929062..beae5f8 100644 --- a/parser.h +++ b/parser.h @@ -3,16 +3,10 @@ #define _ZIG1_PARSE_H__ #include "ast.h" +#include "common.h" #include #include -// Standard slice -typedef struct { - uint32_t len; - uint32_t cap; - AstNodeIndex* arr; -} ParserNodeIndexSlice; - typedef struct { uint32_t len; AstNodeIndex lhs; @@ -20,7 +14,7 @@ typedef struct { bool trailing; } Members; -typedef struct Parser { +typedef struct { const char* source; uint32_t source_len; @@ -31,8 +25,8 @@ typedef struct Parser { AstTokenIndex tok_i; AstNodeList nodes; - ParserNodeIndexSlice extra_data; - ParserNodeIndexSlice scratch; + AstNodeIndexSlice extra_data; + AstNodeIndexSlice scratch; } Parser; Parser* parserInit(const char* source, uint32_t len); diff --git a/tokenizer.c b/tokenizer.c index 74160bc..06bffd2 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -10,8 +10,7 @@ typedef struct { TokenizerTag tag; } KeywordMap; -const char* tokenizerGetTagString(TokenizerTag tag) -{ +const char* tokenizerGetTagString(TokenizerTag tag) { switch (tag) { TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_CASE) default: @@ -72,8 +71,7 @@ const KeywordMap keywords[] = { }; // TODO binary search -static TokenizerTag getKeyword(const char* bytes, const uint32_t len) -{ +static TokenizerTag getKeyword(const char* bytes, const uint32_t len) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) { size_t klen = strlen(keywords[i].keyword); size_t 
         size_t minlen = klen < len ? klen : len;
@@ -91,8 +89,7 @@
     return TOKENIZER_TAG_INVALID;
 }
 
-Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
-{
+Tokenizer tokenizerInit(const char* buffer, const uint32_t len) {
     return (Tokenizer) {
         .buffer = buffer,
         .buffer_len = len,
@@ -100,8 +97,7 @@
     };
 }
 
-TokenizerToken tokenizerNext(Tokenizer* self)
-{
+TokenizerToken tokenizerNext(Tokenizer* self) {
     TokenizerToken result = (TokenizerToken) {
         .tag = TOKENIZER_TAG_INVALID,
         .loc = {
diff --git a/zig1.c b/zig1.c
index 1557145..cdde141 100644
--- a/zig1.c
+++ b/zig1.c
@@ -6,8 +6,7 @@
 // - code = 0: program successfully terminated.
 // - code = 1: panicked, panic message in msg. Caller should free msg.
 // - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1Run(const char* program, char** msg)
-{
+int zig1Run(const char* program, char** msg) {
     (void)program;
     (void)msg;
     return 0;
@@ -15,8 +14,7 @@
 
 // API: run and:
 // code = 3: abnormal error, expect something in stderr.
-int zig1RunFile(const char* fname, char** msg)
-{
+int zig1RunFile(const char* fname, char** msg) {
     FILE* f = fopen(fname, "r");
     if (f == NULL) {
         perror("fopen");
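
A note for readers following the hunks above: expectContainerField() calls addExtra(), and parseContainerMembers() relies on FieldState, the FIELD_STATE_* tags, and findNextContainerMember(), none of which are defined in the visible diff; they presumably live in parts of parser.c this patch does not touch. As a reading aid only, here is a minimal hypothetical sketch of what an addExtra helper could look like. It is not taken from the patch; it assumes the SLICE macros from the patched common.h and that NodeContainerField consists solely of AstNodeIndex fields:

    /* Hypothetical sketch, not part of the patch: copy a struct made up of
     * AstNodeIndex fields into extra_data and return the index of the first
     * copied field. */
    static AstNodeIndex addExtra(Parser* p, NodeContainerField fields) {
        const uint32_t count = sizeof(fields) / sizeof(AstNodeIndex);
        SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
        const AstNodeIndex start = p->extra_data.len;
        memcpy(&p->extra_data.arr[start], &fields, sizeof(fields));
        p->extra_data.len += count;
        return start;
    }

Under these assumptions, expectContainerField stores the returned index in AstData.rhs of an AST_NODE_TAG_CONTAINER_FIELD node, and a consumer would read the two fields back as extra_data.arr[start] and extra_data.arr[start + 1]. This mirrors how parseBlock flushes its statement span into extra_data and records the span bounds in lhs/rhs.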