commit 6006a802e1dd2a3f06a5aa9db5e9b1e7bbee850c (tree)
parent 6ae7d7320d87af37484af685de26e77230d299c3
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Fri, 27 Dec 2024 12:34:08 +0200
making tcc happier
Diffstat:
10 files changed, 415 insertions(+), 273 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,2 @@
+BasedOnStyle: WebKit
+BreakBeforeBraces: Attach
diff --git a/ast.c b/ast.c
@@ -9,45 +9,15 @@
#define N 1024
-void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
-{
- const uint32_t new_len = list->len + additional;
- if (new_len <= list->cap) {
- return;
- }
-
- const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
- list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
- list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
- list->datas = realloc(list->datas, new_cap * sizeof(AstData));
- if (!list->tags || !list->main_tokens || !list->datas)
- exit(1);
- list->cap = new_cap;
-}
-
-AstNodeIndex astNodeListAppend(
- AstNodeList* list,
- AstNodeTag tag,
- AstTokenIndex main_token,
- AstData data)
-{
- astNodeListEnsureCapacity(list, 1);
- list->tags[list->len] = tag;
- list->main_tokens[list->len] = main_token;
- list->datas[list->len] = data;
- return list->len++;
-}
-
-Ast astParse(const char* source, const uint32_t len)
-{
+Ast astParse(const char* source, const uint32_t len) {
uint32_t estimated_token_count = len / 8;
// Initialize token list
AstTokenList tokens = {
.len = 0,
.cap = estimated_token_count,
- .tags = SLICE_INIT(TokenizerTag, estimated_token_count),
- .starts = SLICE_INIT(AstIndex, estimated_token_count)
+ .tags = ARR_INIT(TokenizerTag, estimated_token_count),
+ .starts = ARR_INIT(AstIndex, estimated_token_count)
};
// Tokenize
@@ -70,9 +40,9 @@ Ast astParse(const char* source, const uint32_t len)
AstNodeList nodes = {
.len = 0,
.cap = estimated_node_count,
- .tags = SLICE_INIT(AstNodeTag, estimated_node_count),
- .main_tokens = SLICE_INIT(AstTokenIndex, estimated_node_count),
- .datas = SLICE_INIT(AstData, estimated_node_count)
+ .tags = ARR_INIT(AstNodeTag, estimated_node_count),
+ .main_tokens = ARR_INIT(AstTokenIndex, estimated_node_count),
+ .datas = ARR_INIT(AstData, estimated_node_count)
};
// Initialize parser
@@ -84,11 +54,8 @@ Ast astParse(const char* source, const uint32_t len)
.tokens_len = tokens.len,
.tok_i = 0,
.nodes = nodes,
- .extra_data = {
- .len = 0,
- .cap = N,
- .arr = SLICE_INIT(AstNodeIndex, N) },
- .scratch = { .len = 0, .cap = N, .arr = SLICE_INIT(AstNodeIndex, N) }
+ .extra_data = SLICE_INIT(AstNodeIndex, N),
+ .scratch = SLICE_INIT(AstNodeIndex, N)
};
free(p.scratch.arr); // Parser takes ownership
diff --git a/ast.h b/ast.h
@@ -4,6 +4,7 @@
#include <stdbool.h>
#include <stdint.h>
+#include "common.h"
#include "tokenizer.h"
typedef enum {
@@ -499,24 +500,26 @@ typedef struct {
} AstNodeList;
typedef struct {
+ AstNodeTag tag; // stored into AstNodeList.tags[i]
+ AstTokenIndex main_token; // stored into AstNodeList.main_tokens[i]
+ AstData data; // stored into AstNodeList.datas[i]
+} AstNodeItem; // one node's three fields bundled as a single value, as taken by astNodeListAppend
+
+typedef struct {
uint32_t len;
uint32_t cap;
TokenizerTag* tags;
AstIndex* starts;
} AstTokenList;
-typedef struct {
- uint32_t len;
- uint32_t cap;
- AstNodeIndex* arr;
-} AstExtraData;
+typedef SLICE(AstNodeIndex) AstNodeIndexSlice; // growable array of AstNodeIndex: expands to struct AstNodeIndexSlice { len, cap, arr } via SLICE in common.h
typedef struct {
const char* source;
uint32_t source_len;
AstTokenList tokens;
AstNodeList nodes;
- AstExtraData extra_data;
+ AstNodeIndexSlice extra_data;
} Ast;
typedef struct AstPtrType {
@@ -596,11 +599,7 @@ typedef struct AstError {
Ast astParse(const char* source, uint32_t len);
-// MultiArrayList
-void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
-void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
-
-AstNodeIndex astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstTokenIndex main_token, AstData data);
+AstNodeIndex astNodeListAppend(AstNodeList*, AstNodeItem); // NOTE(review): this change removes the definition from ast.c and parser.c appends through a file-local helper instead - confirm a definition of this function still exists
void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
#endif
diff --git a/build.zig b/build.zig
@@ -79,7 +79,7 @@ pub fn build(b: *std.Build) !void {
const lint_step = b.step("lint", "Run linters");
const clang_format = b.addSystemCommand(&.{"clang-format"});
- clang_format.addArgs(&.{ "--style=webkit", "--verbose", "-Werror", "-i" });
+ clang_format.addArgs(&.{ "--verbose", "-Werror", "-i" });
for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
lint_step.dependOn(&clang_format.step);
diff --git a/common.h b/common.h
@@ -5,14 +5,28 @@
#include <stdint.h>
#include <stdlib.h>
-#define SLICE_INIT(Type, initial_cap) ({ \
+#define SLICE(Type) /* declares struct <Type>Slice: a growable array of Type */ \
+ struct Type##Slice { \
+ uint32_t len; /* number of elements currently in use */ \
+ uint32_t cap; /* number of elements arr has room for */ \
+ Type* arr; /* element storage, allocated with calloc/realloc by the macros below */ \
+ }
+
+#define ARR_INIT(Type, initial_cap) ({ /* statement expression: calloc()s initial_cap zeroed elements of Type and yields the pointer; calls exit(1) if the allocation fails */ \
Type* arr = calloc(initial_cap, sizeof(Type)); \
if (!arr) \
exit(1); \
- (__typeof__(Type*)) { arr }; \
+ arr; \
})
-#define SLICE_RESIZE(slice, Type, new_cap) ({ \
+#define SLICE_INIT(Type, initial_cap) /* brace initializer for a SLICE(Type): empty, with storage for initial_cap elements */ \
+ { \
+ .len = 0, \
+ .cap = (initial_cap), \
+ .arr = ARR_INIT(Type, initial_cap) /* NOTE: initial_cap is expanded twice in this macro; pass a side-effect-free expression */ \
+ }
+
+#define SLICE_RESIZE(Type, slice, new_cap) ({ \
uint32_t cap = (new_cap); \
Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
if (!new_arr) \
@@ -21,12 +35,17 @@
(slice)->cap = cap; \
})
-#define SLICE_ENSURE_CAPACITY(slice, Type, additional) ({ \
+#define SLICE_ENSURE_CAPACITY(Type, slice, additional) ({ /* if len + additional exceeds cap, resizes *slice to the larger of 2 * cap and len + additional; slice and additional are expanded more than once */ \
if ((slice)->len + (additional) > (slice)->cap) { \
- SLICE_RESIZE(slice, \
- Type, \
+ SLICE_RESIZE(Type, \
+ slice, \
((slice)->cap * 2 > (slice)->len + (additional)) ? (slice)->cap * 2 : (slice)->len + (additional)); \
} \
})
+#define SLICE_APPEND(Type, slice, item) ({ /* appends item at index len of *slice and increments len, making room for one element first */ \
+ SLICE_ENSURE_CAPACITY(Type, slice, 1); \
+ (slice)->arr[(slice)->len++] = (item); /* slice is expanded several times: pass a plain pointer expression without side effects */ \
+})
+
#endif
diff --git a/main.c b/main.c
@@ -5,13 +5,11 @@
int zig1Run(char* program, char** msg);
int zig1RunFile(char* fname, char** msg);
-static void usage(const char* argv0)
-{
+static void usage(const char* argv0) { // prints a one-line usage message, naming the program as argv0, to stderr
fprintf(stderr, "Usage: %s program.zig\n", argv0);
}
-int main(int argc, char** argv)
-{
+int main(int argc, char** argv) {
if (argc != 2) {
usage(argv[0]);
return 1;
diff --git a/parser.c b/parser.c
@@ -1,7 +1,9 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "ast.h"
+#include "common.h"
#include "parser.h"
const AstNodeIndex null_node = 0;
@@ -22,8 +24,7 @@ typedef struct {
AstSubRange multi;
} SmallSpan;
-void parseRoot(Parser* p)
-{
+void parseRoot(Parser* p) {
p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
p->nodes.main_tokens[p->nodes.len] = 0;
@@ -32,8 +33,7 @@ void parseRoot(Parser* p)
static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
-static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
-{
+static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) {
if (p->token_tags[p->tok_i] == tag) {
if (ok != NULL)
*ok = true;
@@ -41,30 +41,124 @@ static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
} else {
if (ok != NULL)
*ok = false;
- return (AstTokenIndex) {};
+ return 0;
}
}
-static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
-{
- p->nodes.tags[i] = tag;
- p->nodes.main_tokens[i] = main_token;
- p->nodes.datas[i] = data;
+// Calls eatToken for TOKENIZER_TAG_DOC_COMMENT over and over, stopping at the
+// first call that reports no match. The returned token indices are discarded.
+static void eatDocComments(Parser* p) {
+    bool matched;
+    do {
+        eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &matched);
+    } while (matched);
+}
+
+static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) { // overwrites node slot i of p->nodes with item's three fields and returns i; performs no growth and no bounds check
+ p->nodes.tags[i] = item.tag;
+ p->nodes.main_tokens[i] = item.main_token;
+ p->nodes.datas[i] = item.data;
return i;
}
+// Makes sure the three parallel arrays of `list` (tags, main_tokens, datas)
+// can hold `additional` more nodes beyond the current `len`.
+//
+// When growth is needed the new capacity is the larger of twice the old
+// capacity and `len + additional`. If any realloc fails the process exits
+// with status 1. `len` itself is never changed here.
+static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
+    const uint32_t needed = list->len + additional;
+    if (needed > list->cap) {
+        const uint32_t doubled = list->cap * 2;
+        const uint32_t grown = doubled >= needed ? doubled : needed;
+
+        list->tags = realloc(list->tags, grown * sizeof(AstNodeTag));
+        list->main_tokens = realloc(list->main_tokens, grown * sizeof(AstTokenIndex));
+        list->datas = realloc(list->datas, grown * sizeof(AstData));
+        if (list->tags == NULL || list->main_tokens == NULL || list->datas == NULL)
+            exit(1);
+        list->cap = grown;
+    }
+}
+
+// Appends `item` as a new node at the end of `nodes`, growing the list when
+// it is full, and returns the index the node was stored at.
+static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
+    astNodeListEnsureCapacity(nodes, 1);
+
+    const uint32_t index = nodes->len;
+    nodes->tags[index] = item.tag;
+    nodes->main_tokens[index] = item.main_token;
+    nodes->datas[index] = item.data;
+    nodes->len = index + 1;
+    return index;
+}
+
static AstNodeIndex parseTypeExpr(Parser* p);
-static AstNodeIndex expectTypeExpr(Parser* p)
-{
+static AstNodeIndex expectTypeExpr(Parser* p) { // like parseTypeExpr, except that a result of 0 (no type expression) ends the process with exit(1) instead of being returned
const AstNodeIndex node = parseTypeExpr(p);
if (node == 0)
exit(1);
return node;
}
-static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
-{
+// Handles an optional `align` clause.
+//
+// Returns null_node when the current token is not TOKENIZER_TAG_KEYWORD_ALIGN.
+// Parsing the clause itself is not implemented yet: if the keyword is present
+// an error is printed to stderr and the process exits with status 1.
+static AstNodeIndex parseByteAlign(Parser* p) {
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok);
+    if (!ok) {
+        return null_node;
+    }
+    fprintf(stderr, "parseByteAlign cannot parse alignment\n");
+    exit(1);
+    return 0; // tcc
+}
+
+// Pair of node indices handed to addExtra by expectContainerField when a
+// container field has both an alignment and a value expression.
+typedef struct {
+    AstNodeIndex align_expr;
+    AstNodeIndex value_expr;
+} NodeContainerField;
+
+// Parses one container field and appends a node for it, returning the node's
+// index.
+//
+// An optional leading `comptime` keyword and an optional `identifier :`
+// prefix are skipped, then a type expression (mandatory) and an optional
+// alignment are parsed. A default value is not supported yet: an `=` token
+// makes the process exit with an error, so value_expr is always 0 and the
+// CONTAINER_FIELD branch below cannot currently be reached.
+static AstNodeIndex expectContainerField(Parser* p) {
+    eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, NULL);
+    const AstTokenIndex main_token = p->tok_i;
+    const bool has_name = p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER
+        && p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON;
+    if (has_name)
+        p->tok_i += 2;
+
+    const AstNodeIndex type_expr = expectTypeExpr(p);
+    const AstNodeIndex align_expr = parseByteAlign(p);
+    const AstNodeIndex value_expr = 0;
+    bool has_default;
+    eatToken(p, TOKENIZER_TAG_EQUAL, &has_default);
+    if (has_default) {
+        fprintf(stderr, "expectContainerField does not support expr\n");
+        exit(1);
+    }
+
+    // Every variant shares main_token and lhs; only tag and rhs differ.
+    AstNodeItem item = {
+        .main_token = main_token,
+        .data = { .lhs = type_expr },
+    };
+    if (align_expr == 0) {
+        item.tag = AST_NODE_TAG_CONTAINER_FIELD_INIT;
+        item.data.rhs = value_expr;
+    } else if (value_expr == 0) {
+        item.tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN;
+        item.data.rhs = align_expr;
+    } else {
+        item.tag = AST_NODE_TAG_CONTAINER_FIELD;
+        item.data.rhs = addExtra(p, (NodeContainerField) {
+                                        .align_expr = align_expr,
+                                        .value_expr = value_expr,
+                                    });
+    }
+    return addNode(&p->nodes, item);
+}
+
+static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) {
case TOKENIZER_TAG_CHAR_LITERAL:
@@ -86,18 +180,17 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
exit(1);
- break;
case TOKENIZER_TAG_IDENTIFIER:
if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
exit(1);
}
- return astNodeListAppend(
+ return addNode(
&p->nodes,
- AST_NODE_TAG_IDENTIFIER,
- nextToken(p),
- (AstData) {});
- break;
+ (AstNodeItem) {
+ .tag = AST_NODE_TAG_IDENTIFIER,
+ .main_token = nextToken(p),
+ .data = {} });
case TOKENIZER_TAG_KEYWORD_INLINE:
case TOKENIZER_TAG_KEYWORD_FOR:
case TOKENIZER_TAG_KEYWORD_WHILE:
@@ -106,29 +199,26 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
case TOKENIZER_TAG_L_PAREN:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
exit(1);
- break;
default:
return null_node;
}
}
-static AstNodeIndex parseSuffixOp(Parser *p) {
+static AstNodeIndex parseSuffixOp(Parser* p) { // placeholder: prints an error and exits for the L_BRACKET, PERIOD_ASTERISK, INVALID_PERIODASTERISKS and PERIOD tokens; for any other token returns null_node and leaves tok_i untouched
const TokenizerTag tok = p->token_tags[p->tok_i];
switch (tok) {
- case TOKENIZER_TAG_L_BRACKET:
- case TOKENIZER_TAG_PERIOD_ASTERISK:
- case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
- case TOKENIZER_TAG_PERIOD:
- fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
- exit(1);
- break;
- default:
- return null_node;
+ case TOKENIZER_TAG_L_BRACKET:
+ case TOKENIZER_TAG_PERIOD_ASTERISK:
+ case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
+ case TOKENIZER_TAG_PERIOD:
+ fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
+ exit(1);
+ default:
+ return null_node;
}
}
-static AstNodeIndex parseSuffixExpr(Parser* p)
-{
+static AstNodeIndex parseSuffixExpr(Parser* p) {
bool ok;
eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok);
if (ok) {
@@ -140,7 +230,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p)
if (res == 0)
return res;
- while(true) {
+ while (true) {
const AstNodeIndex suffix_op = parseSuffixOp(p);
if (suffix_op != 0) {
res = suffix_op;
@@ -155,8 +245,19 @@ static AstNodeIndex parseSuffixExpr(Parser* p)
}
}
-static AstNodeIndex parseErrorUnionExpr(Parser* p)
-{
+// Consumes the current token when its tag equals `tag`.
+//
+// On a match the result of nextToken(p) is returned; otherwise nothing is
+// consumed and 0 is returned. When `ok` is non-NULL it receives whether the
+// tag matched.
+// NOTE(review): despite the name, a mismatch is not treated as an error here;
+// callers that pass ok == NULL cannot tell a mismatch from token index 0.
+static AstNodeIndex expectToken(Parser* p, TokenizerTag tag, bool* ok) {
+    const bool matched = (p->token_tags[p->tok_i] == tag);
+    if (ok != NULL)
+        *ok = matched;
+    return matched ? nextToken(p) : 0;
+}
+
+static AstNodeIndex parseErrorUnionExpr(Parser* p) {
const AstNodeIndex suffix_expr = parseSuffixExpr(p);
if (suffix_expr == 0)
return null_node;
@@ -164,18 +265,18 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p)
const AstNodeIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok);
if (!ok)
return suffix_expr;
- return astNodeListAppend(
+ return addNode(
&p->nodes,
- AST_NODE_TAG_ERROR_UNION,
- bang,
- (AstData) {
- .lhs = suffix_expr,
- .rhs = expectTypeExpr(p),
- });
+ (AstNodeItem) {
+ .tag = AST_NODE_TAG_ERROR_UNION,
+ .main_token = bang,
+ .data = {
+ .lhs = suffix_expr,
+ .rhs = expectTypeExpr(p),
+ } });
}
-static AstNodeIndex parseTypeExpr(Parser* p)
-{
+static AstNodeIndex parseTypeExpr(Parser* p) {
const AstNodeIndex tok = p->token_tags[p->tok_i];
switch (tok) {
case TOKENIZER_TAG_QUESTION_MARK:
@@ -185,14 +286,12 @@ static AstNodeIndex parseTypeExpr(Parser* p)
case TOKENIZER_TAG_L_BRACKET:
fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
exit(1);
- break;
default:
return parseErrorUnionExpr(p);
}
}
-static SmallSpan parseParamDeclList(Parser* p)
-{
+static SmallSpan parseParamDeclList(Parser* p) {
// can only parse functions with no declarations
bool ok;
AstTokenIndex got_token = eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
@@ -212,15 +311,13 @@ static SmallSpan parseParamDeclList(Parser* p)
};
}
-static uint32_t reserveNode(Parser* p, AstNodeTag tag)
-{
+static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1);
p->nodes.tags[p->nodes.len - 1] = tag;
return p->nodes.len - 1;
}
-static AstNodeIndex parseFnProto(Parser* p)
-{
+static AstNodeIndex parseFnProto(Parser* p) {
bool ok;
AstNodeIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
if (!ok)
@@ -239,21 +336,146 @@ static AstNodeIndex parseFnProto(Parser* p)
eatToken(p, TOKENIZER_TAG_BANG, NULL);
const AstNodeIndex return_type_expr = parseTypeExpr(p);
+ return 0;
+}
+
+// Parses one statement. Work in progress: a leading `comptime` keyword and
+// the statement keywords listed in the switch below make the process exit
+// with an error message; for every other token the function currently
+// returns 1 without consuming anything.
+//
+// allow_defer_var is accepted for the eventual full implementation and is
+// not consulted yet.
+static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
+    (void)allow_defer_var;
+
+    bool ok;
+    if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) {
+        fprintf(stderr, "expectStatement: comptime keyword not yet supported\n");
+        exit(1);
+    }
+
+    // A token tag, not a node index: keep it typed as TokenizerTag so it
+    // matches what tokenizerGetTagString expects.
+    const TokenizerTag tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
+    case TOKENIZER_TAG_KEYWORD_SUSPEND:
+    case TOKENIZER_TAG_KEYWORD_DEFER:
+    case TOKENIZER_TAG_KEYWORD_ERRDEFER:
+    case TOKENIZER_TAG_KEYWORD_IF:
+    case TOKENIZER_TAG_KEYWORD_ENUM:
+    case TOKENIZER_TAG_KEYWORD_STRUCT:
+    case TOKENIZER_TAG_KEYWORD_UNION:
+        fprintf(stderr, "expectStatement does not yet support keyword %s\n", tokenizerGetTagString(tok));
+        exit(1);
+    default:
+        break;
+    }
+    // TODO continue
+    return 1;
+}
-static AstNodeIndex parseBlock(Parser *p) {
+// Remembers a scratch slice together with the length it should be reset to.
+typedef struct {
+    AstNodeIndexSlice* scratch;
+    uint32_t old_len;
+} CleanupScratch;
+
+// Resets the remembered slice's len to old_len. Has the signature required of
+// a handler for __attribute__((__cleanup__(...))).
+static void cleanupScratch(CleanupScratch* c) {
+    AstNodeIndexSlice* const slice = c->scratch;
+    slice->len = c->old_len;
+}
+
+// Parses a brace-delimited block of statements.
+//
+// Returns null_node when the current token is not `{`. Otherwise statements
+// are collected on p->scratch until `}` (or until expectStatement yields 0),
+// and one node is appended and its index returned:
+//   - 0, 1 or 2 statements: a BLOCK_TWO(_SEMICOLON) node holding the
+//     statement indices directly in lhs/rhs (0 where absent);
+//   - more: a BLOCK(_SEMICOLON) node whose lhs/rhs are the start and end
+//     offsets of the statement indices copied into p->extra_data.
+// p->scratch.len is restored to its entry value on every return path.
+static AstNodeIndex parseBlock(Parser* p) {
bool ok;
const AstNodeIndex lbrace = eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
if (!ok)
return null_node;
- const uint32_t scratch_top = p->scratch.len;
-cleanup:
+    // cleanupScratch runs when scratch_top goes out of scope and truncates
+    // the scratch slice back to old_len.
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
+        .scratch = &p->scratch,
+        .old_len = p->scratch.len,
+    };
+
+    while (1) {
+        if (p->token_tags[p->tok_i] == TOKENIZER_TAG_R_BRACE)
+            break;
+
+        // "const AstNodeIndex statement" once tinycc supports typeof_unqual (C23)
+        AstNodeIndex statement = expectStatement(p, true);
+        if (statement == 0)
+            break;
+        SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
+    }
+    expectToken(p, TOKENIZER_TAG_R_BRACE, NULL);
+    // Once the closing brace has been consumed, tok_i - 2 indexes the token
+    // right before it. Guard the subtraction so it cannot wrap around.
+    const bool semicolon = p->tok_i >= 2
+        && p->token_tags[p->tok_i - 2] == TOKENIZER_TAG_SEMICOLON;
+
+    // Statements gathered by this call; nothing is appended to scratch below,
+    // so the pointer stays valid.
+    const AstNodeIndex* statements = &p->scratch.arr[scratch_top.old_len];
+    const uint32_t statements_len = p->scratch.len - scratch_top.old_len;
+
+    switch (statements_len) {
+    case 0:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = 0,
+                    .rhs = 0,
+                },
+            });
+    case 1:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = statements[0],
+                    .rhs = 0,
+                },
+            });
+    case 2:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = statements[0],
+                    .rhs = statements[1],
+                },
+            });
+    default: {
+        // The scratch entries are discarded on return, so the statement list
+        // is copied into extra_data and the node records where it lives.
+        // Capacity must be ensured on extra_data, the slice written to.
+        SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, statements_len);
+        const uint32_t span_start = p->extra_data.len;
+        memcpy(
+            &p->extra_data.arr[span_start],
+            statements,
+            sizeof(AstNodeIndex) * statements_len);
+        p->extra_data.len += statements_len;
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = span_start,
+                    .rhs = p->extra_data.len,
+                },
+            });
+    }
+    }
+
+    // Every case above returns; kept so that compilers which cannot see that
+    // do not complain about a missing return.
+    return 0;
+}
-static AstNodeIndex expectTopLevelDecl(Parser* p)
-{
+// Recognizes the start of a variable declaration.
+//
+// Returns null_node when the current token is neither `const` nor `var`.
+// Declarations themselves are not supported yet: after either keyword has
+// been accepted an error is printed to stderr and the process exits with
+// status 1.
+static AstNodeIndex parseVarDeclProto(Parser* p) {
+    bool is_const;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &is_const);
+
+    bool is_var = false;
+    if (!is_const)
+        eatToken(p, TOKENIZER_TAG_KEYWORD_VAR, &is_var);
+
+    if (!is_const && !is_var)
+        return null_node;
+
+    fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
+    exit(1);
+    return 0; // tcc
+}
+
+// Parses a global variable declaration.
+//
+// Returns null_node when parseVarDeclProto finds no declaration. Anything
+// else is unsupported for now: an error is printed to stderr and the process
+// exits with status 1.
+static AstNodeIndex parseGlobalVarDecl(Parser* p) {
+    if (parseVarDeclProto(p) == 0)
+        return null_node;
+
+    fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
+    exit(1);
+    return 0; // tcc
+}
+
+static AstNodeIndex expectTopLevelDecl(Parser* p) {
AstTokenIndex extern_export_inline_token = p->tok_i++;
bool is_extern = false;
bool expect_fn = false;
@@ -282,7 +504,6 @@ static AstNodeIndex expectTopLevelDecl(Parser* p)
case TOKENIZER_TAG_SEMICOLON:
p->tok_i++;
return fn_proto;
- break;
case TOKENIZER_TAG_L_BRACE:
if (is_extern)
exit(1);
@@ -292,9 +513,11 @@ static AstNodeIndex expectTopLevelDecl(Parser* p)
return setNode(
p,
fn_decl_index,
- AST_NODE_TAG_FN_DECL,
- p->nodes.main_tokens[fn_proto],
- (AstData) { .lhs = fn_proto, .rhs = body_block });
+ (AstNodeItem) {
+ .tag = AST_NODE_TAG_FN_DECL,
+ .main_token = p->nodes.main_tokens[fn_proto],
+ .data = { .lhs = fn_proto, .rhs = body_block },
+ });
default:
exit(1); // Expected semicolon or left brace
}
@@ -312,10 +535,10 @@ static AstNodeIndex expectTopLevelDecl(Parser* p)
// assuming the program is correct...
fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
exit(1);
+ return 0; // make tcc happy
}
-static Members parseContainerMembers(Parser* p)
-{
+static Members parseContainerMembers(Parser* p) {
const uint32_t scratch_top = p->scratch.len;
Members res = (Members) {};
// ast_token_index last_field;
@@ -323,143 +546,89 @@ static Members parseContainerMembers(Parser* p)
while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok)
;
- // bool trailing = false;
- while (1) {
+ FieldState field_state = { .tag = FIELD_STATE_NONE };
- // SKIP eat doc comments
+ bool trailing = false;
+ AstNodeIndex top_level_decl;
+ while (1) {
+ eatDocComments(p);
switch (p->token_tags[p->tok_i]) {
- case TOKENIZER_TAG_INVALID:
- case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
- case TOKENIZER_TAG_IDENTIFIER:
- case TOKENIZER_TAG_STRING_LITERAL:
- case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
- case TOKENIZER_TAG_CHAR_LITERAL:
- case TOKENIZER_TAG_EOF:
- case TOKENIZER_TAG_BUILTIN:
- case TOKENIZER_TAG_BANG:
- case TOKENIZER_TAG_PIPE:
- case TOKENIZER_TAG_PIPE_PIPE:
- case TOKENIZER_TAG_PIPE_EQUAL:
- case TOKENIZER_TAG_EQUAL:
- case TOKENIZER_TAG_EQUAL_EQUAL:
- case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT:
- case TOKENIZER_TAG_BANG_EQUAL:
- case TOKENIZER_TAG_L_PAREN:
- case TOKENIZER_TAG_R_PAREN:
- case TOKENIZER_TAG_SEMICOLON:
- case TOKENIZER_TAG_PERCENT:
- case TOKENIZER_TAG_PERCENT_EQUAL:
- case TOKENIZER_TAG_L_BRACE:
- case TOKENIZER_TAG_R_BRACE:
- case TOKENIZER_TAG_L_BRACKET:
- case TOKENIZER_TAG_R_BRACKET:
- case TOKENIZER_TAG_PERIOD:
- case TOKENIZER_TAG_PERIOD_ASTERISK:
- case TOKENIZER_TAG_ELLIPSIS2:
- case TOKENIZER_TAG_ELLIPSIS3:
- case TOKENIZER_TAG_CARET:
- case TOKENIZER_TAG_CARET_EQUAL:
- case TOKENIZER_TAG_PLUS:
- case TOKENIZER_TAG_PLUS_PLUS:
- case TOKENIZER_TAG_PLUS_EQUAL:
- case TOKENIZER_TAG_PLUS_PERCENT:
- case TOKENIZER_TAG_PLUS_PERCENT_EQUAL:
- case TOKENIZER_TAG_PLUS_PIPE:
- case TOKENIZER_TAG_PLUS_PIPE_EQUAL:
- case TOKENIZER_TAG_MINUS:
- case TOKENIZER_TAG_MINUS_EQUAL:
- case TOKENIZER_TAG_MINUS_PERCENT:
- case TOKENIZER_TAG_MINUS_PERCENT_EQUAL:
- case TOKENIZER_TAG_MINUS_PIPE:
- case TOKENIZER_TAG_MINUS_PIPE_EQUAL:
- case TOKENIZER_TAG_ASTERISK:
- case TOKENIZER_TAG_ASTERISK_EQUAL:
- case TOKENIZER_TAG_ASTERISK_ASTERISK:
- case TOKENIZER_TAG_ASTERISK_PERCENT:
- case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL:
- case TOKENIZER_TAG_ASTERISK_PIPE:
- case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL:
- case TOKENIZER_TAG_ARROW:
- case TOKENIZER_TAG_COLON:
- case TOKENIZER_TAG_SLASH:
- case TOKENIZER_TAG_SLASH_EQUAL:
- case TOKENIZER_TAG_COMMA:
- case TOKENIZER_TAG_AMPERSAND:
- case TOKENIZER_TAG_AMPERSAND_EQUAL:
- case TOKENIZER_TAG_QUESTION_MARK:
- case TOKENIZER_TAG_ANGLE_BRACKET_LEFT:
- case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL:
- case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
- case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL:
- case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
- case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL:
- case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT:
- case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL:
- case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
- case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL:
- case TOKENIZER_TAG_TILDE:
- case TOKENIZER_TAG_NUMBER_LITERAL:
- case TOKENIZER_TAG_DOC_COMMENT:
- case TOKENIZER_TAG_CONTAINER_DOC_COMMENT:
- case TOKENIZER_TAG_KEYWORD_ADDRSPACE:
- case TOKENIZER_TAG_KEYWORD_ALIGN:
- case TOKENIZER_TAG_KEYWORD_ALLOWZERO:
- case TOKENIZER_TAG_KEYWORD_AND:
- case TOKENIZER_TAG_KEYWORD_ANYFRAME:
- case TOKENIZER_TAG_KEYWORD_ANYTYPE:
- case TOKENIZER_TAG_KEYWORD_ASM:
- case TOKENIZER_TAG_KEYWORD_ASYNC:
- case TOKENIZER_TAG_KEYWORD_AWAIT:
- case TOKENIZER_TAG_KEYWORD_BREAK:
- case TOKENIZER_TAG_KEYWORD_CALLCONV:
- case TOKENIZER_TAG_KEYWORD_CATCH:
+
+ case TOKENIZER_TAG_KEYWORD_TEST:
case TOKENIZER_TAG_KEYWORD_COMPTIME:
+ case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:;
+ const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
+ fprintf(stderr, "%s not implemented in parseContainerMembers\n", str);
+ exit(1);
+ case TOKENIZER_TAG_KEYWORD_PUB:
+ p->tok_i++;
+ top_level_decl = expectTopLevelDecl(p);
+ if (top_level_decl != 0) {
+ if (field_state.tag == FIELD_STATE_SEEN) {
+ field_state.tag = FIELD_STATE_END;
+ field_state.payload.end = top_level_decl;
+ }
+ SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
+ }
+ trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON);
+ break;
+
case TOKENIZER_TAG_KEYWORD_CONST:
- case TOKENIZER_TAG_KEYWORD_CONTINUE:
- case TOKENIZER_TAG_KEYWORD_DEFER:
- case TOKENIZER_TAG_KEYWORD_ELSE:
- case TOKENIZER_TAG_KEYWORD_ENUM:
- case TOKENIZER_TAG_KEYWORD_ERRDEFER:
- case TOKENIZER_TAG_KEYWORD_ERROR:
+ case TOKENIZER_TAG_KEYWORD_VAR:
+ case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
case TOKENIZER_TAG_KEYWORD_EXPORT:
case TOKENIZER_TAG_KEYWORD_EXTERN:
- case TOKENIZER_TAG_KEYWORD_FN:
- case TOKENIZER_TAG_KEYWORD_FOR:
- case TOKENIZER_TAG_KEYWORD_IF:
case TOKENIZER_TAG_KEYWORD_INLINE:
- case TOKENIZER_TAG_KEYWORD_NOALIAS:
case TOKENIZER_TAG_KEYWORD_NOINLINE:
- case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
- case TOKENIZER_TAG_KEYWORD_OPAQUE:
- case TOKENIZER_TAG_KEYWORD_OR:
- case TOKENIZER_TAG_KEYWORD_ORELSE:
- case TOKENIZER_TAG_KEYWORD_PACKED:
- case TOKENIZER_TAG_KEYWORD_RESUME:
- case TOKENIZER_TAG_KEYWORD_RETURN:
- case TOKENIZER_TAG_KEYWORD_LINKSECTION:
- case TOKENIZER_TAG_KEYWORD_STRUCT:
- case TOKENIZER_TAG_KEYWORD_SUSPEND:
- case TOKENIZER_TAG_KEYWORD_SWITCH:
- case TOKENIZER_TAG_KEYWORD_TEST:
- case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
- case TOKENIZER_TAG_KEYWORD_TRY:
- case TOKENIZER_TAG_KEYWORD_UNION:
- case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
- case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
- case TOKENIZER_TAG_KEYWORD_VAR:
- case TOKENIZER_TAG_KEYWORD_VOLATILE:
- case TOKENIZER_TAG_KEYWORD_WHILE:;
- const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
- fprintf(stderr, "keyword %s not implemented\n", str);
- exit(1);
- case TOKENIZER_TAG_KEYWORD_PUB:
- p->tok_i++;
- // AstNodeIndex top_level_decl = expectTopLevelDecl(*p);
+ case TOKENIZER_TAG_KEYWORD_FN:;
+ top_level_decl = expectTopLevelDecl(p);
+ if (top_level_decl != 0) {
+ if (field_state.tag == FIELD_STATE_SEEN) {
+ field_state.tag = FIELD_STATE_END;
+ field_state.payload.end = top_level_decl;
+ }
+ SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
+ }
+ trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON);
break;
- // TODO do work
+ case TOKENIZER_TAG_EOF:
+ case TOKENIZER_TAG_R_BRACE:
+ goto break_loop;
+ default:;
+ // skip parseCStyleContainer
+
+ const AstNodeIndex identifier = p->tok_i;
+ const AstNodeIndex container_field = expectContainerField(p);
+ switch (field_state.tag) {
+ case FIELD_STATE_NONE:
+ field_state.tag = FIELD_STATE_SEEN;
+ break;
+ case FIELD_STATE_SEEN:
+ break;
+ case FIELD_STATE_END:
+ fprintf(stderr, "parseContainerMembers error condition\n");
+ exit(1);
+ }
+ SLICE_APPEND(AstNodeIndex, &p->scratch, container_field);
+ switch (p->token_tags[p->tok_i]) {
+ case TOKENIZER_TAG_COMMA:
+ p->tok_i++;
+ trailing = true;
+ continue;
+ case TOKENIZER_TAG_R_BRACE:
+ case TOKENIZER_TAG_EOF:
+ trailing = false;
+ goto break_loop;
+ default:
+ continue;
+ }
+
+ findNextContainerMember(p);
+ continue;
}
}
+break_loop:
p->scratch.len = scratch_top;
return res;
diff --git a/parser.h b/parser.h
@@ -3,16 +3,10 @@
#define _ZIG1_PARSE_H__
#include "ast.h"
+#include "common.h"
#include <stdbool.h>
#include <stdint.h>
-// Standard slice
-typedef struct {
- uint32_t len;
- uint32_t cap;
- AstNodeIndex* arr;
-} ParserNodeIndexSlice;
-
typedef struct {
uint32_t len;
AstNodeIndex lhs;
@@ -20,7 +14,7 @@ typedef struct {
bool trailing;
} Members;
-typedef struct Parser {
+typedef struct {
const char* source;
uint32_t source_len;
@@ -31,8 +25,8 @@ typedef struct Parser {
AstTokenIndex tok_i;
AstNodeList nodes;
- ParserNodeIndexSlice extra_data;
- ParserNodeIndexSlice scratch;
+ AstNodeIndexSlice extra_data;
+ AstNodeIndexSlice scratch;
} Parser;
Parser* parserInit(const char* source, uint32_t len);
diff --git a/tokenizer.c b/tokenizer.c
@@ -10,8 +10,7 @@ typedef struct {
TokenizerTag tag;
} KeywordMap;
-const char* tokenizerGetTagString(TokenizerTag tag)
-{
+const char* tokenizerGetTagString(TokenizerTag tag) {
switch (tag) {
TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_CASE)
default:
@@ -72,8 +71,7 @@ const KeywordMap keywords[] = {
};
// TODO binary search
-static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
-{
+static TokenizerTag getKeyword(const char* bytes, const uint32_t len) {
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) {
size_t klen = strlen(keywords[i].keyword);
size_t minlen = klen < len ? klen : len;
@@ -91,8 +89,7 @@ static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
return TOKENIZER_TAG_INVALID;
}
-Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
-{
+Tokenizer tokenizerInit(const char* buffer, const uint32_t len) {
return (Tokenizer) {
.buffer = buffer,
.buffer_len = len,
@@ -100,8 +97,7 @@ Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
};
}
-TokenizerToken tokenizerNext(Tokenizer* self)
-{
+TokenizerToken tokenizerNext(Tokenizer* self) {
TokenizerToken result = (TokenizerToken) {
.tag = TOKENIZER_TAG_INVALID,
.loc = {
diff --git a/zig1.c b/zig1.c
@@ -6,8 +6,7 @@
// - code = 0: program successfully terminated.
// - code = 1: panicked, panic message in msg. Caller should free msg.
// - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1Run(const char* program, char** msg)
-{
+int zig1Run(const char* program, char** msg) {
(void)program;
(void)msg;
return 0;
@@ -15,8 +14,7 @@ int zig1Run(const char* program, char** msg)
// API: run and:
// code = 3: abnormal error, expect something in stderr.
-int zig1RunFile(const char* fname, char** msg)
-{
+int zig1RunFile(const char* fname, char** msg) {
FILE* f = fopen(fname, "r");
if (f == NULL) {
perror("fopen");