commit 6ae7d7320d87af37484af685de26e77230d299c3 (tree)
parent ef3ef64abde0494b11f1e90f912f47101da79895
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Wed, 25 Dec 2024 23:44:33 +0200
adding more parser — starts breaking the build
Diffstat:
5 files changed, 292 insertions(+), 10 deletions(-)
diff --git a/ast.c b/ast.c
@@ -25,14 +25,17 @@ void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
list->cap = new_cap;
}
-void astNodeListAppend(AstNodeList* list, AstNodeTag tag,
- AstTokenIndex main_token, AstData data)
+AstNodeIndex astNodeListAppend(
+ AstNodeList* list,
+ AstNodeTag tag,
+ AstTokenIndex main_token,
+ AstData data)
{
astNodeListEnsureCapacity(list, 1);
list->tags[list->len] = tag;
list->main_tokens[list->len] = main_token;
list->datas[list->len] = data;
- list->len++;
+ return list->len++;
}
Ast astParse(const char* source, const uint32_t len)
diff --git a/ast.h b/ast.h
@@ -600,8 +600,7 @@ Ast astParse(const char* source, uint32_t len);
void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
-void astNodeListAppend(AstNodeList* list, AstNodeTag tag,
- AstTokenIndex main_token, AstData data);
+AstNodeIndex astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstTokenIndex main_token, AstData data);
void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
#endif
diff --git a/build.zig b/build.zig
@@ -27,7 +27,6 @@ const cflags = &[_][]const u8{
"-Wformat=2",
"-fno-common",
"-Wconversion",
- "-Wswitch-enum",
"-Wuninitialized",
"-Wdouble-promotion",
"-fstack-protector-all",
@@ -80,7 +79,7 @@ pub fn build(b: *std.Build) !void {
const lint_step = b.step("lint", "Run linters");
const clang_format = b.addSystemCommand(&.{"clang-format"});
- clang_format.addArgs(&.{ "--style=webkit", "-i" });
+ clang_format.addArgs(&.{ "--style=webkit", "--verbose", "-Werror", "-i" });
for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
lint_step.dependOn(&clang_format.step);
diff --git a/parser.c b/parser.c
@@ -1,8 +1,11 @@
#include <stdio.h>
#include <stdlib.h>
+#include "ast.h"
#include "parser.h"
+// Sentinel node index returned by the parse functions in this file when
+// nothing was parsed at the current position.
+// NOTE(review): presumably safe as a sentinel because index 0 is the root
+// node written by parseRoot — confirm.
+const AstNodeIndex null_node = 0;
+
typedef struct {
enum {
FIELD_STATE_NONE,
@@ -14,6 +17,11 @@ typedef struct {
} payload;
} FieldState;
+// Return type of parseParamDeclList(), which currently only ever sets
+// `zero_or_one` (to 0, for an empty parameter list).
+// NOTE(review): looks like a port of a tagged union where either
+// `zero_or_one` or `multi` is meaningful; there is no discriminant here —
+// confirm how callers are expected to tell the two apart.
+typedef struct {
+ AstNodeIndex zero_or_one;
+ AstSubRange multi;
+} SmallSpan;
+
void parseRoot(Parser* p)
{
p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
@@ -27,14 +35,285 @@ static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
{
if (p->token_tags[p->tok_i] == tag) {
- *ok = true;
+ if (ok != NULL)
+ *ok = true;
return nextToken(p);
} else {
- *ok = false;
+ if (ok != NULL)
+ *ok = false;
return (AstTokenIndex) {};
}
}
+// Overwrites all three columns (tag, main token, data) of the node stored at
+// index `i` and returns that same index, so callers can write
+// `return setNode(...)`. No bounds or capacity check is done on `i`.
+static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
+{
+    AstNodeList* const nodes = &p->nodes;
+
+    nodes->datas[i] = data;
+    nodes->main_tokens[i] = main_token;
+    nodes->tags[i] = tag;
+    return i;
+}
+
+static AstNodeIndex parseTypeExpr(Parser* p);
+
+// Parses a type expression that is mandatory at the current position.
+//
+// Returns the index of the parsed node. If parseTypeExpr() yields null_node,
+// the problem is reported on stderr and the process exits with status 1.
+static AstNodeIndex expectTypeExpr(Parser* p)
+{
+    const AstNodeIndex node = parseTypeExpr(p);
+    if (node == null_node) {
+        // Say why we are bailing out, like the other parse helpers do,
+        // instead of exiting silently.
+        fprintf(stderr, "expected type expression\n");
+        exit(1);
+    }
+    return node;
+}
+
+// Parses the primary part of a type expression at the current token.
+//
+// Only a bare identifier is implemented: it is consumed and appended as an
+// AST_NODE_TAG_IDENTIFIER node, whose index is returned. An identifier that is
+// directly followed by a colon, and every other token listed below, is
+// reported on stderr and terminates the process with status 1. Any remaining
+// token is left unconsumed and null_node is returned.
+static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
+{
+    const TokenizerTag current = p->token_tags[p->tok_i];
+
+    switch (current) {
+    case TOKENIZER_TAG_IDENTIFIER: {
+        const bool colon_follows = p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON;
+        if (colon_follows) {
+            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
+            exit(1);
+        }
+        const AstTokenIndex identifier = nextToken(p);
+        return astNodeListAppend(&p->nodes, AST_NODE_TAG_IDENTIFIER, identifier, (AstData) {});
+    }
+
+    // Recognised as the start of a primary type expression, but not
+    // implemented yet.
+    case TOKENIZER_TAG_CHAR_LITERAL:
+    case TOKENIZER_TAG_NUMBER_LITERAL:
+    case TOKENIZER_TAG_STRING_LITERAL:
+    case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
+    case TOKENIZER_TAG_BUILTIN:
+    case TOKENIZER_TAG_PERIOD:
+    case TOKENIZER_TAG_L_PAREN:
+    case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
+    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
+    case TOKENIZER_TAG_KEYWORD_FN:
+    case TOKENIZER_TAG_KEYWORD_IF:
+    case TOKENIZER_TAG_KEYWORD_SWITCH:
+    case TOKENIZER_TAG_KEYWORD_EXTERN:
+    case TOKENIZER_TAG_KEYWORD_PACKED:
+    case TOKENIZER_TAG_KEYWORD_STRUCT:
+    case TOKENIZER_TAG_KEYWORD_OPAQUE:
+    case TOKENIZER_TAG_KEYWORD_ENUM:
+    case TOKENIZER_TAG_KEYWORD_UNION:
+    case TOKENIZER_TAG_KEYWORD_COMPTIME:
+    case TOKENIZER_TAG_KEYWORD_INLINE:
+    case TOKENIZER_TAG_KEYWORD_FOR:
+    case TOKENIZER_TAG_KEYWORD_WHILE:
+    case TOKENIZER_TAG_KEYWORD_ERROR:
+        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(current));
+        exit(1);
+
+    default:
+        return null_node;
+    }
+}
+
+// Parses a suffix operator at the current token.
+//
+// No suffix operator is implemented yet: when the current token is
+// L_BRACKET, PERIOD_ASTERISK, INVALID_PERIODASTERISKS or PERIOD, it is
+// reported on stderr and the process exits with status 1. For any other
+// token nothing is consumed and null_node is returned.
+static AstNodeIndex parseSuffixOp(Parser* p)
+{
+    const TokenizerTag current = p->token_tags[p->tok_i];
+    const bool starts_suffix_op = current == TOKENIZER_TAG_L_BRACKET
+        || current == TOKENIZER_TAG_PERIOD_ASTERISK
+        || current == TOKENIZER_TAG_INVALID_PERIODASTERISKS
+        || current == TOKENIZER_TAG_PERIOD;
+
+    if (!starts_suffix_op)
+        return null_node;
+
+    fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(current));
+    exit(1);
+}
+
+// Parses a suffix expression: a primary type expression followed by any
+// number of suffix operators.
+//
+// A leading `async` keyword and a trailing call (left parenthesis) are both
+// consumed, reported on stderr and end the process with status 1. Returns
+// null_node when no primary type expression is present, otherwise the index
+// of the outermost node parsed.
+static AstNodeIndex parseSuffixExpr(Parser* p)
+{
+    bool saw_async;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &saw_async);
+    if (saw_async) {
+        fprintf(stderr, "async not supported\n");
+        exit(1);
+    }
+
+    AstNodeIndex expr = parsePrimaryTypeExpr(p);
+    if (expr == null_node)
+        return expr;
+
+    // Absorb suffix operators until none is left; each one becomes the new
+    // outermost node.
+    for (;;) {
+        const AstNodeIndex op = parseSuffixOp(p);
+        if (op == null_node)
+            break;
+        expr = op;
+    }
+
+    bool saw_paren;
+    eatToken(p, TOKENIZER_TAG_L_PAREN, &saw_paren);
+    if (saw_paren) {
+        fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
+        exit(1);
+    }
+    return expr;
+}
+
+// Parses `SuffixExpr` optionally followed by `!` and a type expression.
+//
+// Returns null_node when no suffix expression is present, the suffix
+// expression itself when no `!` follows, and otherwise a new
+// AST_NODE_TAG_ERROR_UNION node (main token = the `!`, lhs = suffix
+// expression, rhs = the mandatory type expression after the `!`).
+static AstNodeIndex parseErrorUnionExpr(Parser* p)
+{
+    const AstNodeIndex suffix_expr = parseSuffixExpr(p);
+    if (suffix_expr == null_node)
+        return null_node;
+
+    bool ok;
+    // eatToken() returns a token index, not a node index.
+    const AstTokenIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok);
+    if (!ok)
+        return suffix_expr;
+
+    // Parse the right-hand side before appending, so the child nodes come
+    // first in the node list and the error-union node is added last.
+    const AstNodeIndex rhs = expectTypeExpr(p);
+    return astNodeListAppend(
+        &p->nodes,
+        AST_NODE_TAG_ERROR_UNION,
+        bang,
+        (AstData) {
+            .lhs = suffix_expr,
+            .rhs = rhs,
+        });
+}
+
+// Parses a type expression starting at the current token.
+//
+// Prefix type operators are recognised but not implemented: QUESTION_MARK,
+// KEYWORD_ANYFRAME, ASTERISK, ASTERISK_ASTERISK and L_BRACKET are reported on
+// stderr and terminate the process with status 1. Everything else is
+// delegated to parseErrorUnionExpr(), whose result (possibly null_node) is
+// returned unchanged.
+static AstNodeIndex parseTypeExpr(Parser* p)
+{
+    // This is a tokenizer tag, not a node index: it is switched on against
+    // TOKENIZER_TAG_* values and handed to tokenizerGetTagString().
+    const TokenizerTag tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_QUESTION_MARK:
+    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
+    case TOKENIZER_TAG_ASTERISK:
+    case TOKENIZER_TAG_ASTERISK_ASTERISK:
+    case TOKENIZER_TAG_L_BRACKET:
+        fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
+        exit(1);
+    default:
+        return parseErrorUnionExpr(p);
+    }
+}
+
+// Parses a function parameter list.
+//
+// Only the empty list `()` is supported so far: the current token must be a
+// left parenthesis immediately followed by a right parenthesis. Anything else
+// is reported on stderr and terminates the process with status 1.
+// Returns a SmallSpan with `zero_or_one` set to 0 (no parameters).
+static SmallSpan parseParamDeclList(Parser* p)
+{
+    bool ok;
+
+    eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
+    if (!ok) {
+        // eatToken() does not advance on a mismatch, so tok_i still points at
+        // the offending token; report its tag rather than a token index.
+        fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
+        exit(1);
+    }
+
+    eatToken(p, TOKENIZER_TAG_R_PAREN, &ok);
+    if (!ok) {
+        fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
+        exit(1);
+    }
+
+    return (SmallSpan) {
+        .zero_or_one = 0,
+    };
+}
+
+// Appends a placeholder node carrying only `tag` and returns its index; the
+// caller fills in the main token and data later via setNode().
+//
+// astNodeListEnsureCapacity() takes the number of *additional* slots needed,
+// so exactly one is requested. The list length is then bumped so the reserved
+// slot is owned by this call and cannot be overwritten by a later append.
+static uint32_t reserveNode(Parser* p, AstNodeTag tag)
+{
+    astNodeListEnsureCapacity(&p->nodes, 1);
+    const uint32_t index = p->nodes.len++;
+    p->nodes.tags[index] = tag;
+    return index;
+}
+
+// Parses a function prototype: `fn`, an optional name, a parameter list, an
+// optional `!` and a return type expression.
+//
+// Returns null_node (consuming nothing) when the current token is not `fn`.
+// Otherwise returns the index of the prototype node, with main token = the
+// `fn` keyword, lhs = the parameter recorded by parseParamDeclList() and
+// rhs = the return type expression. A missing return type is reported on
+// stderr and terminates the process with status 1.
+static AstNodeIndex parseFnProto(Parser* p)
+{
+    bool ok;
+    const AstTokenIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
+    if (!ok)
+        return null_node;
+
+    // Reserve the slot up front so the prototype node precedes its children
+    // in the node list.
+    const AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
+
+    // The function name is optional here.
+    eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);
+
+    const SmallSpan params = parseParamDeclList(p);
+    // Not ported yet: byte alignment, address space, link section and
+    // calling convention clauses.
+    eatToken(p, TOKENIZER_TAG_BANG, NULL);
+
+    const AstNodeIndex return_type_expr = parseTypeExpr(p);
+    if (return_type_expr == null_node) {
+        fprintf(stderr, "expected return type expression\n");
+        exit(1);
+    }
+
+    // NOTE(review): the data layout below (lhs = single optional parameter,
+    // rhs = return type) is the "simple" prototype shape; if ast.h has a
+    // dedicated tag for that shape, use it here instead — confirm.
+    return setNode(
+        p,
+        fn_proto_index,
+        AST_NODE_TAG_FN_PROTO,
+        fn_token,
+        (AstData) { .lhs = params.zero_or_one, .rhs = return_type_expr });
+}
+
+// Parses a `{ ... }` block.
+//
+// Returns null_node (consuming nothing) when the current token is not a left
+// brace. Block bodies are not implemented yet: once the brace has been
+// consumed this is reported on stderr and the process exits with status 1,
+// so the function never falls off its end without a return value.
+static AstNodeIndex parseBlock(Parser* p)
+{
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
+    if (!ok)
+        return null_node;
+
+    fprintf(stderr, "parseBlock does not support block bodies yet\n");
+    exit(1);
+}
+
+// Parses one top-level declaration starting at the current token.
+//
+// A leading KEYWORD_EXTERN (plus an optional string literal), KEYWORD_EXPORT,
+// KEYWORD_INLINE or KEYWORD_NOINLINE is consumed first. If a function
+// prototype follows, the result is either the prototype node itself (when it
+// ends in a semicolon) or a new AST_NODE_TAG_FN_DECL node with lhs = prototype
+// and rhs = body block (when a left brace follows). Otherwise an optional
+// KEYWORD_THREADLOCAL is consumed and the result of parseGlobalVarDecl() is
+// returned if it is not null_node.
+//
+// Every failure path prints a message on stderr and exits with status 1.
+static AstNodeIndex expectTopLevelDecl(Parser* p)
+{
+    const AstTokenIndex extern_export_inline_token = p->tok_i++;
+    bool is_extern = false;
+    bool expect_fn = false;
+
+    // extern/export would also allow a variable declaration; that distinction
+    // is not enforced yet, so only the flags that are actually read are kept.
+    switch (p->token_tags[extern_export_inline_token]) {
+    case TOKENIZER_TAG_KEYWORD_EXTERN:
+        eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL);
+        is_extern = true;
+        break;
+    case TOKENIZER_TAG_KEYWORD_EXPORT:
+        break;
+    case TOKENIZER_TAG_KEYWORD_INLINE:
+    case TOKENIZER_TAG_KEYWORD_NOINLINE:
+        expect_fn = true;
+        break;
+    default:
+        // Not a modifier keyword: put the token back.
+        p->tok_i--;
+        break;
+    }
+
+    const AstNodeIndex fn_proto = parseFnProto(p);
+    if (fn_proto != null_node) {
+        switch (p->token_tags[p->tok_i]) {
+        case TOKENIZER_TAG_SEMICOLON:
+            p->tok_i++;
+            return fn_proto;
+        case TOKENIZER_TAG_L_BRACE: {
+            if (is_extern) {
+                fprintf(stderr, "extern functions have no body\n");
+                exit(1);
+            }
+
+            const AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
+            const AstNodeIndex body_block = parseBlock(p);
+            return setNode(
+                p,
+                fn_decl_index,
+                AST_NODE_TAG_FN_DECL,
+                p->nodes.main_tokens[fn_proto],
+                (AstData) { .lhs = fn_proto, .rhs = body_block });
+        }
+        default:
+            fprintf(stderr, "expected semicolon or left brace after function prototype\n");
+            exit(1);
+        }
+    }
+
+    if (expect_fn) {
+        fprintf(stderr, "expected function after inline/noinline\n");
+        exit(1);
+    }
+
+    // threadlocal is accepted, but its token is not recorded anywhere yet.
+    eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
+    // NOTE(review): parseGlobalVarDecl is not declared in the visible part of
+    // this file — confirm it is declared before this point.
+    const AstNodeIndex var_decl = parseGlobalVarDecl(p);
+    if (var_decl != null_node)
+        return var_decl;
+
+    // assuming the program is correct...
+    fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
+    exit(1);
+}
+
static Members parseContainerMembers(Parser* p)
{
const uint32_t scratch_top = p->scratch.len;
diff --git a/tokenizer.h b/tokenizer.h
@@ -129,7 +129,9 @@
TAG(TOKENIZER_TAG_KEYWORD_WHILE)
#define TOKENIZER_GENERATE_ENUM(ENUM) ENUM,
-#define TOKENIZER_GENERATE_CASE(ENUM) case ENUM: return #ENUM;
+#define TOKENIZER_GENERATE_CASE(ENUM) \
+ case ENUM: \
+ return #ENUM;
// First define the enum
typedef enum {