Add more parser code — starts breaking the build

This commit is contained in:
2024-12-25 23:44:33 +02:00
parent ef3ef64abd
commit 6ae7d7320d
5 changed files with 292 additions and 10 deletions

9
ast.c
View File

@@ -25,14 +25,17 @@ void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
list->cap = new_cap; list->cap = new_cap;
} }
void astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstNodeIndex astNodeListAppend(
AstTokenIndex main_token, AstData data) AstNodeList* list,
AstNodeTag tag,
AstTokenIndex main_token,
AstData data)
{ {
astNodeListEnsureCapacity(list, 1); astNodeListEnsureCapacity(list, 1);
list->tags[list->len] = tag; list->tags[list->len] = tag;
list->main_tokens[list->len] = main_token; list->main_tokens[list->len] = main_token;
list->datas[list->len] = data; list->datas[list->len] = data;
list->len++; return list->len++;
} }
Ast astParse(const char* source, const uint32_t len) Ast astParse(const char* source, const uint32_t len)

3
ast.h
View File

@@ -600,8 +600,7 @@ Ast astParse(const char* source, uint32_t len);
void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional); void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional); void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
void astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstNodeIndex astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstTokenIndex main_token, AstData data);
AstTokenIndex main_token, AstData data);
void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start); void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);
#endif #endif

View File

@@ -27,7 +27,6 @@ const cflags = &[_][]const u8{
"-Wformat=2", "-Wformat=2",
"-fno-common", "-fno-common",
"-Wconversion", "-Wconversion",
"-Wswitch-enum",
"-Wuninitialized", "-Wuninitialized",
"-Wdouble-promotion", "-Wdouble-promotion",
"-fstack-protector-all", "-fstack-protector-all",
@@ -80,7 +79,7 @@ pub fn build(b: *std.Build) !void {
const lint_step = b.step("lint", "Run linters"); const lint_step = b.step("lint", "Run linters");
const clang_format = b.addSystemCommand(&.{"clang-format"}); const clang_format = b.addSystemCommand(&.{"clang-format"});
clang_format.addArgs(&.{ "--style=webkit", "-i" }); clang_format.addArgs(&.{ "--style=webkit", "--verbose", "-Werror", "-i" });
for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f)); for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
lint_step.dependOn(&clang_format.step); lint_step.dependOn(&clang_format.step);

279
parser.c
View File

@@ -1,8 +1,11 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "ast.h"
#include "parser.h" #include "parser.h"
const AstNodeIndex null_node = 0;
typedef struct { typedef struct {
enum { enum {
FIELD_STATE_NONE, FIELD_STATE_NONE,
@@ -14,6 +17,11 @@ typedef struct {
} payload; } payload;
} FieldState; } FieldState;
// Result of parsing a list-like production: either a single optional node
// index (zero_or_one, 0 meaning "none") or a range of nodes (multi).
// NOTE(review): only zero_or_one is populated so far (see parseParamDeclList);
// multi's semantics to be confirmed once multi-element lists are parsed.
typedef struct {
AstNodeIndex zero_or_one;
AstSubRange multi;
} SmallSpan;
void parseRoot(Parser* p) void parseRoot(Parser* p)
{ {
p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT; p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
@@ -27,14 +35,285 @@ static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
{ {
if (p->token_tags[p->tok_i] == tag) { if (p->token_tags[p->tok_i] == tag) {
if (ok != NULL)
*ok = true; *ok = true;
return nextToken(p); return nextToken(p);
} else { } else {
if (ok != NULL)
*ok = false; *ok = false;
return (AstTokenIndex) {}; return (AstTokenIndex) {};
} }
} }
// Fill in the node stored at index `i` (previously obtained from
// reserveNode) and hand the index back so the call composes as an
// expression, mirroring astNodeListAppend's return convention.
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
{
    AstNodeList* nodes = &p->nodes;
    nodes->tags[i] = tag;
    nodes->main_tokens[i] = main_token;
    nodes->datas[i] = data;
    return i;
}
static AstNodeIndex parseTypeExpr(Parser* p);

// Parse a type expression; its absence is a fatal error.
// Fix: report a diagnostic before exiting, consistent with every other
// fatal path in this file (previously exited silently).
static AstNodeIndex expectTypeExpr(Parser* p)
{
    const AstNodeIndex node = parseTypeExpr(p);
    if (node == null_node) {
        fprintf(stderr, "expected type expression\n");
        exit(1);
    }
    return node;
}
// Parse a primary type expression. Only plain identifiers are implemented;
// every other recognized starter token aborts with a diagnostic, and any
// unrecognized token yields null_node ("no expression here").
// Fix: removed unreachable `break` statements after exit(1)/return.
static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_CHAR_LITERAL:
    case TOKENIZER_TAG_NUMBER_LITERAL:
    case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_STRING_LITERAL:
    case TOKENIZER_TAG_BUILTIN:
    case TOKENIZER_TAG_KEYWORD_FN:
    case TOKENIZER_TAG_KEYWORD_IF:
    case TOKENIZER_TAG_KEYWORD_SWITCH:
    case TOKENIZER_TAG_KEYWORD_EXTERN:
    case TOKENIZER_TAG_KEYWORD_PACKED:
    case TOKENIZER_TAG_KEYWORD_STRUCT:
    case TOKENIZER_TAG_KEYWORD_OPAQUE:
    case TOKENIZER_TAG_KEYWORD_ENUM:
    case TOKENIZER_TAG_KEYWORD_UNION:
    case TOKENIZER_TAG_KEYWORD_COMPTIME:
    case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
        exit(1);
    case TOKENIZER_TAG_IDENTIFIER:
        // `identifier:` starts a labeled construct, not a type expression.
        if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
            exit(1);
        }
        return astNodeListAppend(
            &p->nodes,
            AST_NODE_TAG_IDENTIFIER,
            nextToken(p),
            (AstData) {});
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_FOR:
    case TOKENIZER_TAG_KEYWORD_WHILE:
    case TOKENIZER_TAG_PERIOD:
    case TOKENIZER_TAG_KEYWORD_ERROR:
    case TOKENIZER_TAG_L_PAREN:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}
// Parse a suffix operator (indexing, deref, field access). None are
// implemented yet: recognized starters abort with a diagnostic, anything
// else yields null_node.
// Fix: removed unreachable `break` after exit(1); normalized brace and
// pointer placement to match the rest of the file (clang-format webkit).
static AstNodeIndex parseSuffixOp(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_L_BRACKET:
    case TOKENIZER_TAG_PERIOD_ASTERISK:
    case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
    case TOKENIZER_TAG_PERIOD:
        fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}
// Parse a primary type expression followed by any number of suffix
// operators. `async` expressions and call syntax (`(`) are not supported
// yet and abort with a diagnostic.
static AstNodeIndex parseSuffixExpr(Parser* p)
{
    bool ok = false;
    eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok);
    if (ok) {
        fprintf(stderr, "async not supported\n");
        exit(1);
    }
    AstNodeIndex res = parsePrimaryTypeExpr(p);
    if (res == 0)
        return res;
    for (;;) {
        const AstNodeIndex suffix_op = parseSuffixOp(p);
        if (suffix_op != 0) {
            res = suffix_op;
            continue;
        }
        eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
        if (!ok)
            return res;
        fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
        exit(1);
    }
}
// Parse `SuffixExpr (! TypeExpr)?`, producing an AST_NODE_TAG_ERROR_UNION
// node whose main token is the `!` when present.
// Fix: `bang` is a token index (eatToken's return / the main_token
// parameter), so declare it AstTokenIndex, not AstNodeIndex.
static AstNodeIndex parseErrorUnionExpr(Parser* p)
{
    const AstNodeIndex suffix_expr = parseSuffixExpr(p);
    if (suffix_expr == 0)
        return null_node;
    bool ok;
    const AstTokenIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok);
    if (!ok)
        return suffix_expr;
    return astNodeListAppend(
        &p->nodes,
        AST_NODE_TAG_ERROR_UNION,
        bang,
        (AstData) {
            .lhs = suffix_expr,
            .rhs = expectTypeExpr(p),
        });
}
// Parse a type expression. Prefix type operators (optional `?`, pointers,
// arrays, anyframe) are not implemented yet and abort; everything else is
// delegated to parseErrorUnionExpr.
// Fix: `tok` holds a token tag, so declare it TokenizerTag, not
// AstNodeIndex (matches parsePrimaryTypeExpr).
static AstNodeIndex parseTypeExpr(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_QUESTION_MARK:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_ASTERISK:
    case TOKENIZER_TAG_ASTERISK_ASTERISK:
    case TOKENIZER_TAG_L_BRACKET:
        fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
        exit(1);
    default:
        return parseErrorUnionExpr(p);
    }
}
// Parse a parameter declaration list. Only the empty list `()` is
// supported; anything else is a fatal error.
// Fix: on failure, eatToken returns a zero-initialized token *index*,
// which the old code passed to tokenizerGetTagString (which takes a
// *tag*) — the diagnostic printed the wrong string. Print the tag of the
// token actually being looked at instead.
static SmallSpan parseParamDeclList(Parser* p)
{
    bool ok;
    eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
    if (!ok) {
        fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    eatToken(p, TOKENIZER_TAG_R_PAREN, &ok);
    if (!ok) {
        fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    // No declarations parsed: report "no single param, no range".
    return (SmallSpan) {
        .zero_or_one = 0,
    };
}
// Reserve one fresh node slot, tag it, and return its index; the caller
// fills in main_token/data later via setNode.
// Fix: the old version never advanced `len`, so it clobbered the most
// recently appended node at `len - 1` and returned that stale index
// instead of reserving a new one. Mirror astNodeListAppend: write at
// `len`, then post-increment (and ask for 1 *additional* slot, which is
// what astNodeListEnsureCapacity's parameter means).
static uint32_t reserveNode(Parser* p, AstNodeTag tag)
{
    astNodeListEnsureCapacity(&p->nodes, 1);
    p->nodes.tags[p->nodes.len] = tag;
    return p->nodes.len++;
}
// Parse a function prototype: `fn [identifier] ( ParamDeclList ) [!] TypeExpr`.
// Returns null_node when the next token is not `fn`; otherwise returns the
// index of the completed AST_NODE_TAG_FN_PROTO node.
// Fix: the old version fell off the end of a non-void function (UB for the
// caller in expectTopLevelDecl, which reads the result) and never filled
// in the node it reserved. Also, `fn_token` is a token index, so it is
// declared AstTokenIndex and used as the node's main token.
static AstNodeIndex parseFnProto(Parser* p)
{
    bool ok;
    const AstTokenIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
    if (!ok)
        return null_node;
    const AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
    eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL); // optional function name
    const SmallSpan params = parseParamDeclList(p);
    // align/addrspace/linksection/callconv expressions: not supported yet.
    eatToken(p, TOKENIZER_TAG_BANG, NULL); // optional inferred error set
    const AstNodeIndex return_type_expr = parseTypeExpr(p);
    // NOTE(review): lhs = param span, rhs = return type assumed to be the
    // intended fn_proto data layout — TODO confirm against ast.h.
    return setNode(
        p,
        fn_proto_index,
        AST_NODE_TAG_FN_PROTO,
        fn_token,
        (AstData) { .lhs = params.zero_or_one, .rhs = return_type_expr });
}
// Parse a block. Returns null_node when the next token is not `{`.
// Fix: the old version ended with a dangling `cleanup:` label and no
// following statement or return — a compile error (a label must precede a
// statement) and UB for callers reading the result. Statement parsing is
// not implemented yet, so abort loudly like the other unsupported paths;
// the unused lbrace/scratch_top locals are dropped until the body lands.
static AstNodeIndex parseBlock(Parser* p)
{
    bool ok;
    eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
    if (!ok)
        return null_node;
    fprintf(stderr, "parseBlock does not support statements yet\n");
    exit(1);
}
// Parse one top-level declaration: an optional extern/export/inline/
// noinline prefix, then either a function prototype/definition or a
// global variable declaration. Unsupported forms are fatal.
// Fixes: the L_BRACE case began with a declaration directly after the
// case label, which is invalid before C23 (a label must precede a
// statement) — the case body is now braced; the unused
// `thread_local_token` local is gone (the optional token is still eaten).
static AstNodeIndex expectTopLevelDecl(Parser* p)
{
    const AstTokenIndex extern_export_inline_token = p->tok_i++;
    bool is_extern = false;
    bool expect_fn = false;
    bool expect_var_or_fn = false;
    switch (p->token_tags[extern_export_inline_token]) {
    case TOKENIZER_TAG_KEYWORD_EXTERN:
        eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL); // optional library name
        is_extern = true;
        expect_var_or_fn = true;
        break;
    case TOKENIZER_TAG_KEYWORD_EXPORT:
        expect_var_or_fn = true;
        break;
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_NOINLINE:
        expect_fn = true;
        break;
    default:
        p->tok_i--; // no prefix keyword: put the token back
    }
    const AstNodeIndex fn_proto = parseFnProto(p);
    if (fn_proto != 0) {
        switch (p->token_tags[p->tok_i]) {
        case TOKENIZER_TAG_SEMICOLON:
            p->tok_i++;
            return fn_proto;
        case TOKENIZER_TAG_L_BRACE: {
            // extern prototypes must not have bodies
            if (is_extern)
                exit(1);
            // Reserve the fn_decl slot before parsing the body so the
            // decl node precedes its body nodes in the list.
            const AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
            const AstNodeIndex body_block = parseBlock(p);
            return setNode(
                p,
                fn_decl_index,
                AST_NODE_TAG_FN_DECL,
                p->nodes.main_tokens[fn_proto],
                (AstData) { .lhs = fn_proto, .rhs = body_block });
        }
        default:
            exit(1); // Expected semicolon or left brace
        }
    }
    if (expect_fn)
        exit(1);
    eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL); // optional threadlocal
    const AstNodeIndex var_decl = parseGlobalVarDecl(p);
    if (var_decl != 0) {
        return var_decl;
    }
    // assuming the program is correct...
    fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
    exit(1);
}
static Members parseContainerMembers(Parser* p) static Members parseContainerMembers(Parser* p)
{ {
const uint32_t scratch_top = p->scratch.len; const uint32_t scratch_top = p->scratch.len;

View File

@@ -129,7 +129,9 @@
TAG(TOKENIZER_TAG_KEYWORD_WHILE) TAG(TOKENIZER_TAG_KEYWORD_WHILE)
#define TOKENIZER_GENERATE_ENUM(ENUM) ENUM, #define TOKENIZER_GENERATE_ENUM(ENUM) ENUM,
#define TOKENIZER_GENERATE_CASE(ENUM) case ENUM: return #ENUM; #define TOKENIZER_GENERATE_CASE(ENUM) \
case ENUM: \
return #ENUM;
// First define the enum // First define the enum
typedef enum { typedef enum {