Files
zig0/parser.c

467 lines
14 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include "ast.h"
#include "parser.h"
// Sentinel node index: the parse functions in this file return it to signal
// that nothing was parsed at the current position.
const AstNodeIndex null_node = 0;
// Tagged state record: `tag` selects the state and `payload.end` carries the
// value that goes with FIELD_STATE_END.
// NOTE(review): not referenced in the visible part of this file; presumably
// intended for tracking container fields while parsing members — confirm.
typedef struct {
enum {
FIELD_STATE_NONE,
FIELD_STATE_SEEN,
FIELD_STATE_END // sets "end"
} tag;
union {
uint32_t end;
} payload;
} FieldState;
// Result type for parsing a short list of nodes (returned by
// parseParamDeclList, which currently always sets `zero_or_one` to 0).
// NOTE(review): nothing in this struct records which member is meaningful;
// presumably `zero_or_one` holds a single node index (0 for none) and `multi`
// a range for longer lists — confirm before relying on it.
typedef struct {
AstNodeIndex zero_or_one;
AstSubRange multi;
} SmallSpan;
// Appends the root node to the parser's node list.
//
// The node is written at index `p->nodes.len` with tag AST_NODE_TAG_ROOT and
// main token 0. The length is bumped only after both columns are filled so
// that the tag and the main token end up in the same slot.
// NOTE(review): no capacity check is done here; presumably the caller
// guarantees room for at least one node — confirm.
void parseRoot(Parser* p)
{
    p->nodes.tags[p->nodes.len] = AST_NODE_TAG_ROOT;
    p->nodes.main_tokens[p->nodes.len] = 0;
    p->nodes.len++;
    // TODO: parse the root container members, e.g.
    //   Members root_members = parseContainerMembers(p);
}
// Consumes the current token: returns its index and advances the cursor by one.
static AstTokenIndex nextToken(Parser* p)
{
    const AstTokenIndex current = p->tok_i;
    p->tok_i += 1;
    return current;
}
// Consumes the current token only if its tag equals `tag`.
//
// p:   parser whose cursor (`tok_i`) is inspected and possibly advanced.
// tag: the token tag to match against the current token.
// ok:  optional out-parameter; when non-NULL it receives true on a match and
//      false otherwise.
//
// Returns the index of the consumed token on a match, or 0 when the current
// token does not match (in which case the cursor is left untouched). Because
// 0 is also a valid token index, callers that need to tell the two apart must
// look at `*ok` rather than at the return value.
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
{
    const bool matched = p->token_tags[p->tok_i] == tag;
    if (ok != NULL)
        *ok = matched;
    if (!matched)
        return 0;
    return nextToken(p);
}
// Overwrites every column of the node stored at index `i`.
//
// p:          parser owning the node list.
// i:          index of the node slot to fill.
// tag:        node tag to store.
// main_token: main token index to store.
// data:       node payload to store.
//
// Returns `i`, so the call can be used directly in a `return` statement.
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
{
    // The three stores are independent of each other.
    p->nodes.datas[i] = data;
    p->nodes.main_tokens[i] = main_token;
    p->nodes.tags[i] = tag;
    return i;
}
// Forward declaration: parseTypeExpr and expectTypeExpr are mutually reachable
// (parseTypeExpr -> parseErrorUnionExpr -> expectTypeExpr).
static AstNodeIndex parseTypeExpr(Parser* p);

// Parses a type expression that is required at the current position.
//
// Returns the node index produced by parseTypeExpr. If parseTypeExpr yields
// the null node (0), a diagnostic is written to stderr and the process
// terminates with exit status 1.
static AstNodeIndex expectTypeExpr(Parser* p)
{
    const AstNodeIndex node = parseTypeExpr(p);
    if (node == 0) {
        // Report the failure instead of exiting silently, like the other
        // failure paths in this file do.
        fprintf(stderr, "expected type expression\n");
        exit(1);
    }
    return node;
}
// Parses a primary type expression at the current token.
//
// Only a plain identifier is supported so far: it is consumed and appended to
// the node list as an AST_NODE_TAG_IDENTIFIER node, whose index is returned.
// An identifier immediately followed by a colon, and every other token kind
// listed in the switch below, is reported on stderr and terminates the
// process with exit status 1.
//
// Returns null_node, without consuming anything, for any token that is not
// listed here.
static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
{
    const TokenizerTag tag = p->token_tags[p->tok_i];

    if (tag == TOKENIZER_TAG_IDENTIFIER) {
        // Peek one token ahead: "identifier :" is not handled yet.
        if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
            exit(1);
        }
        return astNodeListAppend(&p->nodes, AST_NODE_TAG_IDENTIFIER, nextToken(p), (AstData) {});
    }

    switch (tag) {
    // Recognised token kinds that are not implemented yet.
    case TOKENIZER_TAG_CHAR_LITERAL:
    case TOKENIZER_TAG_NUMBER_LITERAL:
    case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_STRING_LITERAL:
    case TOKENIZER_TAG_BUILTIN:
    case TOKENIZER_TAG_KEYWORD_FN:
    case TOKENIZER_TAG_KEYWORD_IF:
    case TOKENIZER_TAG_KEYWORD_SWITCH:
    case TOKENIZER_TAG_KEYWORD_EXTERN:
    case TOKENIZER_TAG_KEYWORD_PACKED:
    case TOKENIZER_TAG_KEYWORD_STRUCT:
    case TOKENIZER_TAG_KEYWORD_OPAQUE:
    case TOKENIZER_TAG_KEYWORD_ENUM:
    case TOKENIZER_TAG_KEYWORD_UNION:
    case TOKENIZER_TAG_KEYWORD_COMPTIME:
    case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_FOR:
    case TOKENIZER_TAG_KEYWORD_WHILE:
    case TOKENIZER_TAG_PERIOD:
    case TOKENIZER_TAG_KEYWORD_ERROR:
    case TOKENIZER_TAG_L_PAREN:
        break;
    default:
        return null_node;
    }

    fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tag));
    exit(1);
}
// Parses a suffix operator at the current token.
//
// No suffix operator is implemented yet: the token kinds that would start one
// are reported on stderr and terminate the process with exit status 1.
//
// Returns null_node, without consuming anything, for every other token.
static AstNodeIndex parseSuffixOp(Parser* p)
{
    const TokenizerTag tag = p->token_tags[p->tok_i];

    switch (tag) {
    case TOKENIZER_TAG_L_BRACKET:
    case TOKENIZER_TAG_PERIOD_ASTERISK:
    case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
    case TOKENIZER_TAG_PERIOD:
        break;
    default:
        return null_node;
    }

    fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tag));
    exit(1);
}
// Parses a primary type expression followed by any number of suffix operators.
//
// A leading `async` keyword and a `(` after the expression are both rejected:
// each writes a message to stderr and terminates the process with exit
// status 1.
//
// Returns the node of the last suffix operator parsed, or the primary
// expression itself when there is none. Returns 0 when no primary expression
// is present.
static AstNodeIndex parseSuffixExpr(Parser* p)
{
    bool matched;

    eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &matched);
    if (matched) {
        fprintf(stderr, "async not supported\n");
        exit(1);
    }

    AstNodeIndex expr = parsePrimaryTypeExpr(p);
    if (expr == 0)
        return expr;

    // Keep absorbing suffix operators until parseSuffixOp reports none.
    for (AstNodeIndex op = parseSuffixOp(p); op != 0; op = parseSuffixOp(p))
        expr = op;

    eatToken(p, TOKENIZER_TAG_L_PAREN, &matched);
    if (matched) {
        fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
        exit(1);
    }
    return expr;
}
// Parses `suffix_expr` optionally followed by `! type_expr`.
//
// Returns null_node when no suffix expression is present. Without a `!` the
// suffix expression's node is returned unchanged. With a `!`, a type
// expression is required after it (expectTypeExpr terminates the process if it
// is missing) and a new AST_NODE_TAG_ERROR_UNION node is appended whose main
// token is the `!`, whose lhs is the suffix expression and whose rhs is the
// type expression.
static AstNodeIndex parseErrorUnionExpr(Parser* p)
{
    const AstNodeIndex lhs = parseSuffixExpr(p);
    if (lhs == 0)
        return null_node;

    bool has_bang;
    const AstNodeIndex bang_token = eatToken(p, TOKENIZER_TAG_BANG, &has_bang);
    if (has_bang) {
        const AstData operands = {
            .lhs = lhs,
            .rhs = expectTypeExpr(p),
        };
        return astNodeListAppend(&p->nodes, AST_NODE_TAG_ERROR_UNION, bang_token, operands);
    }
    return lhs;
}
// Parses a type expression at the current token.
//
// Prefix type operators (`?`, `anyframe`, `*`, `**`, `[`) are not implemented
// yet: they are reported on stderr and terminate the process with exit
// status 1. Every other token is handed to parseErrorUnionExpr, whose result
// is returned as-is.
static AstNodeIndex parseTypeExpr(Parser* p)
{
    // Switch on the tag directly; it is a token tag, not a node index, so it
    // must not be stored in an AstNodeIndex.
    switch (p->token_tags[p->tok_i]) {
    case TOKENIZER_TAG_QUESTION_MARK:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_ASTERISK:
    case TOKENIZER_TAG_ASTERISK_ASTERISK:
    case TOKENIZER_TAG_L_BRACKET:
        fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    default:
        return parseErrorUnionExpr(p);
    }
}
// Parses a function parameter list.
//
// Only the empty list `()` is supported so far. If either parenthesis is
// missing, the tag of the token actually found is reported on stderr and the
// process terminates with exit status 1.
//
// Returns a SmallSpan whose `zero_or_one` is 0 (the remaining members are
// zero-initialised).
static SmallSpan parseParamDeclList(Parser* p)
{
    bool ok;

    eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
    if (!ok) {
        // eatToken does not advance on a mismatch, so the cursor still points
        // at the offending token; print its tag, not a token index.
        fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }

    eatToken(p, TOKENIZER_TAG_R_PAREN, &ok);
    if (!ok) {
        fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }

    return (SmallSpan) {
        .zero_or_one = 0,
    };
}
// Claims a fresh slot at the end of the node list and stores `tag` in it.
//
// The remaining columns of the slot are left for the caller to fill in later
// (see setNode). Returns the index of the new slot.
// NOTE(review): assumes astNodeListEnsureCapacity only grows the backing
// storage and leaves `len` unchanged — confirm against its definition.
static uint32_t reserveNode(Parser* p, AstNodeTag tag)
{
    astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1);
    // Grow the list by one; without this the write below would clobber the
    // last existing node (and index out of bounds when the list is empty).
    const uint32_t index = p->nodes.len++;
    p->nodes.tags[index] = tag;
    return index;
}
// Parses a function prototype: `fn [name] ( ) [!] return_type`.
//
// Returns null_node, consuming nothing, when the current token is not `fn`.
// Otherwise returns the index of the prototype node, which is reserved before
// its children are parsed. The node's main token is the `fn` keyword, its lhs
// is the parameter span's `zero_or_one` and its rhs is the return type
// expression (0 if parseTypeExpr found none).
//
// Alignment, address space, link section and calling convention clauses are
// not parsed yet.
// NOTE(review): the node keeps the AST_NODE_TAG_FN_PROTO tag given at
// reservation; confirm that lhs/rhs as filled here is the layout consumers of
// that tag expect.
static AstNodeIndex parseFnProto(Parser* p)
{
    bool ok;
    const AstNodeIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
    if (!ok)
        return null_node;

    // Claim the prototype's slot first, then parse what belongs to it.
    const AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);

    // The function name is optional.
    eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);
    const SmallSpan params = parseParamDeclList(p);
    // TODO: parseByteAlign, parseAddrSpace, parseLinkSection, parseCallconv.
    // A `!` before the return type is optional.
    eatToken(p, TOKENIZER_TAG_BANG, NULL);
    const AstNodeIndex return_type_expr = parseTypeExpr(p);

    // reserveNode() already stored the tag; fill in the remaining columns and
    // hand the node back instead of falling off the end of the function.
    p->nodes.main_tokens[fn_proto_index] = fn_token;
    p->nodes.datas[fn_proto_index].lhs = params.zero_or_one;
    p->nodes.datas[fn_proto_index].rhs = return_type_expr;
    return fn_proto_index;
}
// Parses a block starting at `{`.
//
// Returns null_node, consuming nothing, when the current token is not `{`.
// Statement parsing is not implemented yet, so once a `{` has been consumed
// the function reports that on stderr and terminates the process with exit
// status 1 rather than returning an indeterminate node index.
static AstNodeIndex parseBlock(Parser* p)
{
    bool ok;
    eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
    if (!ok)
        return null_node;

    // TODO: collect the block's statements and build the block node.
    fprintf(stderr, "parseBlock does not support block bodies\n");
    exit(1);
}
// Parses one top-level declaration, which must be present.
//
// An optional leading `extern` (itself optionally followed by a string
// literal), `export`, `inline` or `noinline` is consumed first. Then:
//   - a function prototype followed by `;` returns the prototype node;
//   - a function prototype followed by `{` reserves an AST_NODE_TAG_FN_DECL
//     node, parses the body with parseBlock and returns the declaration node
//     (lhs = prototype, rhs = body, main token = the prototype's main token);
//   - otherwise an optional `threadlocal` is consumed and the result of
//     parseGlobalVarDecl is returned if it is non-zero.
//
// Every failure writes a message to stderr and terminates the process with
// exit status 1: an `extern` function with a body, a prototype followed by
// neither `;` nor `{`, `inline`/`noinline` without a function, or no
// declaration at all.
static AstNodeIndex expectTopLevelDecl(Parser* p)
{
    // Tentatively consume one modifier token; the default case puts it back.
    AstTokenIndex extern_export_inline_token = p->tok_i++;
    bool is_extern = false;
    bool expect_fn = false;
    switch (p->token_tags[extern_export_inline_token]) {
    case TOKENIZER_TAG_KEYWORD_EXTERN:
        // A string literal may follow `extern`; it is skipped if present.
        eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL);
        is_extern = true;
        break;
    case TOKENIZER_TAG_KEYWORD_EXPORT:
        break;
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_NOINLINE:
        expect_fn = true;
        break;
    default:
        // Not a modifier: un-consume it.
        p->tok_i--;
        break;
    }

    AstNodeIndex fn_proto = parseFnProto(p);
    if (fn_proto != 0) {
        switch (p->token_tags[p->tok_i]) {
        case TOKENIZER_TAG_SEMICOLON:
            p->tok_i++;
            return fn_proto;
        case TOKENIZER_TAG_L_BRACE: {
            if (is_extern) {
                fprintf(stderr, "extern functions cannot have a body\n");
                exit(1);
            }
            // Reserve the declaration's slot before parsing its body.
            AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
            AstNodeIndex body_block = parseBlock(p);
            return setNode(
                p,
                fn_decl_index,
                AST_NODE_TAG_FN_DECL,
                p->nodes.main_tokens[fn_proto],
                (AstData) { .lhs = fn_proto, .rhs = body_block });
        }
        default:
            fprintf(stderr, "expected ; or { after function prototype\n");
            exit(1);
        }
    }

    if (expect_fn) {
        fprintf(stderr, "expected function declaration after inline/noinline\n");
        exit(1);
    }

    // `threadlocal` is optional; only its consumption matters here.
    eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
    AstNodeIndex var_decl = parseGlobalVarDecl(p);
    if (var_decl != 0) {
        return var_decl;
    }

    // assuming the program is correct...
    fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
    exit(1);
}
// Parses the members of a container, starting at the current token.
//
// Work in progress. Current behaviour:
//   - leading container doc comments are skipped;
//   - `pub` is consumed (the declaration after it is not parsed yet);
//   - end of file ends the member list: the scratch length saved on entry is
//     restored and the (still empty-initialised) result is returned;
//   - any other token is reported on stderr as not implemented and terminates
//     the process with exit status 1.
static Members parseContainerMembers(Parser* p)
{
    const uint32_t scratch_top = p->scratch.len;
    Members res = (Members) {};
    // TODO: track the last field token and trailing-comma state.

    // Skip every leading container doc comment. Only `ok` is tested: the
    // returned token index is legitimately 0 for a comment that is the first
    // token of the file, so it cannot be used as a success flag.
    bool ok;
    do {
        eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok);
    } while (ok);

    while (1) {
        // TODO: eat doc comments attached to the member.
        switch (p->token_tags[p->tok_i]) {
        case TOKENIZER_TAG_EOF:
            // End of input: nothing more to parse.
            p->scratch.len = scratch_top;
            return res;
        case TOKENIZER_TAG_KEYWORD_PUB:
            p->tok_i++;
            // TODO: AstNodeIndex top_level_decl = expectTopLevelDecl(p);
            break;
        // Everything below is recognised but not implemented yet. The explicit
        // list is kept as a checklist; `default` catches any tag missing from
        // it so the loop can never spin without making progress.
        case TOKENIZER_TAG_INVALID:
        case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
        case TOKENIZER_TAG_IDENTIFIER:
        case TOKENIZER_TAG_STRING_LITERAL:
        case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
        case TOKENIZER_TAG_CHAR_LITERAL:
        case TOKENIZER_TAG_BUILTIN:
        case TOKENIZER_TAG_BANG:
        case TOKENIZER_TAG_PIPE:
        case TOKENIZER_TAG_PIPE_PIPE:
        case TOKENIZER_TAG_PIPE_EQUAL:
        case TOKENIZER_TAG_EQUAL:
        case TOKENIZER_TAG_EQUAL_EQUAL:
        case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT:
        case TOKENIZER_TAG_BANG_EQUAL:
        case TOKENIZER_TAG_L_PAREN:
        case TOKENIZER_TAG_R_PAREN:
        case TOKENIZER_TAG_SEMICOLON:
        case TOKENIZER_TAG_PERCENT:
        case TOKENIZER_TAG_PERCENT_EQUAL:
        case TOKENIZER_TAG_L_BRACE:
        case TOKENIZER_TAG_R_BRACE:
        case TOKENIZER_TAG_L_BRACKET:
        case TOKENIZER_TAG_R_BRACKET:
        case TOKENIZER_TAG_PERIOD:
        case TOKENIZER_TAG_PERIOD_ASTERISK:
        case TOKENIZER_TAG_ELLIPSIS2:
        case TOKENIZER_TAG_ELLIPSIS3:
        case TOKENIZER_TAG_CARET:
        case TOKENIZER_TAG_CARET_EQUAL:
        case TOKENIZER_TAG_PLUS:
        case TOKENIZER_TAG_PLUS_PLUS:
        case TOKENIZER_TAG_PLUS_EQUAL:
        case TOKENIZER_TAG_PLUS_PERCENT:
        case TOKENIZER_TAG_PLUS_PERCENT_EQUAL:
        case TOKENIZER_TAG_PLUS_PIPE:
        case TOKENIZER_TAG_PLUS_PIPE_EQUAL:
        case TOKENIZER_TAG_MINUS:
        case TOKENIZER_TAG_MINUS_EQUAL:
        case TOKENIZER_TAG_MINUS_PERCENT:
        case TOKENIZER_TAG_MINUS_PERCENT_EQUAL:
        case TOKENIZER_TAG_MINUS_PIPE:
        case TOKENIZER_TAG_MINUS_PIPE_EQUAL:
        case TOKENIZER_TAG_ASTERISK:
        case TOKENIZER_TAG_ASTERISK_EQUAL:
        case TOKENIZER_TAG_ASTERISK_ASTERISK:
        case TOKENIZER_TAG_ASTERISK_PERCENT:
        case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL:
        case TOKENIZER_TAG_ASTERISK_PIPE:
        case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL:
        case TOKENIZER_TAG_ARROW:
        case TOKENIZER_TAG_COLON:
        case TOKENIZER_TAG_SLASH:
        case TOKENIZER_TAG_SLASH_EQUAL:
        case TOKENIZER_TAG_COMMA:
        case TOKENIZER_TAG_AMPERSAND:
        case TOKENIZER_TAG_AMPERSAND_EQUAL:
        case TOKENIZER_TAG_QUESTION_MARK:
        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT:
        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT:
        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL:
        case TOKENIZER_TAG_TILDE:
        case TOKENIZER_TAG_NUMBER_LITERAL:
        case TOKENIZER_TAG_DOC_COMMENT:
        case TOKENIZER_TAG_CONTAINER_DOC_COMMENT:
        case TOKENIZER_TAG_KEYWORD_ADDRSPACE:
        case TOKENIZER_TAG_KEYWORD_ALIGN:
        case TOKENIZER_TAG_KEYWORD_ALLOWZERO:
        case TOKENIZER_TAG_KEYWORD_AND:
        case TOKENIZER_TAG_KEYWORD_ANYFRAME:
        case TOKENIZER_TAG_KEYWORD_ANYTYPE:
        case TOKENIZER_TAG_KEYWORD_ASM:
        case TOKENIZER_TAG_KEYWORD_ASYNC:
        case TOKENIZER_TAG_KEYWORD_AWAIT:
        case TOKENIZER_TAG_KEYWORD_BREAK:
        case TOKENIZER_TAG_KEYWORD_CALLCONV:
        case TOKENIZER_TAG_KEYWORD_CATCH:
        case TOKENIZER_TAG_KEYWORD_COMPTIME:
        case TOKENIZER_TAG_KEYWORD_CONST:
        case TOKENIZER_TAG_KEYWORD_CONTINUE:
        case TOKENIZER_TAG_KEYWORD_DEFER:
        case TOKENIZER_TAG_KEYWORD_ELSE:
        case TOKENIZER_TAG_KEYWORD_ENUM:
        case TOKENIZER_TAG_KEYWORD_ERRDEFER:
        case TOKENIZER_TAG_KEYWORD_ERROR:
        case TOKENIZER_TAG_KEYWORD_EXPORT:
        case TOKENIZER_TAG_KEYWORD_EXTERN:
        case TOKENIZER_TAG_KEYWORD_FN:
        case TOKENIZER_TAG_KEYWORD_FOR:
        case TOKENIZER_TAG_KEYWORD_IF:
        case TOKENIZER_TAG_KEYWORD_INLINE:
        case TOKENIZER_TAG_KEYWORD_NOALIAS:
        case TOKENIZER_TAG_KEYWORD_NOINLINE:
        case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
        case TOKENIZER_TAG_KEYWORD_OPAQUE:
        case TOKENIZER_TAG_KEYWORD_OR:
        case TOKENIZER_TAG_KEYWORD_ORELSE:
        case TOKENIZER_TAG_KEYWORD_PACKED:
        case TOKENIZER_TAG_KEYWORD_RESUME:
        case TOKENIZER_TAG_KEYWORD_RETURN:
        case TOKENIZER_TAG_KEYWORD_LINKSECTION:
        case TOKENIZER_TAG_KEYWORD_STRUCT:
        case TOKENIZER_TAG_KEYWORD_SUSPEND:
        case TOKENIZER_TAG_KEYWORD_SWITCH:
        case TOKENIZER_TAG_KEYWORD_TEST:
        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
        case TOKENIZER_TAG_KEYWORD_TRY:
        case TOKENIZER_TAG_KEYWORD_UNION:
        case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
        case TOKENIZER_TAG_KEYWORD_VAR:
        case TOKENIZER_TAG_KEYWORD_VOLATILE:
        case TOKENIZER_TAG_KEYWORD_WHILE:
        default: {
            const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
            fprintf(stderr, "keyword %s not implemented\n", str);
            exit(1);
        }
        }
    }
}