467 lines
14 KiB
C
467 lines
14 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "ast.h"
|
|
#include "parser.h"
|
|
|
|
// Sentinel node index: the parse functions in this file return it to signal
// "nothing was parsed here", and callers compare their result against it.
const AstNodeIndex null_node = 0;
|
|
|
|
/*
 * State tracked for container fields.
 *
 * `tag` selects the state; `payload.end` is only meaningful when `tag` is
 * FIELD_STATE_END.
 */
typedef struct {
    enum {
        FIELD_STATE_NONE,
        FIELD_STATE_SEEN,
        FIELD_STATE_END // sets "end"
    } tag;
    union {
        uint32_t end; // valid only for FIELD_STATE_END
    } payload;
} FieldState;
|
|
|
|
typedef struct {
|
|
AstNodeIndex zero_or_one;
|
|
AstSubRange multi;
|
|
} SmallSpan;
|
|
|
|
/*
 * Appends the root node to the parser's node list.
 *
 * The root node gets tag AST_NODE_TAG_ROOT and main token 0. Both fields are
 * written at the same index: the index is captured before `len` is bumped so
 * that `main_tokens` is not written one slot past the node just created.
 *
 * No capacity check is performed here; the node list must already have room
 * for one more node.
 */
void parseRoot(Parser* p)
{
    const AstNodeIndex root = p->nodes.len++;
    p->nodes.tags[root] = AST_NODE_TAG_ROOT;
    p->nodes.main_tokens[root] = 0;

    // TODO: parse the root container:
    // members root_members = parseContainerMembers(p);
}
|
|
|
|
/*
 * Consumes the current token unconditionally.
 *
 * Returns the index of the token that was current before the call and
 * advances `p->tok_i` by one.
 */
static AstTokenIndex nextToken(Parser* p)
{
    const AstTokenIndex consumed = p->tok_i;
    p->tok_i++;
    return consumed;
}
|
|
|
|
/*
 * Consumes the current token if, and only if, its tag equals `tag`.
 *
 * p   - parser whose current token is inspected.
 * tag - tag the current token must have to be consumed.
 * ok  - optional out-parameter (may be NULL); set to true when the token was
 *       consumed and to false otherwise.
 *
 * Returns the index of the consumed token, or 0 when the tag did not match.
 * A return value of 0 is indistinguishable from "consumed the first token",
 * so callers that care about success must look at `*ok`.
 */
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
{
    const bool matched = p->token_tags[p->tok_i] == tag;

    if (ok != NULL)
        *ok = matched;

    if (!matched)
        return 0;

    return nextToken(p);
}
|
|
|
|
/*
 * Overwrites an existing slot of the node list.
 *
 * p          - parser owning the node list.
 * i          - index of the slot to fill; it must already exist.
 * tag        - node tag to store.
 * main_token - main token to store.
 * data       - node payload to store.
 *
 * Returns `i`, so the call can be used directly in a `return` statement.
 */
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
{
    const uint32_t slot = i;

    p->nodes.tags[slot] = tag;
    p->nodes.main_tokens[slot] = main_token;
    p->nodes.datas[slot] = data;

    return slot;
}
|
|
|
|
// Forward declaration: expectTypeExpr (and, through it, parseErrorUnionExpr)
// calls parseTypeExpr before its definition further down in this file.
static AstNodeIndex parseTypeExpr(Parser* p);
|
|
|
|
/*
 * Parses a type expression that is required at the current position.
 *
 * Returns the node produced by parseTypeExpr. If no type expression is
 * present, prints a diagnostic to stderr and terminates the process with
 * exit status 1, so the returned index is never null_node.
 */
static AstNodeIndex expectTypeExpr(Parser* p)
{
    const AstNodeIndex node = parseTypeExpr(p);
    if (node == null_node) {
        // Report the failure like the other parse functions do instead of
        // exiting silently.
        fprintf(stderr, "expected type expression\n");
        exit(1);
    }
    return node;
}
|
|
|
|
/*
 * Parses a primary type expression starting at the current token.
 *
 * Only plain identifiers are supported so far: an identifier that is not
 * followed by a colon is consumed and appended as an AST_NODE_TAG_IDENTIFIER
 * node, whose index is returned.
 *
 * Every other token that can start a primary type expression is recognised
 * but not implemented: a message is printed to stderr and the process exits
 * with status 1. Any remaining token yields null_node without consuming
 * anything.
 */
static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];

    if (tok == TOKENIZER_TAG_IDENTIFIER) {
        // `identifier:` would be a label, which is not handled yet.
        if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
            exit(1);
        }
        const AstTokenIndex ident = nextToken(p);
        return astNodeListAppend(&p->nodes, AST_NODE_TAG_IDENTIFIER, ident, (AstData) {});
    }

    switch (tok) {
    case TOKENIZER_TAG_CHAR_LITERAL:
    case TOKENIZER_TAG_NUMBER_LITERAL:
    case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_STRING_LITERAL:
    case TOKENIZER_TAG_BUILTIN:
    case TOKENIZER_TAG_KEYWORD_FN:
    case TOKENIZER_TAG_KEYWORD_IF:
    case TOKENIZER_TAG_KEYWORD_SWITCH:
    case TOKENIZER_TAG_KEYWORD_EXTERN:
    case TOKENIZER_TAG_KEYWORD_PACKED:
    case TOKENIZER_TAG_KEYWORD_STRUCT:
    case TOKENIZER_TAG_KEYWORD_OPAQUE:
    case TOKENIZER_TAG_KEYWORD_ENUM:
    case TOKENIZER_TAG_KEYWORD_UNION:
    case TOKENIZER_TAG_KEYWORD_COMPTIME:
    case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_FOR:
    case TOKENIZER_TAG_KEYWORD_WHILE:
    case TOKENIZER_TAG_PERIOD:
    case TOKENIZER_TAG_KEYWORD_ERROR:
    case TOKENIZER_TAG_L_PAREN:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}
|
|
|
|
/*
 * Parses a suffix operator at the current token.
 *
 * None of the suffix operators is implemented yet: when the current token
 * starts one, a message is printed to stderr and the process exits with
 * status 1. For any other token, null_node is returned and nothing is
 * consumed.
 */
static AstNodeIndex parseSuffixOp(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];

    const bool starts_suffix_op = tok == TOKENIZER_TAG_L_BRACKET
        || tok == TOKENIZER_TAG_PERIOD_ASTERISK
        || tok == TOKENIZER_TAG_INVALID_PERIODASTERISKS
        || tok == TOKENIZER_TAG_PERIOD;

    if (!starts_suffix_op)
        return null_node;

    fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
    exit(1);
}
|
|
|
|
/*
 * Parses a suffix expression: a primary type expression followed by any
 * number of suffix operators.
 *
 * `async` prefixes and call expressions (a `(` after the operand) are not
 * supported; both print a message to stderr and exit with status 1.
 *
 * Returns the resulting node, or null_node when no primary type expression
 * starts at the current token.
 */
static AstNodeIndex parseSuffixExpr(Parser* p)
{
    bool is_async;
    eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &is_async);
    if (is_async) {
        fprintf(stderr, "async not supported\n");
        exit(1);
    }

    AstNodeIndex res = parsePrimaryTypeExpr(p);
    if (res == 0)
        return res;

    // Fold suffix operators into the result until none is left.
    for (;;) {
        const AstNodeIndex suffix_op = parseSuffixOp(p);
        if (suffix_op == 0)
            break;
        res = suffix_op;
    }

    bool is_call;
    eatToken(p, TOKENIZER_TAG_L_PAREN, &is_call);
    if (is_call) {
        fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
        exit(1);
    }

    return res;
}
|
|
|
|
/*
 * Parses `SuffixExpr` optionally followed by `! TypeExpr`.
 *
 * Returns:
 *   - null_node when no suffix expression starts at the current token;
 *   - the suffix expression itself when it is not followed by `!`;
 *   - otherwise a new AST_NODE_TAG_ERROR_UNION node whose main token is the
 *     `!`, whose lhs is the suffix expression and whose rhs is the type
 *     expression after the `!` (expectTypeExpr exits if it is missing).
 */
static AstNodeIndex parseErrorUnionExpr(Parser* p)
{
    const AstNodeIndex suffix_expr = parseSuffixExpr(p);
    if (suffix_expr == null_node)
        return null_node;

    bool has_bang;
    // eatToken yields a token index, so keep it in a token-index variable.
    const AstTokenIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &has_bang);
    if (!has_bang)
        return suffix_expr;

    // Parse the right-hand side first so that its nodes are appended before
    // the error-union node, independent of argument evaluation order.
    const AstNodeIndex payload_type = expectTypeExpr(p);

    return astNodeListAppend(
        &p->nodes,
        AST_NODE_TAG_ERROR_UNION,
        bang,
        (AstData) {
            .lhs = suffix_expr,
            .rhs = payload_type,
        });
}
|
|
|
|
/*
 * Parses a type expression starting at the current token.
 *
 * Prefix type operators (`?`, `anyframe`, `*`, `**`, `[`) are recognised but
 * not implemented: a message is printed to stderr and the process exits with
 * status 1. Everything else is delegated to parseErrorUnionExpr, whose result
 * (possibly null_node) is returned.
 */
static AstNodeIndex parseTypeExpr(Parser* p)
{
    // This is a token tag, not a node index; type it accordingly so the
    // switch and tokenizerGetTagString operate on the enum type.
    const TokenizerTag tok = p->token_tags[p->tok_i];

    switch (tok) {
    case TOKENIZER_TAG_QUESTION_MARK:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_ASTERISK:
    case TOKENIZER_TAG_ASTERISK_ASTERISK:
    case TOKENIZER_TAG_L_BRACKET:
        fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
        exit(1);
    default:
        return parseErrorUnionExpr(p);
    }
}
|
|
|
|
/*
 * Parses a function parameter list.
 *
 * Only the empty list `()` is supported so far. If the current tokens are not
 * exactly `(` followed by `)`, a message naming the offending token is printed
 * to stderr and the process exits with status 1.
 *
 * Returns a SmallSpan with `zero_or_one` set to 0 (no parameters); `multi` is
 * zero-initialised.
 */
static SmallSpan parseParamDeclList(Parser* p)
{
    bool ok;

    eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
    if (!ok) {
        // eatToken does not advance on a mismatch, so the current token is the
        // one that was found instead. Its tag, not its index, is reported.
        fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }

    eatToken(p, TOKENIZER_TAG_R_PAREN, &ok);
    if (!ok) {
        fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }

    return (SmallSpan) {
        .zero_or_one = 0,
    };
}
|
|
|
|
/*
 * Reserves a new slot at the end of the node list and stores `tag` in it.
 *
 * The remaining fields of the slot are left unset; the caller is expected to
 * fill them later with setNode.
 *
 * Returns the index of the reserved slot.
 *
 * NOTE(review): assumes astNodeListEnsureCapacity only grows the backing
 * storage and leaves `len` untouched, so `len` is bumped here. Without the
 * increment the last existing node would be overwritten (and `len - 1` would
 * wrap around for an empty list).
 */
static uint32_t reserveNode(Parser* p, AstNodeTag tag)
{
    astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1);

    const uint32_t index = p->nodes.len++;
    p->nodes.tags[index] = tag;
    return index;
}
|
|
|
|
/*
 * Parses a function prototype: `fn [name] () [!] ReturnType`.
 *
 * Returns null_node (consuming nothing) when the current token is not `fn`.
 * Otherwise a node slot is reserved before the sub-expressions are parsed,
 * and it is filled in once the return type is known:
 *   tag        = AST_NODE_TAG_FN_PROTO
 *   main_token = the `fn` token
 *   lhs        = the (single, optional) parameter node
 *   rhs        = the return type expression
 *
 * A missing return type prints a message to stderr and exits with status 1.
 *
 * TODO: alignment, address space, link section and calling convention clauses
 * (parseByteAlign, parseAddrSpace, parseLinkSection, parseCallconv in the Zig
 * parser) are not handled yet.
 */
static AstNodeIndex parseFnProto(Parser* p)
{
    bool ok;
    const AstTokenIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
    if (!ok)
        return null_node;

    const uint32_t fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);

    // The function name is optional.
    eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);

    const SmallSpan params = parseParamDeclList(p);

    // An optional `!` marks an inferred error set.
    eatToken(p, TOKENIZER_TAG_BANG, NULL);

    const AstNodeIndex return_type_expr = parseTypeExpr(p);
    if (return_type_expr == null_node) {
        fprintf(stderr, "expected return type expression\n");
        exit(1);
    }

    return setNode(
        p,
        fn_proto_index,
        AST_NODE_TAG_FN_PROTO,
        fn_token,
        (AstData) { .lhs = params.zero_or_one, .rhs = return_type_expr });
}
|
|
|
|
/*
 * Parses a `{ ... }` block.
 *
 * Returns null_node (consuming nothing) when the current token is not `{`.
 *
 * Statement parsing is not implemented yet. Once the opening brace has been
 * consumed there is no node to return, so the function reports that and exits
 * with status 1 rather than falling off the end of a non-void function.
 */
static AstNodeIndex parseBlock(Parser* p)
{
    bool ok;
    eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
    if (!ok)
        return null_node;

    // TODO: parse statements up to the matching `}` (using p->scratch as
    // temporary storage) and build the block node.
    fprintf(stderr, "parseBlock does not support block bodies\n");
    exit(1);
}
|
|
|
|
/*
 * Parses one top-level declaration, which must be present.
 *
 * Accepted forms:
 *   - an optional `extern ["lib"]`, `export`, `inline` or `noinline` prefix;
 *   - a function prototype followed by `;` (returns the prototype node) or by
 *     a block (returns a new AST_NODE_TAG_FN_DECL node with lhs = prototype
 *     and rhs = body);
 *   - otherwise an optional `threadlocal` followed by a global variable
 *     declaration (returns the node from parseGlobalVarDecl).
 *
 * Every failure prints a message to stderr and exits with status 1; the
 * function never returns null_node.
 */
static AstNodeIndex expectTopLevelDecl(Parser* p)
{
    // Tentatively consume one token as the prefix; the default case below
    // puts it back when it is not a prefix keyword.
    const AstTokenIndex extern_export_inline_token = p->tok_i++;
    bool is_extern = false;
    bool expect_fn = false;
    bool expect_var_or_fn = false;

    switch (p->token_tags[extern_export_inline_token]) {
    case TOKENIZER_TAG_KEYWORD_EXTERN:
        // Optional library name after `extern`.
        eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL);
        is_extern = true;
        expect_var_or_fn = true;
        break;
    case TOKENIZER_TAG_KEYWORD_EXPORT:
        expect_var_or_fn = true;
        break;
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_NOINLINE:
        expect_fn = true;
        break;
    default:
        p->tok_i--;
        break;
    }

    const AstNodeIndex fn_proto = parseFnProto(p);
    if (fn_proto != null_node) {
        switch (p->token_tags[p->tok_i]) {
        case TOKENIZER_TAG_SEMICOLON:
            p->tok_i++;
            return fn_proto;
        case TOKENIZER_TAG_L_BRACE: {
            if (is_extern) {
                fprintf(stderr, "extern functions cannot have a body\n");
                exit(1);
            }

            // Reserve the declaration slot before parsing the body so that
            // the declaration node precedes the nodes of its body.
            const uint32_t fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
            const AstNodeIndex body_block = parseBlock(p);
            return setNode(
                p,
                fn_decl_index,
                AST_NODE_TAG_FN_DECL,
                p->nodes.main_tokens[fn_proto],
                (AstData) { .lhs = fn_proto, .rhs = body_block });
        }
        default:
            fprintf(stderr, "expected ; or { after function prototype, got %s\n",
                tokenizerGetTagString(p->token_tags[p->tok_i]));
            exit(1);
        }
    }

    if (expect_fn) {
        fprintf(stderr, "expected function declaration\n");
        exit(1);
    }

    bool has_threadlocal;
    eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, &has_threadlocal);

    const AstNodeIndex var_decl = parseGlobalVarDecl(p);
    if (var_decl != null_node)
        return var_decl;

    if (has_threadlocal) {
        fprintf(stderr, "expected variable declaration after threadlocal\n");
        exit(1);
    }
    if (expect_var_or_fn) {
        fprintf(stderr, "expected variable or function declaration\n");
        exit(1);
    }

    // assuming the program is correct...
    fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
    exit(1);
}
|
|
|
|
/*
 * Parses the members of a container (work in progress).
 *
 * Leading container doc comments are skipped. After that only `pub` is
 * accepted, and it is merely consumed; every other listed token prints
 * "keyword <tag> not implemented" to stderr and exits with status 1.
 *
 * As written, the member loop has no exit other than exit(1), so the scratch
 * reset and the return of the zero-initialised Members value below it are not
 * reached yet.
 */
static Members parseContainerMembers(Parser* p)
{
    const uint32_t scratch_top = p->scratch.len;
    Members res = (Members) {};
    // ast_token_index last_field;

    // Skip container doc comments. Only `ok` says whether a token was
    // consumed: the index returned by eatToken is 0 both on failure and for a
    // comment sitting at token index 0, so it must not be used as the loop
    // condition.
    bool ok;
    do {
        eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok);
    } while (ok);

    // bool trailing = false;
    while (1) {
        // SKIP eat doc comments

        switch (p->token_tags[p->tok_i]) {
        case TOKENIZER_TAG_INVALID:
        case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
        case TOKENIZER_TAG_IDENTIFIER:
        case TOKENIZER_TAG_STRING_LITERAL:
        case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
        case TOKENIZER_TAG_CHAR_LITERAL:
        case TOKENIZER_TAG_EOF:
        case TOKENIZER_TAG_BUILTIN:
        case TOKENIZER_TAG_BANG:
        case TOKENIZER_TAG_PIPE:
        case TOKENIZER_TAG_PIPE_PIPE:
        case TOKENIZER_TAG_PIPE_EQUAL:
        case TOKENIZER_TAG_EQUAL:
        case TOKENIZER_TAG_EQUAL_EQUAL:
        case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT:
        case TOKENIZER_TAG_BANG_EQUAL:
        case TOKENIZER_TAG_L_PAREN:
        case TOKENIZER_TAG_R_PAREN:
        case TOKENIZER_TAG_SEMICOLON:
        case TOKENIZER_TAG_PERCENT:
        case TOKENIZER_TAG_PERCENT_EQUAL:
        case TOKENIZER_TAG_L_BRACE:
        case TOKENIZER_TAG_R_BRACE:
        case TOKENIZER_TAG_L_BRACKET:
        case TOKENIZER_TAG_R_BRACKET:
        case TOKENIZER_TAG_PERIOD:
        case TOKENIZER_TAG_PERIOD_ASTERISK:
        case TOKENIZER_TAG_ELLIPSIS2:
        case TOKENIZER_TAG_ELLIPSIS3:
        case TOKENIZER_TAG_CARET:
        case TOKENIZER_TAG_CARET_EQUAL:
        case TOKENIZER_TAG_PLUS:
        case TOKENIZER_TAG_PLUS_PLUS:
        case TOKENIZER_TAG_PLUS_EQUAL:
        case TOKENIZER_TAG_PLUS_PERCENT:
        case TOKENIZER_TAG_PLUS_PERCENT_EQUAL:
        case TOKENIZER_TAG_PLUS_PIPE:
        case TOKENIZER_TAG_PLUS_PIPE_EQUAL:
        case TOKENIZER_TAG_MINUS:
        case TOKENIZER_TAG_MINUS_EQUAL:
        case TOKENIZER_TAG_MINUS_PERCENT:
        case TOKENIZER_TAG_MINUS_PERCENT_EQUAL:
        case TOKENIZER_TAG_MINUS_PIPE:
        case TOKENIZER_TAG_MINUS_PIPE_EQUAL:
        case TOKENIZER_TAG_ASTERISK:
        case TOKENIZER_TAG_ASTERISK_EQUAL:
        case TOKENIZER_TAG_ASTERISK_ASTERISK:
        case TOKENIZER_TAG_ASTERISK_PERCENT:
        case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL:
        case TOKENIZER_TAG_ASTERISK_PIPE:
        case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL:
        case TOKENIZER_TAG_ARROW:
        case TOKENIZER_TAG_COLON:
        case TOKENIZER_TAG_SLASH:
        case TOKENIZER_TAG_SLASH_EQUAL:
        case TOKENIZER_TAG_COMMA:
        case TOKENIZER_TAG_AMPERSAND:
        case TOKENIZER_TAG_AMPERSAND_EQUAL:
        case TOKENIZER_TAG_QUESTION_MARK:
        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT:
        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT:
        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL:
        case TOKENIZER_TAG_TILDE:
        case TOKENIZER_TAG_NUMBER_LITERAL:
        case TOKENIZER_TAG_DOC_COMMENT:
        case TOKENIZER_TAG_CONTAINER_DOC_COMMENT:
        case TOKENIZER_TAG_KEYWORD_ADDRSPACE:
        case TOKENIZER_TAG_KEYWORD_ALIGN:
        case TOKENIZER_TAG_KEYWORD_ALLOWZERO:
        case TOKENIZER_TAG_KEYWORD_AND:
        case TOKENIZER_TAG_KEYWORD_ANYFRAME:
        case TOKENIZER_TAG_KEYWORD_ANYTYPE:
        case TOKENIZER_TAG_KEYWORD_ASM:
        case TOKENIZER_TAG_KEYWORD_ASYNC:
        case TOKENIZER_TAG_KEYWORD_AWAIT:
        case TOKENIZER_TAG_KEYWORD_BREAK:
        case TOKENIZER_TAG_KEYWORD_CALLCONV:
        case TOKENIZER_TAG_KEYWORD_CATCH:
        case TOKENIZER_TAG_KEYWORD_COMPTIME:
        case TOKENIZER_TAG_KEYWORD_CONST:
        case TOKENIZER_TAG_KEYWORD_CONTINUE:
        case TOKENIZER_TAG_KEYWORD_DEFER:
        case TOKENIZER_TAG_KEYWORD_ELSE:
        case TOKENIZER_TAG_KEYWORD_ENUM:
        case TOKENIZER_TAG_KEYWORD_ERRDEFER:
        case TOKENIZER_TAG_KEYWORD_ERROR:
        case TOKENIZER_TAG_KEYWORD_EXPORT:
        case TOKENIZER_TAG_KEYWORD_EXTERN:
        case TOKENIZER_TAG_KEYWORD_FN:
        case TOKENIZER_TAG_KEYWORD_FOR:
        case TOKENIZER_TAG_KEYWORD_IF:
        case TOKENIZER_TAG_KEYWORD_INLINE:
        case TOKENIZER_TAG_KEYWORD_NOALIAS:
        case TOKENIZER_TAG_KEYWORD_NOINLINE:
        case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
        case TOKENIZER_TAG_KEYWORD_OPAQUE:
        case TOKENIZER_TAG_KEYWORD_OR:
        case TOKENIZER_TAG_KEYWORD_ORELSE:
        case TOKENIZER_TAG_KEYWORD_PACKED:
        case TOKENIZER_TAG_KEYWORD_RESUME:
        case TOKENIZER_TAG_KEYWORD_RETURN:
        case TOKENIZER_TAG_KEYWORD_LINKSECTION:
        case TOKENIZER_TAG_KEYWORD_STRUCT:
        case TOKENIZER_TAG_KEYWORD_SUSPEND:
        case TOKENIZER_TAG_KEYWORD_SWITCH:
        case TOKENIZER_TAG_KEYWORD_TEST:
        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
        case TOKENIZER_TAG_KEYWORD_TRY:
        case TOKENIZER_TAG_KEYWORD_UNION:
        case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
        case TOKENIZER_TAG_KEYWORD_VAR:
        case TOKENIZER_TAG_KEYWORD_VOLATILE:
        case TOKENIZER_TAG_KEYWORD_WHILE: {
            const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
            fprintf(stderr, "keyword %s not implemented\n", str);
            exit(1);
        }
        case TOKENIZER_TAG_KEYWORD_PUB:
            p->tok_i++;
            // AstNodeIndex top_level_decl = expectTopLevelDecl(*p);
            break;
            // TODO do work
        }
    }

    p->scratch.len = scratch_top;
    return res;
}
|