#include #include #include "ast.h" #include "parser.h" const AstNodeIndex null_node = 0; typedef struct { enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END // sets "end" } tag; union { uint32_t end; } payload; } FieldState; typedef struct { AstNodeIndex zero_or_one; AstSubRange multi; } SmallSpan; void parseRoot(Parser* p) { p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT; p->nodes.main_tokens[p->nodes.len] = 0; // members root_members = parseContainerMembers(p); } static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; } static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) { if (p->token_tags[p->tok_i] == tag) { if (ok != NULL) *ok = true; return nextToken(p); } else { if (ok != NULL) *ok = false; return (AstTokenIndex) {}; } } static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data) { p->nodes.tags[i] = tag; p->nodes.main_tokens[i] = main_token; p->nodes.datas[i] = data; return i; } static AstNodeIndex parseTypeExpr(Parser* p); static AstNodeIndex expectTypeExpr(Parser* p) { const AstNodeIndex node = parseTypeExpr(p); if (node == 0) exit(1); return node; } static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { case TOKENIZER_TAG_CHAR_LITERAL: case TOKENIZER_TAG_NUMBER_LITERAL: case TOKENIZER_TAG_KEYWORD_UNREACHABLE: case TOKENIZER_TAG_KEYWORD_ANYFRAME: case TOKENIZER_TAG_STRING_LITERAL: case TOKENIZER_TAG_BUILTIN: case TOKENIZER_TAG_KEYWORD_FN: case TOKENIZER_TAG_KEYWORD_IF: case TOKENIZER_TAG_KEYWORD_SWITCH: case TOKENIZER_TAG_KEYWORD_EXTERN: case TOKENIZER_TAG_KEYWORD_PACKED: case TOKENIZER_TAG_KEYWORD_STRUCT: case TOKENIZER_TAG_KEYWORD_OPAQUE: case TOKENIZER_TAG_KEYWORD_ENUM: case TOKENIZER_TAG_KEYWORD_UNION: case TOKENIZER_TAG_KEYWORD_COMPTIME: case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); break; case TOKENIZER_TAG_IDENTIFIER: if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) { fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n"); exit(1); } return astNodeListAppend( &p->nodes, AST_NODE_TAG_IDENTIFIER, nextToken(p), (AstData) {}); break; case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_FOR: case TOKENIZER_TAG_KEYWORD_WHILE: case TOKENIZER_TAG_PERIOD: case TOKENIZER_TAG_KEYWORD_ERROR: case TOKENIZER_TAG_L_PAREN: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); break; default: return null_node; } } static AstNodeIndex parseSuffixOp(Parser *p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { case TOKENIZER_TAG_L_BRACKET: case TOKENIZER_TAG_PERIOD_ASTERISK: case TOKENIZER_TAG_INVALID_PERIODASTERISKS: case TOKENIZER_TAG_PERIOD: fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok)); exit(1); break; default: return null_node; } } static AstNodeIndex parseSuffixExpr(Parser* p) { bool ok; eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok); if (ok) { fprintf(stderr, "async not supported\n"); exit(1); } AstNodeIndex res = parsePrimaryTypeExpr(p); if (res == 0) return res; while(true) { const AstNodeIndex suffix_op = parseSuffixOp(p); if (suffix_op != 0) { res = suffix_op; continue; } eatToken(p, TOKENIZER_TAG_L_PAREN, &ok); if (ok) { fprintf(stderr, "parseSuffixExpr does not support expr with parens\n"); exit(1); } return res; } } static AstNodeIndex parseErrorUnionExpr(Parser* p) { const AstNodeIndex suffix_expr = parseSuffixExpr(p); if (suffix_expr == 0) return null_node; bool ok; const AstNodeIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok); if (!ok) return suffix_expr; return astNodeListAppend( &p->nodes, AST_NODE_TAG_ERROR_UNION, bang, (AstData) { .lhs = suffix_expr, .rhs = expectTypeExpr(p), }); } static AstNodeIndex parseTypeExpr(Parser* p) { const AstNodeIndex tok = p->token_tags[p->tok_i]; switch (tok) { case TOKENIZER_TAG_QUESTION_MARK: case TOKENIZER_TAG_KEYWORD_ANYFRAME: case TOKENIZER_TAG_ASTERISK: case TOKENIZER_TAG_ASTERISK_ASTERISK: case TOKENIZER_TAG_L_BRACKET: fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok)); exit(1); break; default: return parseErrorUnionExpr(p); } } static SmallSpan parseParamDeclList(Parser* p) { // can only parse functions with no declarations bool ok; AstTokenIndex got_token = eatToken(p, TOKENIZER_TAG_L_PAREN, &ok); if (!ok) { fprintf(stderr, "expected (, got %s\n", tokenizerGetTagString(got_token)); exit(1); } got_token = eatToken(p, TOKENIZER_TAG_R_PAREN, &ok); if (!ok) { fprintf(stderr, "expected ), got %s\n", tokenizerGetTagString(got_token)); exit(1); } return (SmallSpan) { .zero_or_one = 0, }; } static uint32_t reserveNode(Parser* p, AstNodeTag tag) { astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1); p->nodes.tags[p->nodes.len - 1] = tag; return p->nodes.len - 1; } static AstNodeIndex parseFnProto(Parser* p) { bool ok; AstNodeIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok); if (!ok) return null_node; AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO); eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL); SmallSpan params = parseParamDeclList(p); // const params = try p.parseParamDeclList(); // const align_expr = try p.parseByteAlign(); // const addrspace_expr = try p.parseAddrSpace(); // const section_expr = try p.parseLinkSection(); // const callconv_expr = try p.parseCallconv(); eatToken(p, TOKENIZER_TAG_BANG, NULL); const AstNodeIndex return_type_expr = parseTypeExpr(p); } static AstNodeIndex parseBlock(Parser *p) { bool ok; const AstNodeIndex lbrace = eatToken(p, TOKENIZER_TAG_L_BRACE, &ok); if (!ok) return null_node; const uint32_t scratch_top = p->scratch.len; cleanup: } static AstNodeIndex expectTopLevelDecl(Parser* p) { AstTokenIndex extern_export_inline_token = p->tok_i++; bool is_extern = false; bool expect_fn = false; bool expect_var_or_fn = false; switch (p->token_tags[extern_export_inline_token]) { case TOKENIZER_TAG_KEYWORD_EXTERN: eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL); is_extern = true; expect_var_or_fn = true; break; case TOKENIZER_TAG_KEYWORD_EXPORT: expect_var_or_fn = true; break; case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_NOINLINE: expect_fn = true; break; default: p->tok_i--; } AstNodeIndex fn_proto = parseFnProto(p); if (fn_proto != 0) { switch (p->token_tags[p->tok_i]) { case TOKENIZER_TAG_SEMICOLON: p->tok_i++; return fn_proto; break; case TOKENIZER_TAG_L_BRACE: if (is_extern) exit(1); AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL); AstNodeIndex body_block = parseBlock(p); return setNode( p, fn_decl_index, AST_NODE_TAG_FN_DECL, p->nodes.main_tokens[fn_proto], (AstData) { .lhs = fn_proto, .rhs = body_block }); default: exit(1); // Expected semicolon or left brace } } if (expect_fn) exit(1); AstTokenIndex thread_local_token = eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL); AstNodeIndex var_decl = parseGlobalVarDecl(p); if (var_decl != 0) { return var_decl; } // assuming the program is correct... fprintf(stderr, "the next token should be usingnamespace, which is not supported\n"); exit(1); } static Members parseContainerMembers(Parser* p) { const uint32_t scratch_top = p->scratch.len; Members res = (Members) {}; // ast_token_index last_field; bool ok; while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok) ; // bool trailing = false; while (1) { // SKIP eat doc comments switch (p->token_tags[p->tok_i]) { case TOKENIZER_TAG_INVALID: case TOKENIZER_TAG_INVALID_PERIODASTERISKS: case TOKENIZER_TAG_IDENTIFIER: case TOKENIZER_TAG_STRING_LITERAL: case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE: case TOKENIZER_TAG_CHAR_LITERAL: case TOKENIZER_TAG_EOF: case TOKENIZER_TAG_BUILTIN: case TOKENIZER_TAG_BANG: case TOKENIZER_TAG_PIPE: case TOKENIZER_TAG_PIPE_PIPE: case TOKENIZER_TAG_PIPE_EQUAL: case TOKENIZER_TAG_EQUAL: case TOKENIZER_TAG_EQUAL_EQUAL: case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT: case TOKENIZER_TAG_BANG_EQUAL: case TOKENIZER_TAG_L_PAREN: case TOKENIZER_TAG_R_PAREN: case TOKENIZER_TAG_SEMICOLON: case TOKENIZER_TAG_PERCENT: case TOKENIZER_TAG_PERCENT_EQUAL: case TOKENIZER_TAG_L_BRACE: case TOKENIZER_TAG_R_BRACE: case TOKENIZER_TAG_L_BRACKET: case TOKENIZER_TAG_R_BRACKET: case TOKENIZER_TAG_PERIOD: case TOKENIZER_TAG_PERIOD_ASTERISK: case TOKENIZER_TAG_ELLIPSIS2: case TOKENIZER_TAG_ELLIPSIS3: case TOKENIZER_TAG_CARET: case TOKENIZER_TAG_CARET_EQUAL: case TOKENIZER_TAG_PLUS: case TOKENIZER_TAG_PLUS_PLUS: case TOKENIZER_TAG_PLUS_EQUAL: case TOKENIZER_TAG_PLUS_PERCENT: case TOKENIZER_TAG_PLUS_PERCENT_EQUAL: case TOKENIZER_TAG_PLUS_PIPE: case TOKENIZER_TAG_PLUS_PIPE_EQUAL: case TOKENIZER_TAG_MINUS: case TOKENIZER_TAG_MINUS_EQUAL: case TOKENIZER_TAG_MINUS_PERCENT: case TOKENIZER_TAG_MINUS_PERCENT_EQUAL: case TOKENIZER_TAG_MINUS_PIPE: case TOKENIZER_TAG_MINUS_PIPE_EQUAL: case TOKENIZER_TAG_ASTERISK: case TOKENIZER_TAG_ASTERISK_EQUAL: case TOKENIZER_TAG_ASTERISK_ASTERISK: case TOKENIZER_TAG_ASTERISK_PERCENT: case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL: case TOKENIZER_TAG_ASTERISK_PIPE: case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL: case TOKENIZER_TAG_ARROW: case TOKENIZER_TAG_COLON: case TOKENIZER_TAG_SLASH: case TOKENIZER_TAG_SLASH_EQUAL: case TOKENIZER_TAG_COMMA: case TOKENIZER_TAG_AMPERSAND: case TOKENIZER_TAG_AMPERSAND_EQUAL: case TOKENIZER_TAG_QUESTION_MARK: case TOKENIZER_TAG_ANGLE_BRACKET_LEFT: case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL: case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL: case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL: case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT: case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL: case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL: case TOKENIZER_TAG_TILDE: case TOKENIZER_TAG_NUMBER_LITERAL: case TOKENIZER_TAG_DOC_COMMENT: case TOKENIZER_TAG_CONTAINER_DOC_COMMENT: case TOKENIZER_TAG_KEYWORD_ADDRSPACE: case TOKENIZER_TAG_KEYWORD_ALIGN: case TOKENIZER_TAG_KEYWORD_ALLOWZERO: case TOKENIZER_TAG_KEYWORD_AND: case TOKENIZER_TAG_KEYWORD_ANYFRAME: case TOKENIZER_TAG_KEYWORD_ANYTYPE: case TOKENIZER_TAG_KEYWORD_ASM: case TOKENIZER_TAG_KEYWORD_ASYNC: case TOKENIZER_TAG_KEYWORD_AWAIT: case TOKENIZER_TAG_KEYWORD_BREAK: case TOKENIZER_TAG_KEYWORD_CALLCONV: case TOKENIZER_TAG_KEYWORD_CATCH: case TOKENIZER_TAG_KEYWORD_COMPTIME: case TOKENIZER_TAG_KEYWORD_CONST: case TOKENIZER_TAG_KEYWORD_CONTINUE: case TOKENIZER_TAG_KEYWORD_DEFER: case TOKENIZER_TAG_KEYWORD_ELSE: case TOKENIZER_TAG_KEYWORD_ENUM: case TOKENIZER_TAG_KEYWORD_ERRDEFER: case TOKENIZER_TAG_KEYWORD_ERROR: case TOKENIZER_TAG_KEYWORD_EXPORT: case TOKENIZER_TAG_KEYWORD_EXTERN: case TOKENIZER_TAG_KEYWORD_FN: case TOKENIZER_TAG_KEYWORD_FOR: case TOKENIZER_TAG_KEYWORD_IF: case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_NOALIAS: case TOKENIZER_TAG_KEYWORD_NOINLINE: case TOKENIZER_TAG_KEYWORD_NOSUSPEND: case TOKENIZER_TAG_KEYWORD_OPAQUE: case TOKENIZER_TAG_KEYWORD_OR: case TOKENIZER_TAG_KEYWORD_ORELSE: case TOKENIZER_TAG_KEYWORD_PACKED: case TOKENIZER_TAG_KEYWORD_RESUME: case TOKENIZER_TAG_KEYWORD_RETURN: case TOKENIZER_TAG_KEYWORD_LINKSECTION: case TOKENIZER_TAG_KEYWORD_STRUCT: case TOKENIZER_TAG_KEYWORD_SUSPEND: case TOKENIZER_TAG_KEYWORD_SWITCH: case TOKENIZER_TAG_KEYWORD_TEST: case TOKENIZER_TAG_KEYWORD_THREADLOCAL: case TOKENIZER_TAG_KEYWORD_TRY: case TOKENIZER_TAG_KEYWORD_UNION: case TOKENIZER_TAG_KEYWORD_UNREACHABLE: case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE: case TOKENIZER_TAG_KEYWORD_VAR: case TOKENIZER_TAG_KEYWORD_VOLATILE: case TOKENIZER_TAG_KEYWORD_WHILE:; const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]); fprintf(stderr, "keyword %s not implemented\n", str); exit(1); case TOKENIZER_TAG_KEYWORD_PUB: p->tok_i++; // AstNodeIndex top_level_decl = expectTopLevelDecl(*p); break; // TODO do work } } p->scratch.len = scratch_top; return res; }