#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ast.h"
#include "common.h"
#include "parser.h"

const AstNodeIndex null_node = 0;

typedef struct {
    enum {
        FIELD_STATE_NONE,
        FIELD_STATE_SEEN,
        FIELD_STATE_END // sets "end"
    } tag;
    union {
        uint32_t end;
    } payload;
} FieldState;

typedef struct {
    AstNodeIndex zero_or_one;
    AstSubRange multi;
} SmallSpan;

static Members parseContainerMembers(Parser* p);

void parseRoot(Parser* p) {
    // Capacity for the root node is assumed to be preallocated by the caller.
    p->nodes.tags[p->nodes.len] = AST_NODE_TAG_ROOT;
    p->nodes.main_tokens[p->nodes.len] = 0;
    p->nodes.len++;
    // members
    const Members root_members = parseContainerMembers(p);
    (void)root_members; // TODO: store the members span in the root node's data
}

static AstTokenIndex nextToken(Parser* p) {
    return p->tok_i++;
}

static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) {
    if (p->token_tags[p->tok_i] == tag) {
        if (ok != NULL) *ok = true;
        return nextToken(p);
    } else {
        if (ok != NULL) *ok = false;
        return 0;
    }
}

static void eatDocComments(Parser* p) {
    bool ok;
    while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) {
    }
}

static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
    p->nodes.tags[i] = item.tag;
    p->nodes.main_tokens[i] = item.main_token;
    p->nodes.datas[i] = item.data;
    return i;
}

static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
    const uint32_t new_len = list->len + additional;
    if (new_len <= list->cap) {
        return;
    }
    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
    list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
    list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
    list->datas = realloc(list->datas, new_cap * sizeof(AstData));
    if (!list->tags || !list->main_tokens || !list->datas) exit(1);
    list->cap = new_cap;
}

static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
    astNodeListEnsureCapacity(nodes, 1);
    nodes->tags[nodes->len] = item.tag;
    nodes->main_tokens[nodes->len] = item.main_token;
    nodes->datas[nodes->len] = item.data;
    return nodes->len++;
}

static AstNodeIndex parseTypeExpr(Parser* p);

static AstNodeIndex expectTypeExpr(Parser* p) {
    const AstNodeIndex node = parseTypeExpr(p);
    if (node == 0) exit(1);
    return node;
}

static AstNodeIndex parseByteAlign(Parser* p) {
    bool ok;
    eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok);
    if (!ok) {
        return null_node;
    }
    fprintf(stderr, "parseByteAlign cannot parse alignment\n");
    exit(1);
    return 0; // tcc
}

typedef struct {
    AstNodeIndex align_expr, value_expr;
} NodeContainerField;

static AstNodeIndex expectContainerField(Parser* p) {
    eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, NULL);
    const AstTokenIndex main_token = p->tok_i;
    if (p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER
        && p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON)
        p->tok_i += 2;

    const AstNodeIndex type_expr = expectTypeExpr(p);
    const AstNodeIndex align_expr = parseByteAlign(p);
    const AstNodeIndex value_expr = 0;
    bool ok;
    eatToken(p, TOKENIZER_TAG_EQUAL, &ok);
    if (ok) {
        fprintf(stderr, "expectContainerField does not support expr\n");
        exit(1);
    }

    if (align_expr == 0) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_TAG_CONTAINER_FIELD_INIT,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
                    .rhs = value_expr,
                },
            });
    } else if (value_expr == 0) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
                    .rhs = align_expr,
                },
            });
    } else {
        // Unreachable while value_expr is fixed at 0; kept for the full layout.
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_TAG_CONTAINER_FIELD,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
                    .rhs = addExtra(p,
                        (NodeContainerField) {
                            .align_expr = align_expr,
                            .value_expr = value_expr,
                        })
                },
            });
    }
}
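/*
 * Storage note: AstNodeList is struct-of-arrays (tags, main_tokens and datas
 * share one index), and index 0 doubles as the root node and the null_node
 * sentinel, which is why "no node" is encoded as 0 throughout this file.
 * A minimal sketch of appending a node (the token index is hypothetical):
 *
 *   AstNodeIndex id = addNode(&p->nodes, (AstNodeItem) {
 *       .tag = AST_NODE_TAG_IDENTIFIER,
 *       .main_token = 7, // hypothetical token index
 *       .data = {},
 *   });
 *   // p->nodes.tags[id] and p->nodes.main_tokens[id] now hold the fields.
 */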
static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_CHAR_LITERAL:
    case TOKENIZER_TAG_NUMBER_LITERAL:
    case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_STRING_LITERAL:
    case TOKENIZER_TAG_BUILTIN:
    case TOKENIZER_TAG_KEYWORD_FN:
    case TOKENIZER_TAG_KEYWORD_IF:
    case TOKENIZER_TAG_KEYWORD_SWITCH:
    case TOKENIZER_TAG_KEYWORD_EXTERN:
    case TOKENIZER_TAG_KEYWORD_PACKED:
    case TOKENIZER_TAG_KEYWORD_STRUCT:
    case TOKENIZER_TAG_KEYWORD_OPAQUE:
    case TOKENIZER_TAG_KEYWORD_ENUM:
    case TOKENIZER_TAG_KEYWORD_UNION:
    case TOKENIZER_TAG_KEYWORD_COMPTIME:
    case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    case TOKENIZER_TAG_IDENTIFIER:
        if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
            exit(1);
        }
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_TAG_IDENTIFIER,
                .main_token = nextToken(p),
                .data = {}
            });
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_FOR:
    case TOKENIZER_TAG_KEYWORD_WHILE:
    case TOKENIZER_TAG_PERIOD:
    case TOKENIZER_TAG_KEYWORD_ERROR:
    case TOKENIZER_TAG_L_PAREN:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}

static AstNodeIndex parseSuffixOp(Parser* p) {
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_L_BRACKET:
    case TOKENIZER_TAG_PERIOD_ASTERISK:
    case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
    case TOKENIZER_TAG_PERIOD:
        fprintf(stderr, "parseSuffixOp does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}

static AstNodeIndex parseSuffixExpr(Parser* p) {
    bool ok;
    eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok);
    if (ok) {
        fprintf(stderr, "async not supported\n");
        exit(1);
    }
    AstNodeIndex res = parsePrimaryTypeExpr(p);
    if (res == 0) return res;
    while (true) {
        const AstNodeIndex suffix_op = parseSuffixOp(p);
        if (suffix_op != 0) {
            res = suffix_op;
            continue;
        }
        eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
        if (ok) {
            fprintf(stderr, "parseSuffixExpr does not support expr with parens\n");
            exit(1);
        }
        return res;
    }
}

static AstTokenIndex expectToken(Parser* p, TokenizerTag tag, bool* ok) {
    if (p->token_tags[p->tok_i] != tag) {
        // Unlike eatToken, failing to match here is a hard parse error.
        fprintf(stderr, "expected %s, got %s\n",
            tokenizerGetTagString(tag),
            tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    if (ok != NULL) *ok = true;
    return nextToken(p);
}

static AstNodeIndex parseErrorUnionExpr(Parser* p) {
    const AstNodeIndex suffix_expr = parseSuffixExpr(p);
    if (suffix_expr == 0) return null_node;
    bool ok;
    const AstTokenIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok);
    if (!ok) return suffix_expr;
    return addNode(
        &p->nodes,
        (AstNodeItem) {
            .tag = AST_NODE_TAG_ERROR_UNION,
            .main_token = bang,
            .data = {
                .lhs = suffix_expr,
                .rhs = expectTypeExpr(p),
            }
        });
}

static AstNodeIndex parseTypeExpr(Parser* p) {
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_QUESTION_MARK:
    case TOKENIZER_TAG_KEYWORD_ANYFRAME:
    case TOKENIZER_TAG_ASTERISK:
    case TOKENIZER_TAG_ASTERISK_ASTERISK:
    case TOKENIZER_TAG_L_BRACKET:
        fprintf(stderr, "parseTypeExpr not supported for %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return parseErrorUnionExpr(p);
    }
}
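/*
 * Type expressions funnel through a small chain:
 *   parseTypeExpr -> parseErrorUnionExpr -> parseSuffixExpr -> parsePrimaryTypeExpr
 * Assuming Zig-style "E!T" error-union syntax, a fragment like "E!T" (with E
 * and T both identifiers) becomes one AST_NODE_TAG_ERROR_UNION node:
 * lhs = the node for E, main_token = the '!' token, rhs = the node for T.
 */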
static SmallSpan parseParamDeclList(Parser* p) {
    // can only parse empty parameter lists
    bool ok;
    eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
    if (!ok) {
        fprintf(stderr, "expected (, got %s\n",
            tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    eatToken(p, TOKENIZER_TAG_R_PAREN, &ok);
    if (!ok) {
        fprintf(stderr, "expected ), got %s\n",
            tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    return (SmallSpan) {
        .zero_or_one = 0,
    };
}

static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
    // Grow by one and claim the new slot, so a later setNode() can fill it in.
    astNodeListEnsureCapacity(&p->nodes, 1);
    p->nodes.len += 1;
    p->nodes.tags[p->nodes.len - 1] = tag;
    return p->nodes.len - 1;
}

static AstNodeIndex parseFnProto(Parser* p) {
    bool ok;
    const AstTokenIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
    if (!ok) return null_node;

    const AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
    eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);
    const SmallSpan params = parseParamDeclList(p);
    (void)params; // parameters are not recorded yet
    // const align_expr = try p.parseByteAlign();
    // const addrspace_expr = try p.parseAddrSpace();
    // const section_expr = try p.parseLinkSection();
    // const callconv_expr = try p.parseCallconv();
    eatToken(p, TOKENIZER_TAG_BANG, NULL);
    const AstNodeIndex return_type_expr = parseTypeExpr(p);
    // Assumed fn_proto data layout: lhs = first param (none yet), rhs = return type.
    return setNode(
        p, fn_proto_index,
        (AstNodeItem) {
            .tag = AST_NODE_TAG_FN_PROTO,
            .main_token = fn_token,
            .data = {
                .lhs = 0,
                .rhs = return_type_expr,
            },
        });
}

static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
    (void)allow_defer_var;
    bool ok;
    if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) {
        fprintf(stderr, "expectStatement: comptime keyword not yet supported\n");
        exit(1);
    }
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
    case TOKENIZER_TAG_KEYWORD_SUSPEND:
    case TOKENIZER_TAG_KEYWORD_DEFER:
    case TOKENIZER_TAG_KEYWORD_ERRDEFER:
    case TOKENIZER_TAG_KEYWORD_IF:
    case TOKENIZER_TAG_KEYWORD_ENUM:
    case TOKENIZER_TAG_KEYWORD_STRUCT:
    case TOKENIZER_TAG_KEYWORD_UNION:;
        const char* tok_str = tokenizerGetTagString(tok);
        fprintf(stderr, "expectStatement does not yet support keyword %s\n", tok_str);
        exit(1);
    default:;
    }
    // TODO continue with the remaining statement forms. Placeholder: no tokens
    // are consumed here, so callers only terminate for empty blocks.
    return 1;
}

typedef struct {
    AstNodeIndexSlice* scratch;
    uint32_t old_len;
} CleanupScratch;

static void cleanupScratch(CleanupScratch* c) {
    c->scratch->len = c->old_len;
}

static AstNodeIndex parseBlock(Parser* p) {
    bool ok;
    const AstTokenIndex lbrace = eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
    if (!ok) return null_node;

    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
        .scratch = &p->scratch,
        .old_len = p->scratch.len,
    };

    while (1) {
        if (p->token_tags[p->tok_i] == TOKENIZER_TAG_R_BRACE) break;
        // "const AstNodeIndex statement" once tinycc supports typeof_unqual (C23)
        AstNodeIndex statement = expectStatement(p, true);
        if (statement == 0) break;
        SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
    }
    expectToken(p, TOKENIZER_TAG_R_BRACE, NULL);
    const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKENIZER_TAG_SEMICOLON);

    switch (p->scratch.len - scratch_top.old_len) {
    case 0:
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_TAG_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = 0,
                    .rhs = 0,
                },
            });
    case 1:
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = 0,
                },
            });
    case 2:
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = p->scratch.arr[scratch_top.old_len + 1],
                },
            });
    default:;
        const uint32_t extra = p->scratch.len - scratch_top.old_len;
        SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, extra);
        memcpy(
            &p->extra_data.arr[p->extra_data.len],
            &p->scratch.arr[scratch_top.old_len],
            sizeof(AstNodeIndex) * extra);
        p->extra_data.len += extra;
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK,
                .main_token = lbrace,
                .data = {
                    .lhs = p->extra_data.len - extra, // start of the statement span in extra_data
                    .rhs = p->extra_data.len,         // end of the span
                },
            });
    }
    return 0; // tcc
}
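/*
 * Block encoding sketch: blocks with at most two statements store them inline
 * in data.lhs/data.rhs under AST_NODE_TAG_BLOCK_TWO[_SEMICOLON]; longer blocks
 * copy the collected statement indices from scratch into extra_data and store
 * the [start, end) span of that copy under AST_NODE_TAG_BLOCK[_SEMICOLON].
 * The *_SEMICOLON variants record that the last statement ended in ';'.
 */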
static AstNodeIndex parseVarDeclProto(Parser* p) {
    bool ok;
    eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok);
    if (!ok) {
        eatToken(p, TOKENIZER_TAG_KEYWORD_VAR, &ok);
        if (!ok) return null_node;
    }
    fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseGlobalVarDecl(Parser* p) {
    const AstNodeIndex var_decl = parseVarDeclProto(p);
    if (var_decl == 0) {
        return null_node;
    }
    fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex expectTopLevelDecl(Parser* p) {
    const AstTokenIndex extern_export_inline_token = p->tok_i++;
    bool is_extern = false;
    bool expect_fn = false;
    bool expect_var_or_fn = false;
    switch (p->token_tags[extern_export_inline_token]) {
    case TOKENIZER_TAG_KEYWORD_EXTERN:
        eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL);
        is_extern = true;
        expect_var_or_fn = true;
        break;
    case TOKENIZER_TAG_KEYWORD_EXPORT:
        expect_var_or_fn = true;
        break;
    case TOKENIZER_TAG_KEYWORD_INLINE:
    case TOKENIZER_TAG_KEYWORD_NOINLINE:
        expect_fn = true;
        break;
    default:
        p->tok_i--; // not a decl modifier; put the token back
    }
    (void)expect_var_or_fn;

    const AstNodeIndex fn_proto = parseFnProto(p);
    if (fn_proto != 0) {
        switch (p->token_tags[p->tok_i]) {
        case TOKENIZER_TAG_SEMICOLON:
            p->tok_i++;
            return fn_proto;
        case TOKENIZER_TAG_L_BRACE:
            if (is_extern) exit(1); // extern functions cannot have a body
            const AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
            const AstNodeIndex body_block = parseBlock(p);
            return setNode(
                p, fn_decl_index,
                (AstNodeItem) {
                    .tag = AST_NODE_TAG_FN_DECL,
                    .main_token = p->nodes.main_tokens[fn_proto],
                    .data = { .lhs = fn_proto, .rhs = body_block },
                });
        default:
            exit(1); // expected semicolon or left brace
        }
    }
    if (expect_fn) exit(1);

    const AstTokenIndex thread_local_token = eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
    (void)thread_local_token; // unused until var decls are supported
    const AstNodeIndex var_decl = parseGlobalVarDecl(p);
    if (var_decl != 0) {
        return var_decl;
    }
    // assuming the program is correct...
    fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
    exit(1);
    return 0; // make tcc happy
}
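/*
 * FieldState is a small state machine enforcing field/decl ordering inside a
 * container: NONE until the first field, SEEN once a field has been parsed,
 * and END (with the offending decl recorded in payload.end) once a top-level
 * decl follows a field. Parsing another field in the END state is rejected
 * below. A hypothetical Zig-style source walking through all three states:
 *
 *   a: u32,          // NONE -> SEEN
 *   fn f() void {}   // SEEN -> END
 *   b: u32,          // error in the END state
 */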
fprintf(stderr, "the next token should be usingnamespace, which is not supported\n"); exit(1); return 0; // make tcc happy } static Members parseContainerMembers(Parser* p) { const uint32_t scratch_top = p->scratch.len; Members res = (Members) {}; // ast_token_index last_field; bool ok; while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok) ; FieldState field_state = { .tag = FIELD_STATE_NONE }; bool trailing = false; AstNodeIndex top_level_decl; while (1) { eatDocComments(p); switch (p->token_tags[p->tok_i]) { case TOKENIZER_TAG_KEYWORD_TEST: case TOKENIZER_TAG_KEYWORD_COMPTIME: case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:; const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]); fprintf(stderr, "%s not implemented in parseContainerMembers\n", str); exit(1); case TOKENIZER_TAG_KEYWORD_PUB: p->tok_i++; top_level_decl = expectTopLevelDecl(p); if (top_level_decl != 0) { if (field_state.tag == FIELD_STATE_SEEN) { field_state.tag = FIELD_STATE_END; field_state.payload.end = top_level_decl; } SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); } trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON); break; case TOKENIZER_TAG_KEYWORD_CONST: case TOKENIZER_TAG_KEYWORD_VAR: case TOKENIZER_TAG_KEYWORD_THREADLOCAL: case TOKENIZER_TAG_KEYWORD_EXPORT: case TOKENIZER_TAG_KEYWORD_EXTERN: case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_NOINLINE: case TOKENIZER_TAG_KEYWORD_FN:; top_level_decl = expectTopLevelDecl(p); if (top_level_decl != 0) { if (field_state.tag == FIELD_STATE_SEEN) { field_state.tag = FIELD_STATE_END; field_state.payload.end = top_level_decl; } SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); } trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON); break; case TOKENIZER_TAG_EOF: case TOKENIZER_TAG_R_BRACE: goto break_loop; default:; // skip parseCStyleContainer const AstNodeIndex identifier = p->tok_i; const AstNodeIndex container_field = expectContainerField(p); switch (field_state.tag) { case FIELD_STATE_NONE: field_state.tag = FIELD_STATE_SEEN; break; case FIELD_STATE_SEEN: break; case FIELD_STATE_END: fprintf(stderr, "parseContainerMembers error condition\n"); exit(1); } SLICE_APPEND(AstNodeIndex, &p->scratch, container_field); switch (p->token_tags[p->tok_i]) { case TOKENIZER_TAG_COMMA: p->tok_i++; trailing = true; continue; case TOKENIZER_TAG_R_BRACE: case TOKENIZER_TAG_EOF: trailing = false; goto break_loop; default: continue; } findNextContainerMember(p); continue; } } break_loop: p->scratch.len = scratch_top; return res; }