#include "common.h" #include #include #include #include #include "ast.h" #include "parser.h" const AstNodeIndex null_node = 0; const AstTokenIndex null_token = ~(AstTokenIndex)(0); typedef struct { uint32_t len; AstNodeIndex lhs; AstNodeIndex rhs; bool trailing; } Members; static AstNodeIndex parsePrefixExpr(Parser*); static AstNodeIndex parseTypeExpr(Parser*); static AstNodeIndex parseBlock(Parser* p); static AstNodeIndex parseLabeledStatement(Parser*); static AstNodeIndex parseExpr(Parser*); static AstNodeIndex expectExpr(Parser*); static AstNodeIndex expectSemicolon(Parser*); static AstTokenIndex expectToken(Parser*, TokenizerTag); static AstNodeIndex parseFnProto(Parser*); static Members parseContainerMembers(Parser*); static AstNodeIndex parseInitList(Parser*, AstNodeIndex, AstTokenIndex); typedef struct { enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag; union { uint32_t end; } payload; } FieldState; typedef struct { enum { SMALL_SPAN_ZERO_OR_ONE, SMALL_SPAN_MULTI } tag; union { AstNodeIndex zero_or_one; AstSubRange multi; } payload; } SmallSpan; typedef struct { AstNodeIndexSlice* scratch; uint32_t old_len; } CleanupScratch; static CleanupScratch initCleanupScratch(Parser* p) { return (CleanupScratch) { .scratch = &p->scratch, .old_len = p->scratch.len, }; } static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; } static AstSubRange listToSpan( Parser* p, const AstNodeIndex* list, uint32_t count) { SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); memcpy(p->extra_data.arr + p->extra_data.len, list, count * sizeof(AstNodeIndex)); p->extra_data.len += count; return (AstSubRange) { .start = p->extra_data.len - count, .end = p->extra_data.len, }; } static AstSubRange membersToSpan(const Members self, Parser* p) { if (self.len <= 2) { const AstNodeIndex nodes[] = { self.lhs, self.rhs }; return listToSpan(p, nodes, self.len); } else { return (AstSubRange) { .start = self.lhs, .end = self.rhs }; } } static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; } static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) { if (p->token_tags[p->tok_i] == tag) { return nextToken(p); } else { return null_token; } } static AstTokenIndex assertToken(Parser* p, TokenizerTag tag) { const AstTokenIndex token = nextToken(p); assert(p->token_tags[token] == tag); return token; } static void eatDocComments(Parser* p) { while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { } } static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) { p->nodes.tags[i] = item.tag; p->nodes.main_tokens[i] = item.main_token; p->nodes.datas[i] = item.data; return i; } static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) { const uint32_t new_len = list->len + additional; if (new_len <= list->cap) { return; } const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2; list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag)); list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex)); list->datas = realloc(list->datas, new_cap * sizeof(AstData)); if (!list->tags || !list->main_tokens || !list->datas) exit(1); list->cap = new_cap; } static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) { astNodeListEnsureCapacity(nodes, 1); nodes->tags[nodes->len] = item.tag; nodes->main_tokens[nodes->len] = item.main_token; nodes->datas[nodes->len] = item.data; return nodes->len++; } static AstNodeIndex addExtra( Parser* p, const AstNodeIndex* extra, uint32_t count) { const AstNodeIndex result = p->extra_data.len; SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); memcpy(p->extra_data.arr + p->extra_data.len, extra, count * sizeof(AstNodeIndex)); p->extra_data.len += count; return result; } static AstNodeIndex parseByteAlign(Parser* p) { if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token) return null_node; expectToken(p, TOKEN_L_PAREN); const AstNodeIndex expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); return expr; } static AstNodeIndex parseAddrSpace(Parser* p) { if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) == null_token) return null_node; expectToken(p, TOKEN_L_PAREN); const AstNodeIndex expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); return expr; } static AstNodeIndex parseLinkSection(Parser* p) { if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) == null_token) return null_node; expectToken(p, TOKEN_L_PAREN); const AstNodeIndex expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); return expr; } static AstNodeIndex parseCallconv(Parser* p) { if (eatToken(p, TOKEN_KEYWORD_CALLCONV) == null_token) return null_node; expectToken(p, TOKEN_L_PAREN); const AstNodeIndex expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); return expr; } typedef struct { AstNodeIndex align_expr, value_expr; } NodeContainerField; static AstNodeIndex expectContainerField(Parser* p) { eatToken(p, TOKEN_KEYWORD_COMPTIME); const AstTokenIndex main_token = p->tok_i; if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON) p->tok_i += 2; const AstNodeIndex type_expr = parseTypeExpr(p); const AstNodeIndex align_expr = parseByteAlign(p); const AstNodeIndex value_expr = eatToken(p, TOKEN_EQUAL) != null_token ? expectExpr(p) : 0; if (align_expr == 0) { return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_CONTAINER_FIELD_INIT, .main_token = main_token, .data = { .lhs = type_expr, .rhs = value_expr, }, }); } else if (value_expr == 0) { return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_CONTAINER_FIELD_ALIGN, .main_token = main_token, .data = { .lhs = type_expr, .rhs = align_expr, }, }); } else { return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_CONTAINER_FIELD, .main_token = main_token, .data = { .lhs = type_expr, .rhs = addExtra(p, (AstNodeIndex[]) { align_expr, value_expr }, 2), }, }); } } static AstNodeIndex parseBuiltinCall(Parser* p) { const AstTokenIndex builtin_token = assertToken(p, TOKEN_BUILTIN); assertToken(p, TOKEN_L_PAREN); CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); while (true) { if (eatToken(p, TOKEN_R_PAREN) != null_token) break; const AstNodeIndex param = expectExpr(p); SLICE_APPEND(AstNodeIndex, &p->scratch, param); switch (p->token_tags[p->tok_i]) { case TOKEN_COMMA: p->tok_i++; break; case TOKEN_R_PAREN: p->tok_i++; goto end_loop; default: fprintf(stderr, "expected comma after arg\n"); exit(1); } } end_loop:; const bool comma = (p->token_tags[p->tok_i - 2] == TOKEN_COMMA); const uint32_t params_len = p->scratch.len - scratch_top.old_len; switch (params_len) { case 0: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_BUILTIN_CALL_TWO, .main_token = builtin_token, .data = { .lhs = 0, .rhs = 0, }, }); case 1: return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_BUILTIN_CALL_TWO_COMMA : AST_NODE_BUILTIN_CALL_TWO, .main_token = builtin_token, .data = { .lhs = p->scratch.arr[scratch_top.old_len], .rhs = 0, }, }); case 2: return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_BUILTIN_CALL_TWO_COMMA : AST_NODE_BUILTIN_CALL_TWO, .main_token = builtin_token, .data = { .lhs = p->scratch.arr[scratch_top.old_len], .rhs = p->scratch.arr[scratch_top.old_len+1], }, }); default:; const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], params_len); return addNode( &p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_BUILTIN_CALL_COMMA : AST_NODE_BUILTIN_CALL, .main_token = builtin_token, .data = { .lhs = span.start, .rhs = span.end, }, }); } } static AstNodeIndex parseContainerDeclAuto(Parser* p) { const AstTokenIndex main_token = nextToken(p); AstNodeIndex arg_expr = null_node; switch (p->token_tags[main_token]) { case TOKEN_KEYWORD_OPAQUE: break; case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_ENUM: if (eatToken(p, TOKEN_L_PAREN) != null_token) { arg_expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); } break; case TOKEN_KEYWORD_UNION: if (eatToken(p, TOKEN_L_PAREN) != null_token) { if (eatToken(p, TOKEN_KEYWORD_ENUM) != null_token) { if (eatToken(p, TOKEN_L_PAREN) != null_token) { const AstNodeIndex enum_tag_expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); expectToken(p, TOKEN_R_PAREN); expectToken(p, TOKEN_L_BRACE); const Members members = parseContainerMembers(p); const AstSubRange members_span = membersToSpan(members, p); expectToken(p, TOKEN_R_BRACE); return addNode( &p->nodes, (AstNodeItem) { .tag = members.trailing ? AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING : AST_NODE_TAGGED_UNION_ENUM_TAG, .main_token = main_token, .data = { .lhs = enum_tag_expr, .rhs = addExtra(p, (AstNodeIndex[]) { members_span.start, members_span.end }, 2), }, }); } expectToken(p, TOKEN_R_PAREN); expectToken(p, TOKEN_L_BRACE); const Members members = parseContainerMembers(p); expectToken(p, TOKEN_R_BRACE); if (members.len <= 2) { return addNode(&p->nodes, (AstNodeItem) { .tag = members.trailing ? AST_NODE_TAGGED_UNION_TWO_TRAILING : AST_NODE_TAGGED_UNION_TWO, .main_token = main_token, .data = { .lhs = members.lhs, .rhs = members.rhs }, }); } const AstSubRange span = membersToSpan(members, p); return addNode(&p->nodes, (AstNodeItem) { .tag = members.trailing ? AST_NODE_TAGGED_UNION_TRAILING : AST_NODE_TAGGED_UNION, .main_token = main_token, .data = { .lhs = span.start, .rhs = span.end }, }); } arg_expr = expectExpr(p); expectToken(p, TOKEN_R_PAREN); } break; default: fprintf(stderr, "parseContainerDeclAuto: unexpected token\n"); exit(1); } expectToken(p, TOKEN_L_BRACE); const Members members = parseContainerMembers(p); expectToken(p, TOKEN_R_BRACE); if (arg_expr == null_node) { if (members.len <= 2) { return addNode(&p->nodes, (AstNodeItem) { .tag = members.trailing ? AST_NODE_CONTAINER_DECL_TWO_TRAILING : AST_NODE_CONTAINER_DECL_TWO, .main_token = main_token, .data = { .lhs = members.lhs, .rhs = members.rhs }, }); } const AstSubRange span = membersToSpan(members, p); return addNode(&p->nodes, (AstNodeItem) { .tag = members.trailing ? AST_NODE_CONTAINER_DECL_TRAILING : AST_NODE_CONTAINER_DECL, .main_token = main_token, .data = { .lhs = span.start, .rhs = span.end }, }); } const AstSubRange span = membersToSpan(members, p); return addNode( &p->nodes, (AstNodeItem) { .tag = members.trailing ? AST_NODE_CONTAINER_DECL_ARG_TRAILING : AST_NODE_CONTAINER_DECL_ARG, .main_token = main_token, .data = { .lhs = arg_expr, .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2), }, }); } static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { case TOKEN_CHAR_LITERAL: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_CHAR_LITERAL, .main_token = nextToken(p), .data = {}, }); case TOKEN_NUMBER_LITERAL: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_NUMBER_LITERAL, .main_token = nextToken(p), .data = {}, }); case TOKEN_KEYWORD_UNREACHABLE: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_UNREACHABLE_LITERAL, .main_token = nextToken(p), .data = {}, }); case TOKEN_KEYWORD_ANYFRAME: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); case TOKEN_STRING_LITERAL: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_STRING_LITERAL, .main_token = nextToken(p), .data = {}, }); case TOKEN_BUILTIN: return parseBuiltinCall(p); case TOKEN_KEYWORD_FN: return parseFnProto(p); case TOKEN_KEYWORD_IF: case TOKEN_KEYWORD_SWITCH: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); case TOKEN_KEYWORD_EXTERN: case TOKEN_KEYWORD_PACKED: // extern/packed can precede struct/union/enum switch (p->token_tags[p->tok_i + 1]) { case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_UNION: case TOKEN_KEYWORD_ENUM: p->tok_i++; // consume extern/packed return parseContainerDeclAuto(p); default: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); } case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_OPAQUE: case TOKEN_KEYWORD_ENUM: case TOKEN_KEYWORD_UNION: return parseContainerDeclAuto(p); case TOKEN_KEYWORD_COMPTIME: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_COMPTIME, .main_token = nextToken(p), .data = { .lhs = parseTypeExpr(p), .rhs = 0 }, }); case TOKEN_MULTILINE_STRING_LITERAL_LINE: { const AstTokenIndex first = nextToken(p); AstTokenIndex last = first; while (p->token_tags[p->tok_i] == TOKEN_MULTILINE_STRING_LITERAL_LINE) last = nextToken(p); return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_MULTILINE_STRING_LITERAL, .main_token = first, .data = { .lhs = first, .rhs = last }, }); } case TOKEN_IDENTIFIER: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_IDENTIFIER, .main_token = nextToken(p), .data = {}, }); case TOKEN_KEYWORD_INLINE: case TOKEN_KEYWORD_FOR: case TOKEN_KEYWORD_WHILE: case TOKEN_PERIOD: switch (p->token_tags[p->tok_i + 1]) { case TOKEN_IDENTIFIER: { const AstTokenIndex dot = nextToken(p); return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ENUM_LITERAL, .main_token = nextToken(p), .data = { .lhs = dot, .rhs = 0 }, }); } case TOKEN_L_BRACE: { // Anonymous struct/array init: .{ ... } const AstTokenIndex lbrace = p->tok_i + 1; p->tok_i = lbrace + 1; return parseInitList(p, null_node, lbrace); } default: fprintf(stderr, "parsePrimaryTypeExpr: unsupported period suffix %s\n", tokenizerGetTagString(p->token_tags[p->tok_i + 1])); exit(1); } return 0; // tcc case TOKEN_KEYWORD_ERROR: fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok)); exit(1); case TOKEN_L_PAREN: { const AstTokenIndex lparen = nextToken(p); const AstNodeIndex inner = expectExpr(p); const AstTokenIndex rparen = expectToken(p, TOKEN_R_PAREN); return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_GROUPED_EXPRESSION, .main_token = lparen, .data = { .lhs = inner, .rhs = rparen }, }); } default: return null_node; } } static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { case TOKEN_L_BRACKET: { const AstTokenIndex lbracket = nextToken(p); const AstNodeIndex index_expr = expectExpr(p); switch (p->token_tags[p->tok_i]) { case TOKEN_R_BRACKET: p->tok_i++; return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ARRAY_ACCESS, .main_token = lbracket, .data = { .lhs = lhs, .rhs = index_expr }, }); case TOKEN_ELLIPSIS2: { p->tok_i++; // consume .. const AstNodeIndex end_expr = parseExpr(p); if (eatToken(p, TOKEN_COLON) != null_token) { const AstNodeIndex sentinel = expectExpr(p); expectToken(p, TOKEN_R_BRACKET); // end_expr 0 means "no end" — encode as ~0 for // OptionalIndex.none const AstNodeIndex opt_end = end_expr == 0 ? ~(AstNodeIndex)0 : end_expr; return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_SLICE_SENTINEL, .main_token = lbracket, .data = { .lhs = lhs, .rhs = addExtra(p, (AstNodeIndex[]) { index_expr, opt_end, sentinel }, 3), }, }); } expectToken(p, TOKEN_R_BRACKET); if (end_expr == 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_SLICE_OPEN, .main_token = lbracket, .data = { .lhs = lhs, .rhs = index_expr }, }); } return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_SLICE, .main_token = lbracket, .data = { .lhs = lhs, .rhs = addExtra(p, (AstNodeIndex[]) { index_expr, end_expr }, 2), }, }); } default: fprintf( stderr, "parseSuffixOp: expected ] or .. after index expr\n"); exit(1); } return 0; // tcc } case TOKEN_PERIOD_ASTERISK: case TOKEN_INVALID_PERIODASTERISKS: fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok)); exit(1); case TOKEN_PERIOD: if (p->token_tags[p->tok_i + 1] == TOKEN_IDENTIFIER) { const AstTokenIndex dot = nextToken(p); return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_FIELD_ACCESS, .main_token = dot, .data = { .lhs = lhs, .rhs = nextToken(p) }, }); } if (p->token_tags[p->tok_i + 1] == TOKEN_ASTERISK) { const AstTokenIndex dot = nextToken(p); nextToken(p); // consume the * return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_DEREF, .main_token = dot, .data = { .lhs = lhs, .rhs = 0 }, }); } if (p->token_tags[p->tok_i + 1] == TOKEN_QUESTION_MARK) { const AstTokenIndex dot = nextToken(p); return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_UNWRAP_OPTIONAL, .main_token = dot, .data = { .lhs = lhs, .rhs = nextToken(p) }, }); } fprintf(stderr, "parseSuffixOp: unsupported period suffix\n"); exit(1); return 0; // tcc default: return null_node; } } static AstNodeIndex parseSuffixExpr(Parser* p) { if (eatToken(p, TOKEN_KEYWORD_ASYNC) != null_token) { fprintf(stderr, "async not supported\n"); exit(1); } AstNodeIndex res = parsePrimaryTypeExpr(p); if (res == 0) return res; while (true) { const AstNodeIndex suffix_op = parseSuffixOp(p, res); if (suffix_op != 0) { res = suffix_op; continue; } const AstTokenIndex lparen = eatToken(p, TOKEN_L_PAREN); if (lparen == null_token) return res; CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); while (true) { if (eatToken(p, TOKEN_R_PAREN) != null_token) break; const AstNodeIndex arg = expectExpr(p); SLICE_APPEND(AstNodeIndex, &p->scratch, arg); if (p->token_tags[p->tok_i] == TOKEN_COMMA) { p->tok_i++; continue; } expectToken(p, TOKEN_R_PAREN); break; } const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; const uint32_t params_len = p->scratch.len - scratch_top.old_len; switch (params_len) { case 0: return addNode( &p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE, .main_token = lparen, .data = { .lhs = res, .rhs = 0, }, }); case 1: return addNode( &p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE, .main_token = lparen, .data = { .lhs = res, .rhs = p->scratch.arr[scratch_top.old_len], }, }); default:; const AstSubRange span = listToSpan( p, &p->scratch.arr[scratch_top.old_len], params_len); return addNode( &p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_CALL_COMMA : AST_NODE_CALL, .main_token = lparen, .data = { .lhs = res, .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end, }, 2), }, }); } } } static AstTokenIndex expectToken(Parser* p, TokenizerTag tag) { if (p->token_tags[p->tok_i] == tag) { return nextToken(p); } else { fprintf(stderr, "expected token %s, got %s\n", tokenizerGetTagString(tag), tokenizerGetTagString(p->token_tags[p->tok_i])); exit(1); } return 0; // tcc } static AstNodeIndex expectSemicolon(Parser* p) { return expectToken(p, TOKEN_SEMICOLON); } static AstNodeIndex parseErrorUnionExpr(Parser* p) { const AstNodeIndex suffix_expr = parseSuffixExpr(p); if (suffix_expr == 0) return null_node; const AstNodeIndex bang = eatToken(p, TOKEN_BANG); if (bang == null_token) return suffix_expr; return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_ERROR_UNION, .main_token = bang, .data = { .lhs = suffix_expr, .rhs = parseTypeExpr(p), }, }); } static AstNodeIndex parseTypeExpr(Parser* p) { const TokenizerTag tok = p->token_tags[p->tok_i]; switch (tok) { case TOKEN_QUESTION_MARK: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_OPTIONAL_TYPE, .main_token = nextToken(p), .data = { .lhs = parseTypeExpr(p), .rhs = 0 }, }); case TOKEN_KEYWORD_ANYFRAME: fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok)); exit(1); case TOKEN_ASTERISK: { const AstTokenIndex asterisk = nextToken(p); const AstNodeIndex align_expr = parseByteAlign(p); const AstNodeIndex sentinel = eatToken(p, TOKEN_COLON) != null_token ? parseExpr(p) : 0; // const/volatile/allowzero are pointer modifiers consumed here. // They are not stored in the AST node; the renderer re-derives // them from token positions. while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) p->tok_i++; const AstNodeIndex child_type = parseTypeExpr(p); if (sentinel != 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_SENTINEL, .main_token = asterisk, .data = { .lhs = sentinel, .rhs = child_type }, }); } if (align_expr != 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_ALIGNED, .main_token = asterisk, .data = { .lhs = align_expr, .rhs = child_type }, }); } return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_ALIGNED, .main_token = asterisk, .data = { .lhs = 0, .rhs = child_type }, }); } case TOKEN_ASTERISK_ASTERISK: fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok)); exit(1); case TOKEN_L_BRACKET: { const AstTokenIndex lbracket = nextToken(p); if (p->token_tags[p->tok_i] == TOKEN_ASTERISK) { fprintf( stderr, "parseTypeExpr: [*] pointer types not implemented\n"); exit(1); } const AstNodeIndex len_expr = parseExpr(p); const AstNodeIndex sentinel = eatToken(p, TOKEN_COLON) != null_token ? expectExpr(p) : 0; expectToken(p, TOKEN_R_BRACKET); if (len_expr == 0) { // Slice type: []T or [:s]T // const/volatile/allowzero are pointer modifiers consumed here. // They are not stored in the AST node; the renderer re-derives // them from token positions. while (p->token_tags[p->tok_i] == TOKEN_KEYWORD_CONST || p->token_tags[p->tok_i] == TOKEN_KEYWORD_VOLATILE || p->token_tags[p->tok_i] == TOKEN_KEYWORD_ALLOWZERO) p->tok_i++; const AstNodeIndex elem_type = parseTypeExpr(p); if (sentinel != 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_SENTINEL, .main_token = lbracket, .data = { .lhs = sentinel, .rhs = elem_type }, }); } return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_PTR_TYPE_ALIGNED, .main_token = lbracket, .data = { .lhs = 0, .rhs = elem_type }, }); } // Array type: [N]T or [N:s]T const AstNodeIndex elem_type = parseTypeExpr(p); if (sentinel == 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ARRAY_TYPE, .main_token = lbracket, .data = { .lhs = len_expr, .rhs = elem_type }, }); } return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ARRAY_TYPE_SENTINEL, .main_token = lbracket, .data = { .lhs = len_expr, .rhs = addExtra(p, (AstNodeIndex[]) { sentinel, elem_type }, 2), }, }); } default: return parseErrorUnionExpr(p); } return 0; // tcc } static SmallSpan parseParamDeclList(Parser* p) { expectToken(p, TOKEN_L_PAREN); CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); while (true) { if (eatToken(p, TOKEN_R_PAREN) != null_token) break; eatDocComments(p); // Check for comptime or noalias eatToken(p, TOKEN_KEYWORD_COMPTIME); eatToken(p, TOKEN_KEYWORD_NOALIAS); // Check for name: type or just type if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON) { p->tok_i += 2; // consume name and colon } else if (p->token_tags[p->tok_i] == TOKEN_ELLIPSIS3) { // anytype (...) varargs p->tok_i++; if (eatToken(p, TOKEN_R_PAREN) != null_token) break; expectToken(p, TOKEN_COMMA); continue; } const AstNodeIndex type_expr = parseTypeExpr(p); if (type_expr != 0) SLICE_APPEND(AstNodeIndex, &p->scratch, type_expr); if (p->token_tags[p->tok_i] == TOKEN_COMMA) { p->tok_i++; continue; } expectToken(p, TOKEN_R_PAREN); break; } const uint32_t params_len = p->scratch.len - scratch_top.old_len; switch (params_len) { case 0: return (SmallSpan) { .tag = SMALL_SPAN_ZERO_OR_ONE, .payload = { .zero_or_one = 0 }, }; case 1: return (SmallSpan) { .tag = SMALL_SPAN_ZERO_OR_ONE, .payload = { .zero_or_one = p->scratch.arr[scratch_top.old_len] }, }; default:; const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], params_len); return (SmallSpan) { .tag = SMALL_SPAN_MULTI, .payload = { .multi = span }, }; } } static uint32_t reserveNode(Parser* p, AstNodeTag tag) { astNodeListEnsureCapacity(&p->nodes, 1); p->nodes.len++; p->nodes.tags[p->nodes.len - 1] = tag; return p->nodes.len - 1; } static AstNodeIndex parseFnProto(Parser* p) { AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN); if (fn_token == null_token) return null_node; AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO); eatToken(p, TOKEN_IDENTIFIER); SmallSpan params = parseParamDeclList(p); const AstNodeIndex align_expr = parseByteAlign(p); const AstNodeIndex addrspace_expr = parseAddrSpace(p); const AstNodeIndex section_expr = parseLinkSection(p); const AstNodeIndex callconv_expr = parseCallconv(p); eatToken(p, TOKEN_BANG); const AstNodeIndex return_type_expr = parseTypeExpr(p); if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) { switch (params.tag) { case SMALL_SPAN_ZERO_OR_ONE: return setNode(p, fn_proto_index, (AstNodeItem) { .tag = AST_NODE_FN_PROTO_SIMPLE, .main_token = fn_token, .data = { .lhs = params.payload.zero_or_one, .rhs = return_type_expr, }, }); case SMALL_SPAN_MULTI: return setNode(p, fn_proto_index, (AstNodeItem) { .tag = AST_NODE_FN_PROTO_MULTI, .main_token = fn_token, .data = { .lhs = addExtra(p, (AstNodeIndex[]) { params.payload.multi.start, params.payload.multi.end }, 2), .rhs = return_type_expr, }, }); } } // Complex fn proto with align/section/callconv/addrspace // Extra data fields are OptionalIndex: 0 → ~0 (none) #define OPT(x) ((x) == 0 ? ~(AstNodeIndex)0 : (x)) switch (params.tag) { case SMALL_SPAN_ZERO_OR_ONE: return setNode(p, fn_proto_index, (AstNodeItem) { .tag = AST_NODE_FN_PROTO_ONE, .main_token = fn_token, .data = { .lhs = addExtra(p, (AstNodeIndex[]) { OPT(params.payload.zero_or_one), OPT(align_expr), OPT(addrspace_expr), OPT(section_expr), OPT(callconv_expr) }, 5), .rhs = return_type_expr, }, }); case SMALL_SPAN_MULTI: return setNode(p, fn_proto_index, (AstNodeItem) { .tag = AST_NODE_FN_PROTO, .main_token = fn_token, .data = { .lhs = addExtra(p, (AstNodeIndex[]) { params.payload.multi.start, params.payload.multi.end, OPT(align_expr), OPT(addrspace_expr), OPT(section_expr), OPT(callconv_expr) }, 6), .rhs = return_type_expr, }, }); } #undef OPT return 0; // tcc } static AstTokenIndex parseBlockLabel(Parser* p) { if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON) { const AstTokenIndex identifier = p->tok_i; p->tok_i += 2; return identifier; } return null_node; } static AstNodeIndex parseForStatement(Parser* p) { const AstNodeIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR); if (for_token == null_token) return null_node; (void)for_token; fprintf(stderr, "parseForStatement cannot parse for statements\n"); return 0; // tcc } static AstNodeIndex parseWhileStatement(Parser* p) { const AstNodeIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE); if (while_token == null_token) return null_node; (void)while_token; fprintf(stderr, "parseWhileStatement cannot parse while statements\n"); return 0; // tcc } static AstNodeIndex parseLoopStatement(Parser* p) { const AstTokenIndex inline_token = eatToken(p, TOKEN_KEYWORD_INLINE); const AstNodeIndex for_statement = parseForStatement(p); if (for_statement != 0) return for_statement; const AstNodeIndex while_statement = parseWhileStatement(p); if (while_statement != 0) return while_statement; if (inline_token == null_token) return null_node; fprintf( stderr, "seen 'inline', there should have been a 'for' or 'while'\n"); exit(1); return 0; // tcc } static AstNodeIndex parseAssignExpr(Parser* p) { (void)p; fprintf(stderr, "parseAssignExpr not implemented\n"); exit(1); return 0; // tcc } static AstNodeIndex parseVarDeclProto(Parser* p) { AstTokenIndex mut_token; if ((mut_token = eatToken(p, TOKEN_KEYWORD_CONST)) == null_token) if ((mut_token = eatToken(p, TOKEN_KEYWORD_VAR)) == null_token) return null_node; expectToken(p, TOKEN_IDENTIFIER); const AstNodeIndex type_node = eatToken(p, TOKEN_COLON) == null_token ? 0 : parseTypeExpr(p); const AstNodeIndex align_node = parseByteAlign(p); const AstNodeIndex addrspace_node = parseAddrSpace(p); const AstNodeIndex section_node = parseLinkSection(p); if (section_node == 0 && addrspace_node == 0) { if (align_node == 0) { return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_SIMPLE_VAR_DECL, .main_token = mut_token, .data = { .lhs = type_node, .rhs = 0, }, }); } fprintf(stderr, "parseVarDecl got something too complicated\n"); exit(1); } else { fprintf(stderr, "parseVarDecl got something too complicated\n"); exit(1); } return 0; // tcc } static AstTokenIndex parseBreakLabel(Parser* p) { if (eatToken(p, TOKEN_COLON) == null_token) return null_token; return expectToken(p, TOKEN_IDENTIFIER); } // parseFieldInit tries to parse .field_name = expr; returns 0 if not a // field init static AstNodeIndex parseFieldInit(Parser* p) { if (p->token_tags[p->tok_i] == TOKEN_PERIOD && p->token_tags[p->tok_i + 1] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 2] == TOKEN_EQUAL) { p->tok_i += 3; return expectExpr(p); } return null_node; } // parseInitList parses the contents of { ... } for struct/array init. // lhs is the type expression (0 for anonymous .{...}). // lbrace is the lbrace token index. static AstNodeIndex parseInitList( Parser* p, AstNodeIndex lhs, AstTokenIndex lbrace) { CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); const AstNodeIndex field_init = parseFieldInit(p); if (field_init != 0) { // Struct init SLICE_APPEND(AstNodeIndex, &p->scratch, field_init); while (true) { if (p->token_tags[p->tok_i] == TOKEN_COMMA) p->tok_i++; else if (p->token_tags[p->tok_i] == TOKEN_R_BRACE) { p->tok_i++; break; } else { fprintf( stderr, "parseInitList: expected , or } in struct init\n"); exit(1); } if (eatToken(p, TOKEN_R_BRACE) != null_token) break; const AstNodeIndex next = parseFieldInit(p); assert(next != 0); SLICE_APPEND(AstNodeIndex, &p->scratch, next); } const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; const uint32_t inits_len = p->scratch.len - scratch_top.old_len; if (lhs == 0) { // Anonymous struct init: .{...} switch (inits_len) { case 0: case 1: case 2: return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_STRUCT_INIT_DOT_TWO_COMMA : AST_NODE_STRUCT_INIT_DOT_TWO, .main_token = lbrace, .data = { .lhs = inits_len >= 1 ? p->scratch.arr[scratch_top.old_len] : 0, .rhs = inits_len >= 2 ? p->scratch.arr[scratch_top.old_len + 1] : 0, }, }); default:; const AstSubRange span = listToSpan( p, &p->scratch.arr[scratch_top.old_len], inits_len); return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_STRUCT_INIT_DOT_COMMA : AST_NODE_STRUCT_INIT_DOT, .main_token = lbrace, .data = { .lhs = span.start, .rhs = span.end }, }); } } // Named struct init: X{...} switch (inits_len) { case 0: case 1: return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_STRUCT_INIT_ONE_COMMA : AST_NODE_STRUCT_INIT_ONE, .main_token = lbrace, .data = { .lhs = lhs, .rhs = inits_len >= 1 ? p->scratch.arr[scratch_top.old_len] : 0, }, }); default:; const AstSubRange span = listToSpan( p, &p->scratch.arr[scratch_top.old_len], inits_len); return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_STRUCT_INIT_COMMA : AST_NODE_STRUCT_INIT, .main_token = lbrace, .data = { .lhs = lhs, .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2), }, }); } } // Array init or empty init while (true) { if (eatToken(p, TOKEN_R_BRACE) != null_token) break; const AstNodeIndex elem = expectExpr(p); SLICE_APPEND(AstNodeIndex, &p->scratch, elem); if (p->token_tags[p->tok_i] == TOKEN_COMMA) p->tok_i++; else if (p->token_tags[p->tok_i] == TOKEN_R_BRACE) { p->tok_i++; break; } else { fprintf(stderr, "parseInitList: expected , or } in array init\n"); exit(1); } } const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA; const uint32_t elems_len = p->scratch.len - scratch_top.old_len; if (lhs == 0) { // Anonymous array init: .{a, b, ...} switch (elems_len) { case 0: case 1: case 2: return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_ARRAY_INIT_DOT_TWO_COMMA : AST_NODE_ARRAY_INIT_DOT_TWO, .main_token = lbrace, .data = { .lhs = elems_len >= 1 ? p->scratch.arr[scratch_top.old_len] : 0, .rhs = elems_len >= 2 ? p->scratch.arr[scratch_top.old_len + 1] : 0, }, }); default:; const AstSubRange span = listToSpan( p, &p->scratch.arr[scratch_top.old_len], elems_len); return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_ARRAY_INIT_DOT_COMMA : AST_NODE_ARRAY_INIT_DOT, .main_token = lbrace, .data = { .lhs = span.start, .rhs = span.end }, }); } } // Named init: X{a, b, ...} switch (elems_len) { case 0: // Empty init X{} — treat as struct init return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_STRUCT_INIT_ONE, .main_token = lbrace, .data = { .lhs = lhs, .rhs = 0 }, }); case 1: return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_ARRAY_INIT_ONE_COMMA : AST_NODE_ARRAY_INIT_ONE, .main_token = lbrace, .data = { .lhs = lhs, .rhs = p->scratch.arr[scratch_top.old_len], }, }); default:; const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], elems_len); return addNode(&p->nodes, (AstNodeItem) { .tag = comma ? AST_NODE_ARRAY_INIT_COMMA : AST_NODE_ARRAY_INIT, .main_token = lbrace, .data = { .lhs = lhs, .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2), }, }); } } static AstNodeIndex parseCurlySuffixExpr(Parser* p) { const AstNodeIndex lhs = parseTypeExpr(p); if (lhs == 0) return null_node; const AstTokenIndex lbrace = eatToken(p, TOKEN_L_BRACE); if (lbrace == null_token) return lhs; return parseInitList(p, lhs, lbrace); } typedef struct { int8_t prec; AstNodeTag tag; enum { ASSOC_LEFT, ASSOC_NONE, } assoc; } OperInfo; static OperInfo operTable(TokenizerTag tok_tag) { switch (tok_tag) { case TOKEN_KEYWORD_OR: return (OperInfo) { .prec = 10, .tag = AST_NODE_BOOL_OR }; case TOKEN_KEYWORD_AND: return (OperInfo) { .prec = 20, .tag = AST_NODE_BOOL_AND }; case TOKEN_EQUAL_EQUAL: return (OperInfo) { .prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE }; case TOKEN_BANG_EQUAL: return (OperInfo) { .prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE }; case TOKEN_ANGLE_BRACKET_LEFT: return (OperInfo) { .prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE }; case TOKEN_ANGLE_BRACKET_RIGHT: return (OperInfo) { .prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE }; case TOKEN_ANGLE_BRACKET_LEFT_EQUAL: return (OperInfo) { .prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE }; case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL: return (OperInfo) { .prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE }; case TOKEN_AMPERSAND: return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_AND }; case TOKEN_CARET: return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_XOR }; case TOKEN_PIPE: return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_OR }; case TOKEN_KEYWORD_ORELSE: return (OperInfo) { .prec = 40, .tag = AST_NODE_ORELSE }; case TOKEN_KEYWORD_CATCH: return (OperInfo) { .prec = 40, .tag = AST_NODE_CATCH }; case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL }; case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL_SAT }; case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: return (OperInfo) { .prec = 50, .tag = AST_NODE_SHR }; case TOKEN_PLUS: return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD }; case TOKEN_MINUS: return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB }; case TOKEN_PLUS_PLUS: return (OperInfo) { .prec = 60, .tag = AST_NODE_ARRAY_CAT }; case TOKEN_PLUS_PERCENT: return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_WRAP }; case TOKEN_MINUS_PERCENT: return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_WRAP }; case TOKEN_PLUS_PIPE: return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_SAT }; case TOKEN_MINUS_PIPE: return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_SAT }; case TOKEN_PIPE_PIPE: return (OperInfo) { .prec = 70, .tag = AST_NODE_MERGE_ERROR_SETS }; case TOKEN_ASTERISK: return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL }; case TOKEN_SLASH: return (OperInfo) { .prec = 70, .tag = AST_NODE_DIV }; case TOKEN_PERCENT: return (OperInfo) { .prec = 70, .tag = AST_NODE_MOD }; case TOKEN_ASTERISK_ASTERISK: return (OperInfo) { .prec = 70, .tag = AST_NODE_ARRAY_MULT }; case TOKEN_ASTERISK_PERCENT: return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_WRAP }; case TOKEN_ASTERISK_PIPE: return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_SAT }; default: return (OperInfo) { .prec = -1, .tag = AST_NODE_ROOT }; } } static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) { assert(min_prec >= 0); AstNodeIndex node = parsePrefixExpr(p); if (node == 0) return null_node; int8_t banned_prec = -1; while (true) { const TokenizerTag tok_tag = p->token_tags[p->tok_i]; const OperInfo info = operTable(tok_tag); if (info.prec < min_prec) break; assert(info.prec != banned_prec); const AstTokenIndex oper_token = nextToken(p); if (tok_tag == TOKEN_KEYWORD_CATCH) { fprintf(stderr, "parsePayload not supported\n"); exit(1); return 0; // tcc } const AstNodeIndex rhs = parseExprPrecedence(p, info.prec + 1); assert(rhs != 0); node = addNode( &p->nodes, (AstNodeItem) { .tag = info.tag, .main_token = oper_token, .data = { .lhs = node, .rhs = rhs, }, }); if (info.assoc == ASSOC_NONE) banned_prec = info.prec; } return node; } static AstNodeIndex parseExpr(Parser* p) { return parseExprPrecedence(p, 0); } static AstNodeIndex expectExpr(Parser* p) { const AstNodeIndex node = parseExpr(p); assert(node != 0); return node; } static AstNodeIndex parsePrimaryExpr(Parser* p) { const char* tok = tokenizerGetTagString(p->token_tags[p->tok_i]); switch (p->token_tags[p->tok_i]) { case TOKEN_KEYWORD_ASM: case TOKEN_KEYWORD_IF: fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok); exit(1); break; case TOKEN_KEYWORD_BREAK: return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_BREAK, .main_token = nextToken(p), .data = { .lhs = parseBreakLabel(p), .rhs = parseExpr(p), }, }); case TOKEN_KEYWORD_CONTINUE: return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_CONTINUE, .main_token = nextToken(p), .data = { .lhs = parseBreakLabel(p), .rhs = parseExpr(p), }, }); case TOKEN_KEYWORD_COMPTIME: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_COMPTIME, .main_token = nextToken(p), .data = { .lhs = expectExpr(p), .rhs = 0 }, }); case TOKEN_KEYWORD_NOSUSPEND: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_NOSUSPEND, .main_token = nextToken(p), .data = { .lhs = expectExpr(p), .rhs = 0 }, }); case TOKEN_KEYWORD_RESUME: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_RESUME, .main_token = nextToken(p), .data = { .lhs = expectExpr(p), .rhs = 0 }, }); case TOKEN_KEYWORD_RETURN: return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_RETURN, .main_token = nextToken(p), .data = { .lhs = parseExpr(p), .rhs = 0 }, }); case TOKEN_IDENTIFIER: if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) { switch (p->token_tags[p->tok_i + 2]) { case TOKEN_KEYWORD_INLINE: case TOKEN_KEYWORD_FOR: case TOKEN_KEYWORD_WHILE: fprintf(stderr, "parsePrimaryExpr NotImplemented\n"); exit(1); return 0; // tcc case TOKEN_L_BRACE: p->tok_i += 2; return parseBlock(p); default: return parseCurlySuffixExpr(p); } } else { return parseCurlySuffixExpr(p); } case TOKEN_KEYWORD_INLINE: case TOKEN_KEYWORD_FOR: case TOKEN_KEYWORD_WHILE: fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok); exit(1); return 0; // tcc case TOKEN_L_BRACE: return parseBlock(p); default: return parseCurlySuffixExpr(p); } return 0; // tcc } static AstNodeIndex parsePrefixExpr(Parser* p) { AstNodeTag tag; switch (p->token_tags[p->tok_i]) { case TOKEN_BANG: tag = AST_NODE_BOOL_NOT; break; case TOKEN_MINUS: tag = AST_NODE_NEGATION; break; case TOKEN_TILDE: tag = AST_NODE_BIT_NOT; break; case TOKEN_MINUS_PERCENT: tag = AST_NODE_NEGATION_WRAP; break; case TOKEN_AMPERSAND: tag = AST_NODE_ADDRESS_OF; break; case TOKEN_KEYWORD_TRY: tag = AST_NODE_TRY; break; case TOKEN_KEYWORD_AWAIT: tag = AST_NODE_AWAIT; break; default: return parsePrimaryExpr(p); } return addNode( &p->nodes, (AstNodeItem) { .tag = tag, .main_token = nextToken(p), .data = { .lhs = parsePrefixExpr(p), .rhs = 0, }, }); } static AstNodeIndex expectVarDeclExprStatement(Parser* p) { CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); while (true) { const AstNodeIndex var_decl_proto = parseVarDeclProto(p); if (var_decl_proto != 0) { SLICE_APPEND(AstNodeIndex, &p->scratch, var_decl_proto); } else { const AstNodeIndex expr = parseExpr(p); SLICE_APPEND(AstNodeIndex, &p->scratch, expr); } if (eatToken(p, TOKEN_COMMA) == null_token) break; } const uint32_t lhs_count = p->scratch.len - scratch_top.old_len; assert(lhs_count > 0); if (lhs_count == 1) { const AstNodeIndex lhs = p->scratch.arr[scratch_top.old_len]; switch (p->token_tags[p->tok_i]) { case TOKEN_SEMICOLON: p->tok_i++; return lhs; case TOKEN_EQUAL: { // Check if lhs is a var decl that needs initialization const AstNodeTag lhs_tag = p->nodes.tags[lhs]; if (lhs_tag == AST_NODE_SIMPLE_VAR_DECL || lhs_tag == AST_NODE_ALIGNED_VAR_DECL || lhs_tag == AST_NODE_LOCAL_VAR_DECL || lhs_tag == AST_NODE_GLOBAL_VAR_DECL) { p->tok_i++; p->nodes.datas[lhs].rhs = expectExpr(p); expectSemicolon(p); return lhs; } // Regular assignment expression const AstTokenIndex eq_token = nextToken(p); const AstNodeIndex rhs = expectExpr(p); expectSemicolon(p); return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ASSIGN, .main_token = eq_token, .data = { .lhs = lhs, .rhs = rhs }, }); } default: fprintf(stderr, "expectVarDeclExprStatement: assignment not implemented " "for token %s\n", tokenizerGetTagString(p->token_tags[p->tok_i])); exit(1); } } fprintf( stderr, "expectVarDeclExprStatement: destructuring not implemented\n"); exit(1); return 0; // tcc } static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { const AstTokenIndex comptime_token = eatToken(p, TOKEN_KEYWORD_COMPTIME); if (comptime_token != null_token) { // comptime followed by block => comptime block statement const AstNodeIndex block = parseBlock(p); if (block != 0) { return addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_COMPTIME, .main_token = comptime_token, .data = { .lhs = block, .rhs = 0 }, }); } // comptime var decl or expression if (allow_defer_var) { return expectVarDeclExprStatement(p); } fprintf( stderr, "expectStatement: comptime keyword not supported here\n"); exit(1); } const AstNodeIndex tok = p->token_tags[p->tok_i]; switch (tok) { case TOKEN_KEYWORD_NOSUSPEND: case TOKEN_KEYWORD_SUSPEND: case TOKEN_KEYWORD_DEFER: case TOKEN_KEYWORD_ERRDEFER: case TOKEN_KEYWORD_IF: case TOKEN_KEYWORD_ENUM: case TOKEN_KEYWORD_STRUCT: case TOKEN_KEYWORD_UNION:; const char* tok_str = tokenizerGetTagString(tok); fprintf( stderr, "expectStatement does not support keyword %s\n", tok_str); exit(1); default:; } const AstNodeIndex labeled_statement = parseLabeledStatement(p); if (labeled_statement != 0) return labeled_statement; if (allow_defer_var) { return expectVarDeclExprStatement(p); } else { const AstNodeIndex assign_expr = parseAssignExpr(p); expectSemicolon(p); return assign_expr; } } static AstNodeIndex parseBlock(Parser* p) { const AstNodeIndex lbrace = eatToken(p, TOKEN_L_BRACE); if (lbrace == null_token) return null_node; CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); while (1) { if (p->token_tags[p->tok_i] == TOKEN_R_BRACE) break; // "const AstNodeIndex statement" once tinycc supports typeof_unqual // (C23) AstNodeIndex statement = expectStatement(p, true); if (statement == 0) break; SLICE_APPEND(AstNodeIndex, &p->scratch, statement); } expectToken(p, TOKEN_R_BRACE); const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON); const uint32_t statements_len = p->scratch.len - scratch_top.old_len; switch (statements_len) { case 0: return addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_BLOCK_TWO, .main_token = lbrace, .data = { .lhs = 0, .rhs = 0, }, }); case 1: return addNode( &p->nodes, (AstNodeItem) { .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO, .main_token = lbrace, .data = { .lhs = p->scratch.arr[scratch_top.old_len], .rhs = 0, }, }); case 2: return addNode( &p->nodes, (AstNodeItem) { .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO, .main_token = lbrace, .data = { .lhs = p->scratch.arr[scratch_top.old_len], .rhs = p->scratch.arr[scratch_top.old_len + 1], }, }); default:; const AstSubRange span = listToSpan( p, &p->scratch.arr[scratch_top.old_len], statements_len); return addNode( &p->nodes, (AstNodeItem) { .tag = semicolon ? AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK, .main_token = lbrace, .data = { .lhs = span.start, .rhs = span.end, }, }); } return 0; } static AstNodeIndex parseLabeledStatement(Parser* p) { const AstNodeIndex label_token = parseBlockLabel(p); const AstNodeIndex block = parseBlock(p); if (block != 0) return block; const AstNodeIndex loop_stmt = parseLoopStatement(p); if (loop_stmt != 0) return loop_stmt; if (label_token != 0) { fprintf(stderr, "parseLabeledStatement does not support labels\n"); exit(1); } return null_node; } static AstNodeIndex parseGlobalVarDecl(Parser* p) { const AstNodeIndex var_decl = parseVarDeclProto(p); if (var_decl == 0) { return null_node; } if (eatToken(p, TOKEN_EQUAL) != null_token) { const AstNodeIndex init_expr = expectExpr(p); p->nodes.datas[var_decl].rhs = init_expr; } expectToken(p, TOKEN_SEMICOLON); return var_decl; } static AstNodeIndex expectTopLevelDecl(Parser* p) { AstTokenIndex extern_export_inline_token = nextToken(p); switch (p->token_tags[extern_export_inline_token]) { case TOKEN_KEYWORD_EXTERN: eatToken(p, TOKEN_STRING_LITERAL); break; case TOKEN_KEYWORD_EXPORT: case TOKEN_KEYWORD_INLINE: case TOKEN_KEYWORD_NOINLINE: break; default: p->tok_i--; } AstNodeIndex fn_proto = parseFnProto(p); if (fn_proto != 0) { switch (p->token_tags[p->tok_i]) { case TOKEN_SEMICOLON: p->tok_i++; return fn_proto; case TOKEN_L_BRACE:; AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL); AstNodeIndex body_block = parseBlock(p); return setNode(p, fn_decl_index, (AstNodeItem) { .tag = AST_NODE_FN_DECL, .main_token = p->nodes.main_tokens[fn_proto], .data = { .lhs = fn_proto, .rhs = body_block }, }); default: exit(1); // Expected semicolon or left brace } } eatToken(p, TOKEN_KEYWORD_THREADLOCAL); AstNodeIndex var_decl = parseGlobalVarDecl(p); if (var_decl != 0) { return var_decl; } // assuming the program is correct... fprintf(stderr, "the next token should be usingnamespace, which is not supported\n"); exit(1); return 0; // make tcc happy } static void findNextContainerMember(Parser* p) { uint32_t level = 0; while (true) { AstTokenIndex tok = nextToken(p); switch (p->token_tags[tok]) { // Any of these can start a new top level declaration case TOKEN_KEYWORD_TEST: case TOKEN_KEYWORD_COMPTIME: case TOKEN_KEYWORD_PUB: case TOKEN_KEYWORD_EXPORT: case TOKEN_KEYWORD_EXTERN: case TOKEN_KEYWORD_INLINE: case TOKEN_KEYWORD_NOINLINE: case TOKEN_KEYWORD_USINGNAMESPACE: case TOKEN_KEYWORD_THREADLOCAL: case TOKEN_KEYWORD_CONST: case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_FN: if (level == 0) { p->tok_i--; return; } break; case TOKEN_IDENTIFIER: if (p->token_tags[tok + 1] == TOKEN_COMMA && level == 0) { p->tok_i--; return; } break; case TOKEN_COMMA: case TOKEN_SEMICOLON: // This decl was likely meant to end here if (level == 0) return; break; case TOKEN_L_PAREN: case TOKEN_L_BRACKET: case TOKEN_L_BRACE: level++; break; case TOKEN_R_PAREN: case TOKEN_R_BRACKET: if (level != 0) level--; break; case TOKEN_R_BRACE: if (level == 0) { // end of container, exit p->tok_i--; return; } level--; break; case TOKEN_EOF: p->tok_i--; return; default: break; } } } static Members parseContainerMembers(Parser* p) { CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p); while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token) ; FieldState field_state = { .tag = FIELD_STATE_NONE }; bool trailing = false; while (1) { eatDocComments(p); switch (p->token_tags[p->tok_i]) { case TOKEN_KEYWORD_TEST: { const AstTokenIndex test_token = nextToken(p); // test name can be a string literal or identifier, or omitted const AstTokenIndex test_name = (p->token_tags[p->tok_i] == TOKEN_STRING_LITERAL || p->token_tags[p->tok_i] == TOKEN_IDENTIFIER) ? nextToken(p) : null_token; const AstNodeIndex body = parseBlock(p); assert(body != 0); const AstNodeIndex test_decl = addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_TEST_DECL, .main_token = test_token, .data = { .lhs = test_name, .rhs = body }, }); SLICE_APPEND(AstNodeIndex, &p->scratch, test_decl); trailing = p->token_tags[p->tok_i - 1] == TOKEN_R_BRACE; break; } case TOKEN_KEYWORD_USINGNAMESPACE:; const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]); fprintf( stderr, "%s not implemented in parseContainerMembers\n", str); exit(1); case TOKEN_KEYWORD_COMPTIME: // comptime can be a container field modifier or a comptime // block/decl. Check if it's followed by a block (comptime { ... // }). if (p->token_tags[p->tok_i + 1] == TOKEN_L_BRACE) { const AstTokenIndex comptime_token = nextToken(p); const AstNodeIndex block_node = parseBlock(p); SLICE_APPEND(AstNodeIndex, &p->scratch, addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_COMPTIME, .main_token = comptime_token, .data = { .lhs = block_node, .rhs = 0 }, })); trailing = p->token_tags[p->tok_i - 1] == TOKEN_R_BRACE; break; } // Otherwise it's a container field with comptime modifier goto container_field; case TOKEN_KEYWORD_PUB: { p->tok_i++; AstNodeIndex top_level_decl = expectTopLevelDecl(p); if (top_level_decl != 0) { if (field_state.tag == FIELD_STATE_SEEN) { field_state.tag = FIELD_STATE_END; field_state.payload.end = top_level_decl; } SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); } trailing = p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON; break; } case TOKEN_KEYWORD_CONST: case TOKEN_KEYWORD_VAR: case TOKEN_KEYWORD_THREADLOCAL: case TOKEN_KEYWORD_EXPORT: case TOKEN_KEYWORD_EXTERN: case TOKEN_KEYWORD_INLINE: case TOKEN_KEYWORD_NOINLINE: case TOKEN_KEYWORD_FN: { const AstNodeIndex top_level_decl = expectTopLevelDecl(p); if (top_level_decl != 0) { if (field_state.tag == FIELD_STATE_SEEN) { field_state.tag = FIELD_STATE_END; field_state.payload.end = top_level_decl; } SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl); } trailing = (p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON); break; } case TOKEN_EOF: case TOKEN_R_BRACE: goto break_loop; container_field: default:; // skip parseCStyleContainer const AstNodeIndex field_node = expectContainerField(p); switch (field_state.tag) { case FIELD_STATE_NONE: field_state.tag = FIELD_STATE_SEEN; break; case FIELD_STATE_SEEN: break; case FIELD_STATE_END: fprintf(stderr, "parseContainerMembers error condition\n"); exit(1); } SLICE_APPEND(AstNodeIndex, &p->scratch, field_node); switch (p->token_tags[p->tok_i]) { case TOKEN_COMMA: p->tok_i++; trailing = true; continue; case TOKEN_R_BRACE: case TOKEN_EOF: trailing = false; goto break_loop; default:; } findNextContainerMember(p); continue; } } break_loop:; const uint32_t items_len = p->scratch.len - scratch_top.old_len; switch (items_len) { case 0: return (Members) { .len = 0, .lhs = 0, .rhs = 0, .trailing = trailing, }; case 1: return (Members) { .len = 1, .lhs = p->scratch.arr[scratch_top.old_len], .rhs = 0, .trailing = trailing, }; case 2: return (Members) { .len = 2, .lhs = p->scratch.arr[scratch_top.old_len], .rhs = p->scratch.arr[scratch_top.old_len + 1], .trailing = trailing, }; default:; const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len); return (Members) { .len = items_len, .lhs = span.start, .rhs = span.end, .trailing = trailing, }; } } void parseRoot(Parser* p) { addNode( &p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 }); Members root_members = parseContainerMembers(p); AstSubRange root_decls = membersToSpan(root_members, p); p->nodes.datas[0].lhs = root_decls.start; p->nodes.datas[0].rhs = root_decls.end; }