#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ast.h"
#include "common.h"
#include "parser.h"

const AstNodeIndex null_node = 0;
const AstTokenIndex null_token = ~(AstTokenIndex)(0);

static AstNodeIndex parsePrefixExpr(Parser*);
static AstNodeIndex parseTypeExpr(Parser*);
static AstNodeIndex parseBlock(Parser*);
static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex parseExpr(Parser*);
static AstNodeIndex expectExpr(Parser*);

typedef struct {
    enum {
        FIELD_STATE_NONE,
        FIELD_STATE_SEEN,
        FIELD_STATE_END,
    } tag;
    union {
        uint32_t end;
    } payload;
} FieldState;

typedef struct {
    enum {
        SMALL_SPAN_ZERO_OR_ONE,
        SMALL_SPAN_MULTI,
    } tag;
    union {
        AstNodeIndex zero_or_one;
        AstSubRange multi;
    } payload;
} SmallSpan;

typedef struct {
    AstNodeIndexSlice* scratch;
    uint32_t old_len;
} CleanupScratch;

static CleanupScratch initCleanupScratch(Parser* p)
{
    return (CleanupScratch) {
        .scratch = &p->scratch,
        .old_len = p->scratch.len,
    };
}

static void cleanupScratch(CleanupScratch* c)
{
    c->scratch->len = c->old_len;
}

static AstSubRange listToSpan(
    Parser* p, const AstNodeIndex* list, uint32_t count)
{
    SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
    // Append after the existing entries; copying to the front would clobber
    // spans recorded by earlier calls.
    memcpy(&p->extra_data.arr[p->extra_data.len], list,
        count * sizeof(AstNodeIndex));
    p->extra_data.len += count;
    return (AstSubRange) {
        .start = p->extra_data.len - count,
        .end = p->extra_data.len,
    };
}

typedef struct {
    uint32_t len;
    AstNodeIndex lhs;
    AstNodeIndex rhs;
    bool trailing;
} Members;

static AstSubRange membersToSpan(const Members self, Parser* p)
{
    if (self.len <= 2) {
        const AstNodeIndex nodes[] = { self.lhs, self.rhs };
        return listToSpan(p, nodes, self.len);
    } else {
        return (AstSubRange) { .start = self.lhs, .end = self.rhs };
    }
}

static AstTokenIndex nextToken(Parser* p)
{
    return p->tok_i++;
}

static AstTokenIndex eatToken(Parser* p, TokenizerTag tag)
{
    if (p->token_tags[p->tok_i] == tag) {
        return nextToken(p);
    } else {
        return null_token;
    }
}

static AstTokenIndex assertToken(Parser* p, TokenizerTag tag)
{
    const AstTokenIndex token = nextToken(p);
    assert(p->token_tags[token] == tag);
    return token;
}

static void eatDocComments(Parser* p)
{
    while (eatToken(p, TOKEN_DOC_COMMENT) != null_token) { }
}

static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item)
{
    p->nodes.tags[i] = item.tag;
    p->nodes.main_tokens[i] = item.main_token;
    p->nodes.datas[i] = item.data;
    return i;
}
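/*
 * The AST is stored struct-of-arrays style: tags, main_tokens, and datas
 * are parallel arrays indexed by AstNodeIndex, so growing the node list
 * means growing all three in lockstep. Capacity doubles (or jumps straight
 * to the requested length, whichever is larger), keeping repeated
 * single-node appends amortized O(1).
 */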
static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
{
    const uint32_t new_len = list->len + additional;
    if (new_len <= list->cap) {
        return;
    }
    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
    list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
    list->main_tokens
        = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
    list->datas = realloc(list->datas, new_cap * sizeof(AstData));
    if (!list->tags || !list->main_tokens || !list->datas)
        exit(1);
    list->cap = new_cap;
}

static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item)
{
    astNodeListEnsureCapacity(nodes, 1);
    nodes->tags[nodes->len] = item.tag;
    nodes->main_tokens[nodes->len] = item.main_token;
    nodes->datas[nodes->len] = item.data;
    return nodes->len++;
}

static AstNodeIndex addExtra(
    Parser* p, const AstNodeIndex* extra, uint32_t count)
{
    const AstNodeIndex result = p->extra_data.len;
    SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
    // Append after the existing entries and grow len, so the returned index
    // stays valid across subsequent addExtra/listToSpan calls.
    memcpy(&p->extra_data.arr[p->extra_data.len], extra,
        count * sizeof(AstNodeIndex));
    p->extra_data.len += count;
    return result;
}

static AstNodeIndex parseByteAlign(Parser* p)
{
    if (eatToken(p, TOKEN_KEYWORD_ALIGN) == null_token)
        return null_node;
    fprintf(stderr, "parseByteAlign cannot parse alignment\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseAddrSpace(Parser* p)
{
    if (eatToken(p, TOKEN_KEYWORD_ADDRSPACE) == null_token)
        return null_node;
    fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseLinkSection(Parser* p)
{
    if (eatToken(p, TOKEN_KEYWORD_LINKSECTION) == null_token)
        return null_node;
    fprintf(stderr, "parseLinkSection cannot parse linksection\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseCallconv(Parser* p)
{
    if (eatToken(p, TOKEN_KEYWORD_CALLCONV) == null_token)
        return null_node;
    fprintf(stderr, "parseCallconv cannot parse callconv\n");
    exit(1);
    return 0; // tcc
}

typedef struct {
    AstNodeIndex align_expr, value_expr;
} NodeContainerField;

static AstNodeIndex expectContainerField(Parser* p)
{
    eatToken(p, TOKEN_KEYWORD_COMPTIME);
    const AstTokenIndex main_token = p->tok_i;
    if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
        && p->token_tags[p->tok_i + 1] == TOKEN_COLON)
        p->tok_i += 2;
    const AstNodeIndex type_expr = parseTypeExpr(p);
    const AstNodeIndex align_expr = parseByteAlign(p);
    if (eatToken(p, TOKEN_EQUAL) != null_token) {
        fprintf(stderr, "expectContainerField does not support expr\n");
        exit(1);
    }
    const AstNodeIndex value_expr = 0;
    if (align_expr == 0) {
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_CONTAINER_FIELD_INIT,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
                    .rhs = value_expr,
                },
            });
    } else if (value_expr == 0) {
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_CONTAINER_FIELD_ALIGN,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
                    .rhs = align_expr,
                },
            });
    } else {
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_CONTAINER_FIELD,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
                    .rhs = addExtra(p,
                        (AstNodeIndex[]) { align_expr, value_expr }, 2),
                },
            });
    }
}
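/*
 * Argument lists of unknown length are collected on p->scratch, a stack
 * shared by every parse function: initCleanupScratch records the current
 * length and the __cleanup__ attribute restores it on every return path,
 * so nested parsers can push freely. Up to two collected nodes fit
 * directly in a node's lhs/rhs fields (the *_TWO tags); longer lists are
 * copied into extra_data via listToSpan and the node stores the span
 * bounds instead.
 */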
static AstNodeIndex parseBuiltinCall(Parser* p)
{
    const AstTokenIndex builtin_token = assertToken(p, TOKEN_BUILTIN);
    assertToken(p, TOKEN_L_PAREN);
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (true) {
        if (eatToken(p, TOKEN_R_PAREN) != null_token)
            break;
        const AstNodeIndex param = expectExpr(p);
        SLICE_APPEND(AstNodeIndex, &p->scratch, param);
        switch (p->token_tags[p->tok_i]) {
        case TOKEN_COMMA:
            p->tok_i++;
            break;
        case TOKEN_R_PAREN:
            p->tok_i++;
            goto end_loop;
        default:
            fprintf(stderr, "expected comma after arg\n");
            exit(1);
        }
    }
end_loop:;
    // tok_i is one past the ')'; the token two back is a comma exactly when
    // the call had a trailing comma.
    const bool comma = (p->token_tags[p->tok_i - 2] == TOKEN_COMMA);
    const uint32_t params_len = p->scratch.len - scratch_top.old_len;
    switch (params_len) {
    case 0:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_BUILTIN_CALL_TWO,
                .main_token = builtin_token,
                .data = {
                    .lhs = 0,
                    .rhs = 0,
                },
            });
    case 1:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = comma ? AST_NODE_BUILTIN_CALL_TWO_COMMA
                             : AST_NODE_BUILTIN_CALL_TWO,
                .main_token = builtin_token,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = 0,
                },
            });
    case 2:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = comma ? AST_NODE_BUILTIN_CALL_TWO_COMMA
                             : AST_NODE_BUILTIN_CALL_TWO,
                .main_token = builtin_token,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = p->scratch.arr[scratch_top.old_len + 1],
                },
            });
    default:;
        const AstSubRange span = listToSpan(
            p, &p->scratch.arr[scratch_top.old_len], params_len);
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = comma ? AST_NODE_BUILTIN_CALL_COMMA
                             : AST_NODE_BUILTIN_CALL,
                .main_token = builtin_token,
                .data = {
                    .lhs = span.start,
                    .rhs = span.end,
                },
            });
    }
}

static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKEN_CHAR_LITERAL:
    case TOKEN_NUMBER_LITERAL:
    case TOKEN_KEYWORD_UNREACHABLE:
    case TOKEN_KEYWORD_ANYFRAME:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    case TOKEN_STRING_LITERAL:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_STRING_LITERAL,
                .main_token = nextToken(p),
                .data = {},
            });
    case TOKEN_BUILTIN:
        return parseBuiltinCall(p);
    case TOKEN_KEYWORD_FN:
    case TOKEN_KEYWORD_IF:
    case TOKEN_KEYWORD_SWITCH:
    case TOKEN_KEYWORD_EXTERN:
    case TOKEN_KEYWORD_PACKED:
    case TOKEN_KEYWORD_STRUCT:
    case TOKEN_KEYWORD_OPAQUE:
    case TOKEN_KEYWORD_ENUM:
    case TOKEN_KEYWORD_UNION:
    case TOKEN_KEYWORD_COMPTIME:
    case TOKEN_MULTILINE_STRING_LITERAL_LINE:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    case TOKEN_IDENTIFIER:
        if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
            fprintf(stderr,
                "parsePrimaryTypeExpr does not support identifier followed "
                "by colon\n");
            exit(1);
        }
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_IDENTIFIER,
                .main_token = nextToken(p),
                .data = {},
            });
    case TOKEN_KEYWORD_INLINE:
    case TOKEN_KEYWORD_FOR:
    case TOKEN_KEYWORD_WHILE:
    case TOKEN_PERIOD:
    case TOKEN_KEYWORD_ERROR:
    case TOKEN_L_PAREN:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}

static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs)
{
    (void)lhs;
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKEN_L_BRACKET:
    case TOKEN_PERIOD_ASTERISK:
    case TOKEN_INVALID_PERIODASTERISKS:
    case TOKEN_PERIOD:
        fprintf(stderr, "parseSuffixOp does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}
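/*
 * A suffix expression is a primary type expression followed by any number
 * of suffix operators and call argument lists. Only empty call parentheses
 * are accepted so far, but the result still goes through the same
 * scratch/span machinery so the CALL_ONE / CALL node encodings match the
 * general case.
 */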
static AstNodeIndex parseSuffixExpr(Parser* p)
{
    if (eatToken(p, TOKEN_KEYWORD_ASYNC) != null_token) {
        fprintf(stderr, "async not supported\n");
        exit(1);
    }
    AstNodeIndex res = parsePrimaryTypeExpr(p);
    if (res == 0)
        return res;
    while (true) {
        const AstNodeIndex suffix_op = parseSuffixOp(p, res);
        if (suffix_op != 0) {
            res = suffix_op;
            continue;
        }
        const AstTokenIndex lparen = eatToken(p, TOKEN_L_PAREN);
        if (lparen == null_token)
            return res;
        CleanupScratch scratch_top
            __attribute__((__cleanup__(cleanupScratch)))
            = initCleanupScratch(p);
        while (true) {
            if (eatToken(p, TOKEN_R_PAREN) != null_token)
                break;
            fprintf(stderr, "parseSuffixExpr can only parse ()\n");
            exit(1);
        }
        const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA;
        const uint32_t params_len = p->scratch.len - scratch_top.old_len;
        switch (params_len) {
        case 0:
            return addNode(&p->nodes,
                (AstNodeItem) {
                    .tag = comma ? AST_NODE_CALL_ONE_COMMA
                                 : AST_NODE_CALL_ONE,
                    .main_token = lparen,
                    .data = {
                        .lhs = res,
                        .rhs = 0,
                    },
                });
        case 1:
            return addNode(&p->nodes,
                (AstNodeItem) {
                    .tag = comma ? AST_NODE_CALL_ONE_COMMA
                                 : AST_NODE_CALL_ONE,
                    .main_token = lparen,
                    .data = {
                        .lhs = res,
                        .rhs = p->scratch.arr[scratch_top.old_len],
                    },
                });
        default:;
            const AstSubRange span = listToSpan(
                p, &p->scratch.arr[scratch_top.old_len], params_len);
            return addNode(&p->nodes,
                (AstNodeItem) {
                    .tag = comma ? AST_NODE_CALL_COMMA : AST_NODE_CALL,
                    .main_token = lparen,
                    .data = {
                        .lhs = res,
                        .rhs = addExtra(p,
                            (AstNodeIndex[]) { span.start, span.end }, 2),
                    },
                });
        }
    }
}

static AstTokenIndex expectToken(Parser* p, TokenizerTag tag)
{
    if (p->token_tags[p->tok_i] == tag) {
        return nextToken(p);
    } else {
        fprintf(stderr, "expected token %s, got %s\n",
            tokenizerGetTagString(tag),
            tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    return 0; // tcc
}

static AstNodeIndex parseErrorUnionExpr(Parser* p)
{
    const AstNodeIndex suffix_expr = parseSuffixExpr(p);
    if (suffix_expr == 0)
        return null_node;
    const AstTokenIndex bang = eatToken(p, TOKEN_BANG);
    if (bang == null_token)
        return suffix_expr;
    return addNode(&p->nodes,
        (AstNodeItem) {
            .tag = AST_NODE_ERROR_UNION,
            .main_token = bang,
            .data = {
                .lhs = suffix_expr,
                .rhs = parseTypeExpr(p),
            },
        });
}

static AstNodeIndex parseTypeExpr(Parser* p)
{
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKEN_QUESTION_MARK:
    case TOKEN_KEYWORD_ANYFRAME:
    case TOKEN_ASTERISK:
    case TOKEN_ASTERISK_ASTERISK:
    case TOKEN_L_BRACKET:
        fprintf(stderr, "parseTypeExpr not supported for %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return parseErrorUnionExpr(p);
    }
}

static SmallSpan parseParamDeclList(Parser* p)
{
    // can only parse functions with no parameter declarations
    expectToken(p, TOKEN_L_PAREN);
    expectToken(p, TOKEN_R_PAREN);
    return (SmallSpan) {
        .tag = SMALL_SPAN_ZERO_OR_ONE,
    };
}

static uint32_t reserveNode(Parser* p, AstNodeTag tag)
{
    astNodeListEnsureCapacity(&p->nodes, 1);
    p->nodes.len++;
    p->nodes.tags[p->nodes.len - 1] = tag;
    return p->nodes.len - 1;
}

static AstNodeIndex parseFnProto(Parser* p)
{
    const AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
    if (fn_token == null_token)
        return null_node;
    const AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO);
    eatToken(p, TOKEN_IDENTIFIER);
    const SmallSpan params = parseParamDeclList(p);
    const AstNodeIndex align_expr = parseByteAlign(p);
    const AstNodeIndex addrspace_expr = parseAddrSpace(p);
    const AstNodeIndex section_expr = parseLinkSection(p);
    const AstNodeIndex callconv_expr = parseCallconv(p);
    eatToken(p, TOKEN_BANG);
    const AstNodeIndex return_type_expr = parseTypeExpr(p);
    if (align_expr == 0 && section_expr == 0 && callconv_expr == 0
        && addrspace_expr == 0) {
        switch (params.tag) {
        case SMALL_SPAN_ZERO_OR_ONE:
            return setNode(p, fn_proto_index,
                (AstNodeItem) {
                    .tag = AST_NODE_FN_PROTO_SIMPLE,
                    .main_token = fn_token,
                    .data = {
                        .lhs = params.payload.zero_or_one,
                        .rhs = return_type_expr,
                    },
                });
        case SMALL_SPAN_MULTI:
            fprintf(stderr, "parseFnProto does not support multi params\n");
            exit(1);
        }
    }
    fprintf(stderr, "parseFnProto does not support complex function decls\n");
    exit(1);
    return 0; // tcc
}
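/*
 * A block label is the two-token lookahead `identifier :`. Both tokens are
 * consumed only when the pair matches; otherwise the position is left
 * untouched. Absence is reported as 0 rather than null_token: callers
 * store the result directly in node data, where 0 is the "no label"
 * sentinel, and a statement label can never sit at token index 0.
 */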
static AstTokenIndex parseBlockLabel(Parser* p)
{
    if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
        && p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
        const AstTokenIndex identifier = p->tok_i;
        p->tok_i += 2;
        return identifier;
    }
    return null_node; // 0 doubles as "no label"
}

static AstNodeIndex parseForStatement(Parser* p)
{
    const AstTokenIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR);
    if (for_token == null_token)
        return null_node;
    (void)for_token;
    fprintf(stderr, "parseForStatement cannot parse for statements\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseWhileStatement(Parser* p)
{
    const AstTokenIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE);
    if (while_token == null_token)
        return null_node;
    (void)while_token;
    fprintf(stderr, "parseWhileStatement cannot parse while statements\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseLoopStatement(Parser* p)
{
    const AstTokenIndex inline_token = eatToken(p, TOKEN_KEYWORD_INLINE);
    const AstNodeIndex for_statement = parseForStatement(p);
    if (for_statement != 0)
        return for_statement;
    const AstNodeIndex while_statement = parseWhileStatement(p);
    if (while_statement != 0)
        return while_statement;
    if (inline_token == null_token)
        return null_node;
    fprintf(stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseAssignExpr(Parser* p)
{
    (void)p;
    fprintf(stderr, "parseAssignExpr not implemented\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex parseVarDeclProto(Parser* p)
{
    AstTokenIndex mut_token;
    if ((mut_token = eatToken(p, TOKEN_KEYWORD_CONST)) == null_token)
        if ((mut_token = eatToken(p, TOKEN_KEYWORD_VAR)) == null_token)
            return null_node;
    expectToken(p, TOKEN_IDENTIFIER);
    const AstNodeIndex type_node
        = eatToken(p, TOKEN_COLON) == null_token ? 0 : parseTypeExpr(p);
    const AstNodeIndex align_node = parseByteAlign(p);
    const AstNodeIndex addrspace_node = parseAddrSpace(p);
    const AstNodeIndex section_node = parseLinkSection(p);
    if (section_node == 0 && addrspace_node == 0) {
        if (align_node == 0) {
            return addNode(&p->nodes,
                (AstNodeItem) {
                    .tag = AST_NODE_SIMPLE_VAR_DECL,
                    .main_token = mut_token,
                    .data = {
                        .lhs = type_node,
                        .rhs = 0,
                    },
                });
        }
        fprintf(stderr, "parseVarDeclProto got something too complicated\n");
        exit(1);
    } else {
        fprintf(stderr, "parseVarDeclProto got something too complicated\n");
        exit(1);
    }
    return 0; // tcc
}

static AstTokenIndex parseBreakLabel(Parser* p)
{
    if (eatToken(p, TOKEN_COLON) == null_token)
        return null_node; // 0 means "no label"
    return expectToken(p, TOKEN_IDENTIFIER);
}

static AstNodeIndex parseCurlySuffixExpr(Parser* p)
{
    const AstNodeIndex lhs = parseTypeExpr(p);
    if (lhs == 0)
        return null_node;
    const AstTokenIndex lbrace = eatToken(p, TOKEN_L_BRACE);
    if (lbrace == null_token)
        return lhs;
    fprintf(stderr, "parseCurlySuffixExpr is not implemented\n");
    exit(1);
    return 0; // tcc
}

typedef struct {
    int8_t prec;
    AstNodeTag tag;
    enum {
        ASSOC_LEFT, // the zero-initialized default
        ASSOC_NONE,
    } assoc;
} OperInfo;
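/*
 * Binary operator table for precedence climbing. A higher prec binds
 * tighter; prec -1 marks "not a binary operator" and stops the climb.
 * Comparison operators are ASSOC_NONE, so chains like `a < b < c` trip the
 * banned_prec assertion in parseExprPrecedence instead of silently
 * left-associating. For example, `a + b * c` parses as `a + (b * c)`:
 * after `+` (prec 60) the right-hand side is parsed with min_prec 61,
 * which still admits `*` (prec 70) but would reject another `+`.
 */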
static OperInfo operTable(TokenizerTag tok_tag)
{
    switch (tok_tag) {
    case TOKEN_KEYWORD_OR:
        return (OperInfo) { .prec = 10, .tag = AST_NODE_BOOL_OR };
    case TOKEN_KEYWORD_AND:
        return (OperInfo) { .prec = 20, .tag = AST_NODE_BOOL_AND };
    case TOKEN_EQUAL_EQUAL:
        return (OperInfo) {
            .prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE
        };
    case TOKEN_BANG_EQUAL:
        return (OperInfo) {
            .prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE
        };
    case TOKEN_ANGLE_BRACKET_LEFT:
        return (OperInfo) {
            .prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE
        };
    case TOKEN_ANGLE_BRACKET_RIGHT:
        return (OperInfo) {
            .prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE
        };
    case TOKEN_ANGLE_BRACKET_LEFT_EQUAL:
        return (OperInfo) {
            .prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE
        };
    case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL:
        return (OperInfo) {
            .prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE
        };
    case TOKEN_AMPERSAND:
        return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_AND };
    case TOKEN_CARET:
        return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_XOR };
    case TOKEN_PIPE:
        return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_OR };
    case TOKEN_KEYWORD_ORELSE:
        return (OperInfo) { .prec = 40, .tag = AST_NODE_ORELSE };
    case TOKEN_KEYWORD_CATCH:
        return (OperInfo) { .prec = 40, .tag = AST_NODE_CATCH };
    case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
        return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL };
    case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
        return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL_SAT };
    case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
        return (OperInfo) { .prec = 50, .tag = AST_NODE_SHR };
    case TOKEN_PLUS:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD };
    case TOKEN_MINUS:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB };
    case TOKEN_PLUS_PLUS:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_ARRAY_CAT };
    case TOKEN_PLUS_PERCENT:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_WRAP };
    case TOKEN_MINUS_PERCENT:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_WRAP };
    case TOKEN_PLUS_PIPE:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_SAT };
    case TOKEN_MINUS_PIPE:
        return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_SAT };
    case TOKEN_PIPE_PIPE:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_MERGE_ERROR_SETS };
    case TOKEN_ASTERISK:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL };
    case TOKEN_SLASH:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_DIV };
    case TOKEN_PERCENT:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_MOD };
    case TOKEN_ASTERISK_ASTERISK:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_ARRAY_MULT };
    case TOKEN_ASTERISK_PERCENT:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_WRAP };
    case TOKEN_ASTERISK_PIPE:
        return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_SAT };
    default:
        return (OperInfo) { .prec = -1, .tag = AST_NODE_ROOT };
    }
}

static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec)
{
    assert(min_prec >= 0);
    AstNodeIndex node = parsePrefixExpr(p);
    if (node == 0)
        return null_node;
    int8_t banned_prec = -1;
    while (true) {
        const TokenizerTag tok_tag = p->token_tags[p->tok_i];
        const OperInfo info = operTable(tok_tag);
        if (info.prec < min_prec)
            break;
        assert(info.prec != banned_prec);
        const AstTokenIndex oper_token = nextToken(p);
        if (tok_tag == TOKEN_KEYWORD_CATCH) {
            fprintf(stderr, "parsePayload not supported\n");
            exit(1);
            return 0; // tcc
        }
        const AstNodeIndex rhs = parseExprPrecedence(p, info.prec + 1);
        assert(rhs != 0);
        node = addNode(&p->nodes,
            (AstNodeItem) {
                .tag = info.tag,
                .main_token = oper_token,
                .data = {
                    .lhs = node,
                    .rhs = rhs,
                },
            });
        if (info.assoc == ASSOC_NONE)
            banned_prec = info.prec;
    }
    return node;
}

static AstNodeIndex parseExpr(Parser* p)
{
    return parseExprPrecedence(p, 0);
}

static AstNodeIndex expectExpr(Parser* p)
{
    const AstNodeIndex node = parseExpr(p);
    assert(node != 0);
    return node;
}
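/*
 * Primary expressions dispatch on the current token. The subtle case is an
 * identifier followed by a colon: that is a label, and the token after the
 * colon decides between a labeled block, a labeled loop, or falling back
 * to a curly-suffix expression.
 */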
static AstNodeIndex parsePrimaryExpr(Parser* p)
{
    const char* tok_str = tokenizerGetTagString(p->token_tags[p->tok_i]);
    switch (p->token_tags[p->tok_i]) {
    case TOKEN_KEYWORD_ASM:
    case TOKEN_KEYWORD_IF:
        fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok_str);
        exit(1);
        break;
    case TOKEN_KEYWORD_BREAK: {
        // Evaluate in a fixed order; initializer lists leave the order of
        // side effects unspecified.
        const AstTokenIndex break_token = nextToken(p);
        const AstTokenIndex label = parseBreakLabel(p);
        const AstNodeIndex rhs = parseExpr(p);
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_BREAK,
                .main_token = break_token,
                .data = {
                    .lhs = label,
                    .rhs = rhs,
                },
            });
    }
    case TOKEN_KEYWORD_CONTINUE: {
        const AstTokenIndex continue_token = nextToken(p);
        const AstTokenIndex label = parseBreakLabel(p);
        const AstNodeIndex rhs = parseExpr(p);
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_CONTINUE,
                .main_token = continue_token,
                .data = {
                    .lhs = label,
                    .rhs = rhs,
                },
            });
    }
    case TOKEN_KEYWORD_COMPTIME:
    case TOKEN_KEYWORD_NOSUSPEND:
    case TOKEN_KEYWORD_RESUME:
    case TOKEN_KEYWORD_RETURN:
        fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok_str);
        exit(1);
        return 0; // tcc
    case TOKEN_IDENTIFIER:
        if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
            switch (p->token_tags[p->tok_i + 2]) {
            case TOKEN_KEYWORD_INLINE:
            case TOKEN_KEYWORD_FOR:
            case TOKEN_KEYWORD_WHILE:
                fprintf(stderr, "parsePrimaryExpr NotImplemented\n");
                exit(1);
                return 0; // tcc
            case TOKEN_L_BRACE:
                p->tok_i += 2;
                return parseBlock(p);
            default:
                return parseCurlySuffixExpr(p);
            }
        } else {
            return parseCurlySuffixExpr(p);
        }
    case TOKEN_KEYWORD_INLINE:
    case TOKEN_KEYWORD_FOR:
    case TOKEN_KEYWORD_WHILE:
        fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok_str);
        exit(1);
        return 0; // tcc
    case TOKEN_L_BRACE:
        return parseBlock(p);
    default:
        return parseCurlySuffixExpr(p);
    }
    return 0; // tcc
}

static AstNodeIndex parsePrefixExpr(Parser* p)
{
    AstNodeTag tag;
    switch (p->token_tags[p->tok_i]) {
    case TOKEN_BANG:
        tag = AST_NODE_BOOL_NOT;
        break;
    case TOKEN_MINUS:
        tag = AST_NODE_NEGATION;
        break;
    case TOKEN_TILDE:
        tag = AST_NODE_BIT_NOT;
        break;
    case TOKEN_MINUS_PERCENT:
        tag = AST_NODE_NEGATION_WRAP;
        break;
    case TOKEN_AMPERSAND:
        tag = AST_NODE_ADDRESS_OF;
        break;
    case TOKEN_KEYWORD_TRY:
        tag = AST_NODE_TRY;
        break;
    case TOKEN_KEYWORD_AWAIT:
        tag = AST_NODE_AWAIT;
        break;
    default:
        return parsePrimaryExpr(p);
    }
    // Consume the operator token before recursing; inside the initializer
    // list the evaluation order would be unspecified.
    const AstTokenIndex main_token = nextToken(p);
    return addNode(&p->nodes,
        (AstNodeItem) {
            .tag = tag,
            .main_token = main_token,
            .data = {
                .lhs = parsePrefixExpr(p),
                .rhs = 0,
            },
        });
}

static AstNodeIndex expectVarDeclExprStatement(Parser* p)
{
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (true) {
        const AstNodeIndex var_decl_proto = parseVarDeclProto(p);
        if (var_decl_proto != 0) {
            SLICE_APPEND(AstNodeIndex, &p->scratch, var_decl_proto);
        } else {
            const AstNodeIndex expr = parseExpr(p);
            SLICE_APPEND(AstNodeIndex, &p->scratch, expr);
        }
        if (eatToken(p, TOKEN_COMMA) == null_token)
            break;
    }
    const uint32_t lhs_count = p->scratch.len - scratch_top.old_len;
    assert(lhs_count > 0);
    fprintf(stderr, "expectVarDeclExprStatement only partially implemented\n");
    exit(1);
    return 0; // tcc
}

static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var)
{
    if (eatToken(p, TOKEN_KEYWORD_COMPTIME) != null_token) {
        fprintf(stderr, "expectStatement: comptime keyword not supported\n");
        exit(1);
    }
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKEN_KEYWORD_NOSUSPEND:
    case TOKEN_KEYWORD_SUSPEND:
    case TOKEN_KEYWORD_DEFER:
    case TOKEN_KEYWORD_ERRDEFER:
    case TOKEN_KEYWORD_IF:
    case TOKEN_KEYWORD_ENUM:
    case TOKEN_KEYWORD_STRUCT:
    case TOKEN_KEYWORD_UNION:;
        const char* tok_str = tokenizerGetTagString(tok);
        fprintf(stderr, "expectStatement does not support keyword %s\n",
            tok_str);
        exit(1);
    default:;
    }
    const AstNodeIndex labeled_statement = parseLabeledStatement(p);
    if (labeled_statement != 0)
        return labeled_statement;
    if (allow_defer_var) {
        return expectVarDeclExprStatement(p);
    } else {
        return parseAssignExpr(p);
    }
}
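/*
 * Blocks reuse the small-node encoding: up to two statements live directly
 * in lhs/rhs as BLOCK_TWO nodes, longer bodies become a span in
 * extra_data. The *_SEMICOLON tag variants record whether the final
 * statement ended with a semicolon.
 */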
static AstNodeIndex parseBlock(Parser* p)
{
    const AstTokenIndex lbrace = eatToken(p, TOKEN_L_BRACE);
    if (lbrace == null_token)
        return null_node;
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (1) {
        if (p->token_tags[p->tok_i] == TOKEN_R_BRACE)
            break;
        // "const AstNodeIndex statement" once tinycc supports typeof_unqual
        // (C23)
        AstNodeIndex statement = expectStatement(p, true);
        if (statement == 0)
            break;
        SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
    }
    expectToken(p, TOKEN_R_BRACE);
    // tok_i is one past the '}'; look back two tokens to see whether the
    // last statement ended with a semicolon.
    const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON);
    const uint32_t statements_len = p->scratch.len - scratch_top.old_len;
    switch (statements_len) {
    case 0:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = 0,
                    .rhs = 0,
                },
            });
    case 1:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON
                                 : AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = 0,
                },
            });
    case 2:
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON
                                 : AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = p->scratch.arr[scratch_top.old_len + 1],
                },
            });
    default:;
        const AstSubRange span = listToSpan(
            p, &p->scratch.arr[scratch_top.old_len], statements_len);
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK,
                .main_token = lbrace,
                .data = {
                    .lhs = span.start,
                    .rhs = span.end,
                },
            });
    }
    return 0; // tcc
}

static AstNodeIndex parseLabeledStatement(Parser* p)
{
    const AstTokenIndex label_token = parseBlockLabel(p);
    const AstNodeIndex block = parseBlock(p);
    if (block != 0)
        return block;
    const AstNodeIndex loop_stmt = parseLoopStatement(p);
    if (loop_stmt != 0)
        return loop_stmt;
    if (label_token != 0) {
        fprintf(stderr, "parseLabeledStatement does not support labels\n");
        exit(1);
    }
    return null_node;
}

static AstNodeIndex parseGlobalVarDecl(Parser* p)
{
    const AstNodeIndex var_decl = parseVarDeclProto(p);
    if (var_decl == 0) {
        return null_node;
    }
    fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
    exit(1);
    return 0; // tcc
}
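/*
 * A top-level declaration may be prefixed with extern "lib", export,
 * inline, or noinline; the token is consumed optimistically and pushed
 * back when it is none of those. A function with a body takes two nodes:
 * the FN_DECL node is reserved before the body is parsed, so it precedes
 * the block's nodes, and is filled in afterwards with setNode.
 */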
static AstNodeIndex expectTopLevelDecl(Parser* p)
{
    const AstTokenIndex extern_export_inline_token = nextToken(p);
    switch (p->token_tags[extern_export_inline_token]) {
    case TOKEN_KEYWORD_EXTERN:
        eatToken(p, TOKEN_STRING_LITERAL);
        break;
    case TOKEN_KEYWORD_EXPORT:
    case TOKEN_KEYWORD_INLINE:
    case TOKEN_KEYWORD_NOINLINE:
        break;
    default:
        p->tok_i--;
    }
    const AstNodeIndex fn_proto = parseFnProto(p);
    if (fn_proto != 0) {
        switch (p->token_tags[p->tok_i]) {
        case TOKEN_SEMICOLON:
            p->tok_i++;
            return fn_proto;
        case TOKEN_L_BRACE:;
            const AstNodeIndex fn_decl_index
                = reserveNode(p, AST_NODE_FN_DECL);
            const AstNodeIndex body_block = parseBlock(p);
            return setNode(p, fn_decl_index,
                (AstNodeItem) {
                    .tag = AST_NODE_FN_DECL,
                    .main_token = p->nodes.main_tokens[fn_proto],
                    .data = { .lhs = fn_proto, .rhs = body_block },
                });
        default:
            exit(1); // expected semicolon or left brace
        }
    }
    eatToken(p, TOKEN_KEYWORD_THREADLOCAL);
    const AstNodeIndex var_decl = parseGlobalVarDecl(p);
    if (var_decl != 0) {
        return var_decl;
    }
    // assuming the program is correct...
    fprintf(stderr,
        "the next token should be usingnamespace, which is not supported\n");
    exit(1);
    return 0; // make tcc happy
}

void findNextContainerMember(Parser* p)
{
    uint32_t level = 0;
    while (true) {
        const AstTokenIndex tok = nextToken(p);
        switch (p->token_tags[tok]) {
        // Any of these can start a new top level declaration.
        case TOKEN_KEYWORD_TEST:
        case TOKEN_KEYWORD_COMPTIME:
        case TOKEN_KEYWORD_PUB:
        case TOKEN_KEYWORD_EXPORT:
        case TOKEN_KEYWORD_EXTERN:
        case TOKEN_KEYWORD_INLINE:
        case TOKEN_KEYWORD_NOINLINE:
        case TOKEN_KEYWORD_USINGNAMESPACE:
        case TOKEN_KEYWORD_THREADLOCAL:
        case TOKEN_KEYWORD_CONST:
        case TOKEN_KEYWORD_VAR:
        case TOKEN_KEYWORD_FN:
            if (level == 0) {
                p->tok_i--;
                return;
            }
            break;
        case TOKEN_IDENTIFIER:
            if (p->token_tags[tok + 1] == TOKEN_COMMA && level == 0) {
                p->tok_i--;
                return;
            }
            break;
        case TOKEN_COMMA:
        case TOKEN_SEMICOLON:
            // This decl was likely meant to end here.
            if (level == 0)
                return;
            break;
        case TOKEN_L_PAREN:
        case TOKEN_L_BRACKET:
        case TOKEN_L_BRACE:
            level++;
            break;
        case TOKEN_R_PAREN:
        case TOKEN_R_BRACKET:
            if (level != 0)
                level--;
            break;
        case TOKEN_R_BRACE:
            if (level == 0) {
                // end of container, exit
                p->tok_i--;
                return;
            }
            level--;
            break;
        case TOKEN_EOF:
            p->tok_i--;
            return;
        default:
            break;
        }
    }
}
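/*
 * Container members are parsed with a small state machine: FieldState
 * tracks whether a field has been seen and, via FIELD_STATE_END, whether a
 * declaration has appeared after the last field, in which case a
 * subsequent field is an error. When a field is not cleanly terminated,
 * findNextContainerMember skips ahead (tracking bracket nesting) to the
 * start of the next plausible member so parsing can continue.
 */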
static Members parseContainerMembers(Parser* p)
{
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
        ;
    FieldState field_state = { .tag = FIELD_STATE_NONE };
    bool trailing = false;
    while (1) {
        eatDocComments(p);
        switch (p->token_tags[p->tok_i]) {
        case TOKEN_KEYWORD_TEST:
        case TOKEN_KEYWORD_COMPTIME:
        case TOKEN_KEYWORD_USINGNAMESPACE:;
            const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
            fprintf(stderr, "%s not implemented in parseContainerMembers\n",
                str);
            exit(1);
        case TOKEN_KEYWORD_PUB: {
            p->tok_i++;
            AstNodeIndex top_level_decl = expectTopLevelDecl(p);
            if (top_level_decl != 0) {
                if (field_state.tag == FIELD_STATE_SEEN) {
                    field_state.tag = FIELD_STATE_END;
                    field_state.payload.end = top_level_decl;
                }
                SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
            }
            trailing = p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON;
            break;
        }
        case TOKEN_KEYWORD_CONST:
        case TOKEN_KEYWORD_VAR:
        case TOKEN_KEYWORD_THREADLOCAL:
        case TOKEN_KEYWORD_EXPORT:
        case TOKEN_KEYWORD_EXTERN:
        case TOKEN_KEYWORD_INLINE:
        case TOKEN_KEYWORD_NOINLINE:
        case TOKEN_KEYWORD_FN: {
            const AstNodeIndex top_level_decl = expectTopLevelDecl(p);
            if (top_level_decl != 0) {
                if (field_state.tag == FIELD_STATE_SEEN) {
                    field_state.tag = FIELD_STATE_END;
                    field_state.payload.end = top_level_decl;
                }
                SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
            }
            trailing = (p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON);
            break;
        }
        case TOKEN_EOF:
        case TOKEN_R_BRACE:
            goto break_loop;
        default:;
            // skip parseCStyleContainer
            const AstNodeIndex container_field = expectContainerField(p);
            switch (field_state.tag) {
            case FIELD_STATE_NONE:
                field_state.tag = FIELD_STATE_SEEN;
                break;
            case FIELD_STATE_SEEN:
                break;
            case FIELD_STATE_END:
                fprintf(stderr, "parseContainerMembers error condition\n");
                exit(1);
            }
            SLICE_APPEND(AstNodeIndex, &p->scratch, container_field);
            switch (p->token_tags[p->tok_i]) {
            case TOKEN_COMMA:
                p->tok_i++;
                trailing = true;
                continue;
            case TOKEN_R_BRACE:
            case TOKEN_EOF:
                trailing = false;
                goto break_loop;
            default:;
            }
            findNextContainerMember(p);
            continue;
        }
    }
break_loop:;
    const uint32_t items_len = p->scratch.len - scratch_top.old_len;
    switch (items_len) {
    case 0:
        return (Members) {
            .len = 0,
            .lhs = 0,
            .rhs = 0,
            .trailing = trailing,
        };
    case 1:
        return (Members) {
            .len = 1,
            .lhs = p->scratch.arr[scratch_top.old_len],
            .rhs = 0,
            .trailing = trailing,
        };
    case 2:
        return (Members) {
            .len = 2,
            .lhs = p->scratch.arr[scratch_top.old_len],
            .rhs = p->scratch.arr[scratch_top.old_len + 1],
            .trailing = trailing,
        };
    default:;
        const AstSubRange span = listToSpan(
            p, &p->scratch.arr[scratch_top.old_len], items_len);
        return (Members) {
            .len = items_len,
            .lhs = span.start,
            .rhs = span.end,
            .trailing = trailing,
        };
    }
}

void parseRoot(Parser* p)
{
    addNode(&p->nodes,
        (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });
    const Members root_members = parseContainerMembers(p);
    const AstSubRange root_decls = membersToSpan(root_members, p);
    p->nodes.datas[0].lhs = root_decls.start;
    p->nodes.datas[0].rhs = root_decls.end;
}