Files
zig0/parser.c
2025-01-09 07:27:39 +01:00

1261 lines
38 KiB
C

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ast.h"
#include "common.h"
#include "parser.h"
// Sentinel node index (0) that the parse functions in this file return when
// they did not produce a node.
const AstNodeIndex null_node = 0;
// Sentinel token index (all bits set) that eatToken() returns when the current
// token does not match. Note that it is NOT equal to null_node.
const AstTokenIndex null_token = ~(AstTokenIndex)(0);
// Forward declarations for functions that are called before their definition
// because the grammar is mutually recursive.
static AstNodeIndex parsePrefixExpr(Parser*);
static AstNodeIndex parseTypeExpr(Parser*);
static AstNodeIndex parseBlock(Parser* p);
static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex parseExpr(Parser*);
// Tracks, while parsing container members, whether container fields have been
// encountered and whether a declaration has followed them.
//   FIELD_STATE_NONE: no container field parsed so far.
//   FIELD_STATE_SEEN: at least one container field parsed.
//   FIELD_STATE_END:  a declaration was parsed after a field; payload.end
//                     holds the node index of that declaration.
typedef struct {
enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
union {
uint32_t end;
} payload;
} FieldState;
// Tagged union describing a short list of nodes: either a single node index
// (SMALL_SPAN_ZERO_OR_ONE, payload.zero_or_one) or a sub-range
// (SMALL_SPAN_MULTI, payload.multi). `tag` selects the active payload member.
typedef struct {
enum { SMALL_SPAN_ZERO_OR_ONE, SMALL_SPAN_MULTI } tag;
union {
AstNodeIndex zero_or_one;
AstSubRange multi;
} payload;
} SmallSpan;
// Remembers a scratch slice together with the length it had at a given point,
// so that cleanupScratch() can truncate it back to that length.
typedef struct {
AstNodeIndexSlice* scratch; // slice to restore
uint32_t old_len; // length of the slice when the snapshot was taken
} CleanupScratch;
// Takes a snapshot of the parser's scratch slice: a pointer to it plus its
// current length. Pair with cleanupScratch() to drop anything appended later.
static CleanupScratch initCleanupScratch(Parser* p) {
    CleanupScratch snapshot;
    snapshot.scratch = &p->scratch;
    snapshot.old_len = p->scratch.len;
    return snapshot;
}
// Truncates the remembered scratch slice back to the length recorded in `c`.
// The signature (pointer to the guarded variable) matches what the
// __cleanup__ attribute used throughout this file expects.
static void cleanupScratch(CleanupScratch* c) {
    AstNodeIndexSlice* const slice = c->scratch;
    slice->len = c->old_len;
}
// Appends `count` node indices from `list` to the end of p->extra_data and
// returns the half-open range [start, end) they now occupy.
//
// `list` must not point into p->extra_data itself: the capacity call may
// reallocate that buffer (callers in this file pass scratch or stack arrays).
static AstSubRange listToSpan(
    Parser* p, const AstNodeIndex* list, uint32_t count) {
    SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
    const uint32_t start = p->extra_data.len;
    // Copy to the END of the existing data; writing at arr[0] would clobber
    // previously stored extra data. Skip the call for empty lists so memcpy
    // is never handed a buffer that may still be NULL.
    if (count != 0) {
        memcpy(p->extra_data.arr + start, list, count * sizeof(AstNodeIndex));
    }
    p->extra_data.len = start + count;
    return (AstSubRange) {
        .start = start,
        .end = start + count,
    };
}
// Result of parsing a list of container members.
// When len <= 2, lhs and rhs hold the member node indices directly (unused
// slots are 0). When len > 2, lhs and rhs are the start and end of the span
// returned by listToSpan() for the members.
typedef struct {
uint32_t len; // number of members parsed
AstNodeIndex lhs;
AstNodeIndex rhs;
bool trailing; // whether the member list ended with a trailing ',' or ';'
} Members;
// Converts a Members value into a sub-range. Lists of more than two members
// already carry a span in lhs/rhs; shorter lists store the nodes inline, so
// they are pushed through listToSpan() first.
static AstSubRange membersToSpan(const Members self, Parser* p) {
    if (self.len > 2) {
        return (AstSubRange) { .start = self.lhs, .end = self.rhs };
    }
    const AstNodeIndex inline_nodes[2] = { self.lhs, self.rhs };
    return listToSpan(p, inline_nodes, self.len);
}
// Consumes the current token unconditionally and returns its index.
static AstTokenIndex nextToken(Parser* p) {
    const AstTokenIndex consumed = p->tok_i;
    p->tok_i++;
    return consumed;
}
// Consumes the current token if it has tag `tag` and returns its index;
// otherwise leaves the position untouched and returns null_token.
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) {
    if (p->token_tags[p->tok_i] != tag)
        return null_token;
    return nextToken(p);
}
// Consumes the current token and asserts that it has tag `tag`. The token is
// consumed even when assertions are compiled out.
static AstTokenIndex assertToken(Parser* p, TokenizerTag tag) {
    const AstTokenIndex consumed = nextToken(p);
    assert(p->token_tags[consumed] == tag);
    return consumed;
}
// Skips over any run of consecutive doc-comment tokens.
static void eatDocComments(Parser* p) {
    for (;;) {
        if (eatToken(p, TOKEN_DOC_COMMENT) == null_token)
            return;
    }
}
// Overwrites the node stored at index `i` with `item` and returns `i`.
// The slot must already exist (see reserveNode()).
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
    AstNodeList* const list = &p->nodes;
    list->tags[i] = item.tag;
    list->main_tokens[i] = item.main_token;
    list->datas[i] = item.data;
    return i;
}
// Makes sure `list` has room for `additional` more nodes. Growth is to the
// larger of the required size and twice the current capacity. The three
// parallel arrays are reallocated together; the process exits with status 1
// if any allocation fails.
static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
    const uint32_t required = list->len + additional;
    if (required <= list->cap)
        return;

    const uint32_t grown
        = required > list->cap * 2 ? required : list->cap * 2;

    list->tags = realloc(list->tags, grown * sizeof(AstNodeTag));
    list->main_tokens
        = realloc(list->main_tokens, grown * sizeof(AstTokenIndex));
    list->datas = realloc(list->datas, grown * sizeof(AstData));

    const bool all_ok = list->tags && list->main_tokens && list->datas;
    if (!all_ok)
        exit(1);

    list->cap = grown;
}
// Appends `item` to the node list (growing it if needed) and returns the
// index of the new node.
static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
    astNodeListEnsureCapacity(nodes, 1);
    const AstNodeIndex slot = nodes->len;
    nodes->tags[slot] = item.tag;
    nodes->main_tokens[slot] = item.main_token;
    nodes->datas[slot] = item.data;
    nodes->len++;
    return slot;
}
// Appends `count` values from `extra` to the end of p->extra_data and returns
// the index at which the first appended value was stored.
//
// `extra` must not point into p->extra_data itself, because the capacity call
// may reallocate that buffer.
static AstNodeIndex addExtra(
    Parser* p, const AstNodeIndex* extra, uint32_t count) {
    SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
    const AstNodeIndex result = p->extra_data.len;
    // Store at the current end of the buffer (not at arr[0]) and advance the
    // length, so the returned index really refers to the appended values and
    // later appends do not overwrite them.
    if (count != 0) {
        memcpy(p->extra_data.arr + result, extra,
            count * sizeof(AstNodeIndex));
    }
    p->extra_data.len += count;
    return result;
}
// Recognizes an optional `align` clause. Parsing the clause itself is not
// implemented: if the keyword is present the process exits with an error,
// otherwise null_node is returned and nothing is consumed.
static AstNodeIndex parseByteAlign(Parser* p) {
    const AstTokenIndex keyword = eatToken(p, TOKEN_KEYWORD_ALIGN);
    if (keyword != null_token) {
        fprintf(stderr, "parseByteAlign cannot parse alignment\n");
        exit(1);
    }
    return null_node;
}
// Recognizes an optional `addrspace` clause. Parsing the clause itself is not
// implemented: if the keyword is present the process exits with an error,
// otherwise null_node is returned and nothing is consumed.
static AstNodeIndex parseAddrSpace(Parser* p) {
    const AstTokenIndex keyword = eatToken(p, TOKEN_KEYWORD_ADDRSPACE);
    if (keyword != null_token) {
        fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
        exit(1);
    }
    return null_node;
}
// Recognizes an optional `linksection` clause. Parsing the clause itself is
// not implemented: if the keyword is present the process exits with an error,
// otherwise null_node is returned and nothing is consumed.
static AstNodeIndex parseLinkSection(Parser* p) {
    const AstTokenIndex keyword = eatToken(p, TOKEN_KEYWORD_LINKSECTION);
    if (keyword != null_token) {
        fprintf(stderr, "parseLinkSection cannot parse linksection\n");
        exit(1);
    }
    return null_node;
}
// Recognizes an optional `callconv` clause. Parsing the clause itself is not
// implemented: if the keyword is present the process exits with an error,
// otherwise null_node is returned and nothing is consumed.
static AstNodeIndex parseCallconv(Parser* p) {
    const AstTokenIndex keyword = eatToken(p, TOKEN_KEYWORD_CALLCONV);
    if (keyword != null_token) {
        fprintf(stderr, "parseCallconv cannot parse callconv\n");
        exit(1);
    }
    return null_node;
}
// Pair of node indices for a container field's alignment and default-value
// expressions.
// NOTE(review): not referenced by any code visible in this file; presumably
// intended as the extra-data layout for AST_NODE_CONTAINER_FIELD — confirm.
typedef struct {
AstNodeIndex align_expr, value_expr;
} NodeContainerField;
// Parses one container field: optional `comptime`, optional `name:`, a type
// expression and an optional alignment. Default values (`= expr`) are not
// supported and terminate the process. Returns the index of the new field
// node, whose main token is the first token after the optional `comptime`.
static AstNodeIndex expectContainerField(Parser* p) {
    eatToken(p, TOKEN_KEYWORD_COMPTIME);
    const AstTokenIndex main_token = p->tok_i;

    // Skip the `name :` prefix when present.
    const bool has_name = p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
        && p->token_tags[p->tok_i + 1] == TOKEN_COLON;
    if (has_name)
        p->tok_i += 2;

    const AstNodeIndex type_expr = parseTypeExpr(p);
    const AstNodeIndex align_expr = parseByteAlign(p);

    if (eatToken(p, TOKEN_EQUAL) != null_token) {
        fprintf(stderr, "expectContainerField does not support expr\n");
        exit(1);
    }
    const AstNodeIndex value_expr = 0;

    // Pick the most compact node encoding for the pieces that are present.
    AstNodeItem field = {
        .main_token = main_token,
        .data = { .lhs = type_expr },
    };
    if (align_expr == 0) {
        field.tag = AST_NODE_CONTAINER_FIELD_INIT;
        field.data.rhs = value_expr;
    } else if (value_expr == 0) {
        field.tag = AST_NODE_CONTAINER_FIELD_ALIGN;
        field.data.rhs = align_expr;
    } else {
        const AstNodeIndex align_and_value[] = { align_expr, value_expr };
        field.tag = AST_NODE_CONTAINER_FIELD;
        field.data.rhs = addExtra(p, align_and_value, 2);
    }
    return addNode(&p->nodes, field);
}
// Parses a builtin call: the builtin token, `(`, a comma separated argument
// list and `)`. Arguments are collected on the scratch slice, which is
// restored on exit by the cleanup attribute. Building the resulting node is
// not implemented yet, so the function always terminates the process.
static AstNodeIndex parseBuiltinCall(Parser* p) {
    const AstTokenIndex builtin_token = assertToken(p, TOKEN_BUILTIN);
    eatToken(p, TOKEN_L_PAREN);
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (true) {
        if (eatToken(p, TOKEN_R_PAREN) != null_token)
            break;
        const AstNodeIndex param = parseExpr(p);
        // parseExpr() returns null_node WITHOUT consuming anything when the
        // current token cannot start an expression. Bail out instead of
        // spinning forever and growing the scratch slice.
        if (param == null_node) {
            fprintf(stderr,
                "parseBuiltinCall expected an argument expression\n");
            exit(1);
        }
        SLICE_APPEND(AstNodeIndex, &p->scratch, param);
        // Arguments are separated by commas; a trailing comma is accepted.
        eatToken(p, TOKEN_COMMA);
        // TODO finish
    }
    (void)builtin_token;
    fprintf(stderr, "parseBuiltinCall not implemented\n");
    exit(1);
    return 0; // tcc
}
// Parses a primary type expression. Only builtin calls and plain identifiers
// are implemented; every other token that could start a primary type
// expression terminates the process with a diagnostic. Returns null_node,
// consuming nothing, when the current token cannot start one.
static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
    const TokenizerTag tok = p->token_tags[p->tok_i];
    switch (tok) {
    case TOKEN_BUILTIN:
        return parseBuiltinCall(p);
    case TOKEN_IDENTIFIER: {
        if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
            fprintf(stderr,
                "parsePrimaryTypeExpr does not support identifier followed by "
                "colon\n");
            exit(1);
        }
        const AstTokenIndex identifier = nextToken(p);
        return addNode(&p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_IDENTIFIER,
                .main_token = identifier,
                .data = {},
            });
    }
    // Literals and `unreachable`/`anyframe` used to fall through into
    // parseBuiltinCall(), which asserts on TOKEN_BUILTIN. They are simply not
    // implemented yet, so report them like the other unsupported tokens.
    case TOKEN_CHAR_LITERAL:
    case TOKEN_NUMBER_LITERAL:
    case TOKEN_KEYWORD_UNREACHABLE:
    case TOKEN_KEYWORD_ANYFRAME:
    case TOKEN_STRING_LITERAL:
    case TOKEN_KEYWORD_FN:
    case TOKEN_KEYWORD_IF:
    case TOKEN_KEYWORD_SWITCH:
    case TOKEN_KEYWORD_EXTERN:
    case TOKEN_KEYWORD_PACKED:
    case TOKEN_KEYWORD_STRUCT:
    case TOKEN_KEYWORD_OPAQUE:
    case TOKEN_KEYWORD_ENUM:
    case TOKEN_KEYWORD_UNION:
    case TOKEN_KEYWORD_COMPTIME:
    case TOKEN_MULTILINE_STRING_LITERAL_LINE:
    case TOKEN_KEYWORD_INLINE:
    case TOKEN_KEYWORD_FOR:
    case TOKEN_KEYWORD_WHILE:
    case TOKEN_PERIOD:
    case TOKEN_KEYWORD_ERROR:
    case TOKEN_L_PAREN:
        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
            tokenizerGetTagString(tok));
        exit(1);
    default:
        return null_node;
    }
}
// Recognizes a suffix operator following `lhs`. None are implemented: tokens
// that would start one terminate the process, anything else yields null_node
// without consuming input.
static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) {
    (void)lhs;
    const TokenizerTag current = p->token_tags[p->tok_i];
    const bool starts_suffix_op = current == TOKEN_L_BRACKET
        || current == TOKEN_PERIOD_ASTERISK
        || current == TOKEN_INVALID_PERIODASTERISKS
        || current == TOKEN_PERIOD;
    if (starts_suffix_op) {
        fprintf(stderr, "parseSuffixOp does not support %s\n",
            tokenizerGetTagString(current));
        exit(1);
    }
    return null_node;
}
// Parses a primary type expression followed by suffix operators and at most
// one call. `async` is rejected. Returns null_node when no primary type
// expression is present; otherwise returns either the bare expression or the
// call node built for the first `(...)` that follows it.
static AstNodeIndex parseSuffixExpr(Parser* p) {
if (eatToken(p, TOKEN_KEYWORD_ASYNC) != null_token) {
fprintf(stderr, "async not supported\n");
exit(1);
}
AstNodeIndex res = parsePrimaryTypeExpr(p);
if (res == 0)
return res;
while (true) {
// Suffix operators take priority over a call; keep folding them into res.
const AstNodeIndex suffix_op = parseSuffixOp(p, res);
if (suffix_op != 0) {
res = suffix_op;
continue;
}
const AstTokenIndex lparen = eatToken(p, TOKEN_L_PAREN);
if (lparen == null_token)
return res;
// From here on a call is being parsed. Every path below returns, and the
// cleanup attribute truncates the scratch slice back on the way out.
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
// Argument parsing is not implemented: only an immediate `)` is accepted,
// so nothing is ever appended to the scratch slice here.
while (true) {
if (eatToken(p, TOKEN_R_PAREN) != null_token)
break;
fprintf(stderr, "parseSuffixExpr can only parse ()\n");
exit(1);
}
// tok_i - 1 is the `)` just consumed, so tok_i - 2 is the token before it.
const bool comma = p->token_tags[p->tok_i - 2] == TOKEN_COMMA;
const uint32_t params_len = p->scratch.len - scratch_top.old_len;
// Zero or one argument is stored inline in rhs; more arguments go through
// extra data as a (start, end) pair.
switch (params_len) {
case 0:
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE,
.main_token = lparen,
.data = {
.lhs = res,
.rhs = 0,
},
});
case 1:
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE,
.main_token = lparen,
.data = {
.lhs = res,
.rhs = p->scratch.arr[scratch_top.old_len],
},
});
default:;
const AstSubRange span = listToSpan(
p, &p->scratch.arr[scratch_top.old_len], params_len);
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = comma ? AST_NODE_CALL_COMMA : AST_NODE_CALL,
.main_token = lparen,
.data = {
.lhs = res,
.rhs = addExtra(p, (AstNodeIndex[]) {
span.start,
span.end,
}, 2),
},
});
}
}
}
// Consumes the current token, which must have tag `tag`, and returns its
// index. On a mismatch an error naming both tags is printed and the process
// exits with status 1.
static AstTokenIndex expectToken(Parser* p, TokenizerTag tag) {
    if (p->token_tags[p->tok_i] != tag) {
        fprintf(stderr, "expected token %s, got %s\n",
            tokenizerGetTagString(tag),
            tokenizerGetTagString(p->token_tags[p->tok_i]));
        exit(1);
    }
    return nextToken(p);
}
// Parses `SuffixExpr` optionally followed by `! TypeExpr`. Returns null_node
// when there is no suffix expression, the suffix expression itself when no
// `!` follows, and otherwise a new error-union node whose main token is `!`.
static AstNodeIndex parseErrorUnionExpr(Parser* p) {
    const AstNodeIndex error_set = parseSuffixExpr(p);
    if (error_set == 0)
        return null_node;

    const AstNodeIndex bang = eatToken(p, TOKEN_BANG);
    if (bang == null_token)
        return error_set;

    const AstNodeIndex payload_type = parseTypeExpr(p);
    const AstNodeItem error_union = {
        .tag = AST_NODE_ERROR_UNION,
        .main_token = bang,
        .data = { .lhs = error_set, .rhs = payload_type },
    };
    return addNode(&p->nodes, error_union);
}
// Parses a type expression. Prefix type operators (`?`, `anyframe`, `*`,
// `**`, `[`) are not implemented and terminate the process; everything else
// is delegated to parseErrorUnionExpr().
static AstNodeIndex parseTypeExpr(Parser* p) {
    const TokenizerTag current = p->token_tags[p->tok_i];
    const bool unsupported_prefix = current == TOKEN_QUESTION_MARK
        || current == TOKEN_KEYWORD_ANYFRAME
        || current == TOKEN_ASTERISK
        || current == TOKEN_ASTERISK_ASTERISK
        || current == TOKEN_L_BRACKET;
    if (unsupported_prefix) {
        fprintf(stderr, "parseTypeExpr not supported for %s\n",
            tokenizerGetTagString(current));
        exit(1);
    }
    return parseErrorUnionExpr(p);
}
// Parses a parameter list. Only the empty list `()` is supported; anything
// else makes expectToken() terminate the process. Returns a zero-or-one span
// with a zeroed payload.
static SmallSpan parseParamDeclList(Parser* p) {
    expectToken(p, TOKEN_L_PAREN);
    expectToken(p, TOKEN_R_PAREN);
    const SmallSpan no_params = { .tag = SMALL_SPAN_ZERO_OR_ONE };
    return no_params;
}
// Appends a placeholder node carrying only `tag` and returns its index. The
// remaining fields are expected to be filled in later through setNode().
static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
    astNodeListEnsureCapacity(&p->nodes, 1);
    const uint32_t reserved = p->nodes.len;
    p->nodes.len++;
    p->nodes.tags[reserved] = tag;
    return reserved;
}
// Parses a function prototype: `fn`, optional name, parameter list, optional
// align/addrspace/linksection/callconv clauses, optional `!` and the return
// type. Returns null_node, consuming nothing, when the current token is not
// `fn`. Only the simple form (no clauses, at most one parameter) is
// implemented; anything else terminates the process.
static AstNodeIndex parseFnProto(Parser* p) {
    const AstTokenIndex fn_token = eatToken(p, TOKEN_KEYWORD_FN);
    // eatToken() signals "no match" with null_token (all bits set), not with
    // null_node (0). Comparing against null_node would treat every non-`fn`
    // token as a prototype and reject a `fn` located at token index 0.
    if (fn_token == null_token)
        return null_node;
    // Reserve the slot first so the prototype node precedes its children.
    const AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO);
    eatToken(p, TOKEN_IDENTIFIER);
    const SmallSpan params = parseParamDeclList(p);
    const AstNodeIndex align_expr = parseByteAlign(p);
    const AstNodeIndex addrspace_expr = parseAddrSpace(p);
    const AstNodeIndex section_expr = parseLinkSection(p);
    const AstNodeIndex callconv_expr = parseCallconv(p);
    eatToken(p, TOKEN_BANG);
    const AstNodeIndex return_type_expr = parseTypeExpr(p);
    if (align_expr == 0 && section_expr == 0 && callconv_expr == 0
        && addrspace_expr == 0) {
        switch (params.tag) {
        case SMALL_SPAN_ZERO_OR_ONE:
            return setNode(
                p,
                fn_proto_index,
                (AstNodeItem) {
                    .tag = AST_NODE_FN_PROTO_SIMPLE,
                    .main_token = fn_token,
                    .data = {
                        .lhs = params.payload.zero_or_one,
                        .rhs = return_type_expr,
                    },
                });
        case SMALL_SPAN_MULTI:
            fprintf(stderr, "parseFnProto does not support multi params\n");
            exit(1);
        }
    }
    fprintf(stderr, "parseFnProto does not support complex function decls\n");
    exit(1);
    return 0; // tcc
}
// Parses an optional `identifier :` label. Returns the identifier's token
// index after consuming both tokens, or null_node (0) when no label is
// present.
static AstTokenIndex parseBlockLabel(Parser* p) {
    const bool has_label = p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
        && p->token_tags[p->tok_i + 1] == TOKEN_COLON;
    if (!has_label)
        return null_node;

    const AstTokenIndex label = p->tok_i;
    p->tok_i += 2;
    return label;
}
// Recognizes a `for` statement. Parsing it is not implemented: when the
// keyword is present the process exits with an error; otherwise null_node is
// returned and nothing is consumed.
static AstNodeIndex parseForStatement(Parser* p) {
    const AstTokenIndex for_token = eatToken(p, TOKEN_KEYWORD_FOR);
    if (for_token == null_token)
        return null_node;
    fprintf(stderr, "parseForStatement cannot parse for statements\n");
    // Without this exit the consumed `for` keyword would be silently dropped
    // and the caller would carry on as if no loop had been present.
    exit(1);
    return 0; // tcc
}
// Recognizes a `while` statement. Parsing it is not implemented: when the
// keyword is present the process exits with an error; otherwise null_node is
// returned and nothing is consumed.
static AstNodeIndex parseWhileStatement(Parser* p) {
    const AstTokenIndex while_token = eatToken(p, TOKEN_KEYWORD_WHILE);
    if (while_token == null_token)
        return null_node;
    fprintf(stderr, "parseWhileStatement cannot parse while statements\n");
    // Without this exit the consumed `while` keyword would be silently
    // dropped and the caller would carry on as if no loop had been present.
    exit(1);
    return 0; // tcc
}
// Parses an optionally `inline` `for` or `while` statement. Returns the loop
// node when one is found and null_node when there is neither a loop nor an
// `inline` keyword. An `inline` that is not followed by a loop terminates the
// process.
static AstNodeIndex parseLoopStatement(Parser* p) {
    const AstTokenIndex inline_token = eatToken(p, TOKEN_KEYWORD_INLINE);

    AstNodeIndex loop = parseForStatement(p);
    if (loop == 0)
        loop = parseWhileStatement(p);
    if (loop != 0)
        return loop;

    if (inline_token != null_token) {
        fprintf(stderr,
            "seen 'inline', there should have been a 'for' or 'while'\n");
        exit(1);
    }
    return null_node;
}
// Placeholder for assignment-expression parsing: always reports that it is
// not implemented and exits with status 1.
static AstNodeIndex parseAssignExpr(Parser* p) {
    (void)p;
    fputs("parseAssignExpr not implemented\n", stderr);
    exit(1);
    return null_node; // tcc
}
// Parses the prototype part of a variable declaration: `const` or `var`, the
// name, an optional `: Type`, and optional align/addrspace/linksection
// clauses. Returns null_node, consuming nothing, when the current token is
// neither `const` nor `var`. Only the simple form without any clause is
// supported; the result is a simple-var-decl node whose main token is the
// mutability keyword.
static AstNodeIndex parseVarDeclProto(Parser* p) {
    AstTokenIndex mut_token = eatToken(p, TOKEN_KEYWORD_CONST);
    if (mut_token == null_token) {
        mut_token = eatToken(p, TOKEN_KEYWORD_VAR);
        if (mut_token == null_token)
            return null_node;
    }
    expectToken(p, TOKEN_IDENTIFIER);

    AstNodeIndex type_node = 0;
    if (eatToken(p, TOKEN_COLON) != null_token)
        type_node = parseTypeExpr(p);

    const AstNodeIndex align_node = parseByteAlign(p);
    const AstNodeIndex addrspace_node = parseAddrSpace(p);
    const AstNodeIndex section_node = parseLinkSection(p);

    const bool is_simple
        = section_node == 0 && addrspace_node == 0 && align_node == 0;
    if (!is_simple) {
        fprintf(stderr, "parseVarDecl got something too complicated\n");
        exit(1);
    }

    const AstNodeItem decl = {
        .tag = AST_NODE_SIMPLE_VAR_DECL,
        .main_token = mut_token,
        .data = { .lhs = type_node, .rhs = 0 },
    };
    return addNode(&p->nodes, decl);
}
// Parses an optional `: identifier` label after `break`/`continue`. Returns
// the identifier's token index, or null_node (0) when no `:` follows.
static AstTokenIndex parseBreakLabel(Parser* p) {
    // eatToken() reports "no match" with null_token, not null_node; testing
    // against null_node made every unlabeled break demand an identifier.
    if (eatToken(p, TOKEN_COLON) == null_token)
        return null_node;
    return expectToken(p, TOKEN_IDENTIFIER);
}
// Parses a type expression optionally followed by an initializer list in
// braces. Initializer lists are not implemented: a `{` after the type
// expression terminates the process. Returns null_node when there is no type
// expression.
static AstNodeIndex parseCurlySuffixExpr(Parser* p) {
    const AstNodeIndex type_expr = parseTypeExpr(p);
    if (type_expr == 0)
        return null_node;

    if (eatToken(p, TOKEN_L_BRACE) != null_token) {
        fprintf(stderr, "parseCurlySuffixExpr is not implemented\n");
        exit(1);
    }
    return type_expr;
}
// Describes a binary operator for precedence parsing.
typedef struct {
int8_t prec; // binding strength; operTable() uses -1 for "not an operator"
AstNodeTag tag; // node tag emitted for the operator
// ASSOC_LEFT is the zero value, so entries that leave `assoc` unset are
// left-associative. ASSOC_NONE forbids chaining at the same precedence.
enum {
ASSOC_LEFT,
ASSOC_NONE,
} assoc;
} OperInfo;
// Maps a token tag to its binary-operator description. Higher `prec` binds
// tighter. Tokens that are not binary operators get prec -1 (with the
// placeholder tag AST_NODE_ROOT), which parseExprPrecedence() treats as the
// end of the expression. Comparison operators are the only non-associative
// entries.
static OperInfo operTable(TokenizerTag tok_tag) {
switch (tok_tag) {
case TOKEN_KEYWORD_OR:
return (OperInfo) { .prec = 10, .tag = AST_NODE_BOOL_OR };
case TOKEN_KEYWORD_AND:
return (OperInfo) { .prec = 20, .tag = AST_NODE_BOOL_AND };
// Comparisons: precedence 30, cannot be chained.
case TOKEN_EQUAL_EQUAL:
return (OperInfo) {
.prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_BANG_EQUAL:
return (OperInfo) {
.prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_LEFT:
return (OperInfo) {
.prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_RIGHT:
return (OperInfo) {
.prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_LEFT_EQUAL:
return (OperInfo) {
.prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL:
return (OperInfo) {
.prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE
};
// Bitwise operators, orelse and catch: precedence 40.
case TOKEN_AMPERSAND:
return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_AND };
case TOKEN_CARET:
return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_XOR };
case TOKEN_PIPE:
return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_OR };
case TOKEN_KEYWORD_ORELSE:
return (OperInfo) { .prec = 40, .tag = AST_NODE_ORELSE };
case TOKEN_KEYWORD_CATCH:
return (OperInfo) { .prec = 40, .tag = AST_NODE_CATCH };
// Shifts: precedence 50.
case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL };
case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
return (OperInfo) { .prec = 50, .tag = AST_NODE_SHL_SAT };
case TOKEN_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
return (OperInfo) { .prec = 50, .tag = AST_NODE_SHR };
// Additive operators: precedence 60.
case TOKEN_PLUS:
return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD };
case TOKEN_MINUS:
return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB };
case TOKEN_PLUS_PLUS:
return (OperInfo) { .prec = 60, .tag = AST_NODE_ARRAY_CAT };
case TOKEN_PLUS_PERCENT:
return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_WRAP };
case TOKEN_MINUS_PERCENT:
return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_WRAP };
case TOKEN_PLUS_PIPE:
return (OperInfo) { .prec = 60, .tag = AST_NODE_ADD_SAT };
case TOKEN_MINUS_PIPE:
return (OperInfo) { .prec = 60, .tag = AST_NODE_SUB_SAT };
// Multiplicative operators: precedence 70.
case TOKEN_PIPE_PIPE:
return (OperInfo) { .prec = 70, .tag = AST_NODE_MERGE_ERROR_SETS };
case TOKEN_ASTERISK:
return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL };
case TOKEN_SLASH:
return (OperInfo) { .prec = 70, .tag = AST_NODE_DIV };
case TOKEN_PERCENT:
return (OperInfo) { .prec = 70, .tag = AST_NODE_MOD };
case TOKEN_ASTERISK_ASTERISK:
return (OperInfo) { .prec = 70, .tag = AST_NODE_ARRAY_MULT };
case TOKEN_ASTERISK_PERCENT:
return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_WRAP };
case TOKEN_ASTERISK_PIPE:
return (OperInfo) { .prec = 70, .tag = AST_NODE_MUL_SAT };
default:
return (OperInfo) { .prec = -1, .tag = AST_NODE_ROOT };
}
}
// Precedence-climbing parser for binary expressions. Parses a prefix
// expression and then folds in every following binary operator whose
// precedence is at least `min_prec`, recursing with a higher minimum for the
// right-hand side. Returns null_node when no prefix expression is present.
// `catch` is recognized but not implemented and terminates the process.
static AstNodeIndex parseExprPrecedence(Parser* p, int32_t min_prec) {
    assert(min_prec >= 0);

    AstNodeIndex lhs = parsePrefixExpr(p);
    if (lhs == 0)
        return null_node;

    // Precedence level at which another operator may not follow, set after a
    // non-associative operator has been parsed.
    int8_t banned_prec = -1;

    for (;;) {
        const TokenizerTag op_tag = p->token_tags[p->tok_i];
        const OperInfo op = operTable(op_tag);
        if (op.prec < min_prec)
            break;
        assert(op.prec != banned_prec);

        const AstTokenIndex op_token = nextToken(p);
        if (op_tag == TOKEN_KEYWORD_CATCH) {
            fprintf(stderr, "parsePayload not supported\n");
            exit(1);
            return 0; // tcc
        }

        const AstNodeIndex rhs = parseExprPrecedence(p, op.prec + 1);
        assert(rhs != 0);

        const AstNodeItem binary = {
            .tag = op.tag,
            .main_token = op_token,
            .data = { .lhs = lhs, .rhs = rhs },
        };
        lhs = addNode(&p->nodes, binary);

        if (op.assoc == ASSOC_NONE)
            banned_prec = op.prec;
    }
    return lhs;
}
// Parses a full expression, i.e. a binary expression with the lowest minimum
// precedence. Returns null_node when no expression is present.
static AstNodeIndex parseExpr(Parser* p) {
    const int32_t lowest_precedence = 0;
    return parseExprPrecedence(p, lowest_precedence);
}
// Parses a primary expression. Implemented forms are `break`, `continue`,
// (optionally labeled) blocks, and anything parseCurlySuffixExpr() accepts.
// Keywords whose parsing is not implemented terminate the process with a
// diagnostic.
static AstNodeIndex parsePrimaryExpr(Parser* p) {
    const char* tok = tokenizerGetTagString(p->token_tags[p->tok_i]);
    switch (p->token_tags[p->tok_i]) {
    case TOKEN_KEYWORD_ASM:
    case TOKEN_KEYWORD_IF:
        fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok);
        exit(1);
        break;
    case TOKEN_KEYWORD_BREAK: {
        // The three calls below all advance the token cursor, and the
        // expressions of an initializer list are indeterminately sequenced
        // in C, so they must be evaluated as separate statements to
        // guarantee keyword -> label -> operand order.
        const AstTokenIndex break_token = nextToken(p);
        const AstTokenIndex label = parseBreakLabel(p);
        const AstNodeIndex operand = parseExpr(p);
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_BREAK,
                .main_token = break_token,
                .data = {
                    .lhs = label,
                    .rhs = operand,
                },
            });
    }
    case TOKEN_KEYWORD_CONTINUE: {
        // Same ordering requirement as for `break` above.
        const AstTokenIndex continue_token = nextToken(p);
        const AstTokenIndex label = parseBreakLabel(p);
        const AstNodeIndex operand = parseExpr(p);
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_CONTINUE,
                .main_token = continue_token,
                .data = {
                    .lhs = label,
                    .rhs = operand,
                },
            });
    }
    case TOKEN_KEYWORD_COMPTIME:
    case TOKEN_KEYWORD_NOSUSPEND:
    case TOKEN_KEYWORD_RESUME:
    case TOKEN_KEYWORD_RETURN:
        fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok);
        exit(1);
        return 0; // tcc
    case TOKEN_IDENTIFIER:
        // `identifier :` may introduce a labeled loop or a labeled block.
        if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
            switch (p->token_tags[p->tok_i + 2]) {
            case TOKEN_KEYWORD_INLINE:
            case TOKEN_KEYWORD_FOR:
            case TOKEN_KEYWORD_WHILE:
                fprintf(stderr, "parsePrimaryExpr NotImplemented\n");
                exit(1);
                return 0; // tcc
            case TOKEN_L_BRACE:
                // Skip the label and its colon, then parse the block.
                p->tok_i += 2;
                return parseBlock(p);
            default:
                return parseCurlySuffixExpr(p);
            }
        } else {
            return parseCurlySuffixExpr(p);
        }
    case TOKEN_KEYWORD_INLINE:
    case TOKEN_KEYWORD_FOR:
    case TOKEN_KEYWORD_WHILE:
        fprintf(stderr, "parsePrimaryExpr does not implement %s\n", tok);
        exit(1);
        return 0; // tcc
    case TOKEN_L_BRACE:
        return parseBlock(p);
    default:
        return parseCurlySuffixExpr(p);
    }
}
// Parses a chain of prefix operators (`!`, `-`, `~`, `-%`, `&`, `try`,
// `await`) followed by a primary expression. Each operator becomes a node
// whose main token is the operator and whose lhs is the operand. Without a
// prefix operator the call is forwarded to parsePrimaryExpr().
static AstNodeIndex parsePrefixExpr(Parser* p) {
    AstNodeTag tag;
    switch (p->token_tags[p->tok_i]) {
    case TOKEN_BANG:
        tag = AST_NODE_BOOL_NOT;
        break;
    case TOKEN_MINUS:
        tag = AST_NODE_NEGATION;
        break;
    case TOKEN_TILDE:
        tag = AST_NODE_BIT_NOT;
        break;
    case TOKEN_MINUS_PERCENT:
        tag = AST_NODE_NEGATION_WRAP;
        break;
    case TOKEN_AMPERSAND:
        tag = AST_NODE_ADDRESS_OF;
        break;
    case TOKEN_KEYWORD_TRY:
        tag = AST_NODE_TRY;
        break;
    case TOKEN_KEYWORD_AWAIT:
        tag = AST_NODE_AWAIT;
        break;
    default:
        return parsePrimaryExpr(p);
    }
    // Consume the operator BEFORE parsing its operand. Doing both inside one
    // initializer list leaves their relative order unspecified in C, which
    // could recurse on the operator token forever.
    const AstTokenIndex operator_token = nextToken(p);
    const AstNodeIndex operand = parsePrefixExpr(p);
    return addNode(
        &p->nodes,
        (AstNodeItem) {
            .tag = tag,
            .main_token = operator_token,
            .data = {
                .lhs = operand,
                .rhs = 0,
            },
        });
}
// Parses the comma separated left-hand sides of a statement, each being
// either a variable declaration prototype or an expression, collecting them
// on the scratch slice (restored on exit by the cleanup attribute). The rest
// of the statement is not implemented, so the function always terminates the
// process after the list has been read.
static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (true) {
        const AstNodeIndex var_decl_proto = parseVarDeclProto(p);
        if (var_decl_proto != 0) {
            SLICE_APPEND(AstNodeIndex, &p->scratch, var_decl_proto);
        } else {
            const AstNodeIndex expr = parseExpr(p);
            SLICE_APPEND(AstNodeIndex, &p->scratch, expr);
        }
        // eatToken() returns null_token (not null_node) when there is no
        // comma; comparing against null_node never ended this loop.
        if (eatToken(p, TOKEN_COMMA) == null_token)
            break;
    }
    const uint32_t lhs_count = p->scratch.len - scratch_top.old_len;
    assert(lhs_count > 0);
    (void)lhs_count; // only read by the assert above
    fprintf(stderr, "expectVarDeclExprStatement only partially implemented\n");
    exit(1);
    return 0; // tcc
}
// Parses one statement. `comptime` and a set of statement keywords are not
// implemented and terminate the process. Otherwise a labeled statement
// (block or loop) is tried first; if none is found the statement is parsed as
// a var-decl/expression statement when `allow_defer_var` is set, or as an
// assignment expression when it is not.
static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
    if (eatToken(p, TOKEN_KEYWORD_COMPTIME) != null_token) {
        fprintf(stderr, "expectStatement: comptime keyword not supported\n");
        exit(1);
    }

    const TokenizerTag current = p->token_tags[p->tok_i];
    bool unsupported_keyword;
    switch (current) {
    case TOKEN_KEYWORD_NOSUSPEND:
    case TOKEN_KEYWORD_SUSPEND:
    case TOKEN_KEYWORD_DEFER:
    case TOKEN_KEYWORD_ERRDEFER:
    case TOKEN_KEYWORD_IF:
    case TOKEN_KEYWORD_ENUM:
    case TOKEN_KEYWORD_STRUCT:
    case TOKEN_KEYWORD_UNION:
        unsupported_keyword = true;
        break;
    default:
        unsupported_keyword = false;
        break;
    }
    if (unsupported_keyword) {
        fprintf(stderr, "expectStatement does not support keyword %s\n",
            tokenizerGetTagString(current));
        exit(1);
    }

    const AstNodeIndex labeled = parseLabeledStatement(p);
    if (labeled != 0)
        return labeled;

    return allow_defer_var ? expectVarDeclExprStatement(p)
                           : parseAssignExpr(p);
}
// Parses `{ statement* }`. Returns null_node, consuming nothing, when the
// current token is not `{`. Blocks with up to two statements store them
// inline in lhs/rhs (BLOCK_TWO variants); longer blocks store a span obtained
// from listToSpan(). The *_SEMICOLON tag variants are used when the token
// right before the closing brace is a semicolon. The node's main token is
// the opening brace.
static AstNodeIndex parseBlock(Parser* p) {
    const AstTokenIndex lbrace = eatToken(p, TOKEN_L_BRACE);
    if (lbrace == null_token)
        return null_node;
    // Statements are gathered on the scratch slice, which the cleanup
    // attribute truncates back when this function returns.
    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
        = initCleanupScratch(p);
    while (1) {
        if (p->token_tags[p->tok_i] == TOKEN_R_BRACE)
            break;
        // "const AstNodeIndex statement" once tinycc supports typeof_unqual
        // (C23)
        AstNodeIndex statement = expectStatement(p, true);
        if (statement == 0)
            break;
        SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
    }
    expectToken(p, TOKEN_R_BRACE);
    // tok_i - 1 is the `}` just consumed, so tok_i - 2 is the last token
    // inside the block. The index must be offset, not the tag value:
    // subtracting 2 from the tag compared an unrelated enum value.
    const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON);
    const uint32_t statements_len = p->scratch.len - scratch_top.old_len;
    switch (statements_len) {
    case 0:
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = 0,
                    .rhs = 0,
                },
            });
    case 1:
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON
                                 : AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = 0,
                },
            });
    case 2:
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON
                                 : AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
                    .rhs = p->scratch.arr[scratch_top.old_len + 1],
                },
            });
    default: {
        const AstSubRange span = listToSpan(
            p, &p->scratch.arr[scratch_top.old_len], statements_len);
        return addNode(
            &p->nodes,
            (AstNodeItem) {
                .tag = semicolon ? AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK,
                .main_token = lbrace,
                .data = {
                    .lhs = span.start,
                    .rhs = span.end,
                },
            });
    }
    }
    return 0;
}
// Parses an optionally labeled block or loop statement. Returns the parsed
// node, or null_node when neither a block nor a loop follows and no label was
// consumed. A label that is followed by neither terminates the process.
static AstNodeIndex parseLabeledStatement(Parser* p) {
    const AstNodeIndex label_token = parseBlockLabel(p);

    AstNodeIndex statement = parseBlock(p);
    if (statement == 0)
        statement = parseLoopStatement(p);
    if (statement != 0)
        return statement;

    if (label_token != 0) {
        fprintf(stderr, "parseLabeledStatement does not support labels\n");
        exit(1);
    }
    return null_node;
}
// Recognizes a global variable declaration. Only the "not a declaration"
// outcome is supported: when a declaration prototype is actually parsed the
// process exits with an error; otherwise null_node is returned.
static AstNodeIndex parseGlobalVarDecl(Parser* p) {
    const AstNodeIndex proto = parseVarDeclProto(p);
    if (proto != 0) {
        fprintf(stderr,
            "parseGlobalVarDecl does not support parsing var decls\n");
        exit(1);
    }
    return null_node;
}
// Parses one top-level declaration: an optional extern/export/inline/noinline
// modifier followed by a function prototype (terminated by `;`) or a function
// definition (prototype plus body block). Returns the prototype node for a
// bare prototype and an AST_NODE_FN_DECL node for a definition. Global
// variable declarations and `usingnamespace` are not supported and terminate
// the process.
static AstNodeIndex expectTopLevelDecl(Parser* p) {
    AstTokenIndex extern_export_inline_token = nextToken(p);
    switch (p->token_tags[extern_export_inline_token]) {
    case TOKEN_KEYWORD_EXTERN:
        // `extern` may be followed by a library name string.
        eatToken(p, TOKEN_STRING_LITERAL);
        break;
    case TOKEN_KEYWORD_EXPORT:
    case TOKEN_KEYWORD_INLINE:
    case TOKEN_KEYWORD_NOINLINE:
        break;
    default:
        // Not a modifier: un-consume the token.
        p->tok_i--;
    }
    AstNodeIndex fn_proto = parseFnProto(p);
    if (fn_proto != 0) {
        switch (p->token_tags[p->tok_i]) {
        case TOKEN_SEMICOLON:
            p->tok_i++;
            return fn_proto;
        case TOKEN_L_BRACE: {
            // Reserve the declaration node before parsing the body so that
            // it is placed ahead of the body's nodes.
            AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL);
            AstNodeIndex body_block = parseBlock(p);
            return setNode(p, fn_decl_index,
                (AstNodeItem) {
                    .tag = AST_NODE_FN_DECL,
                    .main_token = p->nodes.main_tokens[fn_proto],
                    .data = { .lhs = fn_proto, .rhs = body_block },
                });
        }
        default:
            // Report the problem like every other error path in this file
            // instead of exiting silently.
            fprintf(stderr,
                "expectTopLevelDecl expected ';' or '{' after function "
                "prototype, got %s\n",
                tokenizerGetTagString(p->token_tags[p->tok_i]));
            exit(1);
        }
    }
    eatToken(p, TOKEN_KEYWORD_THREADLOCAL);
    AstNodeIndex var_decl = parseGlobalVarDecl(p);
    if (var_decl != 0) {
        return var_decl;
    }
    // assuming the program is correct...
    fprintf(stderr,
        "the next token should be usingnamespace, which is not supported\n");
    exit(1);
    return 0; // make tcc happy
}
// Skips tokens until a position is reached where the next container member
// plausibly starts, tracking bracket nesting in `level`. On return the
// cursor is either on a token that can start a declaration, on an identifier
// followed by a comma, on an unmatched `}` or EOF, or just past a top-level
// `,` or `;`.
void findNextContainerMember(Parser* p) {
uint32_t level = 0;
while (true) {
AstTokenIndex tok = nextToken(p);
switch (p->token_tags[tok]) {
// Any of these can start a new top level declaration
case TOKEN_KEYWORD_TEST:
case TOKEN_KEYWORD_COMPTIME:
case TOKEN_KEYWORD_PUB:
case TOKEN_KEYWORD_EXPORT:
case TOKEN_KEYWORD_EXTERN:
case TOKEN_KEYWORD_INLINE:
case TOKEN_KEYWORD_NOINLINE:
case TOKEN_KEYWORD_USINGNAMESPACE:
case TOKEN_KEYWORD_THREADLOCAL:
case TOKEN_KEYWORD_CONST:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_FN:
if (level == 0) {
// Un-consume the keyword so the caller sees it.
p->tok_i--;
return;
}
break;
case TOKEN_IDENTIFIER:
// `identifier ,` at nesting level 0 is treated as the start of a member.
if (p->token_tags[tok + 1] == TOKEN_COMMA && level == 0) {
p->tok_i--;
return;
}
break;
case TOKEN_COMMA:
case TOKEN_SEMICOLON:
// This decl was likely meant to end here
if (level == 0)
return;
break;
case TOKEN_L_PAREN:
case TOKEN_L_BRACKET:
case TOKEN_L_BRACE:
level++;
break;
case TOKEN_R_PAREN:
case TOKEN_R_BRACKET:
// Unbalanced closers at level 0 are ignored.
if (level != 0)
level--;
break;
case TOKEN_R_BRACE:
if (level == 0) {
// end of container, exit
p->tok_i--;
return;
}
level--;
break;
case TOKEN_EOF:
// Leave the cursor on EOF so the caller can see it.
p->tok_i--;
return;
default:
break;
}
}
}
// Parses the members of a container (declarations and fields) until `}` or
// EOF, leaving that terminating token unconsumed. Members are collected on
// the scratch slice, which the cleanup attribute truncates back on return.
// The result stores up to two members inline in lhs/rhs and longer lists as a
// span from listToSpan(); `trailing` records whether the last member was
// followed by `,` (fields) or ended with `;` (declarations).
// `test`, `comptime` and `usingnamespace` members are not implemented and
// terminate the process.
static Members parseContainerMembers(Parser* p) {
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
// Skip container-level doc comments at the start.
while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
;
// Tracks whether fields have been seen and whether a declaration followed
// them; a field after such a declaration is an error (see the default case).
FieldState field_state = { .tag = FIELD_STATE_NONE };
bool trailing = false;
while (1) {
eatDocComments(p);
switch (p->token_tags[p->tok_i]) {
case TOKEN_KEYWORD_TEST:
case TOKEN_KEYWORD_COMPTIME:
case TOKEN_KEYWORD_USINGNAMESPACE:;
const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
fprintf(
stderr, "%s not implemented in parseContainerMembers\n", str);
exit(1);
case TOKEN_KEYWORD_PUB: {
// Skip `pub`, then parse the declaration that follows it.
p->tok_i++;
AstNodeIndex top_level_decl = expectTopLevelDecl(p);
if (top_level_decl != 0) {
if (field_state.tag == FIELD_STATE_SEEN) {
field_state.tag = FIELD_STATE_END;
field_state.payload.end = top_level_decl;
}
SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
}
// The declaration is "trailing" when the last consumed token is `;`.
trailing = p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON;
break;
}
case TOKEN_KEYWORD_CONST:
case TOKEN_KEYWORD_VAR:
case TOKEN_KEYWORD_THREADLOCAL:
case TOKEN_KEYWORD_EXPORT:
case TOKEN_KEYWORD_EXTERN:
case TOKEN_KEYWORD_INLINE:
case TOKEN_KEYWORD_NOINLINE:
case TOKEN_KEYWORD_FN: {
const AstNodeIndex top_level_decl = expectTopLevelDecl(p);
if (top_level_decl != 0) {
if (field_state.tag == FIELD_STATE_SEEN) {
field_state.tag = FIELD_STATE_END;
field_state.payload.end = top_level_decl;
}
SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
}
trailing = (p->token_tags[p->tok_i - 1] == TOKEN_SEMICOLON);
break;
}
case TOKEN_EOF:
case TOKEN_R_BRACE:
goto break_loop;
default:;
// Anything else is parsed as a container field.
// skip parseCStyleContainer
const AstNodeIndex container_field = expectContainerField(p);
switch (field_state.tag) {
case FIELD_STATE_NONE:
field_state.tag = FIELD_STATE_SEEN;
break;
case FIELD_STATE_SEEN:
break;
case FIELD_STATE_END:
// A field appeared after a declaration that itself followed fields.
fprintf(stderr, "parseContainerMembers error condition\n");
exit(1);
}
SLICE_APPEND(AstNodeIndex, &p->scratch, container_field);
switch (p->token_tags[p->tok_i]) {
case TOKEN_COMMA:
p->tok_i++;
trailing = true;
continue;
case TOKEN_R_BRACE:
case TOKEN_EOF:
trailing = false;
goto break_loop;
default:;
}
// The field was not followed by `,`, `}` or EOF: resynchronize on the next
// plausible member start.
findNextContainerMember(p);
continue;
}
}
break_loop:;
const uint32_t items_len = p->scratch.len - scratch_top.old_len;
// Up to two members are returned inline; more are copied into a span.
switch (items_len) {
case 0:
return (Members) {
.len = 0,
.lhs = 0,
.rhs = 0,
.trailing = trailing,
};
case 1:
return (Members) {
.len = 1,
.lhs = p->scratch.arr[scratch_top.old_len],
.rhs = 0,
.trailing = trailing,
};
case 2:
return (Members) {
.len = 2,
.lhs = p->scratch.arr[scratch_top.old_len],
.rhs = p->scratch.arr[scratch_top.old_len + 1],
.trailing = trailing,
};
default:;
const AstSubRange span
= listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len);
return (Members) {
.len = items_len,
.lhs = span.start,
.rhs = span.end,
.trailing = trailing,
};
}
}
// Entry point: adds the root node, parses all top-level container members and
// stores the range of their indices in the lhs/rhs of node 0.
void parseRoot(Parser* p) {
    const AstNodeItem root = { .tag = AST_NODE_ROOT, .main_token = 0 };
    addNode(&p->nodes, root);

    const Members members = parseContainerMembers(p);
    const AstSubRange decls = membersToSpan(members, p);

    p->nodes.datas[0].lhs = decls.start;
    p->nodes.datas[0].rhs = decls.end;
}