This commit is contained in:
2025-01-08 19:04:40 +01:00
parent a987479617
commit aa0fab43e4
6 changed files with 166 additions and 123 deletions

134
parser.c
View File

@@ -10,21 +10,14 @@ const AstNodeIndex null_node = 0;
const AstTokenIndex null_token = ~(AstTokenIndex)(0);
// FieldState: a tagged record. `tag` selects one of three states
// (NONE, SEEN, END) and `payload` is a union with a single member, `end`.
// NOTE(review): payload.end is presumably only meaningful when tag is
// FIELD_STATE_END -- confirm against the code that reads FieldState.
typedef struct {
enum {
FIELD_STATE_NONE,
FIELD_STATE_SEEN,
FIELD_STATE_END
} tag;
enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
union {
uint32_t end;
} payload;
} FieldState;
typedef struct {
enum {
SMALL_SPAN_ZERO_OR_ONE,
SMALL_SPAN_MULTI
} tag;
enum { SMALL_SPAN_ZERO_OR_ONE, SMALL_SPAN_MULTI } tag;
union {
AstNodeIndex zero_or_one;
AstSubRange multi;
@@ -43,11 +36,10 @@ static CleanupScratch initCleanupScratch(Parser* p) {
};
}
// cleanupScratch: sets the length of the list pointed to by c->scratch back
// to c->old_len. Only the length field is written; nothing is freed here.
// This function is named as the __cleanup__ handler on the CleanupScratch
// locals declared in the parse functions, so the compiler calls it when such
// a local goes out of scope.
// NOTE(review): old_len is presumably the scratch length captured by
// initCleanupScratch -- confirm against its body.
static void cleanupScratch(CleanupScratch* c) {
c->scratch->len = c->old_len;
}
static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; }
static AstSubRange listToSpan(Parser* p, const AstNodeIndex* list, uint32_t count) {
static AstSubRange listToSpan(
Parser* p, const AstNodeIndex* list, uint32_t count) {
SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
memcpy(p->extra_data.arr, list, count * sizeof(AstNodeIndex));
p->extra_data.len += count;
@@ -73,9 +65,7 @@ static AstSubRange membersToSpan(const Members self, Parser* p) {
}
}
// nextToken: returns the current value of p->tok_i and then increments it
// (post-increment), i.e. hands back the index of the token being consumed
// and moves the parser on to the following one. No bounds check is done here.
static AstTokenIndex nextToken(Parser* p) {
return p->tok_i++;
}
static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) {
if (p->token_tags[p->tok_i] == tag) {
@@ -104,7 +94,8 @@ static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
list->main_tokens
= realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
list->datas = realloc(list->datas, new_cap * sizeof(AstData));
if (!list->tags || !list->main_tokens || !list->datas)
exit(1);
@@ -119,7 +110,8 @@ static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
return nodes->len++;
}
static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t count) {
static AstNodeIndex addExtra(
Parser* p, const AstNodeIndex* extra, uint32_t count) {
const AstNodeIndex result = p->extra_data.len;
SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
memcpy(p->extra_data.arr, extra, count * sizeof(AstNodeIndex));
@@ -167,7 +159,8 @@ typedef struct {
static AstNodeIndex expectContainerField(Parser* p) {
eatToken(p, TOKEN_KEYWORD_COMPTIME);
const AstTokenIndex main_token = p->tok_i;
if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON)
if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
&& p->token_tags[p->tok_i + 1] == TOKEN_COLON)
p->tok_i += 2;
const AstNodeIndex type_expr = parseTypeExpr(p);
@@ -234,26 +227,30 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
case TOKEN_KEYWORD_UNION:
case TOKEN_KEYWORD_COMPTIME:
case TOKEN_MULTILINE_STRING_LITERAL_LINE:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
exit(1);
case TOKEN_IDENTIFIER:
if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
fprintf(stderr,
"parsePrimaryTypeExpr does not support identifier followed by "
"colon\n");
exit(1);
}
return addNode(
&p->nodes,
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_IDENTIFIER,
.main_token = nextToken(p),
.data = {} });
.data = {},
});
case TOKEN_KEYWORD_INLINE:
case TOKEN_KEYWORD_FOR:
case TOKEN_KEYWORD_WHILE:
case TOKEN_PERIOD:
case TOKEN_KEYWORD_ERROR:
case TOKEN_L_PAREN:
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
tokenizerGetTagString(tok));
exit(1);
default:
return null_node;
@@ -268,7 +265,8 @@ static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) {
case TOKEN_PERIOD_ASTERISK:
case TOKEN_INVALID_PERIODASTERISKS:
case TOKEN_PERIOD:
fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
fprintf(stderr, "parseSuffixOp does not support %s\n",
tokenizerGetTagString(tok));
exit(1);
default:
return null_node;
@@ -295,7 +293,8 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
if (lparen == null_token)
return res;
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
while (true) {
if (eatToken(p, TOKEN_R_PAREN) != null_token)
break;
@@ -329,7 +328,8 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
},
});
default:;
const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], params_len);
const AstSubRange span = listToSpan(
p, &p->scratch.arr[scratch_top.old_len], params_len);
return addNode(
&p->nodes,
(AstNodeItem) {
@@ -337,7 +337,10 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
.main_token = lparen,
.data = {
.lhs = res,
.rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2),
.rhs = addExtra(p, (AstNodeIndex[]) {
span.start,
span.end,
}, 2),
},
});
}
@@ -385,7 +388,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
case TOKEN_ASTERISK:
case TOKEN_ASTERISK_ASTERISK:
case TOKEN_L_BRACKET:
fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
fprintf(stderr, "parseTypeExpr not supported for %s\n",
tokenizerGetTagString(tok));
exit(1);
default:
return parseErrorUnionExpr(p);
@@ -427,7 +431,8 @@ static AstNodeIndex parseFnProto(Parser* p) {
const AstNodeIndex return_type_expr = parseTypeExpr(p);
if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) {
if (align_expr == 0 && section_expr == 0 && callconv_expr == 0
&& addrspace_expr == 0) {
switch (params.tag) {
case SMALL_SPAN_ZERO_OR_ONE:
return setNode(
@@ -454,7 +459,8 @@ static AstNodeIndex parseFnProto(Parser* p) {
}
static AstTokenIndex parseBlockLabel(Parser* p) {
if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
&& p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
const AstTokenIndex identifier = p->tok_i;
p->tok_i += 2;
return identifier;
@@ -496,7 +502,8 @@ static AstNodeIndex parseLoopStatement(Parser* p) {
if (inline_token == null_token)
return null_node;
fprintf(stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
fprintf(
stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
exit(1);
return 0; // tcc
}
@@ -509,7 +516,8 @@ static AstNodeIndex parseAssignExpr(Parser* p) {
}
static AstNodeIndex parseVarDeclProto(Parser* p) {
if (eatToken(p, TOKEN_KEYWORD_CONST) == null_token || eatToken(p, TOKEN_KEYWORD_VAR) == null_token)
if (eatToken(p, TOKEN_KEYWORD_CONST) == null_token
|| eatToken(p, TOKEN_KEYWORD_VAR) == null_token)
return null_node;
fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
@@ -534,17 +542,29 @@ static OperInfo operTable(TokenizerTag tok_tag) {
return (OperInfo) { .prec = 20, .tag = AST_NODE_BOOL_AND };
case TOKEN_EQUAL_EQUAL:
return (OperInfo) { .prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE };
return (OperInfo) {
.prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_BANG_EQUAL:
return (OperInfo) { .prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE };
return (OperInfo) {
.prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_LEFT:
return (OperInfo) { .prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE };
return (OperInfo) {
.prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_RIGHT:
return (OperInfo) { .prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE };
return (OperInfo) {
.prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_LEFT_EQUAL:
return (OperInfo) { .prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE };
return (OperInfo) {
.prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL:
return (OperInfo) { .prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE };
return (OperInfo) {
.prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE
};
case TOKEN_AMPERSAND:
return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_AND };
@@ -600,7 +620,8 @@ static OperInfo operTable(TokenizerTag tok_tag) {
}
static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
// while(true) {
// const AstNodeIndex var_decl_proto = parseVarDeclProto(p);
@@ -634,7 +655,8 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
case TOKEN_KEYWORD_STRUCT:
case TOKEN_KEYWORD_UNION:;
const char* tok_str = tokenizerGetTagString(tok);
fprintf(stderr, "expectStatement does not support keyword %s\n", tok_str);
fprintf(
stderr, "expectStatement does not support keyword %s\n", tok_str);
exit(1);
default:;
}
@@ -655,13 +677,15 @@ static AstNodeIndex parseBlock(Parser* p) {
if (lbrace == null_token)
return null_node;
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
while (1) {
if (p->token_tags[p->tok_i] == TOKEN_R_BRACE)
break;
// "const AstNodeIndex statement" once tinycc supports typeof_unqual (C23)
// "const AstNodeIndex statement" once tinycc supports typeof_unqual
// (C23)
AstNodeIndex statement = expectStatement(p, true);
if (statement == 0)
break;
@@ -706,7 +730,8 @@ static AstNodeIndex parseBlock(Parser* p) {
},
});
default:;
const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], statements_len);
const AstSubRange span = listToSpan(
p, &p->scratch.arr[scratch_top.old_len], statements_len);
return addNode(
&p->nodes,
(AstNodeItem) {
@@ -774,9 +799,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
case TOKEN_L_BRACE:;
AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL);
AstNodeIndex body_block = parseBlock(p);
return setNode(
p,
fn_decl_index,
return setNode(p, fn_decl_index,
(AstNodeItem) {
.tag = AST_NODE_FN_DECL,
.main_token = p->nodes.main_tokens[fn_proto],
@@ -794,7 +817,8 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
}
// assuming the program is correct...
fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
fprintf(stderr,
"the next token should be usingnamespace, which is not supported\n");
exit(1);
return 0; // make tcc happy
}
@@ -864,7 +888,8 @@ void findNextContainerMember(Parser* p) {
}
static Members parseContainerMembers(Parser* p) {
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
= initCleanupScratch(p);
while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
;
@@ -878,7 +903,8 @@ static Members parseContainerMembers(Parser* p) {
case TOKEN_KEYWORD_COMPTIME:
case TOKEN_KEYWORD_USINGNAMESPACE:;
const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
fprintf(stderr, "%s not implemented in parseContainerMembers\n", str);
fprintf(
stderr, "%s not implemented in parseContainerMembers\n", str);
exit(1);
case TOKEN_KEYWORD_PUB: {
p->tok_i++;
@@ -972,7 +998,8 @@ break_loop:;
.trailing = trailing,
};
default:;
const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len);
const AstSubRange span
= listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len);
return (Members) {
.len = items_len,
.lhs = span.start,
@@ -983,7 +1010,8 @@ break_loop:;
}
void parseRoot(Parser* p) {
addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });
addNode(
&p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });
Members root_members = parseContainerMembers(p);
AstSubRange root_decls = membersToSpan(root_members, p);