2025-01-08 19:04:40 +01:00
parent a987479617
commit aa0fab43e4
6 changed files with 166 additions and 123 deletions

.clang-format

@@ -1,3 +1,3 @@
 BasedOnStyle: WebKit
 BreakBeforeBraces: Attach
-ColumnLimit: 80
+ColumnLimit: 79

ast.c

@@ -9,7 +9,8 @@
 #define N 1024
 
-static void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional) {
+static void astTokenListEnsureCapacity(
+    AstTokenList* list, uint32_t additional) {
     const uint32_t new_len = list->len + additional;
     if (new_len <= list->cap) {
         return;
@@ -30,7 +31,7 @@ Ast astParse(const char* source, const uint32_t len) {
         .len = 0,
         .cap = estimated_token_count,
         .tags = ARR_INIT(TokenizerTag, estimated_token_count),
-        .starts = ARR_INIT(AstIndex, estimated_token_count)
+        .starts = ARR_INIT(AstIndex, estimated_token_count),
     };
 
     Tokenizer tok = tokenizerInit(source, len);

ast.h

@@ -40,7 +40,8 @@ typedef enum {
     /// lhs catch rhs
     /// lhs catch |err| rhs
     /// main_token is the `catch` keyword.
-    /// payload is determined by looking at the next token after the `catch` keyword.
+    /// payload is determined by looking at the next token after the `catch`
+    /// keyword.
     AST_NODE_CATCH,
     /// `lhs.a`. main_token is the dot. rhs is the identifier token index.
     AST_NODE_FIELD_ACCESS,
@@ -196,7 +197,8 @@ typedef enum {
     /// main_token might be a ** token, which is shared with a parent/child
     /// pointer type and may require special handling.
     AST_NODE_PTR_TYPE,
-    /// lhs is index into ptr_type_bit_range. rhs is the element type expression.
+    /// lhs is index into ptr_type_bit_range. rhs is the element type
+    /// expression.
     /// main_token is the asterisk if a single item pointer or the lbracket
     /// if a slice, many-item pointer, or C-pointer
     /// main_token might be a ** token, which is shared with a parent/child
@@ -208,7 +210,8 @@ typedef enum {
     /// `lhs[b..c]`. rhs is index into Slice
     /// main_token is the lbracket.
     AST_NODE_SLICE,
-    /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be omitted.
+    /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be
+    /// omitted.
     /// main_token is the lbracket.
     AST_NODE_SLICE_SENTINEL,
     /// `lhs.*`. rhs is unused.
@@ -221,7 +224,8 @@ typedef enum {
     AST_NODE_ARRAY_INIT_ONE_COMMA,
     /// `.{lhs, rhs}`. lhs and rhs can be omitted.
     AST_NODE_ARRAY_INIT_DOT_TWO,
-    /// Same as `array_init_dot_two` except there is known to be a trailing comma
+    /// Same as `array_init_dot_two` except there is known to be a trailing
+    /// comma
     /// before the final rbrace.
     AST_NODE_ARRAY_INIT_DOT_TWO_COMMA,
     /// `.{a, b}`. `sub_list[lhs..rhs]`.
@@ -229,7 +233,8 @@ typedef enum {
     /// Same as `array_init_dot` except there is known to be a trailing comma
     /// before the final rbrace.
     AST_NODE_ARRAY_INIT_DOT_COMMA,
-    /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means `.{a, b}`.
+    /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means
+    /// `.{a, b}`.
     AST_NODE_ARRAY_INIT,
     /// Same as `array_init` except there is known to be a trailing comma
     /// before the final rbrace.
@@ -244,7 +249,8 @@ typedef enum {
     /// main_token is the lbrace.
     /// No trailing comma before the rbrace.
     AST_NODE_STRUCT_INIT_DOT_TWO,
-    /// Same as `struct_init_dot_two` except there is known to be a trailing comma
+    /// Same as `struct_init_dot_two` except there is known to be a trailing
+    /// comma
     /// before the final rbrace.
     AST_NODE_STRUCT_INIT_DOT_TWO_COMMA,
     /// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`.
@@ -283,7 +289,8 @@ typedef enum {
     /// main_token is the `(`.
     AST_NODE_ASYNC_CALL_COMMA,
     /// `switch(lhs) {}`. `SubRange[rhs]`.
-    /// `main_token` is the identifier of a preceding label, if any; otherwise `switch`.
+    /// `main_token` is the identifier of a preceding label, if any; otherwise
+    /// `switch`.
     AST_NODE_SWITCH,
     /// Same as switch except there is known to be a trailing comma
     /// before the final rbrace
@@ -313,7 +320,8 @@ typedef enum {
     AST_NODE_WHILE,
     /// `for (lhs) rhs`.
     AST_NODE_FOR_SIMPLE,
-    /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. `For[rhs]`.
+    /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`.
+    /// `For[rhs]`.
     AST_NODE_FOR,
     /// `lhs..rhs`. rhs can be omitted.
     AST_NODE_FOR_RANGE,
@@ -346,13 +354,15 @@ typedef enum {
     /// main_token is the `fn` keyword.
     /// extern function declarations use this tag.
     AST_NODE_FN_PROTO_MULTI,
-    /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. `FnProtoOne[lhs]`.
+    /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`.
+    /// `FnProtoOne[lhs]`.
     /// zero or one parameters.
     /// anytype and ... parameters are omitted from the AST tree.
     /// main_token is the `fn` keyword.
     /// extern function declarations use this tag.
     AST_NODE_FN_PROTO_ONE,
-    /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. `FnProto[lhs]`.
+    /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`.
+    /// `FnProto[lhs]`.
     /// anytype and ... parameters are omitted from the AST tree.
     /// main_token is the `fn` keyword.
     /// extern function declarations use this tag.
@@ -373,8 +383,10 @@ typedef enum {
     /// Both lhs and rhs unused.
     AST_NODE_UNREACHABLE_LITERAL,
     /// Both lhs and rhs unused.
-    /// Most identifiers will not have explicit AST nodes, however for expressions
-    /// which could be one of many different kinds of AST nodes, there will be an
+    /// Most identifiers will not have explicit AST nodes, however for
+    /// expressions
+    /// which could be one of many different kinds of AST nodes, there will be
+    /// an
     /// identifier AST node for it.
     AST_NODE_IDENTIFIER,
     /// lhs is the dot token index, rhs unused, main_token is the identifier.
@@ -392,23 +404,27 @@ typedef enum {
     /// `@a(lhs, rhs)`. lhs and rhs may be omitted.
     /// main_token is the builtin token.
     AST_NODE_BUILTIN_CALL_TWO,
-    /// Same as builtin_call_two but there is known to be a trailing comma before the rparen.
+    /// Same as builtin_call_two but there is known to be a trailing comma
+    /// before the rparen.
     AST_NODE_BUILTIN_CALL_TWO_COMMA,
     /// `@a(b, c)`. `sub_list[lhs..rhs]`.
     /// main_token is the builtin token.
     AST_NODE_BUILTIN_CALL,
-    /// Same as builtin_call but there is known to be a trailing comma before the rparen.
+    /// Same as builtin_call but there is known to be a trailing comma before
+    /// the rparen.
     AST_NODE_BUILTIN_CALL_COMMA,
     /// `error{a, b}`.
     /// rhs is the rbrace, lhs is unused.
     AST_NODE_ERROR_SET_DECL,
-    /// `struct {}`, `union {}`, `opaque {}`, `enum {}`. `extra_data[lhs..rhs]`.
+    /// `struct {}`, `union {}`, `opaque {}`, `enum {}`.
+    /// `extra_data[lhs..rhs]`.
     /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
     AST_NODE_CONTAINER_DECL,
     /// Same as ContainerDecl but there is known to be a trailing comma
     /// or semicolon before the rbrace.
     AST_NODE_CONTAINER_DECL_TRAILING,
-    /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum {lhs, rhs}`.
+    /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum
+    /// {lhs, rhs}`.
     /// lhs or rhs can be omitted.
     /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
     AST_NODE_CONTAINER_DECL_TWO,
@@ -458,7 +474,8 @@ typedef enum {
     /// `{lhs rhs}`. rhs or lhs can be omitted.
     /// main_token points at the lbrace.
     AST_NODE_BLOCK_TWO,
-    /// Same as block_two but there is known to be a semicolon before the rbrace.
+    /// Same as block_two but there is known to be a semicolon before the
+    /// rbrace.
     AST_NODE_BLOCK_TWO_SEMICOLON,
     /// `{}`. `sub_list[lhs..rhs]`.
     /// main_token points at the lbrace.
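
A note on the encoding these doc comments keep referring to: every node carries exactly one 32-bit lhs and one 32-bit rhs, so anything larger (argument lists, statement lists, container members) is spilled into the parser's extra_data array and addressed as a `sub_list[lhs..rhs]` range. A minimal decoding sketch, using only names visible in this diff (the visit() helper is hypothetical):

    // Walk the children of a node documented as `sub_list[lhs..rhs]`,
    // e.g. AST_NODE_BUILTIN_CALL. Illustrative only.
    const AstData data = p->nodes.datas[node];
    for (AstNodeIndex i = data.lhs; i < data.rhs; i++) {
        const AstNodeIndex child = p->extra_data.arr[i]; // child node index
        visit(p, child); // hypothetical visitor
    }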


@@ -5,49 +5,50 @@
 #include <stdint.h>
 #include <stdlib.h>
 
 #define SLICE(Type) \
     struct Type##Slice { \
         uint32_t len; \
         uint32_t cap; \
         Type* arr; \
     }
 
-#define ARR_INIT(Type, initial_cap) ({ \
-    Type* arr = calloc(initial_cap, sizeof(Type)); \
-    if (!arr) \
-        exit(1); \
-    arr; \
-})
+#define ARR_INIT(Type, initial_cap) \
+    ({ \
+        Type* arr = calloc(initial_cap, sizeof(Type)); \
+        if (!arr) \
+            exit(1); \
+        arr; \
+    })
 
 #define SLICE_INIT(Type, initial_cap) \
-    { \
-        .len = 0, \
-        .cap = (initial_cap), \
-        .arr = ARR_INIT(Type, initial_cap) \
-    }
+    { .len = 0, .cap = (initial_cap), .arr = ARR_INIT(Type, initial_cap) }
 
-#define SLICE_RESIZE(Type, slice, new_cap) ({ \
-    const uint32_t cap = (new_cap); \
-    Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
-    if (new_arr == NULL) { \
-        free((slice)->arr); \
-        exit(1); \
-    } \
-    (slice)->arr = new_arr; \
-    (slice)->cap = cap; \
-})
+#define SLICE_RESIZE(Type, slice, new_cap) \
+    ({ \
+        const uint32_t cap = (new_cap); \
+        Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
+        if (new_arr == NULL) { \
+            free((slice)->arr); \
+            exit(1); \
+        } \
+        (slice)->arr = new_arr; \
+        (slice)->cap = cap; \
+    })
 
-#define SLICE_ENSURE_CAPACITY(Type, slice, additional) ({ \
-    if ((slice)->len + (additional) > (slice)->cap) { \
-        SLICE_RESIZE(Type, \
-            slice, \
-            ((slice)->cap * 2 > (slice)->len + (additional)) ? (slice)->cap * 2 : (slice)->len + (additional)); \
-    } \
-})
+#define SLICE_ENSURE_CAPACITY(Type, slice, additional) \
+    ({ \
+        if ((slice)->len + (additional) > (slice)->cap) { \
+            SLICE_RESIZE(Type, slice, \
+                ((slice)->cap * 2 > (slice)->len + (additional)) \
+                    ? (slice)->cap * 2 \
+                    : (slice)->len + (additional)); \
+        } \
+    })
 
-#define SLICE_APPEND(Type, slice, item) ({ \
-    SLICE_ENSURE_CAPACITY(Type, slice, 1); \
-    (slice)->arr[(slice)->len++] = (item); \
-})
+#define SLICE_APPEND(Type, slice, item) \
+    ({ \
+        SLICE_ENSURE_CAPACITY(Type, slice, 1); \
+        (slice)->arr[(slice)->len++] = (item); \
+    })
 
 #endif
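
Since this whole header is built on GNU statement expressions (`({ ... })`, a gcc/clang/tcc extension, consistent with the tcc remarks in parser.c), here is a minimal usage sketch; the header name and the demo program are assumptions, not part of the commit:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "slice.h" // assumed name; the diff does not show this file's name

    int main(void) {
        SLICE(uint32_t) nums = SLICE_INIT(uint32_t, 4); // struct uint32_tSlice
        for (uint32_t i = 0; i < 10; i++)
            SLICE_APPEND(uint32_t, &nums, i * i); // cap 4 -> 8 -> 16 via SLICE_RESIZE
        printf("len=%u cap=%u last=%u\n", nums.len, nums.cap,
            nums.arr[nums.len - 1]);
        free(nums.arr);
        return 0;
    }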

parser.c

@@ -10,21 +10,14 @@ const AstNodeIndex null_node = 0;
 const AstTokenIndex null_token = ~(AstTokenIndex)(0);
 
 typedef struct {
-    enum {
-        FIELD_STATE_NONE,
-        FIELD_STATE_SEEN,
-        FIELD_STATE_END
-    } tag;
+    enum { FIELD_STATE_NONE, FIELD_STATE_SEEN, FIELD_STATE_END } tag;
     union {
         uint32_t end;
     } payload;
 } FieldState;
 
 typedef struct {
-    enum {
-        SMALL_SPAN_ZERO_OR_ONE,
-        SMALL_SPAN_MULTI
-    } tag;
+    enum { SMALL_SPAN_ZERO_OR_ONE, SMALL_SPAN_MULTI } tag;
     union {
         AstNodeIndex zero_or_one;
         AstSubRange multi;
@@ -43,11 +36,10 @@ static CleanupScratch initCleanupScratch(Parser* p) {
     };
 }
 
-static void cleanupScratch(CleanupScratch* c) {
-    c->scratch->len = c->old_len;
-}
+static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; }
 
-static AstSubRange listToSpan(Parser* p, const AstNodeIndex* list, uint32_t count) {
+static AstSubRange listToSpan(
+    Parser* p, const AstNodeIndex* list, uint32_t count) {
     SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
     memcpy(p->extra_data.arr, list, count * sizeof(AstNodeIndex));
     p->extra_data.len += count;
@@ -73,9 +65,7 @@ static AstSubRange membersToSpan(const Members self, Parser* p) {
     }
 }
 
-static AstTokenIndex nextToken(Parser* p) {
-    return p->tok_i++;
-}
+static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }
 
 static AstTokenIndex eatToken(Parser* p, TokenizerTag tag) {
     if (p->token_tags[p->tok_i] == tag) {
@@ -104,7 +94,8 @@ static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
     const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
 
     list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
-    list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
+    list->main_tokens
+        = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
     list->datas = realloc(list->datas, new_cap * sizeof(AstData));
     if (!list->tags || !list->main_tokens || !list->datas)
         exit(1);
@@ -119,7 +110,8 @@ static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
     return nodes->len++;
 }
 
-static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t count) {
+static AstNodeIndex addExtra(
+    Parser* p, const AstNodeIndex* extra, uint32_t count) {
     const AstNodeIndex result = p->extra_data.len;
     SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
     memcpy(p->extra_data.arr, extra, count * sizeof(AstNodeIndex));
@@ -167,7 +159,8 @@ typedef struct {
 static AstNodeIndex expectContainerField(Parser* p) {
     eatToken(p, TOKEN_KEYWORD_COMPTIME);
     const AstTokenIndex main_token = p->tok_i;
-    if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON)
+    if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
+        && p->token_tags[p->tok_i + 1] == TOKEN_COLON)
         p->tok_i += 2;
 
     const AstNodeIndex type_expr = parseTypeExpr(p);
@@ -234,26 +227,30 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
     case TOKEN_KEYWORD_UNION:
     case TOKEN_KEYWORD_COMPTIME:
    case TOKEN_MULTILINE_STRING_LITERAL_LINE:
-        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
+        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
+            tokenizerGetTagString(tok));
         exit(1);
     case TOKEN_IDENTIFIER:
         if (p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
-            fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
+            fprintf(stderr,
+                "parsePrimaryTypeExpr does not support identifier followed by "
+                "colon\n");
             exit(1);
         }
-        return addNode(
-            &p->nodes,
+        return addNode(&p->nodes,
             (AstNodeItem) {
                 .tag = AST_NODE_IDENTIFIER,
                 .main_token = nextToken(p),
-                .data = {} });
+                .data = {},
+            });
     case TOKEN_KEYWORD_INLINE:
     case TOKEN_KEYWORD_FOR:
     case TOKEN_KEYWORD_WHILE:
     case TOKEN_PERIOD:
     case TOKEN_KEYWORD_ERROR:
     case TOKEN_L_PAREN:
-        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
+        fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n",
+            tokenizerGetTagString(tok));
         exit(1);
     default:
         return null_node;
@@ -268,7 +265,8 @@ static AstNodeIndex parseSuffixOp(Parser* p, AstNodeIndex lhs) {
     case TOKEN_PERIOD_ASTERISK:
     case TOKEN_INVALID_PERIODASTERISKS:
     case TOKEN_PERIOD:
-        fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
+        fprintf(stderr, "parseSuffixOp does not support %s\n",
+            tokenizerGetTagString(tok));
         exit(1);
     default:
         return null_node;
@@ -295,7 +293,8 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
     if (lparen == null_token)
         return res;
 
-    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
+        = initCleanupScratch(p);
     while (true) {
         if (eatToken(p, TOKEN_R_PAREN) != null_token)
             break;
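
The attribute being rewrapped here is GCC/Clang's scope-exit hook: `__attribute__((cleanup(f)))` runs f with a pointer to the variable whenever it leaves scope, which is how `scratch_top` restores the shared scratch list's length on every return path without explicit unwinding. A standalone sketch of the pattern (names invented for illustration):

    #include <stdint.h>
    #include <stdio.h>

    static void restoreLen(uint32_t* saved) {
        // Runs automatically when the annotated variable goes out of scope.
        printf("len restored to %u\n", *saved);
    }

    void demo(int early) {
        uint32_t old_len __attribute__((__cleanup__(restoreLen))) = 7;
        if (early)
            return; // restoreLen(&old_len) still fires here
        // ... and at the normal end of scope
    }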
@@ -329,7 +328,8 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
             },
         });
     default:;
-        const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], params_len);
+        const AstSubRange span = listToSpan(
+            p, &p->scratch.arr[scratch_top.old_len], params_len);
         return addNode(
             &p->nodes,
             (AstNodeItem) {
@@ -337,7 +337,10 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
                 .main_token = lparen,
                 .data = {
                     .lhs = res,
-                    .rhs = addExtra(p, (AstNodeIndex[]) { span.start, span.end }, 2),
+                    .rhs = addExtra(p, (AstNodeIndex[]) {
+                        span.start,
+                        span.end,
+                    }, 2),
                 },
             });
     }
@@ -385,7 +388,8 @@ static AstNodeIndex parseTypeExpr(Parser* p) {
     case TOKEN_ASTERISK:
     case TOKEN_ASTERISK_ASTERISK:
     case TOKEN_L_BRACKET:
-        fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
+        fprintf(stderr, "parseTypeExpr not supported for %s\n",
+            tokenizerGetTagString(tok));
         exit(1);
     default:
         return parseErrorUnionExpr(p);
@@ -427,7 +431,8 @@ static AstNodeIndex parseFnProto(Parser* p) {
     const AstNodeIndex return_type_expr = parseTypeExpr(p);
 
-    if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) {
+    if (align_expr == 0 && section_expr == 0 && callconv_expr == 0
+        && addrspace_expr == 0) {
         switch (params.tag) {
         case SMALL_SPAN_ZERO_OR_ONE:
             return setNode(
} }
static AstTokenIndex parseBlockLabel(Parser* p) { static AstTokenIndex parseBlockLabel(Parser* p) {
if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKEN_COLON) { if (p->token_tags[p->tok_i] == TOKEN_IDENTIFIER
&& p->token_tags[p->tok_i + 1] == TOKEN_COLON) {
const AstTokenIndex identifier = p->tok_i; const AstTokenIndex identifier = p->tok_i;
p->tok_i += 2; p->tok_i += 2;
return identifier; return identifier;
@@ -496,7 +502,8 @@ static AstNodeIndex parseLoopStatement(Parser* p) {
     if (inline_token == null_token)
         return null_node;
 
-    fprintf(stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
+    fprintf(
+        stderr, "seen 'inline', there should have been a 'for' or 'while'\n");
     exit(1);
     return 0; // tcc
 }
@@ -509,7 +516,8 @@ static AstNodeIndex parseAssignExpr(Parser* p) {
 }
 
 static AstNodeIndex parseVarDeclProto(Parser* p) {
-    if (eatToken(p, TOKEN_KEYWORD_CONST) == null_token || eatToken(p, TOKEN_KEYWORD_VAR) == null_token)
+    if (eatToken(p, TOKEN_KEYWORD_CONST) == null_token
+        || eatToken(p, TOKEN_KEYWORD_VAR) == null_token)
         return null_node;
 
     fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
@@ -534,17 +542,29 @@ static OperInfo operTable(TokenizerTag tok_tag) {
         return (OperInfo) { .prec = 20, .tag = AST_NODE_BOOL_AND };
 
     case TOKEN_EQUAL_EQUAL:
-        return (OperInfo) { .prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE };
+        return (OperInfo) {
+            .prec = 30, .tag = AST_NODE_EQUAL_EQUAL, .assoc = ASSOC_NONE
+        };
     case TOKEN_BANG_EQUAL:
-        return (OperInfo) { .prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE };
+        return (OperInfo) {
+            .prec = 30, .tag = AST_NODE_BANG_EQUAL, .assoc = ASSOC_NONE
+        };
     case TOKEN_ANGLE_BRACKET_LEFT:
-        return (OperInfo) { .prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE };
+        return (OperInfo) {
+            .prec = 30, .tag = AST_NODE_LESS_THAN, .assoc = ASSOC_NONE
+        };
     case TOKEN_ANGLE_BRACKET_RIGHT:
-        return (OperInfo) { .prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE };
+        return (OperInfo) {
+            .prec = 30, .tag = AST_NODE_GREATER_THAN, .assoc = ASSOC_NONE
+        };
     case TOKEN_ANGLE_BRACKET_LEFT_EQUAL:
-        return (OperInfo) { .prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE };
+        return (OperInfo) {
+            .prec = 30, .tag = AST_NODE_LESS_OR_EQUAL, .assoc = ASSOC_NONE
+        };
     case TOKEN_ANGLE_BRACKET_RIGHT_EQUAL:
-        return (OperInfo) { .prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE };
+        return (OperInfo) {
+            .prec = 30, .tag = AST_NODE_GREATER_OR_EQUAL, .assoc = ASSOC_NONE
+        };
 
     case TOKEN_AMPERSAND:
         return (OperInfo) { .prec = 40, .tag = AST_NODE_BIT_AND };
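
For context: a table mapping each operator token to a precedence and associativity, as operTable does, is the usual driver for a precedence-climbing expression parser. The commit does not show this project's expression loop, so the following is only a generic sketch reusing names visible in the diff (it also ignores ASSOC_NONE, which a real loop must reject on chained operators):

    // Generic precedence climbing; illustrative, not the project's code.
    // Assumes operTable() returns prec == 0 for non-operator tokens.
    static AstNodeIndex parseExprPrecedence(Parser* p, int min_prec) {
        AstNodeIndex lhs = parsePrimaryTypeExpr(p); // stand-in prefix parser
        while (true) {
            const OperInfo info = operTable(p->token_tags[p->tok_i]);
            if (info.prec == 0 || info.prec < min_prec)
                break;
            const AstTokenIndex oper_token = nextToken(p);
            const AstNodeIndex rhs = parseExprPrecedence(p, info.prec + 1);
            lhs = addNode(&p->nodes,
                (AstNodeItem) {
                    .tag = info.tag,
                    .main_token = oper_token,
                    .data = { .lhs = lhs, .rhs = rhs },
                });
        }
        return lhs;
    }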
@@ -600,7 +620,8 @@ static OperInfo operTable(TokenizerTag tok_tag) {
 }
 
 static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
-    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
+        = initCleanupScratch(p);
 
     // while(true) {
     //     const AstNodeIndex var_decl_proto = parseVarDeclProto(p);
@@ -634,7 +655,8 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
     case TOKEN_KEYWORD_STRUCT:
     case TOKEN_KEYWORD_UNION:;
         const char* tok_str = tokenizerGetTagString(tok);
-        fprintf(stderr, "expectStatement does not support keyword %s\n", tok_str);
+        fprintf(
+            stderr, "expectStatement does not support keyword %s\n", tok_str);
         exit(1);
     default:;
     }
@@ -655,13 +677,15 @@ static AstNodeIndex parseBlock(Parser* p) {
     if (lbrace == null_token)
         return null_node;
 
-    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
+        = initCleanupScratch(p);
 
     while (1) {
         if (p->token_tags[p->tok_i] == TOKEN_R_BRACE)
             break;
 
-        // "const AstNodeIndex statement" once tinycc supports typeof_unqual (C23)
+        // "const AstNodeIndex statement" once tinycc supports typeof_unqual
+        // (C23)
         AstNodeIndex statement = expectStatement(p, true);
         if (statement == 0)
             break;
@@ -706,7 +730,8 @@ static AstNodeIndex parseBlock(Parser* p) {
             },
         });
     default:;
-        const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], statements_len);
+        const AstSubRange span = listToSpan(
+            p, &p->scratch.arr[scratch_top.old_len], statements_len);
         return addNode(
             &p->nodes,
             (AstNodeItem) {
@@ -774,9 +799,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
     case TOKEN_L_BRACE:;
         AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL);
         AstNodeIndex body_block = parseBlock(p);
-        return setNode(
-            p,
-            fn_decl_index,
+        return setNode(p, fn_decl_index,
             (AstNodeItem) {
                 .tag = AST_NODE_FN_DECL,
                 .main_token = p->nodes.main_tokens[fn_proto],
@@ -794,7 +817,8 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
     }
 
     // assuming the program is correct...
-    fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
+    fprintf(stderr,
+        "the next token should be usingnamespace, which is not supported\n");
     exit(1);
     return 0; // make tcc happy
 }
@@ -864,7 +888,8 @@ void findNextContainerMember(Parser* p) {
 }
 
 static Members parseContainerMembers(Parser* p) {
-    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = initCleanupScratch(p);
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch)))
+        = initCleanupScratch(p);
 
     while (eatToken(p, TOKEN_CONTAINER_DOC_COMMENT) != null_token)
         ;
@@ -878,7 +903,8 @@ static Members parseContainerMembers(Parser* p) {
         case TOKEN_KEYWORD_COMPTIME:
         case TOKEN_KEYWORD_USINGNAMESPACE:;
             const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
-            fprintf(stderr, "%s not implemented in parseContainerMembers\n", str);
+            fprintf(
+                stderr, "%s not implemented in parseContainerMembers\n", str);
             exit(1);
         case TOKEN_KEYWORD_PUB: {
             p->tok_i++;
@@ -972,7 +998,8 @@ break_loop:;
             .trailing = trailing,
         };
     default:;
-        const AstSubRange span = listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len);
+        const AstSubRange span
+            = listToSpan(p, &p->scratch.arr[scratch_top.old_len], items_len);
         return (Members) {
             .len = items_len,
             .lhs = span.start,
@@ -983,7 +1010,8 @@ break_loop:;
 }
 
 void parseRoot(Parser* p) {
-    addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });
+    addNode(
+        &p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });
 
     Members root_members = parseContainerMembers(p);
     AstSubRange root_decls = membersToSpan(root_members, p);


@@ -18,6 +18,7 @@ const char* tokenizerGetTagString(TokenizerTag tag) {
     }
 }
 
+// clang-format off
 const KeywordMap keywords[] = {
     { "addrspace", TOKEN_KEYWORD_ADDRSPACE },
     { "align", TOKEN_KEYWORD_ALIGN },
@@ -69,6 +70,7 @@ const KeywordMap keywords[] = {
     { "volatile", TOKEN_KEYWORD_VOLATILE },
     { "while", TOKEN_KEYWORD_WHILE }
 };
+// clang-format on
 
 // TODO binary search
 static TokenizerTag getKeyword(const char* bytes, const uint32_t len) {
@@ -113,13 +115,11 @@ state:
     switch (self->buffer[self->index]) {
     case 0:
         if (self->index == self->buffer_len) {
-            return (TokenizerToken) {
-                .tag = TOKEN_EOF,
+            return (TokenizerToken) { .tag = TOKEN_EOF,
                 .loc = {
                     .start = self->index,
                     .end = self->index,
-                }
-            };
+                } };
         } else {
             state = TOKENIZER_STATE_INVALID;
             goto state;
@@ -860,13 +860,11 @@ state:
             state = TOKENIZER_STATE_INVALID;
             goto state;
         } else {
-            return (TokenizerToken) {
-                .tag = TOKEN_EOF,
+            return (TokenizerToken) { .tag = TOKEN_EOF,
                 .loc = {
                     .start = self->index,
                     .end = self->index,
-                }
-            };
+                } };
         }
         break;
     case '!':
@@ -935,13 +933,11 @@ state:
             state = TOKENIZER_STATE_INVALID;
             goto state;
         } else {
-            return (TokenizerToken) {
-                .tag = TOKEN_EOF,
+            return (TokenizerToken) { .tag = TOKEN_EOF,
                 .loc = {
                     .start = self->index,
                     .end = self->index,
-                }
-            };
+                } };
         }
         break;
     case '\n':
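
These EOF returns all sit inside the tokenizer's labeled-dispatch state machine: a `state:` label in front of a big switch, with transitions performed by assigning the next state and re-entering via `goto state;` instead of looping. A reduced, self-contained sketch of the same control flow (the states and the digit-counting task are invented for illustration):

    #include <stdint.h>

    typedef enum { STATE_START, STATE_DIGITS } State; // invented states

    static uint32_t countLeadingDigits(const char* buf) {
        uint32_t i = 0;
        State state = STATE_START;
    state:
        switch (state) {
        case STATE_START:
            if (buf[i] < '0' || buf[i] > '9')
                return 0;
            state = STATE_DIGITS;
            goto state; // re-enter the switch in the new state
        case STATE_DIGITS:
            i++;
            if (buf[i] >= '0' && buf[i] <= '9')
                goto state; // stay in STATE_DIGITS
            return i;
        }
        return i; // unreachable; keeps compilers happy
    }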