making tcc happier

2024-12-27 12:34:08 +02:00
parent 6ae7d7320d
commit 6006a802e1
10 changed files with 416 additions and 274 deletions

.clang-format (new file, 2 lines changed)

@@ -0,0 +1,2 @@
+BasedOnStyle: WebKit
+BreakBeforeBraces: Attach

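The two options mean "WebKit style, but with the opening brace attached": BreakBeforeBraces: Attach overrides WebKit's default of breaking before a function's opening brace, which accounts for the many whitespace-only `) {` changes across the C files below. Schematically:

// BasedOnStyle: WebKit alone formats functions as:
void usage(const char* argv0)
{
    /* ... */
}

// With BreakBeforeBraces: Attach the brace moves up:
void usage(const char* argv0) {
    /* ... */
}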
ast.c (49 lines changed)

@@ -9,45 +9,15 @@
 #define N 1024

-void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional)
-{
-    const uint32_t new_len = list->len + additional;
-    if (new_len <= list->cap) {
-        return;
-    }
-    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
-    list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
-    list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
-    list->datas = realloc(list->datas, new_cap * sizeof(AstData));
-    if (!list->tags || !list->main_tokens || !list->datas)
-        exit(1);
-    list->cap = new_cap;
-}
-
-AstNodeIndex astNodeListAppend(
-    AstNodeList* list,
-    AstNodeTag tag,
-    AstTokenIndex main_token,
-    AstData data)
-{
-    astNodeListEnsureCapacity(list, 1);
-    list->tags[list->len] = tag;
-    list->main_tokens[list->len] = main_token;
-    list->datas[list->len] = data;
-    return list->len++;
-}
-
-Ast astParse(const char* source, const uint32_t len)
-{
+Ast astParse(const char* source, const uint32_t len) {
     uint32_t estimated_token_count = len / 8;
     // Initialize token list
     AstTokenList tokens = {
         .len = 0,
         .cap = estimated_token_count,
-        .tags = SLICE_INIT(TokenizerTag, estimated_token_count),
-        .starts = SLICE_INIT(AstIndex, estimated_token_count)
+        .tags = ARR_INIT(TokenizerTag, estimated_token_count),
+        .starts = ARR_INIT(AstIndex, estimated_token_count)
     };
     // Tokenize

@@ -70,9 +40,9 @@ Ast astParse(const char* source, const uint32_t len)
     AstNodeList nodes = {
         .len = 0,
         .cap = estimated_node_count,
-        .tags = SLICE_INIT(AstNodeTag, estimated_node_count),
-        .main_tokens = SLICE_INIT(AstTokenIndex, estimated_node_count),
-        .datas = SLICE_INIT(AstData, estimated_node_count)
+        .tags = ARR_INIT(AstNodeTag, estimated_node_count),
+        .main_tokens = ARR_INIT(AstTokenIndex, estimated_node_count),
+        .datas = ARR_INIT(AstData, estimated_node_count)
     };
     // Initialize parser

@@ -84,11 +54,8 @@ Ast astParse(const char* source, const uint32_t len)
         .tokens_len = tokens.len,
         .tok_i = 0,
         .nodes = nodes,
-        .extra_data = {
-            .len = 0,
-            .cap = N,
-            .arr = SLICE_INIT(AstNodeIndex, N) },
-        .scratch = { .len = 0, .cap = N, .arr = SLICE_INIT(AstNodeIndex, N) }
+        .extra_data = SLICE_INIT(AstNodeIndex, N),
+        .scratch = SLICE_INIT(AstNodeIndex, N)
     };
     free(p.scratch.arr); // Parser takes ownership

ast.h (21 lines changed)

@@ -4,6 +4,7 @@
 #include <stdbool.h>
 #include <stdint.h>
+#include "common.h"
 #include "tokenizer.h"

 typedef enum {

@@ -498,6 +499,12 @@
     AstData* datas;
 } AstNodeList;

+typedef struct {
+    AstNodeTag tag;
+    AstTokenIndex main_token;
+    AstData data;
+} AstNodeItem;
+
 typedef struct {
     uint32_t len;
     uint32_t cap;

@@ -505,18 +512,14 @@
     AstIndex* starts;
 } AstTokenList;

-typedef struct {
-    uint32_t len;
-    uint32_t cap;
-    AstNodeIndex* arr;
-} AstExtraData;
+typedef SLICE(AstNodeIndex) AstNodeIndexSlice;

 typedef struct {
     const char* source;
     uint32_t source_len;
     AstTokenList tokens;
     AstNodeList nodes;
-    AstExtraData extra_data;
+    AstNodeIndexSlice extra_data;
 } Ast;

 typedef struct AstPtrType {

@@ -596,11 +599,7 @@ typedef struct AstError {
 Ast astParse(const char* source, uint32_t len);

-// MultiArrayList
-void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional);
-void astTokenListEnsureCapacity(AstTokenList* list, uint32_t additional);
-AstNodeIndex astNodeListAppend(AstNodeList* list, AstNodeTag tag, AstTokenIndex main_token, AstData data);
+AstNodeIndex astNodeListAppend(AstNodeList*, AstNodeItem);
 void astTokenListAppend(AstTokenList* list, TokenizerTag tag, AstIndex start);

 #endif

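For reference, AstNodeList is a struct-of-arrays: one logical node is one index into the three parallel arrays (tags, main_tokens, datas), and the new AstNodeItem is the per-row view that lets a whole node travel through one parameter instead of three. A small usage sketch against the declarations above (the wrapper function is illustrative, not from the repo):

#include "ast.h"

// Append an identifier node; its three fields are fanned out into the
// three parallel arrays at the same index.
static AstNodeIndex addIdentifier(AstNodeList* list, AstTokenIndex tok) {
    return astNodeListAppend(list, (AstNodeItem) {
        .tag = AST_NODE_TAG_IDENTIFIER,
        .main_token = tok,
        .data = { .lhs = 0, .rhs = 0 },
    });
}

This mirrors the layout of Zig's std.MultiArrayList, which the removed "// MultiArrayList" comment presumably referenced: keeping each field in its own array keeps scans over just the tags dense in cache.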
build.zig

@@ -79,7 +79,7 @@ pub fn build(b: *std.Build) !void {
     const lint_step = b.step("lint", "Run linters");
     const clang_format = b.addSystemCommand(&.{"clang-format"});
-    clang_format.addArgs(&.{ "--style=webkit", "--verbose", "-Werror", "-i" });
+    clang_format.addArgs(&.{ "--verbose", "-Werror", "-i" });
     for (all_c_files ++ headers) |f| clang_format.addFileArg(b.path(f));
     lint_step.dependOn(&clang_format.step);

common.h

@@ -5,14 +5,28 @@
 #include <stdint.h>
 #include <stdlib.h>

-#define SLICE_INIT(Type, initial_cap) ({ \
+#define SLICE(Type) \
+    struct Type##Slice { \
+        uint32_t len; \
+        uint32_t cap; \
+        Type* arr; \
+    }
+
+#define ARR_INIT(Type, initial_cap) ({ \
     Type* arr = calloc(initial_cap, sizeof(Type)); \
     if (!arr) \
         exit(1); \
-    (__typeof__(Type*)) { arr }; \
+    arr; \
 })

-#define SLICE_RESIZE(slice, Type, new_cap) ({ \
+#define SLICE_INIT(Type, initial_cap) \
+    { \
+        .len = 0, \
+        .cap = (initial_cap), \
+        .arr = ARR_INIT(Type, initial_cap) \
+    }
+
+#define SLICE_RESIZE(Type, slice, new_cap) ({ \
     uint32_t cap = (new_cap); \
     Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
     if (!new_arr) \

@@ -21,12 +35,17 @@
     (slice)->cap = cap; \
 })

-#define SLICE_ENSURE_CAPACITY(slice, Type, additional) ({ \
+#define SLICE_ENSURE_CAPACITY(Type, slice, additional) ({ \
     if ((slice)->len + (additional) > (slice)->cap) { \
-        SLICE_RESIZE(slice, \
-            Type, \
+        SLICE_RESIZE(Type, \
+            slice, \
             ((slice)->cap * 2 > (slice)->len + (additional)) ? (slice)->cap * 2 : (slice)->len + (additional)); \
     } \
 })

+#define SLICE_APPEND(Type, slice, item) ({ \
+    SLICE_ENSURE_CAPACITY(Type, slice, 1); \
+    (slice)->arr[(slice)->len++] = (item); \
+})
+
 #endif

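Taken together, the reworked common.h macros form a small generic dynamic-array kit: SLICE(Type) declares the struct, SLICE_INIT allocates an initial zeroed backing array via the statement-expression ARR_INIT, and SLICE_APPEND grows geometrically and stores one element. The removal of the `(__typeof__(Type*)) { arr }` compound-literal cast is presumably the tcc-compatibility fix here. A hypothetical standalone use (the uint32_t instantiation is illustrative, not from the repo):

#include <stdint.h>
#include <stdlib.h>
#include "common.h"

typedef SLICE(uint32_t) U32Slice;

int main(void) {
    U32Slice xs = SLICE_INIT(uint32_t, 8); // len 0, cap 8, zeroed backing array
    for (uint32_t i = 0; i < 100; i++)
        SLICE_APPEND(uint32_t, &xs, i); // doubles cap as needed: 8 -> 16 -> 32 -> 64 -> 128
    uint32_t last = xs.arr[xs.len - 1]; // 99
    free(xs.arr);
    return last == 99 ? 0 : 1;
}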
main.c (6 lines changed)

@@ -5,13 +5,11 @@
 int zig1Run(char* program, char** msg);
 int zig1RunFile(char* fname, char** msg);

-static void usage(const char* argv0)
-{
+static void usage(const char* argv0) {
     fprintf(stderr, "Usage: %s program.zig\n", argv0);
 }

-int main(int argc, char** argv)
-{
+int main(int argc, char** argv) {
     if (argc != 2) {
         usage(argv[0]);
         return 1;

parser.c (527 lines changed)

@@ -1,7 +1,9 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "ast.h" #include "ast.h"
#include "common.h"
#include "parser.h" #include "parser.h"
const AstNodeIndex null_node = 0; const AstNodeIndex null_node = 0;
@@ -22,8 +24,7 @@
     AstSubRange multi;
 } SmallSpan;

-void parseRoot(Parser* p)
-{
+void parseRoot(Parser* p) {
     p->nodes.tags[p->nodes.len++] = AST_NODE_TAG_ROOT;
     p->nodes.main_tokens[p->nodes.len] = 0;
@@ -32,8 +33,7 @@
 static AstTokenIndex nextToken(Parser* p) { return p->tok_i++; }

-static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
-{
+static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok) {
     if (p->token_tags[p->tok_i] == tag) {
         if (ok != NULL)
             *ok = true;
@@ -41,30 +41,124 @@ static AstTokenIndex eatToken(Parser* p, TokenizerTag tag, bool* ok)
     } else {
         if (ok != NULL)
             *ok = false;
-        return (AstTokenIndex) {};
+        return 0;
     }
 }

-static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeTag tag, AstTokenIndex main_token, AstData data)
-{
-    p->nodes.tags[i] = tag;
-    p->nodes.main_tokens[i] = main_token;
-    p->nodes.datas[i] = data;
+static void eatDocComments(Parser* p) {
+    bool ok;
+    while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) { }
+}
+
+static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
+    p->nodes.tags[i] = item.tag;
+    p->nodes.main_tokens[i] = item.main_token;
+    p->nodes.datas[i] = item.data;
     return i;
 }

+static void astNodeListEnsureCapacity(AstNodeList* list, uint32_t additional) {
+    const uint32_t new_len = list->len + additional;
+    if (new_len <= list->cap) {
+        return;
+    }
+    const uint32_t new_cap = new_len > list->cap * 2 ? new_len : list->cap * 2;
+    list->tags = realloc(list->tags, new_cap * sizeof(AstNodeTag));
+    list->main_tokens = realloc(list->main_tokens, new_cap * sizeof(AstTokenIndex));
+    list->datas = realloc(list->datas, new_cap * sizeof(AstData));
+    if (!list->tags || !list->main_tokens || !list->datas)
+        exit(1);
+    list->cap = new_cap;
+}
+
+static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
+    astNodeListEnsureCapacity(nodes, 1);
+    nodes->tags[nodes->len] = item.tag;
+    nodes->main_tokens[nodes->len] = item.main_token;
+    nodes->datas[nodes->len] = item.data;
+    return nodes->len++;
+}
+
 static AstNodeIndex parseTypeExpr(Parser* p);

-static AstNodeIndex expectTypeExpr(Parser* p)
-{
+static AstNodeIndex expectTypeExpr(Parser* p) {
     const AstNodeIndex node = parseTypeExpr(p);
     if (node == 0)
         exit(1);
     return node;
 }

-static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
-{
+static AstNodeIndex parseByteAlign(Parser* p) {
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok);
+    if (!ok) {
+        return null_node;
+    }
+    fprintf(stderr, "parseByteAlign cannot parse alignment\n");
+    exit(1);
+    return 0; // tcc
+}
+
+typedef struct {
+    AstNodeIndex align_expr, value_expr;
+} NodeContainerField;
+
+static AstNodeIndex expectContainerField(Parser* p) {
+    eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, NULL);
+    const AstTokenIndex main_token = p->tok_i;
+    if (p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER && p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON)
+        p->tok_i += 2;
+    const AstNodeIndex type_expr = expectTypeExpr(p);
+    const AstNodeIndex align_expr = parseByteAlign(p);
+    const AstNodeIndex value_expr = 0;
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_EQUAL, &ok);
+    if (ok) {
+        fprintf(stderr, "expectContainerField does not support expr\n");
+        exit(1);
+    }
+    if (align_expr == 0) {
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_CONTAINER_FIELD_INIT,
+                .main_token = main_token,
+                .data = {
+                    .lhs = type_expr,
+                    .rhs = value_expr,
+                },
+            });
+    } else if (value_expr == 0) {
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
+                .main_token = main_token,
+                .data = {
+                    .lhs = type_expr,
+                    .rhs = align_expr,
+                },
+            });
+    } else {
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_CONTAINER_FIELD,
+                .main_token = main_token,
+                .data = {
+                    .lhs = type_expr,
+                    .rhs = addExtra(p, (NodeContainerField) {
+                        .align_expr = align_expr,
+                        .value_expr = value_expr,
+                    }) },
+            });
+    }
+}
+
+static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
     const TokenizerTag tok = p->token_tags[p->tok_i];
     switch (tok) {
     case TOKENIZER_TAG_CHAR_LITERAL:
@@ -86,18 +180,17 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
     case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
         fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
         exit(1);
-        break;
     case TOKENIZER_TAG_IDENTIFIER:
         if (p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON) {
             fprintf(stderr, "parsePrimaryTypeExpr does not support identifier followed by colon\n");
             exit(1);
         }
-        return astNodeListAppend(
+        return addNode(
             &p->nodes,
-            AST_NODE_TAG_IDENTIFIER,
-            nextToken(p),
-            (AstData) {});
-        break;
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_IDENTIFIER,
+                .main_token = nextToken(p),
+                .data = {} });
     case TOKENIZER_TAG_KEYWORD_INLINE:
     case TOKENIZER_TAG_KEYWORD_FOR:
     case TOKENIZER_TAG_KEYWORD_WHILE:
@@ -106,13 +199,12 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p)
     case TOKENIZER_TAG_L_PAREN:
         fprintf(stderr, "parsePrimaryTypeExpr does not support %s\n", tokenizerGetTagString(tok));
         exit(1);
-        break;
     default:
         return null_node;
     }
 }

-static AstNodeIndex parseSuffixOp(Parser *p) {
+static AstNodeIndex parseSuffixOp(Parser* p) {
     const TokenizerTag tok = p->token_tags[p->tok_i];
     switch (tok) {
     case TOKENIZER_TAG_L_BRACKET:
@@ -121,14 +213,12 @@ static AstNodeIndex parseSuffixOp(Parser* p)
     case TOKENIZER_TAG_PERIOD:
         fprintf(stderr, "parseSuffixOp does not support %s\n", tokenizerGetTagString(tok));
         exit(1);
-        break;
     default:
         return null_node;
     }
 }

-static AstNodeIndex parseSuffixExpr(Parser* p)
-{
+static AstNodeIndex parseSuffixExpr(Parser* p) {
     bool ok;
     eatToken(p, TOKENIZER_TAG_KEYWORD_ASYNC, &ok);
     if (ok) {
@@ -140,7 +230,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p)
     if (res == 0)
         return res;

-    while(true) {
+    while (true) {
         const AstNodeIndex suffix_op = parseSuffixOp(p);
         if (suffix_op != 0) {
             res = suffix_op;
@@ -155,8 +245,19 @@ static AstNodeIndex parseSuffixExpr(Parser* p)
     }
 }

-static AstNodeIndex parseErrorUnionExpr(Parser* p)
-{
+static AstNodeIndex expectToken(Parser* p, TokenizerTag tag, bool* ok) {
+    if (p->token_tags[p->tok_i] == tag) {
+        if (ok != NULL)
+            *ok = true;
+        return nextToken(p);
+    } else {
+        if (ok != NULL)
+            *ok = false;
+        return 0;
+    }
+}
+
+static AstNodeIndex parseErrorUnionExpr(Parser* p) {
     const AstNodeIndex suffix_expr = parseSuffixExpr(p);
     if (suffix_expr == 0)
         return null_node;
@@ -164,18 +265,18 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p)
     const AstNodeIndex bang = eatToken(p, TOKENIZER_TAG_BANG, &ok);
     if (!ok)
         return suffix_expr;
-    return astNodeListAppend(
+    return addNode(
         &p->nodes,
-        AST_NODE_TAG_ERROR_UNION,
-        bang,
-        (AstData) {
+        (AstNodeItem) {
+            .tag = AST_NODE_TAG_ERROR_UNION,
+            .main_token = bang,
+            .data = {
                 .lhs = suffix_expr,
                 .rhs = expectTypeExpr(p),
-        });
+            } });
 }

-static AstNodeIndex parseTypeExpr(Parser* p)
-{
+static AstNodeIndex parseTypeExpr(Parser* p) {
     const AstNodeIndex tok = p->token_tags[p->tok_i];
     switch (tok) {
     case TOKENIZER_TAG_QUESTION_MARK:
@@ -185,14 +286,12 @@ static AstNodeIndex parseTypeExpr(Parser* p)
     case TOKENIZER_TAG_L_BRACKET:
         fprintf(stderr, "parseTypeExpr not supported for %s\n", tokenizerGetTagString(tok));
         exit(1);
-        break;
     default:
         return parseErrorUnionExpr(p);
     }
 }

-static SmallSpan parseParamDeclList(Parser* p)
-{
+static SmallSpan parseParamDeclList(Parser* p) {
     // can only parse functions with no declarations
     bool ok;
     AstTokenIndex got_token = eatToken(p, TOKENIZER_TAG_L_PAREN, &ok);
@@ -212,15 +311,13 @@ static SmallSpan parseParamDeclList(Parser* p)
     };
 }

-static uint32_t reserveNode(Parser* p, AstNodeTag tag)
-{
+static uint32_t reserveNode(Parser* p, AstNodeTag tag) {
     astNodeListEnsureCapacity(&p->nodes, p->nodes.len + 1);
     p->nodes.tags[p->nodes.len - 1] = tag;
     return p->nodes.len - 1;
 }

-static AstNodeIndex parseFnProto(Parser* p)
-{
+static AstNodeIndex parseFnProto(Parser* p) {
     bool ok;
     AstNodeIndex fn_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FN, &ok);
     if (!ok)
@@ -239,21 +336,146 @@ static AstNodeIndex parseFnProto(Parser* p)
     eatToken(p, TOKENIZER_TAG_BANG, NULL);
     const AstNodeIndex return_type_expr = parseTypeExpr(p);
+    return 0;
 }

-static AstNodeIndex parseBlock(Parser *p) {
+static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
+    bool ok;
+    if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) {
+        fprintf(stderr, "expectStatement: comptime keyword not yet supported\n");
+        exit(1);
+    }
+    const AstNodeIndex tok = p->token_tags[p->tok_i];
+    switch (tok) {
+    case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
+    case TOKENIZER_TAG_KEYWORD_SUSPEND:
+    case TOKENIZER_TAG_KEYWORD_DEFER:
+    case TOKENIZER_TAG_KEYWORD_ERRDEFER:
+    case TOKENIZER_TAG_KEYWORD_IF:
+    case TOKENIZER_TAG_KEYWORD_ENUM:
+    case TOKENIZER_TAG_KEYWORD_STRUCT:
+    case TOKENIZER_TAG_KEYWORD_UNION:;
+        const char* tok_str = tokenizerGetTagString(tok);
+        fprintf(stderr, "expectStatement does not yet support keyword %s\n", tok_str);
+        exit(1);
+    default:;
+    }
+    // TODO continue
+    return 1;
+}
+
+typedef struct {
+    AstNodeIndexSlice* scratch;
+    uint32_t old_len;
+} CleanupScratch;
+
+static void cleanupScratch(CleanupScratch* c) { c->scratch->len = c->old_len; }
+
+static AstNodeIndex parseBlock(Parser* p) {
     bool ok;
     const AstNodeIndex lbrace = eatToken(p, TOKENIZER_TAG_L_BRACE, &ok);
     if (!ok)
         return null_node;
-    const uint32_t scratch_top = p->scratch.len;
-cleanup:
+    CleanupScratch scratch_top __attribute__((__cleanup__(cleanupScratch))) = {
+        .scratch = &p->scratch,
+        .old_len = p->scratch.len,
+    };
+    while (1) {
+        if (p->token_tags[p->tok_i] == TOKENIZER_TAG_R_BRACE)
+            break;
+        // "const AstNodeIndex statement" once tinycc supports typeof_unqual (C23)
+        AstNodeIndex statement = expectStatement(p, true);
+        if (statement == 0)
+            break;
+        SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
+    }
+    expectToken(p, TOKENIZER_TAG_R_BRACE, NULL);
+    const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKENIZER_TAG_SEMICOLON);
+    switch (p->scratch.len - scratch_top.old_len) {
+    case 0:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = 0,
+                    .rhs = 0,
+                },
+            });
+    case 1:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = p->scratch.arr[scratch_top.old_len],
+                    .rhs = 0,
+                },
+            });
+    case 2:
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = p->scratch.arr[scratch_top.old_len],
+                    .rhs = p->scratch.arr[scratch_top.old_len + 1],
+                },
+            });
+    default:;
+        const uint32_t extra = p->scratch.len - scratch_top.old_len;
+        SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, extra);
+        memcpy(
+            &p->extra_data.arr[p->extra_data.len],
+            &p->scratch.arr[scratch_top.old_len],
+            sizeof(AstNodeIndex) * extra);
+        p->extra_data.len += extra;
+        return addNode(
+            &p->nodes,
+            (AstNodeItem) {
+                .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK,
+                .main_token = lbrace,
+                .data = {
+                    .lhs = p->scratch.arr[scratch_top.old_len],
+                    .rhs = p->scratch.arr[p->scratch.len],
+                },
+            });
+    }
+    return 0;
 }

-static AstNodeIndex expectTopLevelDecl(Parser* p)
-{
+static AstNodeIndex parseVarDeclProto(Parser* p) {
+    bool ok;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok);
+    if (!ok) {
+        eatToken(p, TOKENIZER_TAG_KEYWORD_VAR, &ok);
+        if (!ok)
+            return null_node;
+    }
+    fprintf(stderr, "parseVarDeclProto: parsing vars is not supported\n");
+    exit(1);
+    return 0; // tcc
+}
+
+static AstNodeIndex parseGlobalVarDecl(Parser* p) {
+    const AstNodeIndex var_decl = parseVarDeclProto(p);
+    if (var_decl == 0) {
+        return null_node;
+    }
+    fprintf(stderr, "parseGlobalVarDecl does not support parsing var decls\n");
+    exit(1);
+    return 0; // tcc
+}
+
+static AstNodeIndex expectTopLevelDecl(Parser* p) {
     AstTokenIndex extern_export_inline_token = p->tok_i++;
     bool is_extern = false;
     bool expect_fn = false;
@@ -282,7 +504,6 @@ static AstNodeIndex expectTopLevelDecl(Parser* p)
     case TOKENIZER_TAG_SEMICOLON:
         p->tok_i++;
         return fn_proto;
-        break;
     case TOKENIZER_TAG_L_BRACE:
         if (is_extern)
             exit(1);
@@ -292,9 +513,11 @@ static AstNodeIndex expectTopLevelDecl(Parser* p)
         return setNode(
             p,
             fn_decl_index,
-            AST_NODE_TAG_FN_DECL,
-            p->nodes.main_tokens[fn_proto],
-            (AstData) { .lhs = fn_proto, .rhs = body_block });
+            (AstNodeItem) {
+                .tag = AST_NODE_TAG_FN_DECL,
+                .main_token = p->nodes.main_tokens[fn_proto],
+                .data = { .lhs = fn_proto, .rhs = body_block },
+            });
     default:
         exit(1); // Expected semicolon or left brace
     }
@@ -312,10 +535,10 @@ static AstNodeIndex expectTopLevelDecl(Parser* p)
     // assuming the program is correct...
     fprintf(stderr, "the next token should be usingnamespace, which is not supported\n");
     exit(1);
+    return 0; // make tcc happy
 }

-static Members parseContainerMembers(Parser* p)
-{
+static Members parseContainerMembers(Parser* p) {
     const uint32_t scratch_top = p->scratch.len;
     Members res = (Members) {};
     // ast_token_index last_field;
@@ -323,143 +546,89 @@ static Members parseContainerMembers(Parser* p)
     while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok)
         ;

-    // bool trailing = false;
-    while (1) {
-        // SKIP eat doc comments
+    FieldState field_state = { .tag = FIELD_STATE_NONE };
+    bool trailing = false;
+    AstNodeIndex top_level_decl;
+    while (1) {
+        eatDocComments(p);
         switch (p->token_tags[p->tok_i]) {
-        case TOKENIZER_TAG_INVALID:
-        case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
-        case TOKENIZER_TAG_IDENTIFIER:
-        case TOKENIZER_TAG_STRING_LITERAL:
-        case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
-        case TOKENIZER_TAG_CHAR_LITERAL:
-        case TOKENIZER_TAG_EOF:
-        case TOKENIZER_TAG_BUILTIN:
-        case TOKENIZER_TAG_BANG:
-        case TOKENIZER_TAG_PIPE:
-        case TOKENIZER_TAG_PIPE_PIPE:
-        case TOKENIZER_TAG_PIPE_EQUAL:
-        case TOKENIZER_TAG_EQUAL:
-        case TOKENIZER_TAG_EQUAL_EQUAL:
-        case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT:
-        case TOKENIZER_TAG_BANG_EQUAL:
-        case TOKENIZER_TAG_L_PAREN:
-        case TOKENIZER_TAG_R_PAREN:
-        case TOKENIZER_TAG_SEMICOLON:
-        case TOKENIZER_TAG_PERCENT:
-        case TOKENIZER_TAG_PERCENT_EQUAL:
-        case TOKENIZER_TAG_L_BRACE:
-        case TOKENIZER_TAG_R_BRACE:
-        case TOKENIZER_TAG_L_BRACKET:
-        case TOKENIZER_TAG_R_BRACKET:
-        case TOKENIZER_TAG_PERIOD:
-        case TOKENIZER_TAG_PERIOD_ASTERISK:
-        case TOKENIZER_TAG_ELLIPSIS2:
-        case TOKENIZER_TAG_ELLIPSIS3:
-        case TOKENIZER_TAG_CARET:
-        case TOKENIZER_TAG_CARET_EQUAL:
-        case TOKENIZER_TAG_PLUS:
-        case TOKENIZER_TAG_PLUS_PLUS:
-        case TOKENIZER_TAG_PLUS_EQUAL:
-        case TOKENIZER_TAG_PLUS_PERCENT:
-        case TOKENIZER_TAG_PLUS_PERCENT_EQUAL:
-        case TOKENIZER_TAG_PLUS_PIPE:
-        case TOKENIZER_TAG_PLUS_PIPE_EQUAL:
-        case TOKENIZER_TAG_MINUS:
-        case TOKENIZER_TAG_MINUS_EQUAL:
-        case TOKENIZER_TAG_MINUS_PERCENT:
-        case TOKENIZER_TAG_MINUS_PERCENT_EQUAL:
-        case TOKENIZER_TAG_MINUS_PIPE:
-        case TOKENIZER_TAG_MINUS_PIPE_EQUAL:
-        case TOKENIZER_TAG_ASTERISK:
-        case TOKENIZER_TAG_ASTERISK_EQUAL:
-        case TOKENIZER_TAG_ASTERISK_ASTERISK:
-        case TOKENIZER_TAG_ASTERISK_PERCENT:
-        case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL:
-        case TOKENIZER_TAG_ASTERISK_PIPE:
-        case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL:
-        case TOKENIZER_TAG_ARROW:
-        case TOKENIZER_TAG_COLON:
-        case TOKENIZER_TAG_SLASH:
-        case TOKENIZER_TAG_SLASH_EQUAL:
-        case TOKENIZER_TAG_COMMA:
-        case TOKENIZER_TAG_AMPERSAND:
-        case TOKENIZER_TAG_AMPERSAND_EQUAL:
-        case TOKENIZER_TAG_QUESTION_MARK:
-        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
-        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL:
-        case TOKENIZER_TAG_TILDE:
-        case TOKENIZER_TAG_NUMBER_LITERAL:
-        case TOKENIZER_TAG_DOC_COMMENT:
-        case TOKENIZER_TAG_CONTAINER_DOC_COMMENT:
-        case TOKENIZER_TAG_KEYWORD_ADDRSPACE:
-        case TOKENIZER_TAG_KEYWORD_ALIGN:
-        case TOKENIZER_TAG_KEYWORD_ALLOWZERO:
-        case TOKENIZER_TAG_KEYWORD_AND:
-        case TOKENIZER_TAG_KEYWORD_ANYFRAME:
-        case TOKENIZER_TAG_KEYWORD_ANYTYPE:
-        case TOKENIZER_TAG_KEYWORD_ASM:
-        case TOKENIZER_TAG_KEYWORD_ASYNC:
-        case TOKENIZER_TAG_KEYWORD_AWAIT:
-        case TOKENIZER_TAG_KEYWORD_BREAK:
-        case TOKENIZER_TAG_KEYWORD_CALLCONV:
-        case TOKENIZER_TAG_KEYWORD_CATCH:
-        case TOKENIZER_TAG_KEYWORD_COMPTIME:
-        case TOKENIZER_TAG_KEYWORD_CONST:
-        case TOKENIZER_TAG_KEYWORD_CONTINUE:
-        case TOKENIZER_TAG_KEYWORD_DEFER:
-        case TOKENIZER_TAG_KEYWORD_ELSE:
-        case TOKENIZER_TAG_KEYWORD_ENUM:
-        case TOKENIZER_TAG_KEYWORD_ERRDEFER:
-        case TOKENIZER_TAG_KEYWORD_ERROR:
-        case TOKENIZER_TAG_KEYWORD_EXPORT:
-        case TOKENIZER_TAG_KEYWORD_EXTERN:
-        case TOKENIZER_TAG_KEYWORD_FN:
-        case TOKENIZER_TAG_KEYWORD_FOR:
-        case TOKENIZER_TAG_KEYWORD_IF:
-        case TOKENIZER_TAG_KEYWORD_INLINE:
-        case TOKENIZER_TAG_KEYWORD_NOALIAS:
-        case TOKENIZER_TAG_KEYWORD_NOINLINE:
-        case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
-        case TOKENIZER_TAG_KEYWORD_OPAQUE:
-        case TOKENIZER_TAG_KEYWORD_OR:
-        case TOKENIZER_TAG_KEYWORD_ORELSE:
-        case TOKENIZER_TAG_KEYWORD_PACKED:
-        case TOKENIZER_TAG_KEYWORD_RESUME:
-        case TOKENIZER_TAG_KEYWORD_RETURN:
-        case TOKENIZER_TAG_KEYWORD_LINKSECTION:
-        case TOKENIZER_TAG_KEYWORD_STRUCT:
-        case TOKENIZER_TAG_KEYWORD_SUSPEND:
-        case TOKENIZER_TAG_KEYWORD_SWITCH:
         case TOKENIZER_TAG_KEYWORD_TEST:
-        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
-        case TOKENIZER_TAG_KEYWORD_TRY:
-        case TOKENIZER_TAG_KEYWORD_UNION:
-        case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
-        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
-        case TOKENIZER_TAG_KEYWORD_VAR:
-        case TOKENIZER_TAG_KEYWORD_VOLATILE:
-        case TOKENIZER_TAG_KEYWORD_WHILE:;
+        case TOKENIZER_TAG_KEYWORD_COMPTIME:
+        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:;
             const char* str = tokenizerGetTagString(p->token_tags[p->tok_i]);
-            fprintf(stderr, "keyword %s not implemented\n", str);
+            fprintf(stderr, "%s not implemented in parseContainerMembers\n", str);
             exit(1);
         case TOKENIZER_TAG_KEYWORD_PUB:
             p->tok_i++;
-            // AstNodeIndex top_level_decl = expectTopLevelDecl(*p);
+            top_level_decl = expectTopLevelDecl(p);
+            if (top_level_decl != 0) {
+                if (field_state.tag == FIELD_STATE_SEEN) {
+                    field_state.tag = FIELD_STATE_END;
+                    field_state.payload.end = top_level_decl;
+                }
+                SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
+            }
+            trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON);
             break;
-            // TODO do work
+        case TOKENIZER_TAG_KEYWORD_CONST:
+        case TOKENIZER_TAG_KEYWORD_VAR:
+        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
+        case TOKENIZER_TAG_KEYWORD_EXPORT:
+        case TOKENIZER_TAG_KEYWORD_EXTERN:
+        case TOKENIZER_TAG_KEYWORD_INLINE:
+        case TOKENIZER_TAG_KEYWORD_NOINLINE:
+        case TOKENIZER_TAG_KEYWORD_FN:;
+            top_level_decl = expectTopLevelDecl(p);
+            if (top_level_decl != 0) {
+                if (field_state.tag == FIELD_STATE_SEEN) {
+                    field_state.tag = FIELD_STATE_END;
+                    field_state.payload.end = top_level_decl;
+                }
+                SLICE_APPEND(AstNodeIndex, &p->scratch, top_level_decl);
+            }
+            trailing = (p->token_tags[p->tok_i - 1] == TOKENIZER_TAG_SEMICOLON);
+            break;
+        case TOKENIZER_TAG_EOF:
+        case TOKENIZER_TAG_R_BRACE:
+            goto break_loop;
+        default:;
+            // skip parseCStyleContainer
+            const AstNodeIndex identifier = p->tok_i;
+            const AstNodeIndex container_field = expectContainerField(p);
+            switch (field_state.tag) {
+            case FIELD_STATE_NONE:
+                field_state.tag = FIELD_STATE_SEEN;
+                break;
+            case FIELD_STATE_SEEN:
+                break;
+            case FIELD_STATE_END:
+                fprintf(stderr, "parseContainerMembers error condition\n");
+                exit(1);
+            }
+            SLICE_APPEND(AstNodeIndex, &p->scratch, container_field);
+            switch (p->token_tags[p->tok_i]) {
+            case TOKENIZER_TAG_COMMA:
+                p->tok_i++;
+                trailing = true;
+                continue;
+            case TOKENIZER_TAG_R_BRACE:
+            case TOKENIZER_TAG_EOF:
+                trailing = false;
+                goto break_loop;
+            default:
+                continue;
+            }
+            findNextContainerMember(p);
+            continue;
         }
     }
+break_loop:
     p->scratch.len = scratch_top;
     return res;

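The CleanupScratch/cleanupScratch pair above uses the __attribute__((cleanup)) extension (GNU C; the commit relies on tcc accepting it too) so that p->scratch.len is restored to its saved value on every path out of parseBlock, replacing the old explicit reset at a cleanup: label. A minimal self-contained sketch of the same idiom (all names illustrative, not from the repo):

#include <stdio.h>

typedef struct {
    int* target;
    int saved;
} Restore;

// Called automatically when a __cleanup__-annotated variable leaves scope.
static void restoreInt(Restore* r) { *r->target = r->saved; }

static int depth = 0;

static void work(int n) {
    Restore guard __attribute__((__cleanup__(restoreInt))) = { &depth, depth };
    depth++; // mutate freely; the guard undoes this on any return path
    if (n > 0)
        work(n - 1);
}

int main(void) {
    work(3);
    printf("depth = %d\n", depth); // prints 0: every frame restored its value
    return 0;
}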
parser.h

@@ -3,16 +3,10 @@
 #define _ZIG1_PARSE_H__

 #include "ast.h"
+#include "common.h"
 #include <stdbool.h>
 #include <stdint.h>

-// Standard slice
-typedef struct {
-    uint32_t len;
-    uint32_t cap;
-    AstNodeIndex* arr;
-} ParserNodeIndexSlice;
-
 typedef struct {
     uint32_t len;
     AstNodeIndex lhs;

@@ -20,7 +14,7 @@ typedef struct {
     bool trailing;
 } Members;

-typedef struct Parser {
+typedef struct {
     const char* source;
     uint32_t source_len;

@@ -31,8 +25,8 @@ typedef struct Parser {
     AstTokenIndex tok_i;
     AstNodeList nodes;
-    ParserNodeIndexSlice extra_data;
-    ParserNodeIndexSlice scratch;
+    AstNodeIndexSlice extra_data;
+    AstNodeIndexSlice scratch;
 } Parser;

 Parser* parserInit(const char* source, uint32_t len);

tokenizer.c

@@ -10,8 +10,7 @@
     TokenizerTag tag;
 } KeywordMap;

-const char* tokenizerGetTagString(TokenizerTag tag)
-{
+const char* tokenizerGetTagString(TokenizerTag tag) {
     switch (tag) {
         TOKENIZER_FOREACH_TAG_ENUM(TOKENIZER_GENERATE_CASE)
     default:

@@ -72,8 +71,7 @@ const KeywordMap keywords[] = {
 };

 // TODO binary search
-static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
-{
+static TokenizerTag getKeyword(const char* bytes, const uint32_t len) {
     for (unsigned long i = 0; i < sizeof(keywords) / sizeof(KeywordMap); i++) {
         size_t klen = strlen(keywords[i].keyword);
         size_t minlen = klen < len ? klen : len;

@@ -91,8 +89,7 @@ static TokenizerTag getKeyword(const char* bytes, const uint32_t len)
     return TOKENIZER_TAG_INVALID;
 }

-Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
-{
+Tokenizer tokenizerInit(const char* buffer, const uint32_t len) {
     return (Tokenizer) {
         .buffer = buffer,
         .buffer_len = len,

@@ -100,8 +97,7 @@ Tokenizer tokenizerInit(const char* buffer, const uint32_t len)
     };
 }

-TokenizerToken tokenizerNext(Tokenizer* self)
-{
+TokenizerToken tokenizerNext(Tokenizer* self) {
     TokenizerToken result = (TokenizerToken) {
         .tag = TOKENIZER_TAG_INVALID,
         .loc = {

zig1.c (6 lines changed)

@@ -6,8 +6,7 @@
 // - code = 0: program successfully terminated.
 // - code = 1: panicked, panic message in msg. Caller should free msg.
 // - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1Run(const char* program, char** msg)
-{
+int zig1Run(const char* program, char** msg) {
     (void)program;
     (void)msg;
     return 0;

@@ -15,8 +14,7 @@ int zig1Run(const char* program, char** msg)
 // API: run and:
 // code = 3: abnormal error, expect something in stderr.
-int zig1RunFile(const char* fname, char** msg)
-{
+int zig1RunFile(const char* fname, char** msg) {
     FILE* f = fopen(fname, "r");
     if (f == NULL) {
         perror("fopen");