commit b8a52d3f39abd7cfedc3541379d22e3f215f2152 (tree)
parent 6006a802e1dd2a3f06a5aa9db5e9b1e7bbee850c
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Mon, 30 Dec 2024 01:05:10 +0200
More parser — lint+tests pass again
Diffstat:
10 files changed, 299 insertions(+), 59 deletions(-)
diff --git a/README.md b/README.md
@@ -1 +1 @@
-zig1 aspires to be a zig 0.13.0-2448-gd48611ba67c7 interpreter.
+zig0 aspires to be an interpreter of zig 0.13.0-2578-gec60156f187a C backend.
diff --git a/ast.h b/ast.h
@@ -1,5 +1,5 @@
-#ifndef _ZIG1_AST_H__
-#define _ZIG1_AST_H__
+#ifndef _ZIG0_AST_H__
+#define _ZIG0_AST_H__
#include <stdbool.h>
#include <stdint.h>
diff --git a/build.zig b/build.zig
@@ -9,7 +9,7 @@ const headers = &[_][]const u8{
const c_lib_files = &[_][]const u8{
"tokenizer.c",
"ast.c",
- "zig1.c",
+ "zig0.c",
"parser.c",
};
@@ -109,6 +109,7 @@ pub fn build(b: *std.Build) !void {
"--suppress=checkersReport",
"--suppress=unusedFunction", // TODO remove after plumbing is done
"--suppress=unusedStructMember", // TODO remove after plumbing is done
+ "--suppress=knownConditionTrueFalse", // TODO remove after plumbing is done
});
for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile));
lint_step.dependOn(&cppcheck.step);
diff --git a/common.h b/common.h
@@ -1,6 +1,6 @@
// common.h
-#ifndef _ZIG1_COMMON_H__
-#define _ZIG1_COMMON_H__
+#ifndef _ZIG0_COMMON_H__
+#define _ZIG0_COMMON_H__
#include <stdint.h>
#include <stdlib.h>
diff --git a/main.c b/main.c
@@ -2,8 +2,8 @@
#include <stdio.h>
#include <stdlib.h>
-int zig1Run(char* program, char** msg);
-int zig1RunFile(char* fname, char** msg);
+// Implemented in zig0.c. The first parameter is `const char*` there, so the
+// prototypes here must say the same to keep the declarations compatible.
+int zig0Run(const char* program, char** msg);
+int zig0RunFile(const char* fname, char** msg);
static void usage(const char* argv0) {
fprintf(stderr, "Usage: %s program.zig\n", argv0);
@@ -16,7 +16,7 @@ int main(int argc, char** argv) {
}
char* msg;
- switch (zig1RunFile(argv[1], &msg)) {
+ switch (zig0RunFile(argv[1], &msg)) {
case 0:
return 0;
break;
diff --git a/parser.c b/parser.c
@@ -20,8 +20,14 @@ typedef struct {
} FieldState;
typedef struct {
- AstNodeIndex zero_or_one;
- AstSubRange multi;
+ enum {
+ SMALL_SPAN_ZERO_OR_ONE,
+ SMALL_SPAN_MULTI
+ } tag;
+ union {
+ AstNodeIndex zero_or_one;
+ AstSubRange multi;
+ } payload;
} SmallSpan;
void parseRoot(Parser* p) {
@@ -50,6 +56,16 @@ static void eatDocComments(Parser* p) {
while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) { }
}
+// Asks eatToken for a semicolon. If eatToken reports failure through its
+// out-flag, prints "expected semicolon" to stderr and terminates the process
+// with exit status 1; otherwise simply returns.
+static void expectSemicolon(Parser* p) {
+    bool found;
+    eatToken(p, TOKENIZER_TAG_SEMICOLON, &found);
+    if (!found) {
+        fprintf(stderr, "expected semicolon\n");
+        exit(1);
+    }
+}
+
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
p->nodes.tags[i] = item.tag;
p->nodes.main_tokens[i] = item.main_token;
@@ -80,6 +96,13 @@ static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
return nodes->len++;
}
+// Appends `count` entries from `extra` to the parser's extra_data list and
+// returns the index at which the first of them was stored.
+//
+// NOTE(review): assumes SLICE_ENSURE_CAPACITY only reserves room for `count`
+// additional elements and leaves `len` untouched -- confirm in common.h.
+static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t count) {
+    const AstNodeIndex result = p->extra_data.len;
+    SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
+    // Write behind the entries that are already stored; copying to the
+    // address of the `arr` field would clobber earlier extra data.
+    memcpy(p->extra_data.arr + p->extra_data.len, extra, count * sizeof(AstNodeIndex));
+    // Advance the length, otherwise the next call would hand out the same
+    // index again.
+    p->extra_data.len += count;
+    return result;
+}
+
static AstNodeIndex parseTypeExpr(Parser* p);
static AstNodeIndex expectTypeExpr(Parser* p) {
@@ -92,14 +115,43 @@ static AstNodeIndex expectTypeExpr(Parser* p) {
static AstNodeIndex parseByteAlign(Parser* p) {
bool ok;
eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok);
- if (!ok) {
+ if (!ok)
return null_node;
- }
fprintf(stderr, "parseByteAlign cannot parse alginment\n");
exit(1);
return 0; // tcc
}
+// Stub for the `addrspace` clause: returns null_node when eatToken does not
+// report an `addrspace` keyword. When it does, the clause cannot be parsed
+// yet, so an error is printed and the process exits with status 1.
+static AstNodeIndex parseAddrSpace(Parser* p) {
+    bool has_keyword;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_ADDRSPACE, &has_keyword);
+    if (has_keyword) {
+        fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
+        exit(1);
+    }
+    return null_node;
+}
+
+// Stub for the `linksection` clause: returns null_node when eatToken does not
+// report a `linksection` keyword. When it does, the clause cannot be parsed
+// yet, so an error is printed and the process exits with status 1.
+static AstNodeIndex parseLinkSection(Parser* p) {
+    bool has_keyword;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_LINKSECTION, &has_keyword);
+    if (has_keyword) {
+        fprintf(stderr, "parseLinkSection cannot parse linksection\n");
+        exit(1);
+    }
+    return null_node;
+}
+
+// Stub for the `callconv` clause: returns null_node when eatToken does not
+// report a `callconv` keyword. When it does, the clause cannot be parsed
+// yet, so an error is printed and the process exits with status 1.
+static AstNodeIndex parseCallconv(Parser* p) {
+    bool has_keyword;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_CALLCONV, &has_keyword);
+    if (has_keyword) {
+        fprintf(stderr, "parseCallconv cannot parse callconv\n");
+        exit(1);
+    }
+    return null_node;
+}
+
typedef struct {
AstNodeIndex align_expr, value_expr;
} NodeContainerField;
@@ -112,13 +164,13 @@ static AstNodeIndex expectContainerField(Parser* p) {
const AstNodeIndex type_expr = expectTypeExpr(p);
const AstNodeIndex align_expr = parseByteAlign(p);
- const AstNodeIndex value_expr = 0;
bool ok;
eatToken(p, TOKENIZER_TAG_EQUAL, &ok);
if (ok) {
fprintf(stderr, "expectContainerField does not support expr\n");
exit(1);
}
+ const AstNodeIndex value_expr = 0;
if (align_expr == 0) {
return addNode(
@@ -150,10 +202,8 @@ static AstNodeIndex expectContainerField(Parser* p) {
.main_token = main_token,
.data = {
.lhs = type_expr,
- .rhs = addExtra(p, (NodeContainerField) {
- .align_expr = align_expr,
- .value_expr = value_expr,
- }) },
+ .rhs = addExtra(p, (AstNodeIndex[]) { align_expr, value_expr }, 2),
+ },
});
}
}
@@ -307,7 +357,7 @@ static SmallSpan parseParamDeclList(Parser* p) {
}
return (SmallSpan) {
- .zero_or_one = 0,
+ .tag = SMALL_SPAN_ZERO_OR_ONE,
};
}
@@ -328,21 +378,104 @@ static AstNodeIndex parseFnProto(Parser* p) {
eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);
SmallSpan params = parseParamDeclList(p);
- // const params = try p.parseParamDeclList();
- // const align_expr = try p.parseByteAlign();
- // const addrspace_expr = try p.parseAddrSpace();
- // const section_expr = try p.parseLinkSection();
- // const callconv_expr = try p.parseCallconv();
+ const AstNodeIndex align_expr = parseByteAlign(p);
+ const AstNodeIndex addrspace_expr = parseAddrSpace(p);
+ const AstNodeIndex section_expr = parseLinkSection(p);
+ const AstNodeIndex callconv_expr = parseCallconv(p);
eatToken(p, TOKENIZER_TAG_BANG, NULL);
const AstNodeIndex return_type_expr = parseTypeExpr(p);
- return 0;
+
+ if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) {
+ if (params.tag == SMALL_SPAN_ZERO_OR_ONE)
+ return setNode(
+ p,
+ fn_proto_index,
+ (AstNodeItem) {
+ .tag = AST_NODE_TAG_FN_PROTO_SIMPLE,
+ .main_token = fn_token,
+ .data = {
+ .lhs = params.payload.zero_or_one,
+ .rhs = return_type_expr,
+ },
+ });
+ }
+
+ fprintf(stderr, "parseFnProto does not support complex function decls\n");
+ exit(1);
+ return 0; // tcc
+}
+
+// Recognizes a block label: an identifier token immediately followed by a
+// colon token. On a match the cursor moves past both tokens and the index of
+// the identifier is returned; otherwise the cursor is left where it was and
+// null_node is returned.
+static AstTokenIndex parseBlockLabel(Parser* p) {
+    const AstTokenIndex start = p->tok_i;
+    if (p->token_tags[start] != TOKENIZER_TAG_IDENTIFIER)
+        return null_node;
+    if (p->token_tags[start + 1] != TOKENIZER_TAG_COLON)
+        return null_node;
+
+    p->tok_i = start + 2;
+    return start;
+}
+
+// Stub for `for` statements. Returns null_node when eatToken does not report
+// a `for` keyword. Once the keyword has been matched the statement cannot be
+// parsed yet, so the process exits with status 1. Merely returning 0 here
+// would look like "no for statement" to callers that compare the result
+// against 0, and parsing would silently continue past the keyword.
+static AstNodeIndex parseForStatement(Parser* p) {
+    bool ok;
+    const AstNodeIndex for_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FOR, &ok);
+    if (!ok)
+        return null_node;
+
+    (void)for_token; // not used until for statements are implemented
+    fprintf(stderr, "parseForStatement cannot parse for statements\n");
+    exit(1);
+    return 0; // tcc
+}
+
+// Stub for `while` statements. Returns null_node when eatToken does not
+// report a `while` keyword. Once the keyword has been matched the statement
+// cannot be parsed yet, so the process exits with status 1. Merely returning
+// 0 here would look like "no while statement" to callers that compare the
+// result against 0, and parsing would silently continue past the keyword.
+static AstNodeIndex parseWhileStatement(Parser* p) {
+    bool ok;
+    const AstNodeIndex while_token = eatToken(p, TOKENIZER_TAG_KEYWORD_WHILE, &ok);
+    if (!ok)
+        return null_node;
+
+    (void)while_token; // not used until while statements are implemented
+    fprintf(stderr, "parseWhileStatement cannot parse while statements\n");
+    exit(1);
+    return 0; // tcc
+}
+
+// Parses an optionally `inline`-prefixed loop: tries parseForStatement first
+// and parseWhileStatement second, returning the first non-zero node. When
+// neither matches, returns null_node, unless eatToken reported an `inline`
+// keyword, in which case an error is printed and the process exits with
+// status 1.
+static AstNodeIndex parseLoopStatement(Parser* p) {
+    bool saw_inline;
+    eatToken(p, TOKENIZER_TAG_KEYWORD_INLINE, &saw_inline);
+
+    AstNodeIndex loop = parseForStatement(p);
+    if (loop == 0)
+        loop = parseWhileStatement(p);
+    if (loop != 0)
+        return loop;
+
+    if (saw_inline) {
+        fprintf(stderr, "If we've seen 'inline', there should have been a 'for' or 'while'\n");
+        exit(1);
+    }
+    return null_node;
+}
+
+// Placeholder: assignment expressions are not implemented. Always prints a
+// message to stderr and terminates the process with exit status 1; the
+// parser argument is unused.
+static AstNodeIndex parseAssignExpr(Parser* p) {
+    (void)p;
+    fputs("parseAssignExpr not implemented\n", stderr);
+    exit(1);
+    return 0; // tcc
+}
+
+// Placeholder: var-decl / expression statements are not implemented. Always
+// prints a message to stderr and terminates the process with exit status 1;
+// the parser argument is unused.
+static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
+    (void)p;
+    fputs("expectVarDeclExprStatement not implemented\n", stderr);
+    exit(1);
+    return 0; // tcc
}
+static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
bool ok;
if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) {
- fprintf(stderr, "expectStatement: comptime keyword not yet supported\n");
+ fprintf(stderr, "expectStatement: comptime keyword not supported\n");
exit(1);
}
@@ -357,12 +490,20 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
case TOKENIZER_TAG_KEYWORD_STRUCT:
case TOKENIZER_TAG_KEYWORD_UNION:;
const char* tok_str = tokenizerGetTagString(tok);
- fprintf(stderr, "expectStatement does not yet support keyword %s\n", tok_str);
+ fprintf(stderr, "expectStatement does not support keyword %s\n", tok_str);
exit(1);
default:;
}
- // TODO continue
- return 1;
+
+ const AstNodeIndex labeled_statement = parseLabeledStatement(p);
+ if (labeled_statement != 0)
+ return labeled_statement;
+
+ if (allow_defer_var) {
+ return expectVarDeclExprStatement(p);
+ } else {
+ return parseAssignExpr(p);
+ }
}
typedef struct {
@@ -452,6 +593,24 @@ static AstNodeIndex parseBlock(Parser* p) {
return 0;
}
+// Parses an optionally labeled block or loop. After parseBlockLabel, tries
+// parseBlock and then parseLoopStatement, returning the first non-zero node.
+// When neither matches: returns null_node if no label was found, otherwise
+// prints an error and exits with status 1.
+static AstNodeIndex parseLabeledStatement(Parser* p) {
+    const AstNodeIndex label = parseBlockLabel(p);
+
+    AstNodeIndex stmt = parseBlock(p);
+    if (stmt == 0)
+        stmt = parseLoopStatement(p);
+    if (stmt != 0)
+        return stmt;
+
+    if (label == 0)
+        return null_node;
+
+    fprintf(stderr, "parseLabeledStatement does not support labels\n");
+    exit(1);
+    return 0; // tcc
+}
+
static AstNodeIndex parseVarDeclProto(Parser* p) {
bool ok;
eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok);
@@ -476,23 +635,15 @@ static AstNodeIndex parseGlobalVarDecl(Parser* p) {
}
static AstNodeIndex expectTopLevelDecl(Parser* p) {
- AstTokenIndex extern_export_inline_token = p->tok_i++;
- bool is_extern = false;
- bool expect_fn = false;
- bool expect_var_or_fn = false;
+ AstTokenIndex extern_export_inline_token = nextToken(p);
switch (p->token_tags[extern_export_inline_token]) {
case TOKENIZER_TAG_KEYWORD_EXTERN:
eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL);
- is_extern = true;
- expect_var_or_fn = true;
break;
case TOKENIZER_TAG_KEYWORD_EXPORT:
- expect_var_or_fn = true;
- break;
case TOKENIZER_TAG_KEYWORD_INLINE:
case TOKENIZER_TAG_KEYWORD_NOINLINE:
- expect_fn = true;
break;
default:
p->tok_i--;
@@ -504,10 +655,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
case TOKENIZER_TAG_SEMICOLON:
p->tok_i++;
return fn_proto;
- case TOKENIZER_TAG_L_BRACE:
- if (is_extern)
- exit(1);
-
+ case TOKENIZER_TAG_L_BRACE:;
AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
AstNodeIndex body_block = parseBlock(p);
return setNode(
@@ -523,10 +671,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
}
}
- if (expect_fn)
- exit(1);
-
- AstTokenIndex thread_local_token = eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
+ eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
AstNodeIndex var_decl = parseGlobalVarDecl(p);
if (var_decl != 0) {
return var_decl;
@@ -538,9 +683,72 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
return 0; // make tcc happy
}
+// Skips tokens until a position where a container member plausibly starts
+// or the current declaration plausibly ends, tracking how many (, [ and {
+// are currently open. Tokens inside an open bracket never stop the scan,
+// except EOF, which always does.
+void findNextContainerMember(Parser* p) {
+    uint32_t depth = 0;
+
+    for (;;) {
+        const AstTokenIndex cur = nextToken(p);
+        const bool at_top = depth == 0;
+
+        switch (p->token_tags[cur]) {
+        // Any of these can start a new top level declaration
+        case TOKENIZER_TAG_KEYWORD_TEST:
+        case TOKENIZER_TAG_KEYWORD_COMPTIME:
+        case TOKENIZER_TAG_KEYWORD_PUB:
+        case TOKENIZER_TAG_KEYWORD_EXPORT:
+        case TOKENIZER_TAG_KEYWORD_EXTERN:
+        case TOKENIZER_TAG_KEYWORD_INLINE:
+        case TOKENIZER_TAG_KEYWORD_NOINLINE:
+        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
+        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
+        case TOKENIZER_TAG_KEYWORD_CONST:
+        case TOKENIZER_TAG_KEYWORD_VAR:
+        case TOKENIZER_TAG_KEYWORD_FN:
+            if (!at_top)
+                break;
+            // Step back so the keyword is seen again by the caller.
+            p->tok_i--;
+            return;
+
+        case TOKENIZER_TAG_IDENTIFIER:
+            // `name,` outside any bracket: stop in front of the identifier.
+            if (p->token_tags[cur + 1] == TOKENIZER_TAG_COMMA && at_top) {
+                p->tok_i--;
+                return;
+            }
+            break;
+
+        case TOKENIZER_TAG_COMMA:
+        case TOKENIZER_TAG_SEMICOLON:
+            // This decl was likely meant to end here
+            if (at_top)
+                return;
+            break;
+
+        case TOKENIZER_TAG_L_PAREN:
+        case TOKENIZER_TAG_L_BRACKET:
+        case TOKENIZER_TAG_L_BRACE:
+            depth++;
+            break;
+
+        case TOKENIZER_TAG_R_PAREN:
+        case TOKENIZER_TAG_R_BRACKET:
+            // An unmatched closer outside any bracket is ignored.
+            if (!at_top)
+                depth--;
+            break;
+
+        case TOKENIZER_TAG_R_BRACE:
+            if (at_top) {
+                // end of container, exit
+                p->tok_i--;
+                return;
+            }
+            depth--;
+            break;
+
+        case TOKENIZER_TAG_EOF:
+            p->tok_i--;
+            return;
+
+        default:
+            break;
+        }
+    }
+}
+
static Members parseContainerMembers(Parser* p) {
const uint32_t scratch_top = p->scratch.len;
- Members res = (Members) {};
// ast_token_index last_field;
bool ok;
while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok)
@@ -597,8 +805,6 @@ static Members parseContainerMembers(Parser* p) {
goto break_loop;
default:;
// skip parseCStyleContainer
-
- const AstNodeIndex identifier = p->tok_i;
const AstNodeIndex container_field = expectContainerField(p);
switch (field_state.tag) {
case FIELD_STATE_NONE:
@@ -628,8 +834,41 @@ static Members parseContainerMembers(Parser* p) {
continue;
}
}
-break_loop:
+break_loop:;
+
+ const uint32_t scratch_len = p->scratch.len;
p->scratch.len = scratch_top;
- return res;
+
+ const uint32_t n_items = scratch_len - scratch_top;
+ switch (n_items) {
+ case 0:
+ return (Members) {
+ .len = 0,
+ .lhs = 0,
+ .rhs = 0,
+ .trailing = trailing,
+ };
+ case 1:
+ return (Members) {
+ .len = 1,
+ .lhs = p->scratch.arr[scratch_top],
+ .rhs = 0,
+ .trailing = trailing,
+ };
+ case 2:
+ return (Members) {
+ .len = 2,
+ .lhs = p->scratch.arr[scratch_top],
+ .rhs = p->scratch.arr[scratch_top + 1],
+ .trailing = trailing,
+ };
+ default:
+ return (Members) {
+ .len = n_items,
+ .lhs = p->scratch.arr[scratch_top],
+ .rhs = p->scratch.arr[scratch_len],
+ .trailing = trailing,
+ };
+ }
}
diff --git a/parser.h b/parser.h
@@ -1,6 +1,6 @@
// parser.h
-#ifndef _ZIG1_PARSE_H__
-#define _ZIG1_PARSE_H__
+#ifndef _ZIG0_PARSE_H__
+#define _ZIG0_PARSE_H__
#include "ast.h"
#include "common.h"
diff --git a/test_all.zig b/test_all.zig
@@ -1,3 +1,3 @@
-test "zig1 test suite" {
+test "zig0 test suite" {
_ = @import("tokenizer_test.zig");
}
diff --git a/tokenizer.h b/tokenizer.h
@@ -1,5 +1,5 @@
-#ifndef _ZIG1_TOKENIZER_H__
-#define _ZIG1_TOKENIZER_H__
+#ifndef _ZIG0_TOKENIZER_H__
+#define _ZIG0_TOKENIZER_H__
#include <stdbool.h>
#include <stdint.h>
diff --git a/zig1.c b/zig1.c
@@ -6,7 +6,7 @@
// - code = 0: program successfully terminated.
// - code = 1: panicked, panic message in msg. Caller should free msg.
// - code = 2: interpreter error, error in msg. Caller should free msg.
-int zig1Run(const char* program, char** msg) {
+int zig0Run(const char* program, char** msg) {
(void)program;
(void)msg;
return 0;
@@ -14,7 +14,7 @@ int zig1Run(const char* program, char** msg) {
// API: run and:
// code = 3: abnormal error, expect something in stderr.
-int zig1RunFile(const char* fname, char** msg) {
+int zig0RunFile(const char* fname, char** msg) {
FILE* f = fopen(fname, "r");
if (f == NULL) {
perror("fopen");
@@ -51,7 +51,7 @@ int zig1RunFile(const char* fname, char** msg) {
fclose(f);
program[fsize] = 0;
- int code = zig1Run(program, msg);
+ int code = zig0Run(program, msg);
free(program);
return code;
}