More parser — lint+tests pass again

This commit is contained in:
2024-12-30 01:05:10 +02:00
parent 6006a802e1
commit b8a52d3f39
10 changed files with 299 additions and 59 deletions

View File

@@ -1 +1 @@
zig1 aspires to be a zig 0.13.0-2448-gd48611ba67c7 interpreter. zig0 aspires to be an interpreter of zig 0.13.0-2578-gec60156f187a C backend.

4
ast.h
View File

@@ -1,5 +1,5 @@
#ifndef _ZIG1_AST_H__ #ifndef _ZIG0_AST_H__
#define _ZIG1_AST_H__ #define _ZIG0_AST_H__
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>

View File

@@ -9,7 +9,7 @@ const headers = &[_][]const u8{
const c_lib_files = &[_][]const u8{ const c_lib_files = &[_][]const u8{
"tokenizer.c", "tokenizer.c",
"ast.c", "ast.c",
"zig1.c", "zig0.c",
"parser.c", "parser.c",
}; };
@@ -109,6 +109,7 @@ pub fn build(b: *std.Build) !void {
"--suppress=checkersReport", "--suppress=checkersReport",
"--suppress=unusedFunction", // TODO remove after plumbing is done "--suppress=unusedFunction", // TODO remove after plumbing is done
"--suppress=unusedStructMember", // TODO remove after plumbing is done "--suppress=unusedStructMember", // TODO remove after plumbing is done
"--suppress=knownConditionTrueFalse", // TODO remove after plumbing is done
}); });
for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile)); for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile));
lint_step.dependOn(&cppcheck.step); lint_step.dependOn(&cppcheck.step);

View File

@@ -1,6 +1,6 @@
// common.h // common.h
#ifndef _ZIG1_COMMON_H__ #ifndef _ZIG0_COMMON_H__
#define _ZIG1_COMMON_H__ #define _ZIG0_COMMON_H__
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>

6
main.c
View File

@@ -2,8 +2,8 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
int zig1Run(char* program, char** msg); int zig0Run(char* program, char** msg);
int zig1RunFile(char* fname, char** msg); int zig0RunFile(char* fname, char** msg);
static void usage(const char* argv0) { static void usage(const char* argv0) {
fprintf(stderr, "Usage: %s program.zig\n", argv0); fprintf(stderr, "Usage: %s program.zig\n", argv0);
@@ -16,7 +16,7 @@ int main(int argc, char** argv) {
} }
char* msg; char* msg;
switch (zig1RunFile(argv[1], &msg)) { switch (zig0RunFile(argv[1], &msg)) {
case 0: case 0:
return 0; return 0;
break; break;

319
parser.c
View File

@@ -20,8 +20,14 @@ typedef struct {
} FieldState; } FieldState;
typedef struct { typedef struct {
enum {
SMALL_SPAN_ZERO_OR_ONE,
SMALL_SPAN_MULTI
} tag;
union {
AstNodeIndex zero_or_one; AstNodeIndex zero_or_one;
AstSubRange multi; AstSubRange multi;
} payload;
} SmallSpan; } SmallSpan;
void parseRoot(Parser* p) { void parseRoot(Parser* p) {
@@ -50,6 +56,16 @@ static void eatDocComments(Parser* p) {
while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) { } while (eatToken(p, TOKENIZER_TAG_DOC_COMMENT, &ok), ok) { }
} }
// Consume the mandatory trailing semicolon; abort parsing when it is absent.
static void expectSemicolon(Parser* p) {
    bool found;
    eatToken(p, TOKENIZER_TAG_SEMICOLON, &found);
    if (!found) {
        fprintf(stderr, "expected semicolon\n");
        exit(1);
    }
}
static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) { static AstNodeIndex setNode(Parser* p, uint32_t i, AstNodeItem item) {
p->nodes.tags[i] = item.tag; p->nodes.tags[i] = item.tag;
p->nodes.main_tokens[i] = item.main_token; p->nodes.main_tokens[i] = item.main_token;
@@ -80,6 +96,13 @@ static AstNodeIndex addNode(AstNodeList* nodes, AstNodeItem item) {
return nodes->len++; return nodes->len++;
} }
// Append `count` node indices to the extra_data slice and return the index
// of the first appended element, so callers can store it as a node's
// lhs/rhs extra-data reference.
static AstNodeIndex addExtra(Parser* p, const AstNodeIndex* extra, uint32_t count) {
    const AstNodeIndex result = p->extra_data.len;
    // Assumes SLICE_ENSURE_CAPACITY only grows capacity, leaving len
    // untouched — TODO confirm against common.h.
    SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
    // Bug fix: copy into the tail of the slice, not `&p->extra_data.arr`
    // (the head — or the pointer object itself if arr is a pointer), and
    // advance len so successive calls do not overwrite each other.
    memcpy(&p->extra_data.arr[p->extra_data.len], extra, count * sizeof(AstNodeIndex));
    p->extra_data.len += count;
    return result;
}
static AstNodeIndex parseTypeExpr(Parser* p); static AstNodeIndex parseTypeExpr(Parser* p);
static AstNodeIndex expectTypeExpr(Parser* p) { static AstNodeIndex expectTypeExpr(Parser* p) {
@@ -92,14 +115,43 @@ static AstNodeIndex expectTypeExpr(Parser* p) {
static AstNodeIndex parseByteAlign(Parser* p) { static AstNodeIndex parseByteAlign(Parser* p) {
bool ok; bool ok;
eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok); eatToken(p, TOKENIZER_TAG_KEYWORD_ALIGN, &ok);
if (!ok) { if (!ok)
return null_node; return null_node;
}
fprintf(stderr, "parseByteAlign cannot parse alginment\n"); fprintf(stderr, "parseByteAlign cannot parse alginment\n");
exit(1); exit(1);
return 0; // tcc return 0; // tcc
} }
// AddrSpace stub: returns null_node when no `addrspace` keyword is present;
// a real `addrspace` clause is not implemented yet and aborts.
static AstNodeIndex parseAddrSpace(Parser* p) {
    bool saw_keyword;
    eatToken(p, TOKENIZER_TAG_KEYWORD_ADDRSPACE, &saw_keyword);
    if (saw_keyword) {
        fprintf(stderr, "parseAddrSpace cannot parse addrspace\n");
        exit(1);
    }
    return null_node;
}
// LinkSection stub: returns null_node when no `linksection` keyword is
// present; a real `linksection` clause is not implemented yet and aborts.
static AstNodeIndex parseLinkSection(Parser* p) {
    bool saw_keyword;
    eatToken(p, TOKENIZER_TAG_KEYWORD_LINKSECTION, &saw_keyword);
    if (saw_keyword) {
        fprintf(stderr, "parseLinkSection cannot parse linksection\n");
        exit(1);
    }
    return null_node;
}
// Callconv stub: returns null_node when no `callconv` keyword is present;
// a real `callconv` clause is not implemented yet and aborts.
static AstNodeIndex parseCallconv(Parser* p) {
    bool saw_keyword;
    eatToken(p, TOKENIZER_TAG_KEYWORD_CALLCONV, &saw_keyword);
    if (saw_keyword) {
        fprintf(stderr, "parseCallconv cannot parse callconv\n");
        exit(1);
    }
    return null_node;
}
typedef struct { typedef struct {
AstNodeIndex align_expr, value_expr; AstNodeIndex align_expr, value_expr;
} NodeContainerField; } NodeContainerField;
@@ -112,13 +164,13 @@ static AstNodeIndex expectContainerField(Parser* p) {
const AstNodeIndex type_expr = expectTypeExpr(p); const AstNodeIndex type_expr = expectTypeExpr(p);
const AstNodeIndex align_expr = parseByteAlign(p); const AstNodeIndex align_expr = parseByteAlign(p);
const AstNodeIndex value_expr = 0;
bool ok; bool ok;
eatToken(p, TOKENIZER_TAG_EQUAL, &ok); eatToken(p, TOKENIZER_TAG_EQUAL, &ok);
if (ok) { if (ok) {
fprintf(stderr, "expectContainerField does not support expr\n"); fprintf(stderr, "expectContainerField does not support expr\n");
exit(1); exit(1);
} }
const AstNodeIndex value_expr = 0;
if (align_expr == 0) { if (align_expr == 0) {
return addNode( return addNode(
@@ -150,10 +202,8 @@ static AstNodeIndex expectContainerField(Parser* p) {
.main_token = main_token, .main_token = main_token,
.data = { .data = {
.lhs = type_expr, .lhs = type_expr,
.rhs = addExtra(p, (NodeContainerField) { .rhs = addExtra(p, (AstNodeIndex[]) { align_expr, value_expr }, 2),
.align_expr = align_expr, },
.value_expr = value_expr,
}) },
}); });
} }
} }
@@ -307,7 +357,7 @@ static SmallSpan parseParamDeclList(Parser* p) {
} }
return (SmallSpan) { return (SmallSpan) {
.zero_or_one = 0, .tag = SMALL_SPAN_ZERO_OR_ONE,
}; };
} }
@@ -328,21 +378,104 @@ static AstNodeIndex parseFnProto(Parser* p) {
eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL); eatToken(p, TOKENIZER_TAG_IDENTIFIER, NULL);
SmallSpan params = parseParamDeclList(p); SmallSpan params = parseParamDeclList(p);
// const params = try p.parseParamDeclList(); const AstNodeIndex align_expr = parseByteAlign(p);
// const align_expr = try p.parseByteAlign(); const AstNodeIndex addrspace_expr = parseAddrSpace(p);
// const addrspace_expr = try p.parseAddrSpace(); const AstNodeIndex section_expr = parseLinkSection(p);
// const section_expr = try p.parseLinkSection(); const AstNodeIndex callconv_expr = parseCallconv(p);
// const callconv_expr = try p.parseCallconv();
eatToken(p, TOKENIZER_TAG_BANG, NULL); eatToken(p, TOKENIZER_TAG_BANG, NULL);
const AstNodeIndex return_type_expr = parseTypeExpr(p); const AstNodeIndex return_type_expr = parseTypeExpr(p);
return 0;
if (align_expr == 0 && section_expr == 0 && callconv_expr == 0 && addrspace_expr == 0) {
if (params.tag == SMALL_SPAN_ZERO_OR_ONE)
return setNode(
p,
fn_proto_index,
(AstNodeItem) {
.tag = AST_NODE_TAG_FN_PROTO_SIMPLE,
.main_token = fn_token,
.data = {
.lhs = params.payload.zero_or_one,
.rhs = return_type_expr,
},
});
}
fprintf(stderr, "parseFnProto does not support complex function decls\n");
exit(1);
return 0; // tcc
} }
// Recognize `identifier :` as a block label. On a match both tokens are
// consumed and the identifier's token index is returned; otherwise no
// tokens are consumed and null_node is returned.
static AstTokenIndex parseBlockLabel(Parser* p) {
    const bool is_label =
        p->token_tags[p->tok_i] == TOKENIZER_TAG_IDENTIFIER &&
        p->token_tags[p->tok_i + 1] == TOKENIZER_TAG_COLON;
    if (!is_label)
        return null_node;
    const AstTokenIndex ident = p->tok_i;
    p->tok_i += 2;
    return ident;
}
// ForStatement stub: returns null_node when no `for` keyword is present;
// an actual for-statement is not implemented yet and aborts.
static AstNodeIndex parseForStatement(Parser* p) {
    bool ok;
    const AstNodeIndex for_token = eatToken(p, TOKENIZER_TAG_KEYWORD_FOR, &ok);
    if (!ok)
        return null_node;
    (void)for_token;
    fprintf(stderr, "parseForStatement cannot parse for statements\n");
    // Bug fix: the `for` keyword has already been consumed, so returning 0
    // here would corrupt parser state while the caller believes no
    // for-statement was present. Abort like every other unimplemented-path
    // stub in this file (the `return 0; // tcc` idiom elsewhere always
    // follows an exit(1)).
    exit(1);
    return 0; // tcc
}
// WhileStatement stub: returns null_node when no `while` keyword is
// present; an actual while-statement is not implemented yet and aborts.
static AstNodeIndex parseWhileStatement(Parser* p) {
    bool ok;
    const AstNodeIndex while_token = eatToken(p, TOKENIZER_TAG_KEYWORD_WHILE, &ok);
    if (!ok)
        return null_node;
    (void)while_token;
    fprintf(stderr, "parseWhileStatement cannot parse while statements\n");
    // Bug fix: the `while` keyword is already consumed; falling through
    // with `return 0` would desynchronize the parser. Abort like the other
    // unimplemented-path stubs (`// tcc` elsewhere always follows exit(1)).
    exit(1);
    return 0; // tcc
}
// LoopStatement <- KEYWORD_inline? (ForStatement / WhileStatement)
// A bare `inline` with neither loop form following is a hard error.
static AstNodeIndex parseLoopStatement(Parser* p) {
    bool saw_inline;
    eatToken(p, TOKENIZER_TAG_KEYWORD_INLINE, &saw_inline);
    AstNodeIndex stmt = parseForStatement(p);
    if (stmt == 0)
        stmt = parseWhileStatement(p);
    if (stmt != 0)
        return stmt;
    if (!saw_inline)
        return null_node;
    fprintf(stderr, "If we've seen 'inline', there should have been a 'for' or 'while'\n");
    exit(1);
    return 0; // tcc
}
// AssignExpr stub: always aborts — assignment expressions are not
// implemented yet. Reached from expectStatement when defer/var statements
// are disallowed.
static AstNodeIndex parseAssignExpr(Parser* p) {
(void)p;
fprintf(stderr, "parseAssignExpr not implemented\n");
exit(1);
return 0; // tcc
}
// VarDecl/expression-statement stub: always aborts — not implemented yet.
// Reached from expectStatement when defer/var statements are allowed.
static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
(void)p;
fprintf(stderr, "expectVarDeclExprStatement not implemented\n");
exit(1);
return 0; // tcc
}
static AstNodeIndex parseLabeledStatement(Parser*);
static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) { static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
bool ok; bool ok;
if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) { if (eatToken(p, TOKENIZER_TAG_KEYWORD_COMPTIME, &ok), ok) {
fprintf(stderr, "expectStatement: comptime keyword not yet supported\n"); fprintf(stderr, "expectStatement: comptime keyword not supported\n");
exit(1); exit(1);
} }
@@ -357,12 +490,20 @@ static AstNodeIndex expectStatement(Parser* p, bool allow_defer_var) {
case TOKENIZER_TAG_KEYWORD_STRUCT: case TOKENIZER_TAG_KEYWORD_STRUCT:
case TOKENIZER_TAG_KEYWORD_UNION:; case TOKENIZER_TAG_KEYWORD_UNION:;
const char* tok_str = tokenizerGetTagString(tok); const char* tok_str = tokenizerGetTagString(tok);
fprintf(stderr, "expectStatement does not yet support keyword %s\n", tok_str); fprintf(stderr, "expectStatement does not support keyword %s\n", tok_str);
exit(1); exit(1);
default:; default:;
} }
// TODO continue
return 1; const AstNodeIndex labeled_statement = parseLabeledStatement(p);
if (labeled_statement != 0)
return labeled_statement;
if (allow_defer_var) {
return expectVarDeclExprStatement(p);
} else {
return parseAssignExpr(p);
}
} }
typedef struct { typedef struct {
@@ -452,6 +593,24 @@ static AstNodeIndex parseBlock(Parser* p) {
return 0; return 0;
} }
// LabeledStatement <- BlockLabel? (Block / LoopStatement)
// Returns the parsed block or loop node, or null_node when neither is
// present. A label with no following block/loop is not supported yet.
static AstNodeIndex parseLabeledStatement(Parser* p) {
    const AstNodeIndex label = parseBlockLabel(p);
    AstNodeIndex stmt = parseBlock(p);
    if (stmt == 0)
        stmt = parseLoopStatement(p);
    if (stmt != 0)
        return stmt;
    if (label != 0) {
        fprintf(stderr, "parseLabeledStatement does not support labels\n");
        exit(1);
    }
    return null_node;
}
static AstNodeIndex parseVarDeclProto(Parser* p) { static AstNodeIndex parseVarDeclProto(Parser* p) {
bool ok; bool ok;
eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok); eatToken(p, TOKENIZER_TAG_KEYWORD_CONST, &ok);
@@ -476,23 +635,15 @@ static AstNodeIndex parseGlobalVarDecl(Parser* p) {
} }
static AstNodeIndex expectTopLevelDecl(Parser* p) { static AstNodeIndex expectTopLevelDecl(Parser* p) {
AstTokenIndex extern_export_inline_token = p->tok_i++; AstTokenIndex extern_export_inline_token = nextToken(p);
bool is_extern = false;
bool expect_fn = false;
bool expect_var_or_fn = false;
switch (p->token_tags[extern_export_inline_token]) { switch (p->token_tags[extern_export_inline_token]) {
case TOKENIZER_TAG_KEYWORD_EXTERN: case TOKENIZER_TAG_KEYWORD_EXTERN:
eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL); eatToken(p, TOKENIZER_TAG_STRING_LITERAL, NULL);
is_extern = true;
expect_var_or_fn = true;
break; break;
case TOKENIZER_TAG_KEYWORD_EXPORT: case TOKENIZER_TAG_KEYWORD_EXPORT:
expect_var_or_fn = true;
break;
case TOKENIZER_TAG_KEYWORD_INLINE: case TOKENIZER_TAG_KEYWORD_INLINE:
case TOKENIZER_TAG_KEYWORD_NOINLINE: case TOKENIZER_TAG_KEYWORD_NOINLINE:
expect_fn = true;
break; break;
default: default:
p->tok_i--; p->tok_i--;
@@ -504,10 +655,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
case TOKENIZER_TAG_SEMICOLON: case TOKENIZER_TAG_SEMICOLON:
p->tok_i++; p->tok_i++;
return fn_proto; return fn_proto;
case TOKENIZER_TAG_L_BRACE: case TOKENIZER_TAG_L_BRACE:;
if (is_extern)
exit(1);
AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL); AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
AstNodeIndex body_block = parseBlock(p); AstNodeIndex body_block = parseBlock(p);
return setNode( return setNode(
@@ -523,10 +671,7 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
} }
} }
if (expect_fn) eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
exit(1);
AstTokenIndex thread_local_token = eatToken(p, TOKENIZER_TAG_KEYWORD_THREADLOCAL, NULL);
AstNodeIndex var_decl = parseGlobalVarDecl(p); AstNodeIndex var_decl = parseGlobalVarDecl(p);
if (var_decl != 0) { if (var_decl != 0) {
return var_decl; return var_decl;
@@ -538,9 +683,72 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
return 0; // make tcc happy return 0; // make tcc happy
} }
// Error-recovery scan: after a malformed container member, advance the
// token cursor to a point where parsing the next member can plausibly
// resume. `level` tracks paren/bracket/brace nesting so tokens inside a
// nested expression or block never terminate the scan. Tokens that should
// be re-parsed by the caller are pushed back with `p->tok_i--`.
void findNextContainerMember(Parser* p) {
    uint32_t level = 0;
    while (true) {
        AstTokenIndex tok = nextToken(p);
        switch (p->token_tags[tok]) {
            // Any of these can start a new top level declaration
            case TOKENIZER_TAG_KEYWORD_TEST:
            case TOKENIZER_TAG_KEYWORD_COMPTIME:
            case TOKENIZER_TAG_KEYWORD_PUB:
            case TOKENIZER_TAG_KEYWORD_EXPORT:
            case TOKENIZER_TAG_KEYWORD_EXTERN:
            case TOKENIZER_TAG_KEYWORD_INLINE:
            case TOKENIZER_TAG_KEYWORD_NOINLINE:
            case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
            case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
            case TOKENIZER_TAG_KEYWORD_CONST:
            case TOKENIZER_TAG_KEYWORD_VAR:
            case TOKENIZER_TAG_KEYWORD_FN:
                // Only at top level: put the keyword back so the caller can
                // re-parse it as the start of the next declaration.
                if (level == 0) {
                    p->tok_i--;
                    return;
                }
                break;
            case TOKENIZER_TAG_IDENTIFIER:
                // `ident,` at top level looks like a container field
                // continuation — resume there.
                if (p->token_tags[tok + 1] == TOKENIZER_TAG_COMMA && level == 0) {
                    p->tok_i--;
                    return;
                }
                break;
            case TOKENIZER_TAG_COMMA:
            case TOKENIZER_TAG_SEMICOLON:
                // This decl was likely meant to end here
                if (level == 0)
                    return;
                break;
            case TOKENIZER_TAG_L_PAREN:
            case TOKENIZER_TAG_L_BRACKET:
            case TOKENIZER_TAG_L_BRACE:
                level++;
                break;
            case TOKENIZER_TAG_R_PAREN:
            case TOKENIZER_TAG_R_BRACKET:
                // Tolerate unbalanced closers: never let level go negative.
                if (level != 0)
                    level--;
                break;
            case TOKENIZER_TAG_R_BRACE:
                if (level == 0) {
                    // end of container, exit
                    p->tok_i--;
                    return;
                }
                level--;
                break;
            case TOKENIZER_TAG_EOF:
                // Put EOF back so the caller's loop terminates normally.
                p->tok_i--;
                return;
            default:
                break;
        }
    }
}
static Members parseContainerMembers(Parser* p) { static Members parseContainerMembers(Parser* p) {
const uint32_t scratch_top = p->scratch.len; const uint32_t scratch_top = p->scratch.len;
Members res = (Members) {};
// ast_token_index last_field; // ast_token_index last_field;
bool ok; bool ok;
while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok) while (eatToken(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT, &ok) && ok)
@@ -597,8 +805,6 @@ static Members parseContainerMembers(Parser* p) {
goto break_loop; goto break_loop;
default:; default:;
// skip parseCStyleContainer // skip parseCStyleContainer
const AstNodeIndex identifier = p->tok_i;
const AstNodeIndex container_field = expectContainerField(p); const AstNodeIndex container_field = expectContainerField(p);
switch (field_state.tag) { switch (field_state.tag) {
case FIELD_STATE_NONE: case FIELD_STATE_NONE:
@@ -628,8 +834,41 @@ static Members parseContainerMembers(Parser* p) {
continue; continue;
} }
} }
break_loop:
break_loop:;
const uint32_t scratch_len = p->scratch.len;
p->scratch.len = scratch_top; p->scratch.len = scratch_top;
return res;
const uint32_t n_items = scratch_len - scratch_top;
switch (n_items) {
case 0:
return (Members) {
.len = 0,
.lhs = 0,
.rhs = 0,
.trailing = trailing,
};
case 1:
return (Members) {
.len = 1,
.lhs = p->scratch.arr[scratch_top],
.rhs = 0,
.trailing = trailing,
};
case 2:
return (Members) {
.len = 2,
.lhs = p->scratch.arr[scratch_top],
.rhs = p->scratch.arr[scratch_top + 1],
.trailing = trailing,
};
default:
return (Members) {
.len = n_items,
.lhs = p->scratch.arr[scratch_top],
.rhs = p->scratch.arr[scratch_len],
.trailing = trailing,
};
}
} }

View File

@@ -1,6 +1,6 @@
// parser.h // parser.h
#ifndef _ZIG1_PARSE_H__ #ifndef _ZIG0_PARSE_H__
#define _ZIG1_PARSE_H__ #define _ZIG0_PARSE_H__
#include "ast.h" #include "ast.h"
#include "common.h" #include "common.h"

View File

@@ -1,3 +1,3 @@
test "zig1 test suite" { test "zig0 test suite" {
_ = @import("tokenizer_test.zig"); _ = @import("tokenizer_test.zig");
} }

View File

@@ -1,5 +1,5 @@
#ifndef _ZIG1_TOKENIZER_H__ #ifndef _ZIG0_TOKENIZER_H__
#define _ZIG1_TOKENIZER_H__ #define _ZIG0_TOKENIZER_H__
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>

6
zig1.c
View File

@@ -6,7 +6,7 @@
// - code = 0: program successfully terminated. // - code = 0: program successfully terminated.
// - code = 1: panicked, panic message in msg. Caller should free msg. // - code = 1: panicked, panic message in msg. Caller should free msg.
// - code = 2: interpreter error, error in msg. Caller should free msg. // - code = 2: interpreter error, error in msg. Caller should free msg.
int zig1Run(const char* program, char** msg) { int zig0Run(const char* program, char** msg) {
(void)program; (void)program;
(void)msg; (void)msg;
return 0; return 0;
@@ -14,7 +14,7 @@ int zig1Run(const char* program, char** msg) {
// API: run and: // API: run and:
// code = 3: abnormal error, expect something in stderr. // code = 3: abnormal error, expect something in stderr.
int zig1RunFile(const char* fname, char** msg) { int zig0RunFile(const char* fname, char** msg) {
FILE* f = fopen(fname, "r"); FILE* f = fopen(fname, "r");
if (f == NULL) { if (f == NULL) {
perror("fopen"); perror("fopen");
@@ -51,7 +51,7 @@ int zig1RunFile(const char* fname, char** msg) {
fclose(f); fclose(f);
program[fsize] = 0; program[fsize] = 0;
int code = zig1Run(program, msg); int code = zig0Run(program, msg);
free(program); free(program);
return code; return code;
} }