commit 2948cfd7cf8f0f1da852fc26c5e9680b4ae71ca0 (tree)
parent 8b9a22907e80872af3a88b4f085f400ee3eed876
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Tue, 10 Feb 2026 10:03:26 +0000
zig 0.15.1
With opus 4.6
Diffstat:
8 files changed, 332 insertions(+), 38 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
/.zig-cache/
+/zig-out/
*.o
diff --git a/AGENTS.md b/AGENTS.md
@@ -0,0 +1 @@
+See README.md for useful information about working on this.
diff --git a/README.md b/README.md
@@ -1 +1,29 @@
zig0 aspires to be an interpreter of zig 0.14.0 C backend.
+
+# Testing
+
+The following values of `$CC` are supported: `clang`, `gcc` and `tcc`. To run the tests:
+
+ zig build test -Dcc=$CC
+
+Static analysis:
+
+ zig build fmt lint
+
+# Debugging tips
+
+Does a test run forever? Build the test executable without running it:
+
+ $ zig build test -Dno-exec
+
+And then run it, capturing the stack trace:
+
+gdb -batch \
+ -ex "python import threading; threading.Timer(1.0, lambda: gdb.post_event(lambda: gdb.execute('interrupt'))).start()" \
+ -ex run \
+ -ex "bt full" \
+ -ex quit \
+ zig-out/bin/test
+
+
+You are welcome to replace `-ex "bt full"` with any other gdb command of interest.
diff --git a/build.zig b/build.zig
@@ -39,16 +39,20 @@ pub fn build(b: *std.Build) !void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
- const lib = b.addStaticLibrary(.{
- .name = "tokenizer",
+ const lib_mod = b.createModule(.{
.optimize = optimize,
.target = target,
+ .link_libc = true,
+ });
+ const lib = b.addLibrary(.{
+ .name = "tokenizer",
+ .root_module = lib_mod,
});
const cc = b.option([]const u8, "cc", "C compiler") orelse "zig";
if (std.mem.eql(u8, cc, "zig"))
- lib.addCSourceFiles(.{ .files = c_lib_files, .flags = cflags })
+ lib.root_module.addCSourceFiles(.{ .files = c_lib_files, .flags = cflags })
else for (c_lib_files) |cfile| {
const cc1 = b.addSystemCommand(&.{cc});
cc1.addArgs(cflags ++ .{"-g"});
@@ -60,22 +64,30 @@ pub fn build(b: *std.Build) !void {
cc1.addArg("-c");
cc1.addFileArg(b.path(cfile));
cc1.addArg("-o");
- lib.addObjectFile(cc1.addOutputFileArg(try std.fmt.allocPrint(
+ lib.root_module.addObjectFile(cc1.addOutputFileArg(try std.fmt.allocPrint(
b.allocator,
"{s}.o",
.{cfile[0 .. cfile.len - 2]},
)));
}
- lib.linkLibC();
+
+ const no_exec = b.option(bool, "no-exec", "Compile test binary without running it") orelse false;
const test_step = b.step("test", "Run unit tests");
- const test_exe = b.addTest(.{
+ const test_mod = b.createModule(.{
.root_source_file = b.path("test_all.zig"),
.optimize = optimize,
+ .target = target,
});
- test_exe.linkLibrary(lib);
- test_exe.addIncludePath(b.path("."));
- test_step.dependOn(&b.addRunArtifact(test_exe).step);
+ test_mod.linkLibrary(lib);
+ test_mod.addIncludePath(b.path("."));
+ const test_exe = b.addTest(.{ .root_module = test_mod });
+ if (no_exec) {
+ const install = b.addInstallArtifact(test_exe, .{});
+ test_step.dependOn(&install.step);
+ } else {
+ test_step.dependOn(&b.addRunArtifact(test_exe).step);
+ }
const fmt_step = b.step("fmt", "clang-format");
const clang_format = b.addSystemCommand(&.{ "clang-format", "-Werror", "-i" });
diff --git a/parser.c b/parser.c
@@ -130,7 +130,9 @@ static AstNodeIndex addExtra(
Parser* p, const AstNodeIndex* extra, uint32_t count) {
const AstNodeIndex result = p->extra_data.len;
SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count);
- memcpy(p->extra_data.arr, extra, count * sizeof(AstNodeIndex));
+ memcpy(p->extra_data.arr + p->extra_data.len, extra,
+ count * sizeof(AstNodeIndex));
+ p->extra_data.len += count;
return result;
}
@@ -947,14 +949,28 @@ static AstNodeIndex expectVarDeclExprStatement(Parser* p) {
const AstNodeIndex expr = parseExpr(p);
SLICE_APPEND(AstNodeIndex, &p->scratch, expr);
}
- if (eatToken(p, TOKEN_COMMA) == null_node)
+ if (eatToken(p, TOKEN_COMMA) == null_token)
break;
}
const uint32_t lhs_count = p->scratch.len - scratch_top.old_len;
assert(lhs_count > 0);
- fprintf(stderr, "expectVarDeclExprStatement only partially implemented\n");
+ if (lhs_count == 1) {
+ const AstNodeIndex lhs = p->scratch.arr[scratch_top.old_len];
+ switch (p->token_tags[p->tok_i]) {
+ case TOKEN_SEMICOLON:
+ p->tok_i++;
+ return lhs;
+ default:
+ fprintf(stderr,
+ "expectVarDeclExprStatement: assignment not implemented\n");
+ exit(1);
+ }
+ }
+
+ fprintf(
+ stderr, "expectVarDeclExprStatement: destructuring not implemented\n");
exit(1);
return 0; // tcc
}
@@ -1015,7 +1031,7 @@ static AstNodeIndex parseBlock(Parser* p) {
SLICE_APPEND(AstNodeIndex, &p->scratch, statement);
}
expectToken(p, TOKEN_R_BRACE);
- const bool semicolon = (p->token_tags[p->tok_i] - 2 == TOKEN_SEMICOLON);
+ const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON);
const uint32_t statements_len = p->scratch.len - scratch_top.old_len;
switch (statements_len) {
diff --git a/parser_test.zig b/parser_test.zig
@@ -13,7 +13,6 @@ const zigToken = @import("./tokenizer_test.zig").zigToken;
fn zigNode(token: c_uint) Ast.Node.Tag {
return switch (token) {
c.AST_NODE_ROOT => .root,
- c.AST_NODE_USINGNAMESPACE => .@"usingnamespace",
c.AST_NODE_TEST_DECL => .test_decl,
c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl,
c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl,
@@ -78,7 +77,6 @@ fn zigNode(token: c_uint) Ast.Node.Tag {
c.AST_NODE_NEGATION_WRAP => .negation_wrap,
c.AST_NODE_ADDRESS_OF => .address_of,
c.AST_NODE_TRY => .@"try",
- c.AST_NODE_AWAIT => .@"await",
c.AST_NODE_OPTIONAL_TYPE => .optional_type,
c.AST_NODE_ARRAY_TYPE => .array_type,
c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel,
@@ -109,12 +107,8 @@ fn zigNode(token: c_uint) Ast.Node.Tag {
c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma,
c.AST_NODE_CALL_ONE => .call_one,
c.AST_NODE_CALL_ONE_COMMA => .call_one_comma,
- c.AST_NODE_ASYNC_CALL_ONE => .async_call_one,
- c.AST_NODE_ASYNC_CALL_ONE_COMMA => .async_call_one_comma,
c.AST_NODE_CALL => .call,
c.AST_NODE_CALL_COMMA => .call_comma,
- c.AST_NODE_ASYNC_CALL => .async_call,
- c.AST_NODE_ASYNC_CALL_COMMA => .async_call_comma,
c.AST_NODE_SWITCH => .@"switch",
c.AST_NODE_SWITCH_COMMA => .switch_comma,
c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one,
@@ -186,6 +180,253 @@ fn zigNode(token: c_uint) Ast.Node.Tag {
};
}
+/// Reinterprets a raw `u32` as an `Ast.Node.Index` via `@enumFromInt`.
+/// No value is treated specially; use `toOptIndex` where 0 should mean "none".
+fn toIndex(v: u32) Ast.Node.Index {
+ return @enumFromInt(v);
+}
+
+/// Converts a raw `u32` into an `Ast.Node.OptionalIndex`.
+/// A value of 0 becomes `.none`; every other value is converted with
+/// `@enumFromInt`.
+fn toOptIndex(v: u32) Ast.Node.OptionalIndex {
+ return if (v == 0) .none else @enumFromInt(v);
+}
+
+/// Reinterprets a raw `u32` as an `Ast.ExtraIndex` via `@enumFromInt`.
+fn toExtraIndex(v: u32) Ast.ExtraIndex {
+ return @enumFromInt(v);
+}
+
+/// Reinterprets a raw `u32` as an `Ast.OptionalTokenIndex` via `@enumFromInt`.
+/// NOTE(review): unlike `toOptIndex`, 0 is not mapped to `.none` here; this
+/// presumably relies on the C parser already storing the same "no token"
+/// value that `Ast.OptionalTokenIndex` uses -- confirm against the C side.
+fn toOptTokenIndex(v: u32) Ast.OptionalTokenIndex {
+ return @enumFromInt(v);
+}
+
+/// Builds the `Ast.Node.Data` value for one node from the two raw `u32`
+/// payload words (`lhs`, `rhs`) produced by the C parser.
+///
+/// `Ast.Node.Data` is a bare union, so the variant chosen here must be the
+/// one that `std.zig.Ast` (and its renderer) reads for the given `tag`:
+/// in safety-checked builds, reading any other field is a panic. The
+/// grouping below therefore follows the per-tag `data` documentation of
+/// `std.zig.Ast.Node.Tag`.
+///
+/// Parameters:
+///   - `tag`: the already converted node tag; selects the union variant.
+///   - `lhs`, `rhs`: raw payload words; each is converted to a node index,
+///     optional node index, extra-data index, optional token index, or kept
+///     as a plain token index, depending on the variant.
+fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data {
+    return switch (tag) {
+        // Data is unused for these tags; any variant is acceptable because
+        // it is never read.
+        .identifier,
+        .string_literal,
+        .char_literal,
+        .number_literal,
+        .unreachable_literal,
+        .anyframe_literal,
+        .enum_literal,
+        .error_value,
+        => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } },
+
+        // .node (single, mandatory operand)
+        .@"defer",
+        .@"comptime",
+        .@"nosuspend",
+        .@"suspend",
+        .@"resume",
+        .bool_not,
+        .negation,
+        .bit_not,
+        .negation_wrap,
+        .address_of,
+        .@"try",
+        .deref,
+        // `?T` always has an operand, so it is read as `.node`.
+        .optional_type,
+        => .{ .node = toIndex(lhs) },
+
+        // .opt_node (single, optional operand)
+        .@"return",
+        => .{ .opt_node = toOptIndex(lhs) },
+
+        // .node_and_node
+        .fn_decl,
+        .container_field_align,
+        .error_union,
+        .@"catch",
+        .equal_equal,
+        .bang_equal,
+        .less_than,
+        .greater_than,
+        .less_or_equal,
+        .greater_or_equal,
+        .assign_mul,
+        .assign_div,
+        .assign_mod,
+        .assign_add,
+        .assign_sub,
+        .assign_shl,
+        .assign_shl_sat,
+        .assign_shr,
+        .assign_bit_and,
+        .assign_bit_xor,
+        .assign_bit_or,
+        .assign_mul_wrap,
+        .assign_add_wrap,
+        .assign_sub_wrap,
+        .assign_mul_sat,
+        .assign_add_sat,
+        .assign_sub_sat,
+        .assign,
+        .merge_error_sets,
+        .mul,
+        .div,
+        .mod,
+        .array_mult,
+        .mul_wrap,
+        .mul_sat,
+        .add,
+        .sub,
+        .array_cat,
+        .add_wrap,
+        .sub_wrap,
+        .add_sat,
+        .sub_sat,
+        .shl,
+        .shl_sat,
+        .shr,
+        .bit_and,
+        .bit_xor,
+        .bit_or,
+        .@"orelse",
+        .bool_and,
+        .bool_or,
+        .array_type,
+        .array_access,
+        .switch_range,
+        // lhs[rhs..]
+        .slice_open,
+        // condition/input plus body, no extra data
+        .while_simple,
+        .for_simple,
+        .if_simple,
+        // lhs{a}: the single element is mandatory
+        .array_init_one,
+        .array_init_one_comma,
+        => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } },
+
+        // .opt_node_and_opt_node
+        .fn_proto_simple,
+        .simple_var_decl,
+        .block_two,
+        .block_two_semicolon,
+        .builtin_call_two,
+        .builtin_call_two_comma,
+        .container_decl_two,
+        .container_decl_two_trailing,
+        .tagged_union_two,
+        .tagged_union_two_trailing,
+        .struct_init_dot_two,
+        .struct_init_dot_two_comma,
+        .array_init_dot_two,
+        .array_init_dot_two_comma,
+        => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } },
+
+        // .node_and_opt_node
+        .call_one,
+        .call_one_comma,
+        .struct_init_one,
+        .struct_init_one_comma,
+        .container_field_init,
+        .aligned_var_decl,
+        .for_range,
+        => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } },
+
+        // .opt_node_and_node
+        .ptr_type_aligned,
+        .ptr_type_sentinel,
+        .switch_case_one,
+        .switch_case_inline_one,
+        => .{ .opt_node_and_node = .{ toOptIndex(lhs), toIndex(rhs) } },
+
+        // .node_and_extra
+        .call,
+        .call_comma,
+        .container_field,
+        .array_type_sentinel,
+        .slice,
+        .slice_sentinel,
+        .array_init,
+        .array_init_comma,
+        .struct_init,
+        .struct_init_comma,
+        .@"switch",
+        .switch_comma,
+        .container_decl_arg,
+        .container_decl_arg_trailing,
+        .tagged_union_enum_tag,
+        .tagged_union_enum_tag_trailing,
+        .@"asm",
+        .asm_legacy,
+        // The non-simple loop/branch forms keep their continuation and
+        // else branches in extra data, so rhs is an extra index, not a node.
+        .while_cont,
+        .@"while",
+        .@"if",
+        => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } },
+
+        // .extra_and_node
+        .assign_destructure,
+        .switch_case,
+        .switch_case_inline,
+        .ptr_type,
+        .ptr_type_bit_range,
+        => .{ .extra_and_node = .{ toExtraIndex(lhs), toIndex(rhs) } },
+
+        // .extra_and_opt_node
+        .global_var_decl,
+        .local_var_decl,
+        .fn_proto_multi,
+        .fn_proto_one,
+        .fn_proto,
+        => .{ .extra_and_opt_node = .{ toExtraIndex(lhs), toOptIndex(rhs) } },
+
+        // .extra_range (SubRange)
+        .root,
+        .block,
+        .block_semicolon,
+        .builtin_call,
+        .builtin_call_comma,
+        .container_decl,
+        .container_decl_trailing,
+        .tagged_union,
+        .tagged_union_trailing,
+        .array_init_dot,
+        .array_init_dot_comma,
+        .struct_init_dot,
+        .struct_init_dot_comma,
+        => .{ .extra_range = .{ .start = toExtraIndex(lhs), .end = toExtraIndex(rhs) } },
+
+        // .node_and_token: rhs is a token index, not a node.
+        .grouped_expression,
+        .asm_input,
+        // lhs.a / lhs.?: rhs is the identifier / `?` token
+        .field_access,
+        .unwrap_optional,
+        // asm(lhs): rhs is the closing parenthesis token
+        .asm_simple,
+        => .{ .node_and_token = .{ toIndex(lhs), rhs } },
+
+        // .opt_node_and_token
+        .asm_output,
+        => .{ .opt_node_and_token = .{ toOptIndex(lhs), rhs } },
+
+        // .opt_token_and_node
+        .test_decl,
+        .@"errdefer",
+        => .{ .opt_token_and_node = .{ toOptTokenIndex(lhs), toIndex(rhs) } },
+
+        // .opt_token_and_opt_node
+        .@"break",
+        .@"continue",
+        => .{ .opt_token_and_opt_node = .{ toOptTokenIndex(lhs), toOptIndex(rhs) } },
+
+        // .token_and_token
+        .error_set_decl,
+        .multiline_string_literal,
+        => .{ .token_and_token = .{ lhs, rhs } },
+
+        // .token_and_node
+        .anyframe_type,
+        => .{ .token_and_node = .{ lhs, toIndex(rhs) } },
+
+        // .for: rhs is the packed `For` header, reinterpreted bit-for-bit.
+        .@"for",
+        => .{ .@"for" = .{ toExtraIndex(lhs), @bitCast(rhs) } },
+    };
+}
+
// zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit().
fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
var tokens = Ast.TokenList{};
@@ -202,27 +443,22 @@ fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
try nodes.resize(gpa, c_ast.nodes.len);
errdefer nodes.deinit(gpa);
- for (0..c_ast.nodes.len) |i|
+ for (0..c_ast.nodes.len) |i| {
+ const tag = zigNode(c_ast.nodes.tags[i]);
nodes.set(i, .{
- .tag = zigNode(c_ast.nodes.tags[i]),
+ .tag = tag,
.main_token = c_ast.nodes.main_tokens[i],
- .data = Ast.Node.Data{
- .lhs = c_ast.nodes.datas[i].lhs,
- .rhs = c_ast.nodes.datas[i].rhs,
- },
+ .data = zigData(tag, c_ast.nodes.datas[i].lhs, c_ast.nodes.datas[i].rhs),
});
+ }
- var extra_data = try gpa.alloc(Ast.Node.Index, c_ast.extra_data.len);
+ const extra_data = try gpa.alloc(u32, c_ast.extra_data.len);
errdefer gpa.free(extra_data);
- std.mem.copyForwards(
- Ast.Node.Index,
- extra_data[0..],
- c_ast.extra_data.arr[0..c_ast.extra_data.len],
- );
+ @memcpy(extra_data, c_ast.extra_data.arr[0..c_ast.extra_data.len]);
// creating a dummy `errors` slice, so deinit can free it.
const errors = try gpa.alloc(Ast.Error, 0);
- errdefer gpa.deinit(errors);
+ errdefer gpa.free(errors);
return Ast{
.source = c_ast.source[0..c_ast.source_len :0],
@@ -247,7 +483,9 @@ const maxInt = std.math.maxInt;
var fixed_buffer_mem: [100 * 1024]u8 = undefined;
fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 {
- const stderr = io.getStdErr().writer();
+ var stderr_buf: [4096]u8 = undefined;
+ var stderr_file_writer = std.fs.File.stderr().writer(&stderr_buf);
+ const stderr = &stderr_file_writer.interface;
//var tree = try std.zig.Ast.parse(allocator, source, .zig);
var c_tree = c.astParse(source, @intCast(source.len));
@@ -273,7 +511,7 @@ fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *
return error.ParseError;
}
- const formatted = try tree.render(allocator);
+ const formatted = try tree.renderAlloc(allocator);
anything_changed.* = !mem.eql(u8, formatted, source);
return formatted;
}
@@ -313,6 +551,7 @@ test "zig fmt: remove extra whitespace at start and end of file with comment bet
);
}
+
test "my function" {
try testCanonical(
\\pub fn main() void {
diff --git a/tokenizer.c b/tokenizer.c
@@ -103,7 +103,7 @@ TokenizerToken tokenizerNext(Tokenizer* self) {
TokenizerToken result = (TokenizerToken) {
.tag = TOKEN_INVALID,
.loc = {
- .start = 0,
+ .start = self->index,
},
};
diff --git a/tokenizer_test.zig b/tokenizer_test.zig
@@ -90,8 +90,6 @@ pub fn zigToken(token: c_uint) Token.Tag {
c.TOKEN_KEYWORD_ANYFRAME => .keyword_anyframe,
c.TOKEN_KEYWORD_ANYTYPE => .keyword_anytype,
c.TOKEN_KEYWORD_ASM => .keyword_asm,
- c.TOKEN_KEYWORD_ASYNC => .keyword_async,
- c.TOKEN_KEYWORD_AWAIT => .keyword_await,
c.TOKEN_KEYWORD_BREAK => .keyword_break,
c.TOKEN_KEYWORD_CALLCONV => .keyword_callconv,
c.TOKEN_KEYWORD_CATCH => .keyword_catch,
@@ -128,7 +126,6 @@ pub fn zigToken(token: c_uint) Token.Tag {
c.TOKEN_KEYWORD_TRY => .keyword_try,
c.TOKEN_KEYWORD_UNION => .keyword_union,
c.TOKEN_KEYWORD_UNREACHABLE => .keyword_unreachable,
- c.TOKEN_KEYWORD_USINGNAMESPACE => .keyword_usingnamespace,
c.TOKEN_KEYWORD_VAR => .keyword_var,
c.TOKEN_KEYWORD_VOLATILE => .keyword_volatile,
c.TOKEN_KEYWORD_WHILE => .keyword_while,