From 2948cfd7cf8f0f1da852fc26c5e9680b4ae71ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Tue, 10 Feb 2026 10:03:26 +0000 Subject: [PATCH] zig 0.15.1 With opus 4.6 --- .gitignore | 1 + AGENTS.md | 1 + README.md | 28 +++++ build.zig | 30 +++-- parser.c | 24 +++- parser_test.zig | 281 +++++++++++++++++++++++++++++++++++++++++---- tokenizer.c | 2 +- tokenizer_test.zig | 3 - 8 files changed, 332 insertions(+), 38 deletions(-) create mode 100644 AGENTS.md diff --git a/.gitignore b/.gitignore index 94d7938b9e..56de7d8925 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /.zig-cache/ +/zig-out/ *.o diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..71d054f0cd --- /dev/null +++ b/AGENTS.md @@ -0,0 +1 @@ +See README.md for useful information about working on this. diff --git a/README.md b/README.md index 43aa1d722f..e08265d4b2 100644 --- a/README.md +++ b/README.md @@ -1 +1,29 @@ zig0 aspires to be an interpreter of zig 0.14.0 C backend. + +# Testing + +The following values of `$CC` are supported: `clang`, `gcc` and `tcc`. To run the tests: + + zig build test -Dcc=$CC + +Static analysis: + + zig build fmt lint + +# Debugging tips + +Does a test run forever? Build the test program executable: + + $ zig build test -Dno-exec + +And then run it, capturing the stack trace: + +gdb -batch \ + -ex "python import threading; threading.Timer(1.0, lambda: gdb.post_event(lambda: gdb.execute('interrupt'))).start()" \ + -ex run \ + -ex "bt full" \ + -ex quit \ + zig-out/bin/test + + +You are welcome to replace `-ex "bt full"` with any other gdb command of interest. 
diff --git a/build.zig b/build.zig index e88256a2bc..c2ddd1ea79 100644 --- a/build.zig +++ b/build.zig @@ -39,16 +39,20 @@ pub fn build(b: *std.Build) !void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - const lib = b.addStaticLibrary(.{ - .name = "tokenizer", + const lib_mod = b.createModule(.{ .optimize = optimize, .target = target, + .link_libc = true, + }); + const lib = b.addLibrary(.{ + .name = "tokenizer", + .root_module = lib_mod, }); const cc = b.option([]const u8, "cc", "C compiler") orelse "zig"; if (std.mem.eql(u8, cc, "zig")) - lib.addCSourceFiles(.{ .files = c_lib_files, .flags = cflags }) + lib.root_module.addCSourceFiles(.{ .files = c_lib_files, .flags = cflags }) else for (c_lib_files) |cfile| { const cc1 = b.addSystemCommand(&.{cc}); cc1.addArgs(cflags ++ .{"-g"}); @@ -60,22 +64,30 @@ pub fn build(b: *std.Build) !void { cc1.addArg("-c"); cc1.addFileArg(b.path(cfile)); cc1.addArg("-o"); - lib.addObjectFile(cc1.addOutputFileArg(try std.fmt.allocPrint( + lib.root_module.addObjectFile(cc1.addOutputFileArg(try std.fmt.allocPrint( b.allocator, "{s}.o", .{cfile[0 .. 
cfile.len - 2]}, ))); } - lib.linkLibC(); + + const no_exec = b.option(bool, "no-exec", "Compile test binary without running it") orelse false; const test_step = b.step("test", "Run unit tests"); - const test_exe = b.addTest(.{ + const test_mod = b.createModule(.{ .root_source_file = b.path("test_all.zig"), .optimize = optimize, + .target = target, }); - test_exe.linkLibrary(lib); - test_exe.addIncludePath(b.path(".")); - test_step.dependOn(&b.addRunArtifact(test_exe).step); + test_mod.linkLibrary(lib); + test_mod.addIncludePath(b.path(".")); + const test_exe = b.addTest(.{ .root_module = test_mod }); + if (no_exec) { + const install = b.addInstallArtifact(test_exe, .{}); + test_step.dependOn(&install.step); + } else { + test_step.dependOn(&b.addRunArtifact(test_exe).step); + } const fmt_step = b.step("fmt", "clang-format"); const clang_format = b.addSystemCommand(&.{ "clang-format", "-Werror", "-i" }); diff --git a/parser.c b/parser.c index 9195891ea5..328c6d1508 100644 --- a/parser.c +++ b/parser.c @@ -130,7 +130,9 @@ static AstNodeIndex addExtra( Parser* p, const AstNodeIndex* extra, uint32_t count) { const AstNodeIndex result = p->extra_data.len; SLICE_ENSURE_CAPACITY(AstNodeIndex, &p->extra_data, count); - memcpy(p->extra_data.arr, extra, count * sizeof(AstNodeIndex)); + memcpy(p->extra_data.arr + p->extra_data.len, extra, + count * sizeof(AstNodeIndex)); + p->extra_data.len += count; return result; } @@ -947,14 +949,28 @@ static AstNodeIndex expectVarDeclExprStatement(Parser* p) { const AstNodeIndex expr = parseExpr(p); SLICE_APPEND(AstNodeIndex, &p->scratch, expr); } - if (eatToken(p, TOKEN_COMMA) == null_node) + if (eatToken(p, TOKEN_COMMA) == null_token) break; } const uint32_t lhs_count = p->scratch.len - scratch_top.old_len; assert(lhs_count > 0); - fprintf(stderr, "expectVarDeclExprStatement only partially implemented\n"); + if (lhs_count == 1) { + const AstNodeIndex lhs = p->scratch.arr[scratch_top.old_len]; + switch (p->token_tags[p->tok_i]) { + case 
TOKEN_SEMICOLON: + p->tok_i++; + return lhs; + default: + fprintf(stderr, + "expectVarDeclExprStatement: assignment not implemented\n"); + exit(1); + } + } + + fprintf( + stderr, "expectVarDeclExprStatement: destructuring not implemented\n"); exit(1); return 0; // tcc } @@ -1015,7 +1031,7 @@ static AstNodeIndex parseBlock(Parser* p) { SLICE_APPEND(AstNodeIndex, &p->scratch, statement); } expectToken(p, TOKEN_R_BRACE); - const bool semicolon = (p->token_tags[p->tok_i] - 2 == TOKEN_SEMICOLON); + const bool semicolon = (p->token_tags[p->tok_i - 2] == TOKEN_SEMICOLON); const uint32_t statements_len = p->scratch.len - scratch_top.old_len; switch (statements_len) { diff --git a/parser_test.zig b/parser_test.zig index cfc9558879..e1037ebb76 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -13,7 +13,6 @@ const zigToken = @import("./tokenizer_test.zig").zigToken; fn zigNode(token: c_uint) Ast.Node.Tag { return switch (token) { c.AST_NODE_ROOT => .root, - c.AST_NODE_USINGNAMESPACE => .@"usingnamespace", c.AST_NODE_TEST_DECL => .test_decl, c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl, c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl, @@ -78,7 +77,6 @@ fn zigNode(token: c_uint) Ast.Node.Tag { c.AST_NODE_NEGATION_WRAP => .negation_wrap, c.AST_NODE_ADDRESS_OF => .address_of, c.AST_NODE_TRY => .@"try", - c.AST_NODE_AWAIT => .@"await", c.AST_NODE_OPTIONAL_TYPE => .optional_type, c.AST_NODE_ARRAY_TYPE => .array_type, c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel, @@ -109,12 +107,8 @@ fn zigNode(token: c_uint) Ast.Node.Tag { c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma, c.AST_NODE_CALL_ONE => .call_one, c.AST_NODE_CALL_ONE_COMMA => .call_one_comma, - c.AST_NODE_ASYNC_CALL_ONE => .async_call_one, - c.AST_NODE_ASYNC_CALL_ONE_COMMA => .async_call_one_comma, c.AST_NODE_CALL => .call, c.AST_NODE_CALL_COMMA => .call_comma, - c.AST_NODE_ASYNC_CALL => .async_call, - c.AST_NODE_ASYNC_CALL_COMMA => .async_call_comma, c.AST_NODE_SWITCH => .@"switch", c.AST_NODE_SWITCH_COMMA 
=> .switch_comma, c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one, @@ -186,6 +180,253 @@ fn zigNode(token: c_uint) Ast.Node.Tag { }; } +fn toIndex(v: u32) Ast.Node.Index { + return @enumFromInt(v); +} + +fn toOptIndex(v: u32) Ast.Node.OptionalIndex { + return if (v == 0) .none else @enumFromInt(v); +} + +fn toExtraIndex(v: u32) Ast.ExtraIndex { + return @enumFromInt(v); +} + +fn toOptTokenIndex(v: u32) Ast.OptionalTokenIndex { + return @enumFromInt(v); +} + +fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data { + return switch (tag) { + // data unused by these tags; any union field is acceptable + .identifier, + .string_literal, + .char_literal, + .number_literal, + .unreachable_literal, + .anyframe_literal, + .enum_literal, + .error_value, + => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, + + // .node (single node index; rhs is ignored) + .@"defer", + .@"comptime", + .@"nosuspend", + .@"suspend", + .@"resume", + .bool_not, + .negation, + .bit_not, + .negation_wrap, + .address_of, + .@"try", + .deref, + .optional_type, + => .{ .node = toIndex(lhs) }, + + // .opt_node (single optional node; rhs is ignored) + .@"return", + => .{ .opt_node = toOptIndex(lhs) }, + + // .node_and_node + .fn_decl, + .container_field_align, + .error_union, + .@"catch", + .equal_equal, + .bang_equal, + .less_than, + .greater_than, + .less_or_equal, + .greater_or_equal, + .assign_mul, + .assign_div, + .assign_mod, + .assign_add, + .assign_sub, + .assign_shl, + .assign_shl_sat, + .assign_shr, + .assign_bit_and, + .assign_bit_xor, + .assign_bit_or, + .assign_mul_wrap, + .assign_add_wrap, + .assign_sub_wrap, + .assign_mul_sat, + .assign_add_sat, + .assign_sub_sat, + .assign, + .merge_error_sets, + .mul, + .div, + .mod, + .array_mult, + .mul_wrap, + .mul_sat, + .add, + .sub, + .array_cat, + .add_wrap, + .sub_wrap, + .add_sat, + .sub_sat, + .shl, + .shl_sat, + .shr, + .bit_and, + .bit_xor, + .bit_or, + .@"orelse", + .bool_and, + .bool_or, + .array_type, + .array_access, + .switch_range, + .array_init_one, + .array_init_one_comma, + => .{ 
.node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + // .opt_node_and_opt_node + .fn_proto_simple, + .simple_var_decl, + .block_two, + .block_two_semicolon, + .builtin_call_two, + .builtin_call_two_comma, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + .struct_init_dot_two, + .struct_init_dot_two_comma, + .array_init_dot_two, + .array_init_dot_two_comma, + => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, + + // .node_and_opt_node + .call_one, + .call_one_comma, + .struct_init_one, + .struct_init_one_comma, + .container_field_init, + .aligned_var_decl, + => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, + + // .opt_node_and_node + .ptr_type_aligned, + .ptr_type_sentinel, + .switch_case_one, + .switch_case_inline_one, + => .{ .opt_node_and_node = .{ toOptIndex(lhs), toIndex(rhs) } }, + + // .node_and_extra + .call, + .call_comma, + .container_field, + .array_type_sentinel, + .slice, + .slice_sentinel, + .array_init, + .array_init_comma, + .struct_init, + .struct_init_comma, + .@"switch", + .switch_comma, + .container_decl_arg, + .container_decl_arg_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .@"asm", + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + + // .extra_and_node + .assign_destructure, + .switch_case, + .switch_case_inline, + .ptr_type, + .ptr_type_bit_range, + => .{ .extra_and_node = .{ toExtraIndex(lhs), toIndex(rhs) } }, + + // .extra_and_opt_node + .global_var_decl, + .local_var_decl, + .fn_proto_multi, + .fn_proto_one, + .fn_proto, + => .{ .extra_and_opt_node = .{ toExtraIndex(lhs), toOptIndex(rhs) } }, + + // .extra_range (SubRange) + .root, + .block, + .block_semicolon, + .builtin_call, + .builtin_call_comma, + .container_decl, + .container_decl_trailing, + .tagged_union, + .tagged_union_trailing, + .array_init_dot, + .array_init_dot_comma, + .struct_init_dot, 
+ .struct_init_dot_comma, + => .{ .extra_range = .{ .start = toExtraIndex(lhs), .end = toExtraIndex(rhs) } }, + + // .node_and_token (rhs is passed through as a raw token index, not a node) + .grouped_expression, + .asm_input, + .asm_simple, + .field_access, + .unwrap_optional, + => .{ .node_and_token = .{ toIndex(lhs), rhs } }, + + // .opt_node_and_token + .asm_output, + => .{ .opt_node_and_token = .{ toOptIndex(lhs), rhs } }, + + // .opt_token_and_node + .test_decl, + .@"errdefer", + => .{ .opt_token_and_node = .{ toOptTokenIndex(lhs), toIndex(rhs) } }, + + // .opt_token_and_opt_node + .@"break", + .@"continue", + => .{ .opt_token_and_opt_node = .{ toOptTokenIndex(lhs), toOptIndex(rhs) } }, + + // .token_and_token + .error_set_decl, + .multiline_string_literal, + => .{ .token_and_token = .{ lhs, rhs } }, + + // .token_and_node + .anyframe_type, + => .{ .token_and_node = .{ lhs, toIndex(rhs) } }, + + // .node_and_node for slice_open (lhs[rhs..]) + .slice_open, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + .while_simple, + .for_simple, + .if_simple, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + .for_range, + => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, + + .@"for", + => .{ .@"for" = .{ toExtraIndex(lhs), @bitCast(rhs) } }, + + .while_cont, + .@"while", + .@"if", + .asm_legacy, + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + }; +} + // zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit(). 
fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast { var tokens = Ast.TokenList{}; @@ -202,27 +443,22 @@ fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast { try nodes.resize(gpa, c_ast.nodes.len); errdefer nodes.deinit(gpa); - for (0..c_ast.nodes.len) |i| + for (0..c_ast.nodes.len) |i| { + const tag = zigNode(c_ast.nodes.tags[i]); nodes.set(i, .{ - .tag = zigNode(c_ast.nodes.tags[i]), + .tag = tag, .main_token = c_ast.nodes.main_tokens[i], - .data = Ast.Node.Data{ - .lhs = c_ast.nodes.datas[i].lhs, - .rhs = c_ast.nodes.datas[i].rhs, - }, + .data = zigData(tag, c_ast.nodes.datas[i].lhs, c_ast.nodes.datas[i].rhs), }); + } - var extra_data = try gpa.alloc(Ast.Node.Index, c_ast.extra_data.len); + const extra_data = try gpa.alloc(u32, c_ast.extra_data.len); errdefer gpa.free(extra_data); - std.mem.copyForwards( - Ast.Node.Index, - extra_data[0..], - c_ast.extra_data.arr[0..c_ast.extra_data.len], - ); + @memcpy(extra_data, c_ast.extra_data.arr[0..c_ast.extra_data.len]); // creating a dummy `errors` slice, so deinit can free it. 
const errors = try gpa.alloc(Ast.Error, 0); - errdefer gpa.deinit(errors); + errdefer gpa.free(errors); return Ast{ .source = c_ast.source[0..c_ast.source_len :0], @@ -247,7 +483,9 @@ const maxInt = std.math.maxInt; var fixed_buffer_mem: [100 * 1024]u8 = undefined; fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 { - const stderr = io.getStdErr().writer(); + var stderr_buf: [4096]u8 = undefined; + var stderr_file_writer = std.fs.File.stderr().writer(&stderr_buf); + const stderr = &stderr_file_writer.interface; //var tree = try std.zig.Ast.parse(allocator, source, .zig); var c_tree = c.astParse(source, @intCast(source.len)); @@ -273,7 +511,7 @@ fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: * return error.ParseError; } - const formatted = try tree.render(allocator); + const formatted = try tree.renderAlloc(allocator); anything_changed.* = !mem.eql(u8, formatted, source); return formatted; } @@ -313,6 +551,7 @@ test "zig fmt: remove extra whitespace at start and end of file with comment bet ); } + test "my function" { try testCanonical( \\pub fn main() void { diff --git a/tokenizer.c b/tokenizer.c index 6dc88035a6..24bf4681ab 100644 --- a/tokenizer.c +++ b/tokenizer.c @@ -103,7 +103,7 @@ TokenizerToken tokenizerNext(Tokenizer* self) { TokenizerToken result = (TokenizerToken) { .tag = TOKEN_INVALID, .loc = { - .start = 0, + .start = self->index, }, }; diff --git a/tokenizer_test.zig b/tokenizer_test.zig index cdaa540c09..de4bc7a553 100644 --- a/tokenizer_test.zig +++ b/tokenizer_test.zig @@ -90,8 +90,6 @@ pub fn zigToken(token: c_uint) Token.Tag { c.TOKEN_KEYWORD_ANYFRAME => .keyword_anyframe, c.TOKEN_KEYWORD_ANYTYPE => .keyword_anytype, c.TOKEN_KEYWORD_ASM => .keyword_asm, - c.TOKEN_KEYWORD_ASYNC => .keyword_async, - c.TOKEN_KEYWORD_AWAIT => .keyword_await, c.TOKEN_KEYWORD_BREAK => .keyword_break, c.TOKEN_KEYWORD_CALLCONV => .keyword_callconv, c.TOKEN_KEYWORD_CATCH => .keyword_catch, @@ -128,7 
+126,6 @@ pub fn zigToken(token: c_uint) Token.Tag { c.TOKEN_KEYWORD_TRY => .keyword_try, c.TOKEN_KEYWORD_UNION => .keyword_union, c.TOKEN_KEYWORD_UNREACHABLE => .keyword_unreachable, - c.TOKEN_KEYWORD_USINGNAMESPACE => .keyword_usingnamespace, c.TOKEN_KEYWORD_VAR => .keyword_var, c.TOKEN_KEYWORD_VOLATILE => .keyword_volatile, c.TOKEN_KEYWORD_WHILE => .keyword_while,