diff --git a/README.md b/README.md index b4e658b660..a6ca6f47bd 100644 --- a/README.md +++ b/README.md @@ -4,11 +4,7 @@ zig0 aspires to be an interpreter of zig 0.15.1 written in C. Quick test: - zig build - -If it complains about formatting, here is a mutable command that will fix it: - - zig build fmt + zig build fmt && zig build # Debugging tips diff --git a/parser_test.zig b/parser_test.zig index fd7ba96ae0..9f945ad016 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -1,551 +1,9 @@ const std = @import("std"); -const testing = std.testing; - -const Ast = std.zig.Ast; -const Allocator = std.mem.Allocator; - -const c = @cImport({ - @cInclude("ast.h"); -}); - -const zigToken = @import("./tokenizer_test.zig").zigToken; - -fn zigNode(token: c_uint) Ast.Node.Tag { - return switch (token) { - c.AST_NODE_ROOT => .root, - c.AST_NODE_TEST_DECL => .test_decl, - c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl, - c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl, - c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl, - c.AST_NODE_ALIGNED_VAR_DECL => .aligned_var_decl, - c.AST_NODE_ERRDEFER => .@"errdefer", - c.AST_NODE_DEFER => .@"defer", - c.AST_NODE_CATCH => .@"catch", - c.AST_NODE_FIELD_ACCESS => .field_access, - c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional, - c.AST_NODE_EQUAL_EQUAL => .equal_equal, - c.AST_NODE_BANG_EQUAL => .bang_equal, - c.AST_NODE_LESS_THAN => .less_than, - c.AST_NODE_GREATER_THAN => .greater_than, - c.AST_NODE_LESS_OR_EQUAL => .less_or_equal, - c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal, - c.AST_NODE_ASSIGN_MUL => .assign_mul, - c.AST_NODE_ASSIGN_DIV => .assign_div, - c.AST_NODE_ASSIGN_MOD => .assign_mod, - c.AST_NODE_ASSIGN_ADD => .assign_add, - c.AST_NODE_ASSIGN_SUB => .assign_sub, - c.AST_NODE_ASSIGN_SHL => .assign_shl, - c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat, - c.AST_NODE_ASSIGN_SHR => .assign_shr, - c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and, - c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor, - c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or, 
- c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap, - c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap, - c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap, - c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat, - c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat, - c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat, - c.AST_NODE_ASSIGN => .assign, - c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure, - c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets, - c.AST_NODE_MUL => .mul, - c.AST_NODE_DIV => .div, - c.AST_NODE_MOD => .mod, - c.AST_NODE_ARRAY_MULT => .array_mult, - c.AST_NODE_MUL_WRAP => .mul_wrap, - c.AST_NODE_MUL_SAT => .mul_sat, - c.AST_NODE_ADD => .add, - c.AST_NODE_SUB => .sub, - c.AST_NODE_ARRAY_CAT => .array_cat, - c.AST_NODE_ADD_WRAP => .add_wrap, - c.AST_NODE_SUB_WRAP => .sub_wrap, - c.AST_NODE_ADD_SAT => .add_sat, - c.AST_NODE_SUB_SAT => .sub_sat, - c.AST_NODE_SHL => .shl, - c.AST_NODE_SHL_SAT => .shl_sat, - c.AST_NODE_SHR => .shr, - c.AST_NODE_BIT_AND => .bit_and, - c.AST_NODE_BIT_XOR => .bit_xor, - c.AST_NODE_BIT_OR => .bit_or, - c.AST_NODE_ORELSE => .@"orelse", - c.AST_NODE_BOOL_AND => .bool_and, - c.AST_NODE_BOOL_OR => .bool_or, - c.AST_NODE_BOOL_NOT => .bool_not, - c.AST_NODE_NEGATION => .negation, - c.AST_NODE_BIT_NOT => .bit_not, - c.AST_NODE_NEGATION_WRAP => .negation_wrap, - c.AST_NODE_ADDRESS_OF => .address_of, - c.AST_NODE_TRY => .@"try", - c.AST_NODE_OPTIONAL_TYPE => .optional_type, - c.AST_NODE_ARRAY_TYPE => .array_type, - c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel, - c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned, - c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel, - c.AST_NODE_PTR_TYPE => .ptr_type, - c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range, - c.AST_NODE_SLICE_OPEN => .slice_open, - c.AST_NODE_SLICE => .slice, - c.AST_NODE_SLICE_SENTINEL => .slice_sentinel, - c.AST_NODE_DEREF => .deref, - c.AST_NODE_ARRAY_ACCESS => .array_access, - c.AST_NODE_ARRAY_INIT_ONE => .array_init_one, - c.AST_NODE_ARRAY_INIT_ONE_COMMA => 
.array_init_one_comma, - c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two, - c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma, - c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot, - c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma, - c.AST_NODE_ARRAY_INIT => .array_init, - c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma, - c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one, - c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma, - c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two, - c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma, - c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot, - c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma, - c.AST_NODE_STRUCT_INIT => .struct_init, - c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma, - c.AST_NODE_CALL_ONE => .call_one, - c.AST_NODE_CALL_ONE_COMMA => .call_one_comma, - c.AST_NODE_CALL => .call, - c.AST_NODE_CALL_COMMA => .call_comma, - c.AST_NODE_SWITCH => .@"switch", - c.AST_NODE_SWITCH_COMMA => .switch_comma, - c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one, - c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one, - c.AST_NODE_SWITCH_CASE => .switch_case, - c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline, - c.AST_NODE_SWITCH_RANGE => .switch_range, - c.AST_NODE_WHILE_SIMPLE => .while_simple, - c.AST_NODE_WHILE_CONT => .while_cont, - c.AST_NODE_WHILE => .@"while", - c.AST_NODE_FOR_SIMPLE => .for_simple, - c.AST_NODE_FOR => .@"for", - c.AST_NODE_FOR_RANGE => .for_range, - c.AST_NODE_IF_SIMPLE => .if_simple, - c.AST_NODE_IF => .@"if", - c.AST_NODE_SUSPEND => .@"suspend", - c.AST_NODE_RESUME => .@"resume", - c.AST_NODE_CONTINUE => .@"continue", - c.AST_NODE_BREAK => .@"break", - c.AST_NODE_RETURN => .@"return", - c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple, - c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi, - c.AST_NODE_FN_PROTO_ONE => .fn_proto_one, - c.AST_NODE_FN_PROTO => .fn_proto, - c.AST_NODE_FN_DECL => .fn_decl, - c.AST_NODE_ANYFRAME_TYPE => 
.anyframe_type, - c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal, - c.AST_NODE_CHAR_LITERAL => .char_literal, - c.AST_NODE_NUMBER_LITERAL => .number_literal, - c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal, - c.AST_NODE_IDENTIFIER => .identifier, - c.AST_NODE_ENUM_LITERAL => .enum_literal, - c.AST_NODE_STRING_LITERAL => .string_literal, - c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal, - c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression, - c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two, - c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma, - c.AST_NODE_BUILTIN_CALL => .builtin_call, - c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma, - c.AST_NODE_ERROR_SET_DECL => .error_set_decl, - c.AST_NODE_CONTAINER_DECL => .container_decl, - c.AST_NODE_CONTAINER_DECL_TRAILING => .container_decl_trailing, - c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two, - c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing, - c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg, - c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing, - c.AST_NODE_TAGGED_UNION => .tagged_union, - c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing, - c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two, - c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing, - c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag, - c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing, - c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init, - c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align, - c.AST_NODE_CONTAINER_FIELD => .container_field, - c.AST_NODE_COMPTIME => .@"comptime", - c.AST_NODE_NOSUSPEND => .@"nosuspend", - c.AST_NODE_BLOCK_TWO => .block_two, - c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon, - c.AST_NODE_BLOCK => .block, - c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon, - c.AST_NODE_ASM_SIMPLE => .asm_simple, - c.AST_NODE_ASM_LEGACY => .asm_legacy, - 
c.AST_NODE_ASM => .@"asm", - c.AST_NODE_ASM_OUTPUT => .asm_output, - c.AST_NODE_ASM_INPUT => .asm_input, - c.AST_NODE_ERROR_VALUE => .error_value, - c.AST_NODE_ERROR_UNION => .error_union, - else => undefined, - }; -} - -fn toIndex(v: u32) Ast.Node.Index { - return @enumFromInt(v); -} - -fn toOptIndex(v: u32) Ast.Node.OptionalIndex { - return if (v == 0) .none else @enumFromInt(v); -} - -fn toExtraIndex(v: u32) Ast.ExtraIndex { - return @enumFromInt(v); -} - -fn toOptTokenIndex(v: u32) Ast.OptionalTokenIndex { - return @enumFromInt(v); -} - -fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data { - return switch (tag) { - // data unused - .identifier, - .string_literal, - .char_literal, - .number_literal, - .unreachable_literal, - .anyframe_literal, - .enum_literal, - .error_value, - => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, - - // .node (single node index) - .@"defer", - .@"comptime", - .@"nosuspend", - .@"suspend", - .@"resume", - .bool_not, - .negation, - .bit_not, - .negation_wrap, - .address_of, - .@"try", - .deref, - .optional_type, - => .{ .node = toIndex(lhs) }, - - // .opt_node (single optional node) - .@"return", - => .{ .opt_node = toOptIndex(lhs) }, - - // .node_and_node - .fn_decl, - .container_field_align, - .error_union, - .@"catch", - .equal_equal, - .bang_equal, - .less_than, - .greater_than, - .less_or_equal, - .greater_or_equal, - .assign_mul, - .assign_div, - .assign_mod, - .assign_add, - .assign_sub, - .assign_shl, - .assign_shl_sat, - .assign_shr, - .assign_bit_and, - .assign_bit_xor, - .assign_bit_or, - .assign_mul_wrap, - .assign_add_wrap, - .assign_sub_wrap, - .assign_mul_sat, - .assign_add_sat, - .assign_sub_sat, - .assign, - .merge_error_sets, - .mul, - .div, - .mod, - .array_mult, - .mul_wrap, - .mul_sat, - .add, - .sub, - .array_cat, - .add_wrap, - .sub_wrap, - .add_sat, - .sub_sat, - .shl, - .shl_sat, - .shr, - .bit_and, - .bit_xor, - .bit_or, - .@"orelse", - .bool_and, - .bool_or, - .array_type, 
- .array_access, - .switch_range, - => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, - - // .opt_node_and_opt_node - .fn_proto_simple, - .simple_var_decl, - .block_two, - .block_two_semicolon, - .builtin_call_two, - .builtin_call_two_comma, - .container_decl_two, - .container_decl_two_trailing, - .tagged_union_two, - .tagged_union_two_trailing, - .struct_init_dot_two, - .struct_init_dot_two_comma, - .array_init_dot_two, - .array_init_dot_two_comma, - => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, - - // .node_and_opt_node - .call_one, - .call_one_comma, - .struct_init_one, - .struct_init_one_comma, - .container_field_init, - .aligned_var_decl, - => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, - - // .node_and_node (array_init_one uses node_and_node, not - // node_and_opt_node) - .array_init_one, - .array_init_one_comma, - => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, - - // .opt_node_and_node - .ptr_type_aligned, - .ptr_type_sentinel, - .switch_case_one, - .switch_case_inline_one, - => .{ .opt_node_and_node = .{ toOptIndex(lhs), toIndex(rhs) } }, - - // .node_and_extra - .call, - .call_comma, - .container_field, - .array_type_sentinel, - .slice, - .slice_sentinel, - .array_init, - .array_init_comma, - .struct_init, - .struct_init_comma, - .@"switch", - .switch_comma, - .container_decl_arg, - .container_decl_arg_trailing, - .tagged_union_enum_tag, - .tagged_union_enum_tag_trailing, - .@"asm", - => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, - - // .extra_and_node - .assign_destructure, - .switch_case, - .switch_case_inline, - .ptr_type, - .ptr_type_bit_range, - => .{ .extra_and_node = .{ toExtraIndex(lhs), toIndex(rhs) } }, - - // .extra_and_opt_node - .global_var_decl, - .local_var_decl, - .fn_proto_multi, - .fn_proto_one, - .fn_proto, - => .{ .extra_and_opt_node = .{ toExtraIndex(lhs), toOptIndex(rhs) } }, - - // .extra_range (SubRange) - .root, - .block, - .block_semicolon, - 
.builtin_call, - .builtin_call_comma, - .container_decl, - .container_decl_trailing, - .tagged_union, - .tagged_union_trailing, - .array_init_dot, - .array_init_dot_comma, - .struct_init_dot, - .struct_init_dot_comma, - => .{ .extra_range = .{ .start = toExtraIndex(lhs), .end = toExtraIndex(rhs) } }, - - // .node_and_token - .grouped_expression, - .asm_input, - .asm_simple, - .field_access, - .unwrap_optional, - => .{ .node_and_token = .{ toIndex(lhs), rhs } }, - - // .opt_node_and_token - .asm_output, - => .{ .opt_node_and_token = .{ toOptIndex(lhs), rhs } }, - - // .opt_token_and_node - .test_decl, - .@"errdefer", - => .{ .opt_token_and_node = .{ toOptTokenIndex(lhs), toIndex(rhs) } }, - - // .opt_token_and_opt_node - .@"break", - .@"continue", - => .{ .opt_token_and_opt_node = .{ toOptTokenIndex(lhs), toOptIndex(rhs) } }, - - // .token_and_token - .error_set_decl, - .multiline_string_literal, - => .{ .token_and_token = .{ lhs, rhs } }, - - // .token_and_node - .anyframe_type, - => .{ .token_and_node = .{ lhs, toIndex(rhs) } }, - - // .node_and_node for slice_open (lhs[rhs..]) - .slice_open, - => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, - - .while_simple, - .for_simple, - .if_simple, - => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, - - .while_cont, - .@"while", - .@"if", - => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, - - .for_range, - => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, - - .@"for", - => .{ .@"for" = .{ toExtraIndex(lhs), @bitCast(rhs) } }, - - .asm_legacy, - => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, - }; -} - -// zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit(). 
-fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast { - var tokens = Ast.TokenList{}; - try tokens.resize(gpa, c_ast.tokens.len); - errdefer tokens.deinit(gpa); - - for (0..c_ast.tokens.len) |i| - tokens.set(i, .{ - .tag = zigToken(c_ast.tokens.tags[i]), - .start = c_ast.tokens.starts[i], - }); - - var nodes = Ast.NodeList{}; - try nodes.resize(gpa, c_ast.nodes.len); - errdefer nodes.deinit(gpa); - - for (0..c_ast.nodes.len) |i| { - const tag = zigNode(c_ast.nodes.tags[i]); - nodes.set(i, .{ - .tag = tag, - .main_token = c_ast.nodes.main_tokens[i], - .data = zigData(tag, c_ast.nodes.datas[i].lhs, c_ast.nodes.datas[i].rhs), - }); - } - - const extra_data = try gpa.alloc(u32, c_ast.extra_data.len); - errdefer gpa.free(extra_data); - @memcpy(extra_data, c_ast.extra_data.arr[0..c_ast.extra_data.len]); - - // creating a dummy `errors` slice, so deinit can free it. - const errors = try gpa.alloc(Ast.Error, 0); - errdefer gpa.free(errors); - - return Ast{ - .source = c_ast.source[0..c_ast.source_len :0], - .mode = .zig, - .tokens = tokens.slice(), - .nodes = nodes.slice(), - .extra_data = extra_data, - .errors = errors, - }; -} - - -// copy-past from parser_test.zig const mem = std.mem; const print = std.debug.print; const io = std.io; const maxInt = std.math.maxInt; -var fixed_buffer_mem: [100 * 1024]u8 = undefined; - -fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 { - var stderr_buf: [4096]u8 = undefined; - var stderr_file_writer = std.fs.File.stderr().writer(&stderr_buf); - const stderr = &stderr_file_writer.interface; - - //var tree = try std.zig.Ast.parse(allocator, source, .zig); - var c_tree = c.astParse(source, @intCast(source.len)); - defer c.astDeinit(&c_tree); - var tree = try zigAst(allocator, c_tree); - defer tree.deinit(allocator); - - for (tree.errors) |parse_error| { - const loc = tree.tokenLocation(0, parse_error.token); - try stderr.print("(memory buffer):{d}:{d}: error: ", .{ loc.line + 1, loc.column + 1 }); - try 
tree.renderError(parse_error, stderr); - try stderr.print("\n{s}\n", .{source[loc.line_start..loc.line_end]}); - { - var i: usize = 0; - while (i < loc.column) : (i += 1) { - try stderr.writeAll(" "); - } - try stderr.writeAll("^"); - } - try stderr.writeAll("\n"); - } - if (tree.errors.len != 0) { - return error.ParseError; - } - - const formatted = try tree.renderAlloc(allocator); - anything_changed.* = !mem.eql(u8, formatted, source); - return formatted; -} -fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void { - // reset the fixed buffer allocator each run so that it can be re-used for each - // iteration of the failing index - fba.reset(); - var anything_changed: bool = undefined; - const result_source = try testParse(source, allocator, &anything_changed); - try std.testing.expectEqualStrings(expected_source, result_source); - const changes_expected = source.ptr != expected_source.ptr; - if (anything_changed != changes_expected) { - print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected }); - return error.TestFailed; - } - try std.testing.expect(anything_changed == changes_expected); - allocator.free(result_source); -} -fn testTransform(source: [:0]const u8, expected_source: []const u8) !void { - var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]); - return std.testing.checkAllAllocationFailures(fixed_allocator.allocator(), testTransformImpl, .{ &fixed_allocator, source, expected_source }); -} -fn testCanonical(source: [:0]const u8) !void { - return testTransform(source, source); -} - - - - - test "zig fmt: remove extra whitespace at start and end of file with comment between" { try testTransform( \\ @@ -807,6 +265,13 @@ test "zig fmt: top-level tuple function call type" { ); } +test "zig fmt: top-level enum missing 'const name ='" { + try testError( + \\enum(u32) + \\ + , &[_]Error{.expected_token}); +} + test 
"zig fmt: top-level for/while loop" { try testCanonical( \\for (foo) |_| foo @@ -4897,7 +4362,6 @@ test "zig fmt: Indent comma correctly after multiline string literals in arg lis ); } - test "zig fmt: regression test for #5722" { try testCanonical( \\pub fn sendViewTags(self: Self) void { @@ -5178,7 +4642,6 @@ test "zig fmt: proper indent line comment after multi-line single expr while loo ); } - test "zig fmt: extern function with missing param name" { try testCanonical( \\extern fn a( @@ -5190,7 +4653,6 @@ test "zig fmt: extern function with missing param name" { ); } - test "zig fmt: respect extra newline between switch items" { try testCanonical( \\const a = switch (b) { @@ -5366,7 +4828,6 @@ test "zig fmt: preserve container doc comment in container without trailing comm ); } - test "zig fmt: no space before newline before multiline string" { try testCanonical( \\const S = struct { @@ -5477,7 +4938,6 @@ test "zig fmt: binop indentation in if statement" { ); } - test "zig fmt: test indentation of if expressions" { try testCanonical( \\test { @@ -5514,17 +4974,559 @@ test "zig fmt: test indentation of if expressions" { ); } - test "Ast header smoke test" { try std.testing.expectEqual(zigNode(c.AST_NODE_IF), Ast.Node.Tag.@"if"); } -test "my function" { - try testCanonical( - \\pub fn main() void { - \\ @panic("hello"); - \\} - \\ - ); +var fixed_buffer_mem: [100 * 1024]u8 = undefined; + +fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 { + var stderr_buf: [4096]u8 = undefined; + var stderr_file_writer = std.fs.File.stderr().writer(&stderr_buf); + const stderr = &stderr_file_writer.interface; + + //var tree = try std.zig.Ast.parse(allocator, source, .zig); + var c_tree = c.astParse(source, @intCast(source.len)); + defer c.astDeinit(&c_tree); + var tree = try zigAst(allocator, c_tree); + defer tree.deinit(allocator); + + for (tree.errors) |parse_error| { + const loc = tree.tokenLocation(0, parse_error.token); + try 
stderr.print("(memory buffer):{d}:{d}: error: ", .{ loc.line + 1, loc.column + 1 }); + try tree.renderError(parse_error, stderr); + try stderr.print("\n{s}\n", .{source[loc.line_start..loc.line_end]}); + { + var i: usize = 0; + while (i < loc.column) : (i += 1) { + try stderr.writeAll(" "); + } + try stderr.writeAll("^"); + } + try stderr.writeAll("\n"); + } + if (tree.errors.len != 0) { + return error.ParseError; + } + + const formatted = try tree.renderAlloc(allocator); + anything_changed.* = !mem.eql(u8, formatted, source); + return formatted; +} +fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void { + // reset the fixed buffer allocator each run so that it can be re-used for each + // iteration of the failing index + fba.reset(); + var anything_changed: bool = undefined; + const result_source = try testParse(source, allocator, &anything_changed); + try std.testing.expectEqualStrings(expected_source, result_source); + const changes_expected = source.ptr != expected_source.ptr; + if (anything_changed != changes_expected) { + print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected }); + return error.TestFailed; + } + try std.testing.expect(anything_changed == changes_expected); + allocator.free(result_source); +} +fn testTransform(source: [:0]const u8, expected_source: []const u8) !void { + var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]); + return std.testing.checkAllAllocationFailures(fixed_allocator.allocator(), testTransformImpl, .{ &fixed_allocator, source, expected_source }); +} +fn testCanonical(source: [:0]const u8) !void { + return testTransform(source, source); } +const Error = std.zig.Ast.Error.Tag; + +fn testError(source: [:0]const u8, expected_errors: []const Error) !void { + var tree = try std.zig.Ast.parse(std.testing.allocator, source, .zig); + defer tree.deinit(std.testing.allocator); + + 
std.testing.expectEqual(expected_errors.len, tree.errors.len) catch |err| { + std.debug.print("errors found: {any}\n", .{tree.errors}); + return err; + }; + for (expected_errors, 0..) |expected, i| { + try std.testing.expectEqual(expected, tree.errors[i].tag); + } +} + +const testing = std.testing; + +const Ast = std.zig.Ast; +const Allocator = std.mem.Allocator; + +const c = @cImport({ + @cInclude("ast.h"); +}); + +const zigToken = @import("./tokenizer_test.zig").zigToken; + +fn zigNode(token: c_uint) Ast.Node.Tag { + return switch (token) { + c.AST_NODE_ROOT => .root, + c.AST_NODE_TEST_DECL => .test_decl, + c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl, + c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl, + c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl, + c.AST_NODE_ALIGNED_VAR_DECL => .aligned_var_decl, + c.AST_NODE_ERRDEFER => .@"errdefer", + c.AST_NODE_DEFER => .@"defer", + c.AST_NODE_CATCH => .@"catch", + c.AST_NODE_FIELD_ACCESS => .field_access, + c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional, + c.AST_NODE_EQUAL_EQUAL => .equal_equal, + c.AST_NODE_BANG_EQUAL => .bang_equal, + c.AST_NODE_LESS_THAN => .less_than, + c.AST_NODE_GREATER_THAN => .greater_than, + c.AST_NODE_LESS_OR_EQUAL => .less_or_equal, + c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal, + c.AST_NODE_ASSIGN_MUL => .assign_mul, + c.AST_NODE_ASSIGN_DIV => .assign_div, + c.AST_NODE_ASSIGN_MOD => .assign_mod, + c.AST_NODE_ASSIGN_ADD => .assign_add, + c.AST_NODE_ASSIGN_SUB => .assign_sub, + c.AST_NODE_ASSIGN_SHL => .assign_shl, + c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat, + c.AST_NODE_ASSIGN_SHR => .assign_shr, + c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and, + c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor, + c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or, + c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap, + c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap, + c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap, + c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat, + c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat, + 
c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat, + c.AST_NODE_ASSIGN => .assign, + c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure, + c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets, + c.AST_NODE_MUL => .mul, + c.AST_NODE_DIV => .div, + c.AST_NODE_MOD => .mod, + c.AST_NODE_ARRAY_MULT => .array_mult, + c.AST_NODE_MUL_WRAP => .mul_wrap, + c.AST_NODE_MUL_SAT => .mul_sat, + c.AST_NODE_ADD => .add, + c.AST_NODE_SUB => .sub, + c.AST_NODE_ARRAY_CAT => .array_cat, + c.AST_NODE_ADD_WRAP => .add_wrap, + c.AST_NODE_SUB_WRAP => .sub_wrap, + c.AST_NODE_ADD_SAT => .add_sat, + c.AST_NODE_SUB_SAT => .sub_sat, + c.AST_NODE_SHL => .shl, + c.AST_NODE_SHL_SAT => .shl_sat, + c.AST_NODE_SHR => .shr, + c.AST_NODE_BIT_AND => .bit_and, + c.AST_NODE_BIT_XOR => .bit_xor, + c.AST_NODE_BIT_OR => .bit_or, + c.AST_NODE_ORELSE => .@"orelse", + c.AST_NODE_BOOL_AND => .bool_and, + c.AST_NODE_BOOL_OR => .bool_or, + c.AST_NODE_BOOL_NOT => .bool_not, + c.AST_NODE_NEGATION => .negation, + c.AST_NODE_BIT_NOT => .bit_not, + c.AST_NODE_NEGATION_WRAP => .negation_wrap, + c.AST_NODE_ADDRESS_OF => .address_of, + c.AST_NODE_TRY => .@"try", + c.AST_NODE_OPTIONAL_TYPE => .optional_type, + c.AST_NODE_ARRAY_TYPE => .array_type, + c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel, + c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned, + c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel, + c.AST_NODE_PTR_TYPE => .ptr_type, + c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range, + c.AST_NODE_SLICE_OPEN => .slice_open, + c.AST_NODE_SLICE => .slice, + c.AST_NODE_SLICE_SENTINEL => .slice_sentinel, + c.AST_NODE_DEREF => .deref, + c.AST_NODE_ARRAY_ACCESS => .array_access, + c.AST_NODE_ARRAY_INIT_ONE => .array_init_one, + c.AST_NODE_ARRAY_INIT_ONE_COMMA => .array_init_one_comma, + c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two, + c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma, + c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot, + c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma, + 
c.AST_NODE_ARRAY_INIT => .array_init, + c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma, + c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one, + c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma, + c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two, + c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma, + c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot, + c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma, + c.AST_NODE_STRUCT_INIT => .struct_init, + c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma, + c.AST_NODE_CALL_ONE => .call_one, + c.AST_NODE_CALL_ONE_COMMA => .call_one_comma, + c.AST_NODE_CALL => .call, + c.AST_NODE_CALL_COMMA => .call_comma, + c.AST_NODE_SWITCH => .@"switch", + c.AST_NODE_SWITCH_COMMA => .switch_comma, + c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one, + c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one, + c.AST_NODE_SWITCH_CASE => .switch_case, + c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline, + c.AST_NODE_SWITCH_RANGE => .switch_range, + c.AST_NODE_WHILE_SIMPLE => .while_simple, + c.AST_NODE_WHILE_CONT => .while_cont, + c.AST_NODE_WHILE => .@"while", + c.AST_NODE_FOR_SIMPLE => .for_simple, + c.AST_NODE_FOR => .@"for", + c.AST_NODE_FOR_RANGE => .for_range, + c.AST_NODE_IF_SIMPLE => .if_simple, + c.AST_NODE_IF => .@"if", + c.AST_NODE_SUSPEND => .@"suspend", + c.AST_NODE_RESUME => .@"resume", + c.AST_NODE_CONTINUE => .@"continue", + c.AST_NODE_BREAK => .@"break", + c.AST_NODE_RETURN => .@"return", + c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple, + c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi, + c.AST_NODE_FN_PROTO_ONE => .fn_proto_one, + c.AST_NODE_FN_PROTO => .fn_proto, + c.AST_NODE_FN_DECL => .fn_decl, + c.AST_NODE_ANYFRAME_TYPE => .anyframe_type, + c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal, + c.AST_NODE_CHAR_LITERAL => .char_literal, + c.AST_NODE_NUMBER_LITERAL => .number_literal, + c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal, + c.AST_NODE_IDENTIFIER => .identifier, + 
c.AST_NODE_ENUM_LITERAL => .enum_literal, + c.AST_NODE_STRING_LITERAL => .string_literal, + c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal, + c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression, + c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two, + c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma, + c.AST_NODE_BUILTIN_CALL => .builtin_call, + c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma, + c.AST_NODE_ERROR_SET_DECL => .error_set_decl, + c.AST_NODE_CONTAINER_DECL => .container_decl, + c.AST_NODE_CONTAINER_DECL_TRAILING => .container_decl_trailing, + c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two, + c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing, + c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg, + c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing, + c.AST_NODE_TAGGED_UNION => .tagged_union, + c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing, + c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two, + c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing, + c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag, + c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing, + c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init, + c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align, + c.AST_NODE_CONTAINER_FIELD => .container_field, + c.AST_NODE_COMPTIME => .@"comptime", + c.AST_NODE_NOSUSPEND => .@"nosuspend", + c.AST_NODE_BLOCK_TWO => .block_two, + c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon, + c.AST_NODE_BLOCK => .block, + c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon, + c.AST_NODE_ASM_SIMPLE => .asm_simple, + c.AST_NODE_ASM_LEGACY => .asm_legacy, + c.AST_NODE_ASM => .@"asm", + c.AST_NODE_ASM_OUTPUT => .asm_output, + c.AST_NODE_ASM_INPUT => .asm_input, + c.AST_NODE_ERROR_VALUE => .error_value, + c.AST_NODE_ERROR_UNION => .error_union, + else => undefined, + }; +} + +fn toIndex(v: u32) Ast.Node.Index { + return 
@enumFromInt(v); +} + +fn toOptIndex(v: u32) Ast.Node.OptionalIndex { + return if (v == 0) .none else @enumFromInt(v); +} + +fn toExtraIndex(v: u32) Ast.ExtraIndex { + return @enumFromInt(v); +} + +fn toOptTokenIndex(v: u32) Ast.OptionalTokenIndex { + return @enumFromInt(v); +} + +fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data { + return switch (tag) { + // data unused + .identifier, + .string_literal, + .char_literal, + .number_literal, + .unreachable_literal, + .anyframe_literal, + .enum_literal, + .error_value, + => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, + + // .node (single node index) + .@"defer", + .@"comptime", + .@"nosuspend", + .@"suspend", + .@"resume", + .bool_not, + .negation, + .bit_not, + .negation_wrap, + .address_of, + .@"try", + .deref, + .optional_type, + => .{ .node = toIndex(lhs) }, + + // .opt_node (single optional node) + .@"return", + => .{ .opt_node = toOptIndex(lhs) }, + + // .node_and_node + .fn_decl, + .container_field_align, + .error_union, + .@"catch", + .equal_equal, + .bang_equal, + .less_than, + .greater_than, + .less_or_equal, + .greater_or_equal, + .assign_mul, + .assign_div, + .assign_mod, + .assign_add, + .assign_sub, + .assign_shl, + .assign_shl_sat, + .assign_shr, + .assign_bit_and, + .assign_bit_xor, + .assign_bit_or, + .assign_mul_wrap, + .assign_add_wrap, + .assign_sub_wrap, + .assign_mul_sat, + .assign_add_sat, + .assign_sub_sat, + .assign, + .merge_error_sets, + .mul, + .div, + .mod, + .array_mult, + .mul_wrap, + .mul_sat, + .add, + .sub, + .array_cat, + .add_wrap, + .sub_wrap, + .add_sat, + .sub_sat, + .shl, + .shl_sat, + .shr, + .bit_and, + .bit_xor, + .bit_or, + .@"orelse", + .bool_and, + .bool_or, + .array_type, + .array_access, + .switch_range, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + // .opt_node_and_opt_node + .fn_proto_simple, + .simple_var_decl, + .block_two, + .block_two_semicolon, + .builtin_call_two, + .builtin_call_two_comma, + 
.container_decl_two, + .container_decl_two_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + .struct_init_dot_two, + .struct_init_dot_two_comma, + .array_init_dot_two, + .array_init_dot_two_comma, + => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } }, + + // .node_and_opt_node + .call_one, + .call_one_comma, + .struct_init_one, + .struct_init_one_comma, + .container_field_init, + .aligned_var_decl, + => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, + + // .node_and_node (array_init_one uses node_and_node, not + // node_and_opt_node) + .array_init_one, + .array_init_one_comma, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + // .opt_node_and_node + .ptr_type_aligned, + .ptr_type_sentinel, + .switch_case_one, + .switch_case_inline_one, + => .{ .opt_node_and_node = .{ toOptIndex(lhs), toIndex(rhs) } }, + + // .node_and_extra + .call, + .call_comma, + .container_field, + .array_type_sentinel, + .slice, + .slice_sentinel, + .array_init, + .array_init_comma, + .struct_init, + .struct_init_comma, + .@"switch", + .switch_comma, + .container_decl_arg, + .container_decl_arg_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .@"asm", + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + + // .extra_and_node + .assign_destructure, + .switch_case, + .switch_case_inline, + .ptr_type, + .ptr_type_bit_range, + => .{ .extra_and_node = .{ toExtraIndex(lhs), toIndex(rhs) } }, + + // .extra_and_opt_node + .global_var_decl, + .local_var_decl, + .fn_proto_multi, + .fn_proto_one, + .fn_proto, + => .{ .extra_and_opt_node = .{ toExtraIndex(lhs), toOptIndex(rhs) } }, + + // .extra_range (SubRange) + .root, + .block, + .block_semicolon, + .builtin_call, + .builtin_call_comma, + .container_decl, + .container_decl_trailing, + .tagged_union, + .tagged_union_trailing, + .array_init_dot, + .array_init_dot_comma, + .struct_init_dot, + .struct_init_dot_comma, + => .{ .extra_range = .{ .start = 
toExtraIndex(lhs), .end = toExtraIndex(rhs) } }, + + // .node_and_token + .grouped_expression, + .asm_input, + .asm_simple, + .field_access, + .unwrap_optional, + => .{ .node_and_token = .{ toIndex(lhs), rhs } }, + + // .opt_node_and_token + .asm_output, + => .{ .opt_node_and_token = .{ toOptIndex(lhs), rhs } }, + + // .opt_token_and_node + .test_decl, + .@"errdefer", + => .{ .opt_token_and_node = .{ toOptTokenIndex(lhs), toIndex(rhs) } }, + + // .opt_token_and_opt_node + .@"break", + .@"continue", + => .{ .opt_token_and_opt_node = .{ toOptTokenIndex(lhs), toOptIndex(rhs) } }, + + // .token_and_token + .error_set_decl, + .multiline_string_literal, + => .{ .token_and_token = .{ lhs, rhs } }, + + // .token_and_node + .anyframe_type, + => .{ .token_and_node = .{ lhs, toIndex(rhs) } }, + + // .node_and_node for slice_open (lhs[rhs..]) + .slice_open, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + .while_simple, + .for_simple, + .if_simple, + => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } }, + + .while_cont, + .@"while", + .@"if", + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + + .for_range, + => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } }, + + .@"for", + => .{ .@"for" = .{ toExtraIndex(lhs), @bitCast(rhs) } }, + + .asm_legacy, + => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } }, + }; +} + +// zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit(). 
+fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
+    // Tokens: one (tag, start) entry per C token, with the tag translated
+    // through zigToken.
+    const token_count = c_ast.tokens.len;
+    var token_list: Ast.TokenList = .{};
+    try token_list.resize(gpa, token_count);
+    errdefer token_list.deinit(gpa);
+
+    for (0..token_count) |tok_i| {
+        token_list.set(tok_i, .{
+            .tag = zigToken(c_ast.tokens.tags[tok_i]),
+            .start = c_ast.tokens.starts[tok_i],
+        });
+    }
+
+    // Nodes: the tag is translated first because zigData uses it to decide
+    // how the raw lhs/rhs pair is interpreted.
+    const node_count = c_ast.nodes.len;
+    var node_list: Ast.NodeList = .{};
+    try node_list.resize(gpa, node_count);
+    errdefer node_list.deinit(gpa);
+
+    for (0..node_count) |node_i| {
+        const raw = c_ast.nodes.datas[node_i];
+        const node_tag = zigNode(c_ast.nodes.tags[node_i]);
+        node_list.set(node_i, .{
+            .tag = node_tag,
+            .main_token = c_ast.nodes.main_tokens[node_i],
+            .data = zigData(node_tag, raw.lhs, raw.rhs),
+        });
+    }
+
+    // extra_data is copied verbatim into memory owned by `gpa`.
+    const extra_len = c_ast.extra_data.len;
+    const extra_copy = try gpa.alloc(u32, extra_len);
+    errdefer gpa.free(extra_copy);
+    @memcpy(extra_copy, c_ast.extra_data.arr[0..extra_len]);
+
+    // Zero-length placeholder for `errors`, allocated so that deinit has
+    // something it can free.
+    const no_errors = try gpa.alloc(Ast.Error, 0);
+    errdefer gpa.free(no_errors);
+
+    // `source` is a view into c_ast's buffer, not a copy.
+    const tree: Ast = .{
+        .source = c_ast.source[0..c_ast.source_len :0],
+        .mode = .zig,
+        .tokens = token_list.slice(),
+        .nodes = node_list.slice(),
+        .extra_data = extra_copy,
+        .errors = no_errors,
+    };
+    return tree;
+}
+
+// copy-paste from parser_test.zig