const std = @import("std");
const testing = std.testing;
const Ast = std.zig.Ast;
const Allocator = std.mem.Allocator;

const c = @cImport({
    @cInclude("ast.h");
});

const zigToken = @import("./tokenizer_test.zig").zigToken;

/// Translates a node tag constant from the C header (`AST_NODE_*`) into the
/// corresponding `std.zig.Ast.Node.Tag`.
///
/// Panics on a value that has no mapping below, so that a missing entry is
/// reported with the offending value instead of handing an undefined tag to
/// `zigData`.
fn zigNode(token: c_uint) Ast.Node.Tag {
    return switch (token) {
        c.AST_NODE_ROOT => .root,
        c.AST_NODE_TEST_DECL => .test_decl,
        c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl,
        c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl,
        c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl,
        c.AST_NODE_ALIGNED_VAR_DECL => .aligned_var_decl,
        c.AST_NODE_ERRDEFER => .@"errdefer",
        c.AST_NODE_DEFER => .@"defer",
        c.AST_NODE_CATCH => .@"catch",
        c.AST_NODE_FIELD_ACCESS => .field_access,
        c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional,
        c.AST_NODE_EQUAL_EQUAL => .equal_equal,
        c.AST_NODE_BANG_EQUAL => .bang_equal,
        c.AST_NODE_LESS_THAN => .less_than,
        c.AST_NODE_GREATER_THAN => .greater_than,
        c.AST_NODE_LESS_OR_EQUAL => .less_or_equal,
        c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal,
        c.AST_NODE_ASSIGN_MUL => .assign_mul,
        c.AST_NODE_ASSIGN_DIV => .assign_div,
        c.AST_NODE_ASSIGN_MOD => .assign_mod,
        c.AST_NODE_ASSIGN_ADD => .assign_add,
        c.AST_NODE_ASSIGN_SUB => .assign_sub,
        c.AST_NODE_ASSIGN_SHL => .assign_shl,
        c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat,
        c.AST_NODE_ASSIGN_SHR => .assign_shr,
        c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and,
        c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor,
        c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or,
        c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap,
        c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap,
        c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap,
        c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat,
        c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat,
        c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat,
        c.AST_NODE_ASSIGN => .assign,
        c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure,
        c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets,
        c.AST_NODE_MUL => .mul,
        c.AST_NODE_DIV => .div,
        c.AST_NODE_MOD => .mod,
        c.AST_NODE_ARRAY_MULT => .array_mult,
        c.AST_NODE_MUL_WRAP => .mul_wrap,
        c.AST_NODE_MUL_SAT => .mul_sat,
        c.AST_NODE_ADD => .add,
        c.AST_NODE_SUB => .sub,
        c.AST_NODE_ARRAY_CAT => .array_cat,
        c.AST_NODE_ADD_WRAP => .add_wrap,
        c.AST_NODE_SUB_WRAP => .sub_wrap,
        c.AST_NODE_ADD_SAT => .add_sat,
        c.AST_NODE_SUB_SAT => .sub_sat,
        c.AST_NODE_SHL => .shl,
        c.AST_NODE_SHL_SAT => .shl_sat,
        c.AST_NODE_SHR => .shr,
        c.AST_NODE_BIT_AND => .bit_and,
        c.AST_NODE_BIT_XOR => .bit_xor,
        c.AST_NODE_BIT_OR => .bit_or,
        c.AST_NODE_ORELSE => .@"orelse",
        c.AST_NODE_BOOL_AND => .bool_and,
        c.AST_NODE_BOOL_OR => .bool_or,
        c.AST_NODE_BOOL_NOT => .bool_not,
        c.AST_NODE_NEGATION => .negation,
        c.AST_NODE_BIT_NOT => .bit_not,
        c.AST_NODE_NEGATION_WRAP => .negation_wrap,
        c.AST_NODE_ADDRESS_OF => .address_of,
        c.AST_NODE_TRY => .@"try",
        c.AST_NODE_OPTIONAL_TYPE => .optional_type,
        c.AST_NODE_ARRAY_TYPE => .array_type,
        c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel,
        c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned,
        c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel,
        c.AST_NODE_PTR_TYPE => .ptr_type,
        c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range,
        c.AST_NODE_SLICE_OPEN => .slice_open,
        c.AST_NODE_SLICE => .slice,
        c.AST_NODE_SLICE_SENTINEL => .slice_sentinel,
        c.AST_NODE_DEREF => .deref,
        c.AST_NODE_ARRAY_ACCESS => .array_access,
        c.AST_NODE_ARRAY_INIT_ONE => .array_init_one,
        c.AST_NODE_ARRAY_INIT_ONE_COMMA => .array_init_one_comma,
        c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two,
        c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma,
        c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot,
        c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma,
        c.AST_NODE_ARRAY_INIT => .array_init,
        c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma,
        c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one,
        c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma,
        c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two,
        c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma,
        c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot,
        c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma,
        c.AST_NODE_STRUCT_INIT => .struct_init,
        c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma,
        c.AST_NODE_CALL_ONE => .call_one,
        c.AST_NODE_CALL_ONE_COMMA => .call_one_comma,
        c.AST_NODE_CALL => .call,
        c.AST_NODE_CALL_COMMA => .call_comma,
        c.AST_NODE_SWITCH => .@"switch",
        c.AST_NODE_SWITCH_COMMA => .switch_comma,
        c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one,
        c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one,
        c.AST_NODE_SWITCH_CASE => .switch_case,
        c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline,
        c.AST_NODE_SWITCH_RANGE => .switch_range,
        c.AST_NODE_WHILE_SIMPLE => .while_simple,
        c.AST_NODE_WHILE_CONT => .while_cont,
        c.AST_NODE_WHILE => .@"while",
        c.AST_NODE_FOR_SIMPLE => .for_simple,
        c.AST_NODE_FOR => .@"for",
        c.AST_NODE_FOR_RANGE => .for_range,
        c.AST_NODE_IF_SIMPLE => .if_simple,
        c.AST_NODE_IF => .@"if",
        c.AST_NODE_SUSPEND => .@"suspend",
        c.AST_NODE_RESUME => .@"resume",
        c.AST_NODE_CONTINUE => .@"continue",
        c.AST_NODE_BREAK => .@"break",
        c.AST_NODE_RETURN => .@"return",
        c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple,
        c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi,
        c.AST_NODE_FN_PROTO_ONE => .fn_proto_one,
        c.AST_NODE_FN_PROTO => .fn_proto,
        c.AST_NODE_FN_DECL => .fn_decl,
        c.AST_NODE_ANYFRAME_TYPE => .anyframe_type,
        c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal,
        c.AST_NODE_CHAR_LITERAL => .char_literal,
        c.AST_NODE_NUMBER_LITERAL => .number_literal,
        c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal,
        c.AST_NODE_IDENTIFIER => .identifier,
        c.AST_NODE_ENUM_LITERAL => .enum_literal,
        c.AST_NODE_STRING_LITERAL => .string_literal,
        c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal,
        c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression,
        c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two,
        c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma,
        c.AST_NODE_BUILTIN_CALL => .builtin_call,
        c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma,
        c.AST_NODE_ERROR_SET_DECL => .error_set_decl,
        c.AST_NODE_CONTAINER_DECL => .container_decl,
        c.AST_NODE_CONTAINER_DECL_TRAILING => .container_decl_trailing,
        c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two,
        c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing,
        c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg,
        c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing,
        c.AST_NODE_TAGGED_UNION => .tagged_union,
        c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing,
        c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two,
        c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing,
        c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag,
        c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing,
        c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init,
        c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align,
        c.AST_NODE_CONTAINER_FIELD => .container_field,
        c.AST_NODE_COMPTIME => .@"comptime",
        c.AST_NODE_NOSUSPEND => .@"nosuspend",
        c.AST_NODE_BLOCK_TWO => .block_two,
        c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon,
        c.AST_NODE_BLOCK => .block,
        c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon,
        c.AST_NODE_ASM_SIMPLE => .asm_simple,
        c.AST_NODE_ASM => .@"asm",
        c.AST_NODE_ASM_OUTPUT => .asm_output,
        c.AST_NODE_ASM_INPUT => .asm_input,
        c.AST_NODE_ERROR_VALUE => .error_value,
        c.AST_NODE_ERROR_UNION => .error_union,
        // Fail loudly: an undefined tag would be switched on by zigData.
        else => std.debug.panic("zigNode: unmapped C AST node tag {d}", .{token}),
    };
}

/// Reinterprets a raw `u32` as a node index.
fn toIndex(v: u32) Ast.Node.Index {
    return @enumFromInt(v);
}

/// Reinterprets a raw `u32` as an optional node index; the value 0 is
/// translated to `.none`.
fn toOptIndex(v: u32) Ast.Node.OptionalIndex {
    return if (v == 0) .none else @enumFromInt(v);
}

/// Reinterprets a raw `u32` as an index into `extra_data`.
fn toExtraIndex(v: u32) Ast.ExtraIndex {
    return @enumFromInt(v);
}

/// Reinterprets a raw `u32` as an optional token index. Unlike `toOptIndex`,
/// the value is passed through unchanged (0 is not special-cased).
fn toOptTokenIndex(v: u32) Ast.OptionalTokenIndex {
    return @enumFromInt(v);
}

/// Builds the `Ast.Node.Data` union value for a node of kind `tag` from the
/// raw `lhs`/`rhs` pair, selecting the union variant per tag.
///
/// NOTE(review): the variant chosen for each tag should be verified against
/// the doc comments on `std.zig.Ast.Node.Tag` for the targeted Zig version.
/// In particular `field_access`, `unwrap_optional`, `asm_simple`,
/// `optional_type`, `while_cont`, `while` and `if` may be documented with a
/// different variant than the one used here — TODO confirm.
fn zigData(tag: Ast.Node.Tag, lhs: u32, rhs: u32) Ast.Node.Data {
    return switch (tag) {
        // data unused
        .identifier,
        .string_literal,
        .char_literal,
        .number_literal,
        .unreachable_literal,
        .anyframe_literal,
        .enum_literal,
        .error_value,
        => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } },

        // .node (single node index)
        .@"defer",
        .@"comptime",
        .@"nosuspend",
        .@"suspend",
        .@"resume",
        .bool_not,
        .negation,
        .bit_not,
        .negation_wrap,
        .address_of,
        .@"try",
        .deref,
        => .{ .node = toIndex(lhs) },

        // .opt_node (single optional node)
        .@"return",
        .optional_type,
        => .{ .opt_node = toOptIndex(lhs) },

        // .node_and_node
        .fn_decl,
        .container_field_align,
        .error_union,
        .@"catch",
        .field_access,
        .unwrap_optional,
        .equal_equal,
        .bang_equal,
        .less_than,
        .greater_than,
        .less_or_equal,
        .greater_or_equal,
        .assign_mul,
        .assign_div,
        .assign_mod,
        .assign_add,
        .assign_sub,
        .assign_shl,
        .assign_shl_sat,
        .assign_shr,
        .assign_bit_and,
        .assign_bit_xor,
        .assign_bit_or,
        .assign_mul_wrap,
        .assign_add_wrap,
        .assign_sub_wrap,
        .assign_mul_sat,
        .assign_add_sat,
        .assign_sub_sat,
        .assign,
        .merge_error_sets,
        .mul,
        .div,
        .mod,
        .array_mult,
        .mul_wrap,
        .mul_sat,
        .add,
        .sub,
        .array_cat,
        .add_wrap,
        .sub_wrap,
        .add_sat,
        .sub_sat,
        .shl,
        .shl_sat,
        .shr,
        .bit_and,
        .bit_xor,
        .bit_or,
        .@"orelse",
        .bool_and,
        .bool_or,
        .array_type,
        .array_access,
        .switch_range,
        => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } },

        // .opt_node_and_opt_node
        .fn_proto_simple,
        .simple_var_decl,
        .block_two,
        .block_two_semicolon,
        .builtin_call_two,
        .builtin_call_two_comma,
        .container_decl_two,
        .container_decl_two_trailing,
        .tagged_union_two,
        .tagged_union_two_trailing,
        .struct_init_dot_two,
        .struct_init_dot_two_comma,
        .array_init_dot_two,
        .array_init_dot_two_comma,
        => .{ .opt_node_and_opt_node = .{ toOptIndex(lhs), toOptIndex(rhs) } },

        // .node_and_opt_node
        .call_one,
        .call_one_comma,
        .struct_init_one,
        .struct_init_one_comma,
        .container_field_init,
        .aligned_var_decl,
        .array_init_one,
        .array_init_one_comma,
        => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } },

        // .opt_node_and_node
        .ptr_type_aligned,
        .ptr_type_sentinel,
        .switch_case_one,
        .switch_case_inline_one,
        => .{ .opt_node_and_node = .{ toOptIndex(lhs), toIndex(rhs) } },

        // .node_and_extra
        .call,
        .call_comma,
        .container_field,
        .array_type_sentinel,
        .slice,
        .slice_sentinel,
        .array_init,
        .array_init_comma,
        .struct_init,
        .struct_init_comma,
        .@"switch",
        .switch_comma,
        .container_decl_arg,
        .container_decl_arg_trailing,
        .tagged_union_enum_tag,
        .tagged_union_enum_tag_trailing,
        .@"asm",
        => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } },

        // .extra_and_node
        .assign_destructure,
        .switch_case,
        .switch_case_inline,
        .ptr_type,
        .ptr_type_bit_range,
        => .{ .extra_and_node = .{ toExtraIndex(lhs), toIndex(rhs) } },

        // .extra_and_opt_node
        .global_var_decl,
        .local_var_decl,
        .fn_proto_multi,
        .fn_proto_one,
        .fn_proto,
        => .{ .extra_and_opt_node = .{ toExtraIndex(lhs), toOptIndex(rhs) } },

        // .extra_range (SubRange)
        .root,
        .block,
        .block_semicolon,
        .builtin_call,
        .builtin_call_comma,
        .container_decl,
        .container_decl_trailing,
        .tagged_union,
        .tagged_union_trailing,
        .array_init_dot,
        .array_init_dot_comma,
        .struct_init_dot,
        .struct_init_dot_comma,
        => .{ .extra_range = .{ .start = toExtraIndex(lhs), .end = toExtraIndex(rhs) } },

        // .node_and_token
        .grouped_expression,
        .asm_input,
        => .{ .node_and_token = .{ toIndex(lhs), rhs } },

        // .opt_node_and_token
        .asm_output,
        => .{ .opt_node_and_token = .{ toOptIndex(lhs), rhs } },

        // .opt_token_and_node
        .test_decl,
        .@"errdefer",
        => .{ .opt_token_and_node = .{ toOptTokenIndex(lhs), toIndex(rhs) } },

        // .opt_token_and_opt_node
        .@"break",
        .@"continue",
        => .{ .opt_token_and_opt_node = .{ toOptTokenIndex(lhs), toOptIndex(rhs) } },

        // .token_and_token
        .error_set_decl,
        .multiline_string_literal,
        => .{ .token_and_token = .{ lhs, rhs } },

        // .token_and_node
        .anyframe_type,
        => .{ .token_and_node = .{ lhs, toIndex(rhs) } },

        // .node_and_node for slice_open (lhs[rhs..])
        .slice_open,
        => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } },

        .while_simple,
        .while_cont,
        .@"while",
        .for_simple,
        .if_simple,
        .@"if",
        => .{ .node_and_node = .{ toIndex(lhs), toIndex(rhs) } },

        .for_range,
        => .{ .node_and_opt_node = .{ toIndex(lhs), toOptIndex(rhs) } },

        // rhs carries the packed `for` payload; reinterpret its bits as-is.
        .@"for",
        => .{ .@"for" = .{ toExtraIndex(lhs), @bitCast(rhs) } },

        .asm_simple,
        .asm_legacy,
        => .{ .node_and_extra = .{ toIndex(lhs), toExtraIndex(rhs) } },
    };
}

/// Converts a `c.Ast` into a `std.zig.Ast`.
///
/// Tokens, nodes and `extra_data` are copied into memory allocated from `gpa`;
/// the result should be released with `deinit(gpa)`. The `source` slice of the
/// returned tree is NOT copied: it aliases `c_ast.source`, so `c_ast` has to
/// stay alive for as long as the returned tree is used.
fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
    var tokens = Ast.TokenList{};
    // Registered before the first fallible call so every error path is covered.
    errdefer tokens.deinit(gpa);
    try tokens.resize(gpa, c_ast.tokens.len);
    for (0..c_ast.tokens.len) |i| {
        tokens.set(i, .{
            .tag = zigToken(c_ast.tokens.tags[i]),
            .start = c_ast.tokens.starts[i],
        });
    }

    var nodes = Ast.NodeList{};
    errdefer nodes.deinit(gpa);
    try nodes.resize(gpa, c_ast.nodes.len);
    for (0..c_ast.nodes.len) |i| {
        const tag = zigNode(c_ast.nodes.tags[i]);
        const raw = c_ast.nodes.datas[i];
        nodes.set(i, .{
            .tag = tag,
            .main_token = c_ast.nodes.main_tokens[i],
            .data = zigData(tag, raw.lhs, raw.rhs),
        });
    }

    const extra_data = try gpa.alloc(u32, c_ast.extra_data.len);
    errdefer gpa.free(extra_data);
    @memcpy(extra_data, c_ast.extra_data.arr[0..c_ast.extra_data.len]);

    // A zero-length `errors` slice, so that deinit has something to free.
    const errors = try gpa.alloc(Ast.Error, 0);
    errdefer gpa.free(errors);

    return Ast{
        .source = c_ast.source[0..c_ast.source_len :0],
        .mode = .zig,
        .tokens = tokens.slice(),
        .nodes = nodes.slice(),
        .extra_data = extra_data,
        .errors = errors,
    };
}

test "Ast header smoke test" {
    try std.testing.expectEqual(zigNode(c.AST_NODE_IF), Ast.Node.Tag.@"if");
}

// copy-paste from parser_test.zig

const mem = std.mem;
const print = std.debug.print;
const io = std.io;
const maxInt = std.math.maxInt;

var fixed_buffer_mem: [100 * 1024]u8 = undefined;

/// Parses `source` with the C parser (`c.astParse`), converts the result with
/// `zigAst`, and renders it back to text.
///
/// Returns the rendered source, allocated with `allocator` (caller frees), and
/// stores in `anything_changed` whether the rendered text differs from
/// `source`. Returns `error.ParseError` after printing diagnostics to stderr
/// when the tree carries any errors.
fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 {
    var stderr_buf: [4096]u8 = undefined;
    var stderr_file_writer = std.fs.File.stderr().writer(&stderr_buf);
    const stderr = &stderr_file_writer.interface;

    // The C parser stands in for `std.zig.Ast.parse(allocator, source, .zig)`.
    var c_tree = c.astParse(source, @intCast(source.len));
    defer c.astDeinit(&c_tree);

    // Declared after c_tree so it is released first: its `source` aliases c_tree.
    var tree = try zigAst(allocator, c_tree);
    defer tree.deinit(allocator);

    for (tree.errors) |parse_error| {
        const loc = tree.tokenLocation(0, parse_error.token);
        try stderr.print("(memory buffer):{d}:{d}: error: ", .{ loc.line + 1, loc.column + 1 });
        try tree.renderError(parse_error, stderr);
        try stderr.print("\n{s}\n", .{source[loc.line_start..loc.line_end]});
        {
            var i: usize = 0;
            while (i < loc.column) : (i += 1) {
                try stderr.writeAll(" ");
            }
            try stderr.writeAll("^");
        }
        try stderr.writeAll("\n");
    }
    if (tree.errors.len != 0) {
        // The writer is buffered; push the diagnostics out before bailing.
        try stderr.flush();
        return error.ParseError;
    }

    const formatted = try tree.renderAlloc(allocator);
    anything_changed.* = !mem.eql(u8, formatted, source);
    return formatted;
}

/// One iteration of the allocation-failure sweep driven by `testTransform`:
/// renders `source` and checks that the output equals `expected_source` and
/// that the "changed" flag matches whether a change was expected (i.e.
/// whether `source` and `expected_source` are distinct pointers).
fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void {
    // reset the fixed buffer allocator each run so that it can be re-used for each
    // iteration of the failing index
    fba.reset();
    var anything_changed: bool = undefined;
    const result_source = try testParse(source, allocator, &anything_changed);
    // Release on every exit path, including failed expectations below.
    defer allocator.free(result_source);

    try std.testing.expectEqualStrings(expected_source, result_source);
    const changes_expected = source.ptr != expected_source.ptr;
    if (anything_changed != changes_expected) {
        print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected });
        return error.TestFailed;
    }
}

/// Asserts that rendering `source` yields `expected_source`, re-running the
/// check under `std.testing.checkAllAllocationFailures` on top of the shared
/// fixed buffer.
fn testTransform(source: [:0]const u8, expected_source: []const u8) !void {
    var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
    return std.testing.checkAllAllocationFailures(fixed_allocator.allocator(), testTransformImpl, .{ &fixed_allocator, source, expected_source });
}

/// Asserts that `source` is rendered back unchanged.
fn testCanonical(source: [:0]const u8) !void {
    return testTransform(source, source);
}

test "zig fmt: remove extra whitespace at start and end of file with comment between" {
    try testTransform(
        \\
        \\
        \\// hello
        \\
        \\
    ,
        \\// hello
        \\
    );
}

test "my function" {
    try testCanonical(
        \\pub fn main() void {
        \\    @panic("hello");
        \\}
        \\
    );
}