diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index cdf80880de..0da83244fd 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2418,7 +2418,7 @@ fn genBodyBlock(self: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(12_300); + @setEvalBranchQuota(12_400); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -2486,8 +2486,6 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .reduce_optimized => try cg.airReduce(inst), .aggregate_init => try cg.airAggregateInit(inst), .prefetch => try cg.airPrefetch(inst), - - .array_elem_val => try cg.airArrayElemVal(inst), // zig fmt: on .arg => if (cg.debug_output != .none) { @@ -15150,7 +15148,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .lea, .dst0p, .leaa(.src0, .add_src0_elem_size_times_src1), ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leaa(.src0, .add_src0_elem_size_mul_src1), ._, ._ }, } }, }, .{ .dst_constraints = .{ .{ .elem_size_is = 1 }, .any }, @@ -15264,7 +15262,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .lea, .dst0p, .leaa(.src0, .sub_src0_elem_size_times_src1), ._, ._ }, + .{ ._, ._, .lea, .dst0p, .leaa(.src0, .sub_src0_elem_size_mul_src1), ._, ._ }, } }, }, .{ .dst_constraints = .{ .{ .elem_size_is = 1 }, .any }, @@ -52951,6 +52949,200 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { try ops[0].toOffset(0, cg); try ops[0].finish(inst, &.{ty_op.operand}, &ops, cg); }, + .array_elem_val => if (use_old) try cg.airArrayElemVal(inst) else { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + const array_ty = cg.typeOf(bin_op.lhs); + const res_ty = array_ty.elemType2(zcu); + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + var res: [1]Temp = undefined; + cg.select(&res, &.{res_ty}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .bool_vec = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .imm32, .none } }, + }, + .dst_temps = .{ .{ .cc = .c }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0d, .ua(.none, .add_src1_rem_32), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .bool_vec = .dword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .cc = .c }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0d, .src1d, ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .bool_vec = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .imm32, .none } }, + }, + .dst_temps = .{ .{ .cc = .c }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0q, .ua(.none, .add_src1_rem_64), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .bool_vec = .qword }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .cc = .c }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0q, .src1q, ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any_bool_vec, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .imm32, .none } }, + }, + .dst_temps = .{ .{ .cc = .c }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .mema(.src0d, .add_src1_div_8_down_4), .ua(.none, .add_src1_rem_32), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .any_bool_vec, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .cc = .c }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, ._, .bt, .src0d, .src1d, ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .simm32, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .mema(.src0b, .add_src0_elem_size_mul_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .memi(.src0b, .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .simm32, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .mema(.src0w, .add_src0_elem_size_mul_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .memsi(.src0w, .@"2", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .simm32, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .mema(.src0d, .add_src0_elem_size_mul_src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0d, .memsi(.src0d, .@"4", .src1), ._, ._ }, + } }, + }, .{ + .dst_constraints = .{ .{ .int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .simm32, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .mema(.src0q, .add_src0_elem_size_mul_src1), ._, ._ }, + } }, + }, .{ + .required_features = .{ .@"64bit", null, null, null }, + .dst_constraints = .{ .{ .int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .dst0q, .memsi(.src0q, .@"8", .src1), ._, ._ }, + } }, + } }) catch |err| switch (err) { + error.SelectFailed => { + const elem_size = res_ty.abiSize(zcu); + const base = try cg.tempAllocReg(.usize, abi.RegisterClass.gp); + while (try ops[0].toBase(false, cg) or + try ops[1].toRegClass(true, .general_purpose, cg)) + {} + const base_reg = base.tracking(cg).short.register.to64(); + const rhs_reg = ops[1].tracking(cg).short.register.to64(); + if (!std.math.isPowerOfTwo(elem_size)) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + rhs_reg, + rhs_reg, + .u(elem_size), + ); + try cg.asmRegisterMemory( + .{ ._, .lea }, + base_reg, + try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }), + ); + } else if (elem_size > 8) { + try cg.spillEflagsIfOccupied(); + try cg.asmRegisterImmediate( + .{ ._l, .sh }, + rhs_reg, + .u(std.math.log2_int(u64, elem_size)), + ); + try cg.asmRegisterMemory( + .{ ._, .lea }, + base_reg, + try ops[0].tracking(cg).short.mem(cg, .{ .index = rhs_reg }), + ); + } else try cg.asmRegisterMemory( + .{ ._, .lea }, + base_reg, + try ops[0].tracking(cg).short.mem(cg, .{ + .index = rhs_reg, + .scale = .fromFactor(@intCast(elem_size)), + }), + ); + // Hack around Sema insanity: lhs could be an arbitrarily large comptime-known array + // which could easily get spilled by the upcoming `load`, which would infinite recurse + // since spilling an array requires the same operation that triggered the spill. + try ops[0].die(cg); + ops[0] = base; + res[0] = try ops[0].load(res_ty, .{}, cg); + }, + else => |e| return e, + }; + try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); + }, .slice_elem_val, .ptr_elem_val => |air_tag| if (use_old) switch (air_tag) { else => unreachable, .slice_elem_val => try cg.airSliceElemVal(inst), @@ -52968,7 +53160,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .movzx, .dst0d, .leaa(.src0b, .add_src0_elem_size_times_src1), ._, ._ }, + .{ ._, ._, .movzx, .dst0d, .leaa(.src0b, .add_src0_elem_size_mul_src1), ._, ._ }, } }, }, .{ .dst_constraints = .{ .{ .int = .byte }, .any }, @@ -52986,7 +53178,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .movzx, .dst0d, .leaa(.src0w, .add_src0_elem_size_times_src1), ._, ._ }, + .{ ._, ._, .movzx, .dst0d, .leaa(.src0w, .add_src0_elem_size_mul_src1), ._, ._ }, } }, }, .{ .dst_constraints = .{ .{ .int = .word }, .any }, @@ -53004,7 +53196,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .dst0d, .leaa(.src0d, .add_src0_elem_size_times_src1), ._, ._ }, + .{ ._, ._, .mov, .dst0d, .leaa(.src0d, .add_src0_elem_size_mul_src1), ._, ._ }, } }, }, .{ .dst_constraints = .{ .{ .int = .dword }, .any }, @@ -53022,7 +53214,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .dst_temps = .{ .{ .rc = .general_purpose }, .unused }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .dst0q, .leaa(.src0q, .add_src0_elem_size_times_src1), ._, ._ }, + .{ ._, ._, .mov, .dst0q, .leaa(.src0q, .add_src0_elem_size_mul_src1), ._, ._ }, } }, }, .{ .required_features = .{ .@"64bit", null, null, null }, @@ -53040,8 +53232,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { while (true) for (&ops) |*op| { if (try op.toRegClass(true, .general_purpose, cg)) break; } else break; - const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); - const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); + const lhs_reg = ops[0].tracking(cg).short.register.to64(); + const rhs_reg = ops[1].tracking(cg).short.register.to64(); if (!std.math.isPowerOfTwo(elem_size)) { try cg.spillEflagsIfOccupied(); try cg.asmRegisterRegisterImmediate( @@ -53052,7 +53244,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ); try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + .mod = .{ .rm = .{ .index = rhs_reg } }, }); } else if (elem_size > 8) { try cg.spillEflagsIfOccupied(); @@ -53063,12 +53255,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ); try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + .mod = .{ .rm = .{ .index = rhs_reg } }, }); } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, .mod = .{ .rm = .{ - .size = .qword, .index = rhs_reg, .scale = .fromFactor(@intCast(elem_size)), } }, @@ -53095,8 +53286,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { while (true) for (&ops) |*op| { if (try op.toRegClass(true, .general_purpose, cg)) break; } else break; - const lhs_reg = ops[0].unwrap(cg).temp.tracking(cg).short.register.to64(); - const rhs_reg = ops[1].unwrap(cg).temp.tracking(cg).short.register.to64(); + const lhs_reg = ops[0].tracking(cg).short.register.to64(); + const rhs_reg = ops[1].tracking(cg).short.register.to64(); if (!std.math.isPowerOfTwo(elem_size)) { try cg.spillEflagsIfOccupied(); try cg.asmRegisterRegisterImmediate( @@ -53107,7 +53298,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ); try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + .mod = .{ .rm = .{ .index = rhs_reg } }, }); } else if (elem_size > 8) { try cg.spillEflagsIfOccupied(); @@ -53118,12 +53309,11 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { ); try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, - .mod = .{ .rm = .{ .size = .qword, .index = rhs_reg } }, + .mod = .{ .rm = .{ .index = rhs_reg } }, }); } else try cg.asmRegisterMemory(.{ ._, .lea }, lhs_reg, .{ .base = .{ .reg = lhs_reg }, .mod = .{ .rm = .{ - .size = .qword, .index = rhs_reg, .scale = .fromFactor(@intCast(elem_size)), } }, @@ -75183,7 +75373,7 @@ fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { registerAlias(dst_reg, dst_abi_size), .{ .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ .size = .qword, .disp = pl_off } }, + .mod = .{ .rm = .{ .disp = pl_off } }, }, ); break :result .{ .register = dst_reg }; @@ -75446,7 +75636,7 @@ fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void { registerAlias(dst_reg, dst_abi_size), .{ .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ .size = .qword, .disp = 8 } }, + .mod = .{ .rm = .{ .disp = 8 } }, }, ); @@ -75700,7 +75890,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { try self.asmRegisterMemory( .{ ._, .lea }, addr_reg, - .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, + .{ .base = .{ .frame = frame_index } }, ); }, .load_frame => |frame_addr| try self.asmRegisterMemory( @@ -75708,7 +75898,7 @@ fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { addr_reg, .{ .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ .size = .qword, .disp = frame_addr.off } }, + .mod = .{ .rm = .{ .disp = frame_addr.off } }, }, ), .memory, @@ -76717,7 +76907,6 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void { .{ .base = .{ .reg = dst.to64() }, .mod = .{ .rm = .{ - .size = .qword, .index = tmp.to64(), .scale = .@"4", } }, @@ -76744,7 +76933,6 @@ fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void { .{ .base = .{ .reg = tmp.to64() }, .mod = .{ .rm = .{ - .size = .qword, .index = dst.to64(), .scale = .@"2", } }, @@ -85591,7 +85779,6 @@ fn genSetReg( dst_reg.to64(), .{ .base = .{ .reloc = sym_off.sym_index }, - .mod = .{ .rm = .{ .size = .qword } }, }, ); if (sym_off.off != 0) try self.asmRegisterMemory( @@ -85599,10 +85786,7 @@ fn genSetReg( dst_reg.to64(), .{ .base = .{ .reg = dst_reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = sym_off.off, - } }, + .mod = .{ .rm = .{ .disp = sym_off.off } }, }, ); }, @@ -85816,18 +86000,12 @@ fn genSetMem( const src_reg = registerAlias(reg_off.reg, abi_size); try self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{ .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = reg_off.off, - } }, + .mod = .{ .rm = .{ .disp = reg_off.off } }, }); try self.genSetMem(base, disp, ty, .{ .register = reg_off.reg }, opts); return self.asmRegisterMemory(.{ ._, .lea }, src_reg, .{ .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = -reg_off.off, - } }, + .mod = .{ .rm = .{ .disp = -reg_off.off } }, }); }, else => |e| return e, @@ -87185,10 +87363,7 @@ fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void { start_reg.to64(), .{ .base = .{ .reg = addr_reg.to64() }, - .mod = .{ .rm = .{ - .size = .dword, - .index = start_reg.to64(), - } }, + .mod = .{ .rm = .{ .index = start_reg.to64() } }, }, ); try self.asmRegisterMemory( @@ -87196,10 +87371,7 @@ fn airErrorName(self: *CodeGen, inst: Air.Inst.Index) !void { end_reg.to32(), .{ .base = .{ .reg = end_reg.to64() }, - .mod = .{ .rm = .{ - .size = .byte, - .disp = -1, - } }, + .mod = .{ .rm = .{ .disp = -1 } }, }, ); @@ -89375,17 +89547,11 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{}); if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{ .base = .{ .reg = addr_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .index = offset_reg.to64(), - } }, + .mod = .{ .rm = .{ .index = offset_reg.to64() } }, }); try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{ .base = .{ .reg = offset_reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = 8, - } }, + .mod = .{ .rm = .{ .disp = 8 } }, }); try self.genCopy(.c_uint, gp_offset, .{ .register = offset_reg }, .{}); const done_reloc = try self.asmJmpReloc(undefined); @@ -89394,10 +89560,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{}); try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{ .base = .{ .reg = addr_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)), - } }, + .mod = .{ .rm = .{ .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)) } }, }); try self.genCopy( ptr_anyopaque_ty, @@ -89423,17 +89586,11 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetReg(addr_reg, ptr_anyopaque_ty, reg_save_area, .{}); if (!unused) try self.asmRegisterMemory(.{ ._, .lea }, addr_reg, .{ .base = .{ .reg = addr_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .index = offset_reg.to64(), - } }, + .mod = .{ .rm = .{ .index = offset_reg.to64() } }, }); try self.asmRegisterMemory(.{ ._, .lea }, offset_reg, .{ .base = .{ .reg = offset_reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = 16, - } }, + .mod = .{ .rm = .{ .disp = 16 } }, }); try self.genCopy(.c_uint, fp_offset, .{ .register = offset_reg }, .{}); const done_reloc = try self.asmJmpReloc(undefined); @@ -89442,10 +89599,7 @@ fn airVaArg(self: *CodeGen, inst: Air.Inst.Index) !void { try self.genSetReg(addr_reg, ptr_anyopaque_ty, overflow_arg_area, .{}); try self.asmRegisterMemory(.{ ._, .lea }, offset_reg.to64(), .{ .base = .{ .reg = addr_reg }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)), - } }, + .mod = .{ .rm = .{ .disp = @intCast(@max(promote_ty.abiSize(zcu), 8)) } }, }); try self.genCopy( ptr_anyopaque_ty, @@ -90505,10 +90659,7 @@ const Temp = struct { new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = off, - } }, + .mod = .{ .rm = .{ .disp = off } }, }); }, .register_offset => |reg_off| { @@ -90517,10 +90668,7 @@ const Temp = struct { new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg_off.reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = reg_off.off + off, - } }, + .mod = .{ .rm = .{ .disp = reg_off.off + off } }, }); }, .lea_symbol => |sym_off| new_temp_index.tracking(cg).* = .init(.{ .lea_symbol = .{ @@ -90627,10 +90775,7 @@ const Temp = struct { new_temp_index.tracking(cg).* = .init(.{ .register = new_reg }); try cg.asmRegisterMemory(.{ ._, .lea }, new_reg.to64(), .{ .base = .{ .reg = reg_off.reg.to64() }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = reg_off.off + @as(u31, limb_index) * 8, - } }, + .mod = .{ .rm = .{ .disp = reg_off.off + @as(u31, limb_index) * 8 } }, }); }, .load_symbol => |sym_off| { @@ -93462,13 +93607,14 @@ const Select = struct { elem_size, src0_elem_size, dst0_elem_size, - src0_elem_size_times_src1, + src0_elem_size_mul_src1, + src1, log2_src0_elem_size, smin, smax, umax, }, - op: enum(u2) { mul, div, rem_8_mul }, + op: enum(u2) { mul, div, div_8_down, rem_8_mul }, rhs: Memory.Scale, const none: Adjust = .{ .sign = .pos, .lhs = .none, .op = .mul, .rhs = .@"1" }; @@ -93512,8 +93658,11 @@ const Select = struct { const add_8_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"8" }; const add_src0_elem_size_div_8: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size, .op = .div, .rhs = .@"8" }; const sub_src0_elem_size: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size, .op = .mul, .rhs = .@"1" }; - const add_src0_elem_size_times_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" }; - const sub_src0_elem_size_times_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_times_src1, .op = .mul, .rhs = .@"1" }; + const add_src0_elem_size_mul_src1: Adjust = .{ .sign = .pos, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" }; + const sub_src0_elem_size_mul_src1: Adjust = .{ .sign = .neg, .lhs = .src0_elem_size_mul_src1, .op = .mul, .rhs = .@"1" }; + const add_src1_div_8_down_4: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .div_8_down, .rhs = .@"4" }; + const add_src1_rem_32: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"4" }; + const add_src1_rem_64: Adjust = .{ .sign = .pos, .lhs = .src1, .op = .rem_8_mul, .rhs = .@"8" }; const add_log2_src0_elem_size: Adjust = .{ .sign = .pos, .lhs = .log2_src0_elem_size, .op = .mul, .rhs = .@"1" }; const add_dst0_elem_size: Adjust = .{ .sign = .pos, .lhs = .dst0_elem_size, .op = .mul, .rhs = .@"1" }; const add_elem_limbs: Adjust = .{ .sign = .pos, .lhs = .elem_limbs, .op = .mul, .rhs = .@"1" }; @@ -94086,8 +94235,9 @@ const Select = struct { .elem_size => @intCast(op.base.ref.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), .src0_elem_size => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), .dst0_elem_size => @intCast(Select.Operand.Ref.dst0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu)), - .src0_elem_size_times_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) * + .src0_elem_size_mul_src1 => @intCast(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu) * Select.Operand.Ref.src1.valueOf(s).immediate), + .src1 => @intCast(Select.Operand.Ref.src1.valueOf(s).immediate), .log2_src0_elem_size => @intCast(std.math.log2(Select.Operand.Ref.src0.typeOf(s).elemType2(s.cg.pt.zcu).abiSize(s.cg.pt.zcu))), .smin => @as(SignedImm, std.math.minInt(SignedImm)) >> @truncate( -%op.base.ref.typeOf(s).scalarType(s.cg.pt.zcu).bitSize(s.cg.pt.zcu), @@ -94107,6 +94257,7 @@ const Select = struct { break :op_res op_res[0]; }, .div => @shrExact(lhs, rhs), + .div_8_down => lhs >> 3 & @as(SignedImm, -1) << rhs, .rem_8_mul => lhs & (@as(SignedImm, 1) << @intCast(@as(u3, 3) + rhs)) - 1, }; return switch (op.flags.adjust.sign) { diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index 15b83ccf23..f6e34ce2bf 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -431,7 +431,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) _ = lower.reloc(.{ .linker_tlsld = sym_index }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .lea, &.{ .{ .reg = .rdi }, - .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }, + .{ .mem = Memory.initRip(.none, 0) }, }, lower.target); lower.result_insts_len += 1; _ = lower.reloc(.{ @@ -443,7 +443,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) lower.result_insts_len += 1; _ = lower.reloc(.{ .linker_dtpoff = sym_index }, 0); emit_mnemonic = .lea; - break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ + break :op .{ .mem = Memory.initSib(.none, .{ .base = .{ .reg = .rax }, .disp = std.math.minInt(i32), }) }; @@ -456,7 +456,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) lower.result_insts_len += 1; _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); emit_mnemonic = .lea; - break :op .{ .mem = Memory.initSib(mem_op.sib.ptr_size, .{ + break :op .{ .mem = Memory.initSib(.none, .{ .base = .{ .reg = .rax }, .disp = std.math.minInt(i32), }) }; @@ -465,10 +465,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); if (lower.pic) switch (mnemonic) { - .lea => { - if (elf_sym.flags.is_extern_ptr) emit_mnemonic = .mov; - break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }; - }, + .lea => if (elf_sym.flags.is_extern_ptr) { + emit_mnemonic = .mov; + break :op .{ .mem = Memory.initRip(.ptr, 0) }; + } else break :op .{ .mem = Memory.initRip(.none, 0) }, .mov => { if (elf_sym.flags.is_extern_ptr) { const reg = ops[0].reg; @@ -505,7 +505,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); lower.result_insts[lower.result_insts_len] = try .new(.none, .mov, &.{ .{ .reg = .rdi }, - .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }, + .{ .mem = Memory.initRip(.ptr, 0) }, }, lower.target); lower.result_insts_len += 1; lower.result_insts[lower.result_insts_len] = try .new(.none, .call, &.{ @@ -518,10 +518,10 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand) _ = lower.reloc(.{ .linker_reloc = sym_index }, 0); break :op switch (mnemonic) { - .lea => { - if (macho_sym.flags.is_extern_ptr) emit_mnemonic = .mov; - break :op .{ .mem = Memory.initRip(mem_op.sib.ptr_size, 0) }; - }, + .lea => if (macho_sym.flags.is_extern_ptr) { + emit_mnemonic = .mov; + break :op .{ .mem = Memory.initRip(.ptr, 0) }; + } else break :op .{ .mem = Memory.initRip(.none, 0) }, .mov => { if (macho_sym.flags.is_extern_ptr) { const reg = ops[0].reg; diff --git a/test/behavior/x86_64/mem.zig b/test/behavior/x86_64/mem.zig index 768273f48a..139e3a1471 100644 --- a/test/behavior/x86_64/mem.zig +++ b/test/behavior/x86_64/mem.zig @@ -1,4 +1,4 @@ -fn access(comptime array: anytype) !void { +fn accessSlice(comptime array: anytype) !void { var slice: []const @typeInfo(@TypeOf(array)).array.child = undefined; slice = &array; inline for (0.., &array) |ct_index, *elem| { @@ -20,18 +20,153 @@ fn access(comptime array: anytype) !void { if (slice[rt_index] != elem.*) return error.Unexpected; } } -test access { - try access([3]u8{ 0xdb, 0xef, 0xbd }); - try access([3]u16{ 0x340e, 0x3654, 0x88d7 }); - try access([3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba }); - try access([3]u64{ +test accessSlice { + try accessSlice([3]u8{ 0xdb, 0xef, 0xbd }); + try accessSlice([3]u16{ 0x340e, 0x3654, 0x88d7 }); + try accessSlice([3]u32{ 0xd424c2c0, 0x2d6ac466, 0x5a0cfaba }); + try accessSlice([3]u64{ 0x9327a4f5221666a6, 0x5c34d3ddd84a8b12, 0xbae087f39f649260, }); - try access([3]u128{ + try accessSlice([3]u128{ 0x601cf010065444d4d42d5536dd9b95db, 0xa03f592fcaa22d40af23a0c735531e3c, 0x5da44907b31602b95c2d93f0b582ceab, }); } + +fn accessVector(comptime init: anytype) !void { + const Vector = @TypeOf(init); + var vector: Vector = undefined; + vector = init; + inline for (0..@typeInfo(Vector).vector.len) |ct_index| { + var rt_index: usize = undefined; + rt_index = ct_index; + if (&vector[rt_index] != &vector[ct_index]) return error.Unexpected; + if (vector[rt_index] != vector[ct_index]) return error.Unexpected; + } +} +test accessVector { + try accessVector(@Vector(1, bool){ + false, + }); + try accessVector(@Vector(2, bool){ + false, true, + }); + try accessVector(@Vector(3, bool){ + true, true, false, + }); + try accessVector(@Vector(5, bool){ + true, false, true, false, true, + }); + try accessVector(@Vector(7, bool){ + true, false, true, true, true, false, true, + }); + try accessVector(@Vector(8, bool){ + false, true, false, true, false, false, false, true, + }); + try accessVector(@Vector(9, bool){ + true, true, false, true, false, false, false, false, + true, + }); + try accessVector(@Vector(15, bool){ + false, true, true, true, false, true, false, false, + true, true, false, false, true, false, false, + }); + try accessVector(@Vector(16, bool){ + true, true, false, true, false, false, false, false, + false, true, true, false, false, false, true, true, + }); + try accessVector(@Vector(17, bool){ + true, false, true, true, false, true, false, true, + true, true, true, false, false, false, true, true, + false, + }); + try accessVector(@Vector(31, bool){ + true, false, true, true, false, true, true, true, + false, true, false, true, false, true, true, true, + false, false, true, false, false, false, false, true, + true, true, true, false, false, false, false, + }); + try accessVector(@Vector(32, bool){ + true, true, false, false, false, true, true, true, + false, true, true, true, false, true, false, true, + false, true, false, true, false, true, true, false, + false, false, false, false, false, true, true, true, + }); + try accessVector(@Vector(33, bool){ + true, false, false, false, false, true, true, true, + false, false, true, false, true, true, false, true, + true, true, false, true, true, false, false, false, + false, true, false, false, false, true, true, false, + false, + }); + try accessVector(@Vector(63, bool){ + false, false, true, true, true, false, true, true, + true, false, true, true, true, false, true, false, + true, true, false, true, false, true, true, true, + false, false, true, false, false, false, false, true, + true, true, true, true, false, true, false, true, + true, true, false, false, true, false, false, true, + false, true, false, false, false, false, true, true, + false, true, false, false, true, true, true, + }); + try accessVector(@Vector(64, bool){ + false, false, true, true, true, false, true, true, + true, false, true, true, false, true, true, false, + false, false, false, false, true, true, false, true, + true, true, true, true, false, false, false, true, + true, false, true, true, false, false, true, false, + false, true, true, false, true, true, false, false, + true, true, false, true, false, true, true, true, + false, true, true, false, false, false, false, false, + }); + try accessVector(@Vector(65, bool){ + false, false, true, true, true, true, true, true, + true, false, false, false, false, true, true, false, + true, false, true, true, true, false, false, false, + true, false, true, true, false, true, true, true, + true, true, false, true, true, false, true, false, + false, true, false, true, false, false, true, false, + true, false, true, true, true, false, true, true, + false, false, true, true, true, true, false, false, + true, + }); + try accessVector(@Vector(8, u8){ + 0x60, 0xf7, 0xf4, 0xb0, 0x05, 0xd3, 0x06, 0x78, + }); + try accessVector(@Vector(8, u16){ + 0x9c91, 0xfb8b, 0x7f80, 0x8304, 0x6e52, 0xd8ef, 0x37fc, 0x7851, + }); + try accessVector(@Vector(8, u32){ + 0x688b88e2, 0x68e2b7a2, 0x87574680, 0xab4f0769, + 0x75472bb5, 0xa791f2ae, 0xeb2ed416, 0x5f05ce82, + }); + try accessVector(@Vector(8, u64){ + 0xdefd1ddffaedf818, 0x91c78a29d3d59890, + 0x842aaf8fd3c7b785, 0x970a07b8f9f4a6b3, + 0x21b2425d1a428246, 0xea50e41174a7977b, + 0x08d0f1c4f5978b74, 0x8dc88a7fd85e0e67, + }); + try accessVector(@Vector(8, u128){ + 0x6f2cbde1fb219b1e73d7f774d10f0d94, + 0x7c1412616cda20436d7106691d8ba4cc, + 0x4ee940b50e97675b3b35d7872a35b5ad, + 0x6d994fb8caa1b2fac48acbb68fa2d2f1, + 0xdee698c7ec8de9b5940903e3fc665b63, + 0x0751491a509e4a1ce8cfa6d62fe9e74c, + 0x3d880f0a927ce3bfc2682b72070fcd50, + 0x82f0eec62881598699eeb93fbb456e95, + }); + try accessVector(@Vector(8, u256){ + 0x6ee4f35fe624d365952f73960791238ac781bfba782abc7866a691063e43ce48, + 0xb006491f54a9c9292458a5835b7d5f4cfa18136f175eef0a13bb8adf5c3dc061, + 0xd6e25ca1bc5685fc52609e261b9065bc05a8662e9291660033dd7f6d98e562b3, + 0x992c5e54e0e6331dac258996be7dae9b2a2eff323a39043ba8d2721420dc5f5c, + 0x257313f45fb3556d0fc323d5f38c953e9a093fe2278655312b6a5b64aab9d901, + 0x6c8ad2182b9a3b2b19c2c9b152956b383d0fee2e3fbd5b02ed72227446a7b221, + 0xd80cafc2252b289793799675e43f97ba4a5448c7b57e1544a464687b435efc7b, + 0xfcb480f2d70afd53c4689dd3f5db7638c24302f2a6a15f738167db090d91fb28, + }); +}