From 019c8844811ffb8b385ac8891cfd17cbf60d104a Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 16:43:57 -0400 Subject: [PATCH 01/24] x86_64: add missing multply of `f16` --- src/arch/x86_64/CodeGen.zig | 5 +++++ test/behavior/floatop.zig | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2dc1cc8ee4..63e3416079 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6345,6 +6345,7 @@ fn genBinOp( switch (air_tag) { .add => .{ .v_ss, .add }, .sub => .{ .v_ss, .sub }, + .mul => .{ .v_ss, .mul }, .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, .max => .{ .v_ss, .max }, .min => .{ .v_ss, .max }, @@ -6421,6 +6422,7 @@ fn genBinOp( switch (air_tag) { .add => .{ .v_ss, .add }, .sub => .{ .v_ss, .sub }, + .mul => .{ .v_ss, .mul }, .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, .max => .{ .v_ss, .max }, .min => .{ .v_ss, .max }, @@ -6468,6 +6470,7 @@ fn genBinOp( switch (air_tag) { .add => .{ .v_ps, .add }, .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .max }, @@ -6507,6 +6510,7 @@ fn genBinOp( switch (air_tag) { .add => .{ .v_ps, .add }, .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .max }, @@ -6546,6 +6550,7 @@ fn genBinOp( switch (air_tag) { .add => .{ .v_ps, .add }, .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .max }, diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index d2035c656f..9d17b05865 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -184,7 +184,7 @@ test "more @sqrt f16 tests" { test "another, possibly 
redundant @sqrt test" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (no_x86_64_hardware_f16_support) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; try testSqrtLegacy(f64, 12.0); From e98e58691f2c0759c8534080446cf6faecd30eb0 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 18:34:45 -0400 Subject: [PATCH 02/24] x86_64: fix crash with logging enabled --- src/arch/x86_64/CodeGen.zig | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 63e3416079..fe2b23e126 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1460,6 +1460,15 @@ fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Regist } fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void { + const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + } }); + assert(payload + 1 == switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }); _ = try self.addInst(.{ .tag = tag[1], .ops = switch (m) { @@ -1475,17 +1484,9 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immed }, .data = .{ .x = .{ .fixes = tag[0], - .payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } }), + .payload = payload, } }, }); - _ = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - }; } fn asmMemoryRegisterRegister( From 
1667e831cf8099271d97c9391fe4400622e6f96f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 18:35:03 -0400 Subject: [PATCH 03/24] x86_64: fix issues with getting float fields --- src/arch/x86_64/CodeGen.zig | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index fe2b23e126..f4dbd502e7 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5325,8 +5325,10 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const index = extra.field_index; const container_ty = self.air.typeOf(operand); + const container_rc = regClassForType(container_ty); const field_ty = container_ty.structFieldType(index); if (!field_ty.hasRuntimeBitsIgnoreComptime()) break :result .none; + const field_rc = regClassForType(field_ty); const src_mcv = try self.resolveInst(operand); const field_off = switch (container_ty.containerLayout()) { @@ -5410,30 +5412,23 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg); const dst_mcv = MCValue{ .register = dst_reg }; - const dst_rc = regClassForType(field_ty); - if (dst_rc.eql(gp)) break :result dst_mcv; - - const result_reg = try self.register_manager.allocReg(inst, dst_rc); - try self.genSetReg(result_reg, field_ty, dst_mcv); - break :result .{ .register = result_reg }; + break :result if (field_rc.supersetOf(gp)) + dst_mcv + else + try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); }, .register => |reg| { const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); - const dst_mcv = if (self.reuseOperand(inst, operand, 0, src_mcv)) - src_mcv + const dst_reg = if (src_mcv.isRegister() and field_rc.supersetOf(container_rc) and + self.reuseOperand(inst, operand, 0, src_mcv)) + src_mcv.getReg().? 
else - try self.copyToRegisterWithInstTracking( - inst, - Type.usize, - .{ .register = reg.to64() }, - ); - const dst_mcv_lock: ?RegisterLock = switch (dst_mcv) { - .register => |a_reg| self.register_manager.lockReg(a_reg), - else => null, - }; - defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock); + try self.copyToTmpRegister(Type.usize, .{ .register = reg.to64() }); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); // Shift by struct_field_offset. try self.genShiftBinOpMir( @@ -5460,7 +5455,11 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { registerAlias(dst_mcv.register, field_byte_size), ); } - break :result dst_mcv; + + break :result if (field_rc.supersetOf(gp)) + dst_mcv + else + try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); }, .register_overflow => |ro| { switch (index) { From e36e9323f4c4ce66013ba4774ff0145fc9cad34d Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 19:31:16 -0400 Subject: [PATCH 04/24] x86_64: implement union_init --- src/arch/x86_64/CodeGen.zig | 43 +++++++++++++++++++++++++++++++++--- test/behavior/bugs/13366.zig | 1 - test/behavior/struct.zig | 1 - test/behavior/tuple.zig | 1 - test/behavior/union.zig | 7 ------ 5 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index f4dbd502e7..2c7751cff5 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -10147,9 +10147,46 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data; - _ = extra; - return self.fail("TODO implement airUnionInit for x86_64", .{}); - //return self.finishAir(inst, result, .{ extra.init, 
.none, .none }); + const result: MCValue = result: { + const union_ty = self.air.typeOfIndex(inst); + const layout = union_ty.unionGetLayout(self.target.*); + + const src_ty = self.air.typeOf(extra.init); + const src_mcv = try self.resolveInst(extra.init); + if (layout.tag_size == 0) { + if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv; + + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(src_ty, dst_mcv, src_mcv); + break :result dst_mcv; + } + + const dst_mcv = try self.allocRegOrMem(inst, false); + + const union_obj = union_ty.cast(Type.Payload.Union).?.data; + const field_name = union_obj.fields.keys()[extra.field_index]; + const tag_ty = union_ty.unionTagTypeSafety().?; + const field_index = @intCast(u32, tag_ty.enumFieldIndex(field_name).?); + var tag_pl = Value.Payload.U32{ .base = .{ .tag = .enum_field_index }, .data = field_index }; + const tag_val = Value.initPayload(&tag_pl.base); + var tag_int_pl: Value.Payload.U64 = undefined; + const tag_int_val = tag_val.enumToInt(tag_ty, &tag_int_pl); + const tag_int = tag_int_val.toUnsignedInt(self.target.*); + const tag_off = if (layout.tag_align < layout.payload_align) + @intCast(i32, layout.payload_size) + else + 0; + try self.genCopy(tag_ty, dst_mcv.address().offset(tag_off).deref(), .{ .immediate = tag_int }); + + const pl_off = if (layout.tag_align < layout.payload_align) + 0 + else + @intCast(i32, layout.tag_size); + try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv); + + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ extra.init, .none, .none }); } fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { diff --git a/test/behavior/bugs/13366.zig b/test/behavior/bugs/13366.zig index cec015cc40..8419fbe265 100644 --- a/test/behavior/bugs/13366.zig +++ b/test/behavior/bugs/13366.zig @@ -14,7 +14,6 @@ const Block = struct { test { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if 
(builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_x86) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO diff --git a/test/behavior/struct.zig b/test/behavior/struct.zig index 797a22c3a8..659acbf56b 100644 --- a/test/behavior/struct.zig +++ b/test/behavior/struct.zig @@ -1352,7 +1352,6 @@ test "struct field init value is size of the struct" { } test "under-aligned struct field" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/tuple.zig b/test/behavior/tuple.zig index 2ce1922e50..b6fde88af2 100644 --- a/test/behavior/tuple.zig +++ b/test/behavior/tuple.zig @@ -367,7 +367,6 @@ test "branching inside tuple literal" { test "tuple initialized with a runtime known value" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 26a5d060df..b03e40214f 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -821,7 +821,6 @@ test "return union init with void payload" { } test "@unionInit stored to a const" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return 
error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO @@ -994,7 +993,6 @@ test "function call result coerces from tagged union to the tag" { } test "cast from anonymous struct to union" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO @@ -1028,7 +1026,6 @@ test "cast from anonymous struct to union" { } test "cast from pointer to anonymous struct to pointer to union" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO @@ -1199,7 +1196,6 @@ test "global variable struct contains union initialized to non-most-aligned fiel test "union with no result loc initiated with a runtime value" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -1217,7 +1213,6 @@ test "union with no result loc initiated with a runtime value" { test "union with a large struct field" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == 
.stage2_spirv64) return error.SkipZigTest; @@ -1288,7 +1283,6 @@ test "extern union most-aligned field is smaller" { } test "return an extern union from C calling convention" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -1397,7 +1391,6 @@ test "union and enum field order doesn't match" { } test "@unionInit uses tag value instead of field index" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO From 81664f17d5ec4382057bc3d2c8774d8804791a04 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 20:04:40 -0400 Subject: [PATCH 05/24] codegen: implement global enum_numbered --- src/codegen.zig | 14 +++++++++----- test/behavior/union.zig | 2 -- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 9d479b90cd..7f65df2804 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1189,12 +1189,16 @@ pub fn genTypedValue( .enum_simple => { return GenResult.mcv(.{ .immediate = field_index.data }); }, - .enum_full, .enum_nonexhaustive => { - const enum_full = typed_value.ty.cast(Type.Payload.EnumFull).?.data; - if (enum_full.values.count() != 0) { - const tag_val = enum_full.values.keys()[field_index.data]; + .enum_numbered, .enum_full, .enum_nonexhaustive => { + const enum_values = if (typed_value.ty.castTag(.enum_numbered)) |pl| + pl.data.values + else + typed_value.ty.cast(Type.Payload.EnumFull).?.data.values; + if (enum_values.count() != 0) { + const tag_val = enum_values.keys()[field_index.data]; + 
var buf: Type.Payload.Bits = undefined; return genTypedValue(bin_file, src_loc, .{ - .ty = enum_full.tag_ty, + .ty = typed_value.ty.intTagType(&buf), .val = tag_val, }, owner_decl_index); } else { diff --git a/test/behavior/union.zig b/test/behavior/union.zig index b03e40214f..c84072fb8d 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -362,7 +362,6 @@ const MultipleChoice = union(enum(u32)) { D = 1000, }; test "simple union(enum(u32))" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -641,7 +640,6 @@ test "tagged union with all void fields but a meaningful tag" { } test "union(enum(u32)) with specified and unspecified tag values" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 1336619979cfd5145c042ba7e2c6d0fbafc53574 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 8 May 2023 23:13:23 -0400 Subject: [PATCH 06/24] x86_64: fix field_ptr nonsense --- src/arch/x86_64/CodeGen.zig | 71 ++++++------------------------ test/behavior/field_parent_ptr.zig | 4 -- test/behavior/union.zig | 1 - 3 files changed, 14 insertions(+), 62 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 2c7751cff5..d1bc23b826 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5257,64 +5257,24 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { const ptr_field_ty = self.air.typeOfIndex(inst); - const mcv = try 
self.resolveInst(operand); const ptr_container_ty = self.air.typeOf(operand); const container_ty = ptr_container_ty.childType(); - const field_offset = switch (container_ty.containerLayout()) { - .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*)), + const field_offset = @intCast(i32, switch (container_ty.containerLayout()) { + .Auto, .Extern => container_ty.structFieldOffset(index, self.target.*), .Packed => if (container_ty.zigTypeTag() == .Struct and ptr_field_ty.ptrInfo().data.host_size == 0) container_ty.packedStructFieldByteOffset(index, self.target.*) else 0, - }; + }); - const result: MCValue = result: { - switch (mcv) { - .load_frame, .lea_tlv, .load_tlv => { - const offset_reg = try self.copyToTmpRegister(Type.usize, .{ - .immediate = field_offset, - }); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, Type.usize, switch (mcv) { - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => mcv, - }); - try self.genBinOpMir(.{ ._, .add }, Type.usize, dst_mcv, .{ .register = offset_reg }); - break :result dst_mcv; - }, - .indirect => |reg_off| break :result .{ .indirect = .{ - .reg = reg_off.reg, - .off = reg_off.off + @intCast(i32, field_offset), - } }, - .lea_frame => |frame_addr| break :result .{ .lea_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + @intCast(i32, field_offset), - } }, - .register, .register_offset => { - const src_reg = mcv.getReg().?; - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); - - const dst_mcv: MCValue = if (self.reuseOperand(inst, operand, 0, mcv)) - mcv - else - .{ .register = try self.copyToTmpRegister(ptr_field_ty, mcv) }; - break :result .{ .register_offset = .{ - .reg = dst_mcv.getReg().?, - .off = switch (dst_mcv) { - .register => 0, - 
.register_offset => |reg_off| reg_off.off, - else => unreachable, - } + @intCast(i32, field_offset), - } }; - }, - else => return self.fail("TODO implement fieldPtr for {}", .{mcv}), - } - }; - return result; + const src_mcv = try self.resolveInst(operand); + const dst_mcv = if (switch (src_mcv) { + .immediate, .lea_frame => true, + .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv), + else => false, + }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv); + return dst_mcv.offset(field_offset); } fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { @@ -6717,7 +6677,6 @@ fn genBinOpMir( .dead, .undef, .immediate, - .register_offset, .eflags, .register_overflow, .lea_direct, @@ -6726,7 +6685,9 @@ fn genBinOpMir( .lea_frame, .reserved_frame, => unreachable, // unmodifiable destination - .register => |dst_reg| { + .register, .register_offset => { + assert(dst_mcv.isRegister()); + const dst_reg = dst_mcv.getReg().?; const dst_alias = registerAlias(dst_reg, abi_size); switch (src_mcv) { .none, @@ -8625,11 +8586,7 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { } fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - .register_overflow => |ro| self.register_manager.lockReg(ro.reg), - else => null, - }; + const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); switch (dst_mcv) { diff --git a/test/behavior/field_parent_ptr.zig b/test/behavior/field_parent_ptr.zig index 1310b96678..80026bdc4b 100644 --- a/test/behavior/field_parent_ptr.zig +++ b/test/behavior/field_parent_ptr.zig @@ -11,7 +11,6 @@ test "@fieldParentPtr non-first field" { } test "@fieldParentPtr first field" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if 
(builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -51,7 +50,6 @@ fn testParentFieldPtrFirst(a: *const bool) !void { } test "@fieldParentPtr untagged union" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -79,7 +77,6 @@ fn testFieldParentPtrUnion(c: *const i32) !void { } test "@fieldParentPtr tagged union" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO @@ -107,7 +104,6 @@ fn testFieldParentPtrTaggedUnion(c: *const i32) !void { } test "@fieldParentPtr extern union" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO diff --git a/test/behavior/union.zig b/test/behavior/union.zig index c84072fb8d..c6acfb3f84 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -1164,7 +1164,6 @@ test "union enum type gets a separate scope" { } test "global variable struct contains union initialized to non-most-aligned field" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == 
.stage2_sparc64) return error.SkipZigTest; // TODO From c23e80e671686278ea2ea23d164a2c0839ca372c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Tue, 9 May 2023 03:15:27 -0400 Subject: [PATCH 07/24] x86_64: implement `@splat` --- src/arch/x86_64/CodeGen.zig | 208 +++++++++++++++++++++++++++++++++- src/arch/x86_64/Encoding.zig | 13 ++- src/arch/x86_64/Lower.zig | 2 + src/arch/x86_64/Mir.zig | 15 +++ src/arch/x86_64/encodings.zig | 41 ++++++- 5 files changed, 270 insertions(+), 9 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index d1bc23b826..29232b5284 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -8561,7 +8561,8 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { }, 32 => switch (ty.vectorLen()) { 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, - 2...4 => return if (self.hasFeature(.avx)) + 2 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, + 3...4 => return if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, 5...8 => if (self.hasFeature(.avx)) @@ -8577,6 +8578,14 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, else => {}, }, + 128 => switch (ty.vectorLen()) { + 1 => return if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + 2 => if (self.hasFeature(.avx)) + return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + else => {}, + }, else => {}, }, else => {}, @@ -9939,9 +9948,200 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { fn airSplat(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airSplat for x86_64", .{}); - //return 
self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const vector_ty = self.air.typeOfIndex(inst); + const dst_rc = regClassForType(vector_ty); + const scalar_ty = vector_ty.scalarType(); + + const src_mcv = try self.resolveInst(ty_op.operand); + const result: MCValue = result: { + switch (scalar_ty.zigTypeTag()) { + else => {}, + .Float => switch (scalar_ty.floatBits(self.target.*)) { + 32 => switch (vector_ty.vectorLen()) { + 1 => { + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(dst_reg, scalar_ty, src_mcv); + break :result .{ .register = dst_reg }; + }, + 2...4 => { + if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ss, .broadcast }, + dst_reg.to128(), + src_mcv.mem(.dword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_ps, .shuf }, + dst_reg.to128(), + src_reg.to128(), + src_reg.to128(), + Immediate.u(0), + ); + } + break :result .{ .register = dst_reg }; + } else { + const dst_mcv = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + try self.asmRegisterRegisterImmediate( + .{ ._ps, .shuf }, + dst_reg.to128(), + dst_reg.to128(), + Immediate.u(0), + ); + break :result dst_mcv; + } + }, + 5...8 => if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ss, .broadcast }, + dst_reg.to256(), + src_mcv.mem(.dword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(scalar_ty, src_mcv); + if (self.hasFeature(.avx2)) try self.asmRegisterRegister( + .{ .v_ss, .broadcast }, + dst_reg.to256(), + src_reg.to128(), + ) else { + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_ps, .shuf }, + dst_reg.to128(), + src_reg.to128(), + src_reg.to128(), + Immediate.u(0), + ); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + dst_reg.to256(), + dst_reg.to256(), + dst_reg.to128(), + Immediate.u(1), + ); + } + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + 64 => switch (vector_ty.vectorLen()) { + 1 => { + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(dst_reg, scalar_ty, src_mcv); + break :result .{ .register = dst_reg }; + }, + 2 => { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (self.hasFeature(.sse3)) { + if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, + dst_reg.to128(), + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, + dst_reg.to128(), + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(), + ); + break :result .{ .register = dst_reg }; + } else try self.asmRegisterRegister( + .{ ._ps, .movlh }, + dst_reg.to128(), + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(), + ); + }, + 3...4 => if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_sd, .broadcast }, + dst_reg.to256(), + src_mcv.mem(.qword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(scalar_ty, src_mcv); + if (self.hasFeature(.avx2)) try self.asmRegisterRegister( + .{ .v_sd, .broadcast }, + dst_reg.to256(), + src_reg.to128(), + ) else { + try self.asmRegisterRegister( + .{ .v_, .movddup }, + dst_reg.to128(), + src_reg.to128(), + ); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + dst_reg.to256(), + dst_reg.to256(), + dst_reg.to128(), + Immediate.u(1), + ); + } + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + 128 => switch (vector_ty.vectorLen()) { + 1 => { + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(dst_reg, scalar_ty, src_mcv); + break :result .{ .register = dst_reg }; + }, + 2 => if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_f128, .broadcast }, + dst_reg.to256(), + src_mcv.mem(.xword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(scalar_ty, src_mcv); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + dst_reg.to256(), + src_reg.to256(), + src_reg.to128(), + Immediate.u(1), + ); + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + 16, 80 => {}, + else => unreachable, + }, + } + return self.fail("TODO implement airSplat for {}", .{ + vector_ty.fmt(self.bin_file.options.module.?), + }); + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airSelect(self: *Self, inst: Air.Inst.Index) !void { diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index b6b49e8939..073128b85e 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -270,10 +270,12 @@ pub const Mnemonic = enum { divps, divss, maxps, maxss, minps, minss, - movaps, movhlps, movss, movups, + movaps, movhlps, movlhps, + movss, movups, mulps, mulss, orps, pextrw, pinsrw, + shufps, sqrtps, sqrtss, subps, subss, ucomiss, @@ -296,6 +298,7 @@ pub const Mnemonic = enum { psrld, psrlq, psrlw, punpckhbw, punpckhdq, punpckhqdq, punpckhwd, punpcklbw, punpckldq, punpcklqdq, punpcklwd, + shufpd, sqrtpd, sqrtsd, subpd, subsd, ucomisd, @@ -303,17 +306,22 @@ pub const Mnemonic = enum { // SSE3 movddup, movshdup, movsldup, // SSE4.1 + extractps, + insertps, pextrb, pextrd, pextrq, pinsrb, pinsrd, pinsrq, roundpd, roundps, roundsd, roundss, // AVX vaddpd, vaddps, vaddsd, vaddss, + vbroadcastf128, vbroadcastsd, vbroadcastss, vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vdivpd, vdivps, vdivsd, vdivss, + vextractf128, vextractps, + vinsertf128, vinsertps, vmaxpd, vmaxps, vmaxsd, vmaxss, vminpd, vminps, vminsd, vminss, vmovapd, vmovaps, - vmovddup, vmovhlps, + vmovddup, vmovhlps, vmovlhps, vmovsd, vmovshdup, vmovsldup, vmovss, @@ -326,6 +334,7 @@ pub const Mnemonic = enum { vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, vroundpd, vroundps, vroundsd, vroundss, 
+ vshufpd, vshufps, vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, vsubpd, vsubps, vsubsd, vsubss, // F16C diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index c32e7fc974..c893429912 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -300,6 +300,8 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void { else .none, }, mnemonic: { + @setEvalBranchQuota(2_000); + comptime var max_len = 0; inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len); var buf: [max_len]u8 = undefined; diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 0a7b5597b3..18c2903045 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -256,6 +256,8 @@ pub const Inst = struct { v_sd, /// VEX-Encoded ___ Packed Double-Precision Values v_pd, + /// VEX-Encoded ___ 128-Bits Of Floating-Point Data + v_f128, /// Mask ___ Byte k_b, @@ -454,6 +456,8 @@ pub const Inst = struct { mova, /// Move packed single-precision floating-point values high to low movhl, + /// Move packed single-precision floating-point values low to high + movlh, /// Move unaligned packed single-precision floating-point values /// Move unaligned packed double-precision floating-point values movu, @@ -488,6 +492,9 @@ pub const Inst = struct { cvtsi2sd, /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value cvtss2sd, + /// Packed interleave shuffle of quadruplets of single-precision floating-point values + /// Packed interleave shuffle of pairs of double-precision floating-point values + shuf, /// Shuffle packed high words shufh, /// Shuffle packed low words @@ -520,12 +527,20 @@ pub const Inst = struct { /// Replicate single floating-point values movsldup, + /// Extract packed floating-point values + extract, + /// Insert scalar single-precision floating-point value + /// Insert packed floating-point values + insert, /// Round packed single-precision floating-point values /// Round 
scalar single-precision floating-point value /// Round packed double-precision floating-point values /// Round scalar double-precision floating-point value round, + /// Load with broadcast floating-point data + broadcast, + /// Convert 16-bit floating-point values to single-precision floating-point values cvtph2ps, /// Convert single-precision floating-point values to 16-bit floating-point values diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 2b9d530c1e..f56f31da7f 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -867,6 +867,8 @@ pub const table = [_]Entry{ .{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse }, .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse }, @@ -879,14 +881,16 @@ pub const table = [_]Entry{ .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse }, - .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse }, - - .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, + .{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse }, .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse }, + .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse }, + + .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, + .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse }, .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse }, @@ -967,6 +971,8 @@ pub const table = [_]Entry{ .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 
}, + .{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 }, + .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, @@ -990,6 +996,10 @@ pub const table = [_]Entry{ .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, // SSE4.1 + .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 }, + + .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 }, + .{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 }, .{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 }, .{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 }, @@ -1019,6 +1029,11 @@ pub const table = [_]Entry{ .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx }, + .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx }, + .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx }, + .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx }, + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, @@ -1039,6 +1054,14 @@ pub const table = [_]Entry{ .{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + .{ .vextractf128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x19 }, 0, .vex_256_w0, .avx }, + + .{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx }, + + 
.{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx }, + + .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx }, + .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, .{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, @@ -1074,6 +1097,8 @@ pub const table = [_]Entry{ .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, @@ -1150,6 +1175,12 @@ pub const table = [_]Entry{ .{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx }, + .{ .vshufpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_128_wig, .avx }, + .{ .vshufpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_256_wig, .avx }, + + .{ .vshufps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_128_wig, .avx }, + .{ .vshufps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_256_wig, .avx }, + .{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx }, .{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx }, @@ -1201,6 +1232,10 @@ pub const table = [_]Entry{ .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma }, // AVX2 + .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 }, + .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 
0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, + .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 }, .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 }, .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 }, From 3681da25f865d499cffe923b7f0721cf759d3591 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 12 May 2023 01:16:52 -0400 Subject: [PATCH 08/24] x86_64: remove scratch data tags --- src/arch/x86_64/CodeGen.zig | 8 ++++---- src/arch/x86_64/Lower.zig | 32 ++++++++++++++++---------------- src/arch/x86_64/Mir.zig | 24 ++++++------------------ 3 files changed, 26 insertions(+), 38 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 29232b5284..4f5bf89989 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1079,9 +1079,9 @@ fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void { .fixes = Mir.Inst.Fixes.fromCondition(cc), .r1 = reg, } }, - .z_and_np, .nz_or_p => .{ .r_scratch = .{ + .z_and_np, .nz_or_p => .{ .rr = .{ .r1 = reg, - .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(), + .r2 = (try self.register_manager.allocReg(null, gp)).to8(), } }, }, }); @@ -1120,8 +1120,8 @@ fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { .fixes = Mir.Inst.Fixes.fromCondition(cc), .payload = payload, } }, - .z_and_np, .nz_or_p => .{ .x_scratch = .{ - .scratch_reg = (try self.register_manager.allocReg(null, gp)).to8(), + .z_and_np, .nz_or_p => .{ .rx = .{ + .r1 = (try self.register_manager.allocReg(null, gp)).to8(), .payload = payload, } }, }, diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index c893429912..f6bce992e6 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -95,54 +95,54 @@ pub fn 
lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }, .pseudo_set_z_and_np_r => { try lower.emit(.none, .setz, &.{ - .{ .reg = inst.data.r_scratch.r1 }, + .{ .reg = inst.data.rr.r1 }, }); try lower.emit(.none, .setnp, &.{ - .{ .reg = inst.data.r_scratch.scratch_reg }, + .{ .reg = inst.data.rr.r2 }, }); try lower.emit(.none, .@"and", &.{ - .{ .reg = inst.data.r_scratch.r1 }, - .{ .reg = inst.data.r_scratch.scratch_reg }, + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, }); }, .pseudo_set_z_and_np_m_sib, .pseudo_set_z_and_np_m_rip, => { try lower.emit(.none, .setz, &.{ - .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }); try lower.emit(.none, .setnp, &.{ - .{ .reg = inst.data.x_scratch.scratch_reg }, + .{ .reg = inst.data.rx.r1 }, }); try lower.emit(.none, .@"and", &.{ - .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, - .{ .reg = inst.data.x_scratch.scratch_reg }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + .{ .reg = inst.data.rx.r1 }, }); }, .pseudo_set_nz_or_p_r => { try lower.emit(.none, .setnz, &.{ - .{ .reg = inst.data.r_scratch.r1 }, + .{ .reg = inst.data.rr.r1 }, }); try lower.emit(.none, .setp, &.{ - .{ .reg = inst.data.r_scratch.scratch_reg }, + .{ .reg = inst.data.rr.r2 }, }); try lower.emit(.none, .@"or", &.{ - .{ .reg = inst.data.r_scratch.r1 }, - .{ .reg = inst.data.r_scratch.scratch_reg }, + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, }); }, .pseudo_set_nz_or_p_m_sib, .pseudo_set_nz_or_p_m_rip, => { try lower.emit(.none, .setnz, &.{ - .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }); try lower.emit(.none, .setp, &.{ - .{ .reg = inst.data.x_scratch.scratch_reg }, + .{ .reg = inst.data.rx.r1 }, }); try lower.emit(.none, .@"or", &.{ - .{ .mem = lower.mem(inst.ops, inst.data.x_scratch.payload) }, - .{ .reg = inst.data.x_scratch.scratch_reg }, + .{ 
.mem = lower.mem(inst.ops, inst.data.rx.payload) }, + .{ .reg = inst.data.rx.r1 }, }); }, .pseudo_j_z_and_np_inst => { diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 18c2903045..919974e7d2 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -711,27 +711,27 @@ pub const Inst = struct { pseudo_cmov_nz_or_p_rm_rip, /// Set byte if zero flag set and parity flag not set /// Requires a scratch register! - /// Uses `r_scratch` payload. + /// Uses `rr` payload. pseudo_set_z_and_np_r, /// Set byte if zero flag set and parity flag not set /// Requires a scratch register! - /// Uses `x_scratch` payload. + /// Uses `rx` payload. pseudo_set_z_and_np_m_sib, /// Set byte if zero flag set and parity flag not set /// Requires a scratch register! - /// Uses `x_scratch` payload. + /// Uses `rx` payload. pseudo_set_z_and_np_m_rip, /// Set byte if zero flag not set or parity flag set /// Requires a scratch register! - /// Uses `r_scratch` payload. + /// Uses `rr` payload. pseudo_set_nz_or_p_r, /// Set byte if zero flag not set or parity flag set /// Requires a scratch register! - /// Uses `x_scratch` payload. + /// Uses `rx` payload. pseudo_set_nz_or_p_m_sib, /// Set byte if zero flag not set or parity flag set /// Requires a scratch register! - /// Uses `x_scratch` payload. + /// Uses `rx` payload. pseudo_set_nz_or_p_m_rip, /// Jump if zero flag set and parity flag not set /// Uses `inst` payload. @@ -836,18 +836,6 @@ pub const Inst = struct { i: u8, payload: u32, }, - /// Register, scratch register - r_scratch: struct { - fixes: Fixes = ._, - r1: Register, - scratch_reg: Register, - }, - /// Scratch register, followed by Custom payload found in extra. - x_scratch: struct { - fixes: Fixes = ._, - scratch_reg: Register, - payload: u32, - }, /// Custom payload found in extra. 
x: struct { fixes: Fixes = ._, From f83ebd8e6c95cf37d498936cae26d3a743cddc7f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 12 May 2023 02:11:37 -0400 Subject: [PATCH 09/24] x86_64: implement stack probing --- src/arch/x86_64/CodeGen.zig | 63 ++++++++++++++++++++---- src/arch/x86_64/Lower.zig | 97 +++++++++++++++++++++++++++++++++++++ src/arch/x86_64/Mir.zig | 12 +++++ test/behavior/memset.zig | 2 - 4 files changed, 163 insertions(+), 11 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 4f5bf89989..523faa5cb2 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1550,7 +1550,9 @@ fn gen(self: *Self) InnerError!void { const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); const backpatch_frame_align = try self.asmPlaceholder(); + const backpatch_frame_align_extra = try self.asmPlaceholder(); const backpatch_stack_alloc = try self.asmPlaceholder(); + const backpatch_stack_alloc_extra = try self.asmPlaceholder(); switch (self.ret_mcv.long) { .none, .unreach => {}, @@ -1599,24 +1601,67 @@ fn gen(self: *Self) InnerError!void { const need_stack_adjust = frame_layout.stack_adjust > 0; const need_save_reg = frame_layout.save_reg_list.count() > 0; if (need_frame_align) { + const page_align = @as(u32, math.maxInt(u32)) << 12; self.mir_instructions.set(backpatch_frame_align, .{ .tag = .@"and", .ops = .ri_s, .data = .{ .ri = .{ .r1 = .rsp, - .i = frame_layout.stack_mask, + .i = @max(frame_layout.stack_mask, page_align), } }, }); + if (frame_layout.stack_mask < page_align) { + self.mir_instructions.set(backpatch_frame_align_extra, .{ + .tag = .pseudo, + .ops = .pseudo_probe_align_ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = ~frame_layout.stack_mask & page_align, + } }, + }); + } } if (need_stack_adjust) { - self.mir_instructions.set(backpatch_stack_alloc, .{ - .tag = .sub, - .ops = .ri_s, - .data = .{ .ri = .{ - .r1 = .rsp, - .i = 
frame_layout.stack_adjust, - } }, - }); + const page_size: u32 = 1 << 12; + if (frame_layout.stack_adjust <= page_size) { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .sub, + .ops = .ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, + }); + } else if (frame_layout.stack_adjust < + page_size * Lower.pseudo_probe_adjust_unrolled_max_insts) + { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_unrolled_ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, + }); + } else { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_setup_rri_s, + .data = .{ .rri = .{ + .r1 = .rsp, + .r2 = .rax, + .i = frame_layout.stack_adjust, + } }, + }); + self.mir_instructions.set(backpatch_stack_alloc_extra, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_loop_rr, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rax, + } }, + }); + } } if (need_frame_align or need_stack_adjust) { self.mir_instructions.set(backpatch_stack_dealloc, .{ diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index f6bce992e6..65d2b64398 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -9,19 +9,33 @@ result_insts_len: u8 = undefined, result_relocs_len: u8 = undefined, result_insts: [ std.mem.max(usize, &.{ + 1, // non-pseudo instructions 2, // cmovcc: cmovcc \ cmovcc 3, // setcc: setcc \ setcc \ logicop 2, // jcc: jcc \ jcc + pseudo_probe_align_insts, + pseudo_probe_adjust_unrolled_max_insts, + pseudo_probe_adjust_setup_insts, + pseudo_probe_adjust_loop_insts, abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs }) ]Instruction = undefined, result_relocs: [ std.mem.max(usize, &.{ + 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea 2, // jcc: jcc \ jcc + 2, // test \ jcc \ probe \ sub \ jmp + 1, // probe \ sub \ jcc }) ]Reloc = 
undefined, +pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp +pub const pseudo_probe_adjust_unrolled_max_insts = + pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts; +pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub +pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc + pub const Error = error{ OutOfMemory, LowerFail, @@ -62,6 +76,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { else => try lower.generic(inst), .pseudo => switch (inst.ops) { .pseudo_cmov_z_and_np_rr => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rr.r2 }, .{ .reg = inst.data.rr.r1 }, @@ -72,6 +87,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_cmov_nz_or_p_rr => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rr.r1 }, .{ .reg = inst.data.rr.r2 }, @@ -84,6 +100,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_cmov_nz_or_p_rm_sib, .pseudo_cmov_nz_or_p_rm_rip, => { + assert(inst.data.rx.fixes == ._); try lower.emit(.none, .cmovnz, &.{ .{ .reg = inst.data.rx.r1 }, .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, @@ -94,6 +111,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_set_z_and_np_r => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .setz, &.{ .{ .reg = inst.data.rr.r1 }, }); @@ -108,6 +126,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_set_z_and_np_m_sib, .pseudo_set_z_and_np_m_rip, => { + assert(inst.data.rx.fixes == ._); try lower.emit(.none, .setz, &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }); @@ -120,6 +139,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_set_nz_or_p_r => { + assert(inst.data.rr.fixes == ._); try lower.emit(.none, .setnz, &.{ .{ .reg = inst.data.rr.r1 }, }); @@ -134,6 +154,7 
@@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { .pseudo_set_nz_or_p_m_sib, .pseudo_set_nz_or_p_m_rip, => { + assert(inst.data.rx.fixes == ._); try lower.emit(.none, .setnz, &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }); @@ -146,6 +167,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_j_z_and_np_inst => { + assert(inst.data.inst.fixes == ._); try lower.emit(.none, .jnz, &.{ .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, }); @@ -154,6 +176,7 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, .pseudo_j_nz_or_p_inst => { + assert(inst.data.inst.fixes == ._); try lower.emit(.none, .jnz, &.{ .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, }); @@ -162,6 +185,78 @@ pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { }); }, + .pseudo_probe_align_ri_s => { + try lower.emit(.none, .@"test", &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) }, + }); + try lower.emit(.none, .jz, &.{ + .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, + }); + try lower.emit(.none, .lea, &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .mem = Memory.sib(.qword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + .disp = -page_size, + }) }, + }); + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + }) }, + .{ .reg = inst.data.ri.r1.to32() }, + }); + try lower.emit(.none, .jmp, &.{ + .{ .imm = lower.reloc(.{ .inst = index }) }, + }); + assert(lower.result_insts_len == pseudo_probe_align_insts); + }, + .pseudo_probe_adjust_unrolled_ri_s => { + var offset = page_size; + while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) { + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + .disp = -offset, + }) }, + .{ .reg = inst.data.ri.r1.to32() }, + }); + } + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.ri.r1 
}, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) }, + }); + assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts); + }, + .pseudo_probe_adjust_setup_rri_s => { + try lower.emit(.none, .mov, &.{ + .{ .reg = inst.data.rri.r2.to32() }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) }, + }); + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.rri.r1 }, + .{ .reg = inst.data.rri.r2 }, + }); + assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts); + }, + .pseudo_probe_adjust_loop_rr => { + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.rr.r1 }, + .scale_index = .{ .scale = 1, .index = inst.data.rr.r2 }, + .disp = -page_size, + }) }, + .{ .reg = inst.data.rr.r1.to32() }, + }); + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.rr.r2 }, + .{ .imm = Immediate.s(page_size) }, + }); + try lower.emit(.none, .jae, &.{ + .{ .imm = lower.reloc(.{ .inst = index }) }, + }); + assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts); + }, .pseudo_push_reg_list => try lower.pushPopRegList(.push, inst), .pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst), @@ -440,6 +535,8 @@ fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Er }}); } +const page_size: i32 = 1 << 12; + const abi = @import("abi.zig"); const assert = std.debug.assert; const bits = @import("bits.zig"); diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 919974e7d2..f26bf97e82 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -740,6 +740,18 @@ pub const Inst = struct { /// Uses `inst` payload. 
pseudo_j_nz_or_p_inst, + /// Probe alignment + /// Uses `ri` payload + pseudo_probe_align_ri_s, + /// Probe adjust unrolled + /// Uses `ri` payload + pseudo_probe_adjust_unrolled_ri_s, + /// Probe adjust setup + /// Uses `rri` payload + pseudo_probe_adjust_setup_rri_s, + /// Probe adjust loop + /// Uses `rr` payload + pseudo_probe_adjust_loop_rr, /// Push registers /// Uses `reg_list` payload. pseudo_push_reg_list, diff --git a/test/behavior/memset.zig b/test/behavior/memset.zig index e111b5c523..be09ef655f 100644 --- a/test/behavior/memset.zig +++ b/test/behavior/memset.zig @@ -120,7 +120,6 @@ test "memset with large array element, runtime known" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest; const A = [128]u64; var buf: [5]A = undefined; @@ -139,7 +138,6 @@ test "memset with large array element, comptime known" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.os.tag == .windows) return error.SkipZigTest; const A = [128]u64; var buf: [5]A = undefined; From 2cbd442a9df16ab2d13d03041631f516269c9f64 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 13 May 2023 03:05:49 -0400 Subject: [PATCH 10/24] x86_64: implement integer vector movement --- src/arch/x86_64/CodeGen.zig | 372 ++++++++++++++++++++---------- test/behavior/maximum_minimum.zig | 1 - test/behavior/vector.zig | 3 - 3 files changed, 251 insertions(+), 125 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 523faa5cb2..c04bb1d2a5 100644 --- a/src/arch/x86_64/CodeGen.zig +++ 
b/src/arch/x86_64/CodeGen.zig @@ -8579,56 +8579,174 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { +const MoveStrategy = union(enum) { + move: Mir.Inst.FixedTag, + insert_extract: InsertExtract, + vex_insert_extract: InsertExtract, + + const InsertExtract = struct { + insert: Mir.Inst.FixedTag, + extract: Mir.Inst.FixedTag, + imm: Immediate, + }; +}; +fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy { switch (ty.zigTypeTag()) { - else => return .{ ._, .mov }, + else => return .{ .move = .{ ._, .mov } }, .Float => switch (ty.floatBits(self.target.*)) { - 16 => unreachable, // needs special handling - 32 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, - 64 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, - 128 => return if (self.hasFeature(.avx)) + 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + .imm = Immediate.u(0), + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + .imm = Immediate.u(0), + } }, + 32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } }, + 64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } }, + 128 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, else => {}, }, .Vector => switch (ty.childType().zigTypeTag()) { - .Float => switch (ty.childType().floatBits(self.target.*)) { + .Int => switch (ty.childType().intInfo(self.target.*).bits) { + 8 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ + .insert = .{ .vp_b, .insr }, + .extract = 
.{ .vp_b, .extr }, + .imm = Immediate.u(0), + } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ + .insert = .{ .p_b, .insr }, + .extract = .{ .p_b, .extr }, + .imm = Immediate.u(0), + } }, + 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + .imm = Immediate.u(0), + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + .imm = Immediate.u(0), + } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + else => {}, + }, 16 => switch (ty.vectorLen()) { - 1 => unreachable, // needs special handling - 2 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, - 3...4 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, - 5...8 => return if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, - 9...16 => if (self.hasFeature(.avx)) - return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + .imm = Immediate.u(0), + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + .imm = Immediate.u(0), + } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ps, .mov } + else + .{ ._ps, .mov } }, else => {}, }, 32 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov }, - 2 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } 
else .{ ._sd, .mov }, - 3...4 => return if (self.hasFeature(.avx)) + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, 5...8 => if (self.hasFeature(.avx)) - return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, else => {}, }, 64 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov }, - 2 => return if (self.hasFeature(.avx)) + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, 3...4 => if (self.hasFeature(.avx)) - return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + else => {}, + }, + else => {}, + }, + .Float => switch (ty.childType().floatBits(self.target.*)) { + 16 => switch (ty.vectorLen()) { + 1 => {}, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + 9...16 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + else => {}, + 
}, + 32 => switch (ty.vectorLen()) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + 5...8 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + else => {}, + }, + 64 => switch (ty.vectorLen()) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + 3...4 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, else => {}, }, 128 => switch (ty.vectorLen()) { - 1 => return if (self.hasFeature(.avx)) + 1 => return .{ .move = if (self.hasFeature(.avx)) if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu }, + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, 2 => if (self.hasFeature(.avx)) - return if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu }, + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, else => {}, }, else => {}, @@ -8636,7 +8754,7 @@ fn movMirTag(self: *Self, ty: Type, aligned: bool) !Mir.Inst.FixedTag { else => {}, }, } - return self.fail("TODO movMirTag for {}", .{ty.fmt(self.bin_file.options.module.?)}); + return self.fail("TODO moveStrategy for {}", .{ty.fmt(self.bin_file.options.module.?)}); } fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { @@ -8764,6 +8882,7 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .load_frame, .lea_frame, => { + const 
dst_alias = registerAlias(dst_reg, abi_size); const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { .register_offset, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, @@ -8775,71 +8894,81 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr }, else => unreachable, }); - if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) - try self.asmRegisterMemoryImmediate( - .{ .p_w, .insr }, - registerAlias(dst_reg, abi_size), + switch (@as(MoveStrategy, switch (src_mcv) { + .register_offset => |reg_off| switch (reg_off.off) { + 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), + else => .{ .move = .{ ._, .lea } }, + }, + .indirect => try self.moveStrategy(ty, false), + .load_frame => |frame_addr| try self.moveStrategy( + ty, + self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*), + ), + .lea_frame => .{ .move = .{ ._, .lea } }, + else => unreachable, + })) { + .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem), + .insert_extract => |ie| try self.asmRegisterMemoryImmediate( + ie.insert, + dst_alias, src_mem, - Immediate.u(0), - ) - else - try self.asmRegisterMemory( - switch (src_mcv) { - .register_offset => |reg_off| switch (reg_off.off) { - 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), - else => .{ ._, .lea }, - }, - .indirect => try self.movMirTag(ty, false), - .load_frame => |frame_addr| try self.movMirTag( - ty, - self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(self.target.*), - ), - .lea_frame => .{ ._, .lea }, - else => unreachable, - }, - registerAlias(dst_reg, abi_size), + ie.imm, + ), + .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( + ie.insert, + dst_alias, + dst_alias, src_mem, - ); + ie.imm, + ), + } }, .memory, .load_direct, .load_got, .load_tlv => { switch (src_mcv) { .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| { + const dst_alias = 
registerAlias(dst_reg, abi_size); const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = .ds }, .disp = small_addr, }); - return if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) - self.asmRegisterMemoryImmediate( - .{ .p_w, .insr }, - registerAlias(dst_reg, abi_size), + switch (try self.moveStrategy(ty, mem.isAlignedGeneric( + u32, + @bitCast(u32, small_addr), + ty.abiAlignment(self.target.*), + ))) { + .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem), + .insert_extract => |ie| try self.asmRegisterMemoryImmediate( + ie.insert, + dst_alias, src_mem, - Immediate.u(0), - ) - else - self.asmRegisterMemory( - try self.movMirTag(ty, mem.isAlignedGeneric( - u32, - @bitCast(u32, small_addr), - ty.abiAlignment(self.target.*), - )), - registerAlias(dst_reg, abi_size), + ie.imm, + ), + .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( + ie.insert, + dst_alias, + dst_alias, src_mem, - ); + ie.imm, + ), + } }, - .load_direct => |sym_index| if (!ty.isRuntimeFloat()) { - const atom_index = try self.owner.getSymbolIndex(self); - _ = try self.addInst(.{ - .tag = .mov, - .ops = .direct_reloc, - .data = .{ .rx = .{ - .r1 = dst_reg.to64(), - .payload = try self.addExtra(Mir.Reloc{ - .atom_index = atom_index, - .sym_index = sym_index, - }), - } }, - }); - return; + .load_direct => |sym_index| switch (ty.zigTypeTag()) { + else => { + const atom_index = try self.owner.getSymbolIndex(self); + _ = try self.addInst(.{ + .tag = .mov, + .ops = .direct_reloc, + .data = .{ .rx = .{ + .r1 = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, + }); + return; + }, + .Float, .Vector => {}, }, .load_got, .load_tlv => {}, else => unreachable, @@ -8849,22 +8978,26 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer 
self.register_manager.unlockReg(addr_lock); + const dst_alias = registerAlias(dst_reg, abi_size); const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg }, }); - if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) - try self.asmRegisterMemoryImmediate( - .{ .p_w, .insr }, - registerAlias(dst_reg, abi_size), + switch (try self.moveStrategy(ty, false)) { + .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem), + .insert_extract => |ie| try self.asmRegisterMemoryImmediate( + ie.insert, + dst_alias, src_mem, - Immediate.u(0), - ) - else - try self.asmRegisterMemory( - try self.movMirTag(ty, false), - registerAlias(dst_reg, abi_size), + ie.imm, + ), + .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( + ie.insert, + dst_alias, + dst_alias, src_mem, - ); + ie.imm, + ), + } }, .lea_direct, .lea_got => |sym_index| { const atom_index = try self.owner.getSymbolIndex(self); @@ -8966,36 +9099,33 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }, ); - if (ty.isRuntimeFloat() and ty.floatBits(self.target.*) == 16) - try self.asmMemoryRegisterImmediate( - .{ .p_w, .extr }, + const src_alias = registerAlias(src_reg, abi_size); + switch (try self.moveStrategy(ty, switch (base) { + .none => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(self.target.*), + ), + .reg => |reg| switch (reg) { + .es, .cs, .ss, .ds => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(self.target.*), + ), + else => false, + }, + .frame => |frame_index| self.getFrameAddrAlignment( + .{ .index = frame_index, .off = disp }, + ) >= ty.abiAlignment(self.target.*), + })) { + .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_alias), + .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate( + ie.extract, dst_mem, - src_reg.to128(), - Immediate.u(0), - ) - 
else - try self.asmMemoryRegister( - try self.movMirTag(ty, switch (base) { - .none => mem.isAlignedGeneric( - u32, - @bitCast(u32, disp), - ty.abiAlignment(self.target.*), - ), - .reg => |reg| switch (reg) { - .es, .cs, .ss, .ds => mem.isAlignedGeneric( - u32, - @bitCast(u32, disp), - ty.abiAlignment(self.target.*), - ), - else => false, - }, - .frame => |frame_index| self.getFrameAddrAlignment( - .{ .index = frame_index, .off = disp }, - ) >= ty.abiAlignment(self.target.*), - }), - dst_mem, - registerAlias(src_reg, abi_size), - ); + src_alias, + ie.imm, + ), + } }, .register_overflow => |ro| { try self.genSetMem( diff --git a/test/behavior/maximum_minimum.zig b/test/behavior/maximum_minimum.zig index 6496e00afd..b4d2160713 100644 --- a/test/behavior/maximum_minimum.zig +++ b/test/behavior/maximum_minimum.zig @@ -146,7 +146,6 @@ test "@min/@max more than two arguments" { test "@min/@max more than two vector arguments" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 537879b5c9..87ccdfb567 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -1129,7 +1129,6 @@ test "loading the second vector from a slice of vectors" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO @setRuntimeSafety(false); var small_bases = [2]@Vector(2, u8){ @@ -1219,7 +1218,6 @@ test "zero multiplicand" { if (builtin.zig_backend 
== .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO const zeros = @Vector(2, u32){ 0.0, 0.0 }; var ones = @Vector(2, u32){ 1.0, 1.0 }; @@ -1324,7 +1322,6 @@ test "store to vector in slice" { test "addition of vectors represented as strings" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; const V = @Vector(3, u8); From 57c38f6433c8024d1946bcf1b5b7d0892fc751a7 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 13 May 2023 02:24:41 -0400 Subject: [PATCH 11/24] x86_64: implement global payload pointers --- src/arch/x86_64/CodeGen.zig | 7 +++++-- src/codegen.zig | 34 ++++++++++++++++++++++++++++++---- test/behavior/optional.zig | 2 -- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index c04bb1d2a5..4aa2443295 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -3467,14 +3467,17 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); } - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + const dst_mcv: MCValue = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv + else if (self.liveness.isUnused(inst)) + .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) } else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); const pl_ty = dst_ty.childType(); const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*)); - 
try self.genSetMem(.{ .reg = dst_mcv.register }, pl_abi_size, Type.bool, .{ .immediate = 1 }); + try self.genSetMem(.{ .reg = dst_mcv.getReg().? }, pl_abi_size, Type.bool, .{ .immediate = 1 }); break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); diff --git a/src/codegen.zig b/src/codegen.zig index 7f65df2804..7a22d0b218 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -380,7 +380,7 @@ pub fn generateSymbol( return Result.ok; }, - .field_ptr, .elem_ptr => return lowerParentPtr( + .field_ptr, .elem_ptr, .opt_payload_ptr => return lowerParentPtr( bin_file, src_loc, typed_value, @@ -812,7 +812,6 @@ fn lowerParentPtr( reloc_info: RelocInfo, ) CodeGenError!Result { const target = bin_file.options.target; - switch (parent_ptr.tag()) { .field_ptr => { const field_ptr = parent_ptr.castTag(.field_ptr).?.data; @@ -858,6 +857,31 @@ fn lowerParentPtr( reloc_info.offset(@intCast(u32, elem_ptr.index * elem_ptr.elem_ty.abiSize(target))), ); }, + .opt_payload_ptr => { + const opt_payload_ptr = parent_ptr.castTag(.opt_payload_ptr).?.data; + return lowerParentPtr( + bin_file, + src_loc, + typed_value, + opt_payload_ptr.container_ptr, + code, + debug_output, + reloc_info, + ); + }, + .eu_payload_ptr => { + const eu_payload_ptr = parent_ptr.castTag(.eu_payload_ptr).?.data; + const pl_ty = eu_payload_ptr.container_ty.errorUnionPayload(); + return lowerParentPtr( + bin_file, + src_loc, + typed_value, + eu_payload_ptr.container_ptr, + code, + debug_output, + reloc_info.offset(@intCast(u32, errUnionPayloadOffset(pl_ty, target))), + ); + }, .variable, .decl_ref, .decl_ref_mut => |tag| return lowerDeclRef( bin_file, src_loc, @@ -1262,9 +1286,10 @@ pub fn genTypedValue( } pub fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u64 { + if (!payload_ty.hasRuntimeBitsIgnoreComptime()) return 0; const payload_align = payload_ty.abiAlignment(target); const error_align = 
Type.anyerror.abiAlignment(target); - if (payload_align >= error_align) { + if (payload_align >= error_align or !payload_ty.hasRuntimeBitsIgnoreComptime()) { return 0; } else { return mem.alignForwardGeneric(u64, Type.anyerror.abiSize(target), payload_align); @@ -1272,9 +1297,10 @@ pub fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u64 { } pub fn errUnionErrorOffset(payload_ty: Type, target: std.Target) u64 { + if (!payload_ty.hasRuntimeBitsIgnoreComptime()) return 0; const payload_align = payload_ty.abiAlignment(target); const error_align = Type.anyerror.abiAlignment(target); - if (payload_align >= error_align) { + if (payload_align >= error_align and payload_ty.hasRuntimeBitsIgnoreComptime()) { return mem.alignForwardGeneric(u64, payload_ty.abiSize(target), error_align); } else { return 0; diff --git a/test/behavior/optional.zig b/test/behavior/optional.zig index 34d8337608..e62065cf25 100644 --- a/test/behavior/optional.zig +++ b/test/behavior/optional.zig @@ -74,7 +74,6 @@ test "optional with void type" { test "address of unwrap optional" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; @@ -365,7 +364,6 @@ test "optional pointer to zero bit optional payload" { } test "optional pointer to zero bit error union payload" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO From 904ffb41de9caa3f8f99806518d719beef832b7c Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 13 May 
2023 02:51:46 -0400 Subject: [PATCH 12/24] x86_64: implement calling function references --- src/arch/x86_64/CodeGen.zig | 18 +++++++++++------- test/behavior/bugs/1277.zig | 1 - test/behavior/bugs/12801-2.zig | 1 - test/behavior/fn.zig | 1 - 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 4aa2443295..e4f28e34cf 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -7378,11 +7378,15 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier // on linking. const mod = self.bin_file.options.module.?; if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; - + if (if (func_value.castTag(.function)) |func_payload| + func_payload.data.owner_decl + else if (func_value.castTag(.decl_ref)) |decl_ref_payload| + decl_ref_payload.data + else + null) |owner_decl| + { if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); + const atom_index = try elf_file.getOrCreateAtomForDecl(owner_decl); const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); @@ -7391,17 +7395,17 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier .disp = @intCast(i32, got_addr), })); } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl); + const atom = try coff_file.getOrCreateAtomForDecl(owner_decl); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); + const atom = try 
macho_file.getOrCreateAtomForDecl(owner_decl); const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { - const decl_block_index = try p9.seeDecl(func.owner_decl); + const decl_block_index = try p9.seeDecl(owner_decl); const decl_block = p9.getDeclBlock(decl_block_index); const ptr_bits = self.target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); diff --git a/test/behavior/bugs/1277.zig b/test/behavior/bugs/1277.zig index 64f8430cdb..d5312a378e 100644 --- a/test/behavior/bugs/1277.zig +++ b/test/behavior/bugs/1277.zig @@ -14,7 +14,6 @@ fn f() i32 { test "don't emit an LLVM global for a const function when it's in an optional in a struct" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/bugs/12801-2.zig b/test/behavior/bugs/12801-2.zig index e6243487b0..6b145e9925 100644 --- a/test/behavior/bugs/12801-2.zig +++ b/test/behavior/bugs/12801-2.zig @@ -16,7 +16,6 @@ const Auto = struct { test { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/fn.zig b/test/behavior/fn.zig index 53f964c70a..c84eb48d2e 100644 --- a/test/behavior/fn.zig +++ b/test/behavior/fn.zig @@ -502,7 +502,6 
@@ test "method call with optional pointer first param" { } test "using @ptrCast on function pointers" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 72b4657053f94222c735b7c51c380649ce23c30e Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 13 May 2023 03:42:20 -0400 Subject: [PATCH 13/24] Dwarf: fix overflow write byte_size --- src/link/Dwarf.zig | 94 ++++++++++++------------ test/behavior/int_comparison_elision.zig | 1 - 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 2ec0dedc6f..c134f60316 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -184,12 +184,14 @@ pub const DeclState = struct { try dbg_info_buffer.append(@enumToInt(AbbrevKind.pad1)); }, .Bool => { - try dbg_info_buffer.appendSlice(&[_]u8{ - @enumToInt(AbbrevKind.base_type), - DW.ATE.boolean, // DW.AT.encoding , DW.FORM.data1 - 1, // DW.AT.byte_size, DW.FORM.data1 - 'b', 'o', 'o', 'l', 0, // DW.AT.name, DW.FORM.string - }); + try dbg_info_buffer.ensureUnusedCapacity(12); + dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.base_type)); + // DW.AT.encoding, DW.FORM.data1 + dbg_info_buffer.appendAssumeCapacity(DW.ATE.boolean); + // DW.AT.byte_size, DW.FORM.udata + try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target)); + // DW.AT.name, DW.FORM.string + try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)}); }, .Int => { const info = ty.intInfo(target); @@ -200,9 +202,9 @@ pub const DeclState = struct { .signed => DW.ATE.signed, .unsigned => DW.ATE.unsigned, }); - // DW.AT.byte_size, DW.FORM.data1 - dbg_info_buffer.appendAssumeCapacity(@intCast(u8, ty.abiSize(target))); - // DW.AT.name, DW.FORM.string + // DW.AT.byte_size, DW.FORM.udata 
+ try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target)); + // DW.AT.name, DW.FORM.string try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)}); }, .Optional => { @@ -211,9 +213,9 @@ pub const DeclState = struct { dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.base_type)); // DW.AT.encoding, DW.FORM.data1 dbg_info_buffer.appendAssumeCapacity(DW.ATE.address); - // DW.AT.byte_size, DW.FORM.data1 - dbg_info_buffer.appendAssumeCapacity(@intCast(u8, ty.abiSize(target))); - // DW.AT.name, DW.FORM.string + // DW.AT.byte_size, DW.FORM.udata + try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target)); + // DW.AT.name, DW.FORM.string try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)}); } else { // Non-pointer optionals are structs: struct { .maybe = *, .val = * } @@ -221,7 +223,7 @@ pub const DeclState = struct { const payload_ty = ty.optionalChild(buf); // DW.AT.structure_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type)); - // DW.AT.byte_size, DW.FORM.sdata + // DW.AT.byte_size, DW.FORM.udata const abi_size = ty.abiSize(target); try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size); // DW.AT.name, DW.FORM.string @@ -236,7 +238,7 @@ pub const DeclState = struct { var index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, Type.bool, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try dbg_info_buffer.ensureUnusedCapacity(6); dbg_info_buffer.appendAssumeCapacity(0); // DW.AT.member @@ -248,7 +250,7 @@ pub const DeclState = struct { index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata const offset = abi_size - payload_ty.abiSize(target); try 
leb128.writeULEB128(dbg_info_buffer.writer(), offset); // DW.AT.structure_type delimit children @@ -263,8 +265,8 @@ pub const DeclState = struct { // DW.AT.structure_type try dbg_info_buffer.ensureUnusedCapacity(2); dbg_info_buffer.appendAssumeCapacity(@enumToInt(AbbrevKind.struct_type)); - // DW.AT.byte_size, DW.FORM.sdata - dbg_info_buffer.appendAssumeCapacity(ptr_bytes * 2); + // DW.AT.byte_size, DW.FORM.udata + try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target)); // DW.AT.name, DW.FORM.string try dbg_info_buffer.writer().print("{}\x00", .{ty.fmt(module)}); // DW.AT.member @@ -279,7 +281,7 @@ pub const DeclState = struct { var buf = try arena.create(Type.SlicePtrFieldTypeBuffer); const ptr_ty = ty.slicePtrFieldType(buf); try self.addTypeRelocGlobal(atom_index, ptr_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try dbg_info_buffer.ensureUnusedCapacity(6); dbg_info_buffer.appendAssumeCapacity(0); // DW.AT.member @@ -291,7 +293,7 @@ pub const DeclState = struct { index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, Type.usize, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try dbg_info_buffer.ensureUnusedCapacity(2); dbg_info_buffer.appendAssumeCapacity(ptr_bytes); // DW.AT.structure_type delimit children @@ -329,9 +331,8 @@ pub const DeclState = struct { .Struct => blk: { // DW.AT.structure_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type)); - // DW.AT.byte_size, DW.FORM.sdata - const abi_size = ty.abiSize(target); - try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size); + // DW.AT.byte_size, DW.FORM.udata + try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target)); switch (ty.tag()) { .tuple, .anon_struct => { @@ -348,7 +349,7 @@ pub const DeclState = struct { var index = dbg_info_buffer.items.len; try 
dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, field, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata const field_off = ty.structFieldOffset(field_index, target); try leb128.writeULEB128(dbg_info_buffer.writer(), field_off); } @@ -380,7 +381,7 @@ pub const DeclState = struct { var index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, field.ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata const field_off = ty.structFieldOffset(field_index, target); try leb128.writeULEB128(dbg_info_buffer.writer(), field_off); } @@ -393,9 +394,8 @@ pub const DeclState = struct { .Enum => { // DW.AT.enumeration_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.enum_type)); - // DW.AT.byte_size, DW.FORM.sdata - const abi_size = ty.abiSize(target); - try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size); + // DW.AT.byte_size, DW.FORM.udata + try leb128.writeULEB128(dbg_info_buffer.writer(), ty.abiSize(target)); // DW.AT.name, DW.FORM.string const enum_name = try ty.nameAllocArena(arena, module); try dbg_info_buffer.ensureUnusedCapacity(enum_name.len + 1); @@ -446,7 +446,7 @@ pub const DeclState = struct { if (is_tagged) { // DW.AT.structure_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type)); - // DW.AT.byte_size, DW.FORM.sdata + // DW.AT.byte_size, DW.FORM.udata try leb128.writeULEB128(dbg_info_buffer.writer(), layout.abi_size); // DW.AT.name, DW.FORM.string try dbg_info_buffer.ensureUnusedCapacity(union_name.len + 1); @@ -463,13 +463,13 @@ pub const DeclState = struct { const inner_union_index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(inner_union_index + 4); try self.addTypeRelocLocal(atom_index, @intCast(u32, inner_union_index), 5); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, 
DW.FORM.udata try leb128.writeULEB128(dbg_info_buffer.writer(), payload_offset); } // DW.AT.union_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.union_type)); - // DW.AT.byte_size, DW.FORM.sdata, + // DW.AT.byte_size, DW.FORM.udata, try leb128.writeULEB128(dbg_info_buffer.writer(), layout.payload_size); // DW.AT.name, DW.FORM.string if (is_tagged) { @@ -490,7 +490,7 @@ pub const DeclState = struct { const index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, field.ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try dbg_info_buffer.append(0); } // DW.AT.union_type delimit children @@ -507,7 +507,7 @@ pub const DeclState = struct { const index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, union_obj.tag_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try leb128.writeULEB128(dbg_info_buffer.writer(), tag_offset); // DW.AT.structure_type delimit children @@ -534,7 +534,7 @@ pub const DeclState = struct { // DW.AT.structure_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.struct_type)); - // DW.AT.byte_size, DW.FORM.sdata + // DW.AT.byte_size, DW.FORM.udata try leb128.writeULEB128(dbg_info_buffer.writer(), abi_size); // DW.AT.name, DW.FORM.string const name = try ty.nameAllocArena(arena, module); @@ -551,7 +551,7 @@ pub const DeclState = struct { const index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try self.addTypeRelocGlobal(atom_index, payload_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try leb128.writeULEB128(dbg_info_buffer.writer(), payload_off); } @@ -566,7 +566,7 @@ pub const DeclState = struct { const index = dbg_info_buffer.items.len; try dbg_info_buffer.resize(index + 4); try 
self.addTypeRelocGlobal(atom_index, error_ty, @intCast(u32, index)); - // DW.AT.data_member_location, DW.FORM.sdata + // DW.AT.data_member_location, DW.FORM.udata try leb128.writeULEB128(dbg_info_buffer.writer(), error_off); } @@ -647,8 +647,8 @@ pub const DeclState = struct { try dbg_info.ensureUnusedCapacity(5 + name_with_null.len); const index = dbg_info.items.len; - try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4 - try self.addTypeRelocGlobal(atom_index, ty, @intCast(u32, index)); // DW.AT.type, DW.FORM.ref4 + try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4 + try self.addTypeRelocGlobal(atom_index, ty, @intCast(u32, index)); // DW.AT.type, DW.FORM.ref4 dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string } @@ -790,7 +790,7 @@ pub const DeclState = struct { try dbg_info.ensureUnusedCapacity(5 + name_with_null.len); const index = dbg_info.items.len; - try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4 + try dbg_info.resize(index + 4); // dw.at.type, dw.form.ref4 try self.addTypeRelocGlobal(atom_index, child_ty, @intCast(u32, index)); dbg_info.appendSliceAssumeCapacity(name_with_null); // DW.AT.name, DW.FORM.string } @@ -993,13 +993,13 @@ pub fn initDeclState(self: *Dwarf, mod: *Module, decl_index: Module.Decl.Index) // "relocations" and have to be in this fixed place so that functions can be // moved in virtual address space. 
assert(dbg_info_low_pc_reloc_index == dbg_info_buffer.items.len); - dbg_info_buffer.items.len += ptr_width_bytes; // DW.AT.low_pc, DW.FORM.addr + dbg_info_buffer.items.len += ptr_width_bytes; // DW.AT.low_pc, DW.FORM.addr assert(self.getRelocDbgInfoSubprogramHighPC() == dbg_info_buffer.items.len); - dbg_info_buffer.items.len += 4; // DW.AT.high_pc, DW.FORM.data4 + dbg_info_buffer.items.len += 4; // DW.AT.high_pc, DW.FORM.data4 // if (fn_ret_has_bits) { try decl_state.addTypeRelocGlobal(di_atom_index, fn_ret_type, @intCast(u32, dbg_info_buffer.items.len)); - dbg_info_buffer.items.len += 4; // DW.AT.type, DW.FORM.ref4 + dbg_info_buffer.items.len += 4; // DW.AT.type, DW.FORM.ref4 } dbg_info_buffer.appendSliceAssumeCapacity(decl_name_with_null); // DW.AT.name, DW.FORM.string @@ -1619,7 +1619,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void { DW.AT.encoding, DW.FORM.data1, DW.AT.byte_size, - DW.FORM.data1, + DW.FORM.udata, DW.AT.name, DW.FORM.string, 0, @@ -1635,7 +1635,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void { DW.TAG.structure_type, DW.CHILDREN.yes, // header DW.AT.byte_size, - DW.FORM.sdata, + DW.FORM.udata, DW.AT.name, DW.FORM.string, 0, @@ -1648,14 +1648,14 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void { DW.AT.type, DW.FORM.ref4, DW.AT.data_member_location, - DW.FORM.sdata, + DW.FORM.udata, 0, 0, // table sentinel @enumToInt(AbbrevKind.enum_type), DW.TAG.enumeration_type, DW.CHILDREN.yes, // header DW.AT.byte_size, - DW.FORM.sdata, + DW.FORM.udata, DW.AT.name, DW.FORM.string, 0, @@ -1673,7 +1673,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void { DW.TAG.union_type, DW.CHILDREN.yes, // header DW.AT.byte_size, - DW.FORM.sdata, + DW.FORM.udata, DW.AT.name, DW.FORM.string, 0, @@ -2628,7 +2628,7 @@ fn addDbgInfoErrorSet( // DW.AT.enumeration_type try dbg_info_buffer.append(@enumToInt(AbbrevKind.enum_type)); - // DW.AT.byte_size, DW.FORM.sdata + // DW.AT.byte_size, DW.FORM.udata const abi_size = Type.anyerror.abiSize(target); try 
leb128.writeULEB128(dbg_info_buffer.writer(), abi_size); // DW.AT.name, DW.FORM.string diff --git a/test/behavior/int_comparison_elision.zig b/test/behavior/int_comparison_elision.zig index ecaa176ebb..32c670fff4 100644 --- a/test/behavior/int_comparison_elision.zig +++ b/test/behavior/int_comparison_elision.zig @@ -15,7 +15,6 @@ test "int comparison elision" { // TODO: support int types > 128 bits wide in other backends if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO From e08eab664861461b0adbe7984881f72b5a36a979 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 13 May 2023 14:06:26 -0400 Subject: [PATCH 14/24] x86_64: add missing encoding feature requirements --- src/arch/x86_64/Encoding.zig | 3 +++ src/arch/x86_64/encodings.zig | 18 +++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 073128b85e..537a03fa2a 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -691,8 +691,11 @@ pub const Feature = enum { none, avx, avx2, + bmi, f16c, fma, + lzcnt, + popcnt, sse, sse2, sse3, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index f56f31da7f..a7a50867c3 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -354,9 +354,9 @@ pub const table = [_]Entry{ .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none, .none }, .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long, .none }, - .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .none }, - .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .none }, - .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .none }, + .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 
0x0f, 0xbd }, 0, .short, .lzcnt }, + .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt }, + .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt }, .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, @@ -482,9 +482,9 @@ pub const table = [_]Entry{ .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .none }, - .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .none }, - .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .none }, + .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt }, + .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt }, + .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt }, .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, .none }, .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, @@ -784,9 +784,9 @@ pub const table = [_]Entry{ .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .none }, - .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .none }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .none }, + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi }, + .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, From b6d61028508c5b1e1961a124bc17d4d9bda9686f Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sat, 13 May 2023 18:06:16 -0400 Subject: [PATCH 15/24] x86_64: reimplement `@floatToInt` --- src/arch/x86_64/CodeGen.zig | 181 
+++++++++++++++++----------------- src/arch/x86_64/Encoding.zig | 163 ++++++++++++++++-------------- src/arch/x86_64/Mir.zig | 46 +++++++-- src/arch/x86_64/bits.zig | 83 +++++++--------- src/arch/x86_64/encodings.zig | 111 +++++++++++++++++---- src/link/Dwarf.zig | 92 ++++++++++++----- test/behavior/cast.zig | 1 - 7 files changed, 422 insertions(+), 255 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e4f28e34cf..e5c6925596 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2501,12 +2501,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } } else if (src_bits == 64 and dst_bits == 32) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ .v_, .cvtsd2ss }, + .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegisterRegister( - .{ .v_, .cvtsd2ss }, + .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2514,11 +2514,11 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .cvtsd2ss }, + .{ ._ss, .cvtsd2 }, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .{ ._, .cvtsd2ss }, + .{ ._ss, .cvtsd2 }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -2552,22 +2552,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? 
else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); switch (dst_bits) { 32 => {}, - 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg), + 64 => try self.asmRegisterRegisterRegister(.{ .v_sd, .cvtss2 }, dst_reg, dst_reg, dst_reg), else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), } } else if (src_bits == 32 and dst_bits == 64) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ .v_, .cvtss2sd }, + .{ .v_sd, .cvtss2 }, dst_reg, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegisterRegister( - .{ .v_, .cvtss2sd }, + .{ .v_sd, .cvtss2 }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2575,11 +2575,11 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .cvtss2sd }, + .{ ._sd, .cvtss2 }, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegister( - .{ ._, .cvtss2sd }, + .{ ._sd, .cvtss2 }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -4789,7 +4789,6 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 })) |tag| tag else return self.fail("TODO implement genRound for {}", .{ ty.fmt(self.bin_file.options.module.?), }); - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_reg, abi_size); switch (mir_tag[0]) { @@ -4848,7 +4847,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? 
else try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); try self.asmRegisterRegisterImmediate( .{ .v_, .cvtps2ph }, @@ -4868,7 +4867,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { 1 => { try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -4892,13 +4891,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, wide_reg, src_mcv.mem(Memory.PtrSize.fromSize( @intCast(u32, @divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, wide_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
@@ -6347,7 +6346,7 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { @@ -6424,7 +6423,7 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { @@ -6467,7 +6466,7 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); try self.asmRegisterRegisterRegister( .{ .v_ps, .movhl }, tmp_reg, @@ -6501,13 +6500,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? 
@@ -6541,13 +6540,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, src_mcv.mem(.xword), ) else try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -7199,13 +7198,13 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void { switch (self.debug_output) { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { - .register => |reg| .{ .register = reg.dwarfLocOp() }, + .register => |reg| .{ .register = reg.dwarfNum() }, // TODO use a frame index .load_frame => return, //.stack_offset => |off| .{ // .stack = .{ // // TODO handle -fomit-frame-pointer - // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .fp_register = Register.rbp.dwarfNum(), // .offset = -off, // }, //}, @@ -7237,11 +7236,11 @@ fn genVarDbgInfo( switch (self.debug_output) { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { - .register => |reg| .{ .register = reg.dwarfLocOp() }, + .register => |reg| .{ .register = reg.dwarfNum() }, // TODO use a frame index .load_frame, .lea_frame => return, //=> |off| .{ .stack = .{ - // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .fp_register = Register.rbp.dwarfNum(), // .offset = -off, //} }, .memory => |address| .{ .memory = address }, @@ -7595,7 +7594,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg); try 
self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); } else return self.fail("TODO implement airCmp for {}", .{ @@ -8862,14 +8861,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr } }, .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( - if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) + if ((dst_reg.class() == .sse) == (src_reg.class() == .sse)) switch (ty.zigTypeTag()) { else => .{ ._, .mov }, .Float, .Vector => .{ ._ps, .mova }, } else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr }, + if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr }, registerAlias(dst_reg, 4), registerAlias(src_reg, 4), Immediate.u(0), @@ -9222,7 +9221,7 @@ fn genInlineMemcpyRegisterRegister( try self.asmMemoryRegister( switch (src_reg.class()) { .general_purpose, .segment => .{ ._, .mov }, - .floating_point => .{ ._ss, .mov }, + .sse => .{ ._ss, .mov }, }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), registerAlias(src_reg, abi_size), @@ -9388,10 +9387,10 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { }); const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = switch (src_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(src_ty, src_mcv), - }; + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv); const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); defer self.register_manager.unlockReg(src_lock); @@ -9402,23 +9401,23 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .{ ._, .cvtsi2ss } - else - return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .{ ._, .cvtsi2sd } - else - return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement airIntToFloat from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }), - }, dst_reg.to128(), registerAlias(src_reg, src_size)); + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag()) { + .Float => switch (dst_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airIntToFloat from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); + const dst_alias = dst_reg.to128(); + const src_alias = registerAlias(src_reg, src_size); + switch (mir_tag[0]) { + .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias), + else => try 
self.asmRegisterRegister(mir_tag, dst_alias, src_alias), + } return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -9428,46 +9427,50 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { const src_ty = self.air.typeOf(ty_op.operand); const dst_ty = self.air.typeOfIndex(inst); - const operand = try self.resolveInst(ty_op.operand); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const dst_bits = @intCast(u32, dst_ty.bitSize(self.target.*)); + const dst_signedness = + if (dst_ty.isAbiInt()) dst_ty.intInfo(self.target.*).signedness else .unsigned; - switch (src_abi_size) { - 4, 8 => {}, - else => |size| return self.fail("TODO load ST(0) with abiSize={}", .{size}), - } - if (dst_abi_size > 8) { - return self.fail("TODO convert float with abiSize={}", .{dst_abi_size}); - } + const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) { + .signed => dst_bits, + .unsigned => dst_bits + 1, + }, 32), 8) catch unreachable; + if (dst_size > 8) return self.fail("TODO implement airFloatToInt from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); - // move float src to ST(0) - const frame_addr: FrameAddr = switch (operand) { - .load_frame => |frame_addr| frame_addr, - else => frame_addr: { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*)); - try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand); - break :frame_addr .{ .index = frame_index }; - }, - }; - try self.asmMemory( - .{ .f_, .ld }, - Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + const src_mcv = try self.resolveInst(ty_op.operand); + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv); + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); + + const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty)); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); + + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (src_ty.zigTypeTag()) { + .Float => switch (src_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si }, + 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airFloatToInt from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), + registerAlias(dst_reg, dst_size), + src_reg.to128(), ); - // convert - const stack_dst = try self.allocRegOrMem(inst, false); - try self.asmMemory( - .{ .f_p, .istt }, - Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ - .base = .{ .frame = stack_dst.load_frame.index }, - .disp = stack_dst.load_frame.off, - }), - ); + if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg); - return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none }); + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { @@ -10997,13 +11000,13 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { reg.to64() else unreachable, - .floating_point => if (size_bytes <= 16) + .segment, .x87, .mmx => unreachable, + .sse => if (size_bytes <= 16) reg.to128() else if (size_bytes <= 32) reg.to256() else unreachable, - .segment => unreachable, }; } diff --git a/src/arch/x86_64/Encoding.zig 
b/src/arch/x86_64/Encoding.zig index 537a03fa2a..66a249a3f2 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -233,7 +233,6 @@ pub const Mnemonic = enum { cmpxchg, cmpxchg8b, cmpxchg16b, cqo, cwd, cwde, div, - fisttp, fld, idiv, imul, int3, ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe, jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz, @@ -259,6 +258,8 @@ pub const Mnemonic = enum { @"test", tzcnt, ud2, xadd, xchg, xor, + // X87 + fisttp, fld, // MMX movd, // SSE @@ -266,7 +267,7 @@ pub const Mnemonic = enum { andps, andnps, cmpss, - cvtsi2ss, + cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si, divps, divss, maxps, maxss, minps, minss, @@ -285,7 +286,9 @@ pub const Mnemonic = enum { andpd, andnpd, //cmpsd, - cvtsd2ss, cvtsi2sd, cvtss2sd, + cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd, + cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd, + cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si, divpd, divsd, maxpd, maxsd, minpd, minsd, @@ -314,7 +317,10 @@ pub const Mnemonic = enum { // AVX vaddpd, vaddps, vaddsd, vaddss, vbroadcastf128, vbroadcastsd, vbroadcastss, - vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, + vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps, + vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss, + vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si, + vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si, vdivpd, vdivps, vdivsd, vdivss, vextractf128, vextractps, vinsertf128, vinsertps, @@ -377,80 +383,84 @@ pub const Op = enum { m, moffs, sreg, + st, mm, mm_m64, xmm, xmm_m32, xmm_m64, xmm_m128, ymm, ymm_m256, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand) Op { - switch (operand) { - .none => return .none, + return switch (operand) { + .none => .none, - .reg => |reg| { - switch (reg.class()) { - .segment => return .sreg, - .floating_point => return switch (reg.bitSize()) { - 128 => .xmm, - 256 => .ymm, + .reg => |reg| switch (reg.class()) { + 
.general_purpose => if (reg.to64() == .rax) + switch (reg) { + .al => .al, + .ax => .ax, + .eax => .eax, + .rax => .rax, else => unreachable, - }, - .general_purpose => { - if (reg.to64() == .rax) return switch (reg) { - .al => .al, - .ax => .ax, - .eax => .eax, - .rax => .rax, - else => unreachable, - }; - if (reg == .cl) return .cl; - return switch (reg.bitSize()) { - 8 => .r8, - 16 => .r16, - 32 => .r32, - 64 => .r64, - else => unreachable, - }; - }, - } - }, - - .mem => |mem| switch (mem) { - .moffs => return .moffs, - .sib, .rip => { - const bit_size = mem.bitSize(); - return switch (bit_size) { - 8 => .m8, - 16 => .m16, - 32 => .m32, - 64 => .m64, - 80 => .m80, - 128 => .m128, - 256 => .m256, - else => unreachable, - }; + } + else if (reg == .cl) + .cl + else switch (reg.bitSize()) { + 8 => .r8, + 16 => .r16, + 32 => .r32, + 64 => .r64, + else => unreachable, + }, + .segment => .sreg, + .x87 => .st, + .mmx => .mm, + .sse => switch (reg.bitSize()) { + 128 => .xmm, + 256 => .ymm, + else => unreachable, }, }, - .imm => |imm| { - switch (imm) { - .signed => |x| { - if (x == 1) return .unity; - if (math.cast(i8, x)) |_| return .imm8s; - if (math.cast(i16, x)) |_| return .imm16s; - return .imm32s; - }, - .unsigned => |x| { - if (x == 1) return .unity; - if (math.cast(i8, x)) |_| return .imm8s; - if (math.cast(u8, x)) |_| return .imm8; - if (math.cast(i16, x)) |_| return .imm16s; - if (math.cast(u16, x)) |_| return .imm16; - if (math.cast(i32, x)) |_| return .imm32s; - if (math.cast(u32, x)) |_| return .imm32; - return .imm64; - }, - } + .mem => |mem| switch (mem) { + .moffs => .moffs, + .sib, .rip => switch (mem.bitSize()) { + 8 => .m8, + 16 => .m16, + 32 => .m32, + 64 => .m64, + 80 => .m80, + 128 => .m128, + 256 => .m256, + else => unreachable, + }, }, - } + + .imm => |imm| switch (imm) { + .signed => |x| if (x == 1) + .unity + else if (math.cast(i8, x)) |_| + .imm8s + else if (math.cast(i16, x)) |_| + .imm16s + else + .imm32s, + .unsigned => |x| if (x == 1) + 
.unity + else if (math.cast(i8, x)) |_| + .imm8s + else if (math.cast(u8, x)) |_| + .imm8 + else if (math.cast(i16, x)) |_| + .imm16s + else if (math.cast(u16, x)) |_| + .imm16 + else if (math.cast(i32, x)) |_| + .imm32s + else if (math.cast(u32, x)) |_| + .imm32 + else + .imm64, + }, + }; } pub fn immBitSize(op: Op) u64 { @@ -460,6 +470,7 @@ pub const Op = enum { .ax, .r16, .rm16 => unreachable, .eax, .r32, .rm32, .r32_m16 => unreachable, .rax, .r64, .rm64, .r64_m16 => unreachable, + .st, .mm, .mm_m64 => unreachable, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, .ymm, .ymm_m256 => unreachable, .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, @@ -480,7 +491,8 @@ pub const Op = enum { .al, .cl, .r8, .rm8 => 8, .ax, .r16, .rm16 => 16, .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32, - .rax, .r64, .rm64, .r64_m16 => 64, + .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64, + .st => 80, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, .ymm, .ymm_m256 => 256, }; @@ -491,11 +503,11 @@ pub const Op = enum { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, - .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable, .m8, .rm8, .r32_m8 => 8, .m16, .rm16, .r32_m16, .r64_m16 => 16, .m32, .rm32, .xmm_m32 => 32, - .m64, .rm64, .xmm_m64 => 64, + .m64, .rm64, .mm_m64, .xmm_m64 => 64, .m80 => 80, .m128, .xmm_m128 => 128, .m256, .ymm_m256 => 256, @@ -522,6 +534,7 @@ pub const Op = enum { .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, .r32_m8, .r32_m16, .r64_m16, + .st, .mm, .mm_m64, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, .ymm, .ymm_m256, => true, @@ -550,6 +563,7 @@ pub const Op = enum { .r32_m8, .r32_m16, .r64_m16, .m8, .m16, .m32, .m64, .m80, .m128, .m256, .m, + .mm_m64, .xmm_m32, .xmm_m64, .xmm_m128, .ymm_m256, => true, @@ -573,8 +587,10 @@ pub 
const Op = enum { .rm8, .rm16, .rm32, .rm64 => .general_purpose, .r32_m8, .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, - .ymm, .ymm_m256 => .floating_point, + .st => .x87, + .mm, .mm_m64 => .mmx, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse, + .ymm, .ymm_m256 => .sse, }; } @@ -695,6 +711,7 @@ pub const Feature = enum { f16c, fma, lzcnt, + movbe, popcnt, sse, sse2, @@ -717,7 +734,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(20_000); + @setEvalBranchQuota(25_000); const encodings = @import("encodings.zig"); var entries = encodings.table; std.sort.sort(encodings.Entry, &entries, {}, struct { diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index f26bf97e82..ef8bbe07b3 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -439,8 +439,21 @@ pub const Inst = struct { /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, + /// Convert packed doubleword integers to packed single-precision floating-point values + /// Convert packed doubleword integers to packed double-precision floating-point values + cvtpi2, + /// Convert packed single-precision floating-point values to packed doubleword integers + cvtps2pi, /// Convert doubleword integer to scalar single-precision floating-point value - cvtsi2ss, + /// Convert doubleword integer to scalar double-precision floating-point value + cvtsi2, + /// Convert scalar single-precision floating-point value to doubleword integer + cvtss2si, + /// Convert with truncation packed single-precision floating-point values to packed doubleword integers + cvttps2pi, + /// Convert with truncation scalar single-precision floating-point value to doubleword integer + cvttss2si, + /// Maximum of packed single-precision floating-point 
values /// Maximum of scalar single-precision floating-point values /// Maximum of packed double-precision floating-point values @@ -486,12 +499,33 @@ pub const Inst = struct { /// Unpack and interleave low packed double-precision floating-point values unpckl, + /// Convert packed doubleword integers to packed single-precision floating-point values + /// Convert packed doubleword integers to packed double-precision floating-point values + cvtdq2, + /// Convert packed double-precision floating-point values to packed doubleword integers + cvtpd2dq, + /// Convert packed double-precision floating-point values to packed doubleword integers + cvtpd2pi, + /// Convert packed double-precision floating-point values to packed single-precision floating-point values + cvtpd2, + /// Convert packed single-precision floating-point values to packed doubleword integers + cvtps2dq, + /// Convert packed single-precision floating-point values to packed double-precision floating-point values + cvtps2, + /// Convert scalar double-precision floating-point value to doubleword integer + cvtsd2si, /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value - cvtsd2ss, - /// Convert doubleword integer to scalar double-precision floating-point value - cvtsi2sd, + cvtsd2, /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value - cvtss2sd, + cvtss2, + /// Convert with truncation packed double-precision floating-point values to packed doubleword integers + cvttpd2dq, + /// Convert with truncation packed double-precision floating-point values to packed doubleword integers + cvttpd2pi, + /// Convert with truncation packed single-precision floating-point values to packed doubleword integers + cvttps2dq, + /// Convert with truncation scalar double-precision floating-point value to doubleword integer + cvttsd2si, /// Packed interleave shuffle of quadruplets of single-precision floating-point values /// Packed 
interleave shuffle of pairs of double-precision floating-point values shuf, @@ -542,7 +576,7 @@ pub const Inst = struct { broadcast, /// Convert 16-bit floating-point values to single-precision floating-point values - cvtph2ps, + cvtph2, /// Convert single-precision floating-point values to 16-bit floating-point values cvtps2ph, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 3343f280b9..923ba31266 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -175,15 +175,21 @@ pub const Register = enum(u7) { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + + st0, st1, st2, st3, st4, st5, st6, st7, + es, cs, ss, ds, fs, gs, none, // zig fmt: on - pub const Class = enum(u2) { + pub const Class = enum { general_purpose, - floating_point, segment, + x87, + mmx, + sse, }; pub fn class(reg: Register) Class { @@ -195,8 +201,10 @@ pub const Register = enum(u7) { @enumToInt(Register.al) ... @enumToInt(Register.r15b) => .general_purpose, @enumToInt(Register.ah) ... @enumToInt(Register.bh) => .general_purpose, - @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .floating_point, - @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .floating_point, + @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .sse, + @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .sse, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => .mmx, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => .x87, @enumToInt(Register.es) ... @enumToInt(Register.gs) => .segment, @@ -216,8 +224,10 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0) - 16, @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0) - 16, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0) - 32, + @enumToInt(Register.st0) ... 
@enumToInt(Register.st7) => @enumToInt(Register.st0) - 40, - @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 32, + @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 48, else => unreachable, // zig fmt: on @@ -236,6 +246,8 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => 256, @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => 128, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => 64, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => 80, @enumToInt(Register.es) ... @enumToInt(Register.gs) => 16, @@ -271,6 +283,8 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0), @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0), + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0), + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0), @enumToInt(Register.es) ... 
@enumToInt(Register.gs) => @enumToInt(Register.es), @@ -326,8 +340,8 @@ pub const Register = enum(u7) { return @intToEnum(Register, @enumToInt(reg) - reg.gpBase() + @enumToInt(Register.al)); } - fn fpBase(reg: Register) u7 { - assert(reg.class() == .floating_point); + fn sseBase(reg: Register) u7 { + assert(reg.class() == .sse); return switch (@enumToInt(reg)) { @enumToInt(Register.ymm0)...@enumToInt(Register.ymm15) => @enumToInt(Register.ymm0), @enumToInt(Register.xmm0)...@enumToInt(Register.xmm15) => @enumToInt(Register.xmm0), @@ -336,49 +350,24 @@ pub const Register = enum(u7) { } pub fn to256(reg: Register) Register { - return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.ymm0)); + return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.ymm0)); } pub fn to128(reg: Register) Register { - return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.xmm0)); + return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.xmm0)); } - pub fn dwarfLocOp(reg: Register) u8 { + /// DWARF register encoding + pub fn dwarfNum(reg: Register) u6 { return switch (reg.class()) { - .general_purpose => switch (reg.to64()) { - .rax => DW.OP.reg0, - .rdx => DW.OP.reg1, - .rcx => DW.OP.reg2, - .rbx => DW.OP.reg3, - .rsi => DW.OP.reg4, - .rdi => DW.OP.reg5, - .rbp => DW.OP.reg6, - .rsp => DW.OP.reg7, - else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.reg0, - }, - .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.reg17, - else => unreachable, - }; - } - - /// DWARF encodings that push a value onto the DWARF stack that is either - /// the contents of a register or the result of adding the contents a given - /// register to a given signed offset. 
- pub fn dwarfLocOpDeref(reg: Register) u8 { - return switch (reg.class()) { - .general_purpose => switch (reg.to64()) { - .rax => DW.OP.breg0, - .rdx => DW.OP.breg1, - .rcx => DW.OP.breg2, - .rbx => DW.OP.breg3, - .rsi => DW.OP.breg4, - .rdi => DW.OP.breg5, - .rbp => DW.OP.breg6, - .rsp => DW.OP.breg7, - else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.breg0, - }, - .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.breg17, - else => unreachable, + .general_purpose => if (reg.isExtended()) + reg.enc() + else + @truncate(u3, @as(u24, 0o54673120) >> @as(u5, reg.enc()) * 3), + .sse => 17 + @as(u6, reg.enc()), + .x87 => 33 + @as(u6, reg.enc()), + .mmx => 41 + @as(u6, reg.enc()), + .segment => 50 + @as(u6, reg.enc()), }; } }; @@ -392,6 +381,8 @@ test "Register id - different classes" { try expect(Register.ymm0.id() == 0b10000); try expect(Register.ymm0.id() != Register.rax.id()); try expect(Register.xmm0.id() == Register.ymm0.id()); + try expect(Register.xmm0.id() != Register.mm0.id()); + try expect(Register.mm0.id() != Register.st0.id()); try expect(Register.es.id() == 0b100000); } @@ -407,7 +398,9 @@ test "Register enc - different classes" { test "Register classes" { try expect(Register.r11.class() == .general_purpose); - try expect(Register.ymm11.class() == .floating_point); + try expect(Register.ymm11.class() == .sse); + try expect(Register.mm3.class() == .mmx); + try expect(Register.st3.class() == .x87); try expect(Register.fs.class() == .segment); } diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index a7a50867c3..3383315bd6 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -272,14 +272,6 @@ pub const table = [_]Entry{ .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, - .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, - .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, - .{ .fisttp, .m, &.{ 
.m64 }, &.{ 0xdd }, 1, .none, .x87 }, - - .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, - .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, - .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none }, @@ -395,12 +387,12 @@ pub const table = [_]Entry{ .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, - .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .none }, - .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, - .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none }, - .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .none }, - .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, - .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none }, + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe }, + .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .movbe }, + .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .movbe }, + .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .movbe }, + .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .movbe }, + .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .movbe }, .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, @@ -836,6 +828,15 @@ pub const table = [_]Entry{ .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, + // X87 + .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd 
}, 1, .none, .x87 }, + + .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, + // SSE .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse }, @@ -847,9 +848,21 @@ pub const table = [_]Entry{ .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, + .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse }, + + .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse }, .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse }, + .{ .cvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .none, .sse }, + .{ .cvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .long, .sse }, + + .{ .cvttps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2c }, 0, .none, .sse }, + + .{ .cvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .none, .sse }, + .{ .cvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .long, .sse }, + .{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse }, .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, @@ -906,6 +919,25 @@ pub const table = [_]Entry{ .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, + .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvtpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2d }, 0, .none, .sse2 }, + + .{ .cvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtpi2pd, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x66, 0x0f, 0x2a }, 0, .none, .sse2 }, + + .{ .cvtps2dq, .rm, 
&.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .none, .sse2 }, + .{ .cvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .long, .sse2 }, + .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 }, .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 }, @@ -913,6 +945,15 @@ pub const table = [_]Entry{ .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 }, + .{ .cvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvttpd2pi, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2c }, 0, .none, .sse2 }, + + .{ .cvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .none, .sse2 }, + .{ .cvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .long, .sse2 }, + .{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 }, .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, @@ -1034,15 +1075,51 @@ pub const table = [_]Entry{ .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx }, .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvtdq2ps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvtpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0xf2, 0x0f, 
0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_128_wig, .avx }, + .{ .vcvtpd2ps, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_256_wig, .avx }, + + .{ .vcvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvtps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .vex_128_wig, .avx }, + .{ .vcvtps2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x0f, 0x5a }, 0, .vex_256_wig, .avx }, + + .{ .vcvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w0, .sse2 }, + .{ .vcvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w1, .sse2 }, + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, - .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx }, + .{ .vcvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx }, + + .{ .vcvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvttpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe6 
}, 0, .vex_256_wig, .avx }, + + .{ .vcvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvttps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w0, .sse2 }, + .{ .vcvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w1, .sse2 }, + + .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx }, + .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx }, .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index c134f60316..1a064049fc 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -608,23 +608,44 @@ pub const DeclState = struct { switch (loc) { .register => |reg| { - try dbg_info.ensureUnusedCapacity(3); + try dbg_info.ensureUnusedCapacity(4); dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // ULEB128 dwarf expression length - reg, - }); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (reg < 32) { + expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.regx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), reg) catch unreachable; + } + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (reg < 32) { + dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg); + } else { + dbg_info.appendAssumeCapacity(DW.OP.regx); + leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable; + } }, .stack => |info| { - try dbg_info.ensureUnusedCapacity(8); + try dbg_info.ensureUnusedCapacity(9); 
dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); - const fixup = dbg_info.items.len; - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // we will backpatch it after we encode the displacement in LEB128 - info.fp_register, // frame pointer - }); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (info.fp_register < 32) { + expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.bregx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable; + } + leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable; + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (info.fp_register < 32) { + dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register); + } else { + dbg_info.appendAssumeCapacity(DW.OP.bregx); + leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable; + } leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable; - dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, .wasm_local => |value| { const leb_size = link.File.Wasm.getULEB128Size(value); @@ -670,22 +691,45 @@ pub const DeclState = struct { switch (loc) { .register => |reg| { - try dbg_info.ensureUnusedCapacity(2); - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // ULEB128 dwarf expression length - reg, - }); + try dbg_info.ensureUnusedCapacity(4); + dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (reg < 32) { + expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.regx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), reg) catch unreachable; + } + 
leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (reg < 32) { + dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg); + } else { + dbg_info.appendAssumeCapacity(DW.OP.regx); + leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable; + } }, .stack => |info| { - try dbg_info.ensureUnusedCapacity(7); - const fixup = dbg_info.items.len; - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // we will backpatch it after we encode the displacement in LEB128 - info.fp_register, - }); + try dbg_info.ensureUnusedCapacity(9); + dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (info.fp_register < 32) { + expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.bregx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable; + } + leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable; + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (info.fp_register < 32) { + dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register); + } else { + dbg_info.appendAssumeCapacity(DW.OP.bregx); + leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable; + } leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable; - dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, .wasm_local => |value| { diff --git a/test/behavior/cast.zig b/test/behavior/cast.zig index 20f84184a0..d6717032ff 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -153,7 +153,6 @@ test "@intToFloat(f80)" { test "@floatToInt" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == 
.stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; From 6c6d8d67cfe14c50684c04a579c1e62bf287e8cb Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 14 May 2023 05:12:46 -0400 Subject: [PATCH 16/24] x86_64: redo movement, float negation, and `@fabs` --- src/arch/x86_64/CodeGen.zig | 357 +++++++++++++++++++-------- src/arch/x86_64/Encoding.zig | 18 +- src/arch/x86_64/Mir.zig | 12 + src/arch/x86_64/encodings.zig | 75 +++++- src/type.zig | 12 +- test/behavior/floatop.zig | 1 - test/behavior/math.zig | 1 - test/behavior/translate_c_macros.zig | 1 - 8 files changed, 359 insertions(+), 118 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e5c6925596..80f537e046 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4681,61 +4681,136 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { } fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { + const tag = self.air.instructions.items(.tag)[inst]; const un_op = self.air.instructions.items(.data)[inst].un_op; const ty = self.air.typeOf(un_op); - const ty_bits = ty.floatBits(self.target.*); + const abi_size: u32 = switch (ty.abiSize(self.target.*)) { + 1...16 => 16, + 17...32 => 32, + else => return self.fail("TODO implement airFloatSign for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + }; + const scalar_bits = ty.scalarType().floatBits(self.target.*); + + const src_mcv = try self.resolveInst(un_op); + const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + + const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else if (self.hasFeature(.avx)) + .{ .register = 
try self.register_manager.allocReg(inst, sse) } + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); var arena = std.heap.ArenaAllocator.init(self.gpa); defer arena.deinit(); - const ExpectedContents = union { - f16: Value.Payload.Float_16, - f32: Value.Payload.Float_32, - f64: Value.Payload.Float_64, - f80: Value.Payload.Float_80, - f128: Value.Payload.Float_128, + const ExpectedContents = struct { + scalar: union { + i64: Value.Payload.I64, + big: struct { + limbs: [ + @max( + std.math.big.int.Managed.default_capacity, + std.math.big.int.calcTwosCompLimbCount(128), + ) + ]std.math.big.Limb, + pl: Value.Payload.BigInt, + }, + }, + repeated: Value.Payload.SubValue, }; var stack align(@alignOf(ExpectedContents)) = std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator()); + var int_pl = Type.Payload.Bits{ + .base = .{ .tag = .int_signed }, + .data = scalar_bits, + }; var vec_pl = Type.Payload.Array{ .base = .{ .tag = .vector }, .data = .{ - .len = @divExact(128, ty_bits), - .elem_type = ty, + .len = @divExact(abi_size * 8, scalar_bits), + .elem_type = Type.initPayload(&int_pl.base), }, }; const vec_ty = Type.initPayload(&vec_pl.base); - - var sign_pl = Value.Payload.SubValue{ - .base = .{ .tag = .repeated }, - .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*), + const sign_val = switch (tag) { + .neg => try vec_ty.minInt(stack.get(), self.target.*), + .fabs => try vec_ty.maxInt(stack.get(), self.target.*), + else => unreachable, }; - const sign_val = Value.initPayload(&sign_pl.base); const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val }); - - const src_mcv = try self.resolveInst(un_op); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) - src_mcv + const sign_mem = if (sign_mcv.isMemory()) + 
sign_mcv.mem(Memory.PtrSize.fromSize(abi_size)) else - try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - const dst_lock = self.register_manager.lockReg(dst_mcv.register); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) }, + }); - const tag = self.air.instructions.items(.tag)[inst]; - try self.genBinOpMir(switch (ty_bits) { - // No point using an extra prefix byte for *pd which performs the same operation. - 16, 32, 64, 128 => switch (tag) { - .neg => .{ ._ps, .xor }, - .fabs => .{ ._ps, .andn }, + if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory( + switch (scalar_bits) { + 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) { + .neg => .{ .vp_, .xor }, + .fabs => .{ .vp_, .@"and" }, + else => unreachable, + } else switch (tag) { + .neg => .{ .v_ps, .xor }, + .fabs => .{ .v_ps, .@"and" }, + else => unreachable, + }, + 32 => switch (tag) { + .neg => .{ .v_ps, .xor }, + .fabs => .{ .v_ps, .@"and" }, + else => unreachable, + }, + 64 => switch (tag) { + .neg => .{ .v_pd, .xor }, + .fabs => .{ .v_pd, .@"and" }, + else => unreachable, + }, + 80 => return self.fail("TODO implement airFloatSign for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), else => unreachable, }, - 80 => return self.fail("TODO implement airFloatSign for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), - else => unreachable, - }, vec_ty, dst_mcv, sign_mcv); + registerAlias(dst_reg, abi_size), + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + sign_mem, + ) else try self.asmRegisterMemory( + switch (scalar_bits) { + 16, 128 => switch (tag) { + .neg => .{ .p_, .xor }, + .fabs => .{ .p_, .@"and" }, + else => unreachable, + }, + 32 => switch (tag) { + .neg => .{ ._ps, .xor }, + .fabs => .{ ._ps, .@"and" }, + else => unreachable, + }, + 64 => switch (tag) { + .neg => .{ ._pd, .xor }, + .fabs => .{ ._pd, .@"and" }, + else => unreachable, + }, + 80 => return self.fail("TODO implement airFloatSign for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + else => unreachable, + }, + registerAlias(dst_reg, abi_size), + sign_mem, + ); return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } @@ -8593,7 +8668,6 @@ const MoveStrategy = union(enum) { const InsertExtract = struct { insert: Mir.Inst.FixedTag, extract: Mir.Inst.FixedTag, - imm: Immediate, }; }; fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy { @@ -8603,17 +8677,15 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy { 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ .insert = .{ .vp_w, .insr }, .extract = .{ .vp_w, .extr }, - .imm = Immediate.u(0), } } else .{ .insert_extract = .{ .insert = .{ .p_w, .insr }, .extract = .{ .p_w, .extr }, - .imm = Immediate.u(0), } }, 32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } }, 64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } }, 128 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, else => {}, }, .Vector => switch (ty.childType().zigTypeTag()) { @@ -8622,101 +8694,120 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy { 1 => if (self.hasFeature(.avx)) 
return .{ .vex_insert_extract = .{ .insert = .{ .vp_b, .insr }, .extract = .{ .vp_b, .extr }, - .imm = Immediate.u(0), } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ .insert = .{ .p_b, .insr }, .extract = .{ .p_b, .extr }, - .imm = Immediate.u(0), } }, 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ .insert = .{ .vp_w, .insr }, .extract = .{ .vp_w, .extr }, - .imm = Immediate.u(0), } } else .{ .insert_extract = .{ .insert = .{ .p_w, .insr }, .extract = .{ .p_w, .extr }, - .imm = Immediate.u(0), } }, 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } + .{ .v_d, .mov } else - .{ ._ss, .mov } }, + .{ ._d, .mov } }, 5...8 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } + .{ .v_q, .mov } else - .{ ._sd, .mov } }, + .{ ._q, .mov } }, + 9...16 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 17...32 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, else => {}, }, 16 => switch (ty.vectorLen()) { 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ .insert = .{ .vp_w, .insr }, .extract = .{ .vp_w, .extr }, - .imm = Immediate.u(0), } } else .{ .insert_extract = .{ .insert = .{ .p_w, .insr }, .extract = .{ .p_w, .extr }, - .imm = Immediate.u(0), } }, 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } + .{ .v_d, .mov } else - .{ ._ss, .mov } }, + .{ ._d, .mov } }, 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } + .{ .v_q, .mov } else - .{ ._sd, .mov } }, + .{ ._q, .mov } }, 5...8 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ps, .mov } - else - .{ ._ps, .mov } }, + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 9...16 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, 
.movdqa } else .{ .v_, .movdqu } }, else => {}, }, 32 => switch (ty.vectorLen()) { 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } + .{ .v_d, .mov } else - .{ ._ss, .mov } }, + .{ ._d, .mov } }, 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } + .{ .v_q, .mov } else - .{ ._sd, .mov } }, + .{ ._q, .mov } }, 3...4 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 5...8 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, else => {}, }, 64 => switch (ty.vectorLen()) { 1 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } + .{ .v_q, .mov } else - .{ ._sd, .mov } }, + .{ ._q, .mov } }, 2 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 3...4 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 128 => switch (ty.vectorLen()) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 2 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 256 => switch (ty.vectorLen()) { + 1 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, else => {}, }, 
else => {}, }, .Float => switch (ty.childType().floatBits(self.target.*)) { 16 => switch (ty.vectorLen()) { - 1 => {}, + 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, 2 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_ss, .mov } + .{ .v_d, .mov } else - .{ ._ss, .mov } }, + .{ ._d, .mov } }, 3...4 => return .{ .move = if (self.hasFeature(.avx)) - .{ .v_sd, .mov } + .{ .v_q, .mov } else - .{ ._sd, .mov } }, + .{ ._q, .mov } }, 5...8 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 9...16 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, else => {}, }, 32 => switch (ty.vectorLen()) { @@ -8741,18 +8832,18 @@ fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy { else .{ ._sd, .mov } }, 2 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } + else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, 3...4 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } }, else => {}, }, 128 => switch (ty.vectorLen()) { 1 => return .{ .move = if (self.hasFeature(.avx)) - if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } - else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + if (aligned) .{ .v_, .movdqa } else 
.{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, 2 => if (self.hasFeature(.avx)) - return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, else => {}, }, else => {}, @@ -8860,29 +8951,69 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ); } }, - .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( - if ((dst_reg.class() == .sse) == (src_reg.class() == .sse)) - switch (ty.zigTypeTag()) { - else => .{ ._, .mov }, - .Float, .Vector => .{ ._ps, .mova }, - } - else switch (abi_size) { - 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr }, - registerAlias(dst_reg, 4), - registerAlias(src_reg, 4), - Immediate.u(0), + .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) { + .general_purpose => switch (src_reg.class()) { + .general_purpose => try self.asmRegisterRegister( + .{ ._, .mov }, + registerAlias(dst_reg, abi_size), + registerAlias(src_reg, abi_size), ), - 4 => .{ ._d, .mov }, - 8 => .{ ._q, .mov }, - else => return self.fail( - "unsupported register copy from {s} to {s}", - .{ @tagName(src_reg), @tagName(dst_reg) }, + .segment => try self.asmRegisterRegister( + .{ ._, .mov }, + registerAlias(dst_reg, abi_size), + src_reg, ), + .sse => try self.asmRegisterRegister( + switch (abi_size) { + 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + else => unreachable, + }, + registerAlias(dst_reg, @max(abi_size, 4)), + src_reg.to128(), + ), + .x87, .mmx => unreachable, }, - registerAlias(dst_reg, abi_size), - registerAlias(src_reg, abi_size), - ), + .segment => try self.asmRegisterRegister( + .{ ._, .mov }, + dst_reg, + switch (src_reg.class()) { + .general_purpose, .segment => 
registerAlias(src_reg, abi_size), + .sse => try self.copyToTmpRegister(ty, src_mcv), + .x87, .mmx => unreachable, + }, + ), + .sse => switch (src_reg.class()) { + .general_purpose => try self.asmRegisterRegister( + switch (abi_size) { + 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + else => unreachable, + }, + dst_reg.to128(), + registerAlias(src_reg, @max(abi_size, 4)), + ), + .segment => try self.genSetReg( + dst_reg, + ty, + .{ .register = try self.copyToTmpRegister(ty, src_mcv) }, + ), + .sse => try self.asmRegisterRegister( + switch (ty.scalarType().zigTypeTag()) { + else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa }, + .Float => switch (ty.floatBits(self.target.*)) { + else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa }, + 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova }, + 64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova }, + }, + }, + registerAlias(dst_reg, abi_size), + registerAlias(src_reg, abi_size), + ), + .x87, .mmx => unreachable, + }, + .x87, .mmx => unreachable, + }, .register_offset, .indirect, .load_frame, @@ -8918,14 +9049,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ie.insert, dst_alias, src_mem, - ie.imm, + Immediate.u(0), ), .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( ie.insert, dst_alias, dst_alias, src_mem, - ie.imm, + Immediate.u(0), ), } }, @@ -8947,14 +9078,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ie.insert, dst_alias, src_mem, - ie.imm, + Immediate.u(0), ), .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( ie.insert, dst_alias, dst_alias, src_mem, - ie.imm, + Immediate.u(0), ), } }, @@ -8994,14 +9125,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr ie.insert, dst_alias, 
src_mem, - ie.imm, + Immediate.u(0), ), .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( ie.insert, dst_alias, dst_alias, src_mem, - ie.imm, + Immediate.u(0), ), } }, @@ -9129,7 +9260,7 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal ie.extract, dst_mem, src_alias, - ie.imm, + Immediate.u(0), ), } }, @@ -10499,7 +10630,7 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv; const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(src_ty, dst_mcv, src_mcv); + try self.genCopy(union_ty, dst_mcv, src_mcv); break :result dst_mcv; } @@ -11000,7 +11131,15 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { reg.to64() else unreachable, - .segment, .x87, .mmx => unreachable, + .segment => if (size_bytes <= 2) + reg + else + unreachable, + .x87 => unreachable, + .mmx => if (size_bytes <= 8) + reg + else + unreachable, .sse => if (size_bytes <= 16) reg.to128() else if (size_bytes <= 32) diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 66a249a3f2..4014947673 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -261,7 +261,8 @@ pub const Mnemonic = enum { // X87 fisttp, fld, // MMX - movd, + movd, movq, + pand, pandn, por, pxor, // SSE addps, addss, andps, @@ -293,7 +294,8 @@ pub const Mnemonic = enum { maxpd, maxsd, minpd, minsd, movapd, - movq, //movd, movsd, + movdqa, movdqu, + //movsd, movupd, mulpd, mulsd, orpd, @@ -316,6 +318,7 @@ pub const Mnemonic = enum { roundpd, roundps, roundsd, roundss, // AVX vaddpd, vaddps, vaddsd, vaddss, + vandnpd, vandnps, vandpd, vandps, vbroadcastf128, vbroadcastsd, vbroadcastss, vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps, vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss, @@ -327,22 +330,31 @@ pub const Mnemonic = enum { vmaxpd, vmaxps, vmaxsd, vmaxss, vminpd, vminps, vminsd, vminss, vmovapd, vmovaps, - vmovddup, vmovhlps, 
vmovlhps, + vmovd, + vmovddup, + vmovdqa, vmovdqu, + vmovhlps, vmovlhps, + vmovq, vmovsd, vmovshdup, vmovsldup, vmovss, vmovupd, vmovups, vmulpd, vmulps, vmulsd, vmulss, + vorpd, vorps, + vpand, vpandn, vpextrb, vpextrd, vpextrq, vpextrw, vpinsrb, vpinsrd, vpinsrq, vpinsrw, + vpor, vpshufhw, vpshuflw, vpsrld, vpsrlq, vpsrlw, vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, + vpxor, vroundpd, vroundps, vroundsd, vroundss, vshufpd, vshufps, vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, vsubpd, vsubps, vsubsd, vsubss, + vxorpd, vxorps, // F16C vcvtph2ps, vcvtps2ph, // FMA diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index ef8bbe07b3..4d1f59e454 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -236,6 +236,14 @@ pub const Inst = struct { /// VEX-Encoded ___ v_, + /// VEX-Encoded ___ Byte + v_b, + /// VEX-Encoded ___ Word + v_w, + /// VEX-Encoded ___ Doubleword + v_d, + /// VEX-Encoded ___ QuadWord + v_q, /// VEX-Encoded Packed ___ vp_, /// VEX-Encoded Packed ___ Byte @@ -526,6 +534,10 @@ pub const Inst = struct { cvttps2dq, /// Convert with truncation scalar double-precision floating-point value to doubleword integer cvttsd2si, + /// Move aligned packed integer values + movdqa, + /// Move unaligned packed integer values + movdqu, /// Packed interleave shuffle of quadruplets of single-precision floating-point values /// Packed interleave shuffle of pairs of double-precision floating-point values shuf, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 3383315bd6..3e57be61ea 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -970,11 +970,16 @@ pub const table = [_]Entry{ .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 }, .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 }, - .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 }, - .{ .movq, .rm, &.{ .xmm, 
.rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 }, + .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 }, .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 }, + .{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 }, + .{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 }, + + .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 }, + .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 }, + .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 }, .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 }, @@ -987,10 +992,16 @@ pub const table = [_]Entry{ .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, + .{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 }, + + .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 }, + .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, + .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 }, + .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 }, .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 }, @@ -1012,6 +1023,8 @@ pub const table = [_]Entry{ .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, + .{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 }, + .{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 }, .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, @@ -1070,6 +1083,18 @@ pub const table = [_]Entry{ .{ 
.vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + .{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx }, + .{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx }, + + .{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx }, + .{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx }, + + .{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx }, + .{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx }, + + .{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx }, + .{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx }, + .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx }, .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx }, .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx }, @@ -1169,13 +1194,31 @@ pub const table = [_]Entry{ .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, + .{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx }, + .{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx }, + .{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx }, + .{ .vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx }, + .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, + .{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqa, 
.mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx }, + .{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx }, + + .{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx }, + .{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx }, + .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx }, + .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx }, + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, @@ -1212,6 +1255,16 @@ pub const table = [_]Entry{ .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + .{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx }, + .{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx }, + + .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx }, + .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx }, + + .{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx }, + + .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx }, + .{ .vpextrb, .mri, 
&.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx }, .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, @@ -1225,6 +1278,8 @@ pub const table = [_]Entry{ .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx }, + .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx }, .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx }, .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx }, @@ -1242,6 +1297,8 @@ pub const table = [_]Entry{ .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx }, .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx }, + .{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx }, + .{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx }, .{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx }, @@ -1278,6 +1335,12 @@ pub const table = [_]Entry{ .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx }, + .{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx }, + .{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx }, + + .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx }, + .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx }, + // F16C .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, 
.vex_128_w0, .f16c }, .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c }, @@ -1313,6 +1376,12 @@ pub const table = [_]Entry{ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 }, + + .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 }, + + .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 }, + .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 }, .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 }, .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 }, @@ -1329,5 +1398,7 @@ pub const table = [_]Entry{ .{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 }, .{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 }, .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 }, + + .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 }, }; // zig fmt: on diff --git a/src/type.zig b/src/type.zig index 6122afda62..bcbb9e2ea2 100644 --- a/src/type.zig +++ b/src/type.zig @@ -5433,8 +5433,18 @@ pub const Type = extern union { } } + // Works for vectors and vectors of integers. + pub fn maxInt(ty: Type, arena: Allocator, target: Target) !Value { + const scalar = try maxIntScalar(ty.scalarType(), arena, target); + if (ty.zigTypeTag() == .Vector and scalar.tag() != .the_only_possible_value) { + return Value.Tag.repeated.create(arena, scalar); + } else { + return scalar; + } + } + /// Asserts that self.zigTypeTag() == .Int. 
- pub fn maxInt(self: Type, arena: Allocator, target: Target) !Value { + pub fn maxIntScalar(self: Type, arena: Allocator, target: Target) !Value { assert(self.zigTypeTag() == .Int); const info = self.intInfo(target); diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index 9d17b05865..a3fd5b69e8 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -532,7 +532,6 @@ fn testFabs() !void { test "@fabs with vectors" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 991521b62c..7a563c1727 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -1612,7 +1612,6 @@ test "absFloat" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest; diff --git a/test/behavior/translate_c_macros.zig b/test/behavior/translate_c_macros.zig index aa08e8c9aa..b3d1a688fe 100644 --- a/test/behavior/translate_c_macros.zig +++ b/test/behavior/translate_c_macros.zig @@ -65,7 +65,6 @@ test "cast negative integer to pointer" { test "casting to union with a macro" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return 
error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 037bf1a580fe24b427e0ee5f7aecfec7202c1bf3 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 14 May 2023 17:26:44 -0400 Subject: [PATCH 17/24] x86_64: enable integer vector registers --- src/arch/x86_64/CodeGen.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 80f537e046..51e86447dc 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2261,11 +2261,11 @@ fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: b }, .Vector => switch (ty.childType().zigTypeTag()) { .Float => switch (ty.childType().floatBits(self.target.*)) { - 16, 32, 64 => if (self.hasFeature(.avx)) 32 else 16, - 80, 128 => break :need_mem, + 16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16, + 80 => break :need_mem, else => unreachable, }, - else => break :need_mem, + else => if (self.hasFeature(.avx)) 32 else 16, }, else => 8, })) { From 37ccf35ff207b8866b3fc433dd57d7c7d6bac710 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 14 May 2023 17:56:34 -0400 Subject: [PATCH 18/24] x86_64: fix struct_field_val crash --- src/arch/x86_64/CodeGen.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 51e86447dc..87e1f9e45b 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5411,6 +5411,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const field_ty = container_ty.structFieldType(index); if (!field_ty.hasRuntimeBitsIgnoreComptime()) break :result .none; const field_rc = regClassForType(field_ty); + const field_is_gp = field_rc.supersetOf(gp); const src_mcv = try self.resolveInst(operand); const field_off = switch 
(container_ty.containerLayout()) { @@ -5443,7 +5444,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO implement struct_field_val with large packed field", .{}); } - const dst_reg = try self.register_manager.allocReg(inst, gp); + const dst_reg = try self.register_manager.allocReg(if (field_is_gp) inst else null, gp); const field_extra_bits = self.regExtraBits(field_ty); const load_abi_size = if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2; @@ -5494,7 +5495,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg); const dst_mcv = MCValue{ .register = dst_reg }; - break :result if (field_rc.supersetOf(gp)) + break :result if (field_is_gp) dst_mcv else try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); From 77a8cb57287e8d6f8430f1dedecda2bfb30506f1 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Sun, 14 May 2023 20:27:31 -0400 Subject: [PATCH 19/24] x86_64: fix `@clz` and `@ctz` of `u8` --- src/arch/x86_64/CodeGen.zig | 61 +++++++++++++++++++++++++++++++------ test/behavior/math.zig | 2 -- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 87e1f9e45b..9d5f877e14 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -4226,9 +4226,18 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const src_bits = src_ty.bitSize(self.target.*); if (self.hasFeature(.lzcnt)) { - if (src_bits <= 64) { + if (src_bits <= 8) { + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir(.{ ._, .lzcnt }, Type.u32, dst_mcv, .{ .register = wide_reg }); + try self.genBinOpMir( + .{ ._, .sub }, + dst_ty, + dst_mcv, + .{ .immediate = 8 + self.regExtraBits(src_ty) }, + ); + } else if (src_bits <= 64) { try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, 
dst_mcv, mat_src_mcv); - const extra_bits = self.regExtraBits(src_ty); if (extra_bits > 0) { try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits }); @@ -4267,7 +4276,17 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), }); - try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); + const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); + defer self.register_manager.unlockReg(imm_lock); + + if (src_bits <= 8) { + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); + defer self.register_manager.unlockReg(wide_lock); + + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir(.{ ._, .bsr }, Type.u16, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -4281,7 +4300,20 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)), }); - try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); + const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); + defer self.register_manager.unlockReg(imm_lock); + + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); + defer self.register_manager.unlockReg(wide_lock); + + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir( + .{ ._, .bsr }, + if (src_bits <= 8) Type.u16 else src_ty, + dst_mcv, + .{ .register = wide_reg }, + ); const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( @@ -4323,24 
+4355,25 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { if (self.hasFeature(.bmi)) { if (src_bits <= 64) { - const extra_bits = self.regExtraBits(src_ty); + const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0); + const wide_ty = if (src_bits <= 8) Type.u16 else src_ty; const masked_mcv = if (extra_bits > 0) masked: { const tmp_mcv = tmp: { if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv; - try self.genSetReg(dst_reg, src_ty, src_mcv); + try self.genSetReg(dst_reg, wide_ty, src_mcv); break :tmp dst_mcv; }; try self.genBinOpMir( .{ ._, .@"or" }, - src_ty, + wide_ty, tmp_mcv, .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) << @intCast(u6, src_bits) }, ); break :masked tmp_mcv; } else mat_src_mcv; - try self.genBinOpMir(.{ ._, .tzcnt }, src_ty, dst_mcv, masked_mcv); + try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv); } else if (src_bits <= 128) { const tmp_reg = try self.register_manager.allocReg(null, gp); const tmp_mcv = MCValue{ .register = tmp_reg }; @@ -4369,7 +4402,17 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); - try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); + const width_lock = self.register_manager.lockRegAssumeUnused(width_reg); + defer self.register_manager.unlockReg(width_lock); + + if (src_bits <= 8 or !math.isPowerOfTwo(src_bits)) { + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); + defer self.register_manager.unlockReg(wide_lock); + + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); const 
cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); try self.asmCmovccRegisterRegister( diff --git a/test/behavior/math.zig b/test/behavior/math.zig index 7a563c1727..46f736bf74 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -61,7 +61,6 @@ fn assertFalse(b: bool) !void { } test "@clz" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO @@ -142,7 +141,6 @@ fn expectVectorsEqual(a: anytype, b: anytype) !void { } test "@ctz" { - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 42d9789f46e94e17d8ab8d02f356eaaa44fb2822 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 May 2023 00:06:36 -0400 Subject: [PATCH 20/24] x86_64: fix sysv vector argument passing --- src/arch/x86_64/abi.zig | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index ff1a0ee520..e79424d6d8 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -165,34 +165,6 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { }, .Vector => { const elem_ty = ty.childType(); - if (ctx == .arg) { - const bit_size = ty.bitSize(target); - if (bit_size > 128) { - const has_avx512 = target.cpu.features.isEnabled(@enumToInt(std.Target.x86.Feature.avx512f)); - if (has_avx512 and bit_size <= 512) return .{ - .integer, .integer, .integer, .integer, - .integer, .integer, .integer, .integer, - }; - const has_avx = 
target.cpu.features.isEnabled(@enumToInt(std.Target.x86.Feature.avx)); - if (has_avx and bit_size <= 256) return .{ - .integer, .integer, .integer, .integer, - .none, .none, .none, .none, - }; - return memory_class; - } - if (bit_size > 80) return .{ - .integer, .integer, .none, .none, - .none, .none, .none, .none, - }; - if (bit_size > 64) return .{ - .x87, .none, .none, .none, - .none, .none, .none, .none, - }; - return .{ - .integer, .none, .none, .none, - .none, .none, .none, .none, - }; - } const bits = elem_ty.bitSize(target) * ty.arrayLen(); if (bits <= 64) return .{ .sse, .none, .none, .none, @@ -202,6 +174,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { .sse, .sseup, .none, .none, .none, .none, .none, .none, }; + if (ctx == .arg and !std.Target.x86.featureSetHas(target.cpu.features, .avx)) return memory_class; if (bits <= 192) return .{ .sse, .sseup, .sseup, .none, .none, .none, .none, .none, @@ -210,6 +183,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { .sse, .sseup, .sseup, .sseup, .none, .none, .none, .none, }; + if (ctx == .arg and !std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return memory_class; if (bits <= 320) return .{ .sse, .sseup, .sseup, .sseup, .sseup, .none, .none, .none, From bd771bec49fbb7845ad2635c0dd13aa971a81fee Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 May 2023 00:26:30 -0400 Subject: [PATCH 21/24] x86_64: implement integer vector add/sub --- src/arch/x86_64/CodeGen.zig | 80 ++++++++++++++++++++++++++++++++--- src/arch/x86_64/Encoding.zig | 6 ++- src/arch/x86_64/Mir.zig | 11 +++++ src/arch/x86_64/encodings.zig | 69 ++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 6 deletions(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 9d5f877e14..b791ec5ecc 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6520,6 +6520,57 @@ fn genBinOp( }, .Vector => switch 
(lhs_ty.childType().zigTypeTag()) { else => null, + .Int => switch (lhs_ty.childType().intInfo(self.target.*).bits) { + 8 => switch (lhs_ty.vectorLen()) { + 1...16 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, + else => null, + }, + else => null, + }, + 16 => switch (lhs_ty.vectorLen()) { + 1...8 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub }, + else => null, + }, + else => null, + }, + 32 => switch (lhs_ty.vectorLen()) { + 1...4 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub }, + else => null, + }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen()) { + 1...2 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, + else => null, + }, + else => null, + }, + else => null, + }, .Float => switch (lhs_ty.childType().floatBits(self.target.*)) { 16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen()) { 1 => { @@ -6812,7 +6863,7 @@ fn genBinOp( ); } switch (air_tag) { - .add, .sub, .mul, .div_float, .div_exact => {}, + .add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {}, .div_trunc, .div_floor => try self.genRound( lhs_ty, dst_reg, @@ -9043,14 +9094,33 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .{ .register = try self.copyToTmpRegister(ty, src_mcv) }, ), .sse => try self.asmRegisterRegister( - switch (ty.scalarType().zigTypeTag()) { - else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else 
.{ ._, .movdqa }, + if (@as(?Mir.Inst.FixedTag, switch (ty.scalarType().zigTypeTag()) { + else => switch (abi_size) { + 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + 9...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa }, + 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null, + else => null, + }, .Float => switch (ty.floatBits(self.target.*)) { - else => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa }, + 16, 128 => switch (abi_size) { + 2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + 9...16 => if (self.hasFeature(.avx)) + .{ .v_, .movdqa } + else + .{ ._, .movdqa }, + 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null, + else => null, + }, 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova }, 64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova }, + 80 => null, + else => unreachable, }, - }, + })) |tag| tag else return self.fail("TODO implement genSetReg for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), registerAlias(dst_reg, abi_size), registerAlias(src_reg, abi_size), ), diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 4014947673..c8919d062d 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -262,7 +262,9 @@ pub const Mnemonic = enum { fisttp, fld, // MMX movd, movq, + paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw, pand, pandn, por, pxor, + psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw, // SSE addps, addss, andps, @@ -341,12 +343,14 @@ pub const Mnemonic = enum { vmovupd, vmovups, vmulpd, vmulps, vmulsd, vmulss, vorpd, vorps, + vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw, vpand, vpandn, vpextrb, vpextrd, vpextrq, vpextrw, vpinsrb, 
vpinsrd, vpinsrq, vpinsrw, vpor, vpshufhw, vpshuflw, vpsrld, vpsrlq, vpsrlw, + vpsubb, vpsubd, vpsubq, vpsubsb, vpsubsw, vpsubusb, vpsubusw, vpsubw, vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, vpxor, @@ -746,7 +750,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(25_000); + @setEvalBranchQuota(30_000); const encodings = @import("encodings.zig"); var entries = encodings.table; std.sort.sort(encodings.Entry, &entries, {}, struct { diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 4d1f59e454..58eab29958 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -288,6 +288,7 @@ pub const Inst = struct { /// Add with carry adc, /// Add + /// Add packed integers /// Add packed single-precision floating-point values /// Add scalar single-precision floating-point values /// Add packed double-precision floating-point values @@ -420,6 +421,7 @@ pub const Inst = struct { /// Double precision shift right sh, /// Subtract + /// Subtract packed integers /// Subtract packed single-precision floating-point values /// Subtract scalar single-precision floating-point values /// Subtract packed double-precision floating-point values @@ -444,9 +446,18 @@ pub const Inst = struct { /// Bitwise logical xor of packed double-precision floating-point values xor, + /// Add packed signed integers with signed saturation + adds, + /// Add packed unsigned integers with unsigned saturation + addus, /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, + /// Subtract packed signed integers with signed saturation + subs, + /// Subtract packed unsigned integers with unsigned saturation + subus, + /// Convert packed doubleword integers to packed single-precision floating-point values /// Convert packed doubleword 
integers to packed double-precision floating-point values cvtpi2, diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 3e57be61ea..820fd715ba 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -992,6 +992,17 @@ pub const table = [_]Entry{ .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, + .{ .paddb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .none, .sse2 }, + .{ .paddw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .none, .sse2 }, + .{ .paddd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .none, .sse2 }, + .{ .paddq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .none, .sse2 }, + + .{ .paddsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .none, .sse2 }, + .{ .paddsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .none, .sse2 }, + + .{ .paddusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .none, .sse2 }, + .{ .paddusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .none, .sse2 }, + .{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 }, .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 }, @@ -1013,6 +1024,18 @@ pub const table = [_]Entry{ .{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 }, .{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 }, + .{ .psubb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .none, .sse2 }, + .{ .psubw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .none, .sse2 }, + .{ .psubd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .none, .sse2 }, + + .{ .psubsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .none, .sse2 }, + .{ .psubsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .none, .sse2 }, + + .{ .psubq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .none, .sse2 }, + + .{ .psubusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 
0x66, 0x0f, 0xd8 }, 0, .none, .sse2 }, + .{ .psubusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .none, .sse2 }, + .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, @@ -1261,6 +1284,17 @@ pub const table = [_]Entry{ .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx }, .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx }, + .{ .vpaddb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_128_wig, .avx }, + .{ .vpaddw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_128_wig, .avx }, + .{ .vpaddd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_128_wig, .avx }, + .{ .vpaddq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_128_wig, .avx }, + + .{ .vpaddsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_128_wig, .avx }, + .{ .vpaddsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_128_wig, .avx }, + + .{ .vpaddusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_128_wig, .avx }, + .{ .vpaddusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_128_wig, .avx }, + .{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx }, .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx }, @@ -1287,6 +1321,18 @@ pub const table = [_]Entry{ .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx }, .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx }, + .{ .vpsubb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_128_wig, .avx }, + .{ .vpsubw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 
0x0f, 0xf9 }, 0, .vex_128_wig, .avx }, + .{ .vpsubd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_128_wig, .avx }, + + .{ .vpsubsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_128_wig, .avx }, + .{ .vpsubsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_128_wig, .avx }, + + .{ .vpsubq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_128_wig, .avx }, + + .{ .vpsubusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_128_wig, .avx }, + .{ .vpsubusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_128_wig, .avx }, + .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx }, .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx }, .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx }, @@ -1376,6 +1422,17 @@ pub const table = [_]Entry{ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 }, + .{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 }, + .{ .vpaddd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_256_wig, .avx2 }, + .{ .vpaddq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpaddsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_256_wig, .avx2 }, + .{ .vpaddsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_256_wig, .avx2 }, + + .{ .vpaddusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_256_wig, .avx2 }, + .{ .vpaddusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_256_wig, .avx2 }, + .{ .vpand, 
.rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 }, .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 }, @@ -1389,6 +1446,18 @@ pub const table = [_]Entry{ .{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 }, .{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 }, + .{ .vpsubb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsubw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsubd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_256_wig, .avx2 }, + + .{ .vpsubsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsubsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpsubq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_256_wig, .avx2 }, + + .{ .vpsubusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_256_wig, .avx2 }, + .{ .vpsubusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_256_wig, .avx2 }, + .{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 }, .{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 }, .{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 }, From f39ff6cc68ab7a0d8ef349d4d930118890c19b01 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 May 2023 01:15:37 -0400 Subject: [PATCH 22/24] x86_64: implement integer vector mul --- src/arch/x86_64/CodeGen.zig | 59 +++++++++++++++++++++++++++++++++-- src/arch/x86_64/Encoding.zig | 3 ++ src/arch/x86_64/Mir.zig | 4 +++ src/arch/x86_64/encodings.zig | 24 ++++++++++++-- test/behavior/vector.zig | 3 +- 5 files changed, 87 insertions(+), 6 deletions(-) diff 
--git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b791ec5ecc..c5af53b2cf 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2800,8 +2800,10 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { const result = result: { const tag = self.air.instructions.items(.tag)[inst]; const dst_ty = self.air.typeOfIndex(inst); - if (dst_ty.zigTypeTag() == .Float) - break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs); + switch (dst_ty.zigTypeTag()) { + .Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs), + else => {}, + } const dst_info = dst_ty.intInfo(self.target.*); var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) { @@ -6531,6 +6533,15 @@ fn genBinOp( => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, else => null, }, + 17...32 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null, + else => null, + }, else => null, }, 16 => switch (lhs_ty.vectorLen()) { @@ -6541,6 +6552,21 @@ fn genBinOp( .sub, .subwrap, => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub }, + .mul, + .mulwrap, + => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull }, + else => null, + }, + 9...16 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null, + .mul, + .mulwrap, + => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null, else => null, }, else => null, @@ -6553,6 +6579,26 @@ fn genBinOp( .sub, .subwrap, => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub }, + .mul, + .mulwrap, + => if (self.hasFeature(.avx)) + .{ .vp_d, .mull } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .mull } + else + null, + else => null, + }, + 5...8 => switch (air_tag) { + 
.add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null, + .mul, + .mulwrap, + => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null, else => null, }, else => null, @@ -6567,6 +6613,15 @@ fn genBinOp( => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, else => null, }, + 3...4 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null, + else => null, + }, else => null, }, else => null, diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index c8919d062d..7b029cdb4f 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -264,6 +264,7 @@ pub const Mnemonic = enum { movd, movq, paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw, pand, pandn, por, pxor, + pmulhw, pmullw, psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw, // SSE addps, addss, @@ -317,6 +318,7 @@ pub const Mnemonic = enum { insertps, pextrb, pextrd, pextrq, pinsrb, pinsrd, pinsrq, + pmulld, roundpd, roundps, roundsd, roundss, // AVX vaddpd, vaddps, vaddsd, vaddss, @@ -347,6 +349,7 @@ pub const Mnemonic = enum { vpand, vpandn, vpextrb, vpextrd, vpextrq, vpextrw, vpinsrb, vpinsrd, vpinsrq, vpinsrw, + vpmulhw, vpmulld, vpmullw, vpor, vpshufhw, vpshuflw, vpsrld, vpsrlq, vpsrlw, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 58eab29958..a18792e6aa 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -453,6 +453,10 @@ pub const Inst = struct { /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, + /// Multiply packed signed integers and store low result + mull, + /// Multiply packed signed integers and store high result + mulh, /// Subtract packed signed 
integers with signed saturation subs, /// Subtract packed unsigned integers with unsigned saturation diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 820fd715ba..86a79596cd 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1011,6 +1011,10 @@ pub const table = [_]Entry{ .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, + .{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 }, + + .{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 }, + .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 }, .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 }, @@ -1087,6 +1091,8 @@ pub const table = [_]Entry{ .{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 }, .{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 }, + .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 }, + .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 }, @@ -1312,6 +1318,12 @@ pub const table = [_]Entry{ .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx }, + + .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx }, + + .{ .vpmullw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_128_wig, .avx }, + .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx }, .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx }, @@ -1418,9 +1430,9 @@ pub const table = 
[_]Entry{ .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma }, // AVX2 - .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 }, - .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, - .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, + .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 }, + .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 }, + .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 }, .{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 }, .{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 }, @@ -1437,6 +1449,12 @@ pub const table = [_]Entry{ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 }, + .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx2 }, + .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 }, .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 }, diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 87ccdfb567..5d217a5ce0 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -26,7 +26,8 @@ test "implicit cast vector to array - bool" { test "vector wrap operators" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == 
.stage2_x86_64 and + !comptime std.Target.x86.featureSetHas(builtin.cpu.features, .sse4_1)) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From 40457a3696da015fe1396d6c84191b83731910db Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 May 2023 01:44:26 -0400 Subject: [PATCH 23/24] x86_64: implement integer vector bitwise operations --- src/arch/x86_64/CodeGen.zig | 25 +++++++++++++++++++++++++ test/behavior/vector.zig | 1 - 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index c5af53b2cf..ed2c596f8f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6531,6 +6531,9 @@ fn genBinOp( .sub, .subwrap, => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, else => null, }, 17...32 => switch (air_tag) { @@ -6540,6 +6543,9 @@ fn genBinOp( .sub, .subwrap, => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, else => null, }, else => null, @@ -6555,6 +6561,9 @@ fn genBinOp( .mul, .mulwrap, => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull }, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, else => 
null, }, 9...16 => switch (air_tag) { @@ -6567,6 +6576,9 @@ fn genBinOp( .mul, .mulwrap, => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, else => null, }, else => null, @@ -6587,6 +6599,9 @@ fn genBinOp( .{ .p_d, .mull } else null, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, else => null, }, 5...8 => switch (air_tag) { @@ -6599,6 +6614,9 @@ fn genBinOp( .mul, .mulwrap, => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, else => null, }, else => null, @@ -6611,6 +6629,9 @@ fn genBinOp( .sub, .subwrap, => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, else => null, }, 3...4 => switch (air_tag) { @@ -6620,6 +6641,9 @@ fn genBinOp( .sub, .subwrap, => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, else => null, }, else => null, @@ -6929,6 +6953,7 @@ fn genBinOp( else => unreachable, }, ), + .bit_and, .bit_or, .xor => {}, .max, .min => {}, // TODO: unordered select else => unreachable, } 
diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 5d217a5ce0..05c9517c20 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -120,7 +120,6 @@ test "vector float operators" { test "vector bit operators" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO From cea9ac772a518ff249d47fc2cb7b2776c786ac07 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 15 May 2023 02:55:41 -0400 Subject: [PATCH 24/24] x86_64: implement integer vector min/max --- src/arch/x86_64/CodeGen.zig | 100 ++++++++++++++++++++++++++++++++++ src/arch/x86_64/Encoding.zig | 4 ++ src/arch/x86_64/Mir.zig | 8 +++ src/arch/x86_64/encodings.zig | 58 ++++++++++++++++++++ 4 files changed, 170 insertions(+) diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index ed2c596f8f..2cd5721258 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -6534,6 +6534,34 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_b, .mins } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .mins } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_b, .minu } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .minu } + else + null, + }, + .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_b, .maxs } + else if 
(self.hasFeature(.sse4_1)) + .{ .p_b, .maxs } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_b, .maxu } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .maxu } + else + null, + }, else => null, }, 17...32 => switch (air_tag) { @@ -6546,6 +6574,14 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .minu } else null, + }, + .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null, + }, else => null, }, else => null, @@ -6564,6 +6600,26 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_w, .mins } + else + .{ .p_w, .mins }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_w, .minu } + else + .{ .p_w, .minu }, + }, + .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_w, .maxs } + else + .{ .p_w, .maxs }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_w, .maxu } + else + .{ .p_w, .maxu }, + }, else => null, }, 9...16 => switch (air_tag) { @@ -6579,6 +6635,14 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, .xor => if (self.hasFeature(.avx2)) .{ .vp_,
.xor } else null, + .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .minu } else null, + }, + .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null, + }, else => null, }, else => null, @@ -6602,6 +6666,34 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_d, .mins } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .mins } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_d, .minu } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .minu } + else + null, + }, + .max => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_d, .maxs } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .maxs } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_d, .maxu } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .maxu } + else + null, + }, else => null, }, 5...8 => switch (air_tag) { @@ -6617,6 +6709,14 @@ fn genBinOp( .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + .min => switch (lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .minu } else null, + }, + .max => switch
(lhs_ty.childType().intInfo(self.target.*).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null, + }, else => null, }, else => null, diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 7b029cdb4f..52d010880e 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -280,6 +280,7 @@ pub const Mnemonic = enum { mulps, mulss, orps, pextrw, pinsrw, + pmaxsw, pmaxub, pminsw, pminub, shufps, sqrtps, sqrtss, subps, subss, @@ -318,6 +319,7 @@ pub const Mnemonic = enum { insertps, pextrb, pextrd, pextrq, pinsrb, pinsrd, pinsrq, + pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, pmulld, roundpd, roundps, roundsd, roundss, // AVX @@ -349,6 +351,8 @@ pub const Mnemonic = enum { vpand, vpandn, vpextrb, vpextrd, vpextrq, vpextrw, vpinsrb, vpinsrd, vpinsrq, vpinsrw, + vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, + vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, vpmulhw, vpmulld, vpmullw, vpor, vpshufhw, vpshuflw, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index a18792e6aa..4483de858e 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -453,6 +453,14 @@ pub const Inst = struct { /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, + /// Maximum of packed signed integers + maxs, + /// Maximum of packed unsigned integers + maxu, + /// Minimum of packed signed integers + mins, + /// Minimum of packed unsigned integers + minu, /// Multiply packed signed integers and store low result mull, /// Multiply packed signed integers and store high result diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 86a79596cd..c326f4230a 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -1011,6 +1011,14 @@ pub const table = 
[_]Entry{ .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, + .{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 }, + + .{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 }, + + .{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 }, + + .{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 }, + .{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 }, .{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 }, @@ -1091,6 +1099,20 @@ pub const table = [_]Entry{ .{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 }, .{ .pinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 }, + .{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 }, + .{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 }, + + .{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 }, + + .{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 }, + + .{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 }, + .{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 }, + + .{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 }, + + .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 }, + .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 }, .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, @@ -1318,6 +1340,24 @@ pub const table = [_]Entry{ .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, 
.xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx }, + .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx }, + .{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx }, + + .{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_128_wig, .avx }, + .{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx }, + + .{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, .avx }, + + .{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx }, + .{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_128_wig, .avx }, + .{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx }, + + .{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_128_wig, .avx }, + .{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx }, + + .{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx }, + .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx }, .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx }, @@ -1449,6 +1489,24 @@ pub const table = [_]Entry{ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx2 }, + .{ .vpmaxuw, .rvm, &.{ .ymm,
.ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx2 }, + + .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx2 }, + + .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx2 }, + .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx2 }, + .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx2 }, + + .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx2 }, + .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx2 }, + + .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx2 }, + .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx }, .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },