x86_64: fix RoundMode immediate encoding for vroundss/vcvtps2ph

The RoundMode packed struct had Direction as enum(u4) occupying bits 3:0,
which pushed the precision exception suppress field to bit 4. Per Intel
SDM, the ROUNDSS/VROUNDSS/VCVTPS2PH immediate layout is:

  bits 1:0 = rounding mode
  bit 2    = rounding source (0 = immediate bits 1:0, 1 = MXCSR.RC)
  bit 3    = precision exception suppress
  bits 7:4 = reserved (must be 0)

The old encoding emitted e.g. vroundss $0x12 for ceil-suppress, which
sets reserved bit 4. CPUs silently ignore that bit, but valgrind 3.26.0
correctly rejects the instruction with SIGILL. Fix by changing Direction
to enum(u3) so precision lands at bit 3, producing the correct $0x0a
encoding.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-15 20:41:19 +00:00
parent d2c301cc95
commit ef619fdec8

View File

@@ -178,25 +178,27 @@ pub const Condition = enum(u5) {
}
};
/// The immediate operand of vcvtps2ph.
pub const RoundMode = packed struct(u5) {
/// The immediate operand of vroundss/vroundps/vcvtps2ph.
/// Intel SDM layout: bits 1:0 = rounding mode, bit 2 = use MXCSR.RC,
/// bit 3 = precision exception suppress.
pub const RoundMode = packed struct(u4) {
direction: Direction = .mxcsr,
precision: enum(u1) {
normal = 0b0,
inexact = 0b1,
} = .normal,
pub const Direction = enum(u4) {
pub const Direction = enum(u3) {
/// Round to nearest (even)
nearest = 0b0_00,
nearest = 0b000,
/// Round down (toward -∞)
down = 0b0_01,
down = 0b001,
/// Round up (toward +∞)
up = 0b0_10,
up = 0b010,
/// Round toward zero (truncate)
zero = 0b0_11,
zero = 0b011,
/// Use current rounding mode of MXCSR.RC
mxcsr = 0b1_00,
mxcsr = 0b100,
};
pub fn imm(mode: RoundMode) Immediate {