// blob 944fe854 (18916B) - Raw
// x86_64 instruction encoding descriptor.
//
// This file is itself a struct (`Encoding`): a mnemonic paired with one row of
// the encodings table. Lookup is possible by mnemonic plus operands
// (`findByMnemonic`) or by raw opcode bytes plus prefixes (`findByOpcode`).
const Encoding = @This();

const std = @import("std");
const assert = std.debug.assert;
const math = std.math;

const bits = @import("bits.zig");
const encoder = @import("encoder.zig");
const Instruction = encoder.Instruction;
const Operand = Instruction.Operand;
const Prefix = Instruction.Prefix;
const Register = bits.Register;
const Rex = encoder.Rex;
const LegacyPrefixes = encoder.LegacyPrefixes;

/// The instruction mnemonic this encoding belongs to.
mnemonic: Mnemonic,
/// The table row describing how the instruction is encoded.
data: Data,

/// One row of the encodings table with the mnemonic stripped off.
const Data = struct {
    /// Operand-encoding scheme; selects how operands map onto the instruction bytes.
    op_en: OpEn,
    /// Operand patterns accepted by this encoding.
    ops: [4]Op,
    /// Number of leading bytes of `opc` that are meaningful (see `opcode`).
    opc_len: u3,
    /// Opcode bytes; only `opc[0..opc_len]` is valid.
    opc: [7]u8,
    /// ModRM opcode extension; exposed through `modRmExt` for `.m`, `.mi`, `.m1`, `.mc`.
    modrm_ext: u3,
    /// Encoding mode; consulted when matching REX / operand-size prefixes.
    mode: Mode,
};

/// Finds the encoding of `mnemonic` that accepts `ops` and has the smallest
/// estimated encoded length. When several candidates tie, the first one in
/// table order is kept.
///
/// Returns `null` when no encoding of `mnemonic` matches the operands.
///
/// Returns `error.CannotEncode` when
/// * more operands are supplied than an encoding can hold (four), or
/// * the operands mix a register that forces a REX prefix (`spl`, `bpl`,
///   `sil`, `dil`, or any extended base/index) with a register that is
///   flagged as incompatible with REX (`ah`, `bh`, `ch`, `dh`).
pub fn findByMnemonic(
    prefix: Instruction.Prefix,
    mnemonic: Mnemonic,
    ops: []const Instruction.Operand,
) !?Encoding {
    var input_ops = [1]Op{.none} ** 4;
    // Slicing `input_ops[0..ops.len]` below is only valid for up to four
    // operands; report anything larger as unencodable instead of tripping a
    // bounds panic (or undefined behaviour in unchecked builds).
    if (ops.len > input_ops.len) return error.CannotEncode;
    for (input_ops[0..ops.len], ops) |*input_op, op| input_op.* = Op.fromOperand(op);

    const rex_required = for (ops) |op| switch (op) {
        .reg => |r| switch (r) {
            .spl, .bpl, .sil, .dil => break true,
            else => {},
        },
        else => {},
    } else false;
    const rex_invalid = for (ops) |op| switch (op) {
        .reg => |r| switch (r) {
            .ah, .bh, .ch, .dh => break true,
            else => {},
        },
        else => {},
    } else false;
    const rex_extended = for (ops) |op| {
        if (op.isBaseExtended() or op.isIndexExtended()) break true;
    } else false;

    if ((rex_required or rex_extended) and rex_invalid) return error.CannotEncode;

    var shortest_enc: ?Encoding = null;
    // Length of `shortest_enc`, computed lazily: it stays `null` until a
    // second candidate shows up and a comparison is actually needed.
    var shortest_len: ?usize = null;
    next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| {
        switch (data.mode) {
            .rex => if (!rex_required) continue,
            .long, .sse_long, .sse2_long => {},
            else => if (rex_required) continue,
        }
        // Every supplied operand (padded with `.none`) must fit the pattern
        // in the corresponding slot of the candidate.
        for (input_ops, data.ops) |input_op, data_op|
            if (!input_op.isSubset(data_op)) continue :next;

        const enc = Encoding{ .mnemonic = mnemonic, .data = data };
        if (shortest_enc) |previous_shortest_enc| {
            const len = estimateInstructionLength(prefix, enc, ops);
            const previous_shortest_len = shortest_len orelse
                estimateInstructionLength(prefix, previous_shortest_enc, ops);
            if (len < previous_shortest_len) {
                shortest_enc = enc;
                shortest_len = len;
            } else shortest_len = previous_shortest_len;
        } else shortest_enc = enc;
    }
    return shortest_enc;
}

/// Returns first matching encoding by opcode.
///
/// `opc` must equal the candidate's opcode bytes exactly. When `modrm_ext` is
/// non-null it must equal the candidate's `modrm_ext`. The prefixes then
/// narrow the candidates further:
/// * REX.W set: only `.long`, `.sse_long`, `.sse2_long` and `.rex` modes match.
/// * REX present with no bits set: only `.rex` mode matches.
/// * 0x66 legacy prefix: only encodings whose operand size is 16 bits match.
/// * otherwise: only `.none` mode encodings whose operand size is not 16 bits match.
pub fn findByOpcode(opc: []const u8, prefixes: struct {
    legacy: LegacyPrefixes,
    rex: Rex,
}, modrm_ext: ?u3) ?Encoding {
    for (mnemonic_to_encodings_map, 0..) |encs, mnemonic_int| for (encs) |data| {
        const enc = Encoding{ .mnemonic = @intToEnum(Mnemonic, mnemonic_int), .data = data };
        if (modrm_ext) |ext| if (ext != data.modrm_ext) continue;
        if (!std.mem.eql(u8, opc, enc.opcode())) continue;
        if (prefixes.rex.w) {
            switch (data.mode) {
                .short, .fpu, .sse, .sse2, .sse4_1, .none => continue,
                .long, .sse_long, .sse2_long, .rex => {},
            }
        } else if (prefixes.rex.present and !prefixes.rex.isSet()) {
            switch (data.mode) {
                .rex => {},
                else => continue,
            }
        } else if (prefixes.legacy.prefix_66) {
            switch (enc.operandBitSize()) {
                16 => {},
                else => continue,
            }
        } else {
            switch (data.mode) {
                .none => switch (enc.operandBitSize()) {
                    16 => continue,
                    else => {},
                },
                else => continue,
            }
        }
        return enc;
    };
    return null;
}

/// Returns the valid opcode bytes of this encoding (`opc[0..opc_len]`).
/// The slice points into `encoding`, so it must not outlive it.
pub fn opcode(encoding: *const Encoding) []const u8 {
    return encoding.data.opc[0..encoding.data.opc_len];
}

/// Returns the first opcode byte when it is one of `0x66`, `0xf2` or `0xf3`,
/// otherwise `null`.
pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 {
    const prefix = encoding.data.opc[0];
    return switch (prefix) {
        0x66, 0xf2, 0xf3 => prefix,
        else => null,
    };
}

/// Returns the ModRM opcode extension.
/// Only valid for the `.m`, `.mi`, `.m1` and `.mc` operand encodings; any
/// other `op_en` is `unreachable`.
pub fn modRmExt(encoding: Encoding) u3 {
    return switch (encoding.data.op_en) {
        .m, .mi, .m1, .mc => encoding.data.modrm_ext,
        else => unreachable,
    };
}

/// Returns the operand size of this encoding in bits.
///
/// `.short` mode is always 16 and the `*long` modes are always 64. Otherwise
/// the size is derived from the operand patterns: `.np` encodings look at an
/// `.o16`/`.o32`/`.o64` marker in slot 0 (defaulting to 32), `.td` uses slot 1
/// and every other scheme uses slot 0.
pub fn operandBitSize(encoding: Encoding) u64 {
    switch (encoding.data.mode) {
        .short => return 16,
        .long, .sse_long, .sse2_long => return 64,
        else => {},
    }
    const bit_size: u64 = switch (encoding.data.op_en) {
        .np => switch (encoding.data.ops[0]) {
            .o16 => 16,
            .o32 => 32,
            .o64 => 64,
            else => 32,
        },
        .td => encoding.data.ops[1].bitSize(),
        else => encoding.data.ops[0].bitSize(),
    };
    return bit_size;
}

/// `std.fmt` hook. Writes, in order and space separated:
/// an optional `REX.W + ` marker, the opcode bytes in two-digit hex, the
/// register/ModRM tag (`+rb`, `/digit`, `/r`), the immediate tag (`ib`, `cd`,
/// ...), the mnemonic, the operand pattern names and finally the operand
/// encoding name (`.zi` is printed as `i`).
/// `fmt` and `options` are ignored.
pub fn format(
    encoding: Encoding,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = options;
    _ = fmt;
    switch (encoding.data.mode) {
        .long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "),
        else => {},
    }

    for (encoding.opcode()) |byte| {
        try writer.print("{x:0>2} ", .{byte});
    }

    switch (encoding.data.op_en) {
        .np, .fd, .td, .i, .zi, .d => {},
        .o, .oi => {
            // Note that 64-bit registers are printed with the `rd` tag as well.
            const tag = switch (encoding.data.ops[0]) {
                .r8 => "rb",
                .r16 => "rw",
                .r32 => "rd",
                .r64 => "rd",
                else => unreachable,
            };
            try writer.print("+{s} ", .{tag});
        },
        .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}),
        .mr, .rm, .rmi, .mri, .mrc => try writer.writeAll("/r "),
    }

    switch (encoding.data.op_en) {
        .i, .d, .zi, .oi, .mi, .rmi, .mri => {
            // Pick the operand slot that carries the immediate / relative value.
            const op = switch (encoding.data.op_en) {
                .i, .d => encoding.data.ops[0],
                .zi, .oi, .mi => encoding.data.ops[1],
                .rmi, .mri => encoding.data.ops[2],
                else => unreachable,
            };
            const tag = switch (op) {
                .imm8, .imm8s => "ib",
                .imm16, .imm16s => "iw",
                .imm32, .imm32s => "id",
                .imm64 => "io",
                .rel8 => "cb",
                .rel16 => "cw",
                .rel32 => "cd",
                else => unreachable,
            };
            try writer.print("{s} ", .{tag});
        },
        .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc => {},
    }

    try writer.print("{s} ", .{@tagName(encoding.mnemonic)});

    // Operand slots are printed up to the first empty / size-marker slot.
    for (encoding.data.ops) |op| switch (op) {
        .none, .o16, .o32, .o64 => break,
        else => try writer.print("{s} ", .{@tagName(op)}),
    };

    const op_en = switch (encoding.data.op_en) {
        .zi => .i,
        else => |op_en| op_en,
    };
    try writer.print("{s}", .{@tagName(op_en)});
}

/// All supported instruction mnemonics.
///
/// The integer value of each tag is used as the index into
/// `mnemonic_to_encodings_map`, so the declaration order is significant only
/// in that it must stay consistent within one build.
pub const Mnemonic = enum {
    // zig fmt: off
    // General-purpose
    adc, add, @"and",
    bsf, bsr, bswap, bt, btc, btr, bts,
    call, cbw, cdq, cdqe,
    cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna,
    cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno,
    cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz,
    cmp,
    cmps, cmpsb, cmpsd, cmpsq, cmpsw,
    cmpxchg, cmpxchg8b, cmpxchg16b,
    cqo, cwd, cwde,
    div,
    fisttp, fld,
    idiv, imul, int3,
    ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe,
    jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz,
    jmp,
    lea, lfence,
    lods, lodsb, lodsd, lodsq, lodsw,
    lzcnt,
    mfence, mov, movbe,
    movs, movsb, movsd, movsq, movsw,
    movsx, movsxd, movzx, mul,
    neg, nop, not,
    @"or",
    pop, popcnt, push,
    rcl, rcr, ret, rol, ror,
    sal, sar, sbb,
    scas, scasb, scasd, scasq, scasw,
    shl, shld, shr, shrd, sub, syscall,
    seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
    setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
    setnz, seto, setp, setpe, setpo, sets, setz,
    sfence,
    stos, stosb, stosd, stosq, stosw,
    @"test", tzcnt,
    ud2,
    xadd, xchg, xor,
    // MMX
    movd,
    // SSE
    addss,
    andps,
    andnps,
    cmpss,
    cvtsi2ss,
    divss,
    maxss, minss,
    movaps, movss, movups,
    mulss,
    orps,
    pextrw,
    pinsrw,
    sqrtps,
    sqrtss,
    subss,
    ucomiss,
    xorps,
    // SSE2
    // (commented-out names below are already declared earlier in this enum)
    addsd,
    andpd,
    andnpd,
    //cmpsd,
    cvtsd2ss, cvtsi2sd, cvtss2sd,
    divsd,
    maxsd, minsd,
    movapd,
    movq, //movd, movsd,
    movupd,
    mulsd,
    orpd,
    sqrtpd,
    sqrtsd,
    subsd,
    ucomisd,
    xorpd,
    // SSE4.1
    roundss,
    roundsd,
    // zig fmt: on
};

/// Operand-encoding schemes. `format` and `modRmExt` in this file switch on
/// these to decide which tags / fields apply.
pub const OpEn = enum {
    // zig fmt: off
    np,
    o, oi,
    i, zi,
    d, m,
    fd, td,
    m1, mc, mi, mr, rm,
    rmi, mri, mrc,
    // zig fmt: on
};

/// Operand pattern: either the kind of a concrete operand (as produced by
/// `fromOperand`) or the kind an encoding accepts in a given slot.
pub const Op = enum {
    // zig fmt: off
    none,
    o16, o32, o64,
    unity,
    imm8, imm16, imm32, imm64,
    imm8s, imm16s, imm32s,
    al, ax, eax, rax,
    cl,
    r8, r16, r32, r64,
    rm8, rm16, rm32, rm64,
    m8, m16, m32, m64, m80, m128,
    rel8, rel16, rel32,
    m,
    moffs,
    sreg,
    xmm, xmm_m32, xmm_m64, xmm_m128,
    // zig fmt: on

    /// Classifies a concrete instruction operand into the most specific `Op`.
    ///
    /// * Registers: segment -> `.sreg`; 128-bit floating point -> `.xmm`;
    ///   the accumulator family and `cl` get their dedicated tags; any other
    ///   general-purpose register maps to `.r8`/`.r16`/`.r32`/`.r64` by size.
    /// * Memory: `.moffs` stays `.moffs`; `.sib`/`.rip` map to `.mN` by size.
    /// * Immediates: the value 1 is `.unity`; otherwise the narrowest type
    ///   that can hold the value is chosen, preferring the signed variant of
    ///   each width.
    ///
    /// Sizes outside the listed ones are `unreachable`.
    pub fn fromOperand(operand: Instruction.Operand) Op {
        switch (operand) {
            .none => return .none,

            .reg => |reg| {
                switch (reg.class()) {
                    .segment => return .sreg,
                    .floating_point => return switch (reg.bitSize()) {
                        128 => .xmm,
                        else => unreachable,
                    },
                    .general_purpose => {
                        if (reg.to64() == .rax) return switch (reg) {
                            .al => .al,
                            .ax => .ax,
                            .eax => .eax,
                            .rax => .rax,
                            else => unreachable,
                        };
                        if (reg == .cl) return .cl;
                        return switch (reg.bitSize()) {
                            8 => .r8,
                            16 => .r16,
                            32 => .r32,
                            64 => .r64,
                            else => unreachable,
                        };
                    },
                }
            },

            .mem => |mem| switch (mem) {
                .moffs => return .moffs,
                .sib, .rip => {
                    const bit_size = mem.bitSize();
                    return switch (bit_size) {
                        8 => .m8,
                        16 => .m16,
                        32 => .m32,
                        64 => .m64,
                        80 => .m80,
                        128 => .m128,
                        else => unreachable,
                    };
                },
            },

            .imm => |imm| {
                switch (imm) {
                    .signed => |x| {
                        if (x == 1) return .unity;
                        if (math.cast(i8, x)) |_| return .imm8s;
                        if (math.cast(i16, x)) |_| return .imm16s;
                        return .imm32s;
                    },
                    .unsigned => |x| {
                        if (x == 1) return .unity;
                        // At each width try the signed range first, then the
                        // unsigned one.
                        if (math.cast(i8, x)) |_| return .imm8s;
                        if (math.cast(u8, x)) |_| return .imm8;
                        if (math.cast(i16, x)) |_| return .imm16s;
                        if (math.cast(u16, x)) |_| return .imm16;
                        if (math.cast(i32, x)) |_| return .imm32s;
                        if (math.cast(u32, x)) |_| return .imm32;
                        return .imm64;
                    },
                }
            },
        }
    }

    /// Returns the width of the operand pattern in bits (`.unity` counts as 1).
    /// Not defined for `.none`, `.o16`, `.o32`, `.o64`, `.moffs`, `.m` and
    /// `.sreg`; those are `unreachable`.
    pub fn bitSize(op: Op) u64 {
        return switch (op) {
            .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
            .unity => 1,
            .imm8, .imm8s, .al, .cl, .r8, .m8, .rm8, .rel8 => 8,
            .imm16, .imm16s, .ax, .r16, .m16, .rm16, .rel16 => 16,
            .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32,
            .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64,
            .m80 => 80,
            .m128, .xmm, .xmm_m128 => 128,
        };
    }

    /// Returns whether an immediate pattern is one of the signed variants.
    /// Only defined for `.unity` and the `imm*` tags; anything else is
    /// `unreachable`.
    pub fn isSigned(op: Op) bool {
        return switch (op) {
            .unity, .imm8, .imm16, .imm32, .imm64 => false,
            .imm8s, .imm16s, .imm32s => true,
            else => unreachable,
        };
    }

    /// Negation of `isSigned`; subject to the same domain restriction.
    pub fn isUnsigned(op: Op) bool {
        return !op.isSigned();
    }

    /// Returns whether the pattern can be satisfied by a register. The
    /// register-or-memory patterns (`rm*`, `xmm_m*`) count as registers too.
    pub fn isRegister(op: Op) bool {
        // zig fmt: off
        return switch (op) {
            .cl,
            .al, .ax, .eax, .rax,
            .r8, .r16, .r32, .r64,
            .rm8, .rm16, .rm32, .rm64,
            .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
            => true,
            else => false,
        };
        // zig fmt: on
    }

    /// Returns whether the pattern is an immediate, a relative displacement
    /// (`rel*`) or `.unity`.
    pub fn isImmediate(op: Op) bool {
        // zig fmt: off
        return switch (op) {
            .imm8, .imm16, .imm32, .imm64,
            .imm8s, .imm16s, .imm32s,
            .rel8, .rel16, .rel32,
            .unity,
            => true,
            else => false,
        };
        // zig fmt: on
    }

    /// Returns whether the pattern can be satisfied by a memory operand. The
    /// register-or-memory patterns (`rm*`, `xmm_m*`) count as memory too.
    /// `.moffs` is not included here.
    pub fn isMemory(op: Op) bool {
        // zig fmt: off
        return switch (op) {
            .rm8, .rm16, .rm32, .rm64,
            .m8, .m16, .m32, .m64, .m80, .m128,
            .m,
            .xmm_m32, .xmm_m64, .xmm_m128,
            => true,
            else => false,
        };
        // zig fmt: on
    }

    /// Returns `true` for `.sreg` and `.moffs`.
    // NOTE(review): `.moffs` is a memory-offset pattern, yet it is reported
    // here alongside `.sreg`. Behaviour is kept as-is; confirm against the
    // callers whether this grouping is intentional.
    pub fn isSegmentRegister(op: Op) bool {
        return switch (op) {
            .moffs, .sreg => true,
            else => false,
        };
    }

    /// Returns the register class of a register-capable pattern.
    /// Patterns that are not listed below are `unreachable`.
    pub fn class(op: Op) bits.Register.Class {
        return switch (op) {
            else => unreachable,
            .al, .ax, .eax, .rax, .cl => .general_purpose,
            .r8, .r16, .r32, .r64 => .general_purpose,
            .rm8, .rm16, .rm32, .rm64 => .general_purpose,
            .sreg => .segment,
            .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
        };
    }

    /// Returns whether the pattern involves an xmm register (`xmm`, `xmm_m*`).
    pub fn isFloatingPointRegister(op: Op) bool {
        return switch (op) {
            .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true,
            else => false,
        };
    }

    /// Checks whether operand kind `op` is acceptable where an encoding
    /// expects `target`, i.e. whether `op` is a subset of `target`.
    ///
    /// `op` is expected to come from `fromOperand`; `.m`, `.o16`, `.o32` and
    /// `.o64` are never produced by it and are `unreachable` here.
    ///
    /// * `.moffs` / `.sreg` only match themselves.
    /// * `.none` matches `.none` and the `.o16`/`.o32`/`.o64` size markers.
    /// * Registers: fixed-register targets need an exact match; otherwise the
    ///   class must agree and, except for floating point, so must the size.
    /// * Memory: `.m` accepts any size; otherwise sizes must be equal.
    /// * Immediates: `op` must fit in `target`; a signed (or `rel`) target of
    ///   the same width additionally requires `op` to be signed.
    pub fn isSubset(op: Op, target: Op) bool {
        switch (op) {
            .m, .o16, .o32, .o64 => unreachable,
            .moffs, .sreg => return op == target,
            .none => switch (target) {
                .o16, .o32, .o64, .none => return true,
                else => return false,
            },
            else => {
                if (op.isRegister() and target.isRegister()) {
                    return switch (target) {
                        .cl, .al, .ax, .eax, .rax => op == target,
                        else => op.class() == target.class() and switch (target.class()) {
                            .floating_point => true,
                            else => op.bitSize() == target.bitSize(),
                        },
                    };
                }
                if (op.isMemory() and target.isMemory()) {
                    switch (target) {
                        .m => return true,
                        else => return op.bitSize() == target.bitSize(),
                    }
                }
                if (op.isImmediate() and target.isImmediate()) {
                    switch (target) {
                        .imm64 => if (op.bitSize() <= 64) return true,
                        .imm32s, .rel32 => if (op.bitSize() < 32 or (op.bitSize() == 32 and op.isSigned()))
                            return true,
                        .imm32 => if (op.bitSize() <= 32) return true,
                        .imm16s, .rel16 => if (op.bitSize() < 16 or (op.bitSize() == 16 and op.isSigned()))
                            return true,
                        .imm16 => if (op.bitSize() <= 16) return true,
                        .imm8s, .rel8 => if (op.bitSize() < 8 or (op.bitSize() == 8 and op.isSigned()))
                            return true,
                        .imm8 => if (op.bitSize() <= 8) return true,
                        else => {},
                    }
                    // Remaining case (e.g. `.unity` target): exact match only.
                    return op == target;
                }
                return false;
            },
        }
    }
};

/// Encoding mode of a table row. `findByMnemonic`, `findByOpcode`,
/// `operandBitSize` and `format` branch on it.
pub const Mode = enum {
    none,
    short,
    fpu,
    rex,
    long,
    sse,
    sse_long,
    sse2,
    sse2_long,
    sse4_1,
};

/// Returns the number of bytes `encoding` produces for `ops` by encoding the
/// instruction into a byte-counting writer that discards its output.
/// `ops` must hold at most four operands.
fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Operand) usize {
    var inst = Instruction{
        .prefix = prefix,
        .encoding = encoding,
        .ops = [1]Operand{.none} ** 4,
    };
    @memcpy(inst.ops[0..ops.len], ops);

    var cwriter = std.io.countingWriter(std.io.null_writer);
    inst.encode(cwriter.writer(), .{ .allow_frame_loc = true }) catch unreachable; // Not allowed to fail here unless OOM.
    return @intCast(usize, cwriter.bytes_written);
}

/// Comptime-built lookup table: `mnemonic_to_encodings_map[@enumToInt(m)]` is
/// the slice of all `Data` rows for mnemonic `m` (empty when there are none).
///
/// Built by sorting `encodings.table` by mnemonic and slicing the sorted rows
/// into one contiguous run per mnemonic. Each table entry is read as
/// `{ mnemonic, op_en, ops, opc, modrm_ext, mode }`.
const mnemonic_to_encodings_map = init: {
    @setEvalBranchQuota(100_000);
    const encodings = @import("encodings.zig");
    var entries = encodings.table;
    std.sort.sort(encodings.Entry, &entries, {}, struct {
        fn lessThan(_: void, lhs: encodings.Entry, rhs: encodings.Entry) bool {
            return @enumToInt(lhs[0]) < @enumToInt(rhs[0]);
        }
    }.lessThan);
    var data_storage: [entries.len]Data = undefined;
    var mnemonic_map: [@typeInfo(Mnemonic).Enum.fields.len][]const Data = undefined;
    var mnemonic_int = 0;
    var mnemonic_start = 0;
    for (&data_storage, entries, 0..) |*data, entry, data_index| {
        // NOTE(review): `ops` and `opc` start out undefined and are filled by
        // the copies below; this presumably relies on `entry[2]` covering all
        // four operand slots - confirm against `encodings.zig`.
        data.* = .{
            .op_en = entry[1],
            .ops = undefined,
            .opc_len = entry[3].len,
            .opc = undefined,
            .modrm_ext = entry[4],
            .mode = entry[5],
        };
        // TODO: use `@memcpy` for these. When I did that, I got a false positive
        // compile error for this copy happening at compile time.
        std.mem.copyForwards(Op, &data.ops, entry[2]);
        std.mem.copyForwards(u8, &data.opc, entry[3]);

        // Reaching a row with a larger mnemonic closes the run of every
        // mnemonic before it; mnemonics without rows get an empty slice.
        while (mnemonic_int < @enumToInt(entry[0])) : (mnemonic_int += 1) {
            mnemonic_map[mnemonic_int] = data_storage[mnemonic_start..data_index];
            mnemonic_start = data_index;
        }
    }
    // Close the final run and give any trailing mnemonics an empty slice.
    while (mnemonic_int < mnemonic_map.len) : (mnemonic_int += 1) {
        mnemonic_map[mnemonic_int] = data_storage[mnemonic_start..];
        mnemonic_start = data_storage.len;
    }
    break :init mnemonic_map;
};