zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob 944fe854 (18916B) - Raw


      1 const Encoding = @This();
      2 
      3 const std = @import("std");
      4 const assert = std.debug.assert;
      5 const math = std.math;
      6 
      7 const bits = @import("bits.zig");
      8 const encoder = @import("encoder.zig");
      9 const Instruction = encoder.Instruction;
     10 const Operand = Instruction.Operand;
     11 const Prefix = Instruction.Prefix;
     12 const Register = bits.Register;
     13 const Rex = encoder.Rex;
     14 const LegacyPrefixes = encoder.LegacyPrefixes;
     15 
/// The instruction mnemonic this encoding belongs to.
mnemonic: Mnemonic,
/// The table row describing how the instruction is laid out (operand
/// encoding, operand patterns, opcode bytes, ModRM extension and mode).
data: Data,
     18 
/// One row of the encoding table, as stored in `mnemonic_to_encodings_map`.
const Data = struct {
    /// Operand-encoding scheme; decides which operand slots hold immediates
    /// and whether a ModRM extension applies (see `format` and `modRmExt`).
    op_en: OpEn,
    /// Operand patterns, matched slot by slot against the caller's operands
    /// in `findByMnemonic`.
    ops: [4]Op,
    /// Number of leading bytes of `opc` that are valid (see `opcode`).
    opc_len: u3,
    /// Opcode byte storage; only the first `opc_len` bytes are meaningful.
    opc: [7]u8,
    /// ModRM extension value, returned by `modRmExt` for the
    /// `.m`, `.mi`, `.m1` and `.mc` operand encodings.
    modrm_ext: u3,
    /// Mode the encoding applies in (see `Mode`).
    mode: Mode,
};
     27 
     28 pub fn findByMnemonic(
     29     prefix: Instruction.Prefix,
     30     mnemonic: Mnemonic,
     31     ops: []const Instruction.Operand,
     32 ) !?Encoding {
     33     var input_ops = [1]Op{.none} ** 4;
     34     for (input_ops[0..ops.len], ops) |*input_op, op| input_op.* = Op.fromOperand(op);
     35 
     36     const rex_required = for (ops) |op| switch (op) {
     37         .reg => |r| switch (r) {
     38             .spl, .bpl, .sil, .dil => break true,
     39             else => {},
     40         },
     41         else => {},
     42     } else false;
     43     const rex_invalid = for (ops) |op| switch (op) {
     44         .reg => |r| switch (r) {
     45             .ah, .bh, .ch, .dh => break true,
     46             else => {},
     47         },
     48         else => {},
     49     } else false;
     50     const rex_extended = for (ops) |op| {
     51         if (op.isBaseExtended() or op.isIndexExtended()) break true;
     52     } else false;
     53 
     54     if ((rex_required or rex_extended) and rex_invalid) return error.CannotEncode;
     55 
     56     var shortest_enc: ?Encoding = null;
     57     var shortest_len: ?usize = null;
     58     next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| {
     59         switch (data.mode) {
     60             .rex => if (!rex_required) continue,
     61             .long, .sse_long, .sse2_long => {},
     62             else => if (rex_required) continue,
     63         }
     64         for (input_ops, data.ops) |input_op, data_op|
     65             if (!input_op.isSubset(data_op)) continue :next;
     66 
     67         const enc = Encoding{ .mnemonic = mnemonic, .data = data };
     68         if (shortest_enc) |previous_shortest_enc| {
     69             const len = estimateInstructionLength(prefix, enc, ops);
     70             const previous_shortest_len = shortest_len orelse
     71                 estimateInstructionLength(prefix, previous_shortest_enc, ops);
     72             if (len < previous_shortest_len) {
     73                 shortest_enc = enc;
     74                 shortest_len = len;
     75             } else shortest_len = previous_shortest_len;
     76         } else shortest_enc = enc;
     77     }
     78     return shortest_enc;
     79 }
     80 
     81 /// Returns first matching encoding by opcode.
     82 pub fn findByOpcode(opc: []const u8, prefixes: struct {
     83     legacy: LegacyPrefixes,
     84     rex: Rex,
     85 }, modrm_ext: ?u3) ?Encoding {
     86     for (mnemonic_to_encodings_map, 0..) |encs, mnemonic_int| for (encs) |data| {
     87         const enc = Encoding{ .mnemonic = @intToEnum(Mnemonic, mnemonic_int), .data = data };
     88         if (modrm_ext) |ext| if (ext != data.modrm_ext) continue;
     89         if (!std.mem.eql(u8, opc, enc.opcode())) continue;
     90         if (prefixes.rex.w) {
     91             switch (data.mode) {
     92                 .short, .fpu, .sse, .sse2, .sse4_1, .none => continue,
     93                 .long, .sse_long, .sse2_long, .rex => {},
     94             }
     95         } else if (prefixes.rex.present and !prefixes.rex.isSet()) {
     96             switch (data.mode) {
     97                 .rex => {},
     98                 else => continue,
     99             }
    100         } else if (prefixes.legacy.prefix_66) {
    101             switch (enc.operandBitSize()) {
    102                 16 => {},
    103                 else => continue,
    104             }
    105         } else {
    106             switch (data.mode) {
    107                 .none => switch (enc.operandBitSize()) {
    108                     16 => continue,
    109                     else => {},
    110                 },
    111                 else => continue,
    112             }
    113         }
    114         return enc;
    115     };
    116     return null;
    117 }
    118 
    119 pub fn opcode(encoding: *const Encoding) []const u8 {
    120     return encoding.data.opc[0..encoding.data.opc_len];
    121 }
    122 
    123 pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 {
    124     const prefix = encoding.data.opc[0];
    125     return switch (prefix) {
    126         0x66, 0xf2, 0xf3 => prefix,
    127         else => null,
    128     };
    129 }
    130 
    131 pub fn modRmExt(encoding: Encoding) u3 {
    132     return switch (encoding.data.op_en) {
    133         .m, .mi, .m1, .mc => encoding.data.modrm_ext,
    134         else => unreachable,
    135     };
    136 }
    137 
    138 pub fn operandBitSize(encoding: Encoding) u64 {
    139     switch (encoding.data.mode) {
    140         .short => return 16,
    141         .long, .sse_long, .sse2_long => return 64,
    142         else => {},
    143     }
    144     const bit_size: u64 = switch (encoding.data.op_en) {
    145         .np => switch (encoding.data.ops[0]) {
    146             .o16 => 16,
    147             .o32 => 32,
    148             .o64 => 64,
    149             else => 32,
    150         },
    151         .td => encoding.data.ops[1].bitSize(),
    152         else => encoding.data.ops[0].bitSize(),
    153     };
    154     return bit_size;
    155 }
    156 
    157 pub fn format(
    158     encoding: Encoding,
    159     comptime fmt: []const u8,
    160     options: std.fmt.FormatOptions,
    161     writer: anytype,
    162 ) !void {
    163     _ = options;
    164     _ = fmt;
    165     switch (encoding.data.mode) {
    166         .long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "),
    167         else => {},
    168     }
    169 
    170     for (encoding.opcode()) |byte| {
    171         try writer.print("{x:0>2} ", .{byte});
    172     }
    173 
    174     switch (encoding.data.op_en) {
    175         .np, .fd, .td, .i, .zi, .d => {},
    176         .o, .oi => {
    177             const tag = switch (encoding.data.ops[0]) {
    178                 .r8 => "rb",
    179                 .r16 => "rw",
    180                 .r32 => "rd",
    181                 .r64 => "rd",
    182                 else => unreachable,
    183             };
    184             try writer.print("+{s} ", .{tag});
    185         },
    186         .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}),
    187         .mr, .rm, .rmi, .mri, .mrc => try writer.writeAll("/r "),
    188     }
    189 
    190     switch (encoding.data.op_en) {
    191         .i, .d, .zi, .oi, .mi, .rmi, .mri => {
    192             const op = switch (encoding.data.op_en) {
    193                 .i, .d => encoding.data.ops[0],
    194                 .zi, .oi, .mi => encoding.data.ops[1],
    195                 .rmi, .mri => encoding.data.ops[2],
    196                 else => unreachable,
    197             };
    198             const tag = switch (op) {
    199                 .imm8, .imm8s => "ib",
    200                 .imm16, .imm16s => "iw",
    201                 .imm32, .imm32s => "id",
    202                 .imm64 => "io",
    203                 .rel8 => "cb",
    204                 .rel16 => "cw",
    205                 .rel32 => "cd",
    206                 else => unreachable,
    207             };
    208             try writer.print("{s} ", .{tag});
    209         },
    210         .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc => {},
    211     }
    212 
    213     try writer.print("{s} ", .{@tagName(encoding.mnemonic)});
    214 
    215     for (encoding.data.ops) |op| switch (op) {
    216         .none, .o16, .o32, .o64 => break,
    217         else => try writer.print("{s} ", .{@tagName(op)}),
    218     };
    219 
    220     const op_en = switch (encoding.data.op_en) {
    221         .zi => .i,
    222         else => |op_en| op_en,
    223     };
    224     try writer.print("{s}", .{@tagName(op_en)});
    225 }
    226 
    227 pub const Mnemonic = enum {
    228     // zig fmt: off
    229     // General-purpose
    230     adc, add, @"and",
    231     bsf, bsr, bswap, bt, btc, btr, bts,
    232     call, cbw, cdq, cdqe,
    233     cmova, cmovae, cmovb, cmovbe, cmovc, cmove, cmovg, cmovge, cmovl, cmovle, cmovna,
    234     cmovnae, cmovnb, cmovnbe, cmovnc, cmovne, cmovng, cmovnge, cmovnl, cmovnle, cmovno,
    235     cmovnp, cmovns, cmovnz, cmovo, cmovp, cmovpe, cmovpo, cmovs, cmovz,
    236     cmp,
    237     cmps, cmpsb, cmpsd, cmpsq, cmpsw,
    238     cmpxchg, cmpxchg8b, cmpxchg16b,
    239     cqo, cwd, cwde,
    240     div,
    241     fisttp, fld,
    242     idiv, imul, int3,
    243     ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe,
    244     jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz,
    245     jmp, 
    246     lea, lfence,
    247     lods, lodsb, lodsd, lodsq, lodsw,
    248     lzcnt,
    249     mfence, mov, movbe,
    250     movs, movsb, movsd, movsq, movsw,
    251     movsx, movsxd, movzx, mul,
    252     neg, nop, not,
    253     @"or",
    254     pop, popcnt, push,
    255     rcl, rcr, ret, rol, ror,
    256     sal, sar, sbb,
    257     scas, scasb, scasd, scasq, scasw,
    258     shl, shld, shr, shrd, sub, syscall,
    259     seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
    260     setnb, setnbe, setnc, setne, setng, setnge, setnl, setnle, setno, setnp, setns,
    261     setnz, seto, setp, setpe, setpo, sets, setz,
    262     sfence,
    263     stos, stosb, stosd, stosq, stosw,
    264     @"test", tzcnt,
    265     ud2,
    266     xadd, xchg, xor,
    267     // MMX
    268     movd,
    269     // SSE
    270     addss,
    271     andps,
    272     andnps,
    273     cmpss,
    274     cvtsi2ss,
    275     divss,
    276     maxss, minss,
    277     movaps, movss, movups,
    278     mulss,
    279     orps,
    280     pextrw,
    281     pinsrw,
    282     sqrtps,
    283     sqrtss,
    284     subss,
    285     ucomiss,
    286     xorps,
    287     // SSE2
    288     addsd,
    289     andpd,
    290     andnpd,
    291     //cmpsd,
    292     cvtsd2ss, cvtsi2sd, cvtss2sd,
    293     divsd,
    294     maxsd, minsd,
    295     movapd,
    296     movq, //movd, movsd,
    297     movupd,
    298     mulsd,
    299     orpd,
    300     sqrtpd,
    301     sqrtsd,
    302     subsd,
    303     ucomisd,
    304     xorpd,
    305     // SSE4.1
    306     roundss,
    307     roundsd,
    308     // zig fmt: on
    309 };
    310 
    311 pub const OpEn = enum {
    312     // zig fmt: off
    313     np,
    314     o, oi,
    315     i, zi,
    316     d, m,
    317     fd, td,
    318     m1, mc, mi, mr, rm,
    319     rmi, mri, mrc,
    320     // zig fmt: on
    321 };
    322 
    323 pub const Op = enum {
    324     // zig fmt: off
    325     none,
    326     o16, o32, o64,
    327     unity,
    328     imm8, imm16, imm32, imm64,
    329     imm8s, imm16s, imm32s,
    330     al, ax, eax, rax,
    331     cl,
    332     r8, r16, r32, r64,
    333     rm8, rm16, rm32, rm64,
    334     m8, m16, m32, m64, m80, m128,
    335     rel8, rel16, rel32,
    336     m,
    337     moffs,
    338     sreg,
    339     xmm, xmm_m32, xmm_m64, xmm_m128,
    340     // zig fmt: on
    341 
    342     pub fn fromOperand(operand: Instruction.Operand) Op {
    343         switch (operand) {
    344             .none => return .none,
    345 
    346             .reg => |reg| {
    347                 switch (reg.class()) {
    348                     .segment => return .sreg,
    349                     .floating_point => return switch (reg.bitSize()) {
    350                         128 => .xmm,
    351                         else => unreachable,
    352                     },
    353                     .general_purpose => {
    354                         if (reg.to64() == .rax) return switch (reg) {
    355                             .al => .al,
    356                             .ax => .ax,
    357                             .eax => .eax,
    358                             .rax => .rax,
    359                             else => unreachable,
    360                         };
    361                         if (reg == .cl) return .cl;
    362                         return switch (reg.bitSize()) {
    363                             8 => .r8,
    364                             16 => .r16,
    365                             32 => .r32,
    366                             64 => .r64,
    367                             else => unreachable,
    368                         };
    369                     },
    370                 }
    371             },
    372 
    373             .mem => |mem| switch (mem) {
    374                 .moffs => return .moffs,
    375                 .sib, .rip => {
    376                     const bit_size = mem.bitSize();
    377                     return switch (bit_size) {
    378                         8 => .m8,
    379                         16 => .m16,
    380                         32 => .m32,
    381                         64 => .m64,
    382                         80 => .m80,
    383                         128 => .m128,
    384                         else => unreachable,
    385                     };
    386                 },
    387             },
    388 
    389             .imm => |imm| {
    390                 switch (imm) {
    391                     .signed => |x| {
    392                         if (x == 1) return .unity;
    393                         if (math.cast(i8, x)) |_| return .imm8s;
    394                         if (math.cast(i16, x)) |_| return .imm16s;
    395                         return .imm32s;
    396                     },
    397                     .unsigned => |x| {
    398                         if (x == 1) return .unity;
    399                         if (math.cast(i8, x)) |_| return .imm8s;
    400                         if (math.cast(u8, x)) |_| return .imm8;
    401                         if (math.cast(i16, x)) |_| return .imm16s;
    402                         if (math.cast(u16, x)) |_| return .imm16;
    403                         if (math.cast(i32, x)) |_| return .imm32s;
    404                         if (math.cast(u32, x)) |_| return .imm32;
    405                         return .imm64;
    406                     },
    407                 }
    408             },
    409         }
    410     }
    411 
    412     pub fn bitSize(op: Op) u64 {
    413         return switch (op) {
    414             .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable,
    415             .unity => 1,
    416             .imm8, .imm8s, .al, .cl, .r8, .m8, .rm8, .rel8 => 8,
    417             .imm16, .imm16s, .ax, .r16, .m16, .rm16, .rel16 => 16,
    418             .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32,
    419             .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64,
    420             .m80 => 80,
    421             .m128, .xmm, .xmm_m128 => 128,
    422         };
    423     }
    424 
    425     pub fn isSigned(op: Op) bool {
    426         return switch (op) {
    427             .unity, .imm8, .imm16, .imm32, .imm64 => false,
    428             .imm8s, .imm16s, .imm32s => true,
    429             else => unreachable,
    430         };
    431     }
    432 
    433     pub fn isUnsigned(op: Op) bool {
    434         return !op.isSigned();
    435     }
    436 
    437     pub fn isRegister(op: Op) bool {
    438         // zig fmt: off
    439         return switch (op) {
    440             .cl,
    441             .al, .ax, .eax, .rax,
    442             .r8, .r16, .r32, .r64,
    443             .rm8, .rm16, .rm32, .rm64,
    444             .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
    445             => true,
    446             else => false,
    447         };
    448         // zig fmt: on
    449     }
    450 
    451     pub fn isImmediate(op: Op) bool {
    452         // zig fmt: off
    453         return switch (op) {
    454             .imm8, .imm16, .imm32, .imm64, 
    455             .imm8s, .imm16s, .imm32s,
    456             .rel8, .rel16, .rel32,
    457             .unity,
    458             =>  true,
    459             else => false,
    460         };
    461         // zig fmt: on
    462     }
    463 
    464     pub fn isMemory(op: Op) bool {
    465         // zig fmt: off
    466         return switch (op) {
    467             .rm8, .rm16, .rm32, .rm64,
    468             .m8, .m16, .m32, .m64, .m80, .m128,
    469             .m,
    470             .xmm_m32, .xmm_m64, .xmm_m128,
    471             =>  true,
    472             else => false,
    473         };
    474         // zig fmt: on
    475     }
    476 
    477     pub fn isSegmentRegister(op: Op) bool {
    478         return switch (op) {
    479             .moffs, .sreg => true,
    480             else => false,
    481         };
    482     }
    483 
    484     pub fn class(op: Op) bits.Register.Class {
    485         return switch (op) {
    486             else => unreachable,
    487             .al, .ax, .eax, .rax, .cl => .general_purpose,
    488             .r8, .r16, .r32, .r64 => .general_purpose,
    489             .rm8, .rm16, .rm32, .rm64 => .general_purpose,
    490             .sreg => .segment,
    491             .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
    492         };
    493     }
    494 
    495     pub fn isFloatingPointRegister(op: Op) bool {
    496         return switch (op) {
    497             .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true,
    498             else => false,
    499         };
    500     }
    501 
    502     /// Given an operand `op` checks if `target` is a subset for the purposes of the encoding.
    503     pub fn isSubset(op: Op, target: Op) bool {
    504         switch (op) {
    505             .m, .o16, .o32, .o64 => unreachable,
    506             .moffs, .sreg => return op == target,
    507             .none => switch (target) {
    508                 .o16, .o32, .o64, .none => return true,
    509                 else => return false,
    510             },
    511             else => {
    512                 if (op.isRegister() and target.isRegister()) {
    513                     return switch (target) {
    514                         .cl, .al, .ax, .eax, .rax => op == target,
    515                         else => op.class() == target.class() and switch (target.class()) {
    516                             .floating_point => true,
    517                             else => op.bitSize() == target.bitSize(),
    518                         },
    519                     };
    520                 }
    521                 if (op.isMemory() and target.isMemory()) {
    522                     switch (target) {
    523                         .m => return true,
    524                         else => return op.bitSize() == target.bitSize(),
    525                     }
    526                 }
    527                 if (op.isImmediate() and target.isImmediate()) {
    528                     switch (target) {
    529                         .imm64 => if (op.bitSize() <= 64) return true,
    530                         .imm32s, .rel32 => if (op.bitSize() < 32 or (op.bitSize() == 32 and op.isSigned()))
    531                             return true,
    532                         .imm32 => if (op.bitSize() <= 32) return true,
    533                         .imm16s, .rel16 => if (op.bitSize() < 16 or (op.bitSize() == 16 and op.isSigned()))
    534                             return true,
    535                         .imm16 => if (op.bitSize() <= 16) return true,
    536                         .imm8s, .rel8 => if (op.bitSize() < 8 or (op.bitSize() == 8 and op.isSigned()))
    537                             return true,
    538                         .imm8 => if (op.bitSize() <= 8) return true,
    539                         else => {},
    540                     }
    541                     return op == target;
    542                 }
    543                 return false;
    544             },
    545         }
    546     }
    547 };
    548 
/// Mode an encoding-table entry applies in. The notes below describe only
/// how this file treats each value.
pub const Mode = enum {
    /// No special mode; the only mode `findByOpcode` accepts when no REX or
    /// 0x66 prefix is in play.
    none,
    /// `operandBitSize` reports 16 bits.
    short,
    fpu,
    /// Selected by `findByMnemonic` only when an operand is `spl`, `bpl`,
    /// `sil` or `dil`.
    rex,
    /// `operandBitSize` reports 64 bits; formatted with a "REX.W + " prefix.
    long,
    sse,
    /// Treated like `long` for operand size and formatting.
    sse_long,
    sse2,
    /// Treated like `long` for operand size and formatting.
    sse2_long,
    sse4_1,
};
    561 
    562 fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Operand) usize {
    563     var inst = Instruction{
    564         .prefix = prefix,
    565         .encoding = encoding,
    566         .ops = [1]Operand{.none} ** 4,
    567     };
    568     @memcpy(inst.ops[0..ops.len], ops);
    569 
    570     var cwriter = std.io.countingWriter(std.io.null_writer);
    571     inst.encode(cwriter.writer(), .{ .allow_frame_loc = true }) catch unreachable; // Not allowed to fail here unless OOM.
    572     return @intCast(usize, cwriter.bytes_written);
    573 }
    574 
    575 const mnemonic_to_encodings_map = init: {
    576     @setEvalBranchQuota(100_000);
    577     const encodings = @import("encodings.zig");
    578     var entries = encodings.table;
    579     std.sort.sort(encodings.Entry, &entries, {}, struct {
    580         fn lessThan(_: void, lhs: encodings.Entry, rhs: encodings.Entry) bool {
    581             return @enumToInt(lhs[0]) < @enumToInt(rhs[0]);
    582         }
    583     }.lessThan);
    584     var data_storage: [entries.len]Data = undefined;
    585     var mnemonic_map: [@typeInfo(Mnemonic).Enum.fields.len][]const Data = undefined;
    586     var mnemonic_int = 0;
    587     var mnemonic_start = 0;
    588     for (&data_storage, entries, 0..) |*data, entry, data_index| {
    589         data.* = .{
    590             .op_en = entry[1],
    591             .ops = undefined,
    592             .opc_len = entry[3].len,
    593             .opc = undefined,
    594             .modrm_ext = entry[4],
    595             .mode = entry[5],
    596         };
    597         // TODO: use `@memcpy` for these. When I did that, I got a false positive
    598         // compile error for this copy happening at compile time.
    599         std.mem.copyForwards(Op, &data.ops, entry[2]);
    600         std.mem.copyForwards(u8, &data.opc, entry[3]);
    601 
    602         while (mnemonic_int < @enumToInt(entry[0])) : (mnemonic_int += 1) {
    603             mnemonic_map[mnemonic_int] = data_storage[mnemonic_start..data_index];
    604             mnemonic_start = data_index;
    605         }
    606     }
    607     while (mnemonic_int < mnemonic_map.len) : (mnemonic_int += 1) {
    608         mnemonic_map[mnemonic_int] = data_storage[mnemonic_start..];
    609         mnemonic_start = data_storage.len;
    610     }
    611     break :init mnemonic_map;
    612 };