Files
zig/lib/std/target/feature/AmdGpuFeature.zig
2020-01-19 20:53:18 -05:00

344 lines
20 KiB
Zig

const FeatureInfo = @import("std").target.feature.FeatureInfo;
pub const AmdGpuFeature = enum {
BitInsts16,
AddNoCarryInsts,
ApertureRegs,
AtomicFaddInsts,
AutoWaitcntBeforeBarrier,
CiInsts,
CodeObjectV3,
Cumode,
DlInsts,
Dpp,
Dpp8,
NoSramEccSupport,
NoXnackSupport,
Dot1Insts,
Dot2Insts,
Dot3Insts,
Dot4Insts,
Dot5Insts,
Dot6Insts,
DumpCode,
Dumpcode,
EnableDs128,
LoadStoreOpt,
EnablePrtStrictNull,
SiScheduler,
UnsafeDsOffsetFolding,
Fmaf,
Fp16Denormals,
Fp32Denormals,
Fp64,
Fp64Denormals,
Fp64Fp16Denormals,
FpExceptions,
FastFmaf,
FlatAddressSpace,
FlatForGlobal,
FlatGlobalInsts,
FlatInstOffsets,
FlatScratchInsts,
FlatSegmentOffsetBug,
FmaMixInsts,
Gcn3Encoding,
Gfx7Gfx8Gfx9Insts,
Gfx8Insts,
Gfx9,
Gfx9Insts,
Gfx10,
Gfx10Insts,
InstFwdPrefetchBug,
IntClampInsts,
Inv2piInlineImm,
Ldsbankcount16,
Ldsbankcount32,
LdsBranchVmemWarHazard,
LdsMisalignedBug,
Localmemorysize0,
Localmemorysize32768,
Localmemorysize65536,
MaiInsts,
MfmaInlineLiteralBug,
MimgR128,
MadMixInsts,
MaxPrivateElementSize4,
MaxPrivateElementSize8,
MaxPrivateElementSize16,
Movrel,
NsaEncoding,
NsaToVmemBug,
NoDataDepHazard,
NoSdstCmpx,
Offset3fBug,
PkFmacF16Inst,
PromoteAlloca,
R128A16,
RegisterBanking,
Sdwa,
SdwaMav,
SdwaOmod,
SdwaOutModsVopc,
SdwaScalar,
SdwaSdst,
SgprInitBug,
SmemToVectorWriteHazard,
SMemrealtime,
SramEcc,
ScalarAtomics,
ScalarFlatScratchInsts,
ScalarStores,
SeaIslands,
SouthernIslands,
TrapHandler,
TrigReducedRange,
UnalignedBufferAccess,
UnalignedScratchAccess,
UnpackedD16Vmem,
VgprIndexMode,
VmemToScalarWriteHazard,
Vop3Literal,
Vop3p,
VcmpxExecWarHazard,
VcmpxPermlaneHazard,
VolcanicIslands,
Vscnt,
Wavefrontsize16,
Wavefrontsize32,
Wavefrontsize64,
Xnack,
HalfRate64Ops,
pub fn getInfo(self: @This()) FeatureInfo(@This()) {
return feature_infos[@enumToInt(self)];
}
pub const feature_infos = [@memberCount(@This())]FeatureInfo(@This()) {
FeatureInfo(@This()).create(.BitInsts16, "16-bit-insts", "Has i16/f16 instructions", "16-bit-insts"),
FeatureInfo(@This()).create(.AddNoCarryInsts, "add-no-carry-insts", "Have VALU add/sub instructions without carry out", "add-no-carry-insts"),
FeatureInfo(@This()).create(.ApertureRegs, "aperture-regs", "Has Memory Aperture Base and Size Registers", "aperture-regs"),
FeatureInfo(@This()).create(.AtomicFaddInsts, "atomic-fadd-insts", "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions", "atomic-fadd-insts"),
FeatureInfo(@This()).create(.AutoWaitcntBeforeBarrier, "auto-waitcnt-before-barrier", "Hardware automatically inserts waitcnt before barrier", "auto-waitcnt-before-barrier"),
FeatureInfo(@This()).create(.CiInsts, "ci-insts", "Additional instructions for CI+", "ci-insts"),
FeatureInfo(@This()).create(.CodeObjectV3, "code-object-v3", "Generate code object version 3", "code-object-v3"),
FeatureInfo(@This()).create(.Cumode, "cumode", "Enable CU wavefront execution mode", "cumode"),
FeatureInfo(@This()).create(.DlInsts, "dl-insts", "Has v_fmac_f32 and v_xnor_b32 instructions", "dl-insts"),
FeatureInfo(@This()).create(.Dpp, "dpp", "Support DPP (Data Parallel Primitives) extension", "dpp"),
FeatureInfo(@This()).create(.Dpp8, "dpp8", "Support DPP8 (Data Parallel Primitives) extension", "dpp8"),
FeatureInfo(@This()).create(.NoSramEccSupport, "no-sram-ecc-support", "Hardware does not support SRAM ECC", "no-sram-ecc-support"),
FeatureInfo(@This()).create(.NoXnackSupport, "no-xnack-support", "Hardware does not support XNACK", "no-xnack-support"),
FeatureInfo(@This()).create(.Dot1Insts, "dot1-insts", "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions", "dot1-insts"),
FeatureInfo(@This()).create(.Dot2Insts, "dot2-insts", "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", "dot2-insts"),
FeatureInfo(@This()).create(.Dot3Insts, "dot3-insts", "Has v_dot8c_i32_i4 instruction", "dot3-insts"),
FeatureInfo(@This()).create(.Dot4Insts, "dot4-insts", "Has v_dot2c_i32_i16 instruction", "dot4-insts"),
FeatureInfo(@This()).create(.Dot5Insts, "dot5-insts", "Has v_dot2c_f32_f16 instruction", "dot5-insts"),
FeatureInfo(@This()).create(.Dot6Insts, "dot6-insts", "Has v_dot4c_i32_i8 instruction", "dot6-insts"),
FeatureInfo(@This()).create(.DumpCode, "DumpCode", "Dump MachineInstrs in the CodeEmitter", "DumpCode"),
FeatureInfo(@This()).create(.Dumpcode, "dumpcode", "Dump MachineInstrs in the CodeEmitter", "dumpcode"),
FeatureInfo(@This()).create(.EnableDs128, "enable-ds128", "Use ds_{read|write}_b128", "enable-ds128"),
FeatureInfo(@This()).create(.LoadStoreOpt, "load-store-opt", "Enable SI load/store optimizer pass", "load-store-opt"),
FeatureInfo(@This()).create(.EnablePrtStrictNull, "enable-prt-strict-null", "Enable zeroing of result registers for sparse texture fetches", "enable-prt-strict-null"),
FeatureInfo(@This()).create(.SiScheduler, "si-scheduler", "Enable SI Machine Scheduler", "si-scheduler"),
FeatureInfo(@This()).create(.UnsafeDsOffsetFolding, "unsafe-ds-offset-folding", "Force using DS instruction immediate offsets on SI", "unsafe-ds-offset-folding"),
FeatureInfo(@This()).create(.Fmaf, "fmaf", "Enable single precision FMA (not as fast as mul+add, but fused)", "fmaf"),
FeatureInfo(@This()).createWithSubfeatures(.Fp16Denormals, "fp16-denormals", "Enable half precision denormal handling", "fp16-denormals", &[_]@This() {
.Fp64,
}),
FeatureInfo(@This()).create(.Fp32Denormals, "fp32-denormals", "Enable single precision denormal handling", "fp32-denormals"),
FeatureInfo(@This()).create(.Fp64, "fp64", "Enable double precision operations", "fp64"),
FeatureInfo(@This()).createWithSubfeatures(.Fp64Denormals, "fp64-denormals", "Enable double and half precision denormal handling", "fp64-denormals", &[_]@This() {
.Fp64,
}),
FeatureInfo(@This()).createWithSubfeatures(.Fp64Fp16Denormals, "fp64-fp16-denormals", "Enable double and half precision denormal handling", "fp64-fp16-denormals", &[_]@This() {
.Fp64,
}),
FeatureInfo(@This()).create(.FpExceptions, "fp-exceptions", "Enable floating point exceptions", "fp-exceptions"),
FeatureInfo(@This()).create(.FastFmaf, "fast-fmaf", "Assuming f32 fma is at least as fast as mul + add", "fast-fmaf"),
FeatureInfo(@This()).create(.FlatAddressSpace, "flat-address-space", "Support flat address space", "flat-address-space"),
FeatureInfo(@This()).create(.FlatForGlobal, "flat-for-global", "Force to generate flat instruction for global", "flat-for-global"),
FeatureInfo(@This()).create(.FlatGlobalInsts, "flat-global-insts", "Have global_* flat memory instructions", "flat-global-insts"),
FeatureInfo(@This()).create(.FlatInstOffsets, "flat-inst-offsets", "Flat instructions have immediate offset addressing mode", "flat-inst-offsets"),
FeatureInfo(@This()).create(.FlatScratchInsts, "flat-scratch-insts", "Have scratch_* flat memory instructions", "flat-scratch-insts"),
FeatureInfo(@This()).create(.FlatSegmentOffsetBug, "flat-segment-offset-bug", "GFX10 bug, inst_offset ignored in flat segment", "flat-segment-offset-bug"),
FeatureInfo(@This()).create(.FmaMixInsts, "fma-mix-insts", "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions", "fma-mix-insts"),
FeatureInfo(@This()).create(.Gcn3Encoding, "gcn3-encoding", "Encoding format for VI", "gcn3-encoding"),
FeatureInfo(@This()).create(.Gfx7Gfx8Gfx9Insts, "gfx7-gfx8-gfx9-insts", "Instructions shared in GFX7, GFX8, GFX9", "gfx7-gfx8-gfx9-insts"),
FeatureInfo(@This()).create(.Gfx8Insts, "gfx8-insts", "Additional instructions for GFX8+", "gfx8-insts"),
FeatureInfo(@This()).createWithSubfeatures(.Gfx9, "gfx9", "GFX9 GPU generation", "gfx9", &[_]@This() {
.ApertureRegs,
.IntClampInsts,
.SdwaOmod,
.SdwaScalar,
.AddNoCarryInsts,
.ScalarAtomics,
.SMemrealtime,
.Gcn3Encoding,
.CiInsts,
.FlatAddressSpace,
.Sdwa,
.Wavefrontsize64,
.SdwaSdst,
.FlatInstOffsets,
.ScalarStores,
.Gfx7Gfx8Gfx9Insts,
.R128A16,
.Dpp,
.Localmemorysize65536,
.Vop3p,
.BitInsts16,
.VgprIndexMode,
.Gfx8Insts,
.Inv2piInlineImm,
.Gfx9Insts,
.ScalarFlatScratchInsts,
.FlatGlobalInsts,
.FlatScratchInsts,
.Fp64,
.FastFmaf,
}),
FeatureInfo(@This()).create(.Gfx9Insts, "gfx9-insts", "Additional instructions for GFX9+", "gfx9-insts"),
FeatureInfo(@This()).createWithSubfeatures(.Gfx10, "gfx10", "GFX10 GPU generation", "gfx10", &[_]@This() {
.Vscnt,
.ApertureRegs,
.Gfx10Insts,
.IntClampInsts,
.PkFmacF16Inst,
.SdwaOmod,
.SdwaScalar,
.AddNoCarryInsts,
.Movrel,
.SMemrealtime,
.NoSdstCmpx,
.CiInsts,
.FlatAddressSpace,
.Sdwa,
.NoSramEccSupport,
.SdwaSdst,
.FlatInstOffsets,
.RegisterBanking,
.Dpp,
.Localmemorysize65536,
.Vop3p,
.BitInsts16,
.Dpp8,
.Gfx8Insts,
.Inv2piInlineImm,
.Gfx9Insts,
.FmaMixInsts,
.MimgR128,
.Vop3Literal,
.FlatGlobalInsts,
.FlatScratchInsts,
.Fp64,
.FastFmaf,
.NoDataDepHazard,
}),
FeatureInfo(@This()).create(.Gfx10Insts, "gfx10-insts", "Additional instructions for GFX10+", "gfx10-insts"),
FeatureInfo(@This()).create(.InstFwdPrefetchBug, "inst-fwd-prefetch-bug", "S_INST_PREFETCH instruction causes shader to hang", "inst-fwd-prefetch-bug"),
FeatureInfo(@This()).create(.IntClampInsts, "int-clamp-insts", "Support clamp for integer destination", "int-clamp-insts"),
FeatureInfo(@This()).create(.Inv2piInlineImm, "inv-2pi-inline-imm", "Has 1 / (2 * pi) as inline immediate", "inv-2pi-inline-imm"),
FeatureInfo(@This()).create(.Ldsbankcount16, "ldsbankcount16", "The number of LDS banks per compute unit.", "ldsbankcount16"),
FeatureInfo(@This()).create(.Ldsbankcount32, "ldsbankcount32", "The number of LDS banks per compute unit.", "ldsbankcount32"),
FeatureInfo(@This()).create(.LdsBranchVmemWarHazard, "lds-branch-vmem-war-hazard", "Switching between LDS and VMEM-tex not waiting VM_VSRC=0", "lds-branch-vmem-war-hazard"),
FeatureInfo(@This()).create(.LdsMisalignedBug, "lds-misaligned-bug", "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode", "lds-misaligned-bug"),
FeatureInfo(@This()).create(.Localmemorysize0, "localmemorysize0", "The size of local memory in bytes", "localmemorysize0"),
FeatureInfo(@This()).create(.Localmemorysize32768, "localmemorysize32768", "The size of local memory in bytes", "localmemorysize32768"),
FeatureInfo(@This()).create(.Localmemorysize65536, "localmemorysize65536", "The size of local memory in bytes", "localmemorysize65536"),
FeatureInfo(@This()).create(.MaiInsts, "mai-insts", "Has mAI instructions", "mai-insts"),
FeatureInfo(@This()).create(.MfmaInlineLiteralBug, "mfma-inline-literal-bug", "MFMA cannot use inline literal as SrcC", "mfma-inline-literal-bug"),
FeatureInfo(@This()).create(.MimgR128, "mimg-r128", "Support 128-bit texture resources", "mimg-r128"),
FeatureInfo(@This()).create(.MadMixInsts, "mad-mix-insts", "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions", "mad-mix-insts"),
FeatureInfo(@This()).create(.MaxPrivateElementSize4, "max-private-element-size-4", "Maximum private access size may be 4", "max-private-element-size-4"),
FeatureInfo(@This()).create(.MaxPrivateElementSize8, "max-private-element-size-8", "Maximum private access size may be 8", "max-private-element-size-8"),
FeatureInfo(@This()).create(.MaxPrivateElementSize16, "max-private-element-size-16", "Maximum private access size may be 16", "max-private-element-size-16"),
FeatureInfo(@This()).create(.Movrel, "movrel", "Has v_movrel*_b32 instructions", "movrel"),
FeatureInfo(@This()).create(.NsaEncoding, "nsa-encoding", "Support NSA encoding for image instructions", "nsa-encoding"),
FeatureInfo(@This()).create(.NsaToVmemBug, "nsa-to-vmem-bug", "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero", "nsa-to-vmem-bug"),
FeatureInfo(@This()).create(.NoDataDepHazard, "no-data-dep-hazard", "Does not need SW waitstates", "no-data-dep-hazard"),
FeatureInfo(@This()).create(.NoSdstCmpx, "no-sdst-cmpx", "V_CMPX does not write VCC/SGPR in addition to EXEC", "no-sdst-cmpx"),
FeatureInfo(@This()).create(.Offset3fBug, "offset-3f-bug", "Branch offset of 3f hardware bug", "offset-3f-bug"),
FeatureInfo(@This()).create(.PkFmacF16Inst, "pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction", "pk-fmac-f16-inst"),
FeatureInfo(@This()).create(.PromoteAlloca, "promote-alloca", "Enable promote alloca pass", "promote-alloca"),
FeatureInfo(@This()).create(.R128A16, "r128-a16", "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9", "r128-a16"),
FeatureInfo(@This()).create(.RegisterBanking, "register-banking", "Has register banking", "register-banking"),
FeatureInfo(@This()).create(.Sdwa, "sdwa", "Support SDWA (Sub-DWORD Addressing) extension", "sdwa"),
FeatureInfo(@This()).create(.SdwaMav, "sdwa-mav", "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension", "sdwa-mav"),
FeatureInfo(@This()).create(.SdwaOmod, "sdwa-omod", "Support OMod with SDWA (Sub-DWORD Addressing) extension", "sdwa-omod"),
FeatureInfo(@This()).create(.SdwaOutModsVopc, "sdwa-out-mods-vopc", "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension", "sdwa-out-mods-vopc"),
FeatureInfo(@This()).create(.SdwaScalar, "sdwa-scalar", "Support scalar register with SDWA (Sub-DWORD Addressing) extension", "sdwa-scalar"),
FeatureInfo(@This()).create(.SdwaSdst, "sdwa-sdst", "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension", "sdwa-sdst"),
FeatureInfo(@This()).create(.SgprInitBug, "sgpr-init-bug", "VI SGPR initialization bug requiring a fixed SGPR allocation size", "sgpr-init-bug"),
FeatureInfo(@This()).create(.SmemToVectorWriteHazard, "smem-to-vector-write-hazard", "s_load_dword followed by v_cmp page faults", "smem-to-vector-write-hazard"),
FeatureInfo(@This()).create(.SMemrealtime, "s-memrealtime", "Has s_memrealtime instruction", "s-memrealtime"),
FeatureInfo(@This()).create(.SramEcc, "sram-ecc", "Enable SRAM ECC", "sram-ecc"),
FeatureInfo(@This()).create(.ScalarAtomics, "scalar-atomics", "Has atomic scalar memory instructions", "scalar-atomics"),
FeatureInfo(@This()).create(.ScalarFlatScratchInsts, "scalar-flat-scratch-insts", "Have s_scratch_* flat memory instructions", "scalar-flat-scratch-insts"),
FeatureInfo(@This()).create(.ScalarStores, "scalar-stores", "Has store scalar memory instructions", "scalar-stores"),
FeatureInfo(@This()).createWithSubfeatures(.SeaIslands, "sea-islands", "SEA_ISLANDS GPU generation", "sea-islands", &[_]@This() {
.Movrel,
.Gfx7Gfx8Gfx9Insts,
.Fp64,
.TrigReducedRange,
.CiInsts,
.FlatAddressSpace,
.Localmemorysize65536,
.Wavefrontsize64,
.NoSramEccSupport,
.MimgR128,
}),
FeatureInfo(@This()).createWithSubfeatures(.SouthernIslands, "southern-islands", "SOUTHERN_ISLANDS GPU generation", "southern-islands", &[_]@This() {
.Movrel,
.MimgR128,
.Fp64,
.TrigReducedRange,
.NoXnackSupport,
.Wavefrontsize64,
.NoSramEccSupport,
.Ldsbankcount32,
.Localmemorysize32768,
}),
FeatureInfo(@This()).create(.TrapHandler, "trap-handler", "Trap handler support", "trap-handler"),
FeatureInfo(@This()).create(.TrigReducedRange, "trig-reduced-range", "Requires use of fract on arguments to trig instructions", "trig-reduced-range"),
FeatureInfo(@This()).create(.UnalignedBufferAccess, "unaligned-buffer-access", "Support unaligned global loads and stores", "unaligned-buffer-access"),
FeatureInfo(@This()).create(.UnalignedScratchAccess, "unaligned-scratch-access", "Support unaligned scratch loads and stores", "unaligned-scratch-access"),
FeatureInfo(@This()).create(.UnpackedD16Vmem, "unpacked-d16-vmem", "Has unpacked d16 vmem instructions", "unpacked-d16-vmem"),
FeatureInfo(@This()).create(.VgprIndexMode, "vgpr-index-mode", "Has VGPR mode register indexing", "vgpr-index-mode"),
FeatureInfo(@This()).create(.VmemToScalarWriteHazard, "vmem-to-scalar-write-hazard", "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.", "vmem-to-scalar-write-hazard"),
FeatureInfo(@This()).create(.Vop3Literal, "vop3-literal", "Can use one literal in VOP3", "vop3-literal"),
FeatureInfo(@This()).create(.Vop3p, "vop3p", "Has VOP3P packed instructions", "vop3p"),
FeatureInfo(@This()).create(.VcmpxExecWarHazard, "vcmpx-exec-war-hazard", "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)", "vcmpx-exec-war-hazard"),
FeatureInfo(@This()).create(.VcmpxPermlaneHazard, "vcmpx-permlane-hazard", "TODO: describe me", "vcmpx-permlane-hazard"),
FeatureInfo(@This()).createWithSubfeatures(.VolcanicIslands, "volcanic-islands", "VOLCANIC_ISLANDS GPU generation", "volcanic-islands", &[_]@This() {
.IntClampInsts,
.SdwaMav,
.Movrel,
.SMemrealtime,
.Gcn3Encoding,
.TrigReducedRange,
.CiInsts,
.FlatAddressSpace,
.Sdwa,
.Wavefrontsize64,
.NoSramEccSupport,
.ScalarStores,
.Gfx7Gfx8Gfx9Insts,
.Dpp,
.Localmemorysize65536,
.BitInsts16,
.VgprIndexMode,
.Gfx8Insts,
.Inv2piInlineImm,
.MimgR128,
.SdwaOutModsVopc,
.Fp64,
}),
FeatureInfo(@This()).create(.Vscnt, "vscnt", "Has separate store vscnt counter", "vscnt"),
FeatureInfo(@This()).create(.Wavefrontsize16, "wavefrontsize16", "The number of threads per wavefront", "wavefrontsize16"),
FeatureInfo(@This()).create(.Wavefrontsize32, "wavefrontsize32", "The number of threads per wavefront", "wavefrontsize32"),
FeatureInfo(@This()).create(.Wavefrontsize64, "wavefrontsize64", "The number of threads per wavefront", "wavefrontsize64"),
FeatureInfo(@This()).create(.Xnack, "xnack", "Enable XNACK support", "xnack"),
FeatureInfo(@This()).create(.HalfRate64Ops, "half-rate-64-ops", "Most fp64 instructions are half rate instead of quarter", "half-rate-64-ops"),
};
};