// zig/lib/std/target/feature/AmdGpuFeature.zig

const FeatureInfo = @import("std").target.feature.FeatureInfo;

/// Target features for AMD GPUs.
///
/// Every tag has one entry in `feature_infos`; the entries are written in the
/// same order as the tags so that `getInfo` can index the table with the tag's
/// integer value. Keep both lists in sync when adding or removing a feature.
pub const AmdGpuFeature = enum {
    BitInsts16,
    AddNoCarryInsts,
    ApertureRegs,
    AtomicFaddInsts,
    AutoWaitcntBeforeBarrier,
    CiInsts,
    CodeObjectV3,
    Cumode,
    DlInsts,
    Dpp,
    Dpp8,
    NoSramEccSupport,
    NoXnackSupport,
    Dot1Insts,
    Dot2Insts,
    Dot3Insts,
    Dot4Insts,
    Dot5Insts,
    Dot6Insts,
    DumpCode,
    Dumpcode,
    EnableDs128,
    LoadStoreOpt,
    EnablePrtStrictNull,
    SiScheduler,
    UnsafeDsOffsetFolding,
    Fmaf,
    Fp16Denormals,
    Fp32Denormals,
    Fp64,
    Fp64Denormals,
    Fp64Fp16Denormals,
    FpExceptions,
    FastFmaf,
    FlatAddressSpace,
    FlatForGlobal,
    FlatGlobalInsts,
    FlatInstOffsets,
    FlatScratchInsts,
    FlatSegmentOffsetBug,
    FmaMixInsts,
    Gcn3Encoding,
    Gfx7Gfx8Gfx9Insts,
    Gfx8Insts,
    Gfx9,
    Gfx9Insts,
    Gfx10,
    Gfx10Insts,
    InstFwdPrefetchBug,
    IntClampInsts,
    Inv2piInlineImm,
    Ldsbankcount16,
    Ldsbankcount32,
    LdsBranchVmemWarHazard,
    LdsMisalignedBug,
    Localmemorysize0,
    Localmemorysize32768,
    Localmemorysize65536,
    MaiInsts,
    MfmaInlineLiteralBug,
    MimgR128,
    MadMixInsts,
    MaxPrivateElementSize4,
    MaxPrivateElementSize8,
    MaxPrivateElementSize16,
    Movrel,
    NsaEncoding,
    NsaToVmemBug,
    NoDataDepHazard,
    NoSdstCmpx,
    Offset3fBug,
    PkFmacF16Inst,
    PromoteAlloca,
    R128A16,
    RegisterBanking,
    Sdwa,
    SdwaMav,
    SdwaOmod,
    SdwaOutModsVopc,
    SdwaScalar,
    SdwaSdst,
    SgprInitBug,
    SmemToVectorWriteHazard,
    SMemrealtime,
    SramEcc,
    ScalarAtomics,
    ScalarFlatScratchInsts,
    ScalarStores,
    SeaIslands,
    SouthernIslands,
    TrapHandler,
    TrigReducedRange,
    UnalignedBufferAccess,
    UnalignedScratchAccess,
    UnpackedD16Vmem,
    VgprIndexMode,
    VmemToScalarWriteHazard,
    Vop3Literal,
    Vop3p,
    VcmpxExecWarHazard,
    VcmpxPermlaneHazard,
    VolcanicIslands,
    Vscnt,
    Wavefrontsize16,
    Wavefrontsize32,
    Wavefrontsize64,
    Xnack,
    HalfRate64Ops,

    // Private shorthands so the table below does not repeat `@This()` and
    // `FeatureInfo(@This())` on every row.
    const Self = @This();
    const Info = FeatureInfo(Self);

    /// Returns the `feature_infos` entry located at this tag's integer value.
    pub fn getInfo(self: @This()) FeatureInfo(@This()) {
        const table_index = @enumToInt(self);
        return feature_infos[table_index];
    }

    /// One entry per tag, in tag declaration order.
    ///
    /// Each row passes the tag, two spellings of the feature's textual name and
    /// a human-readable description to `FeatureInfo.create`; rows built with
    /// `createWithSubfeatures` additionally pass a list of other tags.
    /// NOTE(review): the exact meaning of the two name strings and of the
    /// subfeature list is defined by `FeatureInfo` — confirm there.
    pub const feature_infos = [@memberCount(@This())]FeatureInfo(@This()){
        Info.create(.BitInsts16, "16-bit-insts", "Has i16/f16 instructions", "16-bit-insts"),
        Info.create(.AddNoCarryInsts, "add-no-carry-insts", "Have VALU add/sub instructions without carry out", "add-no-carry-insts"),
        Info.create(.ApertureRegs, "aperture-regs", "Has Memory Aperture Base and Size Registers", "aperture-regs"),
        Info.create(.AtomicFaddInsts, "atomic-fadd-insts", "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions", "atomic-fadd-insts"),
        Info.create(.AutoWaitcntBeforeBarrier, "auto-waitcnt-before-barrier", "Hardware automatically inserts waitcnt before barrier", "auto-waitcnt-before-barrier"),
        Info.create(.CiInsts, "ci-insts", "Additional instructions for CI+", "ci-insts"),
        Info.create(.CodeObjectV3, "code-object-v3", "Generate code object version 3", "code-object-v3"),
        Info.create(.Cumode, "cumode", "Enable CU wavefront execution mode", "cumode"),
        Info.create(.DlInsts, "dl-insts", "Has v_fmac_f32 and v_xnor_b32 instructions", "dl-insts"),
        Info.create(.Dpp, "dpp", "Support DPP (Data Parallel Primitives) extension", "dpp"),
        Info.create(.Dpp8, "dpp8", "Support DPP8 (Data Parallel Primitives) extension", "dpp8"),
        Info.create(.NoSramEccSupport, "no-sram-ecc-support", "Hardware does not support SRAM ECC", "no-sram-ecc-support"),
        Info.create(.NoXnackSupport, "no-xnack-support", "Hardware does not support XNACK", "no-xnack-support"),
        Info.create(.Dot1Insts, "dot1-insts", "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions", "dot1-insts"),
        Info.create(.Dot2Insts, "dot2-insts", "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", "dot2-insts"),
        Info.create(.Dot3Insts, "dot3-insts", "Has v_dot8c_i32_i4 instruction", "dot3-insts"),
        Info.create(.Dot4Insts, "dot4-insts", "Has v_dot2c_i32_i16 instruction", "dot4-insts"),
        Info.create(.Dot5Insts, "dot5-insts", "Has v_dot2c_f32_f16 instruction", "dot5-insts"),
        Info.create(.Dot6Insts, "dot6-insts", "Has v_dot4c_i32_i8 instruction", "dot6-insts"),
        Info.create(.DumpCode, "DumpCode", "Dump MachineInstrs in the CodeEmitter", "DumpCode"),
        Info.create(.Dumpcode, "dumpcode", "Dump MachineInstrs in the CodeEmitter", "dumpcode"),
        Info.create(.EnableDs128, "enable-ds128", "Use ds_{read|write}_b128", "enable-ds128"),
        Info.create(.LoadStoreOpt, "load-store-opt", "Enable SI load/store optimizer pass", "load-store-opt"),
        Info.create(.EnablePrtStrictNull, "enable-prt-strict-null", "Enable zeroing of result registers for sparse texture fetches", "enable-prt-strict-null"),
        Info.create(.SiScheduler, "si-scheduler", "Enable SI Machine Scheduler", "si-scheduler"),
        Info.create(.UnsafeDsOffsetFolding, "unsafe-ds-offset-folding", "Force using DS instruction immediate offsets on SI", "unsafe-ds-offset-folding"),
        Info.create(.Fmaf, "fmaf", "Enable single precision FMA (not as fast as mul+add, but fused)", "fmaf"),
        Info.createWithSubfeatures(.Fp16Denormals, "fp16-denormals", "Enable half precision denormal handling", "fp16-denormals", &[_]Self{
            .Fp64,
        }),
        Info.create(.Fp32Denormals, "fp32-denormals", "Enable single precision denormal handling", "fp32-denormals"),
        Info.create(.Fp64, "fp64", "Enable double precision operations", "fp64"),
        Info.createWithSubfeatures(.Fp64Denormals, "fp64-denormals", "Enable double and half precision denormal handling", "fp64-denormals", &[_]Self{
            .Fp64,
        }),
        Info.createWithSubfeatures(.Fp64Fp16Denormals, "fp64-fp16-denormals", "Enable double and half precision denormal handling", "fp64-fp16-denormals", &[_]Self{
            .Fp64,
        }),
        Info.create(.FpExceptions, "fp-exceptions", "Enable floating point exceptions", "fp-exceptions"),
        Info.create(.FastFmaf, "fast-fmaf", "Assuming f32 fma is at least as fast as mul + add", "fast-fmaf"),
        Info.create(.FlatAddressSpace, "flat-address-space", "Support flat address space", "flat-address-space"),
        Info.create(.FlatForGlobal, "flat-for-global", "Force to generate flat instruction for global", "flat-for-global"),
        Info.create(.FlatGlobalInsts, "flat-global-insts", "Have global_* flat memory instructions", "flat-global-insts"),
        Info.create(.FlatInstOffsets, "flat-inst-offsets", "Flat instructions have immediate offset addressing mode", "flat-inst-offsets"),
        Info.create(.FlatScratchInsts, "flat-scratch-insts", "Have scratch_* flat memory instructions", "flat-scratch-insts"),
        Info.create(.FlatSegmentOffsetBug, "flat-segment-offset-bug", "GFX10 bug, inst_offset ignored in flat segment", "flat-segment-offset-bug"),
        Info.create(.FmaMixInsts, "fma-mix-insts", "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions", "fma-mix-insts"),
        Info.create(.Gcn3Encoding, "gcn3-encoding", "Encoding format for VI", "gcn3-encoding"),
        Info.create(.Gfx7Gfx8Gfx9Insts, "gfx7-gfx8-gfx9-insts", "Instructions shared in GFX7, GFX8, GFX9", "gfx7-gfx8-gfx9-insts"),
        Info.create(.Gfx8Insts, "gfx8-insts", "Additional instructions for GFX8+", "gfx8-insts"),
        Info.createWithSubfeatures(.Gfx9, "gfx9", "GFX9 GPU generation", "gfx9", &[_]Self{
            .ApertureRegs,
            .IntClampInsts,
            .SdwaOmod,
            .SdwaScalar,
            .AddNoCarryInsts,
            .ScalarAtomics,
            .SMemrealtime,
            .Gcn3Encoding,
            .CiInsts,
            .FlatAddressSpace,
            .Sdwa,
            .Wavefrontsize64,
            .SdwaSdst,
            .FlatInstOffsets,
            .ScalarStores,
            .Gfx7Gfx8Gfx9Insts,
            .R128A16,
            .Dpp,
            .Localmemorysize65536,
            .Vop3p,
            .BitInsts16,
            .VgprIndexMode,
            .Gfx8Insts,
            .Inv2piInlineImm,
            .Gfx9Insts,
            .ScalarFlatScratchInsts,
            .FlatGlobalInsts,
            .FlatScratchInsts,
            .Fp64,
            .FastFmaf,
        }),
        Info.create(.Gfx9Insts, "gfx9-insts", "Additional instructions for GFX9+", "gfx9-insts"),
        Info.createWithSubfeatures(.Gfx10, "gfx10", "GFX10 GPU generation", "gfx10", &[_]Self{
            .Vscnt,
            .ApertureRegs,
            .Gfx10Insts,
            .IntClampInsts,
            .PkFmacF16Inst,
            .SdwaOmod,
            .SdwaScalar,
            .AddNoCarryInsts,
            .Movrel,
            .SMemrealtime,
            .NoSdstCmpx,
            .CiInsts,
            .FlatAddressSpace,
            .Sdwa,
            .NoSramEccSupport,
            .SdwaSdst,
            .FlatInstOffsets,
            .RegisterBanking,
            .Dpp,
            .Localmemorysize65536,
            .Vop3p,
            .BitInsts16,
            .Dpp8,
            .Gfx8Insts,
            .Inv2piInlineImm,
            .Gfx9Insts,
            .FmaMixInsts,
            .MimgR128,
            .Vop3Literal,
            .FlatGlobalInsts,
            .FlatScratchInsts,
            .Fp64,
            .FastFmaf,
            .NoDataDepHazard,
        }),
        Info.create(.Gfx10Insts, "gfx10-insts", "Additional instructions for GFX10+", "gfx10-insts"),
        Info.create(.InstFwdPrefetchBug, "inst-fwd-prefetch-bug", "S_INST_PREFETCH instruction causes shader to hang", "inst-fwd-prefetch-bug"),
        Info.create(.IntClampInsts, "int-clamp-insts", "Support clamp for integer destination", "int-clamp-insts"),
        Info.create(.Inv2piInlineImm, "inv-2pi-inline-imm", "Has 1 / (2 * pi) as inline immediate", "inv-2pi-inline-imm"),
        Info.create(.Ldsbankcount16, "ldsbankcount16", "The number of LDS banks per compute unit.", "ldsbankcount16"),
        Info.create(.Ldsbankcount32, "ldsbankcount32", "The number of LDS banks per compute unit.", "ldsbankcount32"),
        Info.create(.LdsBranchVmemWarHazard, "lds-branch-vmem-war-hazard", "Switching between LDS and VMEM-tex not waiting VM_VSRC=0", "lds-branch-vmem-war-hazard"),
        Info.create(.LdsMisalignedBug, "lds-misaligned-bug", "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode", "lds-misaligned-bug"),
        Info.create(.Localmemorysize0, "localmemorysize0", "The size of local memory in bytes", "localmemorysize0"),
        Info.create(.Localmemorysize32768, "localmemorysize32768", "The size of local memory in bytes", "localmemorysize32768"),
        Info.create(.Localmemorysize65536, "localmemorysize65536", "The size of local memory in bytes", "localmemorysize65536"),
        Info.create(.MaiInsts, "mai-insts", "Has mAI instructions", "mai-insts"),
        Info.create(.MfmaInlineLiteralBug, "mfma-inline-literal-bug", "MFMA cannot use inline literal as SrcC", "mfma-inline-literal-bug"),
        Info.create(.MimgR128, "mimg-r128", "Support 128-bit texture resources", "mimg-r128"),
        Info.create(.MadMixInsts, "mad-mix-insts", "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions", "mad-mix-insts"),
        Info.create(.MaxPrivateElementSize4, "max-private-element-size-4", "Maximum private access size may be 4", "max-private-element-size-4"),
        Info.create(.MaxPrivateElementSize8, "max-private-element-size-8", "Maximum private access size may be 8", "max-private-element-size-8"),
        Info.create(.MaxPrivateElementSize16, "max-private-element-size-16", "Maximum private access size may be 16", "max-private-element-size-16"),
        Info.create(.Movrel, "movrel", "Has v_movrel*_b32 instructions", "movrel"),
        Info.create(.NsaEncoding, "nsa-encoding", "Support NSA encoding for image instructions", "nsa-encoding"),
        Info.create(.NsaToVmemBug, "nsa-to-vmem-bug", "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero", "nsa-to-vmem-bug"),
        Info.create(.NoDataDepHazard, "no-data-dep-hazard", "Does not need SW waitstates", "no-data-dep-hazard"),
        Info.create(.NoSdstCmpx, "no-sdst-cmpx", "V_CMPX does not write VCC/SGPR in addition to EXEC", "no-sdst-cmpx"),
        Info.create(.Offset3fBug, "offset-3f-bug", "Branch offset of 3f hardware bug", "offset-3f-bug"),
        Info.create(.PkFmacF16Inst, "pk-fmac-f16-inst", "Has v_pk_fmac_f16 instruction", "pk-fmac-f16-inst"),
        Info.create(.PromoteAlloca, "promote-alloca", "Enable promote alloca pass", "promote-alloca"),
        Info.create(.R128A16, "r128-a16", "Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9", "r128-a16"),
        Info.create(.RegisterBanking, "register-banking", "Has register banking", "register-banking"),
        Info.create(.Sdwa, "sdwa", "Support SDWA (Sub-DWORD Addressing) extension", "sdwa"),
        Info.create(.SdwaMav, "sdwa-mav", "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension", "sdwa-mav"),
        Info.create(.SdwaOmod, "sdwa-omod", "Support OMod with SDWA (Sub-DWORD Addressing) extension", "sdwa-omod"),
        Info.create(.SdwaOutModsVopc, "sdwa-out-mods-vopc", "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension", "sdwa-out-mods-vopc"),
        Info.create(.SdwaScalar, "sdwa-scalar", "Support scalar register with SDWA (Sub-DWORD Addressing) extension", "sdwa-scalar"),
        Info.create(.SdwaSdst, "sdwa-sdst", "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension", "sdwa-sdst"),
        Info.create(.SgprInitBug, "sgpr-init-bug", "VI SGPR initialization bug requiring a fixed SGPR allocation size", "sgpr-init-bug"),
        Info.create(.SmemToVectorWriteHazard, "smem-to-vector-write-hazard", "s_load_dword followed by v_cmp page faults", "smem-to-vector-write-hazard"),
        Info.create(.SMemrealtime, "s-memrealtime", "Has s_memrealtime instruction", "s-memrealtime"),
        Info.create(.SramEcc, "sram-ecc", "Enable SRAM ECC", "sram-ecc"),
        Info.create(.ScalarAtomics, "scalar-atomics", "Has atomic scalar memory instructions", "scalar-atomics"),
        Info.create(.ScalarFlatScratchInsts, "scalar-flat-scratch-insts", "Have s_scratch_* flat memory instructions", "scalar-flat-scratch-insts"),
        Info.create(.ScalarStores, "scalar-stores", "Has store scalar memory instructions", "scalar-stores"),
        Info.createWithSubfeatures(.SeaIslands, "sea-islands", "SEA_ISLANDS GPU generation", "sea-islands", &[_]Self{
            .Movrel,
            .Gfx7Gfx8Gfx9Insts,
            .Fp64,
            .TrigReducedRange,
            .CiInsts,
            .FlatAddressSpace,
            .Localmemorysize65536,
            .Wavefrontsize64,
            .NoSramEccSupport,
            .MimgR128,
        }),
        Info.createWithSubfeatures(.SouthernIslands, "southern-islands", "SOUTHERN_ISLANDS GPU generation", "southern-islands", &[_]Self{
            .Movrel,
            .MimgR128,
            .Fp64,
            .TrigReducedRange,
            .NoXnackSupport,
            .Wavefrontsize64,
            .NoSramEccSupport,
            .Ldsbankcount32,
            .Localmemorysize32768,
        }),
        Info.create(.TrapHandler, "trap-handler", "Trap handler support", "trap-handler"),
        Info.create(.TrigReducedRange, "trig-reduced-range", "Requires use of fract on arguments to trig instructions", "trig-reduced-range"),
        Info.create(.UnalignedBufferAccess, "unaligned-buffer-access", "Support unaligned global loads and stores", "unaligned-buffer-access"),
        Info.create(.UnalignedScratchAccess, "unaligned-scratch-access", "Support unaligned scratch loads and stores", "unaligned-scratch-access"),
        Info.create(.UnpackedD16Vmem, "unpacked-d16-vmem", "Has unpacked d16 vmem instructions", "unpacked-d16-vmem"),
        Info.create(.VgprIndexMode, "vgpr-index-mode", "Has VGPR mode register indexing", "vgpr-index-mode"),
        Info.create(.VmemToScalarWriteHazard, "vmem-to-scalar-write-hazard", "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.", "vmem-to-scalar-write-hazard"),
        Info.create(.Vop3Literal, "vop3-literal", "Can use one literal in VOP3", "vop3-literal"),
        Info.create(.Vop3p, "vop3p", "Has VOP3P packed instructions", "vop3p"),
        Info.create(.VcmpxExecWarHazard, "vcmpx-exec-war-hazard", "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)", "vcmpx-exec-war-hazard"),
        Info.create(.VcmpxPermlaneHazard, "vcmpx-permlane-hazard", "TODO: describe me", "vcmpx-permlane-hazard"),
        Info.createWithSubfeatures(.VolcanicIslands, "volcanic-islands", "VOLCANIC_ISLANDS GPU generation", "volcanic-islands", &[_]Self{
            .IntClampInsts,
            .SdwaMav,
            .Movrel,
            .SMemrealtime,
            .Gcn3Encoding,
            .TrigReducedRange,
            .CiInsts,
            .FlatAddressSpace,
            .Sdwa,
            .Wavefrontsize64,
            .NoSramEccSupport,
            .ScalarStores,
            .Gfx7Gfx8Gfx9Insts,
            .Dpp,
            .Localmemorysize65536,
            .BitInsts16,
            .VgprIndexMode,
            .Gfx8Insts,
            .Inv2piInlineImm,
            .MimgR128,
            .SdwaOutModsVopc,
            .Fp64,
        }),
        Info.create(.Vscnt, "vscnt", "Has separate store vscnt counter", "vscnt"),
        Info.create(.Wavefrontsize16, "wavefrontsize16", "The number of threads per wavefront", "wavefrontsize16"),
        Info.create(.Wavefrontsize32, "wavefrontsize32", "The number of threads per wavefront", "wavefrontsize32"),
        Info.create(.Wavefrontsize64, "wavefrontsize64", "The number of threads per wavefront", "wavefrontsize64"),
        Info.create(.Xnack, "xnack", "Enable XNACK support", "xnack"),
        Info.create(.HalfRate64Ops, "half-rate-64-ops", "Most fp64 instructions are half rate instead of quarter", "half-rate-64-ops"),
    };
};