diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index b2b2b3721a00c..faf59c1541fc0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -89,6 +89,12 @@ def FeatureEnableFlatScratch : SubtargetFeature<"enable-flat-scratch", "Use scratch_* flat memory instructions to access scratch" >; +def FeatureFlatGVSMode : SubtargetFeature<"flat-gvs-mode", + "FlatGVSMode", + "true", + "Have GVS addressing mode with flat_* instructions" +>; + def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts", "AddNoCarryInsts", "true", @@ -1954,6 +1960,7 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureShaderCyclesHiLoRegisters, FeatureArchitectedFlatScratch, FeatureArchitectedSGPRs, + FeatureFlatGVSMode, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicDsPkAdd16Insts, @@ -2381,6 +2388,9 @@ def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">, def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">, AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; +def HasFlatGVSMode : Predicate<"Subtarget->hasFlatGVSMode()">, + AssemblerPredicate<(all_of FeatureFlatGVSMode)>; + def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">, AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 3625db9a4791f..3965b5dd8c5c3 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -237,10 +237,18 @@ class FLAT_Load_Pseudo< let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); } -multiclass FLAT_Load_Pseudo_t16 { - def "" : FLAT_Load_Pseudo; +multiclass FLAT_Flat_Load_Pseudo { + def "" : FLAT_Load_Pseudo, + GlobalSaddrTable<0, opName>; + let OtherPredicates = [HasFlatGVSMode] in + def _SADDR : FLAT_Load_Pseudo, + GlobalSaddrTable<1, opName>; +} + +multiclass FLAT_Flat_Load_Pseudo_t16 { + defm "" : FLAT_Flat_Load_Pseudo; let True16Predicate = UseRealTrue16Insts in - def _t16 : FLAT_Load_Pseudo, True16D16Table; + defm _t16 : FLAT_Flat_Load_Pseudo, True16D16Table; } class FLAT_Store_Pseudo { - def "" : FLAT_Store_Pseudo; - let OtherPredicates = [HasTrue16BitInsts] in - def _t16 : FLAT_Store_Pseudo, True16D16Table; +multiclass FLAT_Flat_Store_Pseudo { + def "" : FLAT_Store_Pseudo, + GlobalSaddrTable<0, opName>; + let OtherPredicates = [HasFlatGVSMode] in + def _SADDR : FLAT_Store_Pseudo, + GlobalSaddrTable<1, opName>; +} + +multiclass FLAT_Flat_Store_Pseudo_t16 { + defm "" : FLAT_Flat_Store_Pseudo; + + defvar Name16 = opName#"_t16"; + let OtherPredicates = [HasFlatGVSMode, HasTrue16BitInsts] in { + def _t16 : FLAT_Store_Pseudo, + GlobalSaddrTable<0, Name16>, + True16D16Table; + def _SADDR_t16 : FLAT_Store_Pseudo, + GlobalSaddrTable<1, Name16>, + True16D16Table; + } } multiclass FLAT_Global_Load_Pseudo { @@ -657,6 +681,18 @@ multiclass FLAT_Atomic_Pseudo_NO_RTN< let FPAtomic = data_vt.isFP; let AddedComplexity = -1; // Prefer global atomics if available } + + def _SADDR : FLAT_AtomicNoRet_Pseudo , + GlobalSaddrTable<1, opName> { + let OtherPredicates = [HasFlatGVSMode]; + let has_saddr = 1; + let enabled_saddr = 1; + let FPAtomic = data_vt.isFP; + let AddedComplexity = -1; // Prefer global atomics if available + } } multiclass FLAT_Atomic_Pseudo_RTN< @@ -665,15 +701,29 @@ multiclass FLAT_Atomic_Pseudo_RTN< ValueType vt, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, - RegisterOperand data_op = getLdStRegisterOperand.ret> { + RegisterOperand data_op = getLdStRegisterOperand.ret, + RegisterOperand vdst_op = getLdStRegisterOperand.ret> { def _RTN : FLAT_AtomicRet_Pseudo .ret:$vdst), + (outs vdst_op:$vdst), (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), " $vdst, $vaddr, $vdata$offset$cpol">, GlobalSaddrTable<0, opName#"_rtn"> { let FPAtomic = data_vt.isFP; let AddedComplexity = -1; // Prefer global atomics if available } + + def _SADDR_RTN : FLAT_AtomicRet_Pseudo , + GlobalSaddrTable<1, opName#"_rtn"> { + let OtherPredicates = [HasFlatGVSMode]; + let has_saddr = 1; + let enabled_saddr = 1; + let PseudoInstr = NAME#"_SADDR_RTN"; + let FPAtomic = data_vt.isFP; + let AddedComplexity = -1; // Prefer global atomics if available + } } multiclass FLAT_Atomic_Pseudo< @@ -762,36 +812,36 @@ multiclass FLAT_Global_Atomic_Pseudo< // Flat Instructions //===----------------------------------------------------------------------===// -def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; -def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; -def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; -def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; -def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; -def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; -def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; -def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; +defm FLAT_LOAD_UBYTE : FLAT_Flat_Load_Pseudo <"flat_load_ubyte", VGPR_32>; +defm FLAT_LOAD_SBYTE : FLAT_Flat_Load_Pseudo <"flat_load_sbyte", VGPR_32>; +defm FLAT_LOAD_USHORT : FLAT_Flat_Load_Pseudo <"flat_load_ushort", VGPR_32>; +defm FLAT_LOAD_SSHORT : FLAT_Flat_Load_Pseudo <"flat_load_sshort", VGPR_32>; +defm FLAT_LOAD_DWORD : FLAT_Flat_Load_Pseudo <"flat_load_dword", VGPR_32>; +defm FLAT_LOAD_DWORDX2 : FLAT_Flat_Load_Pseudo <"flat_load_dwordx2", VReg_64>; +defm FLAT_LOAD_DWORDX4 : FLAT_Flat_Load_Pseudo <"flat_load_dwordx4", VReg_128>; +defm FLAT_LOAD_DWORDX3 : FLAT_Flat_Load_Pseudo <"flat_load_dwordx3", VReg_96>; -def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; -def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; -def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; -def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; +defm FLAT_STORE_DWORD : FLAT_Flat_Store_Pseudo <"flat_store_dword", VGPR_32>; +defm FLAT_STORE_DWORDX2 : FLAT_Flat_Store_Pseudo <"flat_store_dwordx2", VReg_64>; +defm FLAT_STORE_DWORDX4 : FLAT_Flat_Store_Pseudo <"flat_store_dwordx4", VReg_128>; +defm FLAT_STORE_DWORDX3 : FLAT_Flat_Store_Pseudo <"flat_store_dwordx3", VReg_96>; let SubtargetPredicate = HasD16LoadStore in { let TiedSourceNotRead = 1 in { -def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; -defm FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_ubyte_d16">; -def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; -defm FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo_t16 <"flat_load_sbyte_d16">; -def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; -defm FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo_t16 <"flat_load_short_d16">; +defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Flat_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; +defm FLAT_LOAD_UBYTE_D16 : FLAT_Flat_Load_Pseudo_t16 <"flat_load_ubyte_d16">; +defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Flat_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; +defm FLAT_LOAD_SBYTE_D16 : FLAT_Flat_Load_Pseudo_t16 <"flat_load_sbyte_d16">; +defm FLAT_LOAD_SHORT_D16_HI : FLAT_Flat_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; +defm FLAT_LOAD_SHORT_D16 : FLAT_Flat_Load_Pseudo_t16 <"flat_load_short_d16">; } -def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; -def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; +defm FLAT_STORE_BYTE_D16_HI : FLAT_Flat_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; +defm FLAT_STORE_SHORT_D16_HI : FLAT_Flat_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; } -defm FLAT_STORE_BYTE : FLAT_Store_Pseudo_t16 <"flat_store_byte">; -defm FLAT_STORE_SHORT : FLAT_Store_Pseudo_t16 <"flat_store_short">; +defm FLAT_STORE_BYTE : FLAT_Flat_Store_Pseudo_t16 <"flat_store_byte">; +defm FLAT_STORE_SHORT : FLAT_Flat_Store_Pseudo_t16 <"flat_store_short">; defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", VGPR_32, i32, v2i32, VReg_64>; @@ -1200,6 +1250,16 @@ class GlobalLoadSaddrPat_D16 ; +class FlatLoadSaddrPat_D16 : GCNPat < + (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)), + (inst $saddr, $voffset, $offset, (i32 0), $in) +>; + +class FlatLoadSaddrPat_D16_t16 : GCNPat < + (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), + (inst $saddr, $voffset, $offset, (i32 0)) +>; + class GlobalLoadSaddrPat_D16_t16 : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), (inst $saddr, $voffset, $offset, (i32 0)) @@ -1210,7 +1270,7 @@ class FlatLoadSignedPat (inst $vaddr, $offset) >; -class GlobalLoadSaddrPat : GCNPat < +class FlatLoadSaddrPat : GCNPat < (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), (inst $saddr, $voffset, $offset, 0) >; @@ -1394,7 +1454,7 @@ multiclass GlobalFLATLoadPats(!cast(inst)#"_SADDR"), node, vt> { + def : FlatLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt> { let AddedComplexity = 11; } } @@ -1404,7 +1464,7 @@ multiclass GlobalFLATLoadPats_D16(!cast(inst)#"_SADDR"), node, vt> { + def : FlatLoadSaddrPat_D16(!cast(inst)#"_SADDR"), node, vt> { let AddedComplexity = 11; } } @@ -1568,32 +1628,60 @@ multiclass ScratchFLATLoadPats_D16_t16 { + def : FlatLoadPat ; + + def : FlatLoadSaddrPat(!cast(inst)#"_SADDR"), node, vt> { + let AddedComplexity = 9; + let SubtargetPredicate = HasFlatGVSMode; + } +} + +multiclass FlatLoadPats_D16 { + def : FlatLoadPat_D16 ; + + def : FlatLoadSaddrPat_D16(!cast(inst)#"_SADDR"), node, vt> { + let AddedComplexity = 9; + let SubtargetPredicate = HasFlatGVSMode; + } +} + +multiclass FlatLoadPats_D16_t16 { + def : FlatLoadPat_D16_t16 ; + + def : FlatLoadSaddrPat_D16_t16(!cast(inst)#"_SADDR"), node, vt> { + let AddedComplexity = 9; + let SubtargetPredicate = HasFlatGVSMode; + } +} + let OtherPredicates = [HasFlatAddressSpace] in { -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in let True16Predicate = p in { - def : FlatLoadPat ; - def : FlatLoadPat ; - def : FlatLoadPat ; - def : FlatLoadPat ; - def : FlatLoadPat ; - def : FlatLoadPat ; - def : FlatLoadPat ; - def : FlatLoadPat ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; + defm : FlatLoadPats ; def : FlatStorePat ; def : FlatStorePat ; def : FlatStorePat ; @@ -1601,28 +1689,28 @@ let True16Predicate = p in { } let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts in { - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; - def : FlatLoadPat_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; + defm : FlatLoadPats_D16_t16; def : FlatStorePat ; def : FlatStorePat ; def : FlatStorePat ; def : FlatStorePat ; } // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts -def : FlatLoadPat ; -def : FlatLoadPat ; +defm : FlatLoadPats ; +defm : FlatLoadPats ; def : FlatStorePat ; def : FlatStorePat ; foreach vt = Reg32Types.types in { -def : FlatLoadPat ; +defm : FlatLoadPats ; def : FlatStorePat ; } @@ -1634,7 +1722,7 @@ def : FlatLoadPat ; def : FlatStorePat ; foreach vt = VReg_128.RegTypes in { -def : FlatLoadPat ; +defm : FlatLoadPats ; def : FlatStorePat ; } @@ -2832,11 +2920,11 @@ multiclass VFLAT_Real_Base_gfx12 op, VFLAT_Aliases_gfx12, VFLAT_Real_gfx12; -multiclass VFLAT_Real_Atomics_gfx12 op, - string name = get_FLAT_ps.Mnemonic, - string alias = name> : +multiclass VFLAT_Real_AllAddr_gfx12 op, + string name = get_FLAT_ps.Mnemonic, + string alias = name> : VFLAT_Real_Base_gfx12 { - defm _RTN : VFLAT_Real_gfx12; + defm _SADDR : VFLAT_Real_gfx12; } multiclass VGLOBAL_Real_AllAddr_gfx12 op, @@ -2853,7 +2941,7 @@ multiclass VGLOBAL_Real_AllAddr_gfx1200 op> { } } -multiclass VGLOBAL_Real_AllAddr_gfx12_w64 op, +multiclass VFLAT_Real_AllAddr_gfx12_w64 op, string name = get_FLAT_ps.Mnemonic> : VFLAT_Aliases_gfx12 { let DecoderNamespace = "GFX12W64" in { @@ -2862,10 +2950,10 @@ multiclass VGLOBAL_Real_AllAddr_gfx12_w64 op, } } -multiclass VGLOBAL_Real_Atomics_gfx12 op, +multiclass VFLAT_Real_Atomics_gfx12 op, string name = get_FLAT_ps.Mnemonic, string alias = name> : - VGLOBAL_Real_AllAddr_gfx12 { + VFLAT_Real_AllAddr_gfx12 { defm _RTN : VFLAT_Real_gfx12; defm _SADDR_RTN : VFLAT_Real_gfx12; } @@ -2879,28 +2967,28 @@ multiclass VSCRATCH_Real_AllAddr_gfx12 op, } // ENC_VFLAT. -defm FLAT_LOAD_UBYTE : VFLAT_Real_Base_gfx12<0x010, "flat_load_u8">; -defm FLAT_LOAD_SBYTE : VFLAT_Real_Base_gfx12<0x011, "flat_load_i8">; -defm FLAT_LOAD_USHORT : VFLAT_Real_Base_gfx12<0x012, "flat_load_u16">; -defm FLAT_LOAD_SSHORT : VFLAT_Real_Base_gfx12<0x013, "flat_load_i16">; -defm FLAT_LOAD_DWORD : VFLAT_Real_Base_gfx12<0x014, "flat_load_b32">; -defm FLAT_LOAD_DWORDX2 : VFLAT_Real_Base_gfx12<0x015, "flat_load_b64">; -defm FLAT_LOAD_DWORDX3 : VFLAT_Real_Base_gfx12<0x016, "flat_load_b96">; -defm FLAT_LOAD_DWORDX4 : VFLAT_Real_Base_gfx12<0x017, "flat_load_b128">; -defm FLAT_STORE_BYTE : VFLAT_Real_Base_gfx12<0x018, "flat_store_b8">; -defm FLAT_STORE_SHORT : VFLAT_Real_Base_gfx12<0x019, "flat_store_b16">; -defm FLAT_STORE_DWORD : VFLAT_Real_Base_gfx12<0x01a, "flat_store_b32">; -defm FLAT_STORE_DWORDX2 : VFLAT_Real_Base_gfx12<0x01b, "flat_store_b64">; -defm FLAT_STORE_DWORDX3 : VFLAT_Real_Base_gfx12<0x01c, "flat_store_b96">; -defm FLAT_STORE_DWORDX4 : VFLAT_Real_Base_gfx12<0x01d, "flat_store_b128">; -defm FLAT_LOAD_UBYTE_D16 : VFLAT_Real_Base_gfx12<0x01e, "flat_load_d16_u8">; -defm FLAT_LOAD_SBYTE_D16 : VFLAT_Real_Base_gfx12<0x01f, "flat_load_d16_i8">; -defm FLAT_LOAD_SHORT_D16 : VFLAT_Real_Base_gfx12<0x020, "flat_load_d16_b16">; -defm FLAT_LOAD_UBYTE_D16_HI : VFLAT_Real_Base_gfx12<0x021, "flat_load_d16_hi_u8">; -defm FLAT_LOAD_SBYTE_D16_HI : VFLAT_Real_Base_gfx12<0x022, "flat_load_d16_hi_i8">; -defm FLAT_LOAD_SHORT_D16_HI : VFLAT_Real_Base_gfx12<0x023, "flat_load_d16_hi_b16">; -defm FLAT_STORE_BYTE_D16_HI : VFLAT_Real_Base_gfx12<0x024, "flat_store_d16_hi_b8">; -defm FLAT_STORE_SHORT_D16_HI : VFLAT_Real_Base_gfx12<0x025, "flat_store_d16_hi_b16">; +defm FLAT_LOAD_UBYTE : VFLAT_Real_AllAddr_gfx12<0x010, "flat_load_u8">; +defm FLAT_LOAD_SBYTE : VFLAT_Real_AllAddr_gfx12<0x011, "flat_load_i8">; +defm FLAT_LOAD_USHORT : VFLAT_Real_AllAddr_gfx12<0x012, "flat_load_u16">; +defm FLAT_LOAD_SSHORT : VFLAT_Real_AllAddr_gfx12<0x013, "flat_load_i16">; +defm FLAT_LOAD_DWORD : VFLAT_Real_AllAddr_gfx12<0x014, "flat_load_b32">; +defm FLAT_LOAD_DWORDX2 : VFLAT_Real_AllAddr_gfx12<0x015, "flat_load_b64">; +defm FLAT_LOAD_DWORDX3 : VFLAT_Real_AllAddr_gfx12<0x016, "flat_load_b96">; +defm FLAT_LOAD_DWORDX4 : VFLAT_Real_AllAddr_gfx12<0x017, "flat_load_b128">; +defm FLAT_STORE_BYTE : VFLAT_Real_AllAddr_gfx12<0x018, "flat_store_b8">; +defm FLAT_STORE_SHORT : VFLAT_Real_AllAddr_gfx12<0x019, "flat_store_b16">; +defm FLAT_STORE_DWORD : VFLAT_Real_AllAddr_gfx12<0x01a, "flat_store_b32">; +defm FLAT_STORE_DWORDX2 : VFLAT_Real_AllAddr_gfx12<0x01b, "flat_store_b64">; +defm FLAT_STORE_DWORDX3 : VFLAT_Real_AllAddr_gfx12<0x01c, "flat_store_b96">; +defm FLAT_STORE_DWORDX4 : VFLAT_Real_AllAddr_gfx12<0x01d, "flat_store_b128">; +defm FLAT_LOAD_UBYTE_D16 : VFLAT_Real_AllAddr_gfx12<0x01e, "flat_load_d16_u8">; +defm FLAT_LOAD_SBYTE_D16 : VFLAT_Real_AllAddr_gfx12<0x01f, "flat_load_d16_i8">; +defm FLAT_LOAD_SHORT_D16 : VFLAT_Real_AllAddr_gfx12<0x020, "flat_load_d16_b16">; +defm FLAT_LOAD_UBYTE_D16_HI : VFLAT_Real_AllAddr_gfx12<0x021, "flat_load_d16_hi_u8">; +defm FLAT_LOAD_SBYTE_D16_HI : VFLAT_Real_AllAddr_gfx12<0x022, "flat_load_d16_hi_i8">; +defm FLAT_LOAD_SHORT_D16_HI : VFLAT_Real_AllAddr_gfx12<0x023, "flat_load_d16_hi_b16">; +defm FLAT_STORE_BYTE_D16_HI : VFLAT_Real_AllAddr_gfx12<0x024, "flat_store_d16_hi_b8">; +defm FLAT_STORE_SHORT_D16_HI : VFLAT_Real_AllAddr_gfx12<0x025, "flat_store_d16_hi_b16">; defm FLAT_ATOMIC_SWAP : VFLAT_Real_Atomics_gfx12<0x033, "flat_atomic_swap_b32">; defm FLAT_ATOMIC_CMPSWAP : VFLAT_Real_Atomics_gfx12<0x034, "flat_atomic_cmpswap_b32">; defm FLAT_ATOMIC_ADD : VFLAT_Real_Atomics_gfx12<0x035, "flat_atomic_add_u32">; @@ -2936,74 +3024,74 @@ defm FLAT_ATOMIC_PK_ADD_F16 : VFLAT_Real_Atomics_gfx12<0x059>; defm FLAT_ATOMIC_PK_ADD_BF16 : VFLAT_Real_Atomics_gfx12<0x05a>; // ENC_VGLOBAL. -defm GLOBAL_LOAD_UBYTE : VGLOBAL_Real_AllAddr_gfx12<0x010, "global_load_u8">; -defm GLOBAL_LOAD_SBYTE : VGLOBAL_Real_AllAddr_gfx12<0x011, "global_load_i8">; -defm GLOBAL_LOAD_USHORT : VGLOBAL_Real_AllAddr_gfx12<0x012, "global_load_u16">; -defm GLOBAL_LOAD_SSHORT : VGLOBAL_Real_AllAddr_gfx12<0x013, "global_load_i16">; -defm GLOBAL_LOAD_DWORD : VGLOBAL_Real_AllAddr_gfx12<0x014, "global_load_b32">; -defm GLOBAL_LOAD_DWORDX2 : VGLOBAL_Real_AllAddr_gfx12<0x015, "global_load_b64">; -defm GLOBAL_LOAD_DWORDX3 : VGLOBAL_Real_AllAddr_gfx12<0x016, "global_load_b96">; -defm GLOBAL_LOAD_DWORDX4 : VGLOBAL_Real_AllAddr_gfx12<0x017, "global_load_b128">; -defm GLOBAL_STORE_BYTE : VGLOBAL_Real_AllAddr_gfx12<0x018, "global_store_b8">; -defm GLOBAL_STORE_SHORT : VGLOBAL_Real_AllAddr_gfx12<0x019, "global_store_b16">; -defm GLOBAL_STORE_DWORD : VGLOBAL_Real_AllAddr_gfx12<0x01a, "global_store_b32">; -defm GLOBAL_STORE_DWORDX2 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "global_store_b64">; -defm GLOBAL_STORE_DWORDX3 : VGLOBAL_Real_AllAddr_gfx12<0x01c, "global_store_b96">; -defm GLOBAL_STORE_DWORDX4 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "global_store_b128">; -defm GLOBAL_LOAD_UBYTE_D16 : VGLOBAL_Real_AllAddr_gfx12<0x01e, "global_load_d16_u8">; -defm GLOBAL_LOAD_SBYTE_D16 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "global_load_d16_i8">; -defm GLOBAL_LOAD_SHORT_D16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "global_load_d16_b16">; -defm GLOBAL_LOAD_UBYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x021, "global_load_d16_hi_u8">; -defm GLOBAL_LOAD_SBYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x022, "global_load_d16_hi_i8">; -defm GLOBAL_LOAD_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x023, "global_load_d16_hi_b16">; -defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_store_d16_hi_b8">; -defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">; -defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">; -defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">; -defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>; -defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>; - -defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">; -defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">; -defm GLOBAL_ATOMIC_ADD : VGLOBAL_Real_Atomics_gfx12<0x035, "global_atomic_add_u32">; -defm GLOBAL_ATOMIC_SUB : VGLOBAL_Real_Atomics_gfx12<0x036, "global_atomic_sub_u32">; -defm GLOBAL_ATOMIC_CSUB : VGLOBAL_Real_Atomics_gfx12<0x037, "global_atomic_sub_clamp_u32", "global_atomic_csub_u32">; -defm GLOBAL_ATOMIC_SMIN : VGLOBAL_Real_Atomics_gfx12<0x038, "global_atomic_min_i32">; -defm GLOBAL_ATOMIC_UMIN : VGLOBAL_Real_Atomics_gfx12<0x039, "global_atomic_min_u32">; -defm GLOBAL_ATOMIC_SMAX : VGLOBAL_Real_Atomics_gfx12<0x03a, "global_atomic_max_i32">; -defm GLOBAL_ATOMIC_UMAX : VGLOBAL_Real_Atomics_gfx12<0x03b, "global_atomic_max_u32">; -defm GLOBAL_ATOMIC_AND : VGLOBAL_Real_Atomics_gfx12<0x03c, "global_atomic_and_b32">; -defm GLOBAL_ATOMIC_OR : VGLOBAL_Real_Atomics_gfx12<0x03d, "global_atomic_or_b32">; -defm GLOBAL_ATOMIC_XOR : VGLOBAL_Real_Atomics_gfx12<0x03e, "global_atomic_xor_b32">; -defm GLOBAL_ATOMIC_INC : VGLOBAL_Real_Atomics_gfx12<0x03f, "global_atomic_inc_u32">; -defm GLOBAL_ATOMIC_DEC : VGLOBAL_Real_Atomics_gfx12<0x040, "global_atomic_dec_u32">; -defm GLOBAL_ATOMIC_SWAP_X2 : VGLOBAL_Real_Atomics_gfx12<0x041, "global_atomic_swap_b64">; -defm GLOBAL_ATOMIC_CMPSWAP_X2 : VGLOBAL_Real_Atomics_gfx12<0x042, "global_atomic_cmpswap_b64">; -defm GLOBAL_ATOMIC_ADD_X2 : VGLOBAL_Real_Atomics_gfx12<0x043, "global_atomic_add_u64">; -defm GLOBAL_ATOMIC_SUB_X2 : VGLOBAL_Real_Atomics_gfx12<0x044, "global_atomic_sub_u64">; -defm GLOBAL_ATOMIC_SMIN_X2 : VGLOBAL_Real_Atomics_gfx12<0x045, "global_atomic_min_i64">; -defm GLOBAL_ATOMIC_UMIN_X2 : VGLOBAL_Real_Atomics_gfx12<0x046, "global_atomic_min_u64">; -defm GLOBAL_ATOMIC_SMAX_X2 : VGLOBAL_Real_Atomics_gfx12<0x047, "global_atomic_max_i64">; -defm GLOBAL_ATOMIC_UMAX_X2 : VGLOBAL_Real_Atomics_gfx12<0x048, "global_atomic_max_u64">; -defm GLOBAL_ATOMIC_AND_X2 : VGLOBAL_Real_Atomics_gfx12<0x049, "global_atomic_and_b64">; -defm GLOBAL_ATOMIC_OR_X2 : VGLOBAL_Real_Atomics_gfx12<0x04a, "global_atomic_or_b64">; -defm GLOBAL_ATOMIC_XOR_X2 : VGLOBAL_Real_Atomics_gfx12<0x04b, "global_atomic_xor_b64">; -defm GLOBAL_ATOMIC_INC_X2 : VGLOBAL_Real_Atomics_gfx12<0x04c, "global_atomic_inc_u64">; -defm GLOBAL_ATOMIC_DEC_X2 : VGLOBAL_Real_Atomics_gfx12<0x04d, "global_atomic_dec_u64">; -defm GLOBAL_ATOMIC_COND_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x050>; -defm GLOBAL_ATOMIC_FMIN : VGLOBAL_Real_Atomics_gfx12<0x051, "global_atomic_min_num_f32", "global_atomic_min_f32">; -defm GLOBAL_ATOMIC_FMAX : VGLOBAL_Real_Atomics_gfx12<0x052, "global_atomic_max_num_f32", "global_atomic_max_f32">; -defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056>; +defm GLOBAL_LOAD_UBYTE : VFLAT_Real_AllAddr_gfx12<0x010, "global_load_u8">; +defm GLOBAL_LOAD_SBYTE : VFLAT_Real_AllAddr_gfx12<0x011, "global_load_i8">; +defm GLOBAL_LOAD_USHORT : VFLAT_Real_AllAddr_gfx12<0x012, "global_load_u16">; +defm GLOBAL_LOAD_SSHORT : VFLAT_Real_AllAddr_gfx12<0x013, "global_load_i16">; +defm GLOBAL_LOAD_DWORD : VFLAT_Real_AllAddr_gfx12<0x014, "global_load_b32">; +defm GLOBAL_LOAD_DWORDX2 : VFLAT_Real_AllAddr_gfx12<0x015, "global_load_b64">; +defm GLOBAL_LOAD_DWORDX3 : VFLAT_Real_AllAddr_gfx12<0x016, "global_load_b96">; +defm GLOBAL_LOAD_DWORDX4 : VFLAT_Real_AllAddr_gfx12<0x017, "global_load_b128">; +defm GLOBAL_STORE_BYTE : VFLAT_Real_AllAddr_gfx12<0x018, "global_store_b8">; +defm GLOBAL_STORE_SHORT : VFLAT_Real_AllAddr_gfx12<0x019, "global_store_b16">; +defm GLOBAL_STORE_DWORD : VFLAT_Real_AllAddr_gfx12<0x01a, "global_store_b32">; +defm GLOBAL_STORE_DWORDX2 : VFLAT_Real_AllAddr_gfx12<0x01b, "global_store_b64">; +defm GLOBAL_STORE_DWORDX3 : VFLAT_Real_AllAddr_gfx12<0x01c, "global_store_b96">; +defm GLOBAL_STORE_DWORDX4 : VFLAT_Real_AllAddr_gfx12<0x01d, "global_store_b128">; +defm GLOBAL_LOAD_UBYTE_D16 : VFLAT_Real_AllAddr_gfx12<0x01e, "global_load_d16_u8">; +defm GLOBAL_LOAD_SBYTE_D16 : VFLAT_Real_AllAddr_gfx12<0x01f, "global_load_d16_i8">; +defm GLOBAL_LOAD_SHORT_D16 : VFLAT_Real_AllAddr_gfx12<0x020, "global_load_d16_b16">; +defm GLOBAL_LOAD_UBYTE_D16_HI : VFLAT_Real_AllAddr_gfx12<0x021, "global_load_d16_hi_u8">; +defm GLOBAL_LOAD_SBYTE_D16_HI : VFLAT_Real_AllAddr_gfx12<0x022, "global_load_d16_hi_i8">; +defm GLOBAL_LOAD_SHORT_D16_HI : VFLAT_Real_AllAddr_gfx12<0x023, "global_load_d16_hi_b16">; +defm GLOBAL_STORE_BYTE_D16_HI : VFLAT_Real_AllAddr_gfx12<0x024, "global_store_d16_hi_b8">; +defm GLOBAL_STORE_SHORT_D16_HI : VFLAT_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">; +defm GLOBAL_LOAD_DWORD_ADDTID : VFLAT_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">; +defm GLOBAL_STORE_DWORD_ADDTID : VFLAT_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">; +defm GLOBAL_LOAD_BLOCK : VFLAT_Real_AllAddr_gfx12<0x053>; +defm GLOBAL_STORE_BLOCK : VFLAT_Real_AllAddr_gfx12<0x054>; + +defm GLOBAL_ATOMIC_SWAP : VFLAT_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">; +defm GLOBAL_ATOMIC_CMPSWAP : VFLAT_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">; +defm GLOBAL_ATOMIC_ADD : VFLAT_Real_Atomics_gfx12<0x035, "global_atomic_add_u32">; +defm GLOBAL_ATOMIC_SUB : VFLAT_Real_Atomics_gfx12<0x036, "global_atomic_sub_u32">; +defm GLOBAL_ATOMIC_CSUB : VFLAT_Real_Atomics_gfx12<0x037, "global_atomic_sub_clamp_u32", "global_atomic_csub_u32">; +defm GLOBAL_ATOMIC_SMIN : VFLAT_Real_Atomics_gfx12<0x038, "global_atomic_min_i32">; +defm GLOBAL_ATOMIC_UMIN : VFLAT_Real_Atomics_gfx12<0x039, "global_atomic_min_u32">; +defm GLOBAL_ATOMIC_SMAX : VFLAT_Real_Atomics_gfx12<0x03a, "global_atomic_max_i32">; +defm GLOBAL_ATOMIC_UMAX : VFLAT_Real_Atomics_gfx12<0x03b, "global_atomic_max_u32">; +defm GLOBAL_ATOMIC_AND : VFLAT_Real_Atomics_gfx12<0x03c, "global_atomic_and_b32">; +defm GLOBAL_ATOMIC_OR : VFLAT_Real_Atomics_gfx12<0x03d, "global_atomic_or_b32">; +defm GLOBAL_ATOMIC_XOR : VFLAT_Real_Atomics_gfx12<0x03e, "global_atomic_xor_b32">; +defm GLOBAL_ATOMIC_INC : VFLAT_Real_Atomics_gfx12<0x03f, "global_atomic_inc_u32">; +defm GLOBAL_ATOMIC_DEC : VFLAT_Real_Atomics_gfx12<0x040, "global_atomic_dec_u32">; +defm GLOBAL_ATOMIC_SWAP_X2 : VFLAT_Real_Atomics_gfx12<0x041, "global_atomic_swap_b64">; +defm GLOBAL_ATOMIC_CMPSWAP_X2 : VFLAT_Real_Atomics_gfx12<0x042, "global_atomic_cmpswap_b64">; +defm GLOBAL_ATOMIC_ADD_X2 : VFLAT_Real_Atomics_gfx12<0x043, "global_atomic_add_u64">; +defm GLOBAL_ATOMIC_SUB_X2 : VFLAT_Real_Atomics_gfx12<0x044, "global_atomic_sub_u64">; +defm GLOBAL_ATOMIC_SMIN_X2 : VFLAT_Real_Atomics_gfx12<0x045, "global_atomic_min_i64">; +defm GLOBAL_ATOMIC_UMIN_X2 : VFLAT_Real_Atomics_gfx12<0x046, "global_atomic_min_u64">; +defm GLOBAL_ATOMIC_SMAX_X2 : VFLAT_Real_Atomics_gfx12<0x047, "global_atomic_max_i64">; +defm GLOBAL_ATOMIC_UMAX_X2 : VFLAT_Real_Atomics_gfx12<0x048, "global_atomic_max_u64">; +defm GLOBAL_ATOMIC_AND_X2 : VFLAT_Real_Atomics_gfx12<0x049, "global_atomic_and_b64">; +defm GLOBAL_ATOMIC_OR_X2 : VFLAT_Real_Atomics_gfx12<0x04a, "global_atomic_or_b64">; +defm GLOBAL_ATOMIC_XOR_X2 : VFLAT_Real_Atomics_gfx12<0x04b, "global_atomic_xor_b64">; +defm GLOBAL_ATOMIC_INC_X2 : VFLAT_Real_Atomics_gfx12<0x04c, "global_atomic_inc_u64">; +defm GLOBAL_ATOMIC_DEC_X2 : VFLAT_Real_Atomics_gfx12<0x04d, "global_atomic_dec_u64">; +defm GLOBAL_ATOMIC_COND_SUB_U32 : VFLAT_Real_Atomics_gfx12<0x050>; +defm GLOBAL_ATOMIC_FMIN : VFLAT_Real_Atomics_gfx12<0x051, "global_atomic_min_num_f32", "global_atomic_min_f32">; +defm GLOBAL_ATOMIC_FMAX : VFLAT_Real_Atomics_gfx12<0x052, "global_atomic_max_num_f32", "global_atomic_max_f32">; +defm GLOBAL_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056>; defm GLOBAL_LOAD_TR_B128_w32 : VGLOBAL_Real_AllAddr_gfx1200<0x057>; defm GLOBAL_LOAD_TR_B64_w32 : VGLOBAL_Real_AllAddr_gfx1200<0x058>; -defm GLOBAL_LOAD_TR_B128_w64 : VGLOBAL_Real_AllAddr_gfx12_w64<0x057>; -defm GLOBAL_LOAD_TR_B64_w64 : VGLOBAL_Real_AllAddr_gfx12_w64<0x058>; +defm GLOBAL_LOAD_TR_B128_w64 : VFLAT_Real_AllAddr_gfx12_w64<0x057>; +defm GLOBAL_LOAD_TR_B64_w64 : VFLAT_Real_AllAddr_gfx12_w64<0x058>; -defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073>; -defm GLOBAL_ATOMIC_PK_ADD_F16 : VGLOBAL_Real_Atomics_gfx12<0x059>; -defm GLOBAL_ATOMIC_PK_ADD_BF16 : VGLOBAL_Real_Atomics_gfx12<0x05a>; +defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VFLAT_Real_Atomics_gfx12<0x073>; +defm GLOBAL_ATOMIC_PK_ADD_F16 : VFLAT_Real_Atomics_gfx12<0x059>; +defm GLOBAL_ATOMIC_PK_ADD_BF16 : VFLAT_Real_Atomics_gfx12<0x05a>; defm GLOBAL_INV : VFLAT_Real_Base_gfx12<0x02b>; defm GLOBAL_WB : VFLAT_Real_Base_gfx12<0x02c>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 68430526dba26..67c6daaa24c2a 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -214,6 +214,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool FlatInstOffsets = false; bool FlatGlobalInsts = false; bool FlatScratchInsts = false; + bool FlatGVSMode = false; bool ScalarFlatScratchInsts = false; bool HasArchitectedFlatScratch = false; bool EnableFlatScratch = false; @@ -1160,6 +1161,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; } + bool hasFlatGVSMode() const { return FlatGVSMode; } + bool enableSIScheduler() const { return EnableSIScheduler; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a1e14d90ebcab..6109a2c4dfc7f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6460,7 +6460,7 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { if (OldSAddrIdx < 0) return false; - assert(isSegmentSpecificFLAT(Inst)); + assert(isSegmentSpecificFLAT(Inst) || (isFLAT(Inst) && ST.hasFlatGVSMode())); int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); if (NewOpc < 0) @@ -6537,7 +6537,7 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const { // FIXME: Remove this when SelectionDAG is obsoleted. void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const { - if (!isSegmentSpecificFLAT(MI)) + if (!isSegmentSpecificFLAT(MI) && !ST.hasFlatGVSMode()) return; // Fixup SGPR operands in VGPRs. We only select these when the DAG divergence diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll new file mode 100644 index 0000000000000..f0988a17b35f0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-load.ll @@ -0,0 +1,2405 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +; Test using saddr addressing mode of flat_*load_* instructions. + +; -------------------------------------------------------------------------------- +; No vgpr offset, constants +; -------------------------------------------------------------------------------- + +; SGPR base only +define amdgpu_ps float @flat_load_saddr_i8_offset_0(ptr inreg %sbase) { +; GFX1250-LABEL: flat_load_saddr_i8_offset_0: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %load = load i8, ptr %sbase + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; SGPR base with maximum gfx1250 immediate offset +define amdgpu_ps float @flat_load_saddr_i8_offset_8388607(ptr inreg %sbase) { +; GFX1250-LABEL: flat_load_saddr_i8_offset_8388607: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:8388607 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 8388607 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; SGPR base with maximum gfx1250 immediate offset + 1 +define amdgpu_ps float @flat_load_saddr_i8_offset_8388608(ptr inreg %sbase) { +; GFX1250-LABEL: flat_load_saddr_i8_offset_8388608: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 8388608 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; SGPR base with maximum negative gfx1250 immediate offset +define amdgpu_ps float @flat_load_saddr_i8_offset_neg8388608(ptr inreg %sbase) { +; GFX1250-LABEL: flat_load_saddr_i8_offset_neg8388608: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-8388608 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 -8388608 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; SGPR base with maximum negative gfx1250 immediate offset -1 +define amdgpu_ps float @flat_load_saddr_i8_offset_neg8388609(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_neg8388609: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0xff800000, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-1 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_neg8388609: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 0xff7fffff +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 -8388609 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_0xFFFFFFFF(ptr inreg %sbase) { +; GFX1250-LABEL: flat_load_saddr_i8_offset_0xFFFFFFFF: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, 0xff800000 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:8388607 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294967295 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_0x100000000(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_0x100000000: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, 1 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_0x100000000: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 0 +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294967296 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_0x100000001(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_0x100000001: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:1 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_0x100000001: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 1 +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294967297 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_0x100000FFF(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_0x100000FFF: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_0x100000FFF: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfff +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294971391 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_0x100001000(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_0x100001000: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 1, s3, s0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4096 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_0x100001000: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 0x1000 +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 4294971392 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_neg0xFFFFFFFF(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_neg0xFFFFFFFF: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800000, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8388607 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_neg0xFFFFFFFF: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 1 +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 -4294967295 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_neg0x100000000(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_neg0x100000000: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, -1 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_neg0x100000000: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, 0 +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 -4294967296 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_neg0x100000001(ptr inreg %sbase) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_neg0x100000001: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s3, s0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-1 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_neg0x100000001: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_add_co_u32 s0, s2, -1 +; GFX1250-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 -4294967297 + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; -------------------------------------------------------------------------------- +; Basic addressing patterns +; -------------------------------------------------------------------------------- + +; Basic pattern, no immediate offset. +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Maximum positive offset on gfx1250 +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_offset_8388607(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_offset_8388607: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:8388607 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 8388607 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Maximum positive offset on gfx1250 + 1 +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_offset_8388608(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_zext_vgpr_offset_8388608: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_zext_vgpr_offset_8388608: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 +; GFX1250-GISEL-NEXT: s_wait_alu 0xfffd +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 8388608 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Maximum negative offset on gfx1250 +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_offset_neg8388608(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_offset_neg8388608: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-8388608 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -8388608 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Maximum negative offset on gfx1250 - 1 +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_offset_neg8388607(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_offset_neg8388607: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-8388607 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -8388607 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_offset_8388607_gep_order(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_offset_8388607_gep_order: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:8388607 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 8388607 + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 %zext.offset + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; pointer addressing done in integers +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_ptrtoint(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_ptrtoint: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %sbase.as.int = ptrtoint ptr %sbase to i64 + %add = add i64 %sbase.as.int, %zext.offset + %dirty.gep = inttoptr i64 %add to ptr + %load = load i8, ptr %dirty.gep + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; zext forced to LHS of addressing expression +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_ptrtoint_commute_add(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_ptrtoint_commute_add: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %sbase.as.int = ptrtoint ptr %sbase to i64 + %add = add i64 %zext.offset, %sbase.as.int + %dirty.gep = inttoptr i64 %add to ptr + %load = load i8, ptr %dirty.gep + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; zext forced to LHS of addressing expression, with immediate offset +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset0: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %sbase.as.int = ptrtoint ptr %sbase to i64 + %add = add i64 %zext.offset, %sbase.as.int + %add.immoffset = add i64 %add, 128 + %dirty.gep = inttoptr i64 %add.immoffset to ptr + %load = load i8, ptr %dirty.gep + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; zext forced to LHS of addressing expression, with immediate offset in non-canonical position +define amdgpu_ps float @flat_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_offset1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %sbase.as.int = ptrtoint ptr %sbase to i64 + %add.immoffset = add i64 %sbase.as.int, 128 + %add = add i64 %zext.offset, %add.immoffset + %dirty.gep = inttoptr i64 %add to ptr + %load = load i8, ptr %dirty.gep + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; -------------------------------------------------------------------------------- +; Uniformity edge cases +; -------------------------------------------------------------------------------- + +@ptr.in.lds = internal addrspace(3) global ptr undef + +; Base pointer is uniform, but also in VGPRs +define amdgpu_ps float @flat_load_saddr_uniform_ptr_in_vgprs(i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_uniform_ptr_in_vgprs: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: ds_load_b64 v[2:3], v1 +; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_uniform_ptr_in_vgprs: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: ds_load_b64 v[2:3], v1 +; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %sbase = load ptr, ptr addrspace(3) @ptr.in.lds + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Base pointer is uniform, but also in VGPRs, with imm offset +define amdgpu_ps float @flat_load_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_uniform_ptr_in_vgprs_immoffset: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: ds_load_b64 v[2:3], v1 +; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[0:1] offset:42 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_uniform_ptr_in_vgprs_immoffset: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: ds_load_b64 v[2:3], v1 +; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:42 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %sbase = load ptr, ptr addrspace(3) @ptr.in.lds + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 42 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Both 64-bit base and 32-bit offset are scalar +define amdgpu_ps float @flat_load_saddr_i8_zext_uniform_offset(ptr inreg %sbase, i32 inreg %soffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_uniform_offset: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, s4 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %soffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Both 64-bit base and 32-bit offset are scalar, with immediate offset. +define amdgpu_ps float @flat_load_saddr_i8_zext_uniform_offset_immoffset(ptr inreg %sbase, i32 inreg %soffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_uniform_offset_immoffset: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, s4 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-24 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %soffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -24 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Both components uniform, zext forced to LHS of addressing expression +define amdgpu_ps float @flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add(ptr inreg %sbase, i32 inreg %soffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, s4 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %soffset to i64 + %sbase.as.int = ptrtoint ptr %sbase to i64 + %add = add i64 %zext.offset, %sbase.as.int + %dirty.gep = inttoptr i64 %add to ptr + %load = load i8, ptr %dirty.gep + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; Both components uniform, zext forced to LHS of addressing expression, with immediate offset +define amdgpu_ps float @flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0(ptr inreg %sbase, i32 inreg %soffset) { +; GFX1250-LABEL: flat_load_saddr_i8_zext_sgpr_ptrtoint_commute_add_imm_offset0: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_mov_b32_e32 v0, s4 +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %soffset to i64 + %sbase.as.int = ptrtoint ptr %sbase to i64 + %add = add i64 %zext.offset, %sbase.as.int + %add.immoffset = add i64 %add, 128 + %dirty.gep = inttoptr i64 %add.immoffset to ptr + %load = load i8, ptr %dirty.gep + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; divergent 64-bit base, 32-bit scalar offset. +define amdgpu_ps float @flat_load_i8_vgpr64_sgpr32(ptr %vbase, i32 inreg %soffset) { +; GFX1250-SDAG-LABEL: flat_load_i8_vgpr64_sgpr32: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3] +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_i8_vgpr64_sgpr32: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_mov_b32 s3, 0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %soffset to i64 + %gep0 = getelementptr inbounds i8, ptr %vbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; divergent 64-bit base, 32-bit scalar offset, with imm offset +define amdgpu_ps float @flat_load_i8_vgpr64_sgpr32_offset_8388607(ptr %vbase, i32 inreg %soffset) { +; GFX1250-SDAG-LABEL: flat_load_i8_vgpr64_sgpr32_offset_8388607: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_mov_b32 s3, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3] +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:8388607 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_i8_vgpr64_sgpr32_offset_8388607: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_mov_b32 s3, 0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] offset:8388607 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %soffset to i64 + %gep0 = getelementptr inbounds i8, ptr %vbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 8388607 + %load = load i8, ptr %gep1 + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; -------------------------------------------------------------------------------- +; Natural addressing shifts with restricted range +; -------------------------------------------------------------------------------- + +; Cannot push the shift into 32-bits, and cannot match. +define amdgpu_ps float @flat_load_saddr_f32_natural_addressing(ptr inreg %sbase, ptr %voffset.ptr) { +; GFX1250-SDAG-LABEL: flat_load_saddr_f32_natural_addressing: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3] +; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_f32_natural_addressing: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1] +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %voffset = load i32, ptr %voffset.ptr + %zext.offset = zext i32 %voffset to i64 + %gep = getelementptr inbounds float, ptr %sbase, i64 %zext.offset + %load = load float, ptr %gep + ret float %load +} + +; Cannot push the shift into 32-bits, with an immediate offset. +define amdgpu_ps float @flat_load_saddr_f32_natural_addressing_immoffset(ptr inreg %sbase, ptr %voffset.ptr) { +; GFX1250-LABEL: flat_load_saddr_f32_natural_addressing_immoffset: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %voffset = load i32, ptr %voffset.ptr + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 128 + %load = load float, ptr %gep1 + ret float %load +} + +; Range is sufficiently restricted to push the shift into 32-bits. +define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range(ptr inreg %sbase, ptr %voffset.ptr) { +; GFX1250-LABEL: flat_load_f32_saddr_zext_vgpr_range: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %voffset = load i32, ptr %voffset.ptr, !range !0, !noundef !{} + %zext.offset = zext i32 %voffset to i64 + %gep = getelementptr inbounds float, ptr %sbase, i64 %zext.offset + %load = load float, ptr %gep + ret float %load +} + +; Range is sufficiently restricted to push the shift into 32-bits, with an imm offset +define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_imm_offset(ptr inreg %sbase, ptr %voffset.ptr) { +; GFX1250-LABEL: flat_load_f32_saddr_zext_vgpr_range_imm_offset: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:400 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %voffset = load i32, ptr %voffset.ptr, !range !0, !noundef !{} + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds float, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds float, ptr %gep0, i64 100 + %load = load float, ptr %gep1 + ret float %load +} + +; Range is 1 beyond the limit where we can move the shift into 32-bits. +define amdgpu_ps float @flat_load_f32_saddr_zext_vgpr_range_too_large(ptr inreg %sbase, ptr %voffset.ptr) { +; GFX1250-SDAG-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[2:3] +; GFX1250-SDAG-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_f32_saddr_zext_vgpr_range_too_large: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 2, v[0:1] +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b32 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %voffset = load i32, ptr %voffset.ptr, !range !1, !noundef !{} + %zext.offset = zext i32 %voffset to i64 + %gep = getelementptr inbounds float, ptr %sbase, i64 %zext.offset + %load = load float, ptr %gep + ret float %load +} + +; -------------------------------------------------------------------------------- +; Stress various type loads +; -------------------------------------------------------------------------------- + +define amdgpu_ps half @flat_load_saddr_i16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %cast.load = bitcast i16 %load to half + ret half %cast.load +} + +define amdgpu_ps half @flat_load_saddr_i16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %cast.load = bitcast i16 %load to half + ret half %cast.load +} + +define amdgpu_ps half @flat_load_saddr_f16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_f16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load half, ptr %gep0 + ret half %load +} + +define amdgpu_ps half @flat_load_saddr_f16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_f16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load half, ptr %gep1 + ret half %load +} + +define amdgpu_ps float @flat_load_saddr_i32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i32, ptr %gep0 + %cast.load = bitcast i32 %load to float + ret float %cast.load +} + +define amdgpu_ps float @flat_load_saddr_i32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i32, ptr %gep1 + %cast.load = bitcast i32 %load to float + ret float %cast.load +} + +define amdgpu_ps float @flat_load_saddr_f32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load float, ptr %gep0 + ret float %load +} + +define amdgpu_ps float @flat_load_saddr_f32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_f32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load float, ptr %gep1 + ret float %load +} + +define amdgpu_ps <2 x half> @flat_load_saddr_v2i16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <2 x i16>, ptr %gep0 + %cast.load = bitcast <2 x i16> %load to <2 x half> + ret <2 x half> %cast.load +} + +define amdgpu_ps <2 x half> @flat_load_saddr_v2i16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2i16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <2 x i16>, ptr %gep1 + %cast.load = bitcast <2 x i16> %load to <2 x half> + ret <2 x half> %cast.load +} + +define amdgpu_ps <2 x half> @flat_load_saddr_v2f16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2f16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <2 x half>, ptr %gep0 + ret <2 x half> %load +} + +define amdgpu_ps <2 x half> @flat_load_saddr_v2f16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2f16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <2 x half>, ptr %gep1 + ret <2 x half> %load +} + +define amdgpu_ps <2 x half> @flat_load_saddr_p3(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_p3: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load ptr addrspace(3), ptr %gep0 + %cast.load0 = ptrtoint ptr addrspace(3) %load to i32 + %cast.load1 = bitcast i32 %cast.load0 to <2 x half> + ret <2 x half> %cast.load1 +} + +define amdgpu_ps <2 x half> @flat_load_saddr_p3_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_p3_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load ptr addrspace(3), ptr %gep1 + %cast.load0 = ptrtoint ptr addrspace(3) %load to i32 + %cast.load1 = bitcast i32 %cast.load0 to <2 x half> + ret <2 x half> %cast.load1 +} + +define amdgpu_ps <2 x float> @flat_load_saddr_f64(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_f64: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_f64: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load double, ptr %gep0 + %cast.load = bitcast double %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_f64_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_f64_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_f64_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load double, ptr %gep1 + %cast.load = bitcast double %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i64: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i64: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i64, ptr %gep0 + %cast.load = bitcast i64 %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_i64_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i64_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i64_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i64, ptr %gep1 + %cast.load = bitcast i64 %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v2f32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <2 x float>, ptr %gep0 + ret <2 x float> %load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v2f32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v2f32_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v2f32_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <2 x float>, ptr %gep1 + ret <2 x float> %load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v2i32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <2 x i32>, ptr %gep0 + %cast.load = bitcast <2 x i32> %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v2i32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v2i32_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v2i32_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <2 x i32>, ptr %gep1 + %cast.load = bitcast <2 x i32> %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v4i16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <4 x i16>, ptr %gep0 + %cast.load = bitcast <4 x i16> %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v4i16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v4i16_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v4i16_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <4 x i16>, ptr %gep1 + %cast.load = bitcast <4 x i16> %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v4f16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <4 x half>, ptr %gep0 + %cast.load = bitcast <4 x half> %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_v4f16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_v4f16_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_v4f16_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <4 x half>, ptr %gep1 + %cast.load = bitcast <4 x half> %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @flat_load_saddr_p1(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_p1: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_p1: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load ptr, ptr %gep0 + %cast.load0 = ptrtoint ptr %load to i64 + %cast.load1 = bitcast i64 %cast.load0 to <2 x float> + ret <2 x float> %cast.load1 +} + +define amdgpu_ps <2 x float> @flat_load_saddr_p1_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_p1_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] +; GFX1250-SDAG-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_p1_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v3, vcc_lo +; GFX1250-GISEL-NEXT: flat_load_b64 v[0:1], v[0:1] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load ptr, ptr %gep1 + %cast.load0 = ptrtoint ptr %load to i64 + %cast.load1 = bitcast i64 %cast.load0 to <2 x float> + ret <2 x float> %cast.load1 +} + +define amdgpu_ps <3 x float> @flat_load_saddr_v3f32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v3f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b96 v[0:2], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <3 x float>, ptr %gep0 + ret <3 x float> %load +} + +define amdgpu_ps <3 x float> @flat_load_saddr_v3f32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v3f32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b96 v[0:2], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <3 x float>, ptr %gep1 + ret <3 x float> %load +} + +define amdgpu_ps <3 x float> @flat_load_saddr_v3i32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v3i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b96 v[0:2], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <3 x i32>, ptr %gep0 + %cast.load = bitcast <3 x i32> %load to <3 x float> + ret <3 x float> %cast.load +} + +define amdgpu_ps <3 x float> @flat_load_saddr_v3i32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v3i32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b96 v[0:2], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <3 x i32>, ptr %gep1 + %cast.load = bitcast <3 x i32> %load to <3 x float> + ret <3 x float> %cast.load +} + +define amdgpu_ps <6 x half> @flat_load_saddr_v6f16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v6f16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b96 v[0:2], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <6 x half>, ptr %gep0 + ret <6 x half> %load +} + +define amdgpu_ps <6 x half> @flat_load_saddr_v6f16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v6f16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b96 v[0:2], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <6 x half>, ptr %gep1 + ret <6 x half> %load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v4f32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v4f32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <4 x float>, ptr %gep0 + ret <4 x float> %load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v4f32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v4f32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <4 x float>, ptr %gep1 + ret <4 x float> %load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v4i32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v4i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <4 x i32>, ptr %gep0 + %cast.load = bitcast <4 x i32> %load to <4 x float> + ret <4 x float> %cast.load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v4i32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v4i32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <4 x i32>, ptr %gep1 + %cast.load = bitcast <4 x i32> %load to <4 x float> + ret <4 x float> %cast.load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v2i64(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <2 x i64>, ptr %gep0 + %cast.load = bitcast <2 x i64> %load to <4 x float> + ret <4 x float> %cast.load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v2i64_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2i64_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <2 x i64>, ptr %gep1 + %cast.load = bitcast <2 x i64> %load to <4 x float> + ret <4 x float> %cast.load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_i128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i128, ptr %gep0 + %cast.load = bitcast i128 %load to <4 x float> + ret <4 x float> %cast.load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_i128_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i128_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i128, ptr %gep1 + %cast.load = bitcast i128 %load to <4 x float> + ret <4 x float> %cast.load +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v2p1(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2p1: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <2 x ptr>, ptr %gep0 + %cast.load0 = ptrtoint <2 x ptr> %load to <2 x i64> + %cast.load1 = bitcast <2 x i64> %cast.load0 to <4 x float> + ret <4 x float> %cast.load1 +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v2p1_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v2p1_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <2 x ptr>, ptr %gep1 + %cast.load0 = ptrtoint <2 x ptr> %load to <2 x i64> + %cast.load1 = bitcast <2 x i64> %cast.load0 to <4 x float> + ret <4 x float> %cast.load1 +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v4p3(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v4p3: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load <4 x ptr addrspace(3)>, ptr %gep0 + %cast.load0 = ptrtoint <4 x ptr addrspace(3)> %load to <4 x i32> + %cast.load1 = bitcast <4 x i32> %cast.load0 to <4 x float> + ret <4 x float> %cast.load1 +} + +define amdgpu_ps <4 x float> @flat_load_saddr_v4p3_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_v4p3_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b128 v[0:3], v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load <4 x ptr addrspace(3)>, ptr %gep1 + %cast.load0 = ptrtoint <4 x ptr addrspace(3)> %load to <4 x i32> + %cast.load1 = bitcast <4 x i32> %cast.load0 to <4 x float> + ret <4 x float> %cast.load1 +} + +; -------------------------------------------------------------------------------- +; Extending loads +; -------------------------------------------------------------------------------- + +define amdgpu_ps float @flat_sextload_saddr_i8(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_sextload_saddr_i8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_i8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %sextload = sext i8 %load to i32 + %cast.load = bitcast i32 %sextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_sextload_saddr_i8_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_sextload_saddr_i8_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_i8 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i8, ptr %gep1 + %sextload = sext i8 %load to i32 + %cast.load = bitcast i32 %sextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_sextload_saddr_i16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_sextload_saddr_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_i16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %sextload = sext i16 %load to i32 + %cast.load = bitcast i32 %sextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_sextload_saddr_i16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_sextload_saddr_i16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_i16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %sextload = sext i16 %load to i32 + %cast.load = bitcast i32 %sextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_zextload_saddr_i8(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_zextload_saddr_i8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zextload = zext i8 %load to i32 + %cast.load = bitcast i32 %zextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_zextload_saddr_i8_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_zextload_saddr_i8_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i8, ptr %gep1 + %zextload = zext i8 %load to i32 + %cast.load = bitcast i32 %zextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_zextload_saddr_i16(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_zextload_saddr_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %zextload = zext i16 %load to i32 + %cast.load = bitcast i32 %zextload to float + ret float %cast.load +} + +define amdgpu_ps float @flat_zextload_saddr_i16_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_zextload_saddr_i16_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %zextload = zext i16 %load to i32 + %cast.load = bitcast i32 %zextload to float + ret float %cast.load +} + +; -------------------------------------------------------------------------------- +; Atomic load +; -------------------------------------------------------------------------------- + +define amdgpu_ps float @atomic_flat_load_saddr_i32(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: atomic_flat_load_saddr_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load atomic i32, ptr %gep0 seq_cst, align 4 + %cast.load = bitcast i32 %load to float + ret float %cast.load +} + +define amdgpu_ps float @atomic_flat_load_saddr_i32_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: atomic_flat_load_saddr_i32_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b32 v0, v0, s[2:3] offset:-128 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load atomic i32, ptr %gep1 seq_cst, align 4 + %cast.load = bitcast i32 %load to float + ret float %cast.load +} + +define amdgpu_ps <2 x float> @atomic_flat_load_saddr_i64(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: atomic_flat_load_saddr_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load atomic i64, ptr %gep0 seq_cst, align 8 + %cast.load = bitcast i64 %load to <2 x float> + ret <2 x float> %cast.load +} + +define amdgpu_ps <2 x float> @atomic_flat_load_saddr_i64_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: atomic_flat_load_saddr_i64_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_b64 v[0:1], v0, s[2:3] offset:-128 scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: global_inv scope:SCOPE_SYS +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load atomic i64, ptr %gep1 seq_cst, align 8 + %cast.load = bitcast i64 %load to <2 x float> + ret <2 x float> %cast.load +} + +; -------------------------------------------------------------------------------- +; D16 load (low 16) +; -------------------------------------------------------------------------------- + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_undef_hi(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_d16lo_undef_hi: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %build = insertelement <2 x i16> undef, i16 %load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_undef_hi_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_d16lo_undef_hi_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %build = insertelement <2 x i16> undef, i16 %load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zero_hi(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_d16lo_zero_hi: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zero_hi_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_d16lo_zero_hi_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_reg_hi(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_reg_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_reg_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %build = insertelement <2 x i16> %reg, i16 %load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_reg_hi_immneg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_reg_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_reg_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %build = insertelement <2 x i16> %reg, i16 %load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zexti8_reg_hi(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_zexti8_reg_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_zexti8_reg_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zext.load = zext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %zext.load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_zexti8_reg_hi_immneg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_zexti8_reg_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_zexti8_reg_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i8, ptr %gep1 + %zext.load = zext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %zext.load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_sexti8_reg_hi(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_sexti8_reg_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_i8 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_sexti8_reg_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_i8 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %sext.load = sext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %sext.load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16lo_sexti8_reg_hi_immneg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16lo_sexti8_reg_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_i8 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_bfi_b32 v0, 0xffff, v0, v1 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16lo_sexti8_reg_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_i8 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff0000, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i8, ptr %gep1 + %sext.load = sext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %sext.load, i32 0 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +; -------------------------------------------------------------------------------- +; D16 hi load (hi16) +; -------------------------------------------------------------------------------- + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_undef_hi(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_d16hi_undef_hi: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %build = insertelement <2 x i16> undef, i16 %load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_undef_hi_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-LABEL: flat_load_saddr_i16_d16hi_undef_hi_immneg128: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %build = insertelement <2 x i16> undef, i16 %load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zero_hi(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_zero_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, 0, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_zero_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zero_hi_immneg128(ptr inreg %sbase, i32 %voffset) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_zero_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, 0, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_zero_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_reg_hi(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_reg_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_reg_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i16, ptr %gep0 + %build = insertelement <2 x i16> %reg, i16 %load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_reg_hi_immneg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_reg_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_reg_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u16 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i16, ptr %gep1 + %build = insertelement <2 x i16> %reg, i16 %load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zexti8_reg_hi(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_zexti8_reg_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_zexti8_reg_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %zext.load = zext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %zext.load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_zexti8_reg_hi_immneg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_zexti8_reg_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_zexti8_reg_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i8, ptr %gep1 + %zext.load = zext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %zext.load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_sexti8_reg_hi(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_sexti8_reg_hi: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_i8 v0, v0, s[2:3] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_sexti8_reg_hi: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_i8 v0, v0, s[2:3] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %load = load i8, ptr %gep0 + %sext.load = sext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %sext.load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +define amdgpu_ps <2 x half> @flat_load_saddr_i16_d16hi_sexti8_reg_hi_immneg128(ptr inreg %sbase, i32 %voffset, <2 x i16> %reg) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i16_d16hi_sexti8_reg_hi_immneg128: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: flat_load_i8 v0, v0, s[2:3] offset:-128 +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: v_perm_b32 v0, v0, v1, 0x5040100 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i16_d16hi_sexti8_reg_hi_immneg128: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: flat_load_i8 v0, v0, s[2:3] offset:-128 +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX1250-GISEL-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.offset = zext i32 %voffset to i64 + %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset + %gep1 = getelementptr inbounds i8, ptr %gep0, i64 -128 + %load = load i8, ptr %gep1 + %sext.load = sext i8 %load to i16 + %build = insertelement <2 x i16> %reg, i16 %sext.load, i32 1 + %cast = bitcast <2 x i16> %build to <2 x half> + ret <2 x half> %cast +} + +; -------------------------------------------------------------------------------- +; or-with-constant as add +; -------------------------------------------------------------------------------- + +; Check add-as-or with split 64-bit or. +define amdgpu_ps float @flat_load_saddr_i8_offset_or_i64_imm_offset_16(ptr addrspace(6) inreg %sbase, i32 %idx) { +; GFX1250-LABEL: flat_load_saddr_i8_offset_or_i64_imm_offset_16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_bitop2_b32 v0, 16, v0 bitop3:0x54 +; GFX1250-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-NEXT: ; return to shader part epilog + %zext.idx = zext i32 %idx to i64 + %or = or i64 %zext.idx, 16 + %addr = inttoptr i64 %or to ptr + %load = load i8, ptr %addr + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +define amdgpu_ps float @flat_load_saddr_i8_offset_or_i64_imm_offset_4160(ptr addrspace(6) inreg %sbase, i32 %idx) { +; GFX1250-SDAG-LABEL: flat_load_saddr_i8_offset_or_i64_imm_offset_4160: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, 0x1040, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-SDAG-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: ; return to shader part epilog +; +; GFX1250-GISEL-LABEL: flat_load_saddr_i8_offset_or_i64_imm_offset_4160: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, 0x1040, v0 +; GFX1250-GISEL-NEXT: flat_load_u8 v0, v[0:1] +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: ; return to shader part epilog + %zext.idx = zext i32 %idx to i64 + %or = or i64 %zext.idx, 4160 + %addr = inttoptr i64 %or to ptr + %load = load i8, ptr %addr + %zext = zext i8 %load to i32 + %to.vgpr = bitcast i32 %zext to float + ret float %to.vgpr +} + +; -------------------------------------------------------------------------------- +; Full 64-bit scalar add. +; -------------------------------------------------------------------------------- + +define amdgpu_ps void @flat_addr_64bit_lsr_iv(ptr inreg %arg) { +; GFX1250-SDAG-LABEL: flat_addr_64bit_lsr_iv: +; GFX1250-SDAG: ; %bb.0: ; %bb +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], 0 +; GFX1250-SDAG-NEXT: .LBB116_1: ; %bb3 +; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_add_nc_u64 s[4:5], s[2:3], s[0:1] +; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 4 +; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0 +; GFX1250-SDAG-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0x400 +; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB116_1 +; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: flat_addr_64bit_lsr_iv: +; GFX1250-GISEL: ; %bb.0: ; %bb +; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: .LBB116_1: ; %bb3 +; GFX1250-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 +; GFX1250-GISEL-NEXT: s_wait_alu 0xfffd +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo +; GFX1250-GISEL-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, 4 +; GFX1250-GISEL-NEXT: flat_load_b32 v4, v[4:5] scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x400, v2 +; GFX1250-GISEL-NEXT: s_cbranch_vccz .LBB116_1 +; GFX1250-GISEL-NEXT: ; %bb.2: ; %bb2 +; GFX1250-GISEL-NEXT: s_endpgm +bb: + br label %bb3 + +bb2: ; preds = %bb3 + ret void + +bb3: ; preds = %bb3, %bb + %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ] + %i4 = zext i32 %i to i64 + %i5 = getelementptr inbounds float, ptr %arg, i64 %i4 + %i6 = load volatile float, ptr %i5, align 4 + %i8 = add nuw nsw i32 %i, 1 + %i9 = icmp eq i32 %i8, 256 + br i1 %i9, label %bb2, label %bb3 +} + +; Make sure we only have a single zero vaddr initialization. + +define amdgpu_ps void @flat_addr_64bit_lsr_iv_multiload(ptr inreg %arg, ptr inreg %arg.1) { +; GFX1250-SDAG-LABEL: flat_addr_64bit_lsr_iv_multiload: +; GFX1250-SDAG: ; %bb.0: ; %bb +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], 0 +; GFX1250-SDAG-NEXT: .LBB117_1: ; %bb3 +; GFX1250-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_add_nc_u64 s[4:5], s[2:3], s[0:1] +; GFX1250-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 4 +; GFX1250-SDAG-NEXT: s_wait_dscnt 0x0 +; GFX1250-SDAG-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: flat_load_b32 v1, v0, s[4:5] scope:SCOPE_SYS +; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s0, 0x400 +; GFX1250-SDAG-NEXT: s_cbranch_scc0 .LBB117_1 +; GFX1250-SDAG-NEXT: ; %bb.2: ; %bb2 +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: flat_addr_64bit_lsr_iv_multiload: +; GFX1250-GISEL: ; %bb.0: ; %bb +; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: .LBB117_1: ; %bb3 +; GFX1250-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1250-GISEL-NEXT: s_wait_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2 +; GFX1250-GISEL-NEXT: s_wait_alu 0xfffd +; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v5, null, v1, v3, vcc_lo +; GFX1250-GISEL-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 0, 4 +; GFX1250-GISEL-NEXT: flat_load_b32 v6, v[4:5] scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-GISEL-NEXT: flat_load_b32 v4, v[4:5] scope:SCOPE_SYS +; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX1250-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x400, v2 +; GFX1250-GISEL-NEXT: s_cbranch_vccz .LBB117_1 +; GFX1250-GISEL-NEXT: ; %bb.2: ; %bb2 +; GFX1250-GISEL-NEXT: s_endpgm +bb: + br label %bb3 + +bb2: ; preds = %bb3 + ret void + +bb3: ; preds = %bb3, %bb + %i = phi i32 [ 0, %bb ], [ %i8, %bb3 ] + %i4 = zext i32 %i to i64 + %i5 = getelementptr inbounds float, ptr %arg, i64 %i4 + %i6 = load volatile float, ptr %i5, align 4 + %i5.1 = getelementptr inbounds float, ptr %arg.1, i64 %i4 + %i6.1 = load volatile float, ptr %i5, align 4 + %i8 = add nuw nsw i32 %i, 1 + %i9 = icmp eq i32 %i8, 256 + br i1 %i9, label %bb2, label %bb3 +} + +!0 = !{i32 0, i32 1073741824} ; (1 << 30) +!1 = !{i32 0, i32 1073741825} ; (1 << 30) + 1 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s index 07b4055f0ab9c..737d7b3de4e92 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s @@ -20,3 +20,135 @@ tensor_stop tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS // GFX1250: tensor_stop th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x1b,0xee,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_atomic_add_f32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_add_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 +// GFX1250: flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_and_b32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_and_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_cmpswap_b64 v2, v[2:5], s[2:3] +// GFX1250: flat_atomic_cmpswap_b64 v2, v[2:5], s[2:3] ; encoding: [0x02,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_cond_sub_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_cond_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_dec_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_dec_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x10,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_dec_u64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_dec_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_inc_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_inc_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_inc_u64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_inc_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_max_num_f32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_max_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_max_i32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_max_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_max_i64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_max_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_max_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_max_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_max_u64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_max_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_min_num_f32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_min_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_min_i32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_min_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_min_i64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_min_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_min_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_min_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_min_u64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_min_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_or_b32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_or_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_or_b64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_or_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_sub_clamp_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_sub_clamp_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_sub_u32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_sub_u64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_sub_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_swap_b32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_swap_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0c,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_swap_b64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_swap_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_xor_b32 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_xor_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_xor_b64 v2, v[2:3], s[2:3] offset:64 +// GFX1250: flat_atomic_xor_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_pk_add_f16 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_pk_add_f16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +flat_atomic_pk_add_bf16 v2, v3, s[2:3] offset:64 +// GFX1250: flat_atomic_pk_add_bf16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt index 6421c6f30e177..55bc3e7a5746c 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt @@ -1,5 +1,2831 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# GFX1250: flat_atomic_add_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x15,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_u32 v2, v3, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff] +0x02,0x40,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_and_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_and_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b32 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b64 v2, v[2:5], s[2:3] ; encoding: [0x02,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00] +0x02,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00 + +# GFX1250: flat_atomic_cond_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_dec_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x10,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x10,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_dec_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_inc_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_inc_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x00,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x14,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x0e,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_bf16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_f16 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x40,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x16,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_clamp_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_u32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x0d,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_u64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x0c,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x0c,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_xor_b32 v2, v3, s[2:3] offset:64 ; encoding: [0x02,0x80,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00] +0x02,0x80,0x0f,0xec,0x00,0x00,0x80,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_xor_b64 v2, v[2:3], s[2:3] offset:64 ; encoding: [0x02,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00] +0x02,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x02,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_f32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_f32 v1, v[0:1], v2 offset:-8000000 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0xee,0x85] +0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0xee,0x85 + +# GFX1250: flat_atomic_add_f32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_f32 v1, v[0:1], v2 offset:8000000 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0x12,0x7a] +0x7c,0x80,0x15,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0x12,0x7a + +# GFX1250: flat_atomic_add_f32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_f32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x15,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_u32 v[4:5], v5 ; encoding: [0x7c,0x40,0x0d,0xec,0x00,0x00,0x80,0x02,0x04,0x00,0x00,0x00] +0x7c,0x40,0x0d,0xec,0x00,0x00,0x80,0x02,0x04,0x00,0x00,0x00 + +# GFX1250: flat_atomic_add_u64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_u64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_add_u64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_add_u64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_and_b32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_and_b32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_and_b32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x00,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_and_b32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x00,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_and_b64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_and_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_and_b64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_and_b64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b32 v0, v[2:3], v[4:5] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xec,0x00,0x00,0x10,0x02,0x02,0xff,0x07,0x00] +0x7c,0x00,0x0d,0xec,0x00,0x00,0x10,0x02,0x02,0xff,0x07,0x00 + +# GFX1250: flat_atomic_cmpswap_b32 v0, v[2:3], v[4:5] th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xec,0x00,0x00,0x10,0x02,0x02,0x00,0x00,0x00] +0x7c,0x00,0x0d,0xec,0x00,0x00,0x10,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b32 v1, v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cmpswap_b32 v1, v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cmpswap_b32 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b64 v[0:1], v[2:5] offset:-64 ; encoding: [0x7c,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cmpswap_b64 v[0:1], v[2:5] offset:64 ; encoding: [0x7c,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cmpswap_b64 v[2:3], v[4:5], v[6:9] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00] +0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00 + +# GFX1250: flat_atomic_cmpswap_b64 v[2:3], v[4:5], v[6:9] th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00] +0x7c,0x80,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00 + +# GFX1250: flat_atomic_cond_sub_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cond_sub_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_cond_sub_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x00,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_cond_sub_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x00,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_dec_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x10,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x10,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_dec_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x10,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x10,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_dec_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x00,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_dec_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x00,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_dec_u64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_dec_u64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_dec_u64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_dec_u64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_inc_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_inc_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_inc_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0xc0,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_inc_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0xc0,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_inc_u64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_inc_u64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x13,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_inc_u64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_inc_u64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x13,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_i32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_i32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_i64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_i64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_i64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_num_f32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_num_f32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_num_f32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0xc0,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0xc0,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_u64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_u64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_max_u64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_max_u64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_i32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x00,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_i32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x00,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_i64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_i64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_i64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_num_f32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x14,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_num_f32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_num_f32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x14,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0e,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0e,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_u64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_u64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_min_u64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_min_u64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_or_b32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_or_b32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_or_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_or_b64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_or_b64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_bf16 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_pk_add_bf16 v1, v[0:1], v2 offset:-8000000 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0xee,0x85] +0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0xee,0x85 + +# GFX1250: flat_atomic_pk_add_bf16 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_bf16 v1, v[0:1], v2 offset:8000000 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0x12,0x7a] +0x7c,0x80,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0x12,0x7a + +# GFX1250: flat_atomic_pk_add_bf16 v[0:1], v2 ; encoding: [0x7c,0x80,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x80,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_bf16 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_pk_add_bf16 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_f16 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_pk_add_f16 v1, v[0:1], v2 offset:-8000000 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0xee,0x85] +0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0xee,0x85 + +# GFX1250: flat_atomic_pk_add_f16 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_f16 v1, v[0:1], v2 offset:8000000 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0x12,0x7a] +0x7c,0x40,0x16,0xec,0x01,0x00,0x10,0x01,0x00,0x00,0x12,0x7a + +# GFX1250: flat_atomic_pk_add_f16 v[0:1], v2 ; encoding: [0x7c,0x40,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x40,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: flat_atomic_pk_add_f16 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_pk_add_f16 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x16,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_clamp_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_sub_clamp_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_clamp_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0xc0,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_u32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_sub_u32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0d,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_u32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_sub_u32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0d,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_u64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_sub_u64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x11,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_sub_u64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_sub_u64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x11,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b32 v0, v[2:3], v3 offset:-2048 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0x00,0xf8,0xff] +0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0x00,0xf8,0xff + +# GFX1250: flat_atomic_swap_b32 v0, v[2:3], v3 offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0xff,0x07,0x00] +0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0xff,0x07,0x00 + +# GFX1250: flat_atomic_swap_b32 v0, v[2:3], v3 offset:2048 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0x00,0x08,0x00] +0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0x00,0x08,0x00 + +# GFX1250: flat_atomic_swap_b32 v0, v[2:3], v3 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0x00,0x00,0x00] +0x7c,0xc0,0x0c,0xec,0x00,0x00,0x90,0x01,0x02,0x00,0x00,0x00 + +# GFX1250: flat_atomic_swap_b32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0c,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_swap_b32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0c,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0xc0,0x0c,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0c,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_swap_b32 v[0:1], v2 offset:64 ; encoding: [0x7c,0xc0,0x0c,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0c,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_swap_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x10,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_swap_b64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_swap_b64 v[2:3], v[4:5], v[6:7] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00] +0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00 + +# GFX1250: flat_atomic_swap_b64 v[2:3], v[4:5], v[6:7] th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00] +0x7c,0x40,0x10,0xec,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00 + +# GFX1250: flat_atomic_xor_b32 v1, v[0:1], v2 offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_xor_b32 v1, v[0:1], v2 offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0f,0xec,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_xor_b32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_xor_b32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0f,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_xor_b64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_xor_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x12,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_atomic_xor_b64 v[2:3], v[0:1], v[2:3] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_atomic_xor_b64 v[2:3], v[0:1], v[2:3] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x12,0xec,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_b128 v[2:5], v[0:1] offset:-64 ; encoding: [0x7c,0xc0,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_b128 v[2:5], v[0:1] offset:64 ; encoding: [0x7c,0xc0,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_b128 v[2:5], v[6:7] ; encoding: [0x7c,0xc0,0x05,0xec,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00] +0x7c,0xc0,0x05,0xec,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00 + +# GFX1250: flat_load_b32 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_b32 v1, v[0:1] offset:64 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_b32 v1, v[4:5] ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_b32 v1, v[4:5] offset:-2048 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0xf8,0xff] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0xf8,0xff + +# GFX1250: flat_load_b32 v1, v[4:5] offset:-4 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0xfc,0xff,0xff] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0xfc,0xff,0xff + +# GFX1250: flat_load_b32 v1, v[4:5] offset:2047 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0xff,0x07,0x00] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0xff,0x07,0x00 + +# GFX1250: flat_load_b32 v1, v[4:5] offset:2048 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x08,0x00] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x08,0x00 + +# GFX1250: flat_load_b32 v1, v[4:5] offset:4 ; encoding: [0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x04,0x00,0x00] +0x7c,0x00,0x05,0xec,0x01,0x00,0x00,0x00,0x04,0x04,0x00,0x00 + +# GFX1250: flat_load_b64 v[2:3], v[0:1] offset:-64 ; encoding: [0x7c,0x40,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_b64 v[2:3], v[0:1] offset:64 ; encoding: [0x7c,0x40,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_b64 v[2:3], v[4:5] ; encoding: [0x7c,0x40,0x05,0xec,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x05,0xec,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_b96 v[2:4], v[0:1] offset:-64 ; encoding: [0x7c,0x80,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_b96 v[2:4], v[0:1] offset:64 ; encoding: [0x7c,0x80,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x05,0xec,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_b96 v[2:4], v[6:7] ; encoding: [0x7c,0x80,0x05,0xec,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00] +0x7c,0x80,0x05,0xec,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00 + +# GFX1250: flat_load_d16_b16 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x00,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_d16_b16 v1, v[0:1] offset:64 ; encoding: [0x7c,0x00,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_b16 v1, v[4:5] ; encoding: [0x7c,0x00,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_d16_hi_b16 v1, v[0:1] offset:-64 ; encoding: [0x7c,0xc0,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_d16_hi_b16 v1, v[0:1] offset:64 ; encoding: [0x7c,0xc0,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_hi_b16 v1, v[4:5] ; encoding: [0x7c,0xc0,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0xc0,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_d16_hi_i8 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x80,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_d16_hi_i8 v1, v[0:1] offset:64 ; encoding: [0x7c,0x80,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_hi_i8 v1, v[4:5] ; encoding: [0x7c,0x80,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x80,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_d16_hi_u8 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x40,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_d16_hi_u8 v1, v[0:1] offset:64 ; encoding: [0x7c,0x40,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x08,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_hi_u8 v1, v[4:5] ; encoding: [0x7c,0x40,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x08,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_d16_i8 v1, v[0:1] offset:-64 ; encoding: [0x7c,0xc0,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_d16_i8 v1, v[0:1] offset:64 ; encoding: [0x7c,0xc0,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_i8 v1, v[4:5] ; encoding: [0x7c,0xc0,0x07,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0xc0,0x07,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_d16_u8 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x80,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_d16_u8 v1, v[0:1] offset:64 ; encoding: [0x7c,0x80,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x07,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_d16_u8 v1, v[4:5] ; encoding: [0x7c,0x80,0x07,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x80,0x07,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_i16 v1, v[0:1] offset:-64 ; encoding: [0x7c,0xc0,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_i16 v1, v[0:1] offset:64 ; encoding: [0x7c,0xc0,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_i16 v1, v[4:5] ; encoding: [0x7c,0xc0,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0xc0,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_i8 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x40,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_i8 v1, v[0:1] offset:64 ; encoding: [0x7c,0x40,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_i8 v1, v[4:5] ; encoding: [0x7c,0x40,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_u16 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x80,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_u16 v1, v[0:1] offset:64 ; encoding: [0x7c,0x80,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_u16 v1, v[4:5] ; encoding: [0x7c,0x80,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x80,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_load_u8 v1, v[0:1] offset:-64 ; encoding: [0x7c,0x00,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: flat_load_u8 v1, v[0:1] offset:64 ; encoding: [0x7c,0x00,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x04,0xec,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: flat_load_u8 v1, v[4:5] ; encoding: [0x7c,0x00,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x04,0xec,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_store_b128 v[0:1], v[2:5] offset:-64 ; encoding: [0x7c,0x40,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_b128 v[0:1], v[2:5] offset:64 ; encoding: [0x7c,0x40,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_b128 v[2:3], v[4:7] ; encoding: [0x7c,0x40,0x07,0xec,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0x40,0x07,0xec,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: flat_store_b16 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_b16 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_b16 v[4:5], v1 ; encoding: [0x7c,0x40,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_store_b32 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x80,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_b32 v[0:1], v2 offset:64 ; encoding: [0x7c,0x80,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_b32 v[4:5], v1 offset:-16 ; encoding: [0x7c,0x80,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0xf0,0xff,0xff] +0x7c,0x80,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0xf0,0xff,0xff + +# GFX1250: flat_store_b32 v[4:5], v1 offset:16 ; encoding: [0x7c,0x80,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0x10,0x00,0x00] +0x7c,0x80,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0x10,0x00,0x00 + +# GFX1250: flat_store_b64 v[0:1], v[2:3] offset:-64 ; encoding: [0x7c,0xc0,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0xc0,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_b64 v[2:3], v[4:5] ; encoding: [0x7c,0xc0,0x06,0xec,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0xc0,0x06,0xec,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: flat_store_b8 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x00,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_b8 v[0:1], v2 offset:64 ; encoding: [0x7c,0x00,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x06,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_b8 v[4:5], v1 ; encoding: [0x7c,0x00,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x06,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_store_b96 v[0:1], v[2:4] offset:-64 ; encoding: [0x7c,0x00,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_b96 v[0:1], v[2:4] offset:64 ; encoding: [0x7c,0x00,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x07,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_b96 v[2:3], v[4:6] ; encoding: [0x7c,0x00,0x07,0xec,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0x00,0x07,0xec,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: flat_store_d16_hi_b16 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x40,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_d16_hi_b16 v[0:1], v2 offset:64 ; encoding: [0x7c,0x40,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_d16_hi_b16 v[4:5], v1 ; encoding: [0x7c,0x40,0x09,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x09,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: flat_store_d16_hi_b8 v[0:1], v2 offset:-64 ; encoding: [0x7c,0x00,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: flat_store_d16_hi_b8 v[0:1], v2 offset:64 ; encoding: [0x7c,0x00,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x09,0xec,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: flat_store_d16_hi_b8 v[4:5], v1 ; encoding: [0x7c,0x00,0x09,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x09,0xec,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_atomic_add_f32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_f32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_f32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_f32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_f32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_f32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x15,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_f32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_f32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_add_u64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_add_u64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_and_b64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_and_b64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0d,0xee,0x00,0x00,0x10,0x01,0x01,0xff,0x07,0x00] +0x02,0x00,0x0d,0xee,0x00,0x00,0x10,0x01,0x01,0xff,0x07,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v0, v1, v[2:3], s[2:3] th:TH_ATOMIC_RETURN ; encoding: [0x02,0x00,0x0d,0xee,0x00,0x00,0x10,0x01,0x01,0x00,0x00,0x00] +0x02,0x00,0x0d,0xee,0x00,0x00,0x10,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b32 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xee,0x00,0x00,0x10,0x02,0x02,0xff,0x07,0x00] +0x7c,0x00,0x0d,0xee,0x00,0x00,0x10,0x02,0x02,0xff,0x07,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v0, v[2:3], v[4:5], off th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xee,0x00,0x00,0x10,0x02,0x02,0x00,0x00,0x00] +0x7c,0x00,0x0d,0xee,0x00,0x00,0x10,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b32 v1, v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v1, v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b32 v1, v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b32 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b32 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v0, v[2:5], s[0:1] offset:-64 ; encoding: [0x00,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b64 v0, v[2:5], s[0:1] offset:64 ; encoding: [0x00,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[0:1], v[2:5], off offset:-64 ; encoding: [0x7c,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b64 v[0:1], v[2:5], off offset:64 ; encoding: [0x7c,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v0, v[2:5], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v0, v[2:5], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v3, v[6:9], s[2:3] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0xff,0x07,0x00] +0x02,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0xff,0x07,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v3, v[6:9], s[2:3] th:TH_ATOMIC_RETURN ; encoding: [0x02,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0x00,0x00,0x00] +0x02,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0x00,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v[4:5], v[6:9], off offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00] +0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00 + +# GFX1250: global_atomic_cmpswap_b64 v[2:3], v[4:5], v[6:9], off th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00] +0x7c,0x80,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00 + +# GFX1250: global_atomic_cond_sub_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cond_sub_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cond_sub_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cond_sub_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cond_sub_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cond_sub_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_cond_sub_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_cond_sub_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x10,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_dec_u64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_dec_u64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x13,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_inc_u64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_inc_u64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x13,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_i64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_i64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_num_f32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_num_f32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_num_f32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_num_f32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_num_f32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_num_f32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_num_f32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_num_f32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_max_u64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_max_u64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_i64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_i64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_num_f32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_num_f32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_num_f32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_num_f32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_num_f32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_num_f32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x14,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_num_f32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_num_f32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x14,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0e,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0e,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_min_u64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_min_u64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_or_b64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_or_b64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_bf16 v0, v2, s[0:1] ; encoding: [0x00,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x00,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_pk_add_bf16 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_bf16 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_bf16 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_bf16 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_bf16 v1, v0, v2, s[0:1] th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x00,0x00,0x00] +0x00,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_pk_add_bf16 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_bf16 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_bf16 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_bf16 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_f16 v0, v2, s[0:1] ; encoding: [0x00,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x00,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_pk_add_f16 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_f16 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_f16 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_f16 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_f16 v1, v0, v2, s[0:1] th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x00,0x00,0x00] +0x00,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: global_atomic_pk_add_f16 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_f16 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x16,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_pk_add_f16 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_pk_add_f16 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x16,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_clamp_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_clamp_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_clamp_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_clamp_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_clamp_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_clamp_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0xc0,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0d,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0d,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x11,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_sub_u64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_sub_u64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x11,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b32 v0, v1, v3, s[2:3] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x01,0xff,0x07,0x00] +0x02,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x01,0xff,0x07,0x00 + +# GFX1250: global_atomic_swap_b32 v0, v1, v3, s[2:3] th:TH_ATOMIC_RETURN ; encoding: [0x02,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x01,0x00,0x00,0x00] +0x02,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: global_atomic_swap_b32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b32 v0, v[2:3], v3, off offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x02,0xff,0x07,0x00] +0x7c,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x02,0xff,0x07,0x00 + +# GFX1250: global_atomic_swap_b32 v0, v[2:3], v3, off th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x02,0x00,0x00,0x00] +0x7c,0xc0,0x0c,0xee,0x00,0x00,0x90,0x01,0x02,0x00,0x00,0x00 + +# GFX1250: global_atomic_swap_b32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0c,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x0c,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x10,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b64 v[2:3], v3, v[6:7], s[2:3] offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0xff,0x07,0x00] +0x02,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0xff,0x07,0x00 + +# GFX1250: global_atomic_swap_b64 v[2:3], v3, v[6:7], s[2:3] th:TH_ATOMIC_RETURN ; encoding: [0x02,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0x00,0x00,0x00] +0x02,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x03,0x00,0x00,0x00 + +# GFX1250: global_atomic_swap_b64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_swap_b64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_swap_b64 v[2:3], v[4:5], v[6:7], off offset:2047 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00] +0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0xff,0x07,0x00 + +# GFX1250: global_atomic_swap_b64 v[2:3], v[4:5], v[6:7], off th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00] +0x7c,0x40,0x10,0xee,0x02,0x00,0x10,0x03,0x04,0x00,0x00,0x00 + +# GFX1250: global_atomic_xor_b32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b32 v1, v0, v2, s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b32 v1, v0, v2, s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b32 v1, v[0:1], v2, off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b32 v1, v[0:1], v2, off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0f,0xee,0x01,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x0f,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x12,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b64 v[2:3], v0, v[2:3], s[0:1] offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b64 v[2:3], v0, v[2:3], s[0:1] offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x00,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_atomic_xor_b64 v[2:3], v[0:1], v[2:3], off offset:-64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_atomic_xor_b64 v[2:3], v[0:1], v[2:3], off offset:64 th:TH_ATOMIC_RETURN ; encoding: [0x7c,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x12,0xee,0x02,0x00,0x10,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_inv ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x0a,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_inv scope:SCOPE_DEV ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x0a,0xee,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_inv scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x0a,0xee,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x0a,0xee,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_load_addtid_b32 v1, off ; encoding: [0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_load_addtid_b32 v1, off offset:-64 ; encoding: [0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_addtid_b32 v1, off offset:64 ; encoding: [0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_addtid_b32 v1, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_addtid_b32 v1, s[0:1] offset:64 ; encoding: [0x00,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_addtid_b32 v1, s[2:3] ; encoding: [0x02,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x02,0x00,0x0a,0xee,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_load_b128 v[2:5], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b128 v[2:5], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b128 v[2:5], v5, s[2:3] ; encoding: [0x02,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x05,0x00,0x00,0x00] +0x02,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x05,0x00,0x00,0x00 + +# GFX1250: global_load_b128 v[2:5], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b128 v[2:5], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b128 v[2:5], v[6:7], off ; encoding: [0x7c,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00] +0x7c,0xc0,0x05,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00 + +# GFX1250: global_load_b32 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b32 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b32 v1, v3, s[2:3] ; encoding: [0x02,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_b32 v1, v3, s[2:3] offset:2047 ; encoding: [0x02,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x03,0xff,0x07,0x00] +0x02,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x03,0xff,0x07,0x00 + +# GFX1250: global_load_b32 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b32 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b32 v1, v[4:5], off ; encoding: [0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_b32 v1, v[4:5], off offset:2047 ; encoding: [0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x04,0xff,0x07,0x00] +0x7c,0x00,0x05,0xee,0x01,0x00,0x00,0x00,0x04,0xff,0x07,0x00 + +# GFX1250: global_load_b64 v[2:3], v0, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b64 v[2:3], v0, s[0:1] offset:64 ; encoding: [0x00,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b64 v[2:3], v3, s[2:3] ; encoding: [0x02,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_b64 v[2:3], v[0:1], off offset:-64 ; encoding: [0x7c,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b64 v[2:3], v[0:1], off offset:64 ; encoding: [0x7c,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x05,0xee,0x02,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_b96 v[2:4], v0, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b96 v[2:4], v0, s[0:1] offset:64 ; encoding: [0x00,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b96 v[2:4], v5, s[2:3] ; encoding: [0x02,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x05,0x00,0x00,0x00] +0x02,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x05,0x00,0x00,0x00 + +# GFX1250: global_load_b96 v[2:4], v[0:1], off offset:-64 ; encoding: [0x7c,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_b96 v[2:4], v[0:1], off offset:64 ; encoding: [0x7c,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_b96 v[2:4], v[6:7], off ; encoding: [0x7c,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00] +0x7c,0x80,0x05,0xee,0x02,0x00,0x00,0x00,0x06,0x00,0x00,0x00 + +# GFX1250: global_load_block v[8:39], v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_block v[8:39], v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_block v[8:39], v5, s[2:3] ; encoding: [0x02,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x05,0x00,0x00,0x00] +0x02,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x05,0x00,0x00,0x00 + +# GFX1250: global_load_block v[8:39], v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_block v[8:39], v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_block v[8:39], v[6:7], off ; encoding: [0x7c,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x06,0x00,0x00,0x00] +0x7c,0xc0,0x14,0xee,0x08,0x00,0x00,0x00,0x06,0x00,0x00,0x00 + +# GFX1250: global_load_block v[8:39], v[6:7], off th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0xc0,0x14,0xee,0x08,0x00,0x24,0x00,0x06,0x00,0x00,0x00] +0x7c,0xc0,0x14,0xee,0x08,0x00,0x24,0x00,0x06,0x00,0x00,0x00 + +# GFX1250: global_load_d16_b16 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_b16 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_b16 v1, v3, s[2:3] ; encoding: [0x02,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_d16_b16 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_b16 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_b16 v1, v[4:5], off ; encoding: [0x7c,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_d16_hi_b16 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_hi_b16 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_hi_b16 v1, v3, s[2:3] ; encoding: [0x02,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_d16_hi_b16 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_hi_b16 v1, v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_hi_b16 v1, v[4:5], off ; encoding: [0x7c,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0xc0,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_d16_hi_i8 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_hi_i8 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_hi_i8 v1, v3, s[2:3] ; encoding: [0x02,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_d16_hi_i8 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_hi_i8 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_hi_i8 v1, v[4:5], off ; encoding: [0x7c,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x80,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_d16_hi_u8 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_hi_u8 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_hi_u8 v1, v3, s[2:3] ; encoding: [0x02,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_d16_hi_u8 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_hi_u8 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_hi_u8 v1, v[4:5], off ; encoding: [0x7c,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x08,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_d16_i8 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_i8 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_i8 v1, v3, s[2:3] ; encoding: [0x02,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_d16_i8 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_i8 v1, v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_i8 v1, v[4:5], off ; encoding: [0x7c,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0xc0,0x07,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_d16_u8 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_u8 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_u8 v1, v3, s[2:3] ; encoding: [0x02,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_d16_u8 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_d16_u8 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_d16_u8 v1, v[4:5], off ; encoding: [0x7c,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x80,0x07,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_i16 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_i16 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_i16 v1, v3, s[2:3] ; encoding: [0x02,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_i16 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_i16 v1, v[0:1], off offset:64 ; encoding: [0x7c,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_i16 v1, v[4:5], off ; encoding: [0x7c,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0xc0,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_i8 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_i8 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_i8 v1, v3, s[2:3] ; encoding: [0x02,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_i8 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_i8 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_i8 v1, v[4:5], off ; encoding: [0x7c,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_u16 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_u16 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_u16 v1, v3, s[2:3] ; encoding: [0x02,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_u16 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_u16 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_u16 v1, v[4:5], off ; encoding: [0x7c,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x80,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_load_u8 v1, v0, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_u8 v1, v0, s[0:1] offset:64 ; encoding: [0x00,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_u8 v1, v3, s[2:3] ; encoding: [0x02,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00] +0x02,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_load_u8 v1, v[0:1], off offset:-64 ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: global_load_u8 v1, v[0:1], off offset:64 ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: global_load_u8 v1, v[4:5], off ; encoding: [0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x04,0xee,0x01,0x00,0x00,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_store_addtid_b32 v2, off offset:-64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_addtid_b32 v2, off offset:64 ; encoding: [0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_addtid_b32 v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_addtid_b32 v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x0a,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b128 v0, v[2:5], s[0:1] offset:-64 ; encoding: [0x00,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b128 v0, v[2:5], s[0:1] offset:64 ; encoding: [0x00,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b128 v1, v[4:7], s[2:3] ; encoding: [0x02,0x40,0x07,0xee,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x00] +0x02,0x40,0x07,0xee,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x00 + +# GFX1250: global_store_b128 v[0:1], v[2:5], off offset:-64 ; encoding: [0x7c,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b128 v[0:1], v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b128 v[2:3], v[4:7], off ; encoding: [0x7c,0x40,0x07,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0x40,0x07,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_b16 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b16 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b16 v3, v1, s[2:3] ; encoding: [0x02,0x40,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00] +0x02,0x40,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_store_b16 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b16 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b16 v[4:5], v1, off ; encoding: [0x7c,0x40,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_store_b32 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b32 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b32 v3, v1, s[2:3] offset:-16 ; encoding: [0x02,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0xf0,0xff,0xff] +0x02,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0xf0,0xff,0xff + +# GFX1250: global_store_b32 v3, v1, s[2:3] offset:16 ; encoding: [0x02,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0x10,0x00,0x00] +0x02,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0x10,0x00,0x00 + +# GFX1250: global_store_b32 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b32 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b32 v[4:5], v1, off offset:-16 ; encoding: [0x7c,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0xf0,0xff,0xff] +0x7c,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0xf0,0xff,0xff + +# GFX1250: global_store_b32 v[4:5], v1, off offset:16 ; encoding: [0x7c,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0x10,0x00,0x00] +0x7c,0x80,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0x10,0x00,0x00 + +# GFX1250: global_store_b64 v0, v[2:3], s[0:1] offset:-64 ; encoding: [0x00,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b64 v0, v[2:3], s[0:1] offset:64 ; encoding: [0x00,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b64 v1, v[2:3], s[2:3] ; encoding: [0x02,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x01,0x00,0x00,0x00] +0x02,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: global_store_b64 v[0:1], v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b64 v[0:1], v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b64 v[2:3], v[4:5], off ; encoding: [0x7c,0xc0,0x06,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0xc0,0x06,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_b8 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b8 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b8 v3, v1, s[2:3] ; encoding: [0x02,0x00,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00] +0x02,0x00,0x06,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_store_b8 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b8 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x06,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b8 v[4:5], v1, off ; encoding: [0x7c,0x00,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x06,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_store_b96 v0, v[2:4], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b96 v0, v[2:4], s[0:1] offset:64 ; encoding: [0x00,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b96 v1, v[4:6], s[2:3] ; encoding: [0x02,0x00,0x07,0xee,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x00] +0x02,0x00,0x07,0xee,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x00 + +# GFX1250: global_store_b96 v[0:1], v[2:4], off offset:-64 ; encoding: [0x7c,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_b96 v[0:1], v[2:4], off offset:64 ; encoding: [0x7c,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_b96 v[2:3], v[4:6], off ; encoding: [0x7c,0x00,0x07,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0x00,0x07,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_block v0, v[2:33], s[0:1] offset:-64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_block v0, v[2:33], s[0:1] offset:64 ; encoding: [0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_block v1, v[4:35], s[2:3] ; encoding: [0x02,0x00,0x15,0xee,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x00] +0x02,0x00,0x15,0xee,0x00,0x00,0x00,0x02,0x01,0x00,0x00,0x00 + +# GFX1250: global_store_block v[0:1], v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_block v[0:1], v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_block v[2:3], v[4:35], off ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00] +0x7c,0x00,0x15,0xee,0x00,0x00,0x00,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_block v[2:3], v[4:35], off th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x7c,0x00,0x15,0xee,0x00,0x00,0x24,0x02,0x02,0x00,0x00,0x00] +0x7c,0x00,0x15,0xee,0x00,0x00,0x24,0x02,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_d16_hi_b16 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_d16_hi_b16 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_d16_hi_b16 v3, v1, s[2:3] ; encoding: [0x02,0x40,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00] +0x02,0x40,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_store_d16_hi_b16 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_d16_hi_b16 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_d16_hi_b16 v[4:5], v1, off ; encoding: [0x7c,0x40,0x09,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x40,0x09,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_store_d16_hi_b8 v0, v2, s[0:1] offset:-64 ; encoding: [0x00,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_d16_hi_b8 v0, v2, s[0:1] offset:64 ; encoding: [0x00,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_d16_hi_b8 v3, v1, s[2:3] ; encoding: [0x02,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00] +0x02,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x03,0x00,0x00,0x00 + +# GFX1250: global_store_d16_hi_b8 v[0:1], v2, off offset:-64 ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: global_store_d16_hi_b8 v[0:1], v2, off offset:64 ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x09,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: global_store_d16_hi_b8 v[4:5], v1, off ; encoding: [0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00] +0x7c,0x00,0x09,0xee,0x00,0x00,0x80,0x00,0x04,0x00,0x00,0x00 + +# GFX1250: global_wb ; encoding: [0x7c,0x00,0x0b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x7c,0x00,0x0b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_wb scope:SCOPE_DEV ; encoding: [0x7c,0x00,0x0b,0xee,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00] +0x7c,0x00,0x0b,0xee,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_wb scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x0b,0xee,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00] +0x7c,0x00,0x0b,0xee,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_wbinv ; encoding: [0x7c,0xc0,0x13,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x13,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_wbinv scope:SCOPE_DEV ; encoding: [0x7c,0xc0,0x13,0xee,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x13,0xee,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: global_wbinv scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x13,0xee,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00] +0x7c,0xc0,0x13,0xee,0x00,0x00,0x0c,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: scratch_load_b128 v[2:5], off, off offset:-64 ; encoding: [0x7c,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b128 v[2:5], off, off offset:64 ; encoding: [0x7c,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b128 v[2:5], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b128 v[2:5], off, s0 offset:64 ; encoding: [0x00,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b128 v[2:5], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b128 v[2:5], v0, off offset:64 ; encoding: [0x7c,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b128 v[2:5], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b128 v[2:5], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b128 v[2:5], v2, s1 ; encoding: [0x01,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x05,0xed,0x02,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, off, off ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] +0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, off, off offset:-64 ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b32 v1, off, off offset:2047 ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xff,0x07,0x00] +0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xff,0x07,0x00 + +# GFX1250: scratch_load_b32 v1, off, off offset:64 ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, off, s0 offset:-64 ; encoding: [0x00,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b32 v1, off, s0 offset:64 ; encoding: [0x00,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, off, s1 offset:2047 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xff,0x07,0x00] +0x01,0x00,0x05,0xed,0x01,0x00,0x00,0x00,0x00,0xff,0x07,0x00 + +# GFX1250: scratch_load_b32 v1, v0, off offset:-64 ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b32 v1, v0, off offset:64 ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, v0, s0 offset:-64 ; encoding: [0x00,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b32 v1, v0, s0 offset:64 ; encoding: [0x00,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, v2, off offset:2047 ; encoding: [0x7c,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0xff,0x07,0x00] +0x7c,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0xff,0x07,0x00 + +# GFX1250: scratch_load_b32 v1, v2, s1 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_b32 v1, v2, s1 offset:-4095 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x01,0xf0,0xff] +0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x01,0xf0,0xff + +# GFX1250: scratch_load_b32 v1, v2, s1 offset:-61440 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x10,0xff] +0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x10,0xff + +# GFX1250: scratch_load_b32 v1, v2, s1 offset:2047 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0xff,0x07,0x00] +0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0xff,0x07,0x00 + +# GFX1250: scratch_load_b32 v1, v2, s1 offset:4095 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0xff,0x0f,0x00] +0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0xff,0x0f,0x00 + +# GFX1250: scratch_load_b32 v1, v2, s1 offset:61440 ; encoding: [0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0xf0,0x00] +0x01,0x00,0x05,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0xf0,0x00 + +# GFX1250: scratch_load_b64 v[2:3], off, off offset:-64 ; encoding: [0x7c,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b64 v[2:3], off, off offset:64 ; encoding: [0x7c,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b64 v[2:3], off, s0 offset:-64 ; encoding: [0x00,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b64 v[2:3], off, s0 offset:64 ; encoding: [0x00,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b64 v[2:3], v0, off offset:-64 ; encoding: [0x7c,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b64 v[2:3], v0, off offset:64 ; encoding: [0x7c,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b64 v[2:3], v0, s0 offset:-64 ; encoding: [0x00,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b64 v[2:3], v0, s0 offset:64 ; encoding: [0x00,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b64 v[2:3], v2, s1 ; encoding: [0x01,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x40,0x05,0xed,0x02,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_b96 v[2:4], off, off offset:-64 ; encoding: [0x7c,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b96 v[2:4], off, off offset:64 ; encoding: [0x7c,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b96 v[2:4], off, s0 offset:-64 ; encoding: [0x00,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b96 v[2:4], off, s0 offset:64 ; encoding: [0x00,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x05,0xed,0x02,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b96 v[2:4], v0, off offset:-64 ; encoding: [0x7c,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b96 v[2:4], v0, off offset:64 ; encoding: [0x7c,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b96 v[2:4], v0, s0 offset:-64 ; encoding: [0x00,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_b96 v[2:4], v0, s0 offset:64 ; encoding: [0x00,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_b96 v[2:4], v2, s1 ; encoding: [0x01,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x80,0x05,0xed,0x02,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_block v[4:35], off, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_block v[4:35], off, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_block v[4:35], off, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_block v[4:35], off, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x14,0xed,0x04,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_block v[4:35], v0, off offset:-64 ; encoding: [0x7c,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_block v[4:35], v0, off offset:64 ; encoding: [0x7c,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_block v[4:35], v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_block v[4:35], v0, s0 offset:64 ; encoding: [0x00,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_block v[4:35], v2, s1 ; encoding: [0x01,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x14,0xed,0x04,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_block v[4:35], v2, s1 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x01,0xc0,0x14,0xed,0x04,0x00,0x26,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x14,0xed,0x04,0x00,0x26,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_d16_b16 v1, off, off offset:-64 ; encoding: [0x7c,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_b16 v1, off, off offset:64 ; encoding: [0x7c,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_b16 v1, off, s0 offset:-64 ; encoding: [0x00,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_b16 v1, off, s0 offset:64 ; encoding: [0x00,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_b16 v1, v0, off offset:-64 ; encoding: [0x7c,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_b16 v1, v0, off offset:64 ; encoding: [0x7c,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_b16 v1, v0, s0 offset:-64 ; encoding: [0x00,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_b16 v1, v0, s0 offset:64 ; encoding: [0x00,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_b16 v1, v2, s1 ; encoding: [0x01,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x00,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_b16 v1, off, off offset:-64 ; encoding: [0x7c,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_b16 v1, off, off offset:64 ; encoding: [0x7c,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_b16 v1, off, s0 offset:-64 ; encoding: [0x00,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_b16 v1, off, s0 offset:64 ; encoding: [0x00,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_b16 v1, v0, off offset:-64 ; encoding: [0x7c,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_b16 v1, v0, off offset:64 ; encoding: [0x7c,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_b16 v1, v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_b16 v1, v0, s0 offset:64 ; encoding: [0x00,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_b16 v1, v2, s1 ; encoding: [0x01,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_i8 v1, off, off offset:-64 ; encoding: [0x7c,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_i8 v1, off, off offset:64 ; encoding: [0x7c,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_i8 v1, off, s0 offset:-64 ; encoding: [0x00,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_i8 v1, off, s0 offset:64 ; encoding: [0x00,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_i8 v1, v0, off offset:-64 ; encoding: [0x7c,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_i8 v1, v0, off offset:64 ; encoding: [0x7c,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_i8 v1, v0, s0 offset:-64 ; encoding: [0x00,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_i8 v1, v0, s0 offset:64 ; encoding: [0x00,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_i8 v1, v2, s1 ; encoding: [0x01,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x80,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_u8 v1, off, off offset:-64 ; encoding: [0x7c,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_u8 v1, off, off offset:64 ; encoding: [0x7c,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_u8 v1, off, s0 offset:-64 ; encoding: [0x00,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_u8 v1, off, s0 offset:64 ; encoding: [0x00,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x08,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_u8 v1, v0, off offset:-64 ; encoding: [0x7c,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_u8 v1, v0, off offset:64 ; encoding: [0x7c,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_u8 v1, v0, s0 offset:-64 ; encoding: [0x00,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_hi_u8 v1, v0, s0 offset:64 ; encoding: [0x00,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_hi_u8 v1, v2, s1 ; encoding: [0x01,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x40,0x08,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_d16_i8 v1, off, off offset:-64 ; encoding: [0x7c,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_i8 v1, off, off offset:64 ; encoding: [0x7c,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_i8 v1, off, s0 offset:-64 ; encoding: [0x00,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_i8 v1, off, s0 offset:64 ; encoding: [0x00,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_i8 v1, v0, off offset:-64 ; encoding: [0x7c,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_i8 v1, v0, off offset:64 ; encoding: [0x7c,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_i8 v1, v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_i8 v1, v0, s0 offset:64 ; encoding: [0x00,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_i8 v1, v2, s1 ; encoding: [0x01,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x07,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_d16_u8 v1, off, off offset:-64 ; encoding: [0x7c,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_u8 v1, off, off offset:64 ; encoding: [0x7c,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_u8 v1, off, s0 offset:-64 ; encoding: [0x00,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_u8 v1, off, s0 offset:64 ; encoding: [0x00,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x07,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_u8 v1, v0, off offset:-64 ; encoding: [0x7c,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_u8 v1, v0, off offset:64 ; encoding: [0x7c,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_u8 v1, v0, s0 offset:-64 ; encoding: [0x00,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_d16_u8 v1, v0, s0 offset:64 ; encoding: [0x00,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_d16_u8 v1, v2, s1 ; encoding: [0x01,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x80,0x07,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_i16 v1, off, off offset:-64 ; encoding: [0x7c,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i16 v1, off, off offset:64 ; encoding: [0x7c,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i16 v1, off, s0 offset:-64 ; encoding: [0x00,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i16 v1, off, s0 offset:64 ; encoding: [0x00,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i16 v1, v0, off offset:-64 ; encoding: [0x7c,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i16 v1, v0, off offset:64 ; encoding: [0x7c,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i16 v1, v0, s0 offset:-64 ; encoding: [0x00,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i16 v1, v0, s0 offset:64 ; encoding: [0x00,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i16 v1, v2, s1 ; encoding: [0x01,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0xc0,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_i8 v1, off, off offset:-64 ; encoding: [0x7c,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i8 v1, off, off offset:64 ; encoding: [0x7c,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i8 v1, off, s0 offset:-64 ; encoding: [0x00,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i8 v1, off, s0 offset:64 ; encoding: [0x00,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i8 v1, v0, off offset:-64 ; encoding: [0x7c,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i8 v1, v0, off offset:64 ; encoding: [0x7c,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i8 v1, v0, s0 offset:-64 ; encoding: [0x00,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_i8 v1, v0, s0 offset:64 ; encoding: [0x00,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_i8 v1, v2, s1 ; encoding: [0x01,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x40,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_u16 v1, off, off offset:-64 ; encoding: [0x7c,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u16 v1, off, off offset:64 ; encoding: [0x7c,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u16 v1, off, s0 offset:-64 ; encoding: [0x00,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u16 v1, off, s0 offset:64 ; encoding: [0x00,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u16 v1, v0, off offset:-64 ; encoding: [0x7c,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u16 v1, v0, off offset:64 ; encoding: [0x7c,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u16 v1, v0, s0 offset:-64 ; encoding: [0x00,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u16 v1, v0, s0 offset:64 ; encoding: [0x00,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u16 v1, v2, s1 ; encoding: [0x01,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x80,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_load_u8 v1, off, off offset:-64 ; encoding: [0x7c,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u8 v1, off, off offset:64 ; encoding: [0x7c,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u8 v1, off, s0 offset:-64 ; encoding: [0x00,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u8 v1, off, s0 offset:64 ; encoding: [0x00,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x04,0xed,0x01,0x00,0x00,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u8 v1, v0, off offset:-64 ; encoding: [0x7c,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u8 v1, v0, off offset:64 ; encoding: [0x7c,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x7c,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u8 v1, v0, s0 offset:-64 ; encoding: [0x00,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff] +0x00,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_load_u8 v1, v0, s0 offset:64 ; encoding: [0x00,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00] +0x00,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_load_u8 v1, v2, s1 ; encoding: [0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00] +0x01,0x00,0x04,0xed,0x01,0x00,0x02,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: scratch_store_b128 off, v[2:5], off offset:-64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b128 off, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b128 off, v[2:5], s0 offset:-64 ; encoding: [0x00,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b128 off, v[2:5], s0 offset:64 ; encoding: [0x00,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b128 v0, v[2:5], off offset:-64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b128 v0, v[2:5], off offset:64 ; encoding: [0x7c,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b128 v0, v[2:5], s0 offset:-64 ; encoding: [0x00,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b128 v0, v[2:5], s0 offset:64 ; encoding: [0x00,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b128 v1, v[2:5], s3 ; encoding: [0x03,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x40,0x07,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_b16 off, v2, off offset:-64 ; encoding: [0x7c,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b16 off, v2, off offset:64 ; encoding: [0x7c,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b16 off, v2, s0 offset:-64 ; encoding: [0x00,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b16 off, v2, s0 offset:64 ; encoding: [0x00,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b16 v0, v2, off offset:-64 ; encoding: [0x7c,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b16 v0, v2, off offset:64 ; encoding: [0x7c,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b16 v0, v2, s0 offset:-64 ; encoding: [0x00,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b16 v0, v2, s0 offset:64 ; encoding: [0x00,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b16 v1, v2, s3 ; encoding: [0x03,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x40,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_b32 off, v2, off ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00] +0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00 + +# GFX1250: scratch_store_b32 off, v2, off offset:-64 ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b32 off, v2, off offset:2047 ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xff,0x07,0x00] +0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xff,0x07,0x00 + +# GFX1250: scratch_store_b32 off, v2, off offset:64 ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b32 off, v2, s0 offset:-64 ; encoding: [0x00,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b32 off, v2, s0 offset:64 ; encoding: [0x00,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b32 off, v2, s3 offset:2047 ; encoding: [0x03,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xff,0x07,0x00] +0x03,0x80,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xff,0x07,0x00 + +# GFX1250: scratch_store_b32 v0, v2, off offset:-64 ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b32 v0, v2, off offset:64 ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b32 v0, v2, s0 offset:-64 ; encoding: [0x00,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b32 v0, v2, s0 offset:64 ; encoding: [0x00,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b32 v1, v2, off offset:2047 ; encoding: [0x7c,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0xff,0x07,0x00] +0x7c,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0xff,0x07,0x00 + +# GFX1250: scratch_store_b32 v1, v2, s1 offset:-4095 ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x01,0xf0,0xff] +0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x01,0xf0,0xff + +# GFX1250: scratch_store_b32 v1, v2, s1 offset:-61440 ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x10,0xff] +0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x10,0xff + +# GFX1250: scratch_store_b32 v1, v2, s1 offset:4095 ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0xff,0x0f,0x00] +0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0xff,0x0f,0x00 + +# GFX1250: scratch_store_b32 v1, v2, s1 offset:61440 ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0xf0,0x00] +0x01,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0xf0,0x00 + +# GFX1250: scratch_store_b32 v1, v2, s3 ; encoding: [0x03,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_b32 v1, v2, s3 offset:2047 ; encoding: [0x03,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0xff,0x07,0x00] +0x03,0x80,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0xff,0x07,0x00 + +# GFX1250: scratch_store_b64 off, v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b64 off, v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b64 off, v[2:3], s0 offset:-64 ; encoding: [0x00,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b64 off, v[2:3], s0 offset:64 ; encoding: [0x00,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b64 v0, v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b64 v0, v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b64 v0, v[2:3], s0 offset:-64 ; encoding: [0x00,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b64 v0, v[2:3], s0 offset:64 ; encoding: [0x00,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b64 v1, v[2:3], s3 ; encoding: [0x03,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0xc0,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_b8 off, v2, off offset:-64 ; encoding: [0x7c,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b8 off, v2, off offset:64 ; encoding: [0x7c,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b8 off, v2, s0 offset:-64 ; encoding: [0x00,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b8 off, v2, s0 offset:64 ; encoding: [0x00,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x06,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b8 v0, v2, off offset:-64 ; encoding: [0x7c,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b8 v0, v2, off offset:64 ; encoding: [0x7c,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b8 v0, v2, s0 offset:-64 ; encoding: [0x00,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b8 v0, v2, s0 offset:64 ; encoding: [0x00,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x06,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_b96 off, v[2:4], off offset:-64 ; encoding: [0x7c,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b96 off, v[2:4], off offset:64 ; encoding: [0x7c,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b96 off, v[2:4], s0 offset:-64 ; encoding: [0x00,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b96 off, v[2:4], s0 offset:64 ; encoding: [0x00,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x07,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b96 v0, v[2:4], off offset:-64 ; encoding: [0x7c,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b96 v0, v[2:4], off offset:64 ; encoding: [0x7c,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b96 v0, v[2:4], s0 offset:-64 ; encoding: [0x00,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_b96 v0, v[2:4], s0 offset:64 ; encoding: [0x00,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_b96 v1, v[2:4], s3 ; encoding: [0x03,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x07,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_block off, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_block off, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_block off, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_block off, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x15,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_block v0, v[2:33], off offset:-64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_block v0, v[2:33], off offset:64 ; encoding: [0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_block v0, v[2:33], s0 offset:-64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_block v0, v[2:33], s0 offset:64 ; encoding: [0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_block v1, v[2:33], s3 ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x15,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_block v1, v[2:33], s3 th:TH_STORE_HT scope:SCOPE_SE ; encoding: [0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x15,0xed,0x00,0x00,0x26,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b16 off, v2, off offset:-64 ; encoding: [0x7c,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b16 off, v2, off offset:64 ; encoding: [0x7c,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b16 off, v2, s0 offset:-64 ; encoding: [0x00,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b16 off, v2, s0 offset:64 ; encoding: [0x00,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b16 v0, v2, off offset:-64 ; encoding: [0x7c,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b16 v0, v2, off offset:64 ; encoding: [0x7c,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b16 v0, v2, s0 offset:-64 ; encoding: [0x00,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b16 v0, v2, s0 offset:64 ; encoding: [0x00,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b16 v1, v2, s3 ; encoding: [0x03,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x40,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b8 off, v2, off offset:-64 ; encoding: [0x7c,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b8 off, v2, off offset:64 ; encoding: [0x7c,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b8 off, v2, s0 offset:-64 ; encoding: [0x00,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b8 off, v2, s0 offset:64 ; encoding: [0x00,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x09,0xed,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b8 v0, v2, off offset:-64 ; encoding: [0x7c,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x7c,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b8 v0, v2, off offset:64 ; encoding: [0x7c,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x7c,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b8 v0, v2, s0 offset:-64 ; encoding: [0x00,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff] +0x00,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0xc0,0xff,0xff + +# GFX1250: scratch_store_d16_hi_b8 v0, v2, s0 offset:64 ; encoding: [0x00,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00] +0x00,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x00,0x40,0x00,0x00 + +# GFX1250: scratch_store_d16_hi_b8 v1, v2, s3 ; encoding: [0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00] +0x03,0x00,0x09,0xed,0x00,0x00,0x02,0x01,0x01,0x00,0x00,0x00 + # GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00