Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Add v_prng_b32 instruction for gfx950 #116310

Open
wants to merge 1 commit into
base: users/arsenm/gfx950/increase-lds-size
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -522,5 +522,7 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs",
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")

TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst")

#undef BUILTIN
#undef TARGET_BUILTIN
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/amdgpu-features.cl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Expand Down
16 changes: 16 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -emit-llvm \
// RUN: -verify -o - %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \
// RUN: -verify -o - %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \
// RUN: -verify -o - %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \
// RUN: -verify -o - %s


// REQUIRES: amdgpu-registered-target

typedef unsigned int uint;
void test_prng_b32(global uint* out, uint a) {
*out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}}
}
21 changes: 21 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s
// REQUIRES: amdgpu-registered-target

typedef unsigned int uint;

// CHECK-LABEL: @test_prng_b32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
// CHECK-NEXT: ret void
//
void test_prng_b32(global uint* out, uint a) {
*out = __builtin_amdgcn_prng_b32(a);
}
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]
>, ClangBuiltin<"__builtin_amdgcn_prng_b32">;

} // TargetPrefix = "amdgcn"

// New-style image intrinsics
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
"VMEM instructions of the same type write VGPR results in order"
>;

def FeaturePrngInst : SubtargetFeature<"prng-inst",
"HasPrngInst",
"true",
"Has v_prng_b32 instruction"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
Expand Down Expand Up @@ -1495,7 +1501,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeatureAddressableLocalMemorySize163840
FeatureAddressableLocalMemorySize163840,
FeaturePrngInst
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2344,6 +2351,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;

def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

break;
}
case Intrinsic::amdgcn_prng_b32: {
auto *Src = II.getArgOperand(0);
if (isa<UndefValue>(Src)) {
return IC.replaceInstUsesWith(II, Src);
}
}
}
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4515,6 +4515,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_u8_f32:
case Intrinsic::amdgcn_alignbyte:
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_prng_b32:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_sdot2:
case Intrinsic::amdgcn_udot2:
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasSALUFloatInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;

bool HasPrngInst = false;
bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
Expand Down Expand Up @@ -1321,6 +1321,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// instruction.
unsigned maxHardClauseLength() const { return MaxHardClauseLength; }

bool hasPrngInst() const { return HasPrngInst; }

/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,9 @@ let SubtargetPredicate = isGFX11Plus in {
defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

let SubtargetPredicate = HasPrngInst in
defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>;

foreach vt = Reg32Types.types in {
def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)),
(vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0)))
Expand Down Expand Up @@ -1516,6 +1519,8 @@ defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;

defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>;

class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat <
(vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)),
(Inst VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))> {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TargetParser/TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gws"] = true;
break;
case GK_GFX950:
Features["prng-inst"] = true;
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

declare i32 @llvm.amdgcn.prng.b32(i32) #0

; GCN-LABEL: {{^}}prng_b32:
; GCN: v_prng_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @prng_b32(ptr addrspace(1) %out, i32 %src) #1 {
%prng = call i32 @llvm.amdgcn.prng.b32(i32 %src) #0
store i32 %prng, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}prng_b32_constant_4
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 4
define amdgpu_kernel void @prng_b32_constant_4(ptr addrspace(1) %out) #1 {
%prng = call i32 @llvm.amdgcn.prng.b32(i32 4) #0
store i32 %prng, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}prng_b32_constant_100
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 0x64
define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
%prng = call i32 @llvm.amdgcn.prng.b32(i32 100) #0
store i32 %prng, ptr addrspace(1) %out, align 4
ret void
}


attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
57 changes: 57 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_vop1.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s

v_prng_b32 v5, v1
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, v255
// GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, s1
// GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, s101
// GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, vcc_lo
// GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, vcc_hi
// GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, ttmp15
// GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, m0
// GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, exec_lo
// GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, exec_hi
// GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, -1
// GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, 0.5
// GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, src_scc
// GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v255, 0xaf123456
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
31 changes: 31 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefixes=GFX950 %s

v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff]

v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff]

v_prng_b32 v5, v1 row_mirror
// GFX950: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff]

v_prng_b32 v5, v1 row_half_mirror
// GFX950: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff]

v_prng_b32 v5, v1 row_shl:1
// GFX950: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff]

v_prng_b32 v5, v1 row_shl:15
// GFX950: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff]

v_prng_b32 v5, v1 row_shr:1
// GFX950: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff]

v_prng_b32 v5, v1 row_shr:15
// GFX950: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff]

v_prng_b32 v5, v1 row_ror:1
// GFX950: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff]

v_prng_b32 v5, v1 row_ror:15
// GFX950: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff]
43 changes: 43 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX950 %s

# GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
0x01,0xb1,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e]
0xff,0xb1,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e]
0x01,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e]
0x65,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e]
0x6a,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e]
0x6b,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e]
0x7b,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e]
0x7c,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e]
0x7e,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e]
0x7f,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e]
0xc1,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e]
0xf0,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e]
0xfd,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
18 changes: 18 additions & 0 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6547,3 +6547,21 @@ define half @test_constant_fold_exp2_f16_neg_denorm() {
%val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff)
ret half %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.prng
; --------------------------------------------------------------------
declare i32 @llvm.amdgcn.prng.b32(i32)
define i32 @prng_undef_i32() {
; CHECK-LABEL: @prng_undef_i32(
; CHECK-NEXT: ret i32 undef
%prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
ret i32 %prng
}

define i32 @prng_poison_i32() {
; CHECK-LABEL: @prng_poison_i32(
; CHECK-NEXT: ret i32 poison
%prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
ret i32 %prng
}
Loading