Revert "[AMDGPU] Rework the clamp support for WMMA instructions"#174674
Merged
Revert "[AMDGPU] Rework the clamp support for WMMA instructions"#174674
Conversation
)" This reverts commit ccca3b8.
Member
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-amdgpu Author: None (dyung) ChangesReverts llvm/llvm-project#174310 This change is causing 2 cross-project-test failures on https://lab.llvm.org/buildbot/#/builders/174/builds/29695 Patch is 87.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174674.diff 18 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 7faf73b7628fe..24b79c3b69b67 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -857,7 +857,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb",
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb.", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
@@ -885,7 +885,7 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8, "V8hV8iV16iV8hiIbI
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
-TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb.", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index a8a5bc348f00c..eabdc370da6b4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -1665,20 +1665,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
if (AppendFalseForOpselArg)
Args.push_back(Builder.getFalse());
- // Handle the optional clamp argument of the following two builtins.
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
- if (Args.size() == 7)
- Args.push_back(Builder.getFalse());
- assert(Args.size() == 8 && "Expected 8 arguments");
- Args[7] = Builder.CreateZExtOrTrunc(Args[7], Builder.getInt1Ty());
- } else if (BuiltinID ==
- AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
- if (Args.size() == 8)
- Args.push_back(Builder.getFalse());
- assert(Args.size() == 9 && "Expected 9 arguments");
- Args[8] = Builder.CreateZExtOrTrunc(Args[8], Builder.getInt1Ty());
- }
-
SmallVector<llvm::Type *, 6> ArgTypes;
if (NeedReturnType)
ArgTypes.push_back(ConvertType(E->getType()));
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 9d154c65c932e..cece22092bb14 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -255,40 +255,6 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) ||
(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result));
}
- case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
- case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8) {
- if (SemaRef.checkArgCountRange(TheCall, 7, 8))
- return true;
- if (TheCall->getNumArgs() == 7)
- return false;
- } else if (BuiltinID ==
- AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8) {
- if (SemaRef.checkArgCountRange(TheCall, 8, 9))
- return true;
- if (TheCall->getNumArgs() == 8)
- return false;
- }
- // Check if the last argument (clamp operand) is a constant and is
- // convertible to bool.
- Expr *ClampArg = TheCall->getArg(TheCall->getNumArgs() - 1);
- // 1) Ensure clamp argument is a constant expression
- llvm::APSInt ClampValue;
- if (!SemaRef.VerifyIntegerConstantExpression(ClampArg, &ClampValue)
- .isUsable())
- return true;
- // 2) Check if the argument can be converted to bool type
- if (!SemaRef.Context.hasSameType(ClampArg->getType(),
- SemaRef.Context.BoolTy)) {
- // Try to convert to bool
- QualType BoolTy = SemaRef.Context.BoolTy;
- ExprResult ClampExpr(ClampArg);
- SemaRef.CheckSingleAssignmentConstraints(BoolTy, ClampExpr);
- if (ClampExpr.isInvalid())
- return true;
- }
- return false;
- }
default:
return false;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index a463ba7ab41c3..afad4bb15b528 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -148,7 +148,7 @@ void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h* out, v8i a, v8i b, v8h c)
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 false)
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true)
// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
// CHECK-GFX1250-NEXT: ret void
//
@@ -157,17 +157,6 @@ void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8_clamp(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true, i1 true)
-// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
-// CHECK-GFX1250-NEXT: ret void
-//
-void test_amdgcn_wmma_i32_16x16x64_iu8_clamp(global v8i* out, v8i a, v8i b, v8i c)
-{
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, 1);
-}
-
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -470,7 +459,7 @@ void test_amdgcn_swmmac_f16_16x16x128_bf8_bf8(global v8h* out, v8i a, v16i b, v8
// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_i32_16x16x128_iu8(
// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true, i1 false)
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true)
// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
// CHECK-GFX1250-NEXT: ret void
//
@@ -479,17 +468,6 @@ void test_amdgcn_swmmac_i32_16x16x128_iu8(global v8i* out, v8i a, v16i b, v8i c,
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true);
}
-// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_i32_16x16x128_iu8_clamp(
-// CHECK-GFX1250-NEXT: entry:
-// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true, i1 true)
-// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA8]]
-// CHECK-GFX1250-NEXT: ret void
-//
-void test_amdgcn_swmmac_i32_16x16x128_iu8_clamp(global v8i* out, v8i a, v16i b, v8i c, int index)
-{
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, 1);
-}
-
// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x64_f16(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x64.f16.v8f32.v16f16.v32f16.i32(i1 false, <16 x half> [[A:%.*]], i1 false, <32 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index e3e0cc3f596c7..49ef2e571740c 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -112,9 +112,6 @@ void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c, int
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, 32.0f); // expected-error {{integer constant expression must have integer type, not 'double'}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, mod); // expected-error {{expression is not an integer constant expression}}
- *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true, true, 32.0f); // expected-error {{too many arguments to function call, expected at most 8, have 9}}
}
void test_amdgcn_wmma_f32_16x16x128_f8f6f4(global v8f* out, v16i a, v16i b, v8f c, int mod)
@@ -289,9 +286,6 @@ void test_amdgcn_swmmac_i32_16x16x128_iu8(global v8i* out, v8i a, v16i b, v8i c,
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}}
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}}
*out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}}
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, 32.0f); // expected-error {{integer constant expression must have integer type, not 'double'}}
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, mod); // expected-error {{expression is not an integer constant expression}}
- *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true, true, 32.0f); // expected-error {{too many arguments to function call, expected at most 9, have 10}}
}
void test_amdgcn_swmmac_f32_16x16x64_f16(global v8f* out, v16h a, v32h b, v8f c, int index, int mod)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index a8eba9ed126b7..f2650f678deaf 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3912,7 +3912,7 @@ def int_amdgcn_global_store_async_from_lds_b128 :
ClangBuiltin<"__builtin_amdgcn_global_store_async_from_lds_b128">, AMDGPUAsyncGlobalStoreFromLDS;
// WMMA intrinsics.
-class AMDGPUWmmaIntrinsicModsABClamp<LLVMType AB, LLVMType CD> :
+class AMDGPUWmmaIntrinsicModsAB<LLVMType AB, LLVMType CD> :
Intrinsic<
[CD], // %D
[
@@ -3923,9 +3923,8 @@ class AMDGPUWmmaIntrinsicModsABClamp<LLVMType AB, LLVMType CD> :
LLVMMatchType<0>, // %C
llvm_i1_ty, // matrix_a_reuse
llvm_i1_ty, // matrix_b_reuse
- llvm_i1_ty, // %clamp
],
- [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
+ [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>,
IntrWillReturn, IntrNoCallback, IntrNoFree, IntrNoCreateUndefOrPoison]
>;
@@ -4090,7 +4089,7 @@ def int_amdgcn_wmma_f32_16x16x128_fp8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint
def int_amdgcn_wmma_f32_16x16x128_fp8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_fp8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
def int_amdgcn_wmma_f32_16x16x128_bf8_bf8 : AMDGPUWmmaIntrinsicModsC<llvm_anyint_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsABClamp<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_i32_16x16x64_iu8 : AMDGPUWmmaIntrinsicModsAB<llvm_anyint_ty, llvm_anyint_ty>;
def int_amdgcn_wmma_f32_16x16x128_f8f6f4 : AMDGPUWmmaIntrinsicModsC_MatrixFMT;
def int_amdgcn_wmma_scale_f32_16x16x128_f8f6f4 : AMDGPUWmmaScaleIntrinsicModsC<llvm_i32_ty>;
def int_amdgcn_wmma_scale16_f32_16x16x128_f8f6f4 : AMDGPUWmmaScaleIntrinsicModsC<llvm_i64_ty>;
@@ -4116,24 +4115,6 @@ class AMDGPUSWmmacIntrinsicABIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType I
ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>]
>;
-class AMDGPUSWmmacIntrinsicABIdxClamp<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
- Intrinsic<
- [CD], // %D
- [
- llvm_i1_ty, // %A_mod: 0 - none, 1 - neg
- A, // %A
- llvm_i1_ty, // %B_mod: 0 - none, 1 - neg
- B, // %B
- LLVMMatchType<0>, // %C
- Index, // %Sparsity index for A
- llvm_i1_ty, // matrix_a_reuse
- llvm_i1_ty, // matrix_b_reuse
- llvm_i1_ty, // %clamp
- ],
- [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCreateUndefOrPoison,
- ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>, ImmArg<ArgIndex<8>>]
->;
-
defset list<Intrinsic> AMDGPUSWMMACIntrinsicsGFX1250 = {
def int_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
def int_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
@@ -4148,7 +4129,7 @@ def int_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm
def int_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
def int_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
def int_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : AMDGPUSWmmacIntrinsicIdxReuse<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
-def int_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUSWmmacIntrinsicABIdxClamp<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUSWmmacIntrinsicABIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
}
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index d8b54c396df28..cbb7b6ee4f3f5 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -32,7 +32,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
@@ -1285,18 +1284,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
break; // No other 'amdgcn.atomic.*'
}
- // Legacy wmma iu intrinsics without the optional clamp operand.
- if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
- F->arg_size() == 7) {
- NewFn = nullptr;
- return true;
- }
- if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
- F->arg_size() == 8) {
- NewFn = nullptr;
- return true;
- }
-
if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
Name.consume_front("flat.atomic.")) {
if (Name.starts_with("fadd") ||
@@ -4633,50 +4620,6 @@ static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
//
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
- // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
- // for compatibility.
- auto UpgradeLegacyWMMAIUIntrinsicCall =
- [](Function *F, CallBase *CI, IRBuilder<> &Builder,
- ArrayRef<Type *> OverloadTys) -> Value * {
- // Prepare arguments, append clamp=0 for compatibility
- SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
- Args.push_back(Builder.getFalse());
-
- // Insert the declaration for the right overload types
- Function *NewDecl = Intrinsic::getOrInsertDeclaration(
- F->getParent(), F->getIntrinsicID(), OverloadTys);
-
- // Copy operand bundles if any
- SmallVector<OperandBundleDef, 1> Bundles;
- CI->getOperandBundlesAsDefs(Bundles);
-
- // Create the new call and copy calling properties
- auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
- NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
- NewCall->setCallingConv(CI->getCallingConv());
- NewCall->setAttributes(CI->getAttributes());
- NewCall->setDebugLoc(CI->getDebugLoc());
- NewCall->copyMetadata(*CI);
- return NewCall;
- };
-
- if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
- assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
- "intrinsic should have 7 arguments");
- Type *T1 = CI->getArgOperand(4)->getType();
- Type *T2 = CI->getArgOperand(1)->getType();
- return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
- }
- if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
- assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
- "intrinsic should have 8 arguments");
- Type *T1 = CI->getArgOperand(4)->getType();
- Type *T2 = CI->getArgOperand(1)->getType();
- Type *T3 = CI->getArgOperand(3)->getType();
- Type *T4 = CI->getArgOperand(5)->getType();
- return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
- }
-
AtomicRMWInst::BinOp RMWOp =
StringSwitch<AtomicRMWInst::BinOp>(Name)
.StartsWith("ds.fadd", AtomicRMWInst::FAdd)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 139e2d101a077.....
[truncated]
|
shiltian
added a commit
that referenced
this pull request
Jan 7, 2026
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Reverts #174310
This change is causing 2 cross-project-test failures on https://lab.llvm.org/buildbot/#/builders/174/builds/29695