Revert "[mlir][amdgpu] Add tensor load store operations (#170918)" #172671
Merged
amd-eochoalo merged 1 commit into llvm:main on Dec 17, 2025
Merged
Revert "[mlir][amdgpu] Add tensor load store operations (#170918)" #172671 — amd-eochoalo merged 1 commit into llvm:main
amd-eochoalo merged 1 commit into llvm:main
Conversation
This reverts commit ecbb444.
Member
|
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-backend-amdgpu Author: Erick Ochoa Lopez (amd-eochoalo). Changes: This reverts commit ecbb444. Broke ROCM integration tests. Will reland in future commit. Full diff: https://github.com/llvm/llvm-project/pull/172671.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 4865dc13f324b..96f5f5c6f1a3f 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1587,35 +1587,4 @@ def AMDGPU_MakeDmaDescriptorOp : AMDGPU_MakeDescriptorOp<"make_dma_descriptor">
}
-def AMDGPU_TensorLoadToLDSOp :
- AMDGPU_Op<"tensor_load_to_lds", [MemoryEffects<[MemWrite, MemRead]>]>,
- Arguments<(ins AMDGPU_TDMDescriptorType: $desc)> {
- let summary = "Load tensors from global memory to LDS.";
- let description = [{
- Load tensors of up to five dimensions from global memory to LDS.
-
- This operation was introduced in gfx1250.
- }];
-
- let assemblyFormat = [{
- $desc attr-dict `:` qualified(type($desc))
- }];
-}
-
-def AMDGPU_TensorStoreFromLDSOp :
- AMDGPU_Op<"tensor_store_from_lds", [MemoryEffects<[MemWrite, MemRead]>]>,
- Arguments<(ins AMDGPU_TDMDescriptorType: $desc)> {
-
- let summary = "Store tensors from LDS to global memory.";
- let description = [{
- Store tensors of up to five dimensions from LDS to global memory.
-
- This operation was introduced in gfx1250.
- }];
-
- let assemblyFormat = [{
- $desc attr-dict `:` qualified(type($desc))
- }];
-}
-
#endif // AMDGPU
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 4f3192570640a..541bb02d79eae 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -3218,6 +3218,11 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
Location loc = op.getLoc();
+ IntegerType i32 = rewriter.getI32Type();
+ [[maybe_unused]] Type v4i32 =
+ this->typeConverter->convertType(VectorType::get(4, i32));
+ assert(v4i32 && "expected type conversion to succeed");
+
SmallVector<Value> consts;
for (int64_t i = 0; i < 8; ++i)
consts.push_back(createI32Constant(rewriter, loc, i));
@@ -3232,32 +3237,6 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
}
};
-template <typename SourceOp, typename TargetOp>
-struct AMDGPUTensorLoadStoreOpLowering
- : public ConvertOpToLLVMPattern<SourceOp> {
- using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
- using Adaptor = typename ConvertOpToLLVMPattern<SourceOp>::OneToNOpAdaptor;
- AMDGPUTensorLoadStoreOpLowering(const LLVMTypeConverter &converter,
- Chipset chipset)
- : ConvertOpToLLVMPattern<SourceOp>(converter), chipset(chipset) {}
- Chipset chipset;
-
- LogicalResult
- matchAndRewrite(SourceOp op, Adaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- if (chipset < kGfx1250)
- return op->emitOpError("is only supported on gfx1250");
-
- ValueRange desc = adaptor.getDesc();
- rewriter.replaceOpWithNewOp<TargetOp>(op, desc[0], desc[1], desc[2],
- desc[3], /*cachePolicy=*/0,
- /*alias_scopes=*/nullptr,
- /*noalias_scopes=*/nullptr,
- /*tbaa=*/nullptr);
- return success();
- }
-};
-
struct ConvertAMDGPUToROCDLPass
: public impl::ConvertAMDGPUToROCDLPassBase<ConvertAMDGPUToROCDLPass> {
using Base::Base;
@@ -3327,24 +3306,6 @@ void mlir::populateAMDGPUTypeAndAttributeConversions(
Type i32 = IntegerType::get(type.getContext(), 32);
return typeConverter.convertType(VectorType::get(4, i32));
});
- typeConverter.addConversion(
- [&](TDMDescriptorType type,
- SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
- Type i32 = IntegerType::get(type.getContext(), 32);
- Type v4i32 = typeConverter.convertType(VectorType::get(4, i32));
- Type v8i32 = typeConverter.convertType(VectorType::get(8, i32));
- llvm::append_values(result, v4i32, v8i32, v4i32, v4i32);
- return success();
- });
-
- auto addUnrealizedCast = [](OpBuilder &builder, TypeRange types,
- ValueRange inputs,
- Location loc) -> SmallVector<Value> {
- auto cast = UnrealizedConversionCastOp::create(builder, loc, types, inputs);
- return cast.getResults();
- };
-
- typeConverter.addTargetMaterialization(addUnrealizedCast);
}
void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
@@ -3375,11 +3336,7 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
AMDGPUMakeDmaBaseLowering<MakeDmaBaseOp>,
AMDGPUMakeDmaBaseLowering<MakeGatherDmaBaseOp>,
AMDGPULowerDescriptor<MakeDmaDescriptorOp>,
- AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>,
- AMDGPUTensorLoadStoreOpLowering<TensorLoadToLDSOp,
- ROCDL::TensorLoadToLDSOp>,
- AMDGPUTensorLoadStoreOpLowering<TensorStoreFromLDSOp,
- ROCDL::TensorStoreFromLDSOp>>(
- converter, chipset);
+ AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>>(converter,
+ chipset);
patterns.add<AMDGPUSwizzleBitModeLowering>(converter);
}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
index e62db9ff571bf..4979e85785970 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
@@ -773,24 +773,6 @@ func.func @make_dma_descriptor_workgroup_mask(%base: !amdgpu.tdm_base<i32>, %wg_
func.return %descriptor : !amdgpu.tdm_descriptor
}
-// CHECK-LABEL: func @tensor_load_to_lds
-// CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
-func.func @tensor_load_to_lds(%desc: !amdgpu.tdm_descriptor) {
- // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
- // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
- amdgpu.tensor_load_to_lds %desc : !amdgpu.tdm_descriptor
- func.return
-}
-
-// CHECK-LABEL: func @tensor_store_from_lds
-// CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
-func.func @tensor_store_from_lds(%desc: !amdgpu.tdm_descriptor) {
- // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
- // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
- amdgpu.tensor_store_from_lds %desc : !amdgpu.tdm_descriptor
- func.return
-}
-
// -----
// CHECK-LABEL: func @make_gather_dma_descriptor
|
Hardcode84
approved these changes
Dec 17, 2025
amd-eochoalo
added a commit
to amd-eochoalo/llvm-project
that referenced
this pull request
Dec 17, 2025
…" (llvm#172671) This reverts commit b9d6ad9.
11 tasks
mahesh-attarde
pushed a commit
to mahesh-attarde/llvm-project
that referenced
this pull request
Dec 19, 2025
llvm#172671) This reverts commit ecbb444. Broke ROCM integration tests. Will reland in future commit.
valadaptive
pushed a commit
to valadaptive/llvm-project
that referenced
this pull request
Dec 24, 2025
llvm#172671) This reverts commit ecbb444. Broke ROCM integration tests. Will reland in future commit.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
This reverts commit ecbb444. Broke ROCM integration tests. Will reland in future commit.