Skip to content

Revert "[mlir][amdgpu] Add tensor load store operations (#170918)" #172671

Merged
amd-eochoalo merged 1 commit into llvm:main from
amd-eochoalo:eochoa/2025-12-16/revert
Dec 17, 2025
Merged

Revert "[mlir][amdgpu] Add tensor load store operations (#170918)" #172671
amd-eochoalo merged 1 commit into llvm:main from
amd-eochoalo:eochoa/2025-12-16/revert

Conversation

@amd-eochoalo
Copy link
Contributor

This reverts commit ecbb444, which broke the ROCm integration tests. The change will be relanded in a future commit.

@llvmbot
Copy link
Member

llvmbot commented Dec 17, 2025

@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-amdgpu
@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-backend-amdgpu

Author: Erick Ochoa Lopez (amd-eochoalo)

Changes

This reverts commit ecbb444, which broke the ROCm integration tests. The change will be relanded in a future commit.


Full diff: https://github.com/llvm/llvm-project/pull/172671.diff

3 Files Affected:

  • (modified) mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td (-31)
  • (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+7-50)
  • (modified) mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir (-18)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 4865dc13f324b..96f5f5c6f1a3f 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1587,35 +1587,4 @@ def AMDGPU_MakeDmaDescriptorOp : AMDGPU_MakeDescriptorOp<"make_dma_descriptor">
 
 }
 
-def AMDGPU_TensorLoadToLDSOp :
-  AMDGPU_Op<"tensor_load_to_lds", [MemoryEffects<[MemWrite, MemRead]>]>,
-  Arguments<(ins AMDGPU_TDMDescriptorType: $desc)> {
-  let summary = "Load tensors from global memory to LDS.";
-  let description = [{
-    Load tensors of up to five dimensions from global memory to LDS.
-
-    This operation was introduced in gfx1250.
-  }];
-
-  let assemblyFormat = [{
-    $desc attr-dict `:` qualified(type($desc))
-  }];
-}
-
-def AMDGPU_TensorStoreFromLDSOp :
-  AMDGPU_Op<"tensor_store_from_lds", [MemoryEffects<[MemWrite, MemRead]>]>,
-  Arguments<(ins AMDGPU_TDMDescriptorType: $desc)> {
-
-  let summary = "Store tensors from LDS to global memory.";
-  let description = [{
-    Store tensors of up to five dimensions from LDS to global memory.
-
-    This operation was introduced in gfx1250.
-  }];
-
-  let assemblyFormat = [{
-    $desc attr-dict `:` qualified(type($desc))
-  }];
-}
-
 #endif // AMDGPU
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 4f3192570640a..541bb02d79eae 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -3218,6 +3218,11 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
 
     Location loc = op.getLoc();
 
+    IntegerType i32 = rewriter.getI32Type();
+    [[maybe_unused]] Type v4i32 =
+        this->typeConverter->convertType(VectorType::get(4, i32));
+    assert(v4i32 && "expected type conversion to succeed");
+
     SmallVector<Value> consts;
     for (int64_t i = 0; i < 8; ++i)
       consts.push_back(createI32Constant(rewriter, loc, i));
@@ -3232,32 +3237,6 @@ struct AMDGPULowerDescriptor : public ConvertOpToLLVMPattern<DescriptorOp> {
   }
 };
 
-template <typename SourceOp, typename TargetOp>
-struct AMDGPUTensorLoadStoreOpLowering
-    : public ConvertOpToLLVMPattern<SourceOp> {
-  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;
-  using Adaptor = typename ConvertOpToLLVMPattern<SourceOp>::OneToNOpAdaptor;
-  AMDGPUTensorLoadStoreOpLowering(const LLVMTypeConverter &converter,
-                                  Chipset chipset)
-      : ConvertOpToLLVMPattern<SourceOp>(converter), chipset(chipset) {}
-  Chipset chipset;
-
-  LogicalResult
-  matchAndRewrite(SourceOp op, Adaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    if (chipset < kGfx1250)
-      return op->emitOpError("is only supported on gfx1250");
-
-    ValueRange desc = adaptor.getDesc();
-    rewriter.replaceOpWithNewOp<TargetOp>(op, desc[0], desc[1], desc[2],
-                                          desc[3], /*cachePolicy=*/0,
-                                          /*alias_scopes=*/nullptr,
-                                          /*noalias_scopes=*/nullptr,
-                                          /*tbaa=*/nullptr);
-    return success();
-  }
-};
-
 struct ConvertAMDGPUToROCDLPass
     : public impl::ConvertAMDGPUToROCDLPassBase<ConvertAMDGPUToROCDLPass> {
   using Base::Base;
@@ -3327,24 +3306,6 @@ void mlir::populateAMDGPUTypeAndAttributeConversions(
     Type i32 = IntegerType::get(type.getContext(), 32);
     return typeConverter.convertType(VectorType::get(4, i32));
   });
-  typeConverter.addConversion(
-      [&](TDMDescriptorType type,
-          SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
-        Type i32 = IntegerType::get(type.getContext(), 32);
-        Type v4i32 = typeConverter.convertType(VectorType::get(4, i32));
-        Type v8i32 = typeConverter.convertType(VectorType::get(8, i32));
-        llvm::append_values(result, v4i32, v8i32, v4i32, v4i32);
-        return success();
-      });
-
-  auto addUnrealizedCast = [](OpBuilder &builder, TypeRange types,
-                              ValueRange inputs,
-                              Location loc) -> SmallVector<Value> {
-    auto cast = UnrealizedConversionCastOp::create(builder, loc, types, inputs);
-    return cast.getResults();
-  };
-
-  typeConverter.addTargetMaterialization(addUnrealizedCast);
 }
 
 void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
@@ -3375,11 +3336,7 @@ void mlir::populateAMDGPUToROCDLConversionPatterns(LLVMTypeConverter &converter,
            AMDGPUMakeDmaBaseLowering<MakeDmaBaseOp>,
            AMDGPUMakeDmaBaseLowering<MakeGatherDmaBaseOp>,
            AMDGPULowerDescriptor<MakeDmaDescriptorOp>,
-           AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>,
-           AMDGPUTensorLoadStoreOpLowering<TensorLoadToLDSOp,
-                                           ROCDL::TensorLoadToLDSOp>,
-           AMDGPUTensorLoadStoreOpLowering<TensorStoreFromLDSOp,
-                                           ROCDL::TensorStoreFromLDSOp>>(
-          converter, chipset);
+           AMDGPULowerDescriptor<MakeGatherDmaDescriptorOp>>(converter,
+                                                             chipset);
   patterns.add<AMDGPUSwizzleBitModeLowering>(converter);
 }
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
index e62db9ff571bf..4979e85785970 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir
@@ -773,24 +773,6 @@ func.func @make_dma_descriptor_workgroup_mask(%base: !amdgpu.tdm_base<i32>, %wg_
   func.return %descriptor : !amdgpu.tdm_descriptor
 }
 
-// CHECK-LABEL: func @tensor_load_to_lds
-// CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
-func.func @tensor_load_to_lds(%desc: !amdgpu.tdm_descriptor) {
-  // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
-  // CHECK: rocdl.tensor.load.to.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
-  amdgpu.tensor_load_to_lds %desc : !amdgpu.tdm_descriptor
-  func.return
-}
-
-// CHECK-LABEL: func @tensor_store_from_lds
-// CHECK-SAME: (%[[DESC:.+]]: !amdgpu.tdm_descriptor)
-func.func @tensor_store_from_lds(%desc: !amdgpu.tdm_descriptor) {
-  // CHECK: %[[DGROUPS:.+]]:4 = builtin.unrealized_conversion_cast %[[DESC]]
-  // CHECK: rocdl.tensor.store.from.lds %[[DGROUPS]]#0, %[[DGROUPS]]#1, %[[DGROUPS]]#2, %[[DGROUPS]]#3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
-  amdgpu.tensor_store_from_lds %desc : !amdgpu.tdm_descriptor
-  func.return
-}
-
 // -----
 
 // CHECK-LABEL: func @make_gather_dma_descriptor

@amd-eochoalo amd-eochoalo enabled auto-merge (squash) December 17, 2025 15:04
@amd-eochoalo amd-eochoalo merged commit b9d6ad9 into llvm:main Dec 17, 2025
15 of 16 checks passed
amd-eochoalo added a commit to amd-eochoalo/llvm-project that referenced this pull request Dec 17, 2025
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Dec 19, 2025
llvm#172671)

This reverts commit ecbb444. Broke ROCM
integration tests. Will reland in future commit.
valadaptive pushed a commit to valadaptive/llvm-project that referenced this pull request Dec 24, 2025
llvm#172671)

This reverts commit ecbb444. Broke ROCM
integration tests. Will reland in future commit.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants