diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp index 8ab4d99bbf73..b5c51c5cad45 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUApplyTilingLevel.cpp @@ -11,6 +11,7 @@ #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenInterfaces.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" #include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUEnums.h" +#include "iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLForwardCompat.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -61,7 +62,6 @@ getTiledOps(Operation *funcOp, IREE::GPU::TilingLevel tilingLevel) { void GPUApplyTilingLevelPass::runOnOperation() { FunctionOpInterface funcOp = getOperation(); - if (!llvm::is_contained({IREE::GPU::TilingLevel::Reduction, IREE::GPU::TilingLevel::Thread, IREE::GPU::TilingLevel::Subgroup, @@ -107,6 +107,7 @@ void GPUApplyTilingLevelPass::runOnOperation() { // Apply cleanup patterns. { RewritePatternSet patterns(context); + IREE::GPU::populateFoldSwizzleHintOpPatterns(patterns); // Merge consecutive insert/extract slice ops to simplify later loop // hoisting patterns. 
tensor::populateFoldTensorEmptyPatterns(patterns); diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel index 2673c42bbdf5..6a2e4654d024 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel @@ -37,6 +37,7 @@ iree_lit_test_suite( "gpu_distribute_scf_for.mlir", "gpu_distribute_shared_memory.mlir", "gpu_expand_dimensions.mlir", + "gpu_fold_swizzle_hint_ops.mlir", "gpu_fuse_and_hoist_forall.mlir", "gpu_generalize_named_ops.mlir", "gpu_greedily_distribute_to_threads.mlir", diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt index 4ce6c005b783..dde2c3d34120 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt @@ -33,6 +33,7 @@ iree_lit_test_suite( "gpu_distribute_scf_for.mlir" "gpu_distribute_shared_memory.mlir" "gpu_expand_dimensions.mlir" + "gpu_fold_swizzle_hint_ops.mlir" "gpu_fuse_and_hoist_forall.mlir" "gpu_generalize_named_ops.mlir" "gpu_greedily_distribute_to_threads.mlir" diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir index 348bc4db92be..d5e21133494c 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_apply_tiling_level.mlir @@ -735,3 +735,42 @@ module { // SERIAL: linalg.generic // SERIAL: scf.forall.in_parallel // SERIAL-NOT: mapping + +// ----- + +func.func @matmul_transpose_b_with_swizzle(%5: tensor<64x64xf32>, %6: tensor<64x1280xf16>, %7: tensor<64x1280xf16>) -> tensor<64x64xf32> { + %c4 = arith.constant 4 : index + %c1280 = arith.constant 1280 : index + %cst = 
arith.constant 0.000000e+00 : f32 + %c0 = arith.constant 0 : index + %8 = linalg.fill ins(%cst : f32) outs(%5 : tensor<64x64xf32>) -> tensor<64x64xf32> + %9 = tensor.empty() : tensor<64x1280xf16> + %swizzle_9 = iree_codegen.swizzle_hint %9[#iree_codegen.xor_shuffle<256, 32>] : tensor<64x1280xf16> + %10 = tensor.empty() : tensor<64x1280xf16> + %swizzle_10 = iree_codegen.swizzle_hint %10[#iree_codegen.xor_shuffle<256, 32>] : tensor<64x1280xf16> + %11 = scf.for %arg0 = %c0 to %c1280 step %c4 iter_args(%arg1 = %8) -> (tensor<64x64xf32>) { + %extracted_slice = tensor.extract_slice %6[0, %arg0] [64, 4] [1, 1] : tensor<64x1280xf16> to tensor<64x4xf16> + %extracted_slice_0 = tensor.extract_slice %swizzle_9[0, %arg0] [64, 4] [1, 1] : tensor<64x1280xf16> to tensor<64x4xf16> + %12 = linalg.copy {lowering_config = #iree_gpu.lowering_config<{thread = [1, 1]}>} ins(%extracted_slice : tensor<64x4xf16>) outs(%extracted_slice_0 : tensor<64x4xf16>) -> tensor<64x4xf16> + %extracted_slice_1 = tensor.extract_slice %7[0, %arg0] [64, 4] [1, 1] : tensor<64x1280xf16> to tensor<64x4xf16> + %extracted_slice_2 = tensor.extract_slice %swizzle_10[0, %arg0] [64, 4] [1, 1] : tensor<64x1280xf16> to tensor<64x4xf16> + %13 = linalg.copy {lowering_config = #iree_gpu.lowering_config<{thread = [1, 1]}>} ins(%extracted_slice_1 : tensor<64x4xf16>) outs(%extracted_slice_2 : tensor<64x4xf16>) -> tensor<64x4xf16> + %14 = linalg.matmul + indexing_maps = [ + affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d1, d2)>, + affine_map<(d0, d1, d2) -> (d0, d1)> + ] + {lowering_config = #iree_gpu.lowering_config<{thread = [4, 4]}>} + ins(%12, %13 : tensor<64x4xf16>, tensor<64x4xf16>) + outs(%arg1 : tensor<64x64xf32>) -> tensor<64x64xf32> + scf.yield %14 : tensor<64x64xf32> + } + return %11 : tensor<64x64xf32> +} + +// CHECK-LABEL: func.func @matmul_transpose_b_with_swizzle + +// THREAD-LABEL: func.func @matmul_transpose_b_with_swizzle +// THREAD: %2 = tensor.empty() : tensor<64x4xf16> +// THREAD: 
%3 = iree_codegen.swizzle_hint %2[#iree_codegen.xor_shuffle<256, 32>] : tensor<64x4xf16> diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fold_swizzle_hint_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fold_swizzle_hint_ops.mlir new file mode 100644 index 000000000000..4fdd41f9e6cf --- /dev/null +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_fold_swizzle_hint_ops.mlir @@ -0,0 +1,120 @@ +// RUN: iree-opt --mlir-print-local-scope --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-apply-tiling-level, canonicalize, cse))" %s | FileCheck %s + +// Test: tensor.extract_slice of swizzle_hint(tensor.empty) should fold +// to swizzle_hint(tensor.empty) with the sliced shape. +func.func @fold_extract_slice_of_swizzle_hint() -> tensor<16x32xf32> { + %empty = tensor.empty() : tensor<64x64xf32> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> + %slice = tensor.extract_slice %swizzle[0, 0] [16, 32] [1, 1] : tensor<64x64xf32> to tensor<16x32xf32> + return %slice : tensor<16x32xf32> +} + +// CHECK-LABEL: func.func @fold_extract_slice_of_swizzle_hint +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<16x32xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[EMPTY]][#iree_codegen.rotate_rows<64, 4>] : tensor<16x32xf32> +// CHECK: return %[[SWIZZLE]] + +// Test: tensor.extract_slice with dynamic sizes should fold correctly. 
+func.func @fold_extract_slice_dynamic(%size0: index, %size1: index) -> tensor { + %empty = tensor.empty() : tensor<64x64xf32> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.xor_shuffle<128, 16>] : tensor<64x64xf32> + %slice = tensor.extract_slice %swizzle[0, 0] [%size0, %size1] [1, 1] : tensor<64x64xf32> to tensor + return %slice : tensor +} + +// CHECK-LABEL: func.func @fold_extract_slice_dynamic +// CHECK-SAME: %[[SIZE0:[A-Za-z0-9]+]]: index +// CHECK-SAME: %[[SIZE1:[A-Za-z0-9]+]]: index +// CHECK: %[[EMPTY:.+]] = tensor.empty(%[[SIZE0]], %[[SIZE1]]) : tensor +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[EMPTY]][#iree_codegen.xor_shuffle<128, 16>] : tensor +// CHECK: return %[[SWIZZLE]] + +// Test: tensor.expand_shape of swizzle_hint(tensor.empty) should fold +// to swizzle_hint(tensor.empty) with the expanded shape. +func.func @fold_expand_shape_of_swizzle_hint() -> tensor<4x16x64xf32> { + %empty = tensor.empty() : tensor<64x64xf32> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> + %expanded = tensor.expand_shape %swizzle [[0, 1], [2]] output_shape [4, 16, 64] : tensor<64x64xf32> into tensor<4x16x64xf32> + return %expanded : tensor<4x16x64xf32> +} + +// CHECK-LABEL: func.func @fold_expand_shape_of_swizzle_hint +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<4x16x64xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[EMPTY]][#iree_codegen.rotate_rows<64, 4>] : tensor<4x16x64xf32> +// CHECK: return %[[SWIZZLE]] + +// Test: tensor.collapse_shape of swizzle_hint(tensor.empty) should fold +// to swizzle_hint(tensor.empty) with the collapsed shape. 
+func.func @fold_collapse_shape_of_swizzle_hint() -> tensor<64x64xf32> { + %empty = tensor.empty() : tensor<4x16x4x16xf32> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.rotate_rows<64, 4>] : tensor<4x16x4x16xf32> + %collapsed = tensor.collapse_shape %swizzle [[0, 1], [2, 3]] : tensor<4x16x4x16xf32> into tensor<64x64xf32> + return %collapsed : tensor<64x64xf32> +} + +// CHECK-LABEL: func.func @fold_collapse_shape_of_swizzle_hint +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<64x64xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[EMPTY]][#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> +// CHECK: return %[[SWIZZLE]] + +// Negative test: extract_slice of swizzle_hint without tensor.empty source +// should NOT fold. +func.func @no_fold_extract_slice_non_empty(%arg0: tensor<64x64xf32>) -> tensor<16x32xf32> { + %swizzle = iree_codegen.swizzle_hint %arg0[#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> + %slice = tensor.extract_slice %swizzle[0, 0] [16, 32] [1, 1] : tensor<64x64xf32> to tensor<16x32xf32> + return %slice : tensor<16x32xf32> +} + +// CHECK-LABEL: func.func @no_fold_extract_slice_non_empty +// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<64x64xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[ARG0]][#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> +// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[SWIZZLE]] +// CHECK: return %[[SLICE]] + +// Negative test: expand_shape of swizzle_hint without tensor.empty source +// should NOT fold. 
+func.func @no_fold_expand_shape_non_empty(%arg0: tensor<64x64xf32>) -> tensor<4x16x64xf32> { + %swizzle = iree_codegen.swizzle_hint %arg0[#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> + %expanded = tensor.expand_shape %swizzle [[0, 1], [2]] output_shape [4, 16, 64] : tensor<64x64xf32> into tensor<4x16x64xf32> + return %expanded : tensor<4x16x64xf32> +} + +// CHECK-LABEL: func.func @no_fold_expand_shape_non_empty +// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<64x64xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[ARG0]][#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> +// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[SWIZZLE]] +// CHECK: return %[[EXPANDED]] + +// Test: XOR shuffle swizzle attribute is preserved through folding. +func.func @fold_xor_shuffle_swizzle() -> tensor<8x64xf32> { + %empty = tensor.empty() : tensor<16x128xf32> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.xor_shuffle<128, 16>] : tensor<16x128xf32> + %slice = tensor.extract_slice %swizzle[0, 0] [8, 64] [1, 1] : tensor<16x128xf32> to tensor<8x64xf32> + return %slice : tensor<8x64xf32> +} + +// CHECK-LABEL: func.func @fold_xor_shuffle_swizzle +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x64xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[EMPTY]][#iree_codegen.xor_shuffle<128, 16>] : tensor<8x64xf32> +// CHECK: return %[[SWIZZLE]] + +// Test: Rank-reducing extract_slice should work correctly. 
+func.func @fold_rank_reducing_extract_slice() -> tensor<32xf32> { + %empty = tensor.empty() : tensor<64x64xf32> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.rotate_rows<64, 4>] : tensor<64x64xf32> + %slice = tensor.extract_slice %swizzle[0, 0] [1, 32] [1, 1] : tensor<64x64xf32> to tensor<32xf32> + return %slice : tensor<32xf32> +} + +// CHECK-LABEL: func.func @fold_rank_reducing_extract_slice +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32xf32> +// CHECK: %[[SWIZZLE:.+]] = iree_codegen.swizzle_hint %[[EMPTY]][#iree_codegen.rotate_rows<64, 4>] : tensor<32xf32> +// CHECK: return %[[SWIZZLE]] + +#encoding = #iree_encoding.encoding (m, k)>, affine_map<(m, n, k) -> (k, n)>, affine_map<(m, n, k) -> (m, n)>], iteration_sizes = [?, ?, ?]> +func.func @fold_swizzle_hint_of_encoding() -> tensor<16xbf16,#encoding> { + %empty = tensor.empty() : tensor<8x16xbf16, #encoding> + %swizzle = iree_codegen.swizzle_hint %empty[#iree_codegen.rotate_rows<8, 4>] : tensor<8x16xbf16, #encoding> + %slice = tensor.extract_slice %swizzle[0, 0] [1, 16] [1, 1] : tensor<8x16xbf16, #encoding> to tensor<16xbf16,#encoding> + return %slice : tensor<16xbf16,#encoding> +} diff --git a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td index 8c2bdab9cff7..a59e341f9657 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td +++ b/compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.td @@ -173,9 +173,9 @@ def IREECodegen_SwizzleHintOp : Op:$operand, + let arguments = (ins AnyRankedTensorOrMemRef:$operand, IREECodegen_AnySwizzleAttr:$swizzle); - let results = (outs RankedTensorOrMemRefOf<[AnyType], [1]>:$result); + let results = (outs AnyRankedTensorOrMemRef:$result); let assemblyFormat = [{ $operand `[` $swizzle attr-dict `]` `:` type($result) diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.cpp 
b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.cpp
index d67ced9ed05c..af0474a6d043 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.cpp
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.cpp
@@ -101,24 +101,45 @@ static FailureOr<Value> createSharedAllocDestination(RewriterBase &rewriter,
     return failure();
   }
 
-  auto empty = forallOp.getDpsInits()[0].getDefiningOp<tensor::EmptyOp>();
+  // Look through an optional swizzle hint on the destination. The templated
+  // `getDefiningOp` is null-safe, so a destination that is a block argument
+  // falls through to the failure below instead of tripping `dyn_cast`.
+  Value destination = forallOp.getDpsInits()[0];
+  auto swizzleHintOp =
+      destination.getDefiningOp<IREE::Codegen::SwizzleHintOp>();
+  if (swizzleHintOp) {
+    destination = swizzleHintOp.getOperand();
+  }
+
   // Fail if the destination is not a `tensor.empty` op and cannot be trivially
   // converted to a `bufferization.alloc_tensor`.
+  auto empty = destination.getDefiningOp<tensor::EmptyOp>();
   if (!empty) {
     return failure();
   }
 
   // Create a `bufferization.alloc_tensor` op with memory space
   // `#gpu.address_space<workgroup>`.
+  Location loc = empty->getLoc();
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(empty);
   Attribute sharedMemoryAddrSpace = gpu::AddressSpaceAttr::get(
       rewriter.getContext(), gpu::GPUDialect::getWorkgroupAddressSpace());
   auto allocTensor = bufferization::AllocTensorOp::create(
-      rewriter, empty->getLoc(), cast<TensorType>(empty.getResult().getType()),
+      rewriter, loc, cast<TensorType>(empty.getResult().getType()),
       empty.getDynamicSizes(),
       /*copy=*/Value(), /*size_hint=*/Value(),
       /*memory_space=*/sharedMemoryAddrSpace);
+
+  // If the destination was wrapped in a swizzle hint, re-apply that same hint
+  // to the new allocation. Reusing the op found above (rather than inspecting
+  // the first user of the `tensor.empty`) keeps this correct even when the
+  // `tensor.empty` has other users.
+  if (swizzleHintOp) {
+    auto newSwizzle = IREE::Codegen::SwizzleHintOp::create(
+        rewriter, loc, allocTensor.getResult(), swizzleHintOp.getSwizzle());
+    return newSwizzle.getResult();
+  }
   return allocTensor.getResult();
 }
 
@@ -2065,4 +2086,116 @@ void populateIREEGPULowerValueBarrierPatterns(RewritePatternSet &patterns) {
   patterns.add<LowerValueBarrierPattern>(patterns.getContext());
 }
 
+//===----------------------------------------------------------------------===//
+// SwizzleHintOp Fold Patterns
+//===----------------------------------------------------------------------===//
+
+// The following patterns are adapted from the populateFoldTensorEmptyPatterns
+// in upstream llvm-project. The main change is to add support for folding with
+// swizzle_hint ops from IREE. Once swizzle_hint ops are more widely used and
+// proven stable, we could consider upstreaming this extension.
+
+namespace {
+// Folds `tensor.extract_slice(swizzle_hint(tensor.empty))` into
+// `swizzle_hint(tensor.empty)` of the slice's result type, carrying the
+// original swizzle attribute over. Only fires on a single-use `tensor.empty`.
+struct FoldSwizzleHintOpWithExtractSliceOp final
+    : OpRewritePattern<tensor::ExtractSliceOp> {
+  using Base::Base;
+  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
+                                PatternRewriter &rewriter) const override {
+    // Check for swizzle_hint op source.
+    auto swizzleHintOp =
+        sliceOp.getSource().getDefiningOp<IREE::Codegen::SwizzleHintOp>();
+    if (!swizzleHintOp) {
+      return failure();
+    }
+
+    // Check for tensor.empty source.
+    auto emptyOp = swizzleHintOp.getOperand().getDefiningOp<tensor::EmptyOp>();
+    if (!emptyOp) {
+      return failure();
+    }
+
+    // Check for single use.
+    if (!emptyOp->hasOneUse()) {
+      return failure();
+    }
+
+    // Create new tensor.empty op. tensor.extract_slice may be rank-reducing;
+    // its dynamic sizes must be preserved as well as its result type.
+    Location loc = sliceOp.getLoc();
+    auto sliceType = cast<RankedTensorType>(sliceOp.getType());
+    auto tensorType =
+        RankedTensorType::get(sliceType.getShape(), sliceType.getElementType(),
+                              sliceType.getEncoding());
+    auto newEmptyOp =
+        tensor::EmptyOp::create(rewriter, loc, tensorType, sliceOp.getSizes());
+    rewriter.replaceOpWithNewOp<IREE::Codegen::SwizzleHintOp>(
+        sliceOp, newEmptyOp, swizzleHintOp.getSwizzle());
+    return success();
+  }
+};
+
+// Folds `reshape(swizzle_hint(tensor.empty))`, for `ReshapeOp` being
+// `tensor.expand_shape` or `tensor.collapse_shape`, into
+// `swizzle_hint(tensor.empty)` of the reified result shape. A `tensor.cast` is
+// inserted if the new type differs from the reshape's declared result type.
+template <typename ReshapeOp>
+struct FoldSwizzleHintOpWithReshapeOp final : OpRewritePattern<ReshapeOp> {
+  using OpRewritePattern<ReshapeOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(ReshapeOp reshapeOp,
+                                PatternRewriter &rewriter) const override {
+    auto swizzleHintOp =
+        reshapeOp.getSrc()
+            .template getDefiningOp<IREE::Codegen::SwizzleHintOp>();
+    if (!swizzleHintOp) {
+      return failure();
+    }
+    auto emptyOp =
+        swizzleHintOp.getOperand().template getDefiningOp<tensor::EmptyOp>();
+    if (!emptyOp) {
+      return failure();
+    }
+
+    // Check for single use.
+    if (!emptyOp->hasOneUse()) {
+      return failure();
+    }
+
+    // Reify result shape.
+    Location loc = reshapeOp.getLoc();
+    ReifiedRankedShapedTypeDims resultShapes;
+    if (failed(reifyResultShapes(rewriter, reshapeOp, resultShapes)) ||
+        !llvm::hasSingleElement(resultShapes)) {
+      return failure();
+    }
+
+    // Create new tensor.empty op.
+    Value emptyTensor =
+        tensor::EmptyOp::create(rewriter, loc, resultShapes[0],
+                                reshapeOp.getResultType().getElementType(),
+                                reshapeOp.getResultType().getEncoding());
+    Value newSwizzleHintOp = IREE::Codegen::SwizzleHintOp::create(
+        rewriter, loc, emptyTensor, swizzleHintOp.getSwizzle());
+    if (newSwizzleHintOp.getType() != reshapeOp.getResultType()) {
+      rewriter.replaceOpWithNewOp<tensor::CastOp>(
+          reshapeOp, reshapeOp.getResultType(), newSwizzleHintOp);
+    } else {
+      rewriter.replaceOp(reshapeOp, newSwizzleHintOp);
+    }
+    return success();
+  }
+};
+
+} // namespace
+
+// Registers the swizzle_hint folding patterns for tensor.extract_slice,
+// tensor.expand_shape and tensor.collapse_shape.
+void populateFoldSwizzleHintOpPatterns(RewritePatternSet &patterns) {
+  patterns.add<FoldSwizzleHintOpWithReshapeOp<tensor::ExpandShapeOp>,
+               FoldSwizzleHintOpWithReshapeOp<tensor::CollapseShapeOp>,
+               FoldSwizzleHintOpWithExtractSliceOp>(patterns.getContext());
+}
+
 } // namespace mlir::iree_compiler::IREE::GPU
diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.h b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.h
index 70d9c3522b73..dcdd11f4232a 100644
--- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.h
+++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/Transforms/Transforms.h
@@ -195,6 +195,9 @@ void populateIREEGPUVectorUnrollPatterns(
 void populateIREEGPUVectorUnrollPatterns(RewritePatternSet &patterns);
 void populateIREEGPUVectorizationPatterns(RewritePatternSet &patterns);
 
+// Populate patterns to fold tensor.empty ops through swizzle hint ops.
+void populateFoldSwizzleHintOpPatterns(RewritePatternSet &patterns);
+
 } // namespace mlir::iree_compiler::IREE::GPU
 
 #endif // IREE_COMPILER_CODEGEN_DIALECT_GPU_TRANSFORMS_TRANSFORMS_H_