compiler/src/iree/compiler/Codegen/Common/BUILD.bazel (1 addition, 0 deletions)
@@ -195,6 +195,7 @@ iree_compiler_cc_library(
"//compiler/src/iree/compiler/Codegen/Common:FoldTensorExtractOpIncGen",
"//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect",
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Transforms:IREECodegenTransforms",
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils",
"//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect",
"//compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils:KnownTargets",
compiler/src/iree/compiler/Codegen/Common/BlockDynamicDimensions.cpp
@@ -6,6 +6,7 @@

#include "iree/compiler/Codegen/Common/TensorDynamicDimAnalysis.h"
#include "iree/compiler/Codegen/Common/Transforms.h"
#include "iree/compiler/Codegen/Dialect/Codegen/Transforms/Transforms.h"
#include "iree/compiler/Codegen/Transforms/Transforms.h"
#include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
#include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h"
@@ -318,6 +319,8 @@ void BlockDynamicDimensionsPass::runOnOperation() {
controlFusionFn);
IREE::LinalgExt::populateFoldReshapeOpsByExpansionPatterns(patterns,
controlFusionFn);
IREE::Codegen::populateFoldReshapeOpsByExpansionPatterns(patterns,
controlFusionFn);
// Add patterns to fold `tensor.empty` operations with their consumers.
tensor::populateFoldTensorEmptyPatterns(patterns);
// Add some additional patterns that can simplify the IR.
@@ -367,6 +370,8 @@ void BlockDynamicDimensionsPass::runOnOperation() {
controlFn);
IREE::LinalgExt::populateFoldReshapeOpsByExpansionPatterns(
bubbleExpandShapePatterns, controlFn);
IREE::Codegen::populateFoldReshapeOpsByExpansionPatterns(
bubbleExpandShapePatterns, controlFn);
// Add patterns to fold the "bubbled-up" `tensor.expand_shape` operation and
// "pushed-down" `tensor.collapse_shape` operation with their interface
// bindings or `tensor.empty` operations.
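The new IREE::Codegen entry point mirrors the existing linalg and IREE::LinalgExt populate functions: each takes a RewritePatternSet and a linalg::ControlFusionFn (a std::function<bool(OpOperand *)> that returns true when the reshape feeding that operand may be propagated), so a single predicate can gate all three pattern families. A minimal sketch of the wiring, assuming an MLIRContext *ctx and an illustrative predicate rather than this pass's actual callback:

// Sketch only: gate all three reshape-propagation families with one predicate.
linalg::ControlFusionFn controlFn = [](OpOperand *fusedOperand) {
  // Illustrative check: only propagate when the operand is produced by an op
  // (not a block argument); the real passes apply their own fusion-control logic.
  return fusedOperand->get().getDefiningOp() != nullptr;
};
RewritePatternSet patterns(ctx);
linalg::populateFoldReshapeOpsByExpansionPatterns(patterns, controlFn);
IREE::LinalgExt::populateFoldReshapeOpsByExpansionPatterns(patterns, controlFn);
IREE::Codegen::populateFoldReshapeOpsByExpansionPatterns(patterns, controlFn);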
compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt (1 addition, 0 deletions)
@@ -229,6 +229,7 @@ iree_cc_library(
iree::compiler::Codegen::Common::FoldTensorExtractOpIncGen
iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect
iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
iree::compiler::Codegen::Dialect::Codegen::Transforms::IREECodegenTransforms
iree::compiler::Codegen::Dialect::Codegen::Utils
iree::compiler::Codegen::Dialect::GPU::IR::IREEGPUDialect
iree::compiler::Codegen::Dialect::GPU::TargetUtils::KnownTargets
compiler/src/iree/compiler/Codegen/Common/PropagateReshapesByExpansion.cpp
@@ -7,6 +7,7 @@
#include "iree/compiler/Codegen/Common/Transforms.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.h"
#include "iree/compiler/Codegen/Dialect/Codegen/Transforms/Transforms.h"
#include "iree/compiler/Codegen/Utils/GPUUtils.h"
#include "iree/compiler/Codegen/Utils/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -412,6 +413,8 @@ void PropagateReshapesByExpansionPass::runOnOperation() {
};
linalg::populateFoldReshapeOpsByExpansionPatterns(bubbleExpandShapePatterns,
bubbleUpExpansionControlFn);
IREE::Codegen::populateFoldReshapeOpsByExpansionPatterns(
bubbleExpandShapePatterns, bubbleUpExpansionControlFn);
// Add patterns to do some additional cleanup (on top of canonicalizations
// that can be done later) of reshape ops.
tensor::populateFoldTensorEmptyPatterns(bubbleExpandShapePatterns);
@@ -459,3 +459,205 @@ func.func @no_swap_rank_reducing_slice(%arg0: tensor<3x6xi8>) -> tensor<3xi16> {
// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<3x6xi8>
// CHECK-NEXT: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-NEXT: iree_tensor_ext.bitcast %[[SLICE]]

// -----

// Test propagating collapse_shape producer through inner_tiled op.
// Uses standard 2D matmul indexing maps with the MFMA_F32_16x16x16_F16 layout.
// Tensor shapes: LHS[outer_m, outer_k, 16, 16], RHS[outer_k, outer_n, 16, 16], ACC[outer_m, outer_n, 16, 16]
#contraction_accesses = [
affine_map<(m, n, k) -> (m, k)>,
affine_map<(m, n, k) -> (k, n)>,
affine_map<(m, n, k) -> (m, n)>
]
func.func @propagate_collapse_through_inner_tiled(
%src: tensor<2x3x4x16x16xf16>, %rhs: tensor<4x2x16x16xf16>, %out: tensor<6x2x16x16xf32>)
-> tensor<6x2x16x16xf32> {
// Collapse the first two outer dims of LHS: [2,3] -> [6]
%collapsed = tensor.collapse_shape %src [[0, 1], [2], [3], [4]]
: tensor<2x3x4x16x16xf16> into tensor<6x4x16x16xf16>
%result = iree_codegen.inner_tiled ins(%collapsed, %rhs) outs(%out) {
indexing_maps = #contraction_accesses,
iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>],
kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
permutations = [array<i64: 0, 1>, array<i64: 1, 0>, array<i64: 0, 1>],
semantics = #iree_gpu.mma_semantics<distributed = false, opaque = true>
} : tensor<6x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<6x2x16x16xf32>
return %result : tensor<6x2x16x16xf32>
}

// CHECK-LABEL: func @propagate_collapse_through_inner_tiled
// CHECK-SAME: %[[SRC:[A-Za-z0-9]+]]: tensor<2x3x4x16x16xf16>
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]: tensor<4x2x16x16xf16>
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: tensor<6x2x16x16xf32>
// CHECK: %[[EXPANDED_OUT:.+]] = tensor.expand_shape %[[OUT]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: : tensor<6x2x16x16xf32> into tensor<2x3x2x16x16xf32>
// CHECK: %[[INNER_TILED:.+]] = iree_codegen.inner_tiled
// CHECK-SAME: ins(%[[SRC]], %[[RHS]])
// CHECK-SAME: outs(%[[EXPANDED_OUT]])
// CHECK-SAME: indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d2, d3)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>]
// CHECK-SAME: iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>, #linalg.iterator_type<parallel>]
// CHECK-SAME: : tensor<2x3x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<2x3x2x16x16xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[INNER_TILED]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: : tensor<2x3x2x16x16xf32> into tensor<6x2x16x16xf32>
// CHECK: return %[[COLLAPSED]]

// -----

// Test propagating expand_shape consumer through inner_tiled op.
#contraction_accesses2 = [
affine_map<(m, n, k) -> (m, k)>,
affine_map<(m, n, k) -> (k, n)>,
affine_map<(m, n, k) -> (m, n)>
]
func.func @propagate_expand_through_inner_tiled(
%lhs: tensor<6x4x16x16xf16>, %rhs: tensor<4x2x16x16xf16>, %out: tensor<6x2x16x16xf32>)
-> tensor<2x3x2x16x16xf32> {
%result = iree_codegen.inner_tiled ins(%lhs, %rhs) outs(%out) {
indexing_maps = #contraction_accesses2,
iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>],
kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
permutations = [array<i64: 0, 1>, array<i64: 1, 0>, array<i64: 0, 1>],
semantics = #iree_gpu.mma_semantics<distributed = false, opaque = true>
} : tensor<6x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<6x2x16x16xf32>
%expanded = tensor.expand_shape %result [[0, 1], [2], [3], [4]]
output_shape [2, 3, 2, 16, 16] : tensor<6x2x16x16xf32> into tensor<2x3x2x16x16xf32>
return %expanded : tensor<2x3x2x16x16xf32>
}

// CHECK-LABEL: func @propagate_expand_through_inner_tiled
// CHECK-SAME: %[[LHS:[A-Za-z0-9]+]]: tensor<6x4x16x16xf16>
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]: tensor<4x2x16x16xf16>
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: tensor<6x2x16x16xf32>
// CHECK-DAG: %[[EXPANDED_OUT:.+]] = tensor.expand_shape %[[OUT]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: : tensor<6x2x16x16xf32> into tensor<2x3x2x16x16xf32>
// CHECK-DAG: %[[EXPANDED_LHS:.+]] = tensor.expand_shape %[[LHS]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: : tensor<6x4x16x16xf16> into tensor<2x3x4x16x16xf16>
// CHECK: %[[INNER_TILED:.+]] = iree_codegen.inner_tiled
// CHECK-SAME: ins(%[[EXPANDED_LHS]], %[[RHS]])
// CHECK-SAME: outs(%[[EXPANDED_OUT]])
// CHECK-SAME: indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d3, d2)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>]
// CHECK-SAME: iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]
// CHECK-SAME: : tensor<2x3x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<2x3x2x16x16xf32>
// CHECK: return %[[INNER_TILED]]

// -----

// Test that reshape touching inner dimensions is NOT propagated.
#contraction_accesses3 = [
affine_map<(m, n, k) -> (m, k)>,
affine_map<(m, n, k) -> (k, n)>,
affine_map<(m, n, k) -> (m, n)>
]
func.func @no_propagate_inner_dim_reshape(
%src: tensor<6x4x16x2x8xf16>, %rhs: tensor<4x2x16x16xf16>, %out: tensor<6x2x16x16xf32>)
-> tensor<6x2x16x16xf32> {
// The collapse touches inner dims [3, 4], which are part of the inner tile, so it should NOT propagate.
%collapsed = tensor.collapse_shape %src [[0], [1], [2], [3, 4]]
: tensor<6x4x16x2x8xf16> into tensor<6x4x16x16xf16>
%result = iree_codegen.inner_tiled ins(%collapsed, %rhs) outs(%out) {
indexing_maps = #contraction_accesses3,
iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>],
kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
permutations = [array<i64: 0, 1>, array<i64: 1, 0>, array<i64: 0, 1>],
semantics = #iree_gpu.mma_semantics<distributed = false, opaque = true>
} : tensor<6x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<6x2x16x16xf32>
return %result : tensor<6x2x16x16xf32>
}

// CHECK-LABEL: func @no_propagate_inner_dim_reshape
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape
// CHECK: iree_codegen.inner_tiled ins(%[[COLLAPSED]],

// -----

// Test propagating collapse_shape producer through inner_tiled op with dynamic outer shapes.
#contraction_accesses_dyn1 = [
affine_map<(m, n, k) -> (m, k)>,
affine_map<(m, n, k) -> (k, n)>,
affine_map<(m, n, k) -> (m, n)>
]
func.func @propagate_collapse_through_inner_tiled_dynamic(
%src: tensor<?x3x4x16x16xf16>, %rhs: tensor<4x2x16x16xf16>, %out: tensor<?x2x16x16xf32>)
-> tensor<?x2x16x16xf32> {
// Collapse the first two outer dims of LHS: [?, 3] -> [?*3]
%collapsed = tensor.collapse_shape %src [[0, 1], [2], [3], [4]]
: tensor<?x3x4x16x16xf16> into tensor<?x4x16x16xf16>
%result = iree_codegen.inner_tiled ins(%collapsed, %rhs) outs(%out) {
indexing_maps = #contraction_accesses_dyn1,
iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>],
kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
permutations = [array<i64: 0, 1>, array<i64: 1, 0>, array<i64: 0, 1>],
semantics = #iree_gpu.mma_semantics<distributed = false, opaque = true>
} : tensor<?x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<?x2x16x16xf32>
return %result : tensor<?x2x16x16xf32>
}

// CHECK-LABEL: func @propagate_collapse_through_inner_tiled_dynamic
// CHECK-SAME: %[[SRC:[A-Za-z0-9]+]]: tensor<?x3x4x16x16xf16>
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]: tensor<4x2x16x16xf16>
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: tensor<?x2x16x16xf32>
// CHECK-DAG: %[[DIM:.+]] = tensor.dim %[[SRC]], %c0
// CHECK: %[[EXPANDED_OUT:.+]] = tensor.expand_shape %[[OUT]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: output_shape [%[[DIM]], 3, 2, 16, 16]
// CHECK-SAME: : tensor<?x2x16x16xf32> into tensor<?x3x2x16x16xf32>
// CHECK: %[[INNER_TILED:.+]] = iree_codegen.inner_tiled
// CHECK-SAME: ins(%[[SRC]], %[[RHS]])
// CHECK-SAME: outs(%[[EXPANDED_OUT]])
// CHECK-SAME: indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d2, d3)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>]
// CHECK-SAME: iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>, #linalg.iterator_type<parallel>]
// CHECK-SAME: : tensor<?x3x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<?x3x2x16x16xf32>
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[INNER_TILED]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: : tensor<?x3x2x16x16xf32> into tensor<?x2x16x16xf32>
// CHECK: return %[[COLLAPSED]]

// -----

// Test propagating expand_shape consumer through inner_tiled op with dynamic outer shapes.
#contraction_accesses_dyn2 = [
affine_map<(m, n, k) -> (m, k)>,
affine_map<(m, n, k) -> (k, n)>,
affine_map<(m, n, k) -> (m, n)>
]
func.func @propagate_expand_through_inner_tiled_dynamic(
%lhs: tensor<?x4x16x16xf16>, %rhs: tensor<4x2x16x16xf16>, %out: tensor<?x2x16x16xf32>,
%dyn_dim: index)
-> tensor<?x3x2x16x16xf32> {
%result = iree_codegen.inner_tiled ins(%lhs, %rhs) outs(%out) {
indexing_maps = #contraction_accesses_dyn2,
iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>],
kind = #iree_gpu.mma_layout<MFMA_F32_16x16x16_F16>,
permutations = [array<i64: 0, 1>, array<i64: 1, 0>, array<i64: 0, 1>],
semantics = #iree_gpu.mma_semantics<distributed = false, opaque = true>
} : tensor<?x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<?x2x16x16xf32>
%expanded = tensor.expand_shape %result [[0, 1], [2], [3], [4]]
output_shape [%dyn_dim, 3, 2, 16, 16] : tensor<?x2x16x16xf32> into tensor<?x3x2x16x16xf32>
return %expanded : tensor<?x3x2x16x16xf32>
}

// CHECK-LABEL: func @propagate_expand_through_inner_tiled_dynamic
// CHECK-SAME: %[[LHS:[A-Za-z0-9]+]]: tensor<?x4x16x16xf16>
// CHECK-SAME: %[[RHS:[A-Za-z0-9]+]]: tensor<4x2x16x16xf16>
// CHECK-SAME: %[[OUT:[A-Za-z0-9]+]]: tensor<?x2x16x16xf32>
// CHECK-SAME: %[[DYN_DIM:[A-Za-z0-9]+]]: index
// CHECK-DAG: %[[EXPANDED_OUT:.+]] = tensor.expand_shape %[[OUT]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: output_shape [%[[DYN_DIM]], 3, 2, 16, 16]
// CHECK-SAME: : tensor<?x2x16x16xf32> into tensor<?x3x2x16x16xf32>
// CHECK-DAG: %[[EXPANDED_LHS:.+]] = tensor.expand_shape %[[LHS]] {{\[}}[0, 1], [2], [3], [4]{{\]}}
// CHECK-SAME: output_shape [%[[DYN_DIM]], 3, 4, 16, 16]
// CHECK-SAME: : tensor<?x4x16x16xf16> into tensor<?x3x4x16x16xf16>
// CHECK: %[[INNER_TILED:.+]] = iree_codegen.inner_tiled
// CHECK-SAME: ins(%[[EXPANDED_LHS]], %[[RHS]])
// CHECK-SAME: outs(%[[EXPANDED_OUT]])
// CHECK-SAME: indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d3, d2)>,
// CHECK-SAME: affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>]
// CHECK-SAME: iterator_types = [#linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<parallel>, #linalg.iterator_type<reduction>]
// CHECK-SAME: : tensor<?x3x4x16x16xf16>, tensor<4x2x16x16xf16> into tensor<?x3x2x16x16xf32>
// CHECK: return %[[INNER_TILED]]
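Reading the CHECK lines above: expanding one outer dimension of the (m, n, k) iteration space into two produces a four-iterator op, and every indexing map that referenced the expanded dimension lists both new dimensions in its place. In illustrative notation for the expand_shape test, with m split into (m0, m1):

  affine_map<(m, n, k) -> (m, k)>             // LHS map before, m of size 6 = 2*3
  affine_map<(m0, m1, n, k) -> (m0, m1, k)>   // LHS map after expansion

When the reshape is a collapse_shape producer, the expanded result is collapsed back afterwards (the trailing tensor.collapse_shape) so downstream users see the original type; when it is an expand_shape consumer, the remaining operands are expanded instead and the reshape itself disappears.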
compiler/src/iree/compiler/Codegen/Dialect/Codegen/Transforms/BUILD.bazel (new file)
@@ -0,0 +1,35 @@
# Copyright 2026 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

load("//build_tools/bazel:build_defs.oss.bzl", "iree_compiler_cc_library")

package(
default_visibility = ["//visibility:public"],
features = ["layering_check"],
licenses = ["notice"], # Apache 2.0
)

iree_compiler_cc_library(
name = "IREECodegenTransforms",
srcs = [
"ReshapeFusion.cpp",
],
hdrs = [
"Transforms.h",
],
deps = [
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
"@llvm-project//llvm:Support",
"@llvm-project//mlir:DialectUtils",
"@llvm-project//mlir:IR",
"@llvm-project//mlir:LinalgDialect",
"@llvm-project//mlir:LinalgTransforms",
"@llvm-project//mlir:Support",
"@llvm-project//mlir:TensorDialect",
"@llvm-project//mlir:TransformUtils",
"@llvm-project//mlir:Transforms",
],
)
compiler/src/iree/compiler/Codegen/Dialect/Codegen/Transforms/CMakeLists.txt (new file)
@@ -0,0 +1,33 @@
################################################################################
# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from #
# compiler/src/iree/compiler/Codegen/Dialect/Codegen/Transforms/BUILD.bazel #
# #
# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary #
# CMake-only content. #
# #
# To disable autogeneration for this file entirely, delete this header. #
################################################################################

iree_add_all_subdirs()

iree_cc_library(
NAME
IREECodegenTransforms
HDRS
"Transforms.h"
SRCS
"ReshapeFusion.cpp"
DEPS
LLVMSupport
MLIRIR
MLIRLinalgDialect
MLIRLinalgTransforms
MLIRSupport
MLIRTensorDialect
MLIRTransformUtils
MLIRTransforms
iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
PUBLIC
)

### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###