From cb3e406c460fae5cb15b7267f71401630b93ced7 Mon Sep 17 00:00:00 2001
From: Marko Rakita
Date: Fri, 20 Dec 2024 23:36:26 +0100
Subject: [PATCH] Add Reduce ops workaround for keepDim=false (#1625)

This PR adds TTNN workarounds for these Metal issues:
- https://github.com/tenstorrent/tt-metal/issues/13361 - by decomposing `reduce(keepDim=false)` into `reduce(keepDim=true) + reshape`
- https://github.com/tenstorrent/tt-metal/issues/16118 - by annulling the dimensions argument when all dims are being reduced

As part of this work I've also:
- Enabled conversion of the `stablehlo.reduce` op with multiple reduce dimensions
- Added reduce ops verifiers in TTIR
- Added a separate function in TTNNWorkarounds to run rewrite patterns for decomposition and layout workarounds
- Added lots of unit tests for reduce ops to cover conversions and verifiers
- Added lots of silicon tests for reduce ops

Opened issue https://github.com/tenstorrent/tt-mlir/issues/1624 (assigned to myself) to revert these workarounds once the Metal issues are fixed.

Closes #805, #848

After implementing these workarounds and running tests, I encountered [another Metal issue](https://github.com/tenstorrent/tt-metal/issues/16104), this time in the `reshape` op. I've debugged it and have a local fix; I will send a PR to fix it in the Metal repo, as confirmed with the reshape op owners. I've opened issue https://github.com/tenstorrent/tt-mlir/issues/1640 to enable the Reduce ops silicon tests once that fix is uplifted.

Another issue I encountered while working on this: when the workaround pass decompositions change the shapes of ops' tensors, their layouts need to change too, but the layout pass runs before the workaround pass. I've managed to solve this by reusing the layout of the input tensor, but I am not sure that is a good solution; we may need to repeat some of the layout logic after the workaround decompositions.
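For illustration, here is how the first workaround decomposes a partial reduction (where not all dims are reduced, so the dims argument is kept and only the reshape is appended). This sketch reuses the shapes from the `test_reduce_add_3to2dim` silicon test below; the `#ttnn_layout` attribute names are illustrative:
```
%1 = "ttnn.sum"(%0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10x4xf32, #ttnn_layout1>) -> tensor<128x4xf32, #ttnn_layout1>
```
becomes
```
%1 = "ttnn.sum"(%0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10x4xf32, #ttnn_layout1>) -> tensor<128x1x4xf32, #ttnn_layout1>
%2 = "ttnn.reshape"(%1) <{shape = [128 : i32, 4 : i32]}> : (tensor<128x1x4xf32, #ttnn_layout1>) -> tensor<128x4xf32, #ttnn_layout2>
```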
FYI @sdjordjevicTT, here is an example of the TTNN IR before the workarounds:
```
%3 = "ttnn.sum"(%2) <{dim_arg = [0 : i32, 1 : i32, 2 : i32], keep_dim = false}> : (tensor<128x32x4xf32, #ttnn_layout2>) -> tensor<1xf32, #ttnn_layout2>
```
and after the workarounds:
```
%3 = "ttnn.sum"(%2) <{keep_dim = true}> : (tensor<128x32x4xf32, #ttnn_layout2>) -> tensor<1x1x1xf32, #ttnn_layout2>
%4 = "ttnn.reshape"(%3) <{shape = [1 : i32]}> : (tensor<1x1x1xf32, #ttnn_layout2>) -> tensor<1xf32, #ttnn_layout3>
```
--- include/ttmlir/Dialect/TTIR/IR/TTIROps.td | 2 + include/ttmlir/Dialect/TTNN/IR/TTNNOps.td | 2 + .../ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td | 1 + .../Decomposition/ReduceOpsRewritePattern.h | 140 ++++++++++++++++++ .../StableHLOToTTIRPatterns.cpp | 7 +- lib/Dialect/TTIR/IR/TTIROps.cpp | 87 +++++++++-- lib/Dialect/TTNN/IR/TTNNOps.cpp | 48 ++++++ lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp | 18 +++ lib/Dialect/TTNN/Transforms/CMakeLists.txt | 3 +- .../Decomposition/ReduceOpsRewritePattern.cpp | 50 +++++++ .../{ => Workarounds}/TTNNWorkarounds.cpp | 76 ++++++---- .../StableHLOToTTIR/reduce_add_op.mlir | 109 +++++++++++++- .../StableHLOToTTIR/reduce_maximum_op.mlir | 109 +++++++++++++- .../reduce_ops/negative_invalid_dim_high.mlir | 9 ++ .../reduce_ops/negative_invalid_dim_low.mlir | 9 ++ .../reduce_ops/negative_repeating_dims.mlir | 9 ++ .../TTNN/reduction/max_op_negative.mlir | 10 ++ .../TTNN/reduction/mean_op_negative.mlir | 10 ++ .../TTNN/reduction/sum_op_negative.mlir | 10 ++ .../Silicon/StableHLO/reduce_add_op.mlir | 106 +++++++++++-- .../Silicon/StableHLO/reduce_maximum_op.mlir | 106 +++++++++++-- test/ttmlir/Silicon/TTNN/simple_max.mlir | 39 +++++ test/ttmlir/Silicon/TTNN/simple_mean.mlir | 37 ++++- test/ttmlir/Silicon/TTNN/simple_sum.mlir | 39 +++++ 24 files changed, 954 insertions(+), 82 deletions(-) create mode 100644 include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h create mode 100644 lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp rename lib/Dialect/TTNN/Transforms/{ => Workarounds}/TTNNWorkarounds.cpp (88%) create mode 100644 test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir create mode 100644 test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir create mode 100644 test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir create mode 100644 test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir create mode 100644 test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir create mode 100644 test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir create mode 100644 test/ttmlir/Silicon/TTNN/simple_max.mlir create mode 100644 test/ttmlir/Silicon/TTNN/simple_sum.mlir diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index 842d353626..b571287c82 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -651,6 +651,8 @@ class TTIR_ReductionOp<string mnemonic, list<Trait> traits = []> : return {builder.getAffineMapArrayAttr(indexingMaps), builder.getArrayAttr(iteratorTypes)};} }]; + + let hasVerifier = 1; } def TTIR_SumOp : TTIR_ReductionOp<"sum"> { diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td index b915959474..0d1d235bb8 100644 --- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td +++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td @@ -581,6 +581,8 @@ class TTNN_ReductionOp<string mnemonic, list<Trait> traits = []> : TTNN_Op<mnemonic, traits> { OptionalAttr<I32ArrayAttr>:$dim_arg); let results = (outs AnyRankedTensor:$result); + + let hasVerifier = 1; } def
TTNN_SumOp : TTNN_ReductionOp<"sum"> { diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td index 94d05eadcb..8d20a2bcc5 100644 --- a/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td +++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td @@ -145,6 +145,7 @@ def TTNN_TTNNLayoutAttr: TTNN_Attr<"TTNNLayout", "ttnn_layout"> { TTNNLayoutAttr withMemoryLayout(::mlir::MLIRContext *context, TensorMemoryLayoutAttr memLayoutAttr); TTNNLayoutAttr withMemoryLayout(::mlir::MLIRContext *context, TensorMemoryLayout memLayout); TTNNLayoutAttr withShardShape(::mlir::MLIRContext *context, llvm::SmallVector<int64_t> shardShape); + TTNNLayoutAttr withTensorShape(::mlir::MLIRContext *context, ArrayRef<int64_t> tensorShape); bool isSystemBufferType() const { return ::mlir::tt::ttnn::isSystemBufferType(getBufferType()); } bool isDeviceBufferType() const { return ::mlir::tt::ttnn::isDeviceBufferType(getBufferType()); } diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h b/include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h new file mode 100644 index 0000000000..741fbfc068 --- /dev/null +++ b/include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef TTMLIR_DIALECT_TTNN_TRANSFORMS_WORKAROUNDS_DECOMPOSITION_REDUCEOPSREWRITEPATTERN_H +#define TTMLIR_DIALECT_TTNN_TRANSFORMS_WORKAROUNDS_DECOMPOSITION_REDUCEOPSREWRITEPATTERN_H + +#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" + +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" + +namespace mlir::tt::ttnn::workarounds::decomposition { + +// Extracts reduce dimensions' values from the dimArg attribute. If dimArg is +// not specified, returns an empty vector. +llvm::SmallVector<int64_t> +getReduceDims(const std::optional<mlir::ArrayAttr> &dimArg); + +// Calculates the output shape of the new Reduce op created in the workaround, +// based on the input shape and the reduce dimensions. +llvm::SmallVector<int64_t> +calculateNewReduceShape(RankedTensorType inputType, + const std::optional<mlir::ArrayAttr> &dimArg); + +// This workaround addresses the following Metal issue: +// https://github.com/tenstorrent/tt-metal/issues/13361 +// +// TODO(mrakita): Remove this workaround once these Metal issues are fixed +// (tracked by https://github.com/tenstorrent/tt-mlir/issues/1624). +// +template <typename ReduceOp> +class ReduceOpsKeepDimRewritePattern : public OpRewritePattern<ReduceOp> { +public: + using OpRewritePattern<ReduceOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(ReduceOp srcOp, + PatternRewriter &rewriter) const override { + if (srcOp.getKeepDim()) { + return failure(); + } + + RankedTensorType inputType = srcOp.getInput().getType(); + RankedTensorType outputType = srcOp.getResult().getType(); + + ReduceOp newReduceOp = + createReduceOpWithKeepDim(srcOp, rewriter, inputType, outputType); + + // The Metal TTNN implementation of Reduce ops doesn't yet support + // keepDim=false. As a workaround, we convert the Reduce op into a + // combination of a Reduce op with keepDim=true and a Reshape op that + // removes the reduced dims, so that the rest of the graph is not affected. + // When this is not needed (for example because type converters already + // promoted the rank of the op result), we avoid adding an unnecessary + // Reshape op.
+ if (outputType.getShape().size() < inputType.getShape().size()) { + replaceOpWithReshapeOp(srcOp, newReduceOp, rewriter, outputType); + } else { + rewriter.replaceOp(srcOp, newReduceOp); + } + + return success(); + } + +private: + ReduceOp createReduceOpWithKeepDim(ReduceOp srcOp, PatternRewriter &rewriter, + RankedTensorType inputType, + RankedTensorType outputType) const { + llvm::SmallVector<int64_t> outputShapeVec = + calculateNewReduceShape(inputType, srcOp.getDimArg()); + + TTNNLayoutAttr newOutputLayoutAttr = + mlir::cast<TTNNLayoutAttr>(outputType.getEncoding()) + .withTensorShape(rewriter.getContext(), outputShapeVec); + + RankedTensorType newOutputType = RankedTensorType::get( + outputShapeVec, outputType.getElementType(), newOutputLayoutAttr); + + return rewriter.create<ReduceOp>(srcOp.getLoc(), newOutputType, + srcOp.getInput(), true /*keep_dim*/, + srcOp.getDimArg().value_or(nullptr)); + } + + void replaceOpWithReshapeOp(ReduceOp srcOp, ReduceOp newReduceOp, + PatternRewriter &rewriter, + RankedTensorType outputType) const { + mlir::ArrayAttr shapeAttr = rewriter.getI32ArrayAttr( + llvm::SmallVector<int32_t>(outputType.getShape())); + + rewriter.replaceOpWithNewOp<ttnn::ReshapeOp>( + srcOp, outputType, newReduceOp, shapeAttr); + } +}; + +// This workaround addresses the following Metal issue: +// https://github.com/tenstorrent/tt-metal/issues/16118 +// +// TODO(mrakita): Remove this workaround once these Metal issues are fixed +// (tracked by https://github.com/tenstorrent/tt-mlir/issues/1624). +// +template <typename ReduceOp> +class ReduceOpsAllDimsRewritePattern : public OpRewritePattern<ReduceOp> { +public: + using OpRewritePattern<ReduceOp>::OpRewritePattern; + + LogicalResult matchAndRewrite(ReduceOp srcOp, + PatternRewriter &rewriter) const override { + if (!srcOp.getDimArg() || srcOp.getDimArg()->empty()) { + return failure(); + } + + llvm::SmallVector<int64_t> reduceDims = getReduceDims(srcOp.getDimArg()); + llvm::SmallSet<int64_t, 4> uniqueReduceDims(reduceDims.begin(), + reduceDims.end()); + + // Check if reduce is done over all dimensions of the input tensor. + if (uniqueReduceDims.size() != + srcOp.getInput().getType().getShape().size()) { + return failure(); + } + + // When the reduce is done over all dimensions of the input, we need to + // unset the dimensions attribute, because Metal supports reduce over all + // dimensions for any tensor rank when reduce dimensions are not specified, + // but it doesn't support reduce for tensors with rank larger than 2 when + // reduce dimensions are specified. + rewriter.replaceOpWithNewOp<ReduceOp>(srcOp, srcOp.getResult().getType(), + srcOp.getInput(), srcOp.getKeepDim(), + nullptr); + + return success(); + } +}; + +} // namespace mlir::tt::ttnn::workarounds::decomposition + +#endif // TTMLIR_DIALECT_TTNN_TRANSFORMS_WORKAROUNDS_DECOMPOSITION_REDUCEOPSREWRITEPATTERN_H diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp index 4eeec92dcd..cdab2a4b70 100644 --- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp +++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp @@ -116,10 +116,9 @@ class StableHLOToTTIRReduceOpConversionPattern tensor::EmptyOp outputTensor = rewriter.create<tensor::EmptyOp>( srcOp.getLoc(), outputType.getShape(), outputType.getElementType()); - mlir::ArrayAttr dimArg = rewriter.getArrayAttr(SmallVector<Attribute>( - 1, rewriter.getI32IntegerAttr(adaptor.getDimensionsAttr().size() > 0 - ? adaptor.getDimensionsAttr()[0] - : 1))); + // Can't reuse the original dimensions attribute because it uses i64 type.
+ mlir::ArrayAttr dimArg = rewriter.getI32ArrayAttr( + llvm::SmallVector<int32_t>(srcOp.getDimensions())); rewriter.replaceOpWithNewOp<DestOp>( srcOp, outputType, adaptor.getInputs().front(), outputTensor, diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp index 52e68b8113..83bb98baa5 100644 --- a/lib/Dialect/TTIR/IR/TTIROps.cpp +++ b/lib/Dialect/TTIR/IR/TTIROps.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Location.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/LogicalResult.h" @@ -1672,32 +1673,32 @@ static void buildGenericEltwiseUnaryRegion(::mlir::Location loc, opBuilder.create<mlir::tt::ttir::YieldOp>(loc, mlir::ValueRange({result})); } -// AddOp generic region builder +// AddOp generic region builder. void mlir::tt::ttir::AddOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { buildGenericEltwiseBinaryRegion<arith::AddFOp>(getLoc(), opBuilder, block); } -// MultiplyOp generic region builder +// MultiplyOp generic region builder. void mlir::tt::ttir::MultiplyOp::buildGenericRegion( ::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { buildGenericEltwiseBinaryRegion<arith::MulFOp>(getLoc(), opBuilder, block); } -// ExpOp generic region builder +// ExpOp generic region builder. void mlir::tt::ttir::ExpOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { buildGenericEltwiseUnaryRegion<math::ExpOp>(getLoc(), opBuilder, block); } -// DivOp generic region builder +// DivOp generic region builder. void mlir::tt::ttir::DivOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { return buildGenericEltwiseBinaryRegion<arith::DivFOp>(getLoc(), opBuilder, block); } -// MaximumOp generic region builder +// MaximumOp generic region builder. void mlir::tt::ttir::MaximumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { buildGenericEltwiseBinaryRegion<arith::MaximumFOp>(getLoc(), opBuilder, @@ -1708,7 +1709,7 @@ void mlir::tt::ttir::MaximumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, // KernelOp //===----------------------------------------------------------------------===// -// KernelOp builders +// KernelOp builders. static mlir::tt::ttir::KernelOp buildKernelOp(::mlir::OpBuilder &opBuilder, ::mlir::Location loc, ::mlir::StringRef kernelName, ::mlir::StringRef kernelKind, @@ -1717,7 +1718,7 @@ buildKernelOp(::mlir::OpBuilder &opBuilder, ::mlir::Location loc, loc, outputs.getTypes(), kernelName, kernelKind, inputs, outputs); } -// Reduce op kernel builder +// Reduce op kernel builder. static void createReduceOp(::mlir::OpBuilder &opBuilder, ::mlir::Block *block, mlir::Location loc, ::mlir::StringRef kernelKind) { auto kernelOp = buildKernelOp(opBuilder, loc, "reduce", kernelKind, @@ -1725,23 +1726,81 @@ static void createReduceOp(::mlir::OpBuilder &opBuilder, ::mlir::Block *block, opBuilder.create<mlir::tt::ttir::YieldOp>(loc, kernelOp->getResults()); } -// Sum op kernel builder -void mlir::tt::ttir::SumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, +// Common verifier for all Reduce ops.
+static mlir::LogicalResult +verifyReduceOp(mlir::Operation *reduceOp, mlir::RankedTensorType inputType, + const std::optional<mlir::ArrayAttr> &reduceDims) { + if (!reduceDims) { + return mlir::success(); + } + + int64_t inputTensorRank = inputType.getRank(); + + llvm::SmallSet<int64_t, 4> uniqueReduceDims; + for (mlir::Attribute reduceDim : *reduceDims) { + int64_t reduceDimInt = mlir::cast<mlir::IntegerAttr>(reduceDim).getInt(); + if (reduceDimInt < -inputTensorRank || reduceDimInt >= inputTensorRank) { + return reduceOp->emitOpError("Reduce dimensions are out of range"); + } + uniqueReduceDims.insert(reduceDimInt); + } + + if (uniqueReduceDims.size() != reduceDims->size()) { + return reduceOp->emitOpError("Reduce dimensions are not unique"); + } + + // TODO(mrakita): Add a check that, based on inputShape, reduceDims and + // keepDim, computes the expected output shape and verifies that it matches + // the actual output shape. Tracked by: + // https://github.com/tenstorrent/tt-mlir/issues/1639 + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// MaxOp +//===----------------------------------------------------------------------===// + +// MaxOp kernel builder. +void mlir::tt::ttir::MaxOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { // NOLINTNEXTLINE - createReduceOp(opBuilder, block, getLoc(), "sum"); + createReduceOp(opBuilder, block, getLoc(), "max"); +} + +// MaxOp verification. +::mlir::LogicalResult mlir::tt::ttir::MaxOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); } -// Mean op kernel builder +//===----------------------------------------------------------------------===// +// MeanOp +//===----------------------------------------------------------------------===// + +// MeanOp kernel builder. void mlir::tt::ttir::MeanOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { // NOLINTNEXTLINE createReduceOp(opBuilder, block, getLoc(), "mean"); } -// Max op kernel builder -void mlir::tt::ttir::MaxOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, +// MeanOp verification. +::mlir::LogicalResult mlir::tt::ttir::MeanOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); +} + +//===----------------------------------------------------------------------===// +// SumOp +//===----------------------------------------------------------------------===// + +// SumOp kernel builder. +void mlir::tt::ttir::SumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder, ::mlir::Block *block) { // NOLINTNEXTLINE - createReduceOp(opBuilder, block, getLoc(), "max"); + createReduceOp(opBuilder, block, getLoc(), "sum"); +} + +// SumOp verification. +::mlir::LogicalResult mlir::tt::ttir::SumOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); } diff --git a/lib/Dialect/TTNN/IR/TTNNOps.cpp b/lib/Dialect/TTNN/IR/TTNNOps.cpp index e3fc5a33c0..286393858d 100644 --- a/lib/Dialect/TTNN/IR/TTNNOps.cpp +++ b/lib/Dialect/TTNN/IR/TTNNOps.cpp @@ -1310,4 +1310,52 @@ ::mlir::LogicalResult mlir::tt::ttnn::PermuteOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// Reduction ops +//===----------------------------------------------------------------------===// + +// Common verifier for all Reduction ops.
+static mlir::LogicalResult +verifyReduceOp(mlir::Operation *reduceOp, mlir::RankedTensorType inputType, + const std::optional<mlir::ArrayAttr> &reduceDims) { + int64_t inputTensorRank = inputType.getRank(); + + // TODO(mrakita): Only the last two dimensions can be reduced; add a check + // for that too. + if (reduceDims && reduceDims->size() > 2 && + static_cast<int64_t>(reduceDims->size()) != inputTensorRank) { + return reduceOp->emitOpError("Reduce on more than two dimensions is not " + "currently supported by TTNN"); + } + + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// MaxOp +//===----------------------------------------------------------------------===// + +// MaxOp verification. +::mlir::LogicalResult MaxOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); +} + +//===----------------------------------------------------------------------===// +// MeanOp +//===----------------------------------------------------------------------===// + +// MeanOp verification. +::mlir::LogicalResult MeanOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); +} + +//===----------------------------------------------------------------------===// +// SumOp +//===----------------------------------------------------------------------===// + +// SumOp verification. +::mlir::LogicalResult SumOp::verify() { + return verifyReduceOp(getOperation(), getInput().getType(), getDimArg()); +} + } // namespace mlir::tt::ttnn diff --git a/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp b/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp index c7bf769ddc..d16a748226 100644 --- a/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp +++ b/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp @@ -494,6 +494,24 @@ TTNNLayoutAttr::withShardShape(::mlir::MLIRContext *context, getMemLayout()); } +// Construct a new TTNNLayoutAttr +// +// This function creates a deep copy of the current TTNNLayoutAttr and +// applies the changes necessary to fit the new tensor shape. +// +// param context The MLIR context. +// param tensorShape The new tensor shape. +// return The new TTNNLayoutAttr with the given tensor shape. +TTNNLayoutAttr TTNNLayoutAttr::withTensorShape(::mlir::MLIRContext *context, + ArrayRef<int64_t> tensorShape) { + // TODO(mrakita): This leaves the default value of the collapseIntervals + // parameter, which might be different from the original value used to create + // the layout attribute. This works for now since we always use the default + // value, but in the future we will need to take this into account. + return TTNNLayoutAttr::get(context, tensorShape, getElementType(), + getBufferType(), getGrid(), getMemLayout()); +} + // Construct a new TTNNLayoutAttr // // This function constructs a new TTNNLayoutAttr with the given parameters.
diff --git a/lib/Dialect/TTNN/Transforms/CMakeLists.txt b/lib/Dialect/TTNN/Transforms/CMakeLists.txt index fd21e03d0c..1aae802c62 100644 --- a/lib/Dialect/TTNN/Transforms/CMakeLists.txt +++ b/lib/Dialect/TTNN/Transforms/CMakeLists.txt @@ -3,7 +3,8 @@ add_mlir_dialect_library(MLIRTTNNTransforms Passes.cpp TTNNLayout.cpp TTNNToCpp.cpp - TTNNWorkarounds.cpp + Workarounds/Decomposition/ReduceOpsRewritePattern.cpp + Workarounds/TTNNWorkarounds.cpp ADDITIONAL_HEADER_DIRS ${PROJECT_SOURCE_DIR}/include/ttmlir diff --git a/lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp b/lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp new file mode 100644 index 0000000000..99b61ef0b4 --- /dev/null +++ b/lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#include "ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h" + +#include <algorithm> + +namespace mlir::tt::ttnn::workarounds::decomposition { + +llvm::SmallVector<int64_t> +getReduceDims(const std::optional<mlir::ArrayAttr> &dimArg) { + llvm::SmallVector<int64_t> reduceDims; + if (!dimArg) { + return reduceDims; + } + + for (const mlir::Attribute &reduceDim : *dimArg) { + reduceDims.push_back(mlir::cast<mlir::IntegerAttr>(reduceDim).getInt()); + } + + return reduceDims; +} + +llvm::SmallVector<int64_t> +calculateNewReduceShape(RankedTensorType inputType, + const std::optional<mlir::ArrayAttr> &dimArg) { + llvm::SmallVector<int64_t> outputShapeVec(inputType.getShape()); + llvm::SmallVector<int64_t> reduceDims = getReduceDims(dimArg); + + if (reduceDims.empty()) { + // When reduce dimensions are not specified, we are reducing over all + // dimensions, so all dimensions of the output shape become 1. + std::fill(outputShapeVec.begin(), outputShapeVec.end(), 1); + } else { + // Dimensions can be specified as negative numbers, so to calculate the + // index in the output shape vector we need to add the output shape rank + // to them. + int64_t outputShapeRank = static_cast<int64_t>(outputShapeVec.size()); + for (const int64_t reduceDim : reduceDims) { + int64_t outputShapeIndex = + reduceDim < 0 ? outputShapeRank + reduceDim : reduceDim; + outputShapeVec[static_cast<size_t>(outputShapeIndex)] = 1; + } + } + + return outputShapeVec; +} + +} // namespace mlir::tt::ttnn::workarounds::decomposition diff --git a/lib/Dialect/TTNN/Transforms/TTNNWorkarounds.cpp b/lib/Dialect/TTNN/Transforms/Workarounds/TTNNWorkarounds.cpp similarity index 88% rename from lib/Dialect/TTNN/Transforms/TTNNWorkarounds.cpp rename to lib/Dialect/TTNN/Transforms/Workarounds/TTNNWorkarounds.cpp index 2c0c48dbcc..eed6af498b 100644 --- a/lib/Dialect/TTNN/Transforms/TTNNWorkarounds.cpp +++ b/lib/Dialect/TTNN/Transforms/Workarounds/TTNNWorkarounds.cpp @@ -8,6 +8,7 @@ #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h" #include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h" #include "ttmlir/Dialect/TTNN/IR/TTNNWorkarounds.h" +#include "ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h" #include "ttmlir/Dialect/TTNN/Types/Types.h" #include "ttmlir/Dialect/TTNN/Utils/TransformUtils.h" #include "ttmlir/Dialect/TTNN/Utils/Utils.h" @@ -399,44 +400,55 @@ class TTNNWorkarounds : public impl::TTNNWorkaroundsBase<TTNNWorkarounds> { void runOnOperation() final { if (decompositionWorkaroundsEnabled) { - // Placeholder for workaround decomposition patterns.
RewritePatternSet patterns(&getContext()); - patterns.add(&getContext()); - - FrozenRewritePatternSet patternSet(std::move(patterns)); - GreedyRewriteConfig config = GreedyRewriteConfig(); - config.useTopDownTraversal = true; - config.maxIterations = GreedyRewriteConfig::kNoLimit; - if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet, - config))) { - signalPassFailure(); - return; - } + patterns.add<workarounds::decomposition::ReduceOpsKeepDimRewritePattern< + ttnn::SumOp>, + workarounds::decomposition::ReduceOpsKeepDimRewritePattern< + ttnn::MaxOp>, + workarounds::decomposition::ReduceOpsKeepDimRewritePattern< + ttnn::MeanOp>, + workarounds::decomposition::ReduceOpsAllDimsRewritePattern< + ttnn::SumOp>, + workarounds::decomposition::ReduceOpsAllDimsRewritePattern< + ttnn::MaxOp>, + workarounds::decomposition::ReduceOpsAllDimsRewritePattern< + ttnn::MeanOp>>(&getContext()); + + runRewritePatterns(std::move(patterns), + GreedyRewriteConfig::kNoLimit /*maxIterations*/); } if (layouotWorkaroundsEnabled) { RewritePatternSet patterns(&getContext()); patterns.add(&getContext()); - FrozenRewritePatternSet patternSet(std::move(patterns)); - GreedyRewriteConfig config = GreedyRewriteConfig(); - // This configuration specifies that the rewriter should traverse the IR - // in a top-down order. - config.useTopDownTraversal = true; - // This configuration specifies the maximum number of iterations the - // rewriter will perform on the IR. The rewriter will iterate through the - // IR until a fixpoint is reached. All workarounds should be applied - // during the first iteration. If the workarounds are not applied in the - // first iteration, it indicates a bug in the workarounds implementation. - // Although the workarounds are applied in the first iteration, the - // rewriter must iterate through the IR once more to confirm that the - // fixpoint is reached. If the fixpoint is not reached in the second - // iteration, it indicates a bug in the workarounds implementation. - config.maxIterations = 2; - if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet, - config))) { - signalPassFailure(); - return; - } + // All layout workarounds should be applied during the first iteration. If + // the workarounds are not applied in the first iteration, it indicates a + // bug in the workarounds implementation. Although the workarounds are + // applied in the first iteration, the rewriter must iterate through the + // IR once more to confirm that the fixpoint is reached. If the fixpoint + // is not reached in the second iteration, it indicates a bug in the + // workarounds implementation. + const int64_t maxIterations = 2; + runRewritePatterns(std::move(patterns), maxIterations); + } + } + +private: + // Runs rewrite patterns with the specified maximum number of iterations the + // rewriter will perform on the IR. The rewriter will iterate through the IR + // until a fixpoint is reached. + void runRewritePatterns(RewritePatternSet &&patterns, int64_t maxIterations) { + FrozenRewritePatternSet patternSet(std::move(patterns)); + GreedyRewriteConfig config = GreedyRewriteConfig(); + config.maxIterations = maxIterations; + // This configuration specifies that the rewriter should traverse the IR + // in a top-down order.
+ config.useTopDownTraversal = true; + if (failed( applyPatternsAndFoldGreedily(getOperation(), patternSet, config))) { signalPassFailure(); return; } } }; diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir index b7058b3dea..66f3ce4e10 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir @@ -1,10 +1,113 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s module @jit_reduce_add attributes {} { - func.func public @test_reduce_add(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + func.func public @test_reduce_add_4to3dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32x4xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<128x32x4xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32x4xf32> + return %0 : tensor<128x32x4xf32> + } + + func.func public @test_reduce_add_4to2dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<128x32xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32xf32> + return %0 : tensor<128x32xf32> + } + + func.func public @test_reduce_add_4to1dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32, 2 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<128xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128xf32> + return %0 : tensor<128xf32> + } + + func.func public @test_reduce_add_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_add_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xf32> + // CHECK-SAME: -> tensor<128x4xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32> + return %0 : tensor<128x4xf32> + } + + func.func public @test_reduce_add_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32, 2 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xf32> + // CHECK-SAME: -> tensor<128xf32> + %0 =
stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32> + return %0 : tensor<128xf32> + } + + func.func public @test_reduce_add_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_add_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xf32> + // CHECK-SAME: -> tensor<128xf32> %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32> - // CHECK: %[[C:.*]] = tensor.empty[[C:.*]] - // CHECK: %[[C:.*]] = "ttir.sum"[[C:.*]] return %0 : tensor<128xf32> } + + func.func public @test_reduce_add_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [0 : i32, 1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_add_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.sum" + // CHECK-SAME: dim_arg = [0 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } } diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir index ea03c57669..81fb59bfb8 100644 --- a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir +++ b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir @@ -1,10 +1,113 @@ // REQUIRES: stablehlo // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s module @jit_reduce_maximum attributes {} { - func.func public @test_reduce_maximum(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + func.func public @test_reduce_maximum_4to3dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32x4xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<128x32x4xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32x4xf32> + return %0 : tensor<128x32x4xf32> + } + + func.func public @test_reduce_maximum_4to2dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [1 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<128x32xf32> + %0 = stablehlo.reduce(%arg0 init:
%cst_0) applies stablehlo.maximum across dimensions = [1, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32xf32> + return %0 : tensor<128x32xf32> + } + + func.func public @test_reduce_maximum_4to1dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [1 : i32, 2 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<128xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128xf32> + return %0 : tensor<128xf32> + } + + func.func public @test_reduce_maximum_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32, 3 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x32x4xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_maximum_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xf32> + // CHECK-SAME: -> tensor<128x4xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32> + return %0 : tensor<128x4xf32> + } + + func.func public @test_reduce_maximum_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [1 : i32, 2 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xf32> + // CHECK-SAME: -> tensor<128xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32> + return %0 : tensor<128xf32> + } + + func.func public @test_reduce_maximum_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10x4xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_maximum_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xf32> + // CHECK-SAME: -> tensor<128xf32> %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32> - // CHECK: %[[C:.*]] = tensor.empty[[C:.*]] - // CHECK: %[[C:.*]] = "ttir.max"[[C:.*]] return %0 : tensor<128xf32> } + + func.func public @test_reduce_maximum_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [0 : i32, 1 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128x10xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 =
stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_maximum_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: tensor.empty + // CHECK: "ttir.max" + // CHECK-SAME: dim_arg = [0 : i32] + // CHECK-SAME: keep_dim = false + // CHECK-SAME: tensor<128xf32> + // CHECK-SAME: -> tensor<1xf32> + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } } diff --git a/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir new file mode 100644 index 0000000000..565745d057 --- /dev/null +++ b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir @@ -0,0 +1,9 @@ +// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s +// Negative tests for reduce ops + +// CHECK: error: 'ttir.sum' op Reduce dimensions are out of range +func.func public @test_reduce_add_invalid_dim_high(%arg0: tensor<128x10xf32>, %arg1: tensor<1xf32>) -> tensor<128xf32> { + %0 = tensor.empty() : tensor<128xf32> + %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [2 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32> + return %1 : tensor<128xf32> +} diff --git a/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir new file mode 100644 index 0000000000..bd4a237d46 --- /dev/null +++ b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir @@ -0,0 +1,9 @@ +// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s +// Negative tests for reduce ops + +// CHECK: error: 'ttir.sum' op Reduce dimensions are out of range +func.func public @test_reduce_add_invalid_dim_low(%arg0: tensor<128x10xf32>, %arg1: tensor<1xf32>) -> tensor<128xf32> { + %0 = tensor.empty() : tensor<128xf32> + %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [-3 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32> + return %1 : tensor<128xf32> +} diff --git a/test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir new file mode 100644 index 0000000000..13649e1e65 --- /dev/null +++ b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir @@ -0,0 +1,9 @@ +// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s +// Negative tests for reduce ops + +// CHECK: error: 'ttir.sum' op Reduce dimensions are not unique +func.func public @test_reduce_add_repeating_dims(%arg0: tensor<128x10x32x4xf32>, %arg1: tensor<1xf32>) -> tensor<128xf32> { + %0 = tensor.empty() : tensor<128xf32> + %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1 : i32, 2 : i32, 3 : i32, 2 : i32], keep_dim = false}> : (tensor<128x10x32x4xf32>, tensor<128xf32>) -> tensor<128xf32> + return %1 : tensor<128xf32> +} diff --git a/test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir b/test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir new file mode 100644 index 0000000000..ac587303ed --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir @@ -0,0 +1,10 @@ +// RUN: not ttmlir-opt --split-input-file --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s 2>&1 | FileCheck %s +// Negative tests for Max op.
+module { + func.func @forward(%arg0: tensor<128x32x10x4xbf16>) -> tensor<128x1x1x1xbf16> { + %0 = tensor.empty() : tensor<128x1x1x1xbf16> + // CHECK: error: 'ttnn.max' op Reduce on more than two dimensions is not currently supported by TTNN + %1 = "ttir.max"(%arg0, %0) <{dim_arg = [1: i32, 2: i32, 3: i32], keep_dim = true}> : (tensor<128x32x10x4xbf16>, tensor<128x1x1x1xbf16>) -> tensor<128x1x1x1xbf16> + return %1 : tensor<128x1x1x1xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir b/test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir new file mode 100644 index 0000000000..768b220bb1 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir @@ -0,0 +1,10 @@ +// RUN: not ttmlir-opt --split-input-file --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s 2>&1 | FileCheck %s +// Negative tests for Mean op. +module { + func.func @forward(%arg0: tensor<128x32x10x4xbf16>) -> tensor<128x1x1x1xbf16> { + %0 = tensor.empty() : tensor<128x1x1x1xbf16> + // CHECK: error: 'ttnn.mean' op Reduce on more than two dimensions is not currently supported by TTNN + %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [1: i32, 2: i32, 3: i32], keep_dim = true}> : (tensor<128x32x10x4xbf16>, tensor<128x1x1x1xbf16>) -> tensor<128x1x1x1xbf16> + return %1 : tensor<128x1x1x1xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir b/test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir new file mode 100644 index 0000000000..c0c634f051 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir @@ -0,0 +1,10 @@ +// RUN: not ttmlir-opt --split-input-file --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s 2>&1 | FileCheck %s +// Negative tests for Sum op. +module { + func.func @forward(%arg0: tensor<128x32x10x4xbf16>) -> tensor<128x1x1x1xbf16> { + %0 = tensor.empty() : tensor<128x1x1x1xbf16> + // CHECK: error: 'ttnn.sum' op Reduce on more than two dimensions is not currently supported by TTNN + %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1: i32, 2: i32, 3: i32], keep_dim = true}> : (tensor<128x32x10x4xbf16>, tensor<128x1x1x1xbf16>) -> tensor<128x1x1x1xbf16> + return %1 : tensor<128x1x1x1xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir b/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir index 9da138bbb8..89f51123e6 100644 --- a/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir +++ b/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir @@ -1,22 +1,108 @@ // REQUIRES: stablehlo // RUN: rm -rf %t.ttnn // RUN: rm -rf %t.mlir -// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir -// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir // RUN: FileCheck --input-file=%t.mlir %s +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn // UNSUPPORTED: true -// error: keepdim=False is not supported +// These tests are currently failing until a fix for this issue is uplifted +// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104 +// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640 module @jit_reduce_add attributes {} { - func.func public @test_reduce_add(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { - // CHECK-LABEL: func.func public @test_reduce_add - // CHECK: ttnn.sum - // CHECK-SAME: dim_arg = [1 : i32], - // CHECK-SAME: keep_dim = false - // CHECK-SAME: tensor<128x10xf32 + func.func public @test_reduce_add_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.sum" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x32x4xf32, + // CHECK-SAME: -> tensor<1x1x1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [1 : i32] + // CHECK-SAME: tensor<1x1x1x1xf32, + // CHECK-SAME: -> tensor<1xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_add_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> { + // CHECK: "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x4xf32, + // CHECK-SAME: -> tensor<128x1x4xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32, 4 : i32] + // CHECK-SAME: tensor<128x1x4xf32, + // CHECK-SAME: -> tensor<128x4xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32> + return %0 : tensor<128x4xf32> + } + + func.func public @test_reduce_add_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32, 2 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x4xf32, + // CHECK-SAME: -> tensor<128x1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1x1xf32, + // CHECK-SAME: -> tensor<128xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32> + return %0 : tensor<128xf32> + } + + func.func public @test_reduce_add_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.sum" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x4xf32, + // CHECK-SAME: -> tensor<1x1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [1 : i32] + // CHECK-SAME: tensor<1x1x1xf32, + // CHECK-SAME: -> tensor<1xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_add_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1xf32, // CHECK-SAME: -> tensor<128xf32, %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32> return %0 : tensor<128xf32> } + + func.func public @test_reduce_add_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.sum" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: ->
tensor<1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [1 : i32] + // CHECK-SAME: tensor<1x1xf32, + // CHECK-SAME: -> tensor<1xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_add_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.sum" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128xf32, + // CHECK-SAME: -> tensor<1xf32, + // CHECK-NOT: "ttnn.reshape" + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } } diff --git a/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir b/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir index 57318948e5..8ee57fd52f 100644 --- a/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir +++ b/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir @@ -1,22 +1,108 @@ // REQUIRES: stablehlo // RUN: rm -rf %t.ttnn // RUN: rm -rf %t.mlir -// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir -// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir // RUN: FileCheck --input-file=%t.mlir %s +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn // UNSUPPORTED: true -// error: keepdim=False is not supported +// These tests are currently failing until a fix for this issue is uplifted +// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104 +// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640 module @jit_reduce_maximum attributes {} { - func.func public @test_reduce_maximum(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { - // CHECK-LABEL: func.func public @test_reduce_maximum - // CHECK: ttnn.max - // CHECK-SAME: dim_arg = [1 : i32], - // CHECK-SAME: keep_dim = false} + func.func public @test_reduce_maximum_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.max" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x32x4xf32, + // CHECK-SAME: -> tensor<1x1x1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [1 : i32] + // CHECK-SAME: tensor<1x1x1x1xf32, + // CHECK-SAME: -> tensor<1xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_maximum_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> { + // CHECK: "ttnn.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x4xf32, + // CHECK-SAME: -> tensor<128x1x4xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32, 4 : i32] + // CHECK-SAME: tensor<128x1x4xf32, + // CHECK-SAME: -> tensor<128x4xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32> + return %0 : tensor<128x4xf32> + } + + func.func public @test_reduce_maximum_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: "ttnn.max" + // CHECK-SAME: dim_arg = [1 : i32, 2 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x4xf32, + // CHECK-SAME: -> tensor<128x1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1x1xf32, + // CHECK-SAME: -> tensor<128xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32> + return %0 : tensor<128xf32> + } + + func.func public @test_reduce_maximum_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.max" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10x4xf32, + // CHECK-SAME: -> tensor<1x1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [1 : i32] + // CHECK-SAME: tensor<1x1x1xf32, + // CHECK-SAME: -> tensor<1xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_maximum_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> { + // CHECK: "ttnn.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1xf32, + // CHECK-SAME: -> tensor<128xf32, %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32> return %0 : tensor<128xf32> } + + func.func public @test_reduce_maximum_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.max" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + //
CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<1x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [1 : i32] + // CHECK-SAME: tensor<1x1xf32, + // CHECK-SAME: -> tensor<1xf32, + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } + + func.func public @test_reduce_maximum_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> { + // CHECK: "ttnn.max" + // CHECK-NOT: dim_arg + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128xf32, + // CHECK-SAME: -> tensor<1xf32, + // CHECK-NOT: "ttnn.reshape" + %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32> + return %0 : tensor<f32> + } } diff --git a/test/ttmlir/Silicon/TTNN/simple_max.mlir b/test/ttmlir/Silicon/TTNN/simple_max.mlir new file mode 100644 index 0000000000..8ec3bdc591 --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/simple_max.mlir @@ -0,0 +1,39 @@ +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// UNSUPPORTED: true +// These tests are currently failing until a fix for this issue is uplifted +// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104 +// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed. +// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640 + +module { + func.func public @reduce_not_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128xf32> { + %0 = tensor.empty() : tensor<128xf32> + // CHECK: "ttnn.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1xf32, + // CHECK-SAME: -> tensor<128xf32, + %1 = "ttir.max"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32> + return %1 : tensor<128xf32> + } + + func.func public @reduce_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128x1xf32> { + %0 = tensor.empty() : tensor<128x1xf32> + // CHECK: "ttnn.max" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK-NOT: "ttnn.reshape" + %1 = "ttir.max"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10xf32>, tensor<128x1xf32>) -> tensor<128x1xf32> + return %1 : tensor<128x1xf32> + } +} diff --git a/test/ttmlir/Silicon/TTNN/simple_mean.mlir b/test/ttmlir/Silicon/TTNN/simple_mean.mlir index 0a3250936b..476dcd9aba 100644 --- a/test/ttmlir/Silicon/TTNN/simple_mean.mlir +++ b/test/ttmlir/Silicon/TTNN/simple_mean.mlir @@ -1,12 +1,39 @@ +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir // RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir // RUN: FileCheck %s --input-file=%t.mlir // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn // UNSUPPORTED: true +// These tests are currently failing until a fix for this issue is uplifted +// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104 +// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640 + module { - func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x32xbf16> { - %0 = tensor.empty() : tensor<512x32xbf16> - // CHECK: %[[C:.*]] = "ttnn.mean"[[C:.*]] - %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [-1: i32], keep_dim = true}> : (tensor<512x1024xbf16>, tensor<512x32xbf16>) -> tensor<512x32xbf16> - return %1 : tensor<512x32xbf16> + func.func public @reduce_not_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128xf32> { + %0 = tensor.empty() : tensor<128xf32> + // CHECK: "ttnn.mean" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1xf32, + // CHECK-SAME: -> tensor<128xf32, + %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32> + return %1 : tensor<128xf32> + } + + func.func public @reduce_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128x1xf32> { + %0 = tensor.empty() : tensor<128x1xf32> + // CHECK: "ttnn.mean" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK-NOT: "ttnn.reshape" + %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10xf32>, tensor<128x1xf32>) -> tensor<128x1xf32> + return %1 : tensor<128x1xf32> } } diff --git a/test/ttmlir/Silicon/TTNN/simple_sum.mlir b/test/ttmlir/Silicon/TTNN/simple_sum.mlir new file mode 100644 index 0000000000..cb1904a34e --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/simple_sum.mlir @@ -0,0 +1,39 @@ +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// UNSUPPORTED: true +// These tests are currently failing until a fix for this issue is uplifted +// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104 +// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed. +// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640 + +module { + func.func public @reduce_not_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128xf32> { + %0 = tensor.empty() : tensor<128xf32> + // CHECK: "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK: "ttnn.reshape" + // CHECK-SAME: shape = [128 : i32] + // CHECK-SAME: tensor<128x1xf32, + // CHECK-SAME: -> tensor<128xf32, + %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32> + return %1 : tensor<128xf32> + } + + func.func public @reduce_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128x1xf32> { + %0 = tensor.empty() : tensor<128x1xf32> + // CHECK: "ttnn.sum" + // CHECK-SAME: dim_arg = [1 : i32] + // CHECK-SAME: keep_dim = true + // CHECK-SAME: tensor<128x10xf32, + // CHECK-SAME: -> tensor<128x1xf32, + // CHECK-NOT: "ttnn.reshape" + %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10xf32>, tensor<128x1xf32>) -> tensor<128x1xf32> + return %1 : tensor<128x1xf32> + } +}