Mark all TTIR and TTNN ops as pure #1481

Merged · 1 commit · Dec 4, 2024
3 changes: 2 additions & 1 deletion include/ttmlir/Dialect/TTIR/IR/TTIRBase.td
@@ -6,6 +6,7 @@
#define TTMLIR_TTMLIR_DIALECT_TTIR_TTIRDIALECT_TD

include "mlir/IR/OpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

//===----------------------------------------------------------------------===//
// TTIR dialect definition.
@@ -38,6 +39,6 @@ def TTIR_Dialect : Dialect {
//===----------------------------------------------------------------------===//

class TTIR_Op<string mnemonic, list<Trait> traits = []> :
-    Op<TTIR_Dialect, mnemonic, traits>;
+    Op<TTIR_Dialect, mnemonic, !listconcat(traits, [Pure])>;
Contributor:
Should Pure be a top-level trait? I'm thinking about alloc and dealloc. Every dealloc will trivially be removed, since it doesn't produce a result.

Contributor:
Good point; we should probably adopt the MemAlloc and MemFree traits for those, respectively.
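
For illustration only (not something this PR adds): a follow-up along those lines could look roughly like the sketch below. The op names and signatures are assumptions; the point is declaring the effects explicitly via Op<> (or a base-class variant that does not append Pure), using the MemAlloc/MemFree effects from the SideEffectInterfaces.td include that this PR already adds.

// Hypothetical sketch, not part of this change.
def TTIR_AllocOp : Op<TTIR_Dialect, "alloc", [MemoryEffects<[MemAlloc]>]> {
  let summary = "Allocation op; carries an explicit allocate effect instead of Pure.";
  let results = (outs AnyRankedTensor:$result);
}

def TTIR_DeallocOp : Op<TTIR_Dialect, "dealloc", [MemoryEffects<[MemFree]>]> {
  let summary = "Deallocation op; the free effect keeps DCE from dropping it.";
  let arguments = (ins AnyRankedTensor:$input);
}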


#endif
3 changes: 2 additions & 1 deletion include/ttmlir/Dialect/TTNN/IR/TTNNBase.td
@@ -6,6 +6,7 @@
#define TTMLIR_TTMLIR_DIALECT_TTNN_TTNNDIALECT_TD

include "mlir/IR/OpBase.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "ttmlir/Dialect/TTNN/IR/TTNNOpModelInterface.td"
include "ttmlir/Dialect/TTNN/IR/TTNNWorkaroundInterface.td"

@@ -45,6 +46,6 @@ def TTNN_Dialect : Dialect {
//===----------------------------------------------------------------------===//

class TTNN_Op<string mnemonic, list<Trait> traits = []> :
-    Op<TTNN_Dialect, mnemonic, !listconcat(traits, [TTNN_OpModelInterface, TTNN_WorkaroundInterface])>;
+    Op<TTNN_Dialect, mnemonic, !listconcat(traits, [Pure, TTNN_OpModelInterface, TTNN_WorkaroundInterface])>;
Contributor:
Same as above.


#endif
4 changes: 1 addition & 3 deletions include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -794,9 +794,7 @@ def TTNN_ClampOp : TTNN_Op<"clamp"> {
let hasVerifier = 1;
}

-// Note: NoMemoryEffect is used to indicate that operation can be removed if it is not used.
-// Removal of this operation is done by the dead code elimination pass (RemoveDeadValuesPass).
-def TTNN_EmptyOp : TTNN_Op<"empty", [NoMemoryEffect]> {
+def TTNN_EmptyOp : TTNN_Op<"empty"> {
let summary = "Empty op.";
let description = [{
Tensor empty operation
22 changes: 22 additions & 0 deletions test/ttmlir/Dialect/TTIR/test_remove_dead_values_pass.mlir
@@ -0,0 +1,22 @@
// RUN: ttmlir-opt --remove-dead-values %s | FileCheck %s
#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
module attributes {} {
func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
%0 = tensor.empty() : tensor<64x128xf32>
// CHECK: %[[C:.*]] = "ttir.multiply"[[C:.*]]
%1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
%2 = tensor.empty() : tensor<64x128xf32>
// CHECK-NOT: %[[C:.*]] = "ttir.add"[[C:.*]]
%3 = "ttir.add"(%arg0, %arg1, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
%4 = tensor.empty() : tensor<64x128xf32>
// CHECK-NOT: %[[C:.*]] = "ttir.subtract"[[C:.*]]
%5 = "ttir.subtract"(%arg0, %arg1, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
%6 = tensor.empty() : tensor<64x128xf32>
// CHECK-NOT: %[[C:.*]] = "ttir.div"[[C:.*]]
%7 = "ttir.div"(%arg0, %arg1, %6) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
%8 = tensor.empty() : tensor<64x128xf32>
// CHECK-NOT: %[[C:.*]] = "ttir.eq"[[C:.*]]
%9 = "ttir.eq"(%arg0, %arg1, %8) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
return %1 : tensor<64x128xf32>
}
}
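
For reference, a minimal sketch of the IR that --remove-dead-values is expected to leave for the function above, based on the CHECK/CHECK-NOT directives; exact SSA numbering may differ, and the unused tensor.empty results should disappear along with the ops that consumed them:

func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
  %0 = tensor.empty() : tensor<64x128xf32>
  %1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32>
  return %1 : tensor<64x128xf32>
}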
77 changes: 77 additions & 0 deletions test/ttmlir/Dialect/TTNN/test_remove_dead_values_pass.mlir
@@ -0,0 +1,77 @@
// RUN: ttmlir-opt --remove-dead-values %s | FileCheck %s
#device = #tt.device<workerGrid = #tt.grid<8x8, (d0, d1) -> (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>
#dram = #ttnn.buffer_type<dram>
#system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 1x0, 1x1, 1x2, 1x3, 1x4, 1x5, 1x6, 1x7, 2x0, 2x1, 2x2, 2x3, 2x4, 2x5, 2x6, 2x7, 3x0, 3x1, 3x2, 3x3, 3x4, 3x5, 3x6, 3x7, 4x0, 4x1, 4x2, 4x3, 4x4, 4x5, 4x6, 4x7, 5x0, 5x1, 5x2, 5x3, 5x4, 5x5, 5x6, 5x7, 6x0, 6x1, 6x2, 6x3, 6x4, 6x5, 6x6, 6x7, 7x0, 7x1, 7x2, 7x3, 7x4, 7x5, 7x6, 7x7] dram = [ 8x0, 9x0, 10x0, 8x1, 9x1, 10x1, 8x2, 9x2, 10x2, 8x3, 9x3, 10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16, 16x16, 32x16, 4x32, 16x32, 32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
#system_memory = #ttnn.buffer_type<system_memory>
#ttnn_layout = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<64x128xf32, #system_memory>>
#ttnn_layout1 = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<2x4x!tt.tile<32x32, f32>, #dram>, interleaved>
#ttnn_layout2 = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<64x128xf32, #dram>, interleaved>
module attributes {tt.device = #device, tt.system_desc = #system_desc} {
func.func @forward(%arg0: tensor<64x128xf32, #ttnn_layout>, %arg1: tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout> {
%0 = "ttnn.get_device"() <{mesh_shape = #ttnn<mesh_shape 1x1>}> : () -> !tt.device<#device>
%1 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%2 = "ttnn.to_device"(%1, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%1) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%3 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%4 = "ttnn.to_device"(%3, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%3) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%5 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
// CHECK: %[[C:.*]] = "ttnn.multiply"[[C:.*]]
%6 = "ttnn.multiply"(%2, %4, %5) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
"ttnn.deallocate"(%4) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
"ttnn.deallocate"(%2) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%7 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%8 = "ttnn.to_device"(%7, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%7) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%9 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%10 = "ttnn.to_device"(%9, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%9) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
// CHECK-NOT: %[[C:.*]] = "ttnn.add"[[C:.*]]
%12 = "ttnn.add"(%8, %10, %11) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
"ttnn.deallocate"(%11) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
"ttnn.deallocate"(%10) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
"ttnn.deallocate"(%8) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%13 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%14 = "ttnn.to_device"(%13, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%13) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%15 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%16 = "ttnn.to_device"(%15, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%15) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%17 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
// CHECK-NOT: %[[C:.*]] = "ttnn.subtract"[[C:.*]]
%18 = "ttnn.subtract"(%14, %16, %17) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
"ttnn.deallocate"(%17) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
"ttnn.deallocate"(%16) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
"ttnn.deallocate"(%14) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%19 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%20 = "ttnn.to_device"(%19, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%19) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%21 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%22 = "ttnn.to_device"(%21, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%21) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%23 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
// CHECK-NOT: %[[C:.*]] = "ttnn.div"[[C:.*]]
%24 = "ttnn.div"(%20, %22, %23) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
"ttnn.deallocate"(%23) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
"ttnn.deallocate"(%22) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
"ttnn.deallocate"(%20) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%25 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%26 = "ttnn.to_device"(%25, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%25) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%27 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
%28 = "ttnn.to_device"(%27, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
"ttnn.deallocate"(%27) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%29 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
// CHECK-NOT: %[[C:.*]] = "ttnn.eq"[[C:.*]]
%30 = "ttnn.eq"(%26, %28, %29) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
"ttnn.deallocate"(%29) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
"ttnn.deallocate"(%28) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
"ttnn.deallocate"(%26) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
%31 = "ttnn.from_device"(%6) : (tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout>
"ttnn.deallocate"(%5) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
%32 = "ttnn.to_layout"(%31) <{layout = #ttnn.layout<row_major>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout>
"ttnn.deallocate"(%31) <{force = false}> : (tensor<64x128xf32, #ttnn_layout>) -> ()
return %32 : tensor<64x128xf32, #ttnn_layout>
}
}