diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIRBase.td b/include/ttmlir/Dialect/TTIR/IR/TTIRBase.td
index 57f3dc37d3..b71541b8dc 100644
--- a/include/ttmlir/Dialect/TTIR/IR/TTIRBase.td
+++ b/include/ttmlir/Dialect/TTIR/IR/TTIRBase.td
@@ -6,6 +6,7 @@
 #define TTMLIR_TTMLIR_DIALECT_TTIR_TTIRDIALECT_TD
 
 include "mlir/IR/OpBase.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
 //===----------------------------------------------------------------------===//
 // TTIR dialect definition.
 //===----------------------------------------------------------------------===//
@@ -38,6 +39,6 @@ def TTIR_Dialect : Dialect {
 //===----------------------------------------------------------------------===//
 
 class TTIR_Op<string mnemonic, list<Trait> traits = []> :
-    Op<TTIR_Dialect, mnemonic, traits>;
+    Op<TTIR_Dialect, mnemonic, !listconcat(traits, [NoMemoryEffect])>;
 
 #endif
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNBase.td b/include/ttmlir/Dialect/TTNN/IR/TTNNBase.td
index 34d3daf9cc..f6f764d01a 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNBase.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNBase.td
@@ -6,6 +6,7 @@
 #define TTMLIR_TTMLIR_DIALECT_TTNN_TTNNDIALECT_TD
 
 include "mlir/IR/OpBase.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
 include "ttmlir/Dialect/TTNN/IR/TTNNOpModelInterface.td"
 include "ttmlir/Dialect/TTNN/IR/TTNNWorkaroundInterface.td"
 
@@ -45,6 +46,6 @@ def TTNN_Dialect : Dialect {
 //===----------------------------------------------------------------------===//
 
 class TTNN_Op<string mnemonic, list<Trait> traits = []> :
-    Op<TTNN_Dialect, mnemonic, !listconcat(traits, [TTNN_OpModelInterface, TTNN_WorkaroundInterface])>;
+    Op<TTNN_Dialect, mnemonic, !listconcat(traits, [NoMemoryEffect, TTNN_OpModelInterface, TTNN_WorkaroundInterface])>;
 
 #endif
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
index d918691611..41eb9dff75 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -794,9 +794,7 @@ def TTNN_ClampOp : TTNN_Op<"clamp"> {
   let hasVerifier = 1;
 }
 
-// Note: NoMemoryEffect is used to indicate that operation can be removed if it is not used.
-// Removal of this operation is done by the dead code elimination pass (RemoveDeadValuesPass).
-def TTNN_EmptyOp : TTNN_Op<"empty", [NoMemoryEffect]> { +def TTNN_EmptyOp : TTNN_Op<"empty"> { let summary = "Empty op."; let description = [{ Tensor empty operation diff --git a/test/ttmlir/Dialect/TTIR/test_remove_dead_values_pass.mlir b/test/ttmlir/Dialect/TTIR/test_remove_dead_values_pass.mlir new file mode 100644 index 0000000000..8b6df4d0f2 --- /dev/null +++ b/test/ttmlir/Dialect/TTIR/test_remove_dead_values_pass.mlir @@ -0,0 +1,22 @@ +// RUN: ttmlir-opt --remove-dead-values %s | FileCheck %s +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> { + %0 = tensor.empty() : tensor<64x128xf32> + // CHECK: %[[C:.*]] = "ttir.multiply"[[C:.*]] + %1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32> + %2 = tensor.empty() : tensor<64x128xf32> + // CHECK-NOT: %[[C:.*]] = "ttir.add"[[C:.*]] + %3 = "ttir.add"(%arg0, %arg1, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32> + %4 = tensor.empty() : tensor<64x128xf32> + // CHECK-NOT: %[[C:.*]] = "ttir.subtract"[[C:.*]] + %5 = "ttir.subtract"(%arg0, %arg1, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32> + %6 = tensor.empty() : tensor<64x128xf32> + // CHECK-NOT: %[[C:.*]] = "ttir.div"[[C:.*]] + %7 = "ttir.div"(%arg0, %arg1, %6) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32> + %8 = tensor.empty() : tensor<64x128xf32> + // CHECK-NOT: %[[C:.*]] = "ttir.eq"[[C:.*]] + %9 = "ttir.eq"(%arg0, %arg1, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xf32>, tensor<64x128xf32>, tensor<64x128xf32>) -> tensor<64x128xf32> + return %1 : tensor<64x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTNN/test_remove_dead_values_pass.mlir b/test/ttmlir/Dialect/TTNN/test_remove_dead_values_pass.mlir new file mode 100644 index 0000000000..f3231730f5 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/test_remove_dead_values_pass.mlir @@ -0,0 +1,77 @@ +// RUN: ttmlir-opt --remove-dead-values %s | FileCheck %s +#device = #tt.device (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]> +#dram = #ttnn.buffer_type +#system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = , grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = 
+#system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = <wormhole_b0>, grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 1x0, 1x1, 1x2, 1x3, 1x4, 1x5, 1x6, 1x7, 2x0, 2x1, 2x2, 2x3, 2x4, 2x5, 2x6, 2x7, 3x0, 3x1, 3x2, 3x3, 3x4, 3x5, 3x6, 3x7, 4x0, 4x1, 4x2, 4x3, 4x4, 4x5, 4x6, 4x7, 5x0, 5x1, 5x2, 5x3, 5x4, 5x5, 5x6, 5x7, 6x0, 6x1, 6x2, 6x3, 6x4, 6x5, 6x6, 6x7, 7x0, 7x1, 7x2, 7x3, 7x4, 7x5, 7x6, 7x7] dram = [ 8x0, 9x0, 10x0, 8x1, 9x1, 10x1, 8x2, 9x2, 10x2, 8x3, 9x3, 10x3]}, supported_data_types = [<f32>, <f16>, <bf16>, <bfp_f8>, <bfp_bf8>, <bfp_f4>, <bfp_bf4>, <bfp_f2>, <bfp_bf2>, <u32>, <u16>, <u8>], supported_tile_sizes = [ 4x16, 16x16, 32x16, 4x32, 16x32, 32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
+#system_memory = #ttnn.buffer_type<system_memory>
+#ttnn_layout = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<64x128xf32, #system_memory>>
+#ttnn_layout1 = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<2x4x!tt.tile<32x32, f32>, #dram>, interleaved>
+#ttnn_layout2 = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <1x1>, memref<64x128xf32, #dram>, interleaved>
+module attributes {tt.device = #device, tt.system_desc = #system_desc} {
+  func.func @forward(%arg0: tensor<64x128xf32, #ttnn_layout>, %arg1: tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout> {
+    %0 = "ttnn.get_device"() <{mesh_shape = #ttnn<mesh_shape 1x1>}> : () -> !tt.device<#device>
+    %1 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %2 = "ttnn.to_device"(%1, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%1) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %3 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %4 = "ttnn.to_device"(%3, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%3) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %5 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
+    // CHECK: %[[C:.*]] = "ttnn.multiply"[[C:.*]]
+    %6 = "ttnn.multiply"(%2, %4, %5) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
+    "ttnn.deallocate"(%4) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    "ttnn.deallocate"(%2) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %7 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %8 = "ttnn.to_device"(%7, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%7) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %9 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %10 = "ttnn.to_device"(%9, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%9) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %11 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
+    // CHECK-NOT: %[[C:.*]] = "ttnn.add"[[C:.*]]
+    %12 = "ttnn.add"(%8, %10, %11) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
+    "ttnn.deallocate"(%11) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
+    "ttnn.deallocate"(%10) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    "ttnn.deallocate"(%8) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %13 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %14 = "ttnn.to_device"(%13, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%13) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %15 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %16 = "ttnn.to_device"(%15, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%15) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %17 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
+    // CHECK-NOT: %[[C:.*]] = "ttnn.subtract"[[C:.*]]
+    %18 = "ttnn.subtract"(%14, %16, %17) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
+    "ttnn.deallocate"(%17) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
+    "ttnn.deallocate"(%16) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    "ttnn.deallocate"(%14) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %19 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %20 = "ttnn.to_device"(%19, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%19) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %21 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %22 = "ttnn.to_device"(%21, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%21) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %23 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
+    // CHECK-NOT: %[[C:.*]] = "ttnn.div"[[C:.*]]
+    %24 = "ttnn.div"(%20, %22, %23) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
+    "ttnn.deallocate"(%23) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
+    "ttnn.deallocate"(%22) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    "ttnn.deallocate"(%20) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %25 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %26 = "ttnn.to_device"(%25, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%25) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %27 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout<tile>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout1>
+    %28 = "ttnn.to_device"(%27, %0) <{memory_config = #ttnn.memory_config<<interleaved>, #dram, <<2x4>>>}> : (tensor<64x128xf32, #ttnn_layout1>, !tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout1>
+    "ttnn.deallocate"(%27) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %29 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes<f32>, layout = #ttnn.layout<row_major>, memory_config = #ttnn.memory_config<<interleaved>, #dram, <<64x128>>>, shape = #ttnn.shape<64x128>}> : (!tt.device<#device>) -> tensor<64x128xf32, #ttnn_layout2>
+    // CHECK-NOT: %[[C:.*]] = "ttnn.eq"[[C:.*]]
+    %30 = "ttnn.eq"(%26, %28, %29) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout1>, tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout2>
+    "ttnn.deallocate"(%29) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
+    "ttnn.deallocate"(%28) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    "ttnn.deallocate"(%26) <{force = false}> : (tensor<64x128xf32, #ttnn_layout1>) -> ()
+    %31 = "ttnn.from_device"(%6) : (tensor<64x128xf32, #ttnn_layout2>) -> tensor<64x128xf32, #ttnn_layout>
+    "ttnn.deallocate"(%5) <{force = false}> : (tensor<64x128xf32, #ttnn_layout2>) -> ()
+    %32 = "ttnn.to_layout"(%31) <{layout = #ttnn.layout<row_major>}> : (tensor<64x128xf32, #ttnn_layout>) -> tensor<64x128xf32, #ttnn_layout>
+    "ttnn.deallocate"(%31) <{force = false}> : (tensor<64x128xf32, #ttnn_layout>) -> ()
+    return %32 : tensor<64x128xf32, #ttnn_layout>
+  }
+}
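
Summary of the behavior this patch enables: with NoMemoryEffect folded into the TTIR_Op/TTNN_Op base classes, every op in both dialects is declared side-effect free, so MLIR's upstream RemoveDeadValuesPass (--remove-dead-values, exercised by the two tests above) may erase any op whose results are unused. A minimal illustrative sketch of that effect, reusing the op forms from the TTIR test (attribute details elided, not a verbatim pass output):

// Before --remove-dead-values: %3 has no uses, so its defining op is dead.
func.func @forward(%arg0: tensor<64x128xf32>, %arg1: tensor<64x128xf32>) -> tensor<64x128xf32> {
  %0 = tensor.empty() : tensor<64x128xf32>
  %1 = "ttir.multiply"(%arg0, %arg1, %0) ... // kept: feeds the return
  %2 = tensor.empty() : tensor<64x128xf32>
  %3 = "ttir.add"(%arg0, %arg1, %2) ...      // erased: no uses, no memory effects
  return %1 : tensor<64x128xf32>
}
// After the pass, the unused "ttir.add" is erased, and %2's tensor.empty
// becomes dead in turn and is erased as well; only the "ttir.multiply"
// chain feeding the return survives, which is what the CHECK/CHECK-NOT
// lines in the tests assert.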