@@ -435,6 +435,40 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: ten
435435
436436// -----
437437
438+ // Test: data-layout propagation through an elementwise linalg.generic whose
438+ // init (outs) operand is never read in the body (%arg4 is unused), followed
438+ // by a transposing linalg.pack. The pack is expected to be pulled above the
438+ // generic: the input is packed, and the unused init is replaced by a fresh
438+ // tensor.empty in the packed layout rather than being packed itself.
438+ #map0 = affine_map<(d0, d1) -> (d0, d1)>
439+ func.func @elem_pack_transpose_outer_dims_unused_init(%arg0: tensor<128x256xi32>, %init: tensor<128x256xi32>) -> tensor<16x4x32x16xi32> {
440+   %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]}
441+       ins(%arg0 : tensor<128x256xi32>)
442+       outs(%init : tensor<128x256xi32>) {
443+     ^bb0(%arg3: i32, %arg4: i32):
444+       %4 = arith.addi %arg3, %arg3 : i32
445+       linalg.yield %4 : i32
446+   } -> tensor<128x256xi32>
447+   %empty = tensor.empty() : tensor<16x4x32x16xi32>
448+   %pack = linalg.pack %elem
449+     outer_dims_perm = [1, 0]
450+     inner_dims_pos = [0, 1]
451+     inner_tiles = [32, 16]
452+     into %empty : tensor<128x256xi32> -> tensor<16x4x32x16xi32>
453+   return %pack : tensor<16x4x32x16xi32>
454+ }
455+
456+ // CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
456+ // Use the full test name so this label cannot be confused with the earlier
456+ // @elem_pack_transpose_outer_dims test, of which it is a textual prefix.
457+ // CHECK-LABEL: func.func @elem_pack_transpose_outer_dims_unused_init
458+ // CHECK-SAME:    %[[ARG0:[a-zA-Z0-9]+]]
459+ // CHECK-SAME:    %[[ARG1:[a-zA-Z0-9]+]]
460+ // CHECK:         %[[ARG1_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
461+ // CHECK:         %[[ARG0_EMPTY:.+]] = tensor.empty() : tensor<16x4x32x16xi32>
462+ // CHECK:         %[[PACKED_ARG0:.+]] = linalg.pack %[[ARG0]]
463+ // CHECK-SAME:      outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16]
464+ // CHECK-SAME:      into %[[ARG0_EMPTY]]
465+ // CHECK:         %[[RES:.+]] = linalg.generic
466+ // CHECK-SAME:      indexing_maps = [#[[$MAP]], #[[$MAP]]]
467+ // CHECK-SAME:      ins(%[[PACKED_ARG0]]
468+ // CHECK-SAME:      outs(%[[ARG1_EMPTY]]
469+
470+ // -----
471+
438472#map = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
439473
440474func.func @unpack_on_output (%arg0: tensor <12 x2 x56 x56 x32 xf32 >) -> tensor <12 x56 x56 x64 xf32 > {
@@ -497,7 +531,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56
497531
498532#map = affine_map <(d0 , d1 , d2 , d3 ) -> (d0 , d1 , d2 , d3 )>
499533
500- func.func @unpack_element_type_change (%arg0: tensor <12 x2 x56 x56 x32 xf32 >, %init: tensor <12 x56 x56 x64 xf16 >) -> tensor <12 x56 x56 x64 xf16 > {
534+ func.func @unpack_element_type_change_no_use (%arg0: tensor <12 x2 x56 x56 x32 xf32 >, %init: tensor <12 x56 x56 x64 xf16 >) -> tensor <12 x56 x56 x64 xf16 > {
501535 %0 = tensor.empty () : tensor <12 x56 x56 x64 xf32 >
502536 %1 = linalg.unpack %arg0 outer_dims_perm = [0 , 3 , 1 , 2 ] inner_dims_pos = [3 ] inner_tiles = [32 ] into %0 : tensor <12 x2 x56 x56 x32 xf32 > -> tensor <12 x56 x56 x64 xf32 >
503537 %2 = linalg.generic {index ing_maps = [#map , #map ], iterator_types = [" parallel" , " parallel" , " parallel" , " parallel" ]} ins (%1: tensor <12 x56 x56 x64 xf32 >) outs (%init : tensor <12 x56 x56 x64 xf16 >) {
@@ -509,17 +543,14 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t
509543}
510544
511545// CHECK: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
512- // CHECK-LABEL: func.func @unpack_element_type_change
546+ // CHECK-LABEL: func.func @unpack_element_type_change_no_use
513547// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
514548// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
515- // CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16>
516- // CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]]
517- // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
518- // CHECK-SAME: into %[[ARG1_PACK_EMPTY]]
549+ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16>
519550// CHECK: %[[RES:.+]] = linalg.generic
520551// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]]
521552// CHECK-SAME: ins(%[[ARG0]]
522- // CHECK-SAME: outs(%[[ARG1_PACK]]
553+ // CHECK-SAME: outs(%[[EMPTY]]
523554// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]]
524555// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32]
525556// CHECK-SAME: into %[[ARG1]]
@@ -1402,13 +1433,10 @@ func.func @push_unpack_in_padded_domain_foldable(%arg0: tensor<8x8x4x8xf32>, %de
14021433// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
14031434// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
14041435// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]
1405- // CHECK: %[[ARG2_PACK_EMPTY:.+]] = tensor.empty
1406- // CHECK: %[[ARG2_PACK:.+]] = linalg.pack %[[ARG2]]
1407- // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8]
1408- // CHECK-SAME: into %[[ARG2_PACK_EMPTY]]
1436+ // CHECK: %[[EMPTY:.+]] = tensor.empty
14091437// CHECK: %[[GENERIC:.+]] = linalg.generic
14101438// CHECK-SAME: ins(%[[ARG0]] : tensor<8x8x4x8xf32>)
1411- // CHECK-SAME: outs(%[[ARG2_PACK]] : tensor<?x8x4x8xbf16>)
1439+ // CHECK-SAME: outs(%[[EMPTY]] : tensor<?x8x4x8xbf16>)
14121440// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[GENERIC]]
14131441// CHECK-SAME: into %[[ARG2]]
14141442// CHECK: return %[[UNPACK]] : tensor<?x64xbf16>
0 commit comments