@@ -1301,25 +1301,27 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf
13011301// different - vector sizes are inferred (rather than user-specified) and hence
13021302// masking was used.
13031303
1304- func.func @test_vectorize_pack (%arg0: tensor <32 x8 x16 xf32 >, %arg1: tensor <4 x1 x32 x16 x2 xf32 >) -> tensor <4 x1 x32 x16 x2 xf32 > {
1305- %pack = linalg.pack %arg0 outer_dims_perm = [1 , 2 , 0 ] inner_dims_pos = [2 , 1 ] inner_tiles = [16 , 2 ] into %arg1 : tensor <32 x8 x16 xf32 > -> tensor <4 x1 x32 x16 x2 xf32 >
1304+ // CHECK-LABEL: func @test_vectorize_pack
1305+ // CHECK-SAME: %[[SRC:.*]]: tensor<32x8x16xf32>,
1306+ // CHECK-SAME: %[[DEST:.*]]: tensor<4x1x32x16x2xf32>
1307+ func.func @test_vectorize_pack(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
1308+ %pack = linalg.pack %src outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
13061309 return %pack : tensor<4x1x32x16x2xf32>
13071310}
1308- // CHECK-DAG: %[[cst :.*]] = arith.constant 0.000000e+00 : f32
1309- // CHECK-DAG: %[[c0 :.*]] = arith.constant 0 : index
1310- // CHECK: %[[read :.*]] = vector.transfer_read %{{.*}}[%[[c0 ]], %[[c0 ]], %[[c0 ]]], %[[cst ]]
1311+ // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
1312+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
1313+ // CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[CST]]
13111314// CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32>
1312- // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
1313- // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
1314- // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
1315- // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<4x1x32x16x2xf32>
1316- // CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
1315+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
1316+ // CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
1317+ // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
1318+ // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
13171319// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32>
13181320// CHECK: return %[[WRITE]] : tensor<4x1x32x16x2xf32>
13191321
13201322module attributes {transform.with_named_sequence } {
1321- transform.named_sequence @__transform_main (%arg0 : !transform.any_op {transform.readonly }) {
1322- %0 = transform.structured.match ops {[" linalg.pack" ]} in %arg0 : (!transform.any_op ) -> !transform.any_op
1323+ transform.named_sequence @__transform_main(%src: !transform.any_op {transform.readonly}) {
1324+ %0 = transform.structured.match ops{["linalg.pack"]} in %src : (!transform.any_op) -> !transform.any_op
13231325 transform.structured.vectorize %0 vector_sizes [4, 1, 32] : !transform.any_op
13241326 transform.yield
13251327 }
@@ -1331,26 +1333,28 @@ module attributes {transform.with_named_sequence} {
13311333// different - vector sizes are inferred (rather than user-specified) and hence
13321334// masking was used.
13331335
1334- func.func @test_vectorize_padded_pack (%arg0: tensor <32 x7 x15 xf32 >, %arg1: tensor <32 x4 x1 x16 x2 xf32 >) -> tensor <32 x4 x1 x16 x2 xf32 > {
1336+ // CHECK-LABEL: func @test_vectorize_padded_pack
1337+ // CHECK-SAME: %[[SRC:.*]]: tensor<32x7x15xf32>,
1338+ // CHECK-SAME: %[[DEST:.*]]: tensor<32x4x1x16x2xf32>
1339+ func.func @test_vectorize_padded_pack(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
13351340 %pad = arith.constant 0.000000e+00 : f32
1336- %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
1341+ %pack = linalg.pack %src padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
13371342 return %pack : tensor<32x4x1x16x2xf32>
13381343}
1339- // CHECK-DAG: %[[cst :.*]] = arith.constant 0.000000e+00 : f32
1340- // CHECK-DAG: %[[c0 :.*]] = arith.constant 0 : index
1341- // CHECK-DAG: %[[c32 :.*]] = arith.constant 32 : index
1342- // CHECK-DAG: %[[c7 :.*]] = arith.constant 7 : index
1343- // CHECK-DAG: %[[c15 :.*]] = arith.constant 15 : index
1344- // CHECK: %[[mask :.*]] = vector.create_mask %[[c32 ]], %[[c7 ]], %[[c15 ]] : vector<32x8x16xi1>
1345- // CHECK: %[[masked_read :.*]] = vector.mask %[[mask ]] {
1346- // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0 ]], %[[c0 ]], %[[c0 ]]], %[[cst ]]
1344+ // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
1345+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
1346+ // CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index
1347+ // CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
1348+ // CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
1349+ // CHECK: %[[MASK:.*]] = vector.create_mask %[[C32]], %[[C7]], %[[C15]] : vector<32x8x16xi1>
1350+ // CHECK: %[[READ:.*]] = vector.mask %[[MASK]] {
1351+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[CST]]
13471352// CHECK-SAME: {in_bounds = [true, true, true]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
13481353// CHECK-SAME: } : vector<32x8x16xi1> -> vector<32x8x16xf32>
1349- // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
1350- // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
1351- // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
1352- // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
1353- // CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
1354+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
1355+ // CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
1356+ // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
1357+ // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
13541358// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
13551359// CHECK: return %[[WRITE]] : tensor<32x4x1x16x2xf32>
13561360
@@ -1364,38 +1368,37 @@ module attributes {transform.with_named_sequence} {
13641368
13651369// -----
13661370
1367- func.func @test_vectorize_dynamic_pack (%arg0: tensor <?x?xf32 >, %arg1: tensor <?x?x16 x2 xf32 >) -> tensor <?x?x16 x2 xf32 > {
1368- %pack = linalg.pack %arg0 inner_dims_pos = [1 , 0 ] inner_tiles = [16 , 2 ] into %arg1 : tensor <?x?xf32 > -> tensor <?x?x16 x2 xf32 >
1371+ // CHECK-LABEL: func @test_vectorize_dynamic_pack
1372+ // CHECK-SAME: %[[SRC:.*]]: tensor<?x?xf32>,
1373+ // CHECK-SAME: %[[DEST:.*]]: tensor<?x?x16x2xf32>
1374+ func.func @test_vectorize_dynamic_pack(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
1375+ %pack = linalg.pack %src inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
13691376 return %pack : tensor<?x?x16x2xf32>
13701377}
1371- // CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
1372- // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
1373- // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
1374- // CHECK-DAG: %[[d0:.*]] = tensor.dim {{.*}} %[[c0]] : tensor<?x?x16x2xf32>
1375- // CHECK-DAG: %[[d1:.*]] = tensor.dim {{.*}} %[[c1]] : tensor<?x?x16x2xf32>
1376- // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
1377- // CHECK-DAG: %[[c0_0:.*]] = arith.constant 0 : index
1378- // CHECK-DAG: %[[c1_0:.*]] = arith.constant 1 : index
1379- // CHECK-DAG: %[[d0_0:.*]] = tensor.dim {{.*}} %[[c0_0]] : tensor<?x?xf32>
1380- // CHECK-DAG: %[[d1_0:.*]] = tensor.dim {{.*}} %[[c1_0]] : tensor<?x?xf32>
1381- // CHECK: %[[mask:.*]] = vector.create_mask %[[d0_0]], %[[d1_0]] : vector<8x16xi1>
1382- // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
1383- // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_1]], %[[c0_1]]], %[[cst]]
1378+
1379+ // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
1380+ // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
1381+ // CHECK-DAG: %[[C0_0:.*]] = arith.constant 0 : index
1382+ // CHECK-DAG: %[[C1_0:.*]] = arith.constant 1 : index
1383+ // CHECK-DAG: %[[D0_0:.*]] = tensor.dim {{.*}} %[[C0_0]] : tensor<?x?xf32>
1384+ // CHECK-DAG: %[[D1_0:.*]] = tensor.dim {{.*}} %[[C1_0]] : tensor<?x?xf32>
1385+ // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0_0]], %[[D1_0]] : vector<8x16xi1>
1386+ // CHECK: %[[READ:.*]] = vector.mask %[[MASK]] {
1387+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[CST]]
13841388// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<8x16xf32>
13851389// CHECK-SAME: } : vector<8x16xi1> -> vector<8x16xf32>
1386- // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[masked_read]] : vector<8x16xf32> to vector<4x2x1x16xf32>
1387- // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
1388- // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
1389- // CHECK-DAG: %[[c16:.*]] = arith.constant 16 : index
1390- // CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index
1391- // CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[d0]], %[[d1]]) : tensor<?x?x16x2xf32>
1392- // CHECK-DAG: %[[d2:.*]] = tensor.dim %[[empty]], {{.*}} : tensor<?x?x16x2xf32>
1393- // CHECK-DAG: %[[d3:.*]] = tensor.dim %[[empty]], {{.*}} : tensor<?x?x16x2xf32>
1394- // CHECK: %[[mask_0:.*]] = vector.create_mask %[[d2]], %[[d3]], %[[c16]], %[[c2]] : vector<4x1x16x2xi1>
1395- // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_0]] {
1396- // CHECK-SAME: vector.transfer_write %[[transpose]], %[[empty]][%[[c0_2]], %[[c0_2]], %[[c0_2]], %[[c0_2]]]
1390+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<8x16xf32> to vector<4x2x1x16xf32>
1391+ // CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 2, 3, 1] : vector<4x2x1x16xf32> to vector<4x1x16x2xf32>
1392+ // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
1393+ // CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
1394+ // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
1395+ // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x16x2xf32>
1396+ // CHECK-DAG: %[[D3:.*]] = tensor.dim %[[DEST]], {{.*}} : tensor<?x?x16x2xf32>
1397+ // CHECK: %[[MASK_0:.*]] = vector.create_mask %[[D2]], %[[D3]], %[[C16]], %[[C2]] : vector<4x1x16x2xi1>
1398+ // CHECK: %[[WRITE:.*]] = vector.mask %[[MASK_0]] {
1399+ // CHECK-SAME: vector.transfer_write %[[TR]], %[[DEST]][%[[C0_2]], %[[C0_2]], %[[C0_2]], %[[C0_2]]]
13971400// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor<?x?x16x2xf32>
1398- // CHECK: return %[[masked_write ]] : tensor<?x?x16x2xf32>
1401+ // CHECK: return %[[WRITE]] : tensor<?x?x16x2xf32>
13991402
14001403module attributes {transform.with_named_sequence } {
14011404 transform.named_sequence @__transform_main (%arg0: !transform.any_op {transform.readonly }) {
@@ -1407,22 +1410,23 @@ module attributes {transform.with_named_sequence} {
14071410
14081411// -----
14091412
1410- // CHECK-LABEL: test_vectorize_pack_no_vector_sizes
1411- func.func @test_vectorize_pack_no_vector_sizes (%arg0: tensor <64 x4 xf32 >, %arg1: tensor <2 x4 x16 x2 xf32 >) -> tensor <2 x4 x16 x2 xf32 > {
1412- %pack = linalg.pack %arg0 outer_dims_perm = [1 , 0 ] inner_dims_pos = [0 , 1 ] inner_tiles = [16 , 2 ] into %arg1 : tensor <64 x4 xf32 > -> tensor <2 x4 x16 x2 xf32 >
1413+ // CHECK-LABEL: func @test_vectorize_pack_no_vector_sizes
1414+ // CHECK-SAME: %[[SRC:.*]]: tensor<64x4xf32>,
1415+ // CHECK-SAME: %[[DEST:.*]]: tensor<2x4x16x2xf32>
1416+ func.func @test_vectorize_pack_no_vector_sizes(%src: tensor<64x4xf32>, %dest: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
1417+ %pack = linalg.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %dest : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
14131418 return %pack : tensor<2x4x16x2xf32>
14141419}
1415- // CHECK-DAG: %[[cst :.*]] = arith.constant 0.000000e+00 : f32
1416- // CHECK-DAG: %[[c0 :.*]] = arith.constant 0 : index
1417- // CHECK: %[[read :.*]] = vector.transfer_read %{{.*}}[%[[c0 ]], %[[c0 ]]], %[[cst ]]
1420+ // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
1421+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
1422+ // CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST]]
14181423// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
1419- // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32>
1420- // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
1421- // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
1422- // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32>
1423- // CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
1424+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<64x4xf32> to vector<4x16x2x2xf32>
1425+ // CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
1426+ // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
1427+ // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
14241428// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
1425- // CHECK: return %[[write ]] : tensor<2x4x16x2xf32>
1429+ // CHECK: return %[[WRITE]] : tensor<2x4x16x2xf32>
14261430
14271431module attributes {transform.with_named_sequence } {
14281432 transform.named_sequence @__transform_main (%arg0: !transform.any_op {transform.readonly }) {
@@ -1435,22 +1439,23 @@ module attributes {transform.with_named_sequence} {
14351439// -----
14361440
14371441// CHECK-LABEL: func @test_vectorize_padded_pack_no_vector_sizes
1438- func.func @test_vectorize_padded_pack_no_vector_sizes (%arg0: tensor <32 x7 x15 xf32 >, %arg1: tensor <32 x4 x1 x16 x2 xf32 >) -> tensor <32 x4 x1 x16 x2 xf32 > {
1442+ // CHECK-SAME: %[[SRC:.*]]: tensor<32x7x15xf32>,
1443+ // CHECK-SAME: %[[DEST:.*]]: tensor<32x4x1x16x2xf32>
1444+ func.func @test_vectorize_padded_pack_no_vector_sizes(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
14391445 %pad = arith.constant 0.000000e+00 : f32
1440- %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
1446+ %pack = linalg.pack %src padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
14411447 return %pack : tensor<32x4x1x16x2xf32>
14421448}
1443- // CHECK-DAG: %[[cst :.*]] = arith.constant 0.000000e+00 : f32
1444- // CHECK-DAG: %[[c0 :.*]] = arith.constant 0 : index
1445- // CHECK: %[[transfer_read :.*]] = vector.transfer_read %{{.*}}[%[[c0 ]], %[[c0 ]], %[[c0 ]]], %[[cst ]]
1449+ // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
1450+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
1451+ // CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[CST]]
14461452// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
1447- // CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
1448- // CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
1449- // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
1450- // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
1451- // CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
1453+ // CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
1454+ // CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
1455+ // CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
1456+ // CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
14521457// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
1453- // CHECK: return %[[write ]] : tensor<32x4x1x16x2xf32>
1458+ // CHECK: return %[[WRITE]] : tensor<32x4x1x16x2xf32>
14541459
14551460module attributes {transform.with_named_sequence } {
14561461 transform.named_sequence @__transform_main (%arg0: !transform.any_op {transform.readonly }) {
0 commit comments