@@ -1451,3 +1451,52 @@ define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32>
14511451 %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift )
14521452 ret <4 x i32 > %red
14531453}
1454+
1455+ define <2 x i32 > @udot_v16i8tov2i32 (<2 x i32 > %acc , <16 x i8 > %input ) {
1456+ ; CHECK-NODOT-LABEL: udot_v16i8tov2i32:
1457+ ; CHECK-NODOT: // %bb.0: // %entry
1458+ ; CHECK-NODOT-NEXT: ushll v2.8h, v1.8b, #0
1459+ ; CHECK-NODOT-NEXT: // kill: def $d0 killed $d0 def $q0
1460+ ; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
1461+ ; CHECK-NODOT-NEXT: ushll v3.4s, v2.4h, #0
1462+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h
1463+ ; CHECK-NODOT-NEXT: ushll2 v4.4s, v2.8h, #0
1464+ ; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1465+ ; CHECK-NODOT-NEXT: ext v3.16b, v3.16b, v3.16b, #8
1466+ ; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
1467+ ; CHECK-NODOT-NEXT: ext v3.16b, v4.16b, v4.16b, #8
1468+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v2.4h
1469+ ; CHECK-NODOT-NEXT: ushll v2.4s, v1.4h, #0
1470+ ; CHECK-NODOT-NEXT: add v0.2s, v3.2s, v0.2s
1471+ ; CHECK-NODOT-NEXT: ext v2.16b, v2.16b, v2.16b, #8
1472+ ; CHECK-NODOT-NEXT: ushll2 v3.4s, v1.8h, #0
1473+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
1474+ ; CHECK-NODOT-NEXT: ext v1.16b, v1.16b, v1.16b, #8
1475+ ; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
1476+ ; CHECK-NODOT-NEXT: ext v2.16b, v3.16b, v3.16b, #8
1477+ ; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
1478+ ; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
1479+ ; CHECK-NODOT-NEXT: ret
1480+ ;
1481+ ; CHECK-DOT-LABEL: udot_v16i8tov2i32:
1482+ ; CHECK-DOT: // %bb.0: // %entry
1483+ ; CHECK-DOT-NEXT: movi v2.16b, #1
1484+ ; CHECK-DOT-NEXT: fmov d0, d0
1485+ ; CHECK-DOT-NEXT: udot v0.4s, v1.16b, v2.16b
1486+ ; CHECK-DOT-NEXT: addp v0.4s, v0.4s, v0.4s
1487+ ; CHECK-DOT-NEXT: // kill: def $d0 killed $d0 killed $q0
1488+ ; CHECK-DOT-NEXT: ret
1489+ ;
1490+ ; CHECK-DOT-I8MM-LABEL: udot_v16i8tov2i32:
1491+ ; CHECK-DOT-I8MM: // %bb.0: // %entry
1492+ ; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
1493+ ; CHECK-DOT-I8MM-NEXT: fmov d0, d0
1494+ ; CHECK-DOT-I8MM-NEXT: udot v0.4s, v1.16b, v2.16b
1495+ ; CHECK-DOT-I8MM-NEXT: addp v0.4s, v0.4s, v0.4s
1496+ ; CHECK-DOT-I8MM-NEXT: // kill: def $d0 killed $d0 killed $q0
1497+ ; CHECK-DOT-I8MM-NEXT: ret
1498+ entry:
1499+ %input.wide = zext <16 x i8 > %input to <16 x i32 >
1500+ %partial.reduce = tail call <2 x i32 > @llvm.vector.partial.reduce.add (<2 x i32 > %acc , <16 x i32 > %input.wide )
1501+ ret <2 x i32 > %partial.reduce
1502+ }
0 commit comments