@@ -7,9 +7,8 @@ define i32 @test_and(<16 x i32> %a, ptr %b) {
77; CHECK-LABEL: @test_and(
88; CHECK-NEXT: entry:
99; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
10- ; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A:%.*]] to <16 x i8>
11- ; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[WIDE_LOAD]], [[TMP0]]
12- ; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
10+ ; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
11+ ; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[TMP0]], [[A:%.*]]
1312; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
1413; CHECK-NEXT: ret i32 [[TMP3]]
1514;
@@ -26,9 +25,8 @@ define i32 @test_mask_or(<16 x i32> %a, ptr %b) {
2625; CHECK-NEXT: entry:
2726; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
2827; CHECK-NEXT: [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], splat (i32 16)
29- ; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A_MASKED]] to <16 x i8>
30- ; CHECK-NEXT: [[TMP1:%.*]] = or <16 x i8> [[WIDE_LOAD]], [[TMP0]]
31- ; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
28+ ; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
29+ ; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[A_MASKED]]
3230; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
3331; CHECK-NEXT: ret i32 [[TMP3]]
3432;
@@ -47,15 +45,13 @@ define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) {
4745; CHECK-NEXT: [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255)
4846; CHECK-NEXT: [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255)
4947; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
50- ; CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
51- ; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[V_MASKED]] to <16 x i8>
52- ; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i8> [[TMP0]], [[TMP1]]
53- ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i32>
54- ; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15)
55- ; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[U_MASKED]] to <16 x i8>
56- ; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
48+ ; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
49+ ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
5750; CHECK-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32>
58- ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP7]]
51+ ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP7]], [[V_MASKED]]
52+ ; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP0]], splat (i32 15)
53+ ; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], [[U_MASKED]]
54+ ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP3]], [[TMP5]]
5955; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]])
6056; CHECK-NEXT: ret i32 [[TMP9]]
6157;
@@ -81,9 +77,8 @@ define i32 @phi_bug(<16 x i32> %a, ptr %b) {
8177; CHECK: vector.body:
8278; CHECK-NEXT: [[A_PHI:%.*]] = phi <16 x i32> [ [[A:%.*]], [[ENTRY:%.*]] ]
8379; CHECK-NEXT: [[WIDE_LOAD_PHI:%.*]] = phi <16 x i8> [ [[WIDE_LOAD]], [[ENTRY]] ]
84- ; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[A_PHI]] to <16 x i8>
85- ; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i8> [[WIDE_LOAD_PHI]], [[TMP0]]
86- ; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
80+ ; CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i8> [[WIDE_LOAD_PHI]] to <16 x i32>
81+ ; CHECK-NEXT: [[TMP2:%.*]] = and <16 x i32> [[TMP0]], [[A_PHI]]
8782; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]])
8883; CHECK-NEXT: ret i32 [[TMP3]]
8984;
0 commit comments