@@ -1419,13 +1419,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
14191419; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
14201420; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
14211421; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1422- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = or disjoint i32 [[TMP1]], 1
14231422; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC1]])
1423+ ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[TMP1]]
14241424; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = sub <8 x i8> zeroinitializer, [[TMP4]]
1425- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[TMP3]]
1426- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 -1
14271425; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1428- ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP7 ]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
1426+ ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6 ]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
14291427; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
14301428; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
14311429; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
@@ -2555,13 +2553,11 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
25552553; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP3]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
25562554; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
25572555; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
2558- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = or disjoint i32 [[TMP2]], 1
25592556; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]])
2557+ ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[TMP2]]
25602558; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]]
2561- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[TMP5]]
2562- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 -1
25632559; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
2564- ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP9 ]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
2560+ ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8 ]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
25652561; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
25662562; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
25672563; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -2989,13 +2985,11 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
29892985; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_VEC:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP2]], i32 1, <16 x i1> [[INTERLEAVED_MASK]], <16 x i8> poison)
29902986; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
29912987; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
2992- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP3:%.*]] = or disjoint i32 [[TMP1]], 1
29932988; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[STRIDED_VEC]], <8 x i8> [[STRIDED_VEC3]])
2989+ ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP1]]
29942990; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = sub <8 x i8> zeroinitializer, [[TMP4]]
2995- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[TMP3]]
2996- ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 -1
29972991; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
2998- ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP7 ]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
2992+ ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6 ]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
29992993; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
30002994; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
30012995; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
0 commit comments