[DAGCombiner] Avoid double deletion when replacing multiple frozen/unfrozen uses #155427
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag

Author: Yingwei Zheng (dtcxzyw)

Changes: Closes #155345. This patch unfreezes all the uses first to avoid triggering CSE when introducing cycles.

Patch is 21.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155427.diff

3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 27b5a0d37b679..4e2dc7b6619a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16788,6 +16788,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
// If we have frozen and unfrozen users of N0, update so everything uses N.
if (!N0.isUndef() && !N0.hasOneUse()) {
SDValue FrozenN0(N, 0);
+ // Unfreeze all uses of N to avoid double deleting N from the CSE map.
+ DAG.ReplaceAllUsesOfValueWith(FrozenN0, N0);
DAG.ReplaceAllUsesOfValueWith(N0, FrozenN0);
// ReplaceAllUsesOfValueWith will have also updated the use in N, thus
// creating a cycle in a DAG. Let's undo that by mutating the freeze.
diff --git a/llvm/test/CodeGen/X86/freeze.ll b/llvm/test/CodeGen/X86/freeze.ll
index 3196f8177cc9b..38e3e23f7caac 100644
--- a/llvm/test/CodeGen/X86/freeze.ll
+++ b/llvm/test/CodeGen/X86/freeze.ll
@@ -141,3 +141,48 @@ entry:
%z = urem i32 %y, 10
ret i32 %z
}
+
+; Make sure we don't crash when replacing all uses of N with an existing freeze N.
+
+define i64 @pr155345(ptr %p1, i1 %cond, ptr %p2, ptr %p3) {
+; X86ASM-LABEL: pr155345:
+; X86ASM: # %bb.0: # %entry
+; X86ASM-NEXT: movzbl (%rdi), %edi
+; X86ASM-NEXT: xorl %eax, %eax
+; X86ASM-NEXT: orb $1, %dil
+; X86ASM-NEXT: movb %dil, (%rdx)
+; X86ASM-NEXT: movzbl %dil, %edx
+; X86ASM-NEXT: cmovel %edx, %eax
+; X86ASM-NEXT: sete %dil
+; X86ASM-NEXT: testb $1, %sil
+; X86ASM-NEXT: cmovnel %edx, %eax
+; X86ASM-NEXT: movb %dl, (%rcx)
+; X86ASM-NEXT: movl $1, %edx
+; X86ASM-NEXT: movl %eax, %ecx
+; X86ASM-NEXT: shlq %cl, %rdx
+; X86ASM-NEXT: orb %sil, %dil
+; X86ASM-NEXT: movzbl %dil, %eax
+; X86ASM-NEXT: andl %edx, %eax
+; X86ASM-NEXT: andl $1, %eax
+; X86ASM-NEXT: retq
+entry:
+ %load1 = load i8, ptr %p1, align 1
+ %v1 = or i8 %load1, 1
+ %v2 = zext i8 %v1 to i32
+ store i8 %v1, ptr %p2, align 1
+ %v3 = load i8, ptr %p2, align 1
+ %ext1 = sext i8 %v3 to i64
+ %ext2 = zext i32 %v2 to i64
+ %cmp1 = icmp ult i64 0, %ext1
+ %v4 = select i1 %cond, i1 false, i1 %cmp1
+ %sel1 = select i1 %v4, i64 0, i64 %ext2
+ %shl = shl i64 1, %sel1
+ store i8 %v1, ptr %p3, align 1
+ %v5 = load i8, ptr %p3, align 1
+ %ext3 = sext i8 %v5 to i64
+ %cmp2 = icmp ult i64 0, %ext3
+ %v6 = select i1 %cond, i1 false, i1 %cmp2
+ %sel2 = select i1 %v6, i64 0, i64 1
+ %and = and i64 %sel2, %shl
+ ret i64 %and
+}
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
index fb2433dbbb1e1..7c9adaf31aff5 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-256.ll
@@ -730,36 +730,36 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7
; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9
+; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9
; AVX1-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
-; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7
+; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7
; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
-; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -767,20 +767,20 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin
; AVX2-LABEL: vec256_i64_signed_mem_reg:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm3
-; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1]
+; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpsubq %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm4
; AVX2-NEXT: vpsrlq $33, %ymm0, %ymm0
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpaddq %ymm0, %ymm3, %ymm0
-; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
@@ -790,36 +790,36 @@ define <4 x i64> @vec256_i64_signed_mem_reg(ptr %a1_addr, <4 x i64> %a2) nounwin
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOP-NEXT: vmovdqa (%rdi), %xmm2
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
-; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm4
+; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4
+; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5
; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; XOP-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; XOP-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm5
+; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; XOP-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6
; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7
; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9
+; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9
; XOP-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
-; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
+; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsllq $32, %xmm0, %xmm0
-; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; XOP-NEXT: vpsrlq $33, %xmm1, %xmm1
-; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7
+; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7
; XOP-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
-; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
+; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsllq $32, %xmm1, %xmm1
-; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: retq
@@ -900,36 +900,36 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpsubq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpsubq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsrlq $1, %xmm3, %xmm6
; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm7
; AVX1-NEXT: vpsrlq $33, %xmm2, %xmm2
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9
+; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9
; AVX1-NEXT: vpmuludq %xmm2, %xmm9, %xmm2
-; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
-; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; AVX1-NEXT: vpsrlq $33, %xmm3, %xmm3
-; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7
+; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7
; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
-; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3
-; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -937,20 +937,20 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin
; AVX2-LABEL: vec256_i64_signed_reg_mem:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1]
+; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpsubq %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4
; AVX2-NEXT: vpsrlq $33, %ymm1, %ymm1
-; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -960,36 +960,36 @@ define <4 x i64> @vec256_i64_signed_reg_mem(<4 x i64> %a1, ptr %a2_addr) nounwin
; XOP-NEXT: vextractf128 $1, %ymm0, %xmm1
; XOP-NEXT: vmovdqa (%rdi), %xmm2
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
-; XOP-NEXT: vpcomgtq %xmm2, %xmm0, %xmm4
+; XOP-NEXT: vpcomgtq %xmm3, %xmm1, %xmm4
+; XOP-NEXT: vpcomgtq %xmm2, %xmm0, %xmm5
; XOP-NEXT: vpsubq %xmm2, %xmm0, %xmm2
-; XOP-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; XOP-NEXT: vpsubq %xmm2, %xmm4, %xmm2
-; XOP-NEXT: vpcomgtq %xmm3, %xmm1, %xmm5
+; XOP-NEXT: vpxor %xmm5, %xmm2, %xmm2
+; XOP-NEXT: vpsubq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpsubq %xmm3, %xmm1, %xmm3
-; XOP-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; XOP-NEXT: vpsubq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; XOP-NEXT: vpsubq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpsrlq $1, %xmm3, %xmm6
; XOP-NEXT: vpsrlq $1, %xmm2, %xmm7
; XOP-NEXT: vpsrlq $33, %xmm2, %xmm2
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9
+; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9
; XOP-NEXT: vpmuludq %xmm2, %xmm9, %xmm2
-; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
-; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; XOP-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
+; XOP-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; XOP-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; XOP-NEXT: vpsllq $32, %xmm2, %xmm2
-; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; XOP-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; XOP-NEXT: vpsrlq $33, %xmm3, %xmm3
-; XOP-NEXT: vpor %xmm5, %xmm8, %xmm7
+; XOP-NEXT: vpor %xmm4, %xmm8, %xmm7
; XOP-NEXT: vpmuludq %xmm7, %xmm3, %xmm3
-; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
-; XOP-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
+; XOP-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; XOP-NEXT: vpsllq $32, %xmm3, %xmm3
-; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; XOP-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; XOP-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; XOP-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; XOP-NEXT: retq
@@ -1071,36 +1071,36 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX1-NEXT: vmovdqa 16(%rsi), %xmm1
; AVX1-NEXT: vmovdqa (%rdi), %xmm2
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm6
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm7
; AVX1-NEXT: vpsrlq $33, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm9
+; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm9
; AVX1-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
-; AVX1-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
+; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vpaddq %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
-; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpmuludq %xmm7, %xmm9, %xmm5
; AVX1-NEXT: vpsrlq $33, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm5, %xmm8, %xmm7
+; AVX1-NEXT: vpor %xmm4, %xmm8, %xmm7
; AVX1-NEXT: vpmuludq %xmm7, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm5
-; AVX1-NEXT: vpmuludq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm4
+; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1
-; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpmuludq %xmm7, %xmm6, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpaddq %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -1109,20 +1109,20 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
; AVX2-NEXT: vmovdqa (%rsi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm3
-; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [1,1,1,1]
+; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm3
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vpsubq %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4
; AVX2-NEXT: vpsrlq $33, %ymm1, %ymm1
-; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm3
-; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vpaddq %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm2
; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm2
+; AVX2-NEXT: vpaddq %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpsllq $32, %ymm1, %ymm1
+; AVX2-NEXT: vpmuludq %ymm3, %ymm4, %ymm2
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -1133,36 +1133,36 @@ define <4 x i64> @vec256_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; XOP-NEXT: vmovdqa 16(%rsi), %xmm1
; XOP-NEXT: vmovdqa (%rdi), %xmm2
; XOP-NEXT: vmovdqa 16(%rdi), %xmm3
-; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm4
+; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm4
+; XOP-NEXT: vpcomgtq %xmm0, %xmm2, %xmm5
; XOP-NEXT: vpsubq %xmm0, %xmm2, %xmm0
-; XOP-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; XOP-NEXT: vpsubq %xmm0, %xmm4, %xmm0
-; XOP-NEXT: vpcomgtq %xmm1, %xmm3, %xmm5
+; XOP-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; XOP-NEXT: vpsubq %xmm0, %xmm5, %xmm0
; XOP-NEXT: vpsubq %xmm1, %xmm3, %xmm1
-; XOP-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; XOP-NEXT: vpsubq %xmm1, %xmm5, %xmm1
+; XOP-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; XOP-NEXT: vpsubq %xmm1, %xmm4, %xmm1
; XOP-NEXT: vpsrlq $1, %xmm1, %xmm6
; XOP-NEXT: vpsrlq $1, %xmm0, %xmm7
; XOP-NEXT: vpsrlq $33, %xmm0, %xmm0
; XOP-NEXT: vpmovsxbq {{.*#+}} xmm8 = [1,1]
-; XOP-NEXT: vpor %xmm4, %xmm8, %xmm9
+; XOP-NEXT: vpor %xmm5, %xmm8, %xmm9
; XOP-NEXT: vpmuludq %xmm0, %xmm9, %xmm0
-; XOP-NEXT: vpsrlq $32, %xmm4, %xmm4
-; XOP-NEXT: vpmuludq %xmm4, %xmm7, %xmm4
-; XOP-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; XOP-NEXT: vpsrlq $32, %xmm5, %xmm5
+; XOP-NEXT: vpmuludq %x...
[truncated]
RKSimon left a comment
LGTM - cheers
topperc left a comment
LGTM
I cannot reproduce it locally.
Closes #155345.
In the original case, `t18` has one frozen use and two unfrozen uses.
In `DAGCombiner::visitFREEZE`, we replace all uses of `t18` with `t59`. After updating the uses, `t59: i8 = freeze t18` is rewritten to `t59: i8 = freeze t59` (`AddModifiedNodeToCSEMaps`) and then CSEed into `t80: i8 = freeze t59` (`ReplaceAllUsesWith`). As the previous call to `AddModifiedNodeToCSEMaps` has already removed `t59` from the CSE map, `ReplaceAllUsesWith` cannot remove `t59` again.
For clarity, see the following call graph:
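(The trace below is a rough sketch reconstructed from the description above; the exact frames and intermediate steps are approximate.)

    DAGCombiner::visitFREEZE(t59)
      SelectionDAG::ReplaceAllUsesOfValueWith(t18, t59)
        - updating the use of t18 inside t59 itself yields t59: i8 = freeze t59
        AddModifiedNodeToCSEMaps(t59)
          - t59 now duplicates the existing t80: i8 = freeze t59
          ReplaceAllUsesWith(t59, t80)
            - tries to remove t59 from the CSE map, which AddModifiedNodeToCSEMaps
              has already done -> double deletion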
This patch unfreezes all the uses first to avoid triggering CSE when introducing cycles.
I originally attempted to avoid introducing the cycle by temporarily setting the operand of N to a fake SDValue (e.g., poison), but I found it hard to bypass CSE because these SelectionDAG APIs are private.