-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[SCEVDivision] Prevent propagation of incorrect no-wrap flags #154745
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| ; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s | ||
|
|
||
| ; In the following case, we don't know the concret value of `m`, so we cannot | ||
| ; deduce no-wrap behavior for the quotient/remainder divided by `m`. However, | ||
| ; we can infer `{0,+,1}<%loop>` is nuw and nsw from the induction variable. | ||
| ; | ||
| ; for (int i = 0; i < btc; i++) | ||
| ; a[i * (m + 42)] = 0; | ||
|
|
||
| ; CHECK: AccessFunction: {0,+,(42 + %m)}<nuw><nsw><%loop> | ||
| ; CHECK-NEXT: Base offset: %a | ||
| ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes. | ||
| ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%loop>][{0,+,42}<%loop>] | ||
| define void @divide_by_m0(ptr %a, i64 %m, i64 %btc) { | ||
| entry: | ||
| %stride = add nsw nuw i64 %m, 42 | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] | ||
| %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ] | ||
| %idx = getelementptr inbounds i8, ptr %a, i64 %offset | ||
| store i8 0, ptr %idx | ||
| %i.next = add nsw nuw i64 %i, 1 | ||
| %offset.next = add nsw nuw i64 %offset, %stride | ||
| %cond = icmp eq i64 %i.next, %btc | ||
| br i1 %cond, label %exit, label %loop | ||
|
|
||
| exit: | ||
| ret void | ||
| } | ||
|
|
||
| ; In the following case, we don't know the concret value of `m`, so we cannot | ||
| ; deduce no-wrap behavior for the quotient/remainder divided by `m`. Also, we | ||
| ; don't infer nsw/nuw from the induction variable in this case. | ||
| ; | ||
| ; for (int i = 0; i < btc; i++) | ||
| ; a[i * (2 * m + 42)] = 0; | ||
|
|
||
| ; CHECK: AccessFunction: {0,+,(42 + (2 * %m))}<nuw><nsw><%loop> | ||
| ; CHECK-NEXT: Base offset: %a | ||
| ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes. | ||
| ; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][{0,+,42}<%loop>] | ||
| define void @divide_by_m2(ptr %a, i64 %m, i64 %btc) { | ||
| entry: | ||
| %m2 = add nsw nuw i64 %m, %m | ||
| %stride = add nsw nuw i64 %m2, 42 | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] | ||
| %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ] | ||
| %idx = getelementptr inbounds i8, ptr %a, i64 %offset | ||
| store i8 0, ptr %idx | ||
| %i.next = add nsw nuw i64 %i, 1 | ||
| %offset.next = add nsw nuw i64 %offset, %stride | ||
| %cond = icmp eq i64 %i.next, %btc | ||
| br i1 %cond, label %exit, label %loop | ||
|
|
||
| exit: | ||
| ret void | ||
| } | ||
|
|
||
| ; In the following case, the `i * 2 * d` is always zero, so it's nsw and nuw. | ||
| ; However, the quotient divided by `d` is neither nsw nor nuw. | ||
| ; | ||
| ; if (d == 0) | ||
| ; for (unsigned long long i = 0; i != UINT64_MAX; i++) | ||
| ; a[i * 2 * d] = 42; | ||
|
|
||
| ; CHECK: AccessFunction: {0,+,(2 * %d)}<nuw><nsw><%loop> | ||
| ; CHECK-NEXT: Base offset: %a | ||
| ; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes. | ||
| ; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][0] | ||
| define void @divide_by_zero(ptr %a, i64 %d) { | ||
| entry: | ||
| %guard = icmp eq i64 %d, 0 | ||
| br i1 %guard, label %loop.preheader, label %exit | ||
|
|
||
| loop.preheader: | ||
| %stride = mul nsw nuw i64 %d, 2 ; since %d is 0, %stride is also 0 | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ] | ||
| %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ] | ||
| %idx = getelementptr inbounds i8, ptr %a, i64 %offset | ||
| store i8 42, ptr %idx | ||
| %i.next = add nuw i64 %i, 1 | ||
| %offset.next = add nsw nuw i64 %offset, %stride | ||
| %cond = icmp eq i64 %i.next, -1 | ||
| br i1 %cond, label %exit, label %loop | ||
|
|
||
| exit: | ||
| ret void | ||
| } | ||
|
|
||
| ; In the following case, the `i * (d + 1)` is always zero, so it's nsw and nuw. | ||
| ; However, the quotient/remainder divided by `d` is not nsw. | ||
| ; | ||
| ; if (d == UINT64_MAX) | ||
| ; for (unsigned long long i = 0; i != d; i++) | ||
| ; a[i * (d + 1)] = 42; | ||
|
|
||
| ; CHECK: AccessFunction: {0,+,(1 + %d)}<nuw><nsw><%loop> | ||
| ; CHECK-NEXT: Base offset: %a | ||
| ; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes. | ||
| ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><%loop>][{0,+,1}<nuw><%loop>] | ||
| define void @divide_by_minus_one(ptr %a, i64 %d) { | ||
| entry: | ||
| %guard = icmp eq i64 %d, -1 | ||
| br i1 %guard, label %loop.preheader, label %exit | ||
|
|
||
| loop.preheader: | ||
| %stride = add nsw i64 %d, 1 ; since %d is -1, %stride is 0 | ||
| br label %loop | ||
|
|
||
| loop: | ||
| %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ] | ||
| %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ] | ||
| %idx = getelementptr inbounds i8, ptr %a, i64 %offset | ||
| store i8 42, ptr %idx | ||
| %i.next = add nuw i64 %i, 1 | ||
| %offset.next = add nsw nuw i64 %offset, %stride | ||
| %cond = icmp eq i64 %i.next, %d | ||
| br i1 %cond, label %exit, label %loop | ||
|
|
||
| exit: | ||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -479,14 +479,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, | |
| ;; for (int k = 1; k < o; k++) | ||
| ;; = A[i*m*o + j*o + k] | ||
| ;; A[i*m*o + j*o + k - 1] = | ||
| ;; | ||
| ;; FIXME: Currently fails to infer nsw for the SCEV `{0,+,1}<for.body8>` | ||
| define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { | ||
| ; CHECK-LABEL: 't8' | ||
| ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 | ||
| ; CHECK-NEXT: da analyze - none! | ||
| ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 | ||
| ; CHECK-NEXT: da analyze - consistent anti [0 0 1]! | ||
| ; CHECK-NEXT: da analyze - anti [* * *|<]! | ||
| ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 | ||
| ; CHECK-NEXT: da analyze - none! | ||
| ; CHECK-NEXT: da analyze - output [* * *]! | ||
|
Comment on lines
+482
to
+491
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The essential portion of the IR is as follows: preheader:
%guard = icmp sgt i32 %o, 0
br i1 %guard, label %loop, label %exit
loop:
%i = phi i32 [ 1, %preheader ], [ %inc, %loop ]
...
%inc = add nuw nsw i32 %i, 1
%exitcond = icmp eq i32 %inc, %o
br i1 %exitcond, label %exit, label %body
exit:
...From the loop-guard and the induction variable, we know the following:
IIUIC, in principle, we can deduce that |
||
| ; | ||
| entry: | ||
| %cmp49 = icmp sgt i32 %n, 0 | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
StepNumAbsalready is the absoluteAdd reasoning here? Such as "since the denominator cannot be zero, so abs(Denom) >= 1, the range of the SCEVAddRec can only shrink, i.e. if the range was not exceeding the size of the integer type's domain (i.e. not self-wrap) before, it will not self-wrap after division"
I think the deduction is more general, only needs that the nominator is NW and the denominator is loop-invariant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the comments.
This seems correct, but I realized that there are no checks to ensure that Denominator is non-zero in the first place. Anyway, it looks like I should fix the other parts first, so I’ll start with those.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Division by zero must either be ruled-out by the caller, or the code that it is processing has undefined behaviour. In either case, there is not a lot
SCEVDivisioncan do when representing a division-by-zero.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is currently used in Delinearization and does not correspond to actual div instructions in LLVM IR. That is, given a multiplication like
%m * %n, an invocation likedivide(%m * %n, %m)can happen without a non-zero check. Anyway, in such a case the division seems ill-defined.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something we need to be careful about is that SCEV flags are global, not use-site specific. So even if we know that division by zero is not possible in context, we may not be able to assume it for the purpose of flag propagation. (I haven't checked whether this is a problem for the delinearization case or not.)