[Delinearization] Add test for inferred array size exceeds integer range (NFC) #169048

kasuga-fj · 2025-11-21T14:52:21Z

Add test cases where the delinearized arrays may not satisfy the following "common" property:

&A[I_1][I_2]...[I_n] == &A[J_1][J_2]...[J_n] iff (I_1, I_2, ..., I_n) == (J_1, J_2, ..., J_n)

The root cause of this issue is that the inferred array size is too large and the offset calculation overflows.
Such results should be discarded during validation. This will be fixed by #169902 .

kasuga-fj · 2025-11-21T14:52:34Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

github-actions · 2025-11-21T15:37:10Z

🐧 Linux x64 Test Results

186829 tests passed
4906 tests skipped

✅ The build succeeded and all tests passed.

llvmbot · 2025-11-28T12:14:14Z

@llvm/pr-subscribers-llvm-analysis

Author: Ryotaro Kasuga (kasuga-fj)

Changes

Add test cases where the delinearized arrays may not be "injective" because the estimated sizes are (potentially) too large. Since we assume the delinearized result as a normal array, we expect different subscript values to be mapped to different integers (the array offsets). However, this property doesn't hold if the array size is large and the offset calculation overflows. The test cases added by this patch demonstrate this issue.
Such results should be discarded during validation. This will be fixed by #169902 .

Full diff: https://github.com/llvm/llvm-project/pull/169048.diff

1 Files Affected:

(added) llvm/test/Analysis/Delinearization/validation_large_size.ll (+136)

diff --git a/llvm/test/Analysis/Delinearization/validation_large_size.ll b/llvm/test/Analysis/Delinearization/validation_large_size.ll
new file mode 100644
index 0000000000000..f78e68f60569a
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/validation_large_size.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes='print<delinearization>' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s
+
+; FIXME: When considering an array as a function from subcripts to addresses,
+; it should be injective. That is, different subscript tuples should map to
+; different addresses. Currently, delinearization doesn't guarantee this
+; property, especially when the inferred array size is very large so that the
+; product of dimensions may overflow. The delinearization validation should
+; consider such cases as invalid.
+
+; for (i = 0; i < (1ULL << 60); i++)
+;   for (j = 0; j < 256; j++)
+;     A[i*256 + j] = 0;
+;
+; The store will be delinearized to `A[i][j]` with its size `[][256]`. Since
+; `i` can be very large, the mapping from subscripts to addresses is not
+; injective. E.g., `&A[0][j] = &A[2^56][j] = ...`.
+;
+define void @large_size_fixed(ptr %A) {
+; CHECK-LABEL: 'large_size_fixed'
+; CHECK-NEXT:  Inst: store i8 0, ptr %gep, align 1
+; CHECK-NEXT:  AccessFunction: {{\{\{}}0,+,256}<%for.i.header>,+,1}<nsw><%for.j>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][256] with elements of 1 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j>]
+; CHECK-NEXT:  Delinearization validation: Succeeded
+;
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  %i.mul = mul i64 %i, 256
+  br label %for.j
+
+for.j:
+  %j = phi i64 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+  %offset = add nsw i64 %i.mul, %j
+  %gep = getelementptr i8, ptr %A, i64 %offset
+  store i8 0, ptr %gep
+  %j.inc = add i64 %j, 1
+  %ec.j = icmp eq i64 %j.inc, 256
+  br i1 %ec.j, label %for.i.latch, label %for.j
+
+for.i.latch:
+  %i.inc = add i64 %i, 1
+  %ec.i = icmp eq i64 %i.inc, 1152921504606846976
+  br i1 %ec.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}
+
+; for (i = 0; i < n; i++)
+;   for (j = 0; j < m; j++)
+;     for (k = 0; k < o; k++)
+;       if (i < 5 && j < 5 && k < 5)
+;         A[i*m*o + j*o + k] = 0;
+;
+; The product (%m * %o) can overflow, e.g., (%m, %o) = (2^32 - 1, 2^32). In
+; this case, the delinearization `A[%i][%j][%k]` with its size `[][%m][%o]`
+; should be considered invalid, because the address calculation will be:
+;
+; A[%i][%j][%k] = %A + %i*%m*%o + %j*%o + %k
+;               = %A - 2^32*%i + %j*2^32 + %k
+;               = %A + 2^32*(%j - %i) + %k
+;
+; It means `&A[0][0][%k]` = `&A[1][1][%k]` = ..., which implies that the
+; mapping from subscripts to addresses is not injective.
+;
+define void @large_size_parametric(i64 %n, i64 %m, i64 %o, ptr %A) {
+; CHECK-LABEL: 'large_size_parametric'
+; CHECK-NEXT:  Inst: store i8 0, ptr %gep, align 1
+; CHECK-NEXT:  AccessFunction: {{\{\{\{}}0,+,(%m * %o)}<%for.i.header>,+,%o}<%for.j.header>,+,1}<nw><%for.k.header>
+; CHECK-NEXT:  Base offset: %A
+; CHECK-NEXT:  ArrayDecl[UnknownSize][%m][%o] with elements of 1 bytes.
+; CHECK-NEXT:  ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k.header>]
+; CHECK-NEXT:  Delinearization validation: Succeeded
+;
+entry:
+  %guard.i = icmp sgt i64 %n, 0
+  %m_o = mul i64 %m, %o
+  br i1 %guard.i, label %for.i.header, label %exit
+
+for.i.header:
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+  %i_m_o = mul i64 %i, %m_o
+  br label %for.j.preheader
+
+for.j.preheader:
+  %guard.j = icmp sgt i64 %m, 0
+  br i1 %guard.j, label %for.j.header, label %for.i.latch
+
+for.j.header:
+  %j = phi i64 [ 0, %for.j.preheader ], [ %j.inc, %for.j.latch ]
+  %j_o = mul i64 %j, %o
+  br label %for.k.preheader
+
+for.k.preheader:
+  %guard.k = icmp sgt i64 %o, 0
+  br i1 %guard.k, label %for.k.header, label %for.j.latch
+
+for.k.header:
+  %k = phi i64 [ 0, %for.k.preheader ], [ %k.inc, %for.k.latch ]
+  %cond.i = icmp slt i64 %i, 5
+  %cond.j = icmp slt i64 %j, 5
+  %cond.k = icmp slt i64 %k, 5
+  %cond.ij = and i1 %cond.i, %cond.j
+  %cond = and i1 %cond.ij, %cond.k
+  br i1 %cond, label %if.then, label %for.k.latch
+
+if.then:
+  %offset.tmp = add i64 %i_m_o, %j_o
+  %offset = add i64 %offset.tmp, %k
+  %gep = getelementptr i8, ptr %A, i64 %offset
+  store i8 0, ptr %gep, align 1
+  br label %for.k.latch
+
+for.k.latch:
+  %k.inc = add nsw i64 %k, 1
+  %ec.k = icmp eq i64 %k.inc, %o
+  br i1 %ec.k, label %for.j.latch, label %for.k.header
+
+for.j.latch:
+  %j.inc = add nsw i64 %j, 1
+  %ec.j = icmp eq i64 %j.inc, %m
+  br i1 %ec.j, label %for.i.latch, label %for.j.header
+
+for.i.latch:
+  %i.inc = add nsw i64 %i, 1
+  %ec.i = icmp eq i64 %i.inc, %n
+  br i1 %ec.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}

Meinersbur · 2025-11-28T15:54:24Z

llvm/test/Analysis/Delinearization/validation_large_size.ll

+; RUN: opt < %s -passes='print<delinearization>' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s
+
+; FIXME: When considering an array as a function from subcripts to addresses,
+; it should be injective. That is, different subscript tuples should map to


Is injectivity a requirement for array accesses that we want to analyze? What about A[i][j/2], A[i][max(0,j-1)] or A[i][0]?

I considered the domain of an array is integers (or its tuples). &A[i][j/2] can be interpreted as a composed function of f: (i, j) -> (i, j/2) and g: (x, y) -> &A[x][y]. Maybe the term "subscript" is confusing? Or is there a more common expression for this property?

I think non-(dynamic-)aliasing. The property that &A[i] == &B[j] if, and only if, &A == &B && i == j etc.

I understood subscripts as the the function f (which is not injective). You can also use array index for x, y in g.

Changed the wording to avoid ambiguity.

I understood subscripts as the the function f (which is not injective). You can also use array index for x, y in g.

Considering that DA defines Subscript as (a pair of) addrecs, it seems consistent to interpret it as a map rather than an integer.

llvm/test/Analysis/Delinearization/validation_large_size.ll

Meinersbur · 2025-12-01T17:02:54Z

llvm/test/Analysis/Delinearization/validation_large_size.ll

+; RUN: opt < %s -passes='print<delinearization>' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s
+
+; FIXME: When considering an array as a function from subcripts to addresses,
+; it should be injective. That is, different subscript tuples should map to


I think non-(dynamic-)aliasing. The property that &A[i] == &B[j] if, and only if, &A == &B && i == j etc.

I understood subscripts as the the function f (which is not injective). You can also use array index for x, y in g.

This patch adds a check in validation for delinearization to ensure that the offset calculation does not overflow. If it overflows, different array accesses (e.g., `A[0][0]` and `A[1][0]`) could map to the same linear index, leading to incorrect behavior. For fixed-size arrays, the check is relatively straightforward. However, for dynamic-size arrays (i.e., arrays where the size is not known at compile time), it's difficult to prove this statically, and it going to fail for almost all cases. Maybe we need to add some runtime checks or reasoning based on `inbounds` like LAA does. Fixes the test cases added in #169048.

…nge (NFC) (llvm#169048) Add test cases where the delinearized arrays may not satisfy the following "common" property: `&A[I_1][I_2]...[I_n] == &A[J_1][J_2]...[J_n]` iff `(I_1, I_2, ..., I_n) == (J_1, J_2, ..., J_n)` The root cause of this issue is that the inferred array size is too large and the offset calculation overflows. Such results should be discarded during validation. This will be fixed by llvm#169902 .

This patch adds a check in validation for delinearization to ensure that the offset calculation does not overflow. If it overflows, different array accesses (e.g., `A[0][0]` and `A[1][0]`) could map to the same linear index, leading to incorrect behavior. For fixed-size arrays, the check is relatively straightforward. However, for dynamic-size arrays (i.e., arrays where the size is not known at compile time), it's difficult to prove this statically, and it going to fail for almost all cases. Maybe we need to add some runtime checks or reasoning based on `inbounds` like LAA does. Fixes the test cases added in llvm#169048.

…nge (NFC) (llvm#169048) Add test cases where the delinearized arrays may not satisfy the following "common" property: `&A[I_1][I_2]...[I_n] == &A[J_1][J_2]...[J_n]` iff `(I_1, I_2, ..., I_n) == (J_1, J_2, ..., J_n)` The root cause of this issue is that the inferred array size is too large and the offset calculation overflows. Such results should be discarded during validation. This will be fixed by llvm#169902 .

This patch adds a check in validation for delinearization to ensure that the offset calculation does not overflow. If it overflows, different array accesses (e.g., `A[0][0]` and `A[1][0]`) could map to the same linear index, leading to incorrect behavior. For fixed-size arrays, the check is relatively straightforward. However, for dynamic-size arrays (i.e., arrays where the size is not known at compile time), it's difficult to prove this statically, and it going to fail for almost all cases. Maybe we need to add some runtime checks or reasoning based on `inbounds` like LAA does. Fixes the test cases added in llvm#169048.

kasuga-fj mentioned this pull request Nov 21, 2025

[DA][Delinearization] Move validation logic into Delinearization #169047

Merged

Base automatically changed from users/kasuga-fj/da-move-delinearize-validation to main November 27, 2025 15:06

[Delinarization] Add test for inferred array size exceeds integer range

de8e1cb

kasuga-fj force-pushed the users/kasuga-fj/delinearize-add-validation-test branch from 1ced6d1 to de8e1cb Compare November 28, 2025 11:14

kasuga-fj mentioned this pull request Nov 28, 2025

[Delinearization] Add validation for large size arrays #169902

Merged

kasuga-fj marked this pull request as ready for review November 28, 2025 12:13

llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Nov 28, 2025

kasuga-fj requested review from Meinersbur, sebpop and sjoerdmeijer November 28, 2025 12:14

Meinersbur reviewed Nov 28, 2025

View reviewed changes

kasuga-fj changed the title ~~[Delinarization] Add test for inferred array size exceeds integer range~~ [Delinarization] Add test for inferred array size exceeds integer range (NFC) Nov 28, 2025

kasuga-fj changed the title ~~[Delinarization] Add test for inferred array size exceeds integer range (NFC)~~ [Delinearization] Add test for inferred array size exceeds integer range (NFC) Dec 1, 2025

Meinersbur approved these changes Dec 1, 2025

View reviewed changes

adjust comment

416ae92

kasuga-fj merged commit 9ba5fa2 into main Dec 2, 2025
10 checks passed

kasuga-fj deleted the users/kasuga-fj/delinearize-add-validation-test branch December 2, 2025 11:49

kasuga-fj mentioned this pull request Dec 4, 2025

[DA] batch delinearization #170519

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Delinearization] Add test for inferred array size exceeds integer range (NFC) #169048

[Delinearization] Add test for inferred array size exceeds integer range (NFC) #169048

Uh oh!

kasuga-fj commented Nov 21, 2025 •

edited

Loading

Uh oh!

kasuga-fj commented Nov 21, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Nov 21, 2025 •

edited

Loading

Uh oh!

llvmbot commented Nov 28, 2025

Uh oh!

Meinersbur Nov 28, 2025

Uh oh!

kasuga-fj Nov 28, 2025

Uh oh!

Meinersbur Dec 1, 2025

Uh oh!

kasuga-fj Dec 2, 2025

Uh oh!

Uh oh!

Meinersbur Dec 1, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[Delinearization] Add test for inferred array size exceeds integer range (NFC) #169048

[Delinearization] Add test for inferred array size exceeds integer range (NFC) #169048

Uh oh!

Conversation

kasuga-fj commented Nov 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

kasuga-fj commented Nov 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Nov 21, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

llvmbot commented Nov 28, 2025

Uh oh!

Meinersbur Nov 28, 2025

Choose a reason for hiding this comment

Uh oh!

kasuga-fj Nov 28, 2025

Choose a reason for hiding this comment

Uh oh!

Meinersbur Dec 1, 2025

Choose a reason for hiding this comment

Uh oh!

kasuga-fj Dec 2, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Meinersbur Dec 1, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

kasuga-fj commented Nov 21, 2025 •

edited

Loading

kasuga-fj commented Nov 21, 2025 •

edited

Loading

github-actions bot commented Nov 21, 2025 •

edited

Loading