From 30cce6d7da0d1a146ef135ccc3d1f706492e7d97 Mon Sep 17 00:00:00 2001 From: Manas103 Date: Fri, 22 May 2026 13:36:59 -0400 Subject: [PATCH 1/2] [Analysis] Clamp SelectOp divisibility when condConstancy reduces output contiguity In SelectOpAxisInfoVisitor's tensor-cond branch, the call to getDivisibilityFromContiguity sees only the lhs/rhs contiguities and can overestimate divisibility when condConstancy further reduces the output contiguity below either input's contiguity. Example: lhs c=8 d=8, rhs c=8 d=16, condConstancy=1. Output contiguity collapses to 1 (every position is a leader), but the helper returns gcd(8, 16) = 8 because c_lhs == c_rhs. The output value at position 1 may be 17, not divisible by 8. This is latent on the current pow2 lattice (gcd == min, and codegen vec_width is capped by contiguity, which is computed correctly), but it is a soundness regression introduced by #7781. Fix is a conditional GCD with the output contiguity at the SelectOp callsite, preserving the existing semantics when condConstancy does not bind. Fixes #10067. --- lib/Analysis/AxisInfo.cpp | 13 +++++++++++-- test/Analysis/test-alignment.mlir | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/Analysis/AxisInfo.cpp b/lib/Analysis/AxisInfo.cpp index 025f882bb141..1f247f1508b5 100644 --- a/lib/Analysis/AxisInfo.cpp +++ b/lib/Analysis/AxisInfo.cpp @@ -960,8 +960,17 @@ class SelectOpAxisInfoVisitor final : public AxisInfoVisitorImpl { rhsInfo.getConstancy(d), condConstancy[d])); contiguity.push_back(gcd(lhsInfo.getContiguity(d), rhsInfo.getContiguity(d), condConstancy[d])); - divisibility.push_back( - getDivisibilityFromContiguity(lhsInfo, rhsInfo, d)); + // When condConstancy reduces output contiguity below either input's + // contiguity, output "group leaders" include positions that were + // non-leaders in lhs/rhs; the value at such a position p is + // divisible only by gcd(d_src, p) <= gcd(d_src, outContig). Clamp + // divisibility by output contiguity to keep this sound. + // getDivisibilityFromContiguity itself does not see condConstancy. + int64_t div = getDivisibilityFromContiguity(lhsInfo, rhsInfo, d); + if (contiguity.back() < lhsInfo.getContiguity(d) || + contiguity.back() < rhsInfo.getContiguity(d)) + div = gcd(div, contiguity.back()); + divisibility.push_back(div); } } if (lhsInfo.getConstantValue().has_value() && diff --git a/test/Analysis/test-alignment.mlir b/test/Analysis/test-alignment.mlir index cee58274d4a7..03f288f615ca 100644 --- a/test/Analysis/test-alignment.mlir +++ b/test/Analysis/test-alignment.mlir @@ -1146,6 +1146,24 @@ tt.func @select_same_value_constancy() { // ----- +// Regression: SelectOp must clamp divisibility when condConstancy reduces the +// output contiguity below either input's contiguity. Otherwise the helper +// getDivisibilityFromContiguity overestimates divisibility because it does not +// see condConstancy. See issue triton-lang/triton#10067. +tt.func @select_cond_constancy_clamps_divisibility(%arg0: tensor<8xi1>) { + // expected-remark @below {{contiguity = [8], divisibility = [8], constancy = [1], constant_value = }} + %lhs = tt.make_range {end = 16 : i32, start = 8 : i32} : tensor<8xi32> + // expected-remark @below {{contiguity = [8], divisibility = [16], constancy = [1], constant_value = }} + %rhs = tt.make_range {end = 24 : i32, start = 16 : i32} : tensor<8xi32> + // %arg0 has unknown contents, so condConstancy = 1. Output contiguity must + // collapse to gcd(8, 8, 1) = 1; divisibility must clamp to 1 (not gcd(8, 16) = 8). + // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = }} + %sel = arith.select %arg0, %lhs, %rhs : tensor<8xi1>, tensor<8xi32> + tt.return +} + +// ----- + tt.func @cmp_after_max_constancy() { %c5 = arith.constant dense<5> : tensor<4xi32> %c7 = arith.constant dense<7> : tensor<4xi32> From 2fb17bd8df510126d2820f2387c1165889a64665 Mon Sep 17 00:00:00 2001 From: Manas103 Date: Fri, 22 May 2026 19:39:49 -0400 Subject: [PATCH 2/2] Simplify clamp per reviewer suggestion --- lib/Analysis/AxisInfo.cpp | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/lib/Analysis/AxisInfo.cpp b/lib/Analysis/AxisInfo.cpp index 1f247f1508b5..777364157a63 100644 --- a/lib/Analysis/AxisInfo.cpp +++ b/lib/Analysis/AxisInfo.cpp @@ -960,17 +960,12 @@ class SelectOpAxisInfoVisitor final : public AxisInfoVisitorImpl { rhsInfo.getConstancy(d), condConstancy[d])); contiguity.push_back(gcd(lhsInfo.getContiguity(d), rhsInfo.getContiguity(d), condConstancy[d])); - // When condConstancy reduces output contiguity below either input's - // contiguity, output "group leaders" include positions that were - // non-leaders in lhs/rhs; the value at such a position p is - // divisible only by gcd(d_src, p) <= gcd(d_src, outContig). Clamp - // divisibility by output contiguity to keep this sound. - // getDivisibilityFromContiguity itself does not see condConstancy. - int64_t div = getDivisibilityFromContiguity(lhsInfo, rhsInfo, d); - if (contiguity.back() < lhsInfo.getContiguity(d) || - contiguity.back() < rhsInfo.getContiguity(d)) - div = gcd(div, contiguity.back()); - divisibility.push_back(div); + // getDivisibilityFromContiguity does not see condConstancy; clamp + // by the just-computed output contiguity so the result remains + // sound when condConstancy reduces it below the input contiguities. + divisibility.push_back( + gcd(getDivisibilityFromContiguity(lhsInfo, rhsInfo, d), + contiguity.back())); } } if (lhsInfo.getConstantValue().has_value() &&