Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 31 additions & 17 deletions src/transform/loop_vectorize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,13 @@ class VectorizePlanner : public arith::IRMutatorWithAnalyzer {
if (CanProveIndependent(elem_offset, inner_for_->loop_var, analyzer_)) {
return;
}
// 3. Tight vectorize bound
vector_size_ = arith::ZeroAwareGCD(vector_size_, vector_load_bits_max_ /
buffer->dtype.bits());
// 3. Check whether the current vector_size_ already passes the invariant-boundary check
if (!IsExprInvariantInVectorBoundary(elem_offset, inner_for_->loop_var,
vector_size_, analyzer_)) {
// If not, tighten the vectorize bound with the buffer dtype constraint
vector_size_ = arith::ZeroAwareGCD(
vector_size_, vector_load_bits_max_ / buffer->dtype.bits());
}
// 4. Try to vectorize buffer load
while (!IndiceCanVectorize(elem_offset, inner_for_->loop_var,
inner_for_->extent, vector_size_, analyzer_)) {
Expand Down Expand Up @@ -272,6 +276,28 @@ bool CanProveIndependent(const PrimExpr &expr, Var var,
return false;
}

bool IsExprInvariantInVectorBoundary(const PrimExpr &expr, Var var,
int target_vectorized_size,
arith::Analyzer *analyzer) {
// Check if expr is invariant within vector boundaries
// We're trying to prove the access expression A[f(var)] depends only on
// floor(var/vecsize), not on var%vecsize
// Mathematically:
// \forall var, f(floor(var/vecsize)*vecsize + var%vecsize) ==
// f(floor(var/vecsize)*vecsize + 0)
// Example: for i in T.vectorized(8):
// A[i] = B[i] * C[i//4]
// if vecsize=4, f(i)=i//4 depends only on i//4
// Therefore A[i] = B[i] * C[i//4] can be vectorized with vecsize=4
PrimExpr var_aligned =
floordiv(var, target_vectorized_size) * target_vectorized_size;
PrimExpr expr_aligned = Substitute(expr, {{var, var_aligned}});
if (analyzer->CanProveEqual(expr, expr_aligned)) {
return true;
}
return false;
}

bool IndiceCanVectorize(const PrimExpr &expr, Var var,
const PrimExpr &iter_var_size,
int target_vectorized_size, arith::Analyzer *analyzer) {
Expand All @@ -292,20 +318,8 @@ bool IndiceCanVectorize(const PrimExpr &expr, Var var,
0))
return false;

// Check if expr is invariant within vector boundaries
// We're trying to prove the access expression A[f(var)] depends only on
// floor(var/vecsize), not on var%vecsize
// Mathematically:
// \forall var, f(floor(var/vecsize)*vecsize + var%vecsize) ==
// f(floor(var/vecsize)*vecsize + 0)
// Example: for i in T.vectorized(8):
// A[i] = B[i] * C[i//4]
// if vecsize=4, f(i)=i//4 depends only on i//4
// Therefore A[i] = B[i] * C[i//4] can be vectorized with vecsize=4
PrimExpr var_aligned =
floordiv(var, target_vectorized_size) * target_vectorized_size;
PrimExpr expr_aligned = Substitute(expr, {{var, var_aligned}});
if (analyzer->CanProveEqual(expr, expr_aligned)) {
if (IsExprInvariantInVectorBoundary(expr, var, target_vectorized_size,
analyzer)) {
return true;
}

Expand Down
6 changes: 6 additions & 0 deletions src/transform/loop_vectorize.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ For VectorizeLoop(const For &loop, arith::Analyzer *analyzer,
// when var changes
bool CanProveIndependent(const PrimExpr &expr, Var var,
arith::Analyzer *analyzer);

// Check if expr is invariant within vector boundaries.
// Returns true when `analyzer` can prove that `expr` is equal to `expr` with
// `var` replaced by floordiv(var, target_vectorized_size) *
// target_vectorized_size, i.e. that the value of `expr` does not change
// between the lanes of one vector-sized group of `var`. Returns false when
// that equality cannot be proven.
bool IsExprInvariantInVectorBoundary(const PrimExpr &expr, Var var,
int target_vectorized_size,
arith::Analyzer *analyzer);

bool IndiceCanVectorize(const PrimExpr &expr, Var var,
const PrimExpr &iter_var_size,
int target_vectorized_size, arith::Analyzer *analyzer);
Expand Down
Loading