Skip to content

Commit

Permalink
[Pipeliner] Fixed the epilogue predicate (#4754)
Browse files Browse the repository at this point in the history
This mirrors the upstream patch
llvm/llvm-project#108964.
  • Loading branch information
sjw36 authored Sep 18, 2024
1 parent 5083988 commit fad49b2
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,9 @@ LoopPipelinerInternal::emitEpilogue(RewriterBase &rewriter,
Value rangeDecr = rewriter.create<arith::AddIOp>(loc, rangeIncr, minus1);
Value totalIterations = rewriter.create<arith::DivUIOp>(loc, rangeDecr, step);

Value zero =
rewriter.create<arith::ConstantOp>(loc, rewriter.getIntegerAttr(t, 0));

// Capture predicates for dynamic loops.
SmallVector<Value> predicates(maxStage + 1);

Expand All @@ -685,9 +688,9 @@ LoopPipelinerInternal::emitEpilogue(RewriterBase &rewriter,
setValueMapping(forOp.getInductionVar(), newlastIter, maxStage - i);

if (dynamicLoop) {
// pred = iterI >= lb
// pred = iterI >= 0
predicates[i + 1] = rewriter.create<arith::CmpIOp>(
loc, arith::CmpIPredicate::sge, iterI, lb);
loc, arith::CmpIPredicate::sge, iterI, zero);
}
}

Expand Down
3 changes: 2 additions & 1 deletion test/TritonGPU/loop-pipeline.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
// CHECK: scf.yield {{.*}}, %[[INS_IDX_3]], %[[EXT_IDX_3]], %[[NEXT_A]], %[[NEXT_B]]

// AMD-LABEL: tt.func @matmul_loop
// AMD-DAG: %[[C0:.*]] = arith.constant 0 : index
// AMD: %{{.*}}:6 = scf.for %[[ARG5:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ARG6:.*]] = %{{.*}}, %[[ARG7:.*]] = %{{.*}}, %[[ARG8:.*]] = %{{.*}}, %[[ARG9:.*]] = %{{.*}}, %[[ARG10:.*]] = %{{.*}}, %[[ARG11:.*]] = %{{.*}})
// AMD: %[[LOCAL_LOAD_32:.*]] = triton_gpu.local_load %[[ARG10]]
// AMD: %[[LOCAL_LOAD_33:.*]] = triton_gpu.local_load %[[ARG11]]
Expand All @@ -80,7 +81,7 @@
// AMD: %[[ADDI_23:.*]] = arith.addi %[[ADDI_22]], %{{.*}}-1
// AMD: %[[DIVUI_24:.*]] = arith.divui %[[ADDI_23]], %{{.*}}
// AMD: %[[ADDI_25:.*]] = arith.addi %[[DIVUI_24]], %{{.*}}-1
// AMD: %[[CMPI_26:.*]] = arith.cmpi sge, %[[ADDI_25]], %{{.*}}
// AMD: %[[CMPI_26:.*]] = arith.cmpi sge, %[[ADDI_25]], %[[C0]]
// AMD: %[[LOCAL_LOAD_27:.*]] = triton_gpu.local_load %{{.*}}#4
// AMD: %[[LOCAL_LOAD_28:.*]] = triton_gpu.local_load %{{.*}}#5
// AMD: %[[MULF_29:.*]] = arith.mulf %[[LOCAL_LOAD_28]], %{{.*}}
Expand Down

0 comments on commit fad49b2

Please sign in to comment.