diff --git a/llvm/lib/CodeGen/WindowScheduler.cpp b/llvm/lib/CodeGen/WindowScheduler.cpp index 0777480499e55b..aece214f7ddbbb 100644 --- a/llvm/lib/CodeGen/WindowScheduler.cpp +++ b/llvm/lib/CodeGen/WindowScheduler.cpp @@ -485,6 +485,7 @@ int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG, // ======================================== int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { int MaxStallCycle = 0; + int CurrentII = MaxCycle + 1; auto Range = getScheduleRange(Offset, SchedInstrNum); for (auto &MI : Range) { auto *SU = TripleDAG->getSUnit(&MI); @@ -492,8 +493,8 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { for (auto &Succ : SU->Succs) { if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU) continue; - // If the expected cycle does not exceed MaxCycle, no check is needed. - if (DefCycle + (int)Succ.getLatency() <= MaxCycle) + // If the expected cycle does not exceed CurrentII, no check is needed. + if (DefCycle + (int)Succ.getLatency() <= CurrentII) continue; // If the cycle of the scheduled MI A is less than that of the scheduled // MI B, the scheduling will fail because the lifetime of the @@ -503,7 +504,7 @@ int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) { if (DefCycle < UseCycle) return WindowIILimit; // Get the stall cycle introduced by the register between two trips. - int StallCycle = DefCycle + (int)Succ.getLatency() - MaxCycle - UseCycle; + int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle; MaxStallCycle = std::max(MaxStallCycle, StallCycle); } } diff --git a/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir new file mode 100644 index 00000000000000..ddba67d78eb58c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-ws-stall-cycle.mir @@ -0,0 +1,59 @@ +# REQUIRES: asserts +# RUN: llc --march=hexagon %s -run-pass=pipeliner -debug-only=pipeliner \ +# RUN: -window-sched=force -filetype=null -verify-machineinstrs \ +# RUN: -window-region-limit=1 -window-search-ratio=100 -window-diff-limit=0 \ +# RUN: 2>&1 | FileCheck %s + +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 +# CHECK-LABEL: Start analyzing II +# CHECK: MaxStallCycle is 0 + +--- +name: test_window_stall_cycle +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $r0, $r1 + + %0:intregs = COPY $r1 + %1:intregs = COPY $r0 + %2:intregs = nsw A2_add %0, %1 + %3:intregs = S2_lsr_i_r_acc %2, %2, 31 + %4:intregs = S2_asr_i_r killed %3, 1 + %5:predregs = C2_cmpgt %1, %4 + %6:intregs = A2_tfrsi 0 + J2_jumpt killed %5, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2(0x80000000) + + %7:intregs = A2_addi %4, 2 + %8:intregs = A2_tfrsi 0 + %9:intregs = A2_sub %4, %1 + %10:intregs = A2_addi %9, 1 + %11:intregs = COPY %10 + J2_loop0r %bb.2, %11, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2 (machine-block-address-taken): + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %12:intregs = PHI %7, %bb.1, %13, %bb.2 + %14:intregs = PHI %8, %bb.1, %15, %bb.2 + %16:intregs = PHI %8, %bb.1, %17, %bb.2 + %18:intregs, %13:intregs = L2_loadri_pi %12, -4 + %17:intregs = nsw A2_add killed %18, %16 + %15:intregs = A2_max %17, %14 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3: + %19:intregs = PHI %6, %bb.0, %15, %bb.2 + $r0 = COPY %19 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +...