@@ -477,8 +477,8 @@ fn remove_cycle(
477477/// Detects query cycles by using depth first search over all active query jobs.
478478/// If a query cycle is found it will break the cycle by finding an edge which
479479/// uses a query latch and then resuming that waiter.
480- /// There may be multiple cycles involved in a deadlock, so this searches
481- /// all active queries for cycles before finally resuming all the waiters at once.
480+ /// There may be multiple cycles involved in a deadlock, but we only search
481+ /// one cycle per call and resume one waiter at a time. See the `FIXME` below.
482482pub fn break_query_cycles ( query_map : QueryMap , registry : & rayon_core:: Registry ) {
483483 let mut wakelist = Vec :: new ( ) ;
484484 let mut jobs: Vec < QueryJobId > = query_map. keys ( ) . cloned ( ) . collect ( ) ;
@@ -488,6 +488,19 @@ pub fn break_query_cycles(query_map: QueryMap, registry: &rayon_core::Registry)
488488 while jobs. len ( ) > 0 {
489489 if remove_cycle ( & query_map, & mut jobs, & mut wakelist) {
490490 found_cycle = true ;
491+
492+ // FIXME(#137731): Resuming all the waiters at once may cause deadlocks,
493+ // so we resume one waiter per call for now. It's still unclear whether
494+ // it's due to possible issues in rustc-rayon or instead in the handling
495+ // of query cycles.
496+ // This seems to only appear when multiple query cycle errors
497+ // are involved, so this reduction in parallelism, while suboptimal, is not
498+ // universal and only the deadlock handler will encounter these cases.
499+ // The workaround shows loss of potential gains, but there still are big
500+ // improvements in the common case, and no regressions compared to the
501+ // single-threaded case. More investigation is still needed, and once fixed,
502+ // we can wake all the waiters up at once.
503+ break ;
491504 }
492505 }
493506
0 commit comments