diff --git a/sgl-model-gateway/src/policies/cache_aware.rs b/sgl-model-gateway/src/policies/cache_aware.rs index 781f413e7ba8..c30a0ec21986 100644 --- a/sgl-model-gateway/src/policies/cache_aware.rs +++ b/sgl-model-gateway/src/policies/cache_aware.rs @@ -309,7 +309,10 @@ impl LoadBalancingPolicy for CacheAwarePolicy { matched_worker.to_string() } else { RouterMetrics::record_cache_miss(); - tree.get_smallest_tenant() + let min_load_idx = *healthy_indices + .iter() + .min_by_key(|&&idx| workers[idx].load())?; + workers[min_load_idx].url().to_string() }; // Find the index of the selected worker