Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/bindings/c/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ impl RouterHandles {
lora_name: Option<String>,
priority_jump: f64,
allowed_worker_ids: Option<HashSet<WorkerId>>,
- ) -> Result<(u64, u32), QueryRouterResult> {
+ ) -> Result<(u64, Option<u32>), QueryRouterResult> {
if let Some(ref ids) = allowed_worker_ids {
self.prefill_router.register_workers(ids);
}
Expand Down Expand Up @@ -1214,6 +1214,8 @@ pub unsafe extern "C" fn route_prefill_request(
.query_prefill_worker(&tokens, None, false, None, 0.0, allowed_worker_ids)
.await?;

+ let prefill_dp_rank = prefill_dp_rank.unwrap_or(u32::MAX);
+
tracing::info!(
prefill_worker_id = prefill_worker_id,
prefill_dp_rank = prefill_dp_rank,
Expand Down
13 changes: 6 additions & 7 deletions lib/llm/src/kv_router/prefill_router/execution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@ impl PrefillRouter {
let dp_rank = req
.routing
.as_ref()
- .and_then(|r| r.prefill_dp_rank.or(r.dp_rank))
- .unwrap_or(0);
+ .and_then(|r| r.prefill_dp_rank.or(r.dp_rank));
tracing::debug!(
worker_id = id,
- dp_rank = dp_rank,
+ dp_rank = ?dp_rank,
"Using pre-selected prefill worker for bootstrap"
);
(id, dp_rank)
Expand Down Expand Up @@ -99,7 +98,7 @@ impl PrefillRouter {

tracing::debug!(
worker_id = worker_id,
- dp_rank = dp_rank,
+ dp_rank = ?dp_rank,
bootstrap_host = %host,
bootstrap_port = port,
bootstrap_room = bootstrap_room,
Expand Down Expand Up @@ -266,7 +265,7 @@ impl PrefillRouter {
lora_name: Option<String>,
priority_jump: f64,
allowed_worker_ids: Option<HashSet<WorkerId>>,
- ) -> Result<(u64, u32)> {
+ ) -> Result<(u64, Option<u32>)> {
let prefill_router = self
.prefill_router
.get()
Expand All @@ -288,7 +287,7 @@ impl PrefillRouter {
allowed_worker_ids,
)
.await?;
- Ok((worker.worker_id, worker.dp_rank))
+ Ok((worker.worker_id, Some(worker.dp_rank)))
}
InnerPrefillRouter::SimpleRouter(r) => {
let worker_id = if update_states {
Expand All @@ -297,7 +296,7 @@ impl PrefillRouter {
r.peek_next_worker()
}
.ok_or_else(|| anyhow::anyhow!("No workers available for prefill"))?;
- Ok((worker_id, 0))
+ Ok((worker_id, None))
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/llm/src/kv_router/prefill_router/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ impl

let routing = prefill_req.routing_mut();
routing.prefill_worker_id = Some(worker_id);
- routing.dp_rank = Some(dp_rank);
+ routing.dp_rank = dp_rank;
prefill_req.bootstrap_info = Some(bootstrap_info.clone());

let prefill_context =
Expand Down
2 changes: 1 addition & 1 deletion lib/llm/src/kv_router/prefill_router/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub(super) enum PrefillOutcome {
pub(super) enum PrefillResolveDecision {
Resolved {
worker_id: u64,
- dp_rank: u32,
+ dp_rank: Option<u32>,
bootstrap_info: BootstrapInfo,
},
Unavailable,
Expand Down
Loading