Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/vllm_router/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def initialize_all(app: FastAPI, args):
prefill_model_labels=args.prefill_model_labels,
decode_model_labels=args.decode_model_labels,
kv_aware_threshold=args.kv_aware_threshold,
request_reroutes=args.request_reroutes,
)

# Initialize feature gates
Expand Down
7 changes: 7 additions & 0 deletions src/vllm_router/parsers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,13 @@ def parse_args():
help="The threshold for kv-aware routing.",
)

parser.add_argument(
"--request-reroutes",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may consider renaming this argument to a more explicit name, such as --max-instance-failover-reroute-attempts

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed

type=int,
default=0,
help="Number of reroute attempts per failed request",
)

args = parser.parse_args()
args = load_initial_config_from_config_file_if_required(parser, args)

Expand Down
14 changes: 9 additions & 5 deletions src/vllm_router/routers/routing_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def _qps_routing(
ret = url
return ret

def set_request_migration(self, request_reroutes):
    """Store how many times a failed request may be rerouted.

    Args:
        request_reroutes: Number of reroute attempts per failed request.
            ``None`` (for example when the option was never supplied and
            the caller forwards ``kwargs.get(...)``) is treated as 0,
            meaning no rerouting.
    """
    # The request loop computes `<reroutes> + 1`, so the stored value must
    # always be an int; normalise a missing value to "no reroutes".
    reroutes = 0 if request_reroutes is None else request_reroutes
    self.request_reroutes = reroutes
    # The request service reads this setting as `router.reroutes`; expose the
    # same value under that name so both spellings stay in sync.
    self.reroutes = reroutes

def _update_hash_ring(self, endpoints: List["EndpointInfo"]):
"""
Update the hash ring with the current list of endpoints.
Expand Down Expand Up @@ -466,10 +469,10 @@ def initialize_routing_logic(
) -> RoutingInterface:
if routing_logic == RoutingLogic.ROUND_ROBIN:
logger.info("Initializing round-robin routing logic")
return RoundRobinRouter()
router = RoundRobinRouter()
elif routing_logic == RoutingLogic.SESSION_BASED:
logger.info(f"Initializing session-based routing logic with kwargs: {kwargs}")
return SessionRouter(kwargs.get("session_key"))
router = SessionRouter(kwargs.get("session_key"))
elif routing_logic == RoutingLogic.KVAWARE:
logger.info("Initializing kvaware routing logic")
router = KvawareRouter(
Expand All @@ -478,17 +481,18 @@ def initialize_routing_logic(
kwargs.get("kv_aware_threshold"),
)
router.start_kv_manager()
return router
elif routing_logic == RoutingLogic.PREFIXAWARE:
logger.info("Initializing prefix-aware routing logic")
return PrefixAwareRouter()
router = PrefixAwareRouter()
elif routing_logic == RoutingLogic.DISAGGREGATED_PREFILL:
logger.info("Initializing disaggregated prefill routing logic")
return DisaggregatedPrefillRouter(
router = DisaggregatedPrefillRouter(
kwargs.get("prefill_model_labels"), kwargs.get("decode_model_labels")
)
else:
raise ValueError(f"Invalid routing logic {routing_logic}")
router.set_request_migration(request_reroutes=kwargs.get("request_reroutes"))
return router


def reconfigure_routing_logic(
Expand Down
3 changes: 1 addition & 2 deletions src/vllm_router/services/request_service/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ async def route_general_request(
request: Request,
endpoint: str,
background_tasks: BackgroundTasks,
attempted_reroutes: int = 0,
):
"""
Route the incoming request to the backend server and stream the response back to the client.
Expand Down Expand Up @@ -257,7 +256,7 @@ async def route_general_request(

# Perform service discovery for the request path (reroutes + 1) times: one initial attempt plus one per allowed reroute
error_urls = set()
for _ in range(attempted_reroutes + 1):
for _ in range(request.app.state.router.reroutes + 1):
endpoints, engine_stats, request_stats = await asyncio.to_thread(
perform_service_discovery,
request,
Expand Down
Loading