Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions litellm/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
_get_parent_otel_span_from_kwargs,
get_metadata_variable_name_from_kwargs,
)
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.litellm_core_utils.coroutine_checker import coroutine_checker
from litellm.litellm_core_utils.credential_accessor import CredentialAccessor
from litellm.litellm_core_utils.dd_tracing import tracer
Expand Down Expand Up @@ -619,11 +618,12 @@ def __init__( # noqa: PLR0915
self.retry_policy = RetryPolicy(**retry_policy)
elif isinstance(retry_policy, RetryPolicy):
self.retry_policy = retry_policy
verbose_router_logger.info(
"\033[32mRouter Custom Retry Policy Set:\n{}\033[0m".format(
self.retry_policy.model_dump(exclude_none=True)
if self.retry_policy is not None:
verbose_router_logger.info(
"\033[32mRouter Custom Retry Policy Set:\n{}\033[0m".format(
self.retry_policy.model_dump(exclude_none=True)
)
)
)

self.model_group_retry_policy: Optional[
Dict[str, RetryPolicy]
Expand All @@ -636,11 +636,12 @@ def __init__( # noqa: PLR0915
elif isinstance(allowed_fails_policy, AllowedFailsPolicy):
self.allowed_fails_policy = allowed_fails_policy

verbose_router_logger.info(
"\033[32mRouter Custom Allowed Fails Policy Set:\n{}\033[0m".format(
self.allowed_fails_policy.model_dump(exclude_none=True)
if self.allowed_fails_policy is not None:
verbose_router_logger.info(
"\033[32mRouter Custom Allowed Fails Policy Set:\n{}\033[0m".format(
self.allowed_fails_policy.model_dump(exclude_none=True)
)
)
)

self.alerting_config: Optional[AlertingConfig] = alerting_config

Expand Down Expand Up @@ -1269,13 +1270,16 @@ def _completion(

if silent_model is not None:
# Mirroring traffic to a secondary model
# Use shared thread pool for background calls
executor.submit(
self._silent_experiment_completion,
silent_model,
messages,
**kwargs,
# Use threading.Thread (not ThreadPoolExecutor) - executor.submit()
# requires pickling args, which fails when kwargs contain unpicklable
# objects (e.g. _thread.RLock from OTEL spans, loggers) in deployment.
thread = threading.Thread(
target=self._silent_experiment_completion,
args=(silent_model, messages),
kwargs=kwargs,
daemon=True,
)
thread.start()
Comment on lines +1276 to +1282
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential resource-management concern: unlike ThreadPoolExecutor, which had a bounded pool (MAX_THREADS=100), threading.Thread places no upper limit on the number of threads created. If many requests with silent_model arrive simultaneously, this could spawn a large number of threads and potentially exhaust system resources. Consider adding a semaphore or a bounded thread pool to limit concurrent silent experiments.

Prompt To Fix With AI
This is a comment left during a code review.
Path: litellm/router.py
Line: 1276:1282

Comment:
Potential resource-management concern: unlike `ThreadPoolExecutor`, which had a bounded pool (`MAX_THREADS=100`), `threading.Thread` places no upper limit on the number of threads created. If many requests with `silent_model` arrive simultaneously, this could spawn a large number of threads and potentially exhaust system resources. Consider adding a semaphore or a bounded thread pool to limit concurrent silent experiments.

How can I resolve this? If you propose a fix, please make it concise.


self._update_kwargs_with_deployment(deployment=deployment, kwargs=kwargs)
kwargs.pop("silent_model", None) # Ensure it's not in kwargs either
Expand Down
Loading