Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/my-website/docs/proxy/config_settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,7 @@ router_settings:
| LITELLM_EMAIL | Email associated with LiteLLM account
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
| LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
| LITELLM_DISABLE_LAZY_LOADING | When set to "1", "true", "yes", or "on", disables lazy loading of attributes (currently only affects encoding/tiktoken). This ensures encoding is initialized before VCR starts recording HTTP requests, fixing VCR cassette creation issues. See [issue #18659](https://github.com/BerriAI/litellm/issues/18659)
| LITELLM_MIGRATION_DIR | Custom migrations directory for prisma migrations, used for baselining db in read-only file systems.
| LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM
| LITELLM_UI_API_DOC_BASE_URL | Optional override for the API Reference base URL (used in sample code/docs) when the admin UI runs on a different host than the proxy. Defaults to `PROXY_BASE_URL` when unset.
Expand Down
10 changes: 10 additions & 0 deletions litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1553,6 +1553,16 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
# Track if async client cleanup has been registered (for lazy loading)
_async_client_cleanup_registered = False

# Eager loading for backwards compatibility with VCR and other HTTP recording tools.
# When LITELLM_DISABLE_LAZY_LOADING is set to "1", "true", "yes" or "on"
# (compared case-insensitively), lazy-loaded attributes are loaded at import time.
# For now, this only affects encoding (tiktoken) as it was the only reported issue.
# See: https://github.com/BerriAI/litellm/issues/18659
if os.getenv("LITELLM_DISABLE_LAZY_LOADING", "").lower() in ("1", "true", "yes", "on"):
    # Load encoding at import time (pre-#18070 behavior) so that it is
    # initialized before VCR starts recording HTTP requests. The import binds
    # ``encoding`` directly in this module's namespace.
    from .main import encoding


def __getattr__(name: str) -> Any:
"""Lazy import handler with cached registry for improved performance."""
Expand Down
87 changes: 87 additions & 0 deletions tests/test_litellm/test_eager_tiktoken_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Test for LITELLM_DISABLE_LAZY_LOADING environment variable.

This test verifies that when LITELLM_DISABLE_LAZY_LOADING is set,
encoding is loaded at import time (pre-#18070 behavior) instead of lazy loading.

This addresses issue #18659: VCR cassette creation broken by lazy loading.
For now, this only affects encoding as it was the only reported issue.
"""
import os
import sys
import pytest


def test_eager_loading_enabled():
    """Test that encoding is bound at import time when the env var is set.

    The check inspects the module namespace directly. ``hasattr`` (or plain
    attribute access) would fall through to the module-level ``__getattr__``
    lazy loader and succeed even if nothing was loaded eagerly, so it cannot
    distinguish eager from lazy loading.
    """
    # Enable eager loading; the autouse ``cleanup_env`` fixture removes it again.
    os.environ["LITELLM_DISABLE_LAZY_LOADING"] = "1"

    # Clear any cached modules to ensure a fresh import. The key list is
    # materialized first because sys.modules is mutated inside the loop.
    for module_name in [k for k in sys.modules if k.startswith("litellm")]:
        del sys.modules[module_name]

    # Import litellm - encoding should be loaded immediately
    import litellm

    # Look in the module's own namespace so the lazy loader is NOT triggered.
    assert "encoding" in vars(litellm), "Encoding should be available when eager loading is enabled"

    # Verify it's actually the encoding object
    encoding = litellm.encoding
    assert encoding is not None, "Encoding should not be None"

    # Test that it works
    tokens = encoding.encode("Hello, world!")
    assert len(tokens) > 0, "Encoding should work"


def test_eager_loading_env_var_values():
    """Test that every accepted spelling of the env var enables eager loading.

    For each value, litellm is re-imported from scratch and the module
    namespace is inspected directly. ``hasattr`` would trigger the
    module-level ``__getattr__`` lazy loader and pass regardless of whether
    the value was honoured.
    """
    values = ["1", "true", "True", "TRUE", "yes", "Yes", "YES", "on", "On", "ON"]

    for value in values:
        os.environ["LITELLM_DISABLE_LAZY_LOADING"] = value

        # Clear modules so the import below re-executes litellm/__init__.py.
        # The key list is materialized first because sys.modules is mutated.
        for module_name in [k for k in sys.modules if k.startswith("litellm")]:
            del sys.modules[module_name]

        import litellm

        # Namespace lookup: does not fall back to the lazy loader.
        assert "encoding" in vars(litellm), f"Encoding should be available for value: {value}"
        encoding = litellm.encoding
        tokens = encoding.encode("test")
        assert len(tokens) > 0


def test_lazy_loading_default():
    """Test that encoding still resolves through lazy loading when the env var is unset."""
    # Make sure the opt-out variable is absent for this import.
    os.environ.pop("LITELLM_DISABLE_LAZY_LOADING", None)

    # Drop every cached litellm module so the import below starts fresh.
    stale_modules = [name for name in sys.modules if name.startswith("litellm")]
    for name in stale_modules:
        del sys.modules[name]

    # Import litellm - encoding should NOT be loaded yet
    import litellm

    # Attribute access goes through the module-level __getattr__ (lazy loading).
    lazy_encoding = litellm.encoding

    # The lazily resolved object must be a working encoder.
    token_ids = lazy_encoding.encode("Hello, world!")
    assert len(token_ids) > 0, "Encoding should work"


@pytest.fixture(autouse=True)
def cleanup_env():
    """Restore LITELLM_DISABLE_LAZY_LOADING to its pre-test state after each test.

    The value present before the test is captured and put back afterwards.
    Unconditionally deleting the variable would discard a value set before
    the test session started, changing the environment seen by later tests.
    """
    previous_value = os.environ.get("LITELLM_DISABLE_LAZY_LOADING")
    yield
    if previous_value is None:
        # Variable was not set before the test: remove whatever the test set.
        os.environ.pop("LITELLM_DISABLE_LAZY_LOADING", None)
    else:
        # Variable was set before the test: put the original value back.
        os.environ["LITELLM_DISABLE_LAZY_LOADING"] = previous_value

Loading