BerriAI · AlexsanderHamir · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026
diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
@@ -699,6 +699,7 @@ router_settings:
 | LITELLM_EMAIL | Email associated with LiteLLM account
 | LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRIES | Maximum retries for parallel requests in LiteLLM
 | LITELLM_GLOBAL_MAX_PARALLEL_REQUEST_RETRY_TIMEOUT | Timeout for retries of parallel requests in LiteLLM
+| LITELLM_DISABLE_LAZY_LOADING | When set to "1", "true", "yes", or "on", disables lazy loading of attributes (currently only affects encoding/tiktoken). This ensures encoding is initialized before VCR starts recording HTTP requests, fixing VCR cassette creation issues. See [issue #18659](https://github.com/BerriAI/litellm/issues/18659)
 | LITELLM_MIGRATION_DIR | Custom migrations directory for prisma migrations, used for baselining db in read-only file systems.
 | LITELLM_HOSTED_UI | URL of the hosted UI for LiteLLM
 | LITELLM_UI_API_DOC_BASE_URL | Optional override for the API Reference base URL (used in sample code/docs) when the admin UI runs on a different host than the proxy. Defaults to `PROXY_BASE_URL` when unset.

diff --git a/litellm/__init__.py b/litellm/__init__.py
@@ -1553,6 +1553,16 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
 # Track if async client cleanup has been registered (for lazy loading)
 _async_client_cleanup_registered = False
 
+# Eager loading for backwards compatibility with VCR and other HTTP recording tools
+# When LITELLM_DISABLE_LAZY_LOADING is set, lazy-loaded attributes are loaded at import time
+# For now, this only affects encoding (tiktoken) as it was the only reported issue
+# See: https://github.com/BerriAI/litellm/issues/18659
+# This ensures encoding is initialized before VCR starts recording HTTP requests
+if os.getenv("LITELLM_DISABLE_LAZY_LOADING", "").lower() in ("1", "true", "yes", "on"):
+    # Load encoding at import time (pre-#18070 behavior)
+    # This ensures encoding is initialized before VCR starts recording
+    from .main import encoding
+
 
 def __getattr__(name: str) -> Any:
     """Lazy import handler with cached registry for improved performance."""

diff --git a/tests/test_litellm/test_eager_tiktoken_load.py b/tests/test_litellm/test_eager_tiktoken_load.py
@@ -0,0 +1,87 @@
+"""
+Test for LITELLM_DISABLE_LAZY_LOADING environment variable.
+
+This test verifies that when LITELLM_DISABLE_LAZY_LOADING is set,
+encoding is loaded at import time (pre-#18070 behavior) instead of lazy loading.
+
+This addresses issue #18659: VCR cassette creation broken by lazy loading.
+For now, this only affects encoding as it was the only reported issue.
+"""
+import os
+import sys
+import pytest
+
+
+def test_eager_loading_enabled():
+    """Test that encoding is loaded at import time when env var is set"""
+    # Set environment variable
+    os.environ["LITELLM_DISABLE_LAZY_LOADING"] = "1"
+
+    # Clear any cached modules to ensure fresh import
+    modules_to_clear = [k for k in sys.modules.keys() if k.startswith("litellm")]
+    for module in modules_to_clear:
+        del sys.modules[module]
+
+    # Import litellm - encoding should be loaded immediately
+    import litellm
+
+    # Check that encoding is available (not lazy loaded)
+    assert hasattr(litellm, "encoding"), "Encoding should be available when eager loading is enabled"
+
+    # Verify it's actually the encoding object
+    encoding = litellm.encoding
+    assert encoding is not None, "Encoding should not be None"
+
+    # Test that it works
+    tokens = encoding.encode("Hello, world!")
+    assert len(tokens) > 0, "Encoding should work"
+
+
+def test_eager_loading_env_var_values():
+    """Test that various env var values enable eager loading"""
+    values = ["1", "true", "True", "TRUE", "yes", "Yes", "YES", "on", "On", "ON"]
+
+    for value in values:
+        os.environ["LITELLM_DISABLE_LAZY_LOADING"] = value
+
+        # Clear modules
+        modules_to_clear = [k for k in sys.modules.keys() if k.startswith("litellm")]
+        for module in modules_to_clear:
+            del sys.modules[module]
+
+        import litellm
+        assert hasattr(litellm, "encoding"), f"Encoding should be available for value: {value}"
+        encoding = litellm.encoding
+        tokens = encoding.encode("test")
+        assert len(tokens) > 0
+
+
+def test_lazy_loading_default():
+    """Test that encoding is lazy loaded by default (when env var is not set)"""
+    # Remove environment variable if set
+    if "LITELLM_DISABLE_LAZY_LOADING" in os.environ:
+        del os.environ["LITELLM_DISABLE_LAZY_LOADING"]
+
+    # Clear any cached modules
+    modules_to_clear = [k for k in sys.modules.keys() if k.startswith("litellm")]
+    for module in modules_to_clear:
+        del sys.modules[module]
+
+    # Import litellm - encoding should NOT be loaded yet
+    import litellm
+
+    # Encoding should be accessible via __getattr__ (lazy loading)
+    encoding = litellm.encoding  # This triggers lazy loading
+
+    # Verify it works
+    tokens = encoding.encode("Hello, world!")
+    assert len(tokens) > 0, "Encoding should work"
+
+
+@pytest.fixture(autouse=True)
+def cleanup_env():
+    """Clean up environment variable after each test"""
+    yield
+    if "LITELLM_DISABLE_LAZY_LOADING" in os.environ:
+        del os.environ["LITELLM_DISABLE_LAZY_LOADING"]
+