diff --git a/docs/ci-test-improvements.md b/docs/ci-test-improvements.md
new file mode 100644
index 00000000000..d7d91f4aa0d
--- /dev/null
+++ b/docs/ci-test-improvements.md
@@ -0,0 +1,349 @@
+# CI Test Improvements - Implementation Plan
+
+## Current Issues
+
+1. Tests pass locally but fail in CI
+2. Async mock timing issues
+3. Module reload causing `isinstance()` failures
+
+## Implemented Solutions
+
+### ✅ Already In Place
+
+1. **`@pytest.mark.no_parallel` marker** - Line 200 in pyproject.toml
+   - Use for tests with async mocks or timing dependencies
+   - MCP OAuth test already uses this correctly
+
+2. **Retry logic** - Lines 195-196 in pyproject.toml
+   ```toml
+   retries = 20
+   retry_delay = 5
+   ```
+
+3. **Async mode** - Line 194 in pyproject.toml
+   ```toml
+   asyncio_mode = "auto"
+   ```
+
+### 🔧 Recommended Additions
+
+## 1. Install pytest-rerunfailures Plugin
+
+Add to `pyproject.toml`:
+
+```toml
+[tool.poetry.group.dev.dependencies]
+pytest-rerunfailures = "^15.0"
+```
+
+Then use in tests:
+
+```python
+@pytest.mark.flaky(reruns=3, reruns_delay=1)
+async def test_sometimes_flaky():
+    # Test code
+    pass
+```
+
+## 2. Add Test Categories
+
+Update `pyproject.toml`:
+
+```toml
+[tool.pytest.ini_options]
+markers = [
+    "asyncio: mark test as an asyncio test",
+    "limit_leaks: mark test with memory limit for leak detection",
+    "no_parallel: mark test to run sequentially (not in parallel)",
+    "flaky: mark test as potentially flaky (auto-retry)",  # NEW
+    "requires_credentials: mark test that needs external credentials",  # NEW
+    "slow: mark test as slow running (> 10 seconds)",  # NEW
+]
+```
+
+## 3. GitHub Actions Improvements
+
+### Option A: Run Flaky Tests Separately
+
+`.github/workflows/test.yml`:
+
+```yaml
+jobs:
+  test-stable:
+    name: Stable Tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Stable Tests
+        run: |
+          poetry run pytest tests/ \
+            -m "not flaky" \
+            -n 4 \
+            --maxfail=5
+
+  test-flaky:
+    name: Flaky Tests (with retries)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run Flaky Tests
+        run: |
+          poetry run pytest tests/ \
+            -m "flaky" \
+            --reruns 3 \
+            --reruns-delay 2 \
+            -n 1  # Sequential for flaky tests
+```
+
+### Option B: Add Retry Step
+
+```yaml
+- name: Run Tests
+  id: tests
+  run: poetry run pytest tests/ -v -n 4
+
+- name: Retry Failed Tests
+  if: failure() && steps.tests.outcome == 'failure'
+  run: |
+    echo "Retrying failed tests..."
+    poetry run pytest tests/ --lf -v --maxfail=5
+```
+
+## 4. Pre-commit Hook for Test Best Practices
+
+`.pre-commit-config.yaml`:
+
+```yaml
+repos:
+  - repo: local
+    hooks:
+      - id: check-async-test-markers
+        name: Check async tests have proper markers
+        entry: python scripts/check_test_markers.py
+        language: python
+        files: ^tests/.*test.*\.py$
+```
+
+`scripts/check_test_markers.py`:
+
+```python
+#!/usr/bin/env python3
+import ast
+import sys
+import re
+
+def check_test_file(filename):
+    """Check that async tests with mocks have no_parallel marker"""
+    with open(filename) as f:
+        content = f.read()
+
+    # Parse AST
+    tree = ast.parse(content)
+
+    issues = []
+    for node in ast.walk(tree):
+        if isinstance(node, ast.AsyncFunctionDef) and node.name.startswith('test_'):
+            # Check if it has @patch or Mock
+            has_mock = 'patch' in content or 'Mock' in content or 'mock_' in node.name.lower()
+
+            # Check if it has @pytest.mark.no_parallel
+            decorators = [d for d in node.decorator_list]
+            has_no_parallel = any(
+                'no_parallel' in ast.unparse(d) for d in decorators
+            )
+
+            if has_mock and not has_no_parallel:
+                issues.append(
+                    f"{filename}:{node.lineno} - Async test '{node.name}' "
+                    f"uses mocks but missing @pytest.mark.no_parallel"
+                )
+
+    return issues
+
+if __name__ == '__main__':
+    all_issues = []
+    for filename in sys.argv[1:]:
+        all_issues.extend(check_test_file(filename))
+
+    if all_issues:
+        print("❌ Test marker issues found:")
+        for issue in all_issues:
+            print(f"  {issue}")
+        sys.exit(1)
+    else:
+        print("✅ All async tests properly marked")
+        sys.exit(0)
+```
+
+## 5. Makefile Targets for Testing
+
+`Makefile`:
+
+```makefile
+.PHONY: test test-fast test-flaky test-repeat
+
+# Regular test run
+test:
+	poetry run pytest tests/ -v
+
+# Fast: parallel execution, stop on first failure
+test-fast:
+	poetry run pytest tests/ -n 4 -x
+
+# Only flaky tests with retries
+test-flaky:
+	poetry run pytest tests/ -m flaky --reruns 3 --reruns-delay 1
+
+# Repeat test N times to catch flakiness
+test-repeat:
+	poetry run pytest tests/ --count=100 -x
+
+# Test specific file 100 times
+test-file-repeat:
+	@if [ -z "$(FILE)" ]; then \
+		echo "Usage: make test-file-repeat FILE=tests/path/to/test.py"; \
+		exit 1; \
+	fi
+	for i in {1..100}; do \
+		echo "Run $$i/100"; \
+		poetry run pytest $(FILE) || exit 1; \
+	done
+```
+
+Usage:
+```bash
+make test-fast                              # Quick CI-like run
+make test-flaky                             # Test only flaky tests
+make test-file-repeat FILE=tests/test_mcp_server.py  # Stress test
+```
+
+## 6. Test Utilities Module
+
+`tests/test_utils.py`:
+
+```python
+"""Common test utilities for handling flakiness"""
+import asyncio
+import functools
+import os
+import pytest
+from typing import Callable, TypeVar
+
+T = TypeVar('T')
+
+def retry_on_assertion_error(max_attempts: int = 3, delay: float = 0.1):
+    """Retry async test on AssertionError (for flaky CI tests)"""
+    def decorator(func: Callable[..., T]) -> Callable[..., T]:
+        @functools.wraps(func)
+        async def wrapper(*args, **kwargs):
+            last_error = None
+            for attempt in range(max_attempts):
+                try:
+                    return await func(*args, **kwargs)
+                except AssertionError as e:
+                    last_error = e
+                    if attempt < max_attempts - 1:
+                        print(f"  ⚠️  Attempt {attempt + 1} failed, retrying...")
+                        await asyncio.sleep(delay)
+            raise last_error
+        return wrapper
+    return decorator
+
+def skip_in_ci(reason: str = "Flaky in CI"):
+    """Skip test in CI environment"""
+    is_ci = os.getenv('CI') == 'true' or os.getenv('GITHUB_ACTIONS') == 'true'
+    return pytest.mark.skipif(is_ci, reason=reason)
+
+def requires_credentials(*env_vars: str):
+    """Skip test if required credentials are missing"""
+    has_creds = all(os.getenv(var) for var in env_vars)
+    var_names = ', '.join(env_vars)
+    return pytest.mark.skipif(
+        not has_creds,
+        reason=f"Missing credentials: {var_names}"
+    )
+
+# Usage examples:
+# @retry_on_assertion_error(max_attempts=3)
+# async def test_flaky():
+#     ...
+#
+# @skip_in_ci("Known timing issue in CI - tracked in #12345")
+# async def test_problematic():
+#     ...
+#
+# @requires_credentials("GOOGLE_APPLICATION_CREDENTIALS", "VERTEX_PROJECT")
+# def test_vertex_ai():
+#     ...
+```
+
+## 7. Update Test Documentation
+
+Add to `CLAUDE.md`:
+
+```markdown
+### Test Best Practices
+
+When writing tests:
+
+1. **Async tests with mocks** → Add `@pytest.mark.no_parallel`
+2. **Tests needing credentials** → Add `@requires_credentials("ENV_VAR")`
+3. **Known flaky tests** → Add `@pytest.mark.flaky(reruns=3)`
+4. **Slow tests** → Add `@pytest.mark.slow`
+
+Example:
+\`\`\`python
+@pytest.mark.asyncio
+@pytest.mark.no_parallel  # Has async mocks
+async def test_mcp_oauth():
+    with patch("module.func") as mock:
+        # Test code
+        mock.assert_called_once()
+\`\`\`
+
+Check test flakiness locally:
+\`\`\`bash
+make test-file-repeat FILE=tests/path/to/test.py
+\`\`\`
+```
+
+## Priority Implementation Order
+
+### Phase 1: Quick Wins (1 day)
+1. ✅ Document test patterns (done - test-flakiness-guide.md)
+2. Add `pytest-rerunfailures` to dependencies
+3. Mark known flaky tests with `@pytest.mark.flaky`
+4. Add test utilities module
+
+### Phase 2: CI Improvements (2-3 days)
+1. Update GitHub Actions to retry failed tests
+2. Split stable vs flaky test runs
+3. Add Makefile targets for common test scenarios
+
+### Phase 3: Enforcement (1 week)
+1. Add pre-commit hook for test markers
+2. Update CLAUDE.md with test guidelines
+3. Review and mark all async tests appropriately
+
+## Monitoring Success
+
+Track these metrics:
+- **Flaky test rate**: Tests that fail <10% of runs
+- **False failure rate**: CI failures that pass on retry
+- **Test runtime**: Should decrease with better parallelization
+
+Target goals:
+- ✅ <5% flaky test rate
+- ✅ <2% false failure rate
+- ✅ Test suite completes in <15 minutes
+
+## Quick Checklist
+
+Before merging a PR with new tests:
+
+- [ ] All async tests with mocks have `@pytest.mark.no_parallel`
+- [ ] Tests requiring credentials have skip conditions
+- [ ] No `isinstance()` checks on module-level imports
+- [ ] Async mocks are configured before execution
+- [ ] Tests pass 10 times locally: `make test-file-repeat FILE=...`
+- [ ] Tests pass in parallel: `pytest tests/file.py -n 4`
diff --git a/docs/test-flakiness-guide.md b/docs/test-flakiness-guide.md
new file mode 100644
index 00000000000..a5f74ec2b01
--- /dev/null
+++ b/docs/test-flakiness-guide.md
@@ -0,0 +1,325 @@
+# Test Flakiness Solutions Guide
+
+This guide explains how to address CI test flakiness in LiteLLM.
+
+## Problem Summary
+
+Tests passing locally but failing in CI due to:
+- Parallel test execution (pytest-xdist)
+- Async mock timing issues
+- Module reload side effects
+- Environment differences
+
+## Solution 1: Better Test Isolation
+
+### Use `@pytest.mark.no_parallel` for Sequential Tests
+
+When tests have timing-sensitive mocks or shared state:
+
+```python
+@pytest.mark.asyncio
+@pytest.mark.no_parallel  # Prevents parallel execution
+async def test_oauth2_headers_passed_to_mcp_client():
+    # Async mocks that need deterministic timing
+    with patch("module._create_mcp_client") as mock_client:
+        # Test code
+        mock_client.assert_called_once()
+```
+
+**When to use:**
+- ✅ Async mock assertions (`call_count`, `assert_called_once`)
+- ✅ Global state manipulation
+- ✅ Database schema migrations
+- ✅ File system operations that aren't isolated
+- ❌ Simple unit tests (adds unnecessary overhead)
+
+### Fix Module Import Issues
+
+For `isinstance()` checks that fail after module reload:
+
+```python
+# ❌ BAD: Module-level import becomes stale after reload
+from module import MyClass
+
+def test_something():
+    obj = create_object()
+    assert isinstance(obj, MyClass)  # Fails if litellm reloaded!
+
+# ✅ GOOD: Import locally after reload
+def test_something():
+    from module import MyClass  # Fresh reference
+    obj = create_object()
+    assert isinstance(obj, MyClass)  # Always works
+```
+
+**Root cause:** `conftest.py` does `importlib.reload(litellm)` which creates new class objects.
+
+## Solution 2: Robust Async Mock Setup
+
+### Pattern 1: Ensure Mock Applied Before Async Code
+
+```python
+@pytest.mark.asyncio
+async def test_async_function():
+    # ❌ BAD: Mock might not be ready
+    with patch("module.async_func") as mock:
+        result = await call_async_code()
+        mock.assert_called_once()  # Flaky in CI!
+
+    # ✅ GOOD: Use AsyncMock and configure before execution
+    mock_func = AsyncMock(return_value="result")
+    with patch("module.async_func", mock_func):
+        # Ensure mock is configured
+        assert mock_func.call_count == 0
+
+        result = await call_async_code()
+
+        # More reliable assertion
+        await asyncio.sleep(0)  # Let event loop settle
+        assert mock_func.call_count == 1
+```
+
+### Pattern 2: Use `side_effect` for Async Mocks
+
+```python
+@pytest.mark.asyncio
+async def test_with_side_effect():
+    async def mock_implementation(*args, **kwargs):
+        # Custom async logic
+        return {"status": "success"}
+
+    with patch("module.async_func", side_effect=mock_implementation):
+        result = await call_code()
+        assert result["status"] == "success"
+```
+
+### Pattern 3: Patch Early, Execute Late
+
+```python
+@pytest.mark.asyncio
+async def test_patch_early():
+    # Create and configure mock FIRST
+    mock_client = AsyncMock()
+    mock_client._create_mcp_client = AsyncMock(return_value={"id": "123"})
+
+    # THEN patch
+    with patch("module.ClientClass", return_value=mock_client):
+        # NOW execute async code
+        result = await execute_code()
+
+        # Assertions are more reliable
+        mock_client._create_mcp_client.assert_called_once()
+```
+
+## Solution 3: Retry Logic for Flaky Tests
+
+### Method A: pytest-rerunfailures (Already Configured)
+
+The project already has retry logic in `pyproject.toml`:
+
+```toml
+[tool.pytest.ini_options]
+retries = 20
+retry_delay = 5
+```
+
+To use it for specific tests:
+
+```python
+@pytest.mark.flaky(reruns=3, reruns_delay=1)
+def test_sometimes_fails():
+    # Flaky test code
+    pass
+```
+
+### Method B: Manual Retry Decorator
+
+For async tests with specific retry logic:
+
+```python
+import asyncio
+from functools import wraps
+
+def retry_async(max_attempts=3, delay=0.1):
+    def decorator(func):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            last_exception = None
+            for attempt in range(max_attempts):
+                try:
+                    return await func(*args, **kwargs)
+                except AssertionError as e:
+                    last_exception = e
+                    if attempt < max_attempts - 1:
+                        await asyncio.sleep(delay)
+            raise last_exception
+        return wrapper
+    return decorator
+
+@pytest.mark.asyncio
+@retry_async(max_attempts=3)
+async def test_with_retry():
+    # Test that might fail due to timing
+    result = await async_operation()
+    assert result.call_count == 1
+```
+
+### Method C: CI-Only Retries
+
+In GitHub Actions workflow:
+
+```yaml
+- name: Run Tests
+  run: |
+    poetry run pytest tests/ -v --maxfail=5 || \
+    poetry run pytest tests/ --lf -v --maxfail=5 || \
+    poetry run pytest tests/ --lf -v
+```
+
+This runs:
+1. All tests normally
+2. If fails, rerun only last failures
+3. If still fails, rerun last failures again
+
+## Solution 4: Test Fixtures for Cleanup
+
+### Ensure Clean State Between Tests
+
+```python
+@pytest.fixture(autouse=True)
+async def cleanup_mcp_state():
+    """Clean up MCP state between tests"""
+    yield
+
+    # Teardown
+    try:
+        from litellm.proxy._experimental.mcp_server.mcp_server_manager import (
+            global_mcp_server_manager,
+        )
+        global_mcp_server_manager.registry.clear()
+    except ImportError:
+        pass
+```
+
+### Module-Level Cleanup
+
+```python
+@pytest.fixture(scope="module", autouse=True)
+def reset_litellm_state():
+    """Reset litellm state before module tests"""
+    import litellm
+
+    # Save state
+    original_settings = {
+        "disable_aiohttp_transport": litellm.disable_aiohttp_transport,
+        "force_ipv4": litellm.force_ipv4,
+    }
+
+    yield
+
+    # Restore state
+    for key, value in original_settings.items():
+        setattr(litellm, key, value)
+```
+
+## Solution 5: CI Environment Variables
+
+### Skip Tests Without Credentials
+
+```python
+def _has_credentials() -> bool:
+    return "GOOGLE_APPLICATION_CREDENTIALS" in os.environ
+
+@pytest.mark.skipif(
+    not _has_credentials(),
+    reason="Credentials not available in CI"
+)
+def test_requires_credentials():
+    # Test code
+    pass
+```
+
+### Detect CI Environment
+
+```python
+import os
+
+IS_CI = os.getenv("CI") == "true" or os.getenv("GITHUB_ACTIONS") == "true"
+
+@pytest.mark.skipif(IS_CI, reason="Flaky in CI, tracked in issue #XXXXX")
+def test_known_flaky():
+    # Test code
+    pass
+```
+
+## Checklist for Test Authors
+
+When writing tests that might be flaky:
+
+- [ ] Does it use async mocks? → Add `@pytest.mark.no_parallel`
+- [ ] Does it modify global state? → Add cleanup fixture
+- [ ] Does it check `isinstance()` → Import classes locally in test
+- [ ] Does it need credentials? → Add `@pytest.mark.skipif`
+- [ ] Is it timing-sensitive? → Add retries or `asyncio.sleep(0)`
+- [ ] Does it fail randomly in CI? → Add `@pytest.mark.flaky(reruns=3)`
+
+## Quick Reference
+
+| Problem | Solution | Example |
+|---------|----------|---------|
+| Async mock not called | Use `@pytest.mark.no_parallel` | See Pattern 3 above |
+| `isinstance()` fails | Import class locally | See Fix Module Import above |
+| Need credentials | Use `@pytest.mark.skipif` | See Solution 5 above |
+| Random timing failures | Add `@pytest.mark.flaky` | See Method A above |
+| Module state pollution | Add cleanup fixture | See Solution 4 above |
+
+## Examples from Codebase
+
+### Fixed: http_handler isinstance issue (PR #21388)
+```python
+@pytest.mark.asyncio
+async def test_session_reuse_integration():
+    # Import locally to get fresh class after reload
+    from litellm.llms.custom_httpx.http_handler import (
+        get_async_httpx_client,
+        AsyncHTTPHandler as AsyncHTTPHandlerReload
+    )
+
+    client = get_async_httpx_client(...)
+    assert isinstance(client, AsyncHTTPHandlerReload)  # ✅ Works!
+```
+
+### Already Correct: MCP OAuth test
+```python
+@pytest.mark.asyncio
+@pytest.mark.no_parallel  # Prevents parallel execution issues
+async def test_oauth2_headers_passed_to_mcp_client():
+    # Test with async mocks
+    pass
+```
+
+## Debugging Flaky Tests
+
+### Run test 100 times locally:
+```bash
+for i in {1..100}; do
+    poetry run pytest tests/path/to/test.py::test_name || break
+done
+```
+
+### Run with pytest-repeat:
+```bash
+poetry run pytest tests/path/to/test.py::test_name --count=100
+```
+
+### Run in parallel to reproduce CI:
+```bash
+poetry run pytest tests/path/to/test.py -n 4  # 4 parallel workers
+```
+
+## Further Reading
+
+- [pytest-xdist docs](https://pytest-xdist.readthedocs.io/)
+- [pytest-asyncio docs](https://pytest-asyncio.readthedocs.io/)
+- [unittest.mock docs](https://docs.python.org/3/library/unittest.mock.html)
+- [pytest-rerunfailures](https://github.com/pytest-dev/pytest-rerunfailures)
diff --git a/tests/test_litellm/llms/custom_httpx/test_http_handler.py b/tests/test_litellm/llms/custom_httpx/test_http_handler.py
index b0011fd8f76..caf90dce6c8 100644
--- a/tests/test_litellm/llms/custom_httpx/test_http_handler.py
+++ b/tests/test_litellm/llms/custom_httpx/test_http_handler.py
@@ -358,38 +358,40 @@ async def test_async_handler_with_shared_session():
 @pytest.mark.asyncio
 async def test_get_async_httpx_client_with_shared_session():
     """Test get_async_httpx_client with shared session"""
-    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
+    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client, AsyncHTTPHandler as AsyncHTTPHandlerReload
     from litellm.types.utils import LlmProviders
-    
+
     # Create a mock shared session
     mock_session = MockClientSession()
-    
+
     # Test with shared session
     client = get_async_httpx_client(
         llm_provider=LlmProviders.ANTHROPIC,
         shared_session=mock_session  # type: ignore
     )
-    
+
     # Verify the client was created successfully
     assert client is not None
-    assert isinstance(client, AsyncHTTPHandler)
+    # Import locally to avoid stale reference after module reload in conftest
+    assert isinstance(client, AsyncHTTPHandlerReload)
 
 
 @pytest.mark.asyncio
 async def test_get_async_httpx_client_without_shared_session():
     """Test get_async_httpx_client without shared session (backward compatibility)"""
-    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
+    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client, AsyncHTTPHandler as AsyncHTTPHandlerReload
     from litellm.types.utils import LlmProviders
-    
+
     # Test without shared session
     client = get_async_httpx_client(
         llm_provider=LlmProviders.ANTHROPIC,
         shared_session=None
     )
-    
+
     # Verify the client was created successfully
     assert client is not None
-    assert isinstance(client, AsyncHTTPHandler)
+    # Import locally to avoid stale reference after module reload in conftest
+    assert isinstance(client, AsyncHTTPHandlerReload)
 
 
 @pytest.mark.asyncio
@@ -450,31 +452,32 @@ def test_shared_session_parameter_in_completion():
 @pytest.mark.asyncio
 async def test_session_reuse_integration():
     """Integration test for session reuse functionality"""
-    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
+    from litellm.llms.custom_httpx.http_handler import get_async_httpx_client, AsyncHTTPHandler as AsyncHTTPHandlerReload
     from litellm.types.utils import LlmProviders
-    
+
     # Create a mock session
     mock_session = MockClientSession()
-    
+
     # Create two clients with the same session
     client1 = get_async_httpx_client(
         llm_provider=LlmProviders.ANTHROPIC,
         shared_session=mock_session  # type: ignore
     )
-    
+
     client2 = get_async_httpx_client(
         llm_provider=LlmProviders.OPENAI,
         shared_session=mock_session  # type: ignore
     )
-    
+
     # Both clients should be created successfully
     assert client1 is not None
     assert client2 is not None
-    
+
     # Both should be AsyncHTTPHandler instances
-    assert isinstance(client1, AsyncHTTPHandler)
-    assert isinstance(client2, AsyncHTTPHandler)
-    
+    # Import locally to avoid stale reference after module reload in conftest
+    assert isinstance(client1, AsyncHTTPHandlerReload)
+    assert isinstance(client2, AsyncHTTPHandlerReload)
+
     # Clean up
     await client1.close()
     await client2.close()