diff --git a/docs/ci-test-improvements.md b/docs/ci-test-improvements.md new file mode 100644 index 00000000000..d7d91f4aa0d --- /dev/null +++ b/docs/ci-test-improvements.md @@ -0,0 +1,349 @@ +# CI Test Improvements - Implementation Plan + +## Current Issues + +1. Tests pass locally but fail in CI +2. Async mock timing issues +3. Module reload causing `isinstance()` failures + +## Implemented Solutions + +### ✅ Already In Place + +1. **`@pytest.mark.no_parallel` marker** - Line 200 in pyproject.toml + - Use for tests with async mocks or timing dependencies + - MCP OAuth test already uses this correctly + +2. **Retry logic** - Lines 195-196 in pyproject.toml + ```toml + retries = 20 + retry_delay = 5 + ``` + +3. **Async mode** - Line 194 in pyproject.toml + ```toml + asyncio_mode = "auto" + ``` + +### 🔧 Recommended Additions + +## 1. Install pytest-rerunfailures Plugin + +Add to `pyproject.toml`: + +```toml +[tool.poetry.group.dev.dependencies] +pytest-rerunfailures = "^15.0" +``` + +Then use in tests: + +```python +@pytest.mark.flaky(reruns=3, reruns_delay=1) +async def test_sometimes_flaky(): + # Test code + pass +``` + +## 2. Add Test Categories + +Update `pyproject.toml`: + +```toml +[tool.pytest.ini_options] +markers = [ + "asyncio: mark test as an asyncio test", + "limit_leaks: mark test with memory limit for leak detection", + "no_parallel: mark test to run sequentially (not in parallel)", + "flaky: mark test as potentially flaky (auto-retry)", # NEW + "requires_credentials: mark test that needs external credentials", # NEW + "slow: mark test as slow running (> 10 seconds)", # NEW +] +``` + +## 3. GitHub Actions Improvements + +### Option A: Run Flaky Tests Separately + +`.github/workflows/test.yml`: + +```yaml +jobs: + test-stable: + name: Stable Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run Stable Tests + run: | + poetry run pytest tests/ \ + -m "not flaky" \ + -n 4 \ + --maxfail=5 + + test-flaky: + name: Flaky Tests (with retries) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run Flaky Tests + run: | + poetry run pytest tests/ \ + -m "flaky" \ + --reruns 3 \ + --reruns-delay 2 \ + -n 1 # Sequential for flaky tests +``` + +### Option B: Add Retry Step + +```yaml +- name: Run Tests + id: tests + run: poetry run pytest tests/ -v -n 4 + +- name: Retry Failed Tests + if: failure() && steps.tests.outcome == 'failure' + run: | + echo "Retrying failed tests..." + poetry run pytest tests/ --lf -v --maxfail=5 +``` + +## 4. Pre-commit Hook for Test Best Practices + +`.pre-commit-config.yaml`: + +```yaml +repos: + - repo: local + hooks: + - id: check-async-test-markers + name: Check async tests have proper markers + entry: python scripts/check_test_markers.py + language: python + files: ^tests/.*test.*\.py$ +``` + +`scripts/check_test_markers.py`: + +```python +#!/usr/bin/env python3 +import ast +import sys +import re + +def check_test_file(filename): + """Check that async tests with mocks have no_parallel marker""" + with open(filename) as f: + content = f.read() + + # Parse AST + tree = ast.parse(content) + + issues = [] + for node in ast.walk(tree): + if isinstance(node, ast.AsyncFunctionDef) and node.name.startswith('test_'): + # Check if it has @patch or Mock + has_mock = 'patch' in content or 'Mock' in content or 'mock_' in node.name.lower() + + # Check if it has @pytest.mark.no_parallel + decorators = [d for d in node.decorator_list] + has_no_parallel = any( + 'no_parallel' in ast.unparse(d) for d in decorators + ) + + if has_mock and not has_no_parallel: + issues.append( + f"{filename}:{node.lineno} - Async test '{node.name}' " + f"uses mocks but missing @pytest.mark.no_parallel" + ) + + return issues + +if __name__ == '__main__': + all_issues = [] + for filename in sys.argv[1:]: + all_issues.extend(check_test_file(filename)) + + if all_issues: + print("❌ Test marker issues found:") + for issue in all_issues: + print(f" {issue}") + sys.exit(1) + else: + print("✅ All async tests properly marked") + sys.exit(0) +``` + +## 5. Makefile Targets for Testing + +`Makefile`: + +```makefile +.PHONY: test test-fast test-flaky test-repeat + +# Regular test run +test: + poetry run pytest tests/ -v + +# Fast: parallel execution, stop on first failure +test-fast: + poetry run pytest tests/ -n 4 -x + +# Only flaky tests with retries +test-flaky: + poetry run pytest tests/ -m flaky --reruns 3 --reruns-delay 1 + +# Repeat test N times to catch flakiness +test-repeat: + poetry run pytest tests/ --count=100 -x + +# Test specific file 100 times +test-file-repeat: + @if [ -z "$(FILE)" ]; then \ + echo "Usage: make test-file-repeat FILE=tests/path/to/test.py"; \ + exit 1; \ + fi + for i in {1..100}; do \ + echo "Run $$i/100"; \ + poetry run pytest $(FILE) || exit 1; \ + done +``` + +Usage: +```bash +make test-fast # Quick CI-like run +make test-flaky # Test only flaky tests +make test-file-repeat FILE=tests/test_mcp_server.py # Stress test +``` + +## 6. Test Utilities Module + +`tests/test_utils.py`: + +```python +"""Common test utilities for handling flakiness""" +import asyncio +import functools +import os +import pytest +from typing import Callable, TypeVar + +T = TypeVar('T') + +def retry_on_assertion_error(max_attempts: int = 3, delay: float = 0.1): + """Retry async test on AssertionError (for flaky CI tests)""" + def decorator(func: Callable[..., T]) -> Callable[..., T]: + @functools.wraps(func) + async def wrapper(*args, **kwargs): + last_error = None + for attempt in range(max_attempts): + try: + return await func(*args, **kwargs) + except AssertionError as e: + last_error = e + if attempt < max_attempts - 1: + print(f" ⚠️ Attempt {attempt + 1} failed, retrying...") + await asyncio.sleep(delay) + raise last_error + return wrapper + return decorator + +def skip_in_ci(reason: str = "Flaky in CI"): + """Skip test in CI environment""" + is_ci = os.getenv('CI') == 'true' or os.getenv('GITHUB_ACTIONS') == 'true' + return pytest.mark.skipif(is_ci, reason=reason) + +def requires_credentials(*env_vars: str): + """Skip test if required credentials are missing""" + has_creds = all(os.getenv(var) for var in env_vars) + var_names = ', '.join(env_vars) + return pytest.mark.skipif( + not has_creds, + reason=f"Missing credentials: {var_names}" + ) + +# Usage examples: +# @retry_on_assertion_error(max_attempts=3) +# async def test_flaky(): +# ... +# +# @skip_in_ci("Known timing issue in CI - tracked in #12345") +# async def test_problematic(): +# ... +# +# @requires_credentials("GOOGLE_APPLICATION_CREDENTIALS", "VERTEX_PROJECT") +# def test_vertex_ai(): +# ... +``` + +## 7. Update Test Documentation + +Add to `CLAUDE.md`: + +```markdown +### Test Best Practices + +When writing tests: + +1. **Async tests with mocks** → Add `@pytest.mark.no_parallel` +2. **Tests needing credentials** → Add `@requires_credentials("ENV_VAR")` +3. **Known flaky tests** → Add `@pytest.mark.flaky(reruns=3)` +4. **Slow tests** → Add `@pytest.mark.slow` + +Example: +\`\`\`python +@pytest.mark.asyncio +@pytest.mark.no_parallel # Has async mocks +async def test_mcp_oauth(): + with patch("module.func") as mock: + # Test code + mock.assert_called_once() +\`\`\` + +Check test flakiness locally: +\`\`\`bash +make test-file-repeat FILE=tests/path/to/test.py +\`\`\` +``` + +## Priority Implementation Order + +### Phase 1: Quick Wins (1 day) +1. ✅ Document test patterns (done - test-flakiness-guide.md) +2. Add `pytest-rerunfailures` to dependencies +3. Mark known flaky tests with `@pytest.mark.flaky` +4. Add test utilities module + +### Phase 2: CI Improvements (2-3 days) +1. Update GitHub Actions to retry failed tests +2. Split stable vs flaky test runs +3. Add Makefile targets for common test scenarios + +### Phase 3: Enforcement (1 week) +1. Add pre-commit hook for test markers +2. Update CLAUDE.md with test guidelines +3. Review and mark all async tests appropriately + +## Monitoring Success + +Track these metrics: +- **Flaky test rate**: Tests that fail <10% of runs +- **False failure rate**: CI failures that pass on retry +- **Test runtime**: Should decrease with better parallelization + +Target goals: +- ✅ <5% flaky test rate +- ✅ <2% false failure rate +- ✅ Test suite completes in <15 minutes + +## Quick Checklist + +Before merging a PR with new tests: + +- [ ] All async tests with mocks have `@pytest.mark.no_parallel` +- [ ] Tests requiring credentials have skip conditions +- [ ] No `isinstance()` checks on module-level imports +- [ ] Async mocks are configured before execution +- [ ] Tests pass 10 times locally: `make test-file-repeat FILE=...` +- [ ] Tests pass in parallel: `pytest tests/file.py -n 4` diff --git a/docs/test-flakiness-guide.md b/docs/test-flakiness-guide.md new file mode 100644 index 00000000000..a5f74ec2b01 --- /dev/null +++ b/docs/test-flakiness-guide.md @@ -0,0 +1,325 @@ +# Test Flakiness Solutions Guide + +This guide explains how to address CI test flakiness in LiteLLM. + +## Problem Summary + +Tests passing locally but failing in CI due to: +- Parallel test execution (pytest-xdist) +- Async mock timing issues +- Module reload side effects +- Environment differences + +## Solution 1: Better Test Isolation + +### Use `@pytest.mark.no_parallel` for Sequential Tests + +When tests have timing-sensitive mocks or shared state: + +```python +@pytest.mark.asyncio +@pytest.mark.no_parallel # Prevents parallel execution +async def test_oauth2_headers_passed_to_mcp_client(): + # Async mocks that need deterministic timing + with patch("module._create_mcp_client") as mock_client: + # Test code + mock_client.assert_called_once() +``` + +**When to use:** +- ✅ Async mock assertions (`call_count`, `assert_called_once`) +- ✅ Global state manipulation +- ✅ Database schema migrations +- ✅ File system operations that aren't isolated +- ❌ Simple unit tests (adds unnecessary overhead) + +### Fix Module Import Issues + +For `isinstance()` checks that fail after module reload: + +```python +# ❌ BAD: Module-level import becomes stale after reload +from module import MyClass + +def test_something(): + obj = create_object() + assert isinstance(obj, MyClass) # Fails if litellm reloaded! + +# ✅ GOOD: Import locally after reload +def test_something(): + from module import MyClass # Fresh reference + obj = create_object() + assert isinstance(obj, MyClass) # Always works +``` + +**Root cause:** `conftest.py` does `importlib.reload(litellm)` which creates new class objects. + +## Solution 2: Robust Async Mock Setup + +### Pattern 1: Ensure Mock Applied Before Async Code + +```python +@pytest.mark.asyncio +async def test_async_function(): + # ❌ BAD: Mock might not be ready + with patch("module.async_func") as mock: + result = await call_async_code() + mock.assert_called_once() # Flaky in CI! + + # ✅ GOOD: Use AsyncMock and configure before execution + mock_func = AsyncMock(return_value="result") + with patch("module.async_func", mock_func): + # Ensure mock is configured + assert mock_func.call_count == 0 + + result = await call_async_code() + + # More reliable assertion + await asyncio.sleep(0) # Let event loop settle + assert mock_func.call_count == 1 +``` + +### Pattern 2: Use `side_effect` for Async Mocks + +```python +@pytest.mark.asyncio +async def test_with_side_effect(): + async def mock_implementation(*args, **kwargs): + # Custom async logic + return {"status": "success"} + + with patch("module.async_func", side_effect=mock_implementation): + result = await call_code() + assert result["status"] == "success" +``` + +### Pattern 3: Patch Early, Execute Late + +```python +@pytest.mark.asyncio +async def test_patch_early(): + # Create and configure mock FIRST + mock_client = AsyncMock() + mock_client._create_mcp_client = AsyncMock(return_value={"id": "123"}) + + # THEN patch + with patch("module.ClientClass", return_value=mock_client): + # NOW execute async code + result = await execute_code() + + # Assertions are more reliable + mock_client._create_mcp_client.assert_called_once() +``` + +## Solution 3: Retry Logic for Flaky Tests + +### Method A: pytest-rerunfailures (Already Configured) + +The project already has retry logic in `pyproject.toml`: + +```toml +[tool.pytest.ini_options] +retries = 20 +retry_delay = 5 +``` + +To use it for specific tests: + +```python +@pytest.mark.flaky(reruns=3, reruns_delay=1) +def test_sometimes_fails(): + # Flaky test code + pass +``` + +### Method B: Manual Retry Decorator + +For async tests with specific retry logic: + +```python +import asyncio +from functools import wraps + +def retry_async(max_attempts=3, delay=0.1): + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + last_exception = None + for attempt in range(max_attempts): + try: + return await func(*args, **kwargs) + except AssertionError as e: + last_exception = e + if attempt < max_attempts - 1: + await asyncio.sleep(delay) + raise last_exception + return wrapper + return decorator + +@pytest.mark.asyncio +@retry_async(max_attempts=3) +async def test_with_retry(): + # Test that might fail due to timing + result = await async_operation() + assert result.call_count == 1 +``` + +### Method C: CI-Only Retries + +In GitHub Actions workflow: + +```yaml +- name: Run Tests + run: | + poetry run pytest tests/ -v --maxfail=5 || \ + poetry run pytest tests/ --lf -v --maxfail=5 || \ + poetry run pytest tests/ --lf -v +``` + +This runs: +1. All tests normally +2. If fails, rerun only last failures +3. If still fails, rerun last failures again + +## Solution 4: Test Fixtures for Cleanup + +### Ensure Clean State Between Tests + +```python +@pytest.fixture(autouse=True) +async def cleanup_mcp_state(): + """Clean up MCP state between tests""" + yield + + # Teardown + try: + from litellm.proxy._experimental.mcp_server.mcp_server_manager import ( + global_mcp_server_manager, + ) + global_mcp_server_manager.registry.clear() + except ImportError: + pass +``` + +### Module-Level Cleanup + +```python +@pytest.fixture(scope="module", autouse=True) +def reset_litellm_state(): + """Reset litellm state before module tests""" + import litellm + + # Save state + original_settings = { + "disable_aiohttp_transport": litellm.disable_aiohttp_transport, + "force_ipv4": litellm.force_ipv4, + } + + yield + + # Restore state + for key, value in original_settings.items(): + setattr(litellm, key, value) +``` + +## Solution 5: CI Environment Variables + +### Skip Tests Without Credentials + +```python +def _has_credentials() -> bool: + return "GOOGLE_APPLICATION_CREDENTIALS" in os.environ + +@pytest.mark.skipif( + not _has_credentials(), + reason="Credentials not available in CI" +) +def test_requires_credentials(): + # Test code + pass +``` + +### Detect CI Environment + +```python +import os + +IS_CI = os.getenv("CI") == "true" or os.getenv("GITHUB_ACTIONS") == "true" + +@pytest.mark.skipif(IS_CI, reason="Flaky in CI, tracked in issue #XXXXX") +def test_known_flaky(): + # Test code + pass +``` + +## Checklist for Test Authors + +When writing tests that might be flaky: + +- [ ] Does it use async mocks? → Add `@pytest.mark.no_parallel` +- [ ] Does it modify global state? → Add cleanup fixture +- [ ] Does it check `isinstance()` → Import classes locally in test +- [ ] Does it need credentials? → Add `@pytest.mark.skipif` +- [ ] Is it timing-sensitive? → Add retries or `asyncio.sleep(0)` +- [ ] Does it fail randomly in CI? → Add `@pytest.mark.flaky(reruns=3)` + +## Quick Reference + +| Problem | Solution | Example | +|---------|----------|---------| +| Async mock not called | Use `@pytest.mark.no_parallel` | See Pattern 3 above | +| `isinstance()` fails | Import class locally | See Fix Module Import above | +| Need credentials | Use `@pytest.mark.skipif` | See Solution 5 above | +| Random timing failures | Add `@pytest.mark.flaky` | See Method A above | +| Module state pollution | Add cleanup fixture | See Solution 4 above | + +## Examples from Codebase + +### Fixed: http_handler isinstance issue (PR #21388) +```python +@pytest.mark.asyncio +async def test_session_reuse_integration(): + # Import locally to get fresh class after reload + from litellm.llms.custom_httpx.http_handler import ( + get_async_httpx_client, + AsyncHTTPHandler as AsyncHTTPHandlerReload + ) + + client = get_async_httpx_client(...) + assert isinstance(client, AsyncHTTPHandlerReload) # ✅ Works! +``` + +### Already Correct: MCP OAuth test +```python +@pytest.mark.asyncio +@pytest.mark.no_parallel # Prevents parallel execution issues +async def test_oauth2_headers_passed_to_mcp_client(): + # Test with async mocks + pass +``` + +## Debugging Flaky Tests + +### Run test 100 times locally: +```bash +for i in {1..100}; do + poetry run pytest tests/path/to/test.py::test_name || break +done +``` + +### Run with pytest-repeat: +```bash +poetry run pytest tests/path/to/test.py::test_name --count=100 +``` + +### Run in parallel to reproduce CI: +```bash +poetry run pytest tests/path/to/test.py -n 4 # 4 parallel workers +``` + +## Further Reading + +- [pytest-xdist docs](https://pytest-xdist.readthedocs.io/) +- [pytest-asyncio docs](https://pytest-asyncio.readthedocs.io/) +- [unittest.mock docs](https://docs.python.org/3/library/unittest.mock.html) +- [pytest-rerunfailures](https://github.com/pytest-dev/pytest-rerunfailures) diff --git a/tests/test_litellm/llms/custom_httpx/test_http_handler.py b/tests/test_litellm/llms/custom_httpx/test_http_handler.py index b0011fd8f76..caf90dce6c8 100644 --- a/tests/test_litellm/llms/custom_httpx/test_http_handler.py +++ b/tests/test_litellm/llms/custom_httpx/test_http_handler.py @@ -358,38 +358,40 @@ async def test_async_handler_with_shared_session(): @pytest.mark.asyncio async def test_get_async_httpx_client_with_shared_session(): """Test get_async_httpx_client with shared session""" - from litellm.llms.custom_httpx.http_handler import get_async_httpx_client + from litellm.llms.custom_httpx.http_handler import get_async_httpx_client, AsyncHTTPHandler as AsyncHTTPHandlerReload from litellm.types.utils import LlmProviders - + # Create a mock shared session mock_session = MockClientSession() - + # Test with shared session client = get_async_httpx_client( llm_provider=LlmProviders.ANTHROPIC, shared_session=mock_session # type: ignore ) - + # Verify the client was created successfully assert client is not None - assert isinstance(client, AsyncHTTPHandler) + # Import locally to avoid stale reference after module reload in conftest + assert isinstance(client, AsyncHTTPHandlerReload) @pytest.mark.asyncio async def test_get_async_httpx_client_without_shared_session(): """Test get_async_httpx_client without shared session (backward compatibility)""" - from litellm.llms.custom_httpx.http_handler import get_async_httpx_client + from litellm.llms.custom_httpx.http_handler import get_async_httpx_client, AsyncHTTPHandler as AsyncHTTPHandlerReload from litellm.types.utils import LlmProviders - + # Test without shared session client = get_async_httpx_client( llm_provider=LlmProviders.ANTHROPIC, shared_session=None ) - + # Verify the client was created successfully assert client is not None - assert isinstance(client, AsyncHTTPHandler) + # Import locally to avoid stale reference after module reload in conftest + assert isinstance(client, AsyncHTTPHandlerReload) @pytest.mark.asyncio @@ -450,31 +452,32 @@ def test_shared_session_parameter_in_completion(): @pytest.mark.asyncio async def test_session_reuse_integration(): """Integration test for session reuse functionality""" - from litellm.llms.custom_httpx.http_handler import get_async_httpx_client + from litellm.llms.custom_httpx.http_handler import get_async_httpx_client, AsyncHTTPHandler as AsyncHTTPHandlerReload from litellm.types.utils import LlmProviders - + # Create a mock session mock_session = MockClientSession() - + # Create two clients with the same session client1 = get_async_httpx_client( llm_provider=LlmProviders.ANTHROPIC, shared_session=mock_session # type: ignore ) - + client2 = get_async_httpx_client( llm_provider=LlmProviders.OPENAI, shared_session=mock_session # type: ignore ) - + # Both clients should be created successfully assert client1 is not None assert client2 is not None - + # Both should be AsyncHTTPHandler instances - assert isinstance(client1, AsyncHTTPHandler) - assert isinstance(client2, AsyncHTTPHandler) - + # Import locally to avoid stale reference after module reload in conftest + assert isinstance(client1, AsyncHTTPHandlerReload) + assert isinstance(client2, AsyncHTTPHandlerReload) + # Clean up await client1.close() await client2.close()