diff --git a/.github/workflows/test-litellm-matrix.yml b/.github/workflows/test-litellm-matrix.yml
index d83fedcb2ae..5590662f5ae 100644
--- a/.github/workflows/test-litellm-matrix.yml
+++ b/.github/workflows/test-litellm-matrix.yml
@@ -12,7 +12,7 @@ concurrency:
 jobs:
   test:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 20  # Increased from 15 to 20
     strategy:
       fail-fast: false
       matrix:
@@ -20,36 +20,46 @@ jobs:
           # tests/test_litellm split by subdirectory (~560 files total)
           - name: "llms"
             path: "tests/test_litellm/llms"
-            workers: 4
+            workers: 2  # Reduced from 4 to 2 to avoid race conditions
+            reruns: 2  # Retry flaky tests twice
           # tests/test_litellm/proxy split by subdirectory (~180 files total)
           - name: "proxy-guardrails"
             path: "tests/test_litellm/proxy/guardrails tests/test_litellm/proxy/management_endpoints tests/test_litellm/proxy/management_helpers"
-            workers: 4
+            workers: 2
+            reruns: 2
           - name: "proxy-core"
             path: "tests/test_litellm/proxy/auth tests/test_litellm/proxy/client tests/test_litellm/proxy/db tests/test_litellm/proxy/hooks tests/test_litellm/proxy/policy_engine"
-            workers: 4
+            workers: 2
+            reruns: 2
           - name: "proxy-misc"
             path: "tests/test_litellm/proxy/_experimental tests/test_litellm/proxy/agent_endpoints tests/test_litellm/proxy/anthropic_endpoints tests/test_litellm/proxy/common_utils tests/test_litellm/proxy/discovery_endpoints tests/test_litellm/proxy/experimental tests/test_litellm/proxy/google_endpoints tests/test_litellm/proxy/health_endpoints tests/test_litellm/proxy/image_endpoints tests/test_litellm/proxy/middleware tests/test_litellm/proxy/openai_files_endpoint tests/test_litellm/proxy/pass_through_endpoints tests/test_litellm/proxy/prompts tests/test_litellm/proxy/public_endpoints tests/test_litellm/proxy/response_api_endpoints tests/test_litellm/proxy/spend_tracking tests/test_litellm/proxy/ui_crud_endpoints tests/test_litellm/proxy/vector_store_endpoints tests/test_litellm/proxy/test_*.py"
-            workers: 4
+            workers: 2
+            reruns: 2
           - name: "integrations"
             path: "tests/test_litellm/integrations"
-            workers: 4
+            workers: 2
+            reruns: 3  # Integration tests tend to be flakier
           - name: "core-utils"
             path: "tests/test_litellm/litellm_core_utils"
             workers: 2
+            reruns: 1
           - name: "other"
             path: "tests/test_litellm/caching tests/test_litellm/responses tests/test_litellm/secret_managers tests/test_litellm/vector_stores tests/test_litellm/a2a_protocol tests/test_litellm/anthropic_interface tests/test_litellm/completion_extras tests/test_litellm/containers tests/test_litellm/enterprise tests/test_litellm/experimental_mcp_client tests/test_litellm/google_genai tests/test_litellm/images tests/test_litellm/interactions tests/test_litellm/passthrough tests/test_litellm/router_strategy tests/test_litellm/router_utils tests/test_litellm/types"
-            workers: 4
+            workers: 2
+            reruns: 2
           - name: "root"
             path: "tests/test_litellm/test_*.py"
-            workers: 4
+            workers: 2
+            reruns: 2
           # tests/proxy_unit_tests split alphabetically (~48 files total)
           - name: "proxy-unit-a"
             path: "tests/proxy_unit_tests/test_[a-o]*.py"
             workers: 2
+            reruns: 1
           - name: "proxy-unit-b"
             path: "tests/proxy_unit_tests/test_[p-z]*.py"
             workers: 2
+            reruns: 1
 
     name: test (${{ matrix.test-group.name }})
 
@@ -79,7 +89,9 @@ jobs:
         run: |
           poetry config virtualenvs.in-project true
           poetry install --with dev,proxy-dev --extras "proxy semantic-router"
-          poetry run pip install pytest-retry==1.6.3 pytest-xdist google-genai==1.22.0 \
-            google-cloud-aiplatform>=1.38 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core
+          # pytest-rerunfailures and pytest-xdist are in pyproject.toml dev dependencies
+          # Keep the ">=" specifier quoted: unquoted, the shell treats ">" as an output redirection
+          poetry run pip install google-genai==1.22.0 \
+            "google-cloud-aiplatform>=1.38" fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core
 
       - name: Setup litellm-enterprise
@@ -92,4 +104,6 @@ jobs:
             --tb=short -vv \
             --maxfail=10 \
             -n ${{ matrix.test-group.workers }} \
+            --reruns ${{ matrix.test-group.reruns }} \
+            --reruns-delay 1 \
             --durations=20
diff --git a/poetry.lock b/poetry.lock
index 82df007de13..e3f3fea1321 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -5524,6 +5524,22 @@ psycopg = ">=3.0.0"
 pytest = ">=6.2"
 setuptools = "*"
 
+[[package]]
+name = "pytest-rerunfailures"
+version = "14.0"
+description = "pytest plugin to re-run tests to eliminate flaky failures"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "pytest-rerunfailures-14.0.tar.gz", hash = "sha256:4a400bcbcd3c7a4ad151ab8afac123d90eca3abe27f98725dc4d9702887d2e92"},
+    {file = "pytest_rerunfailures-14.0-py3-none-any.whl", hash = "sha256:4197bdd2eaeffdbf50b5ea6e7236f47ff0e44d1def8dae08e409f536d84e7b32"},
+]
+
+[package.dependencies]
+packaging = ">=17.1"
+pytest = ">=7.2"
+
 [[package]]
 name = "pytest-retry"
 version = "1.7.0"
@@ -7934,4 +7950,4 @@ utils = ["numpydoc"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9,<4.0"
-content-hash = "dfaf1eabfd17db5e30a8dda813872507aa38664fe7681ece2f8fa06ba035d3cf"
+content-hash = "d99036fc86de60170dde4a1a9b3f9fdac6eb3610edb6177e70ca86133b71703e"
diff --git a/pyproject.toml b/pyproject.toml
index 4deb61836b3..52b1b9452f9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -151,7 +151,7 @@ pytest = "^7.4.3"
 pytest-mock = "^3.12.0"
 pytest-asyncio = "^0.21.1"
 pytest-postgresql = "^6.0.0"
-pytest-retry = "^1.6.3"
+pytest-xdist = "^3.5.0"
 requests-mock = "^1.12.1"
 responses = "^0.25.7"
 respx = "^0.22.0"
@@ -166,6 +166,7 @@ opentelemetry-exporter-otlp = "^1.28.0"
 langfuse = "^2.45.0"
 fastapi-offline = "^1.7.3"
 fakeredis = "^2.27.1"
+pytest-rerunfailures = "^14.0"
 
 [tool.poetry.group.proxy-dev.dependencies]
 prisma = "0.11.0"
@@ -192,8 +193,6 @@ plugins = "pydantic.mypy"
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
-retries = 20
-retry_delay = 5
 markers = [
     "asyncio: mark test as an asyncio test",
     "limit_leaks: mark test with memory limit for leak detection (e.g., '40 MB')",