Skip to content
Merged
115 changes: 114 additions & 1 deletion .github/workflows/python-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ jobs:
-n logical --dist worksteal
--timeout=120 --session-timeout=900 --timeout_method thread
--retries 2 --retry-delay 5
--junitxml=pytest.xml
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-openai
path: ./python/pytest.xml
if-no-files-found: ignore

# Azure OpenAI integration tests
python-tests-azure-openai:
Expand Down Expand Up @@ -130,6 +138,14 @@ jobs:
-n logical --dist worksteal
--timeout=120 --session-timeout=900 --timeout_method thread
--retries 2 --retry-delay 5
--junitxml=pytest.xml
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-azure-openai
path: ./python/pytest.xml
if-no-files-found: ignore

# Misc integration tests (Anthropic, Hyperlight, Ollama, MCP)
python-tests-misc-integration:
Expand Down Expand Up @@ -173,6 +189,14 @@ jobs:
-n logical --dist worksteal
--timeout=120 --session-timeout=900 --timeout_method thread
--retries 2 --retry-delay 30
--junitxml=pytest.xml
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-misc
path: ./python/pytest.xml
if-no-files-found: ignore
- name: Stop local MCP server
if: always()
shell: bash
Expand Down Expand Up @@ -249,6 +273,14 @@ jobs:
-x
--timeout=360 --session-timeout=900 --timeout_method thread
--retries 2 --retry-delay 5
--junitxml=pytest.xml
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-functions
path: ./python/pytest.xml
if-no-files-found: ignore

# Foundry integration tests
python-tests-foundry:
Expand Down Expand Up @@ -295,6 +327,14 @@ jobs:
-n logical --dist worksteal
--timeout=120 --session-timeout=900 --timeout_method thread
--retries 2 --retry-delay 5
--junitxml=pytest.xml
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-foundry
path: ./python/pytest.xml
if-no-files-found: ignore

# Azure Cosmos integration tests
python-tests-cosmos:
Expand Down Expand Up @@ -339,7 +379,80 @@ jobs:
echo "Cosmos DB emulator did not become ready in time." >&2
exit 1
- name: Test with pytest (Cosmos integration)
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=${{ github.workspace }}/python/pytest.xml
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-cosmos
path: ./python/pytest.xml
if-no-files-found: ignore

# Flaky test trend report.
# Collects the JUnit XML files that the test jobs upload as `test-results-*`
# artifacts, merges them with the cached history file and publishes a
# markdown trend report (job summary + artifact).
python-flaky-test-report:
  name: Flaky Test Report
  # Run even when upstream jobs failed, but only if at least one of the
  # needed jobs reported `success` or `failure`.
  if: >
    always() &&
    (contains(join(needs.*.result, ','), 'success') ||
    contains(join(needs.*.result, ','), 'failure'))
  needs:
    [
      python-tests-openai,
      python-tests-azure-openai,
      python-tests-misc-integration,
      python-tests-functions,
      python-tests-foundry,
      python-tests-cosmos,
    ]
  runs-on: ubuntu-latest
  defaults:
    run:
      # Applies to `run:` steps only; `uses:` steps below take paths
      # relative to the workspace root.
      working-directory: python
  steps:
    - uses: actions/checkout@v6
      with:
        ref: ${{ inputs.checkout-ref }}
        persist-credentials: false
    - name: Set up python and install the project
      uses: ./.github/actions/python-setup
      with:
        python-version: ${{ env.UV_PYTHON }}
        os: ${{ runner.os }}
    - name: Download all test results from current run
      uses: actions/download-artifact@v4
      with:
        pattern: test-results-*
        path: test-results/
    - name: Restore flaky report history cache
      uses: actions/cache/restore@v4
      with:
        path: python/flaky-report-history.json
        # The key is unique per run *attempt*, so it never matches exactly;
        # the prefix in `restore-keys` picks up the latest saved history.
        key: flaky-report-history-integration-${{ github.run_id }}-${{ github.run_attempt }}
        restore-keys: |
          flaky-report-history-integration-
    - name: Generate trend report
      # Arguments: <reports-dir> <history-file> <output-file>, resolved
      # from the `python` working directory.
      run: >
        uv run python scripts/flaky_report/aggregate.py
        ../test-results/
        flaky-report-history.json
        flaky-test-report.md
    - name: Post to Job Summary
      if: always()
      # This step also runs when report generation failed, so the report
      # file may be missing; do not add a second failure in that case.
      run: |
        if [ -f flaky-test-report.md ]; then
          cat flaky-test-report.md >> "$GITHUB_STEP_SUMMARY"
        else
          echo "No flaky test report was generated." >> "$GITHUB_STEP_SUMMARY"
        fi
    - name: Save flaky report history cache
      if: always()
      uses: actions/cache/save@v4
      with:
        path: python/flaky-report-history.json
        # Existing cache entries cannot be overwritten; including the run
        # attempt lets a re-run of the same workflow run save its history.
        key: flaky-report-history-integration-${{ github.run_id }}-${{ github.run_attempt }}
    - name: Upload unified trend report
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: flaky-test-report
        path: |
          python/flaky-test-report.md
          python/flaky-report-history.json

python-integration-tests-check:
if: always()
Expand Down
111 changes: 110 additions & 1 deletion .github/workflows/python-merge-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,13 @@ jobs:
display-options: fEX
fail-on-empty: false
title: OpenAI integration test results
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-openai
path: ./python/pytest.xml
if-no-files-found: ignore

# Azure OpenAI integration tests
python-tests-azure-openai:
Expand Down Expand Up @@ -244,6 +251,13 @@ jobs:
display-options: fEX
fail-on-empty: false
title: Azure OpenAI integration test results
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-azure-openai
path: ./python/pytest.xml
if-no-files-found: ignore

# Misc integration tests (Anthropic, Ollama, MCP)
python-tests-misc-integration:
Expand Down Expand Up @@ -321,6 +335,13 @@ jobs:
display-options: fEX
fail-on-empty: false
title: Misc integration test results
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-misc
path: ./python/pytest.xml
if-no-files-found: ignore

# Azure Functions + Durable Task integration tests
python-tests-functions:
Expand Down Expand Up @@ -392,6 +413,13 @@ jobs:
display-options: fEX
fail-on-empty: false
title: Functions integration test results
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-functions
path: ./python/pytest.xml
if-no-files-found: ignore

python-tests-foundry:
name: Python Integration Tests - Foundry
Expand All @@ -409,6 +437,10 @@ jobs:
FOUNDRY_MODEL: ${{ vars.FOUNDRY_MODEL }}
FOUNDRY_AGENT_NAME: ${{ vars.FOUNDRY_AGENT_NAME }}
FOUNDRY_AGENT_VERSION: ${{ vars.FOUNDRY_AGENT_VERSION }}
FOUNDRY_MODELS_ENDPOINT: ${{ vars.FOUNDRY_MODELS_ENDPOINT || '' }}
FOUNDRY_MODELS_API_KEY: ${{ secrets.FOUNDRY_MODELS_API_KEY || '' }}
FOUNDRY_EMBEDDING_MODEL: ${{ vars.FOUNDRY_EMBEDDING_MODEL || '' }}
FOUNDRY_IMAGE_EMBEDDING_MODEL: ${{ vars.FOUNDRY_IMAGE_EMBEDDING_MODEL || '' }}
LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
defaults:
run:
Expand Down Expand Up @@ -448,6 +480,13 @@ jobs:
display-options: fEX
fail-on-empty: false
title: Test results
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-foundry
path: ./python/pytest.xml
if-no-files-found: ignore

# TODO: Add python-tests-lab

Expand Down Expand Up @@ -497,7 +536,7 @@ jobs:
echo "Cosmos DB emulator did not become ready in time." >&2
exit 1
- name: Test with pytest (Cosmos integration)
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=pytest.xml
run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=${{ github.workspace }}/python/pytest.xml
working-directory: ./python
- name: Surface failing tests
if: always()
Expand All @@ -508,6 +547,76 @@ jobs:
display-options: fEX
fail-on-empty: false
title: Cosmos integration test results
- name: Upload test results
if: always()
uses: actions/upload-artifact@v7
with:
name: test-results-cosmos
path: ./python/pytest.xml
if-no-files-found: ignore

# Flaky test trend report.
# Collects the JUnit XML files that the test jobs upload as `test-results-*`
# artifacts, merges them with the cached history file and publishes a
# markdown trend report (job summary + artifact).
python-flaky-test-report:
  name: Flaky Test Report
  # Run even when upstream jobs failed, but only if at least one of the
  # needed jobs reported `success` or `failure`.
  if: >
    always() &&
    (contains(join(needs.*.result, ','), 'success') ||
    contains(join(needs.*.result, ','), 'failure'))
  needs:
    [
      python-tests-openai,
      python-tests-azure-openai,
      python-tests-misc-integration,
      python-tests-functions,
      python-tests-foundry,
      python-tests-cosmos,
    ]
  runs-on: ubuntu-latest
  defaults:
    run:
      # Applies to `run:` steps only; `uses:` steps below take paths
      # relative to the workspace root.
      working-directory: python
  steps:
    - uses: actions/checkout@v6
      with:
        # No later step in this job uses git credentials, so do not leave
        # the token in the local git config.
        persist-credentials: false
    - name: Set up python and install the project
      uses: ./.github/actions/python-setup
      with:
        python-version: ${{ env.UV_PYTHON }}
        os: ${{ runner.os }}
    - name: Download all test results from current run
      uses: actions/download-artifact@v4
      with:
        pattern: test-results-*
        path: test-results/
    - name: Restore flaky report history cache
      uses: actions/cache/restore@v4
      with:
        path: python/flaky-report-history.json
        # The key is unique per run *attempt*, so it never matches exactly;
        # the prefix in `restore-keys` picks up the latest saved history.
        key: flaky-report-history-merge-${{ github.run_id }}-${{ github.run_attempt }}
        restore-keys: |
          flaky-report-history-merge-
    - name: Generate trend report
      # Arguments: <reports-dir> <history-file> <output-file>, resolved
      # from the `python` working directory.
      run: >
        uv run python scripts/flaky_report/aggregate.py
        ../test-results/
        flaky-report-history.json
        flaky-test-report.md
    - name: Post to Job Summary
      if: always()
      # This step also runs when report generation failed, so the report
      # file may be missing; do not add a second failure in that case.
      run: |
        if [ -f flaky-test-report.md ]; then
          cat flaky-test-report.md >> "$GITHUB_STEP_SUMMARY"
        else
          echo "No flaky test report was generated." >> "$GITHUB_STEP_SUMMARY"
        fi
    - name: Save flaky report history cache
      if: always()
      uses: actions/cache/save@v4
      with:
        path: python/flaky-report-history.json
        # Existing cache entries cannot be overwritten; including the run
        # attempt lets a re-run of the same workflow run save its history.
        key: flaky-report-history-merge-${{ github.run_id }}-${{ github.run_attempt }}
    - name: Upload unified trend report
      if: always()
      uses: actions/upload-artifact@v7
      with:
        name: flaky-test-report
        path: |
          python/flaky-test-report.md
          python/flaky-report-history.json

python-integration-tests-check:
if: always()
Expand Down
11 changes: 11 additions & 0 deletions python/scripts/flaky_report/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) Microsoft. All rights reserved.

"""Flaky test report aggregation and trend generation.

Parses JUnit XML (``pytest.xml``) files produced by each CI job, merges
them with historical data, and generates a markdown trend report showing
per-test status across the last N runs.

Usage:
uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
"""
20 changes: 20 additions & 0 deletions python/scripts/flaky_report/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) Microsoft. All rights reserved.

"""CLI entry point for the flaky test report tool.

Usage:
uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>

Example (from python/ directory):
uv run python -m scripts.flaky_report \\
../flaky-reports/ \\
flaky-report-history.json \\
flaky-test-report.md
"""

import sys

from scripts.flaky_report.aggregate import main

if __name__ == "__main__":
    # Hand main()'s return value to sys.exit so it becomes the exit status.
    exit_status = main()
    sys.exit(exit_status)
Loading
Loading