Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
101f501
Enable Ollama integration tests in CI and rename report to Integratio…
giles17 Apr 23, 2026
733bfb9
Bump Ollama model to qwen2.5:1.5b for better instruction following
giles17 Apr 23, 2026
dc64d63
Re-enable reliable streaming integration tests
giles17 Apr 24, 2026
9316f2c
Re-enable skipped Functions/DurableTask tests and bump timeout to 480s
giles17 Apr 24, 2026
f6f8747
Re-skip failing Functions/DurableTask tests with specific root causes
giles17 Apr 27, 2026
a6e0ab5
Fix auth routing in samples 06/11: api_key -> credential for Azure Op…
giles17 Apr 27, 2026
3745265
Re-skip parallel workflow tests: xdist worker distribution issue
giles17 Apr 27, 2026
386e08e
Fix E501 line-too-long in azurefunctions parallel test skip reasons
giles17 Apr 28, 2026
e2eba0b
Add retry logic and port-conflict fix for Ollama CI setup
giles17 Apr 28, 2026
2e6b999
Merge branch 'main' into flaky-test-report
giles17 Apr 29, 2026
072123a
Fix flaky integration tests and re-enable skipped tests
giles17 Apr 29, 2026
3ab3370
Remove temperature from foundry hosting test (unsupported by CI model)
giles17 Apr 29, 2026
acf24ea
Stabilize Ollama tool call integration tests with no-arg function
giles17 Apr 30, 2026
7ce27dd
Increase reliable streaming test timeouts from 30s to 60s
giles17 Apr 30, 2026
455c28d
Re-enable workflow parallel tests with xdist_group marker
giles17 Apr 30, 2026
d2de5ba
Revert "Re-enable workflow parallel tests with xdist_group marker"
giles17 Apr 30, 2026
52589ab
Rename flaky_report to integration_test_report and add try/finally cl…
giles17 Apr 30, 2026
0edd5f1
Merge branch 'main' into flaky-test-report
giles17 Apr 30, 2026
097095c
Fix Ollama pull failure propagation and Azure OpenAI vector store rea…
giles17 Apr 30, 2026
e33d3e5
remove load_dotenv from test file
giles17 Apr 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 38 additions & 16 deletions .github/workflows/python-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ jobs:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_CHAT_MODEL: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
OLLAMA_MODEL: qwen2.5:0.5b
OLLAMA_EMBEDDING_MODEL: nomic-embed-text
defaults:
run:
working-directory: python
Expand All @@ -171,6 +173,26 @@ jobs:
with:
python-version: ${{ env.UV_PYTHON }}
os: ${{ runner.os }}
# Installs Ollama on the runner by downloading the upstream install script
# and piping it straight into `sh`.
# NOTE(review): the script is fetched unpinned at run time; consider pinning a
# release or verifying a checksum so CI is reproducible — confirm with owners.
- name: Install Ollama
run: curl -fsSL https://ollama.com/install.sh | sh
Comment thread
giles17 marked this conversation as resolved.
working-directory: .
# Cache the downloaded model files between runs so the pulls below are fast.
# The key is derived from the job-level OLLAMA_MODEL / OLLAMA_EMBEDDING_MODEL
# env vars, so bumping a model automatically invalidates the old cache instead
# of silently restoring stale models. Bump the `-v1` suffix to force a refresh.
# NOTE(review): assumes models land in ~/.ollama/models for the runner user —
# confirm this is where the server that performs the pulls stores them.
- name: Cache Ollama models
  uses: actions/cache@v4
  with:
    path: ~/.ollama/models
    key: >-
      ollama-models-${{ env.OLLAMA_MODEL }}-${{ env.OLLAMA_EMBEDDING_MODEL }}-v1
# Ensures an Ollama API is listening on localhost:11434, then pulls the chat
# and embedding models the integration tests use. Fails the step with a clear
# error if the server never becomes ready or a model cannot be pulled.
- name: Start Ollama and pull models
  run: |
    set -euo pipefail
    api_url="http://localhost:11434/api/tags"
    log_file="${RUNNER_TEMP:-/tmp}/ollama-serve.log"

    # Only launch a server when nothing answers yet; starting a second one on
    # the same port would just fail with "address already in use".
    if ! curl -sf "$api_url" > /dev/null 2>&1; then
      ollama serve > "$log_file" 2>&1 &
    fi

    # Poll for up to 30 seconds and remember whether the API ever responded,
    # so a timeout is reported here rather than as a confusing pull failure.
    ready=false
    for _ in $(seq 1 30); do
      if curl -sf "$api_url" > /dev/null 2>&1; then
        ready=true
        break
      fi
      sleep 1
    done
    if [ "$ready" != "true" ]; then
      echo "::error::Ollama API did not become ready within 30 seconds"
      if [ -f "$log_file" ]; then cat "$log_file"; fi
      exit 1
    fi

    # Model names come from the job env (falling back to the previous
    # hard-coded values); each pull is retried to ride out registry blips.
    for model in "${OLLAMA_MODEL:-qwen2.5:0.5b}" "${OLLAMA_EMBEDDING_MODEL:-nomic-embed-text}"; do
      pulled=false
      for attempt in 1 2 3; do
        if ollama pull "$model"; then
          pulled=true
          break
        fi
        echo "::warning::ollama pull $model failed (attempt $attempt of 3)"
        if [ "$attempt" -lt 3 ]; then sleep $((attempt * 5)); fi
      done
      if [ "$pulled" != "true" ]; then
        echo "::error::Failed to pull Ollama model $model"
        exit 1
      fi
    done
Comment thread
giles17 marked this conversation as resolved.
Outdated
working-directory: .
- name: Start local MCP server
id: local-mcp
uses: ./.github/actions/setup-local-mcp-server
Expand Down Expand Up @@ -388,9 +410,9 @@ jobs:
path: ./python/pytest.xml
if-no-files-found: ignore

# Flaky test trend report (aggregates per-job JUnit XML results)
python-flaky-test-report:
name: Flaky Test Report
# Integration test trend report (aggregates per-job JUnit XML results)
python-integration-test-report:
name: Integration Test Report
if: >
always() &&
(contains(join(needs.*.result, ','), 'success') ||
Expand Down Expand Up @@ -423,36 +445,36 @@ jobs:
with:
pattern: test-results-*
path: test-results/
- name: Restore flaky report history cache
- name: Restore report history cache
uses: actions/cache/restore@v4
with:
path: python/flaky-report-history.json
key: flaky-report-history-integration-${{ github.run_id }}
path: python/integration-report-history.json
key: integration-report-history-integration-${{ github.run_id }}
restore-keys: |
flaky-report-history-integration-
integration-report-history-integration-
- name: Generate trend report
run: >
uv run python scripts/flaky_report/aggregate.py
../test-results/
flaky-report-history.json
flaky-test-report.md
integration-report-history.json
integration-test-report.md
- name: Post to Job Summary
if: always()
run: cat flaky-test-report.md >> $GITHUB_STEP_SUMMARY
- name: Save flaky report history cache
run: cat integration-test-report.md >> $GITHUB_STEP_SUMMARY
- name: Save report history cache
if: always()
uses: actions/cache/save@v4
with:
path: python/flaky-report-history.json
key: flaky-report-history-integration-${{ github.run_id }}
path: python/integration-report-history.json
key: integration-report-history-integration-${{ github.run_id }}
- name: Upload unified trend report
if: always()
uses: actions/upload-artifact@v7
with:
name: flaky-test-report
name: integration-test-report
path: |
python/flaky-test-report.md
python/flaky-report-history.json
python/integration-test-report.md
python/integration-report-history.json

python-integration-tests-check:
if: always()
Expand Down
54 changes: 38 additions & 16 deletions .github/workflows/python-merge-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ jobs:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_CHAT_MODEL: ${{ vars.ANTHROPIC_CHAT_MODEL_ID }}
LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }}
OLLAMA_MODEL: qwen2.5:0.5b
OLLAMA_EMBEDDING_MODEL: nomic-embed-text
defaults:
run:
working-directory: python
Expand All @@ -286,6 +288,26 @@ jobs:
with:
python-version: ${{ env.UV_PYTHON }}
os: ${{ runner.os }}
# Installs Ollama on the runner by downloading the upstream install script
# and piping it straight into `sh`.
# NOTE(review): the script is fetched unpinned at run time; consider pinning a
# release or verifying a checksum so CI is reproducible — confirm with owners.
- name: Install Ollama
run: curl -fsSL https://ollama.com/install.sh | sh
Comment thread
giles17 marked this conversation as resolved.
working-directory: .
# Cache the downloaded model files between runs so the pulls below are fast.
# The key is derived from the job-level OLLAMA_MODEL / OLLAMA_EMBEDDING_MODEL
# env vars, so bumping a model automatically invalidates the old cache instead
# of silently restoring stale models. Bump the `-v1` suffix to force a refresh.
# NOTE(review): assumes models land in ~/.ollama/models for the runner user —
# confirm this is where the server that performs the pulls stores them.
- name: Cache Ollama models
  uses: actions/cache@v4
  with:
    path: ~/.ollama/models
    key: >-
      ollama-models-${{ env.OLLAMA_MODEL }}-${{ env.OLLAMA_EMBEDDING_MODEL }}-v1
# Ensures an Ollama API is listening on localhost:11434, then pulls the chat
# and embedding models the integration tests use. Fails the step with a clear
# error if the server never becomes ready or a model cannot be pulled.
- name: Start Ollama and pull models
  run: |
    set -euo pipefail
    api_url="http://localhost:11434/api/tags"
    log_file="${RUNNER_TEMP:-/tmp}/ollama-serve.log"

    # Only launch a server when nothing answers yet; starting a second one on
    # the same port would just fail with "address already in use".
    if ! curl -sf "$api_url" > /dev/null 2>&1; then
      ollama serve > "$log_file" 2>&1 &
    fi

    # Poll for up to 30 seconds and remember whether the API ever responded,
    # so a timeout is reported here rather than as a confusing pull failure.
    ready=false
    for _ in $(seq 1 30); do
      if curl -sf "$api_url" > /dev/null 2>&1; then
        ready=true
        break
      fi
      sleep 1
    done
    if [ "$ready" != "true" ]; then
      echo "::error::Ollama API did not become ready within 30 seconds"
      if [ -f "$log_file" ]; then cat "$log_file"; fi
      exit 1
    fi

    # Model names come from the job env (falling back to the previous
    # hard-coded values); each pull is retried to ride out registry blips.
    for model in "${OLLAMA_MODEL:-qwen2.5:0.5b}" "${OLLAMA_EMBEDDING_MODEL:-nomic-embed-text}"; do
      pulled=false
      for attempt in 1 2 3; do
        if ollama pull "$model"; then
          pulled=true
          break
        fi
        echo "::warning::ollama pull $model failed (attempt $attempt of 3)"
        if [ "$attempt" -lt 3 ]; then sleep $((attempt * 5)); fi
      done
      if [ "$pulled" != "true" ]; then
        echo "::error::Failed to pull Ollama model $model"
        exit 1
      fi
    done
Comment thread
giles17 marked this conversation as resolved.
Outdated
working-directory: .
- name: Start local MCP server
id: local-mcp
uses: ./.github/actions/setup-local-mcp-server
Expand Down Expand Up @@ -555,9 +577,9 @@ jobs:
path: ./python/pytest.xml
if-no-files-found: ignore

# Flaky test trend report (aggregates per-job JUnit XML results)
python-flaky-test-report:
name: Flaky Test Report
# Integration test trend report (aggregates per-job JUnit XML results)
python-integration-test-report:
name: Integration Test Report
if: >
always() &&
(contains(join(needs.*.result, ','), 'success') ||
Expand Down Expand Up @@ -587,36 +609,36 @@ jobs:
with:
pattern: test-results-*
path: test-results/
- name: Restore flaky report history cache
- name: Restore report history cache
uses: actions/cache/restore@v4
with:
path: python/flaky-report-history.json
key: flaky-report-history-merge-${{ github.run_id }}
path: python/integration-report-history.json
key: integration-report-history-merge-${{ github.run_id }}
restore-keys: |
flaky-report-history-merge-
integration-report-history-merge-
- name: Generate trend report
run: >
uv run python scripts/flaky_report/aggregate.py
../test-results/
flaky-report-history.json
flaky-test-report.md
integration-report-history.json
integration-test-report.md
- name: Post to Job Summary
if: always()
run: cat flaky-test-report.md >> $GITHUB_STEP_SUMMARY
- name: Save flaky report history cache
run: cat integration-test-report.md >> $GITHUB_STEP_SUMMARY
- name: Save report history cache
if: always()
uses: actions/cache/save@v4
with:
path: python/flaky-report-history.json
key: flaky-report-history-merge-${{ github.run_id }}
path: python/integration-report-history.json
key: integration-report-history-merge-${{ github.run_id }}
- name: Upload unified trend report
if: always()
uses: actions/upload-artifact@v7
with:
name: flaky-test-report
name: integration-test-report
path: |
python/flaky-test-report.md
python/flaky-report-history.json
python/integration-test-report.md
python/integration-report-history.json

python-integration-tests-check:
if: always()
Expand Down
2 changes: 1 addition & 1 deletion python/scripts/flaky_report/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) Microsoft. All rights reserved.

"""Flaky test report aggregation and trend generation.
"""Integration test report aggregation and trend generation.

Parses JUnit XML (``pytest.xml``) files produced by each CI job, merges
them with historical data, and generates a markdown trend report showing
Expand Down
8 changes: 4 additions & 4 deletions python/scripts/flaky_report/__main__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# Copyright (c) Microsoft. All rights reserved.

"""CLI entry point for the flaky test report tool.
"""CLI entry point for the integration test report tool.

Usage:
uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>

Example (from python/ directory):
uv run python -m scripts.flaky_report \\
../flaky-reports/ \\
flaky-report-history.json \\
flaky-test-report.md
../test-results/ \\
integration-report-history.json \\
integration-test-report.md
"""

import sys
Expand Down
2 changes: 1 addition & 1 deletion python/scripts/flaky_report/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def _short_name(nodeid: str) -> str:
def generate_trend_report(runs: list[dict[str, Any]]) -> str:
"""Generate a markdown trend report from run history."""
lines = [
"# 🔬 Flaky Test Report",
"# 🔬 Integration Test Report",
"",
f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*",
"",
Expand Down
Loading