diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml index f2fb5c6448..48d15bceda 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -87,6 +87,14 @@ jobs: -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 + --junitxml=pytest.xml + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-openai + path: ./python/pytest.xml + if-no-files-found: ignore # Azure OpenAI integration tests python-tests-azure-openai: @@ -130,6 +138,14 @@ jobs: -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 + --junitxml=pytest.xml + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-azure-openai + path: ./python/pytest.xml + if-no-files-found: ignore # Misc integration tests (Anthropic, Hyperlight, Ollama, MCP) python-tests-misc-integration: @@ -173,6 +189,14 @@ jobs: -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 30 + --junitxml=pytest.xml + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-misc + path: ./python/pytest.xml + if-no-files-found: ignore - name: Stop local MCP server if: always() shell: bash @@ -249,6 +273,14 @@ jobs: -x --timeout=360 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 + --junitxml=pytest.xml + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-functions + path: ./python/pytest.xml + if-no-files-found: ignore # Foundry integration tests python-tests-foundry: @@ -295,6 +327,14 @@ jobs: -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 + --junitxml=pytest.xml + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-foundry + path: ./python/pytest.xml + if-no-files-found: ignore # Azure Cosmos integration tests python-tests-cosmos: @@ -339,7 +379,80 @@ jobs: echo "Cosmos DB emulator did not become ready in time." 
>&2 exit 1 - name: Test with pytest (Cosmos integration) - run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 + run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=${{ github.workspace }}/python/pytest.xml + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-cosmos + path: ./python/pytest.xml + if-no-files-found: ignore + + # Flaky test trend report (aggregates per-job JUnit XML results) + python-flaky-test-report: + name: Flaky Test Report + if: > + always() && + (contains(join(needs.*.result, ','), 'success') || + contains(join(needs.*.result, ','), 'failure')) + needs: + [ + python-tests-openai, + python-tests-azure-openai, + python-tests-misc-integration, + python-tests-functions, + python-tests-foundry, + python-tests-cosmos, + ] + runs-on: ubuntu-latest + defaults: + run: + working-directory: python + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.checkout-ref }} + persist-credentials: false + - name: Set up python and install the project + uses: ./.github/actions/python-setup + with: + python-version: ${{ env.UV_PYTHON }} + os: ${{ runner.os }} + - name: Download all test results from current run + uses: actions/download-artifact@v4 + with: + pattern: test-results-* + path: test-results/ + - name: Restore flaky report history cache + uses: actions/cache/restore@v4 + with: + path: python/flaky-report-history.json + key: flaky-report-history-integration-${{ github.run_id }} + restore-keys: | + flaky-report-history-integration- + - name: Generate trend report + run: > + uv run python scripts/flaky_report/aggregate.py + ../test-results/ + flaky-report-history.json + flaky-test-report.md + - name: Post to Job Summary + if: always() + run: cat flaky-test-report.md >> $GITHUB_STEP_SUMMARY + - name: Save flaky report history cache + if: always() + uses: actions/cache/save@v4 + with: + path: python/flaky-report-history.json + key: flaky-report-history-integration-${{ github.run_id }} + - name: Upload unified trend report + if: always() + uses: actions/upload-artifact@v7 + with: + name: flaky-test-report + path: | + python/flaky-test-report.md + python/flaky-report-history.json python-integration-tests-check: if: always() diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml index dd48b268df..843253e788 100644 --- a/.github/workflows/python-merge-tests.yml +++ b/.github/workflows/python-merge-tests.yml @@ -181,6 +181,13 @@ jobs: display-options: fEX fail-on-empty: false title: OpenAI integration test results + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-openai + path: ./python/pytest.xml + if-no-files-found: ignore # Azure OpenAI integration tests python-tests-azure-openai: @@ -244,6 +251,13 @@ jobs: display-options: fEX fail-on-empty: false title: Azure OpenAI integration test results + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-azure-openai + path: ./python/pytest.xml + if-no-files-found: ignore # Misc integration tests (Anthropic, Ollama, MCP) python-tests-misc-integration: @@ -321,6 +335,13 @@ jobs: display-options: fEX fail-on-empty: false title: Misc integration test results + - 
name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-misc + path: ./python/pytest.xml + if-no-files-found: ignore # Azure Functions + Durable Task integration tests python-tests-functions: @@ -392,6 +413,13 @@ jobs: display-options: fEX fail-on-empty: false title: Functions integration test results + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-functions + path: ./python/pytest.xml + if-no-files-found: ignore python-tests-foundry: name: Python Integration Tests - Foundry @@ -409,6 +437,10 @@ jobs: FOUNDRY_MODEL: ${{ vars.FOUNDRY_MODEL }} FOUNDRY_AGENT_NAME: ${{ vars.FOUNDRY_AGENT_NAME }} FOUNDRY_AGENT_VERSION: ${{ vars.FOUNDRY_AGENT_VERSION }} + FOUNDRY_MODELS_ENDPOINT: ${{ vars.FOUNDRY_MODELS_ENDPOINT || '' }} + FOUNDRY_MODELS_API_KEY: ${{ secrets.FOUNDRY_MODELS_API_KEY || '' }} + FOUNDRY_EMBEDDING_MODEL: ${{ vars.FOUNDRY_EMBEDDING_MODEL || '' }} + FOUNDRY_IMAGE_EMBEDDING_MODEL: ${{ vars.FOUNDRY_IMAGE_EMBEDDING_MODEL || '' }} LOCAL_MCP_URL: ${{ vars.LOCAL_MCP__URL }} defaults: run: @@ -448,6 +480,13 @@ jobs: display-options: fEX fail-on-empty: false title: Test results + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-foundry + path: ./python/pytest.xml + if-no-files-found: ignore # TODO: Add python-tests-lab @@ -497,7 +536,7 @@ jobs: echo "Cosmos DB emulator did not become ready in time." >&2 exit 1 - name: Test with pytest (Cosmos integration) - run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=pytest.xml + run: uv run --directory packages/azure-cosmos poe integration-tests -n logical --dist worksteal --timeout=120 --session-timeout=900 --timeout_method thread --retries 2 --retry-delay 5 --junitxml=${{ github.workspace }}/python/pytest.xml working-directory: ./python - name: Surface failing tests if: always() @@ -508,6 +547,76 @@ jobs: display-options: fEX fail-on-empty: false title: Cosmos integration test results + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: test-results-cosmos + path: ./python/pytest.xml + if-no-files-found: ignore + + # Flaky test trend report (aggregates per-job JUnit XML results) + python-flaky-test-report: + name: Flaky Test Report + if: > + always() && + (contains(join(needs.*.result, ','), 'success') || + contains(join(needs.*.result, ','), 'failure')) + needs: + [ + python-tests-openai, + python-tests-azure-openai, + python-tests-misc-integration, + python-tests-functions, + python-tests-foundry, + python-tests-cosmos, + ] + runs-on: ubuntu-latest + defaults: + run: + working-directory: python + steps: + - uses: actions/checkout@v6 + - name: Set up python and install the project + uses: ./.github/actions/python-setup + with: + python-version: ${{ env.UV_PYTHON }} + os: ${{ runner.os }} + - name: Download all test results from current run + uses: actions/download-artifact@v4 + with: + pattern: test-results-* + path: test-results/ + - name: Restore flaky report history cache + uses: actions/cache/restore@v4 + with: + path: python/flaky-report-history.json + key: flaky-report-history-merge-${{ github.run_id }} + restore-keys: | + flaky-report-history-merge- + - name: Generate trend report + run: > + uv run python scripts/flaky_report/aggregate.py + ../test-results/ + flaky-report-history.json + 
flaky-test-report.md + - name: Post to Job Summary + if: always() + run: cat flaky-test-report.md >> $GITHUB_STEP_SUMMARY + - name: Save flaky report history cache + if: always() + uses: actions/cache/save@v4 + with: + path: python/flaky-report-history.json + key: flaky-report-history-merge-${{ github.run_id }} + - name: Upload unified trend report + if: always() + uses: actions/upload-artifact@v7 + with: + name: flaky-test-report + path: | + python/flaky-test-report.md + python/flaky-report-history.json python-integration-tests-check: if: always() diff --git a/python/scripts/flaky_report/__init__.py b/python/scripts/flaky_report/__init__.py new file mode 100644 index 0000000000..e5a0eeb0ca --- /dev/null +++ b/python/scripts/flaky_report/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Flaky test report aggregation and trend generation. + +Parses JUnit XML (``pytest.xml``) files produced by each CI job, merges +them with historical data, and generates a markdown trend report showing +per-test status across the last N runs. + +Usage: + uv run python -m scripts.flaky_report +""" diff --git a/python/scripts/flaky_report/__main__.py b/python/scripts/flaky_report/__main__.py new file mode 100644 index 0000000000..89969baae6 --- /dev/null +++ b/python/scripts/flaky_report/__main__.py @@ -0,0 +1,20 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""CLI entry point for the flaky test report tool. + +Usage: + uv run python -m scripts.flaky_report + +Example (from python/ directory): + uv run python -m scripts.flaky_report \\ + ../flaky-reports/ \\ + flaky-report-history.json \\ + flaky-test-report.md +""" + +import sys + +from scripts.flaky_report.aggregate import main + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/flaky_report/aggregate.py new file mode 100644 index 0000000000..e07a5e136a --- /dev/null +++ b/python/scripts/flaky_report/aggregate.py @@ -0,0 +1,396 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Aggregate per-provider JUnit XML test results and generate a trend report. + +Parses ``pytest.xml`` (JUnit XML) files produced by each CI job, merges them +into a single run, combines with historical data, and generates a markdown +trend table — the same pattern used by ``scripts/sample_validation/aggregate.py``. + +Usage (from CI): + python aggregate.py + +The reports directory is expected to contain subdirectories named +``test-results-/`` each containing a ``pytest.xml`` file +(created by ``actions/download-artifact``). +""" + +from __future__ import annotations + +import json +import sys +import xml.etree.ElementTree as ET +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +MAX_HISTORY = 5 + +STATUS_EMOJI = { + "passed": "✅", + "failed": "❌", + "skipped": "⏭️", + "xfailed": "⚠️", + "error": "❌", +} + + +def _format_run_label(timestamp: str) -> str: + """Format a timestamp as a compact column label (e.g. '04-16 00:57').""" + try: + dt = datetime.fromisoformat(timestamp) + return dt.strftime("%m-%d %H:%M") + except (ValueError, TypeError): + return timestamp[:16] + + +def _derive_provider(directory_name: str) -> str: + """Derive a provider label from a report directory name. 
+ + ``test-results-openai`` → ``OpenAI`` + ``test-results-azure-openai`` → ``Azure OpenAI`` + """ + raw = directory_name.replace("test-results-", "") + known = { + "openai": "OpenAI", + "azure-openai": "Azure OpenAI", + "misc": "Misc (Anthropic, Ollama, MCP)", + "functions": "Functions", + "foundry": "Foundry", + "cosmos": "Cosmos", + "unit": "Unit", + } + if raw in known: + return known[raw] + parts = raw.split("-") + return " ".join(p.capitalize() for p in parts) + + +def _parse_junit_xml(xml_path: Path) -> list[dict[str, str]]: + """Parse a JUnit XML file and return a list of test result dicts. + + Each dict has keys: ``nodeid``, ``status``, ``duration``, ``message``. + """ + results: list[dict[str, str]] = [] + try: + tree = ET.parse(xml_path) # noqa: S314 + except ET.ParseError as exc: + print(f"Warning: failed to parse JUnit XML report '{xml_path}': {exc}", file=sys.stderr) + return results + root = tree.getroot() + + # Handle both ... and ... layouts + testcases: list[ET.Element] = [] + if root.tag == "testsuites": + for suite in root.findall("testsuite"): + testcases.extend(suite.findall("testcase")) + elif root.tag == "testsuite": + testcases = list(root.findall("testcase")) + + for tc in testcases: + classname = tc.get("classname", "") + name = tc.get("name", "") + duration = tc.get("time", "0") + + # Use classname::name as a stable identifier. + # pytest writes classname as the dotted module path (possibly including + # a test class), e.g. "packages.openai.tests.openai.test_chat_client" + # or "packages.openai.tests.openai.test_chat_client.TestClass". + nodeid = f"{classname}::{name}" if classname else name + + # Extract module/file name from classname for display context. + # pytest writes classname as a dotted path. For tests inside a class + # it appends the class name, e.g.: + # "packages.foundry.tests.foundry.test_foundry_embedding_client.TestFoundryEmbeddingIntegration" + # We want the file-level module: "test_foundry_embedding_client" + if classname: + parts = classname.rsplit(".", 2) + # If the last segment starts with uppercase it's a class name — take the one before it + if len(parts) >= 2 and parts[-1][0:1].isupper(): + module = parts[-2] + else: + module = parts[-1] + else: + module = "" + + # Determine status from child elements + failure = tc.find("failure") + error = tc.find("error") + skipped = tc.find("skipped") + + if failure is not None: + status = "failed" + message = failure.get("message", "") + elif error is not None: + status = "error" + message = error.get("message", "") + elif skipped is not None: + # pytest marks xfail as + skip_type = skipped.get("type", "") + status = "xfailed" if "xfail" in skip_type else "skipped" + message = skipped.get("message", "") + else: + status = "passed" + message = "" + + results.append({ + "nodeid": nodeid, + "status": status, + "duration": duration, + "message": message, + "module": module, + }) + + return results + + +# --------------------------------------------------------------------------- +# Loading +# --------------------------------------------------------------------------- + + +def load_current_run(reports_dir: Path) -> dict[str, Any]: + """Load per-provider JUnit XML reports from the current CI run and merge. + + Args: + reports_dir: Directory containing ``test-results-/`` subdirs. + + Returns: + Merged run dict with ``timestamp``, ``summary``, ``results``. 
+ """ + combined_results: dict[str, dict[str, str]] = {} # nodeid → {status, provider} + + # actions/download-artifact creates: reports_dir/test-results-openai/pytest.xml + xml_files: list[tuple[str, Path]] = [] + if reports_dir.is_dir(): + for subdir in sorted(reports_dir.iterdir()): + if subdir.is_dir(): + xml_file = subdir / "pytest.xml" + if xml_file.exists(): + xml_files.append((subdir.name, xml_file)) + + if not xml_files: + print(f"Warning: No pytest.xml files found in {reports_dir}") + return { + "timestamp": datetime.now(timezone.utc).isoformat(), + "summary": { + "total": 0, + "passed": 0, + "failed": 0, + "skipped": 0, + }, + "results": {}, + } + + for dir_name, xml_file in xml_files: + print(f" Loading: {xml_file}") + provider = _derive_provider(dir_name) + tests = _parse_junit_xml(xml_file) + for test in tests: + combined_results[test["nodeid"]] = { + "status": test["status"], + "provider": provider, + "module": test.get("module", ""), + } + + # Build summary counts using mutually exclusive status buckets. + # Errors are folded into the failed count for display purposes. + statuses = [r["status"] for r in combined_results.values()] + summary = { + "total": len(statuses), + "passed": statuses.count("passed"), + "failed": statuses.count("failed") + statuses.count("error"), + "skipped": statuses.count("skipped"), + } + + return { + "timestamp": datetime.now(timezone.utc).isoformat(), + "summary": summary, + "results": combined_results, + } + + +def load_history(history_path: Path) -> list[dict[str, Any]]: + """Load previous run history from a cache file.""" + if history_path.exists(): + with open(history_path, encoding="utf-8") as f: + data = json.load(f) + runs = data.get("runs", []) + print(f" Loaded {len(runs)} previous run(s) from history") + return runs + print(" No previous history found") + return [] + + +def save_history(history_path: Path, runs: list[dict[str, Any]]) -> None: + """Save run history, keeping only the last ``MAX_HISTORY`` entries.""" + history_path.parent.mkdir(parents=True, exist_ok=True) + trimmed = runs[-MAX_HISTORY:] + with open(history_path, "w", encoding="utf-8") as f: + json.dump({"runs": trimmed}, f, indent=2) + print(f" Saved {len(trimmed)} run(s) to history") + + +# --------------------------------------------------------------------------- +# Report generation +# --------------------------------------------------------------------------- + + +def _short_name(nodeid: str) -> str: + """Extract a short test name from a full nodeid. 
+ + ``packages.openai.tests.openai.test_openai_chat_client::test_integration_options`` + → ``test_integration_options`` + """ + return nodeid.split("::")[-1] if "::" in nodeid else nodeid + + +def generate_trend_report(runs: list[dict[str, Any]]) -> str: + """Generate a markdown trend report from run history.""" + lines = [ + "# 🔬 Flaky Test Report", + "", + f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*", + "", + ] + + # --- Overall status table (most recent first) --- + lines.append("## Overall Status (Last 5 Runs)") + lines.append("") + lines.append("| Run | Total | ✅ Passed | ❌ Failed | ⏭️ Skipped |") + lines.append("|-----|-------|-----------|-----------|------------|") + + for run in reversed(runs): + s = run.get("summary", {}) + total = s.get("total", 0) + label = _format_run_label(run["timestamp"]) + lines.append( + f"| {label} " + f"| {total} " + f"| {s.get('passed', 0)}/{total} " + f"| {s.get('failed', 0)}/{total} " + f"| {s.get('skipped', 0)}/{total} |" + ) + + for _ in range(MAX_HISTORY - len(runs)): + lines.append("| N/A | N/A | N/A | N/A | N/A |") + + lines.append("") + + # --- Per-test results table --- + lines.append("## Per-Test Results") + lines.append("") + + # Collect all test nodeids, providers, and modules across all runs + all_tests: dict[str, str] = {} # nodeid → provider (from most recent run) + all_modules: dict[str, str] = {} # nodeid → module (from most recent run) + for run in runs: + for nodeid, info in run.get("results", {}).items(): + provider = info.get("provider", "Unknown") if isinstance(info, dict) else "Unknown" + module = info.get("module", "") if isinstance(info, dict) else "" + all_tests[nodeid] = provider + all_modules[nodeid] = module + + if not all_tests: + lines.append("*No test results available.*") + return "\n".join(lines) + + # Build header (most recent run first) + header = "| Test | File | Provider |" + separator = "|------|------|----------|" + for run in reversed(runs): + label = _format_run_label(run["timestamp"]) + header += f" {label} |" + separator += "------------|" + for _ in range(MAX_HISTORY - len(runs)): + header += " N/A |" + separator += "-----|" + + lines.append(header) + lines.append(separator) + + # Sort by provider then test name + for nodeid in sorted(all_tests, key=lambda n: (all_tests[n], n)): + provider = all_tests[nodeid] + module = all_modules.get(nodeid, "") + short = _short_name(nodeid) + row = f"| `{short}` | `{module}` | {provider} |" + + for run in reversed(runs): + result = run.get("results", {}).get(nodeid) + if result is None: + emoji = "N/A" + else: + status = result.get("status", "N/A") if isinstance(result, dict) else result + emoji = STATUS_EMOJI.get(status, "❓") + row += f" {emoji} |" + + for _ in range(MAX_HISTORY - len(runs)): + row += " N/A |" + + lines.append(row) + + lines.append("") + lines.append("**Legend:** ✅ Passed · ❌ Failed · ⏭️ Skipped · ⚠️ Expected Failure (xfail) · N/A Not available") + lines.append("") + + return "\n".join(lines) + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + + +def main() -> int: + if len(sys.argv) != 4: + print("Usage: python aggregate.py ") + return 1 + + reports_dir = Path(sys.argv[1]) + history_path = Path(sys.argv[2]) + output_path = Path(sys.argv[3]) + + print("Aggregating test results from JUnit XML...") + + # Load current run's per-provider XML reports + print(f"\nLoading reports from {reports_dir}:") + 
current_run = load_current_run(reports_dir) + s = current_run.get("summary", {}) + total = s.get("total", 0) + print( + f" Current run: {s.get('passed', 0)} passed, " + f"{s.get('failed', 0)} failed, " + f"{s.get('skipped', 0)} skipped " + f"(total: {total})" + ) + + # Load history and append current run (skip empty runs to avoid polluting trend) + print(f"\nLoading history from {history_path}:") + runs = load_history(history_path) + if total > 0: + runs.append(current_run) + runs = runs[-MAX_HISTORY:] + else: + print(" Skipping history append (no test results in current run)") + + # Save updated history + print(f"\nSaving history to {history_path}:") + save_history(history_path, runs) + + # Generate trend report + print("\nGenerating trend report...") + report = generate_trend_report(runs) + + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(report, encoding="utf-8") + print(f"Trend report written to {output_path}") + + # Print the report to stdout for CI visibility + print("\n" + "=" * 80) + print(report) + + return 0 + + +if __name__ == "__main__": + sys.exit(main())
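
---

Reviewer note (not part of the diff): each job now writes `--junitxml=pytest.xml` into `python/` and uploads it as a `test-results-<provider>` artifact (the Cosmos jobs pass an absolute `${{ github.workspace }}/python/pytest.xml` because pytest there runs with `--directory packages/azure-cosmos`). The report job downloads them with `actions/download-artifact`, which recreates one subdirectory per artifact — the layout `load_current_run()` walks. A minimal sketch of that contract; the XML content, test names, and paths below are illustrative only:

```python
# Sketch: exercise load_current_run() against the directory layout produced by
# actions/download-artifact (one "test-results-<provider>/" subdir per job).
# Run from python/ so the scripts package is importable.
import tempfile
from pathlib import Path

from scripts.flaky_report.aggregate import load_current_run

SAMPLE_JUNIT = """<?xml version="1.0" encoding="utf-8"?>
<testsuites>
  <testsuite name="pytest" tests="2">
    <testcase classname="packages.openai.tests.test_chat_client" name="test_basic" time="0.42"/>
    <testcase classname="packages.openai.tests.test_chat_client" name="test_streaming" time="1.10">
      <failure message="assert False">traceback...</failure>
    </testcase>
  </testsuite>
</testsuites>
"""

with tempfile.TemporaryDirectory() as tmp:
    reports_dir = Path(tmp)
    # download-artifact with pattern "test-results-*" creates one subdir per artifact
    job_dir = reports_dir / "test-results-openai"
    job_dir.mkdir()
    (job_dir / "pytest.xml").write_text(SAMPLE_JUNIT, encoding="utf-8")

    run = load_current_run(reports_dir)
    assert run["summary"] == {"total": 2, "passed": 1, "failed": 1, "skipped": 0}
    assert run["results"]["packages.openai.tests.test_chat_client::test_basic"]["provider"] == "OpenAI"
```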
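The status mapping in `_parse_junit_xml()` relies on how pytest's junitxml writer encodes outcomes: failures and errors as `<failure>`/`<error>` children, skips and xfails both as `<skipped>`, with xfail distinguished by a `type` attribute containing "xfail" (assumed here to be `pytest.xfail`). A small sketch exercising all four outcomes with synthetic XML:

```python
# Sketch of the per-testcase status derivation; the XML mirrors (an assumption of)
# pytest's junitxml output, including <skipped type="pytest.xfail"> for xfail.
import tempfile
from pathlib import Path

from scripts.flaky_report.aggregate import _parse_junit_xml

XML = """<testsuite name="pytest" tests="4">
  <testcase classname="pkg.tests.test_mod" name="test_ok" time="0.1"/>
  <testcase classname="pkg.tests.test_mod" name="test_broken" time="0.1">
    <failure message="assert 1 == 2"/>
  </testcase>
  <testcase classname="pkg.tests.test_mod" name="test_skipped" time="0">
    <skipped type="pytest.skip" message="requires API key"/>
  </testcase>
  <testcase classname="pkg.tests.test_mod" name="test_known_issue" time="0">
    <skipped type="pytest.xfail" message="known flaky upstream"/>
  </testcase>
</testsuite>"""

with tempfile.TemporaryDirectory() as tmp:
    xml_path = Path(tmp) / "pytest.xml"
    xml_path.write_text(XML, encoding="utf-8")
    results = _parse_junit_xml(xml_path)
    print({r["nodeid"].split("::")[-1]: r["status"] for r in results})
    # {'test_ok': 'passed', 'test_broken': 'failed',
    #  'test_skipped': 'skipped', 'test_known_issue': 'xfailed'}
```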
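The cache restore/save pair gives the report job a rolling window: `restore-keys` prefix-matches the most recent prior history, and `save_history()` trims to `MAX_HISTORY` runs before writing under the run-scoped key, so the cached JSON never grows unboundedly. Sketch of the trimming behaviour, with fabricated timestamps:

```python
# Sketch: save_history() keeps only the last MAX_HISTORY (5) runs.
import tempfile
from pathlib import Path

from scripts.flaky_report.aggregate import MAX_HISTORY, load_history, save_history

with tempfile.TemporaryDirectory() as tmp:
    history = Path(tmp) / "flaky-report-history.json"
    runs = [
        {"timestamp": f"2025-01-0{i}T00:00:00+00:00", "summary": {"total": 1}, "results": {}}
        for i in range(1, 8)  # seven runs, two more than MAX_HISTORY
    ]
    save_history(history, runs)
    kept = load_history(history)
    assert len(kept) == MAX_HISTORY
    assert kept[0]["timestamp"].startswith("2025-01-03")  # the two oldest runs were dropped
```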
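For a sense of what lands in `$GITHUB_STEP_SUMMARY`, here is `generate_trend_report()` fed two synthetic runs (all names and timestamps are made up); columns are most-recent-first and padded with N/A up to five runs:

```python
# Sketch: the markdown tables the report job appends to the Job Summary.
from scripts.flaky_report.aggregate import generate_trend_report

runs = [
    {
        "timestamp": "2025-06-01T10:00:00+00:00",
        "summary": {"total": 2, "passed": 2, "failed": 0, "skipped": 0},
        "results": {
            "pkg.tests.test_mod::test_a": {"status": "passed", "provider": "OpenAI", "module": "test_mod"},
            "pkg.tests.test_mod::test_b": {"status": "passed", "provider": "OpenAI", "module": "test_mod"},
        },
    },
    {
        "timestamp": "2025-06-02T10:00:00+00:00",
        "summary": {"total": 2, "passed": 1, "failed": 1, "skipped": 0},
        "results": {
            "pkg.tests.test_mod::test_a": {"status": "passed", "provider": "OpenAI", "module": "test_mod"},
            "pkg.tests.test_mod::test_b": {"status": "failed", "provider": "OpenAI", "module": "test_mod"},
        },
    },
]
print(generate_trend_report(runs))
# Emits the "Overall Status (Last 5 Runs)" table plus per-test rows such as:
# | `test_b` | `test_mod` | OpenAI | ❌ | ✅ | N/A | N/A | N/A |
```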
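The CI "Generate trend report" step can be reproduced locally from `python/` with the same three positional arguments (reports directory, history JSON, output markdown). A sketch that drives `main()` directly; the paths are whatever you downloaded the `test-results-*` artifacts into:

```python
# Sketch: local equivalent of the workflow's "Generate trend report" step.
import sys

from scripts.flaky_report.aggregate import main

sys.argv = [
    "aggregate.py",
    "../test-results/",            # downloaded artifacts, one subdir per job
    "flaky-report-history.json",   # rolling history (created on first run)
    "flaky-test-report.md",        # markdown destined for the Job Summary
]
raise SystemExit(main())
```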