astral-sh · AlexWaygood · Mar 22, 2026 · Mar 22, 2026
diff --git a/src/ecosystem_analyzer/diff.py b/src/ecosystem_analyzer/diff.py
@@ -680,8 +680,16 @@ def _generate_text_diff(self, old_text: str, new_text: str) -> list[str]:
         diff = difflib.ndiff(old_text.splitlines(), new_text.splitlines())
         return list(diff)
 
-    def _calculate_statistics(self) -> DiffStatistics:
-        """Calculate statistics about added, removed, and changed diagnostics."""
+    def _calculate_statistics(
+        self, *, exclude_flaky: bool = False
+    ) -> DiffStatistics:
+        """Calculate statistics about added, removed, and changed diagnostics.
+
+        If *exclude_flaky* is True, flaky diagnostic diffs are excluded from
+        the per-lint and per-project tables as well as the totals.  Stable
+        diagnostics from projects that happen to also have flaky data are
+        still counted.
+        """
         # Intermediate dictionaries (local variables)
         added_by_lint: dict[str, int] = {}
         removed_by_lint: dict[str, int] = {}
@@ -795,44 +803,46 @@ def _calculate_statistics(self) -> DiffStatistics:
                             removed_by_project.get(project_name, 0) + 1
                         )
 
-            # Count flaky location diffs (each location = 1 diagnostic)
-            flaky_diffs = project.get("flaky_diffs", {})
-            for loc in flaky_diffs.get("added", []):
-                total_added += 1
-                # Use the first variant's lint_name as representative
-                lint_name = (
-                    loc["variants"][0]["diagnostic"]["lint_name"]
-                    if loc["variants"]
-                    else "unknown"
-                )
-                added_by_lint[lint_name] = added_by_lint.get(lint_name, 0) + 1
-                added_by_project[project_name] = (
-                    added_by_project.get(project_name, 0) + 1
-                )
+            # Count flaky location diffs (each location = 1 diagnostic).
+            # When exclude_flaky is set, skip these entirely.
+            if not exclude_flaky:
+                flaky_diffs = project.get("flaky_diffs", {})
+                for loc in flaky_diffs.get("added", []):
+                    total_added += 1
+                    # Use the first variant's lint_name as representative
+                    lint_name = (
+                        loc["variants"][0]["diagnostic"]["lint_name"]
+                        if loc["variants"]
+                        else "unknown"
+                    )
+                    added_by_lint[lint_name] = added_by_lint.get(lint_name, 0) + 1
+                    added_by_project[project_name] = (
+                        added_by_project.get(project_name, 0) + 1
+                    )
 
-            for loc in flaky_diffs.get("removed", []):
-                total_removed += 1
-                lint_name = (
-                    loc["variants"][0]["diagnostic"]["lint_name"]
-                    if loc["variants"]
-                    else "unknown"
-                )
-                removed_by_lint[lint_name] = removed_by_lint.get(lint_name, 0) + 1
-                removed_by_project[project_name] = (
-                    removed_by_project.get(project_name, 0) + 1
-                )
+                for loc in flaky_diffs.get("removed", []):
+                    total_removed += 1
+                    lint_name = (
+                        loc["variants"][0]["diagnostic"]["lint_name"]
+                        if loc["variants"]
+                        else "unknown"
+                    )
+                    removed_by_lint[lint_name] = removed_by_lint.get(lint_name, 0) + 1
+                    removed_by_project[project_name] = (
+                        removed_by_project.get(project_name, 0) + 1
+                    )
 
-            for change in flaky_diffs.get("changed", []):
-                total_changed += 1
-                lint_name = (
-                    change["old"]["variants"][0]["diagnostic"]["lint_name"]
-                    if change["old"]["variants"]
-                    else "unknown"
-                )
-                changed_by_lint[lint_name] = changed_by_lint.get(lint_name, 0) + 1
-                changed_by_project[project_name] = (
-                    changed_by_project.get(project_name, 0) + 1
-                )
+                for change in flaky_diffs.get("changed", []):
+                    total_changed += 1
+                    lint_name = (
+                        change["old"]["variants"][0]["diagnostic"]["lint_name"]
+                        if change["old"]["variants"]
+                        else "unknown"
+                    )
+                    changed_by_lint[lint_name] = changed_by_lint.get(lint_name, 0) + 1
+                    changed_by_project[project_name] = (
+                        changed_by_project.get(project_name, 0) + 1
+                    )
 
         # Create merged lint breakdown sorted by total absolute change (descending)
         all_lints = (
@@ -1165,8 +1175,9 @@ def render_statistics_markdown(
         *,
         inline_threshold: int = 15,
         max_raw_diff_lines: int = 100,
+        exclude_flaky: bool = False,
     ) -> str:
-        statistics = self._calculate_statistics()
+        statistics = self._calculate_statistics(exclude_flaky=exclude_flaky)
         failed_projects = self.diffs.get("failed_projects", [])
 
         markdown_content = ""
@@ -1235,10 +1246,24 @@ def render_statistics_markdown(
         )
         raw_diff_lines = self._render_raw_diff_sections(raw_diff_sections)
 
-        if omitted_flaky_projects:
+        # Check whether any flaky diagnostic diffs were excluded from the
+        # summary table so we can tell the reader.
+        excluded_flaky_from_table = False
+        if exclude_flaky:
+            for project in self.diffs["modified_projects"]:
+                flaky_diffs = project.get("flaky_diffs", {})
+                if (
+                    flaky_diffs.get("added")
+                    or flaky_diffs.get("removed")
+                    or flaky_diffs.get("changed")
+                ):
+                    excluded_flaky_from_table = True
+                    break
+
+        if omitted_flaky_projects or excluded_flaky_from_table:
             markdown_content += (
                 "\n\n_Changes in flaky projects detected. "
-                "Raw diff output excludes flaky projects; see the HTML report for details._"
+                "This PR summary excludes flaky changes; see the HTML report for details._"
             )
 
         if not raw_diff_lines:

diff --git a/src/ecosystem_analyzer/main.py b/src/ecosystem_analyzer/main.py
@@ -520,6 +520,12 @@ def generate_timing_diff(
     show_default=True,
     help="Exit with a non-zero status if a project regresses from exit code 0/1 to another exit code or a timeout.",
 )
+@click.option(
+    "--exclude-flaky/--no-exclude-flaky",
+    default=False,
+    show_default=True,
+    help="Exclude flaky diagnostics from the summary table.",
+)
 def generate_diff_statistics(
     old_file: str,
     new_file: str,
@@ -529,6 +535,7 @@ def generate_diff_statistics(
     old_name: str | None,
     new_name: str | None,
     fail_on_new_abnormal_exits: bool,
+    exclude_flaky: bool,
 ) -> None:
     """
     Generate a Markdown statistics report of diagnostic differences between two JSON files.
@@ -540,6 +547,7 @@ def generate_diff_statistics(
     markdown_content = diff.render_statistics_markdown(
         inline_threshold=inline_threshold,
         max_raw_diff_lines=max_raw_diff_lines,
+        exclude_flaky=exclude_flaky,
     )
 
     with open(output, "w") as f:

diff --git a/tests/test_json_roundtrip.py b/tests/test_json_roundtrip.py
@@ -395,6 +395,98 @@ def test_statistics_markdown_includes_large_timing_changes(self):
         assert "| `slow-project` | 10.00s | 15.00s | +50% |" in markdown
         assert "| `very-fast-project` | 10.00s | 5.00s | -50% |" in markdown
 
+    def test_exclude_flaky_omits_only_flaky_diffs_from_statistics(self):
+        """With exclude_flaky, flaky diffs are excluded but stable diffs from the same project are kept."""
+        stable_diag = {
+            "level": "error",
+            "lint_name": "some-lint",
+            "path": "a.py",
+            "line": 1,
+            "column": 1,
+            "message": "stable",
+        }
+        new_stable_diag = {
+            "level": "error",
+            "lint_name": "some-lint",
+            "path": "a.py",
+            "line": 2,
+            "column": 1,
+            "message": "new stable diagnostic",
+        }
+        # Flaky location only on new side (genuinely new flaky location)
+        new_flaky = [
+            _make_flaky_loc(
+                "c.py", 50, 1, [_make_variant("c.py", 50, 1, "flaky variant", count=2)]
+            )
+        ]
+
+        old_data = {"outputs": [_make_output("proj", [stable_diag])]}
+        new_data = {
+            "outputs": [
+                _make_output(
+                    "proj", [stable_diag, new_stable_diag], new_flaky, flaky_runs=3
+                )
+            ]
+        }
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f1:
+            json.dump(old_data, f1)
+            old_path = f1.name
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f2:
+            json.dump(new_data, f2)
+            new_path = f2.name
+
+        diff = DiagnosticDiff(old_path, new_path)
+
+        # Without exclude_flaky: 1 stable added + 1 flaky added = 2
+        stats = diff._calculate_statistics(exclude_flaky=False)
+        assert stats["total_added"] == 2
+
+        # With exclude_flaky: only the stable diagnostic is counted
+        stats_excl = diff._calculate_statistics(exclude_flaky=True)
+        assert stats_excl["total_added"] == 1
+        lint_entry = next(
+            e for e in stats_excl["merged_by_lint"] if e["lint_name"] == "some-lint"
+        )
+        assert lint_entry["added"] == 1
+
+    def test_exclude_flaky_keeps_stable_diffs_from_flaky_projects(self):
+        """Stable diagnostic changes from a project with flaky_runs > 1 are still counted."""
+        diag = {
+            "level": "error",
+            "lint_name": "some-lint",
+            "path": "a.py",
+            "line": 1,
+            "column": 1,
+            "message": "msg",
+        }
+        new_diag = {
+            "level": "error",
+            "lint_name": "some-lint",
+            "path": "a.py",
+            "line": 2,
+            "column": 1,
+            "message": "new msg",
+        }
+
+        # Project has flaky_runs=5 but no flaky_diagnostics — the stable
+        # diagnostic change should still be counted even with exclude_flaky.
+        old_data = {"outputs": [_make_output("proj", [diag], flaky_runs=5)]}
+        new_data = {
+            "outputs": [_make_output("proj", [diag, new_diag], flaky_runs=5)]
+        }
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f1:
+            json.dump(old_data, f1)
+            old_path = f1.name
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f2:
+            json.dump(new_data, f2)
+            new_path = f2.name
+
+        diff = DiagnosticDiff(old_path, new_path)
+        stats = diff._calculate_statistics(exclude_flaky=True)
+        assert stats["total_added"] == 1
+
     def test_statistics_markdown_omits_small_or_failed_timing_changes(self):
         old_data = {
             "outputs": [