From 3f2fc85a20930cfca3d44e2fb648f66f22cdd8e7 Mon Sep 17 00:00:00 2001 From: "cmeans-claude-dev[bot]" <272174644+cmeans-claude-dev[bot]@users.noreply.github.com> Date: Sun, 26 Apr 2026 21:36:09 -0500 Subject: [PATCH] fix(collector): _write_health OSError no longer escapes per-package isolation (closes #32) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced by independent code-review pass against main `4e4148f` (2026-04-26): `collect()` called `_write_health(...)` with no try/except, and `__main__.main()` had no handler around `collector_fn(config)`. A failure inside the health-write step (disk full, output dir not writable, cross-device atomic-replace, etc.) propagated as an unhandled OSError traceback all the way out of the process, bypassing the structured per-package failure summary that operators rely on. This defeated the per-package isolation contract one level up: `_collect_one` already wraps `(CollectorError, OSError)` so one bad package can't kill the run, but the final health-write step had no equivalent guard. The fix: - Wrap the `_write_health(...)` call in `collect()` with `try: ... except OSError as e: logger.error(...)`. Surface the failure structurally via a new `CollectorResult.health_write_error: str | None` field (default `None`, additive change — existing call sites that construct `CollectorResult` keep working without modification). - Update `__main__.main()` to combine per-package failures and health-write failures into one structured exit message: `winnow-collect: 2 package(s) failed: foo, bar; health file write failed: [Errno 28] No space left on device`. Both modes produce non-zero exit; either alone produces a single-clause message. Tests added: - `test_collect_health_write_oserror_recorded_not_raised` — monkeypatches `collector_module.os.replace` to raise ENOSPC ONLY on the `_health.json` target (badge writes still work via the real call). Asserts the per-package outcome lands intact AND `result.health_write_error` is set, AND no exception escapes `collect()`. - `test_main_exits_nonzero_when_health_file_write_fails` — calls `__main__.main()` with a fake `collector_fn` that returns `health_write_error="[Errno 28] No space left on device"`; asserts SystemExit message contains the structured text. - `test_main_combines_package_and_health_failure_messages` — same shape with both per-package failures AND health_write_error set; asserts both clauses appear in the exit message. - Existing `test_collect_result_reports_no_failures_on_full_success` extended to assert `health_write_error is None` on the happy path. Verified locally: 59/59 pytest pass, ruff/format/mypy clean. --- CHANGELOG.md | 1 + src/pypi_winnow_downloads/__main__.py | 8 ++++- src/pypi_winnow_downloads/collector.py | 21 +++++++++++-- tests/test_collector.py | 40 ++++++++++++++++++++++++ tests/test_main.py | 43 ++++++++++++++++++++++++++ 5 files changed, 110 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb25aae..4a98b46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **collector: `_write_health` `OSError` no longer escapes per-package isolation.** A failure inside the health-file write step (disk full, output dir not writable, atomic-replace cross-device, etc.) used to propagate as an unhandled `OSError` traceback through `__main__.main()`, bypassing the structured `winnow-collect: N package(s) failed: …` exit message and producing a raw exit. The fix wraps `_write_health(...)` inside `collect()` with `try/except OSError`, logs the failure, and surfaces it via a new `CollectorResult.health_write_error: str | None` field (default `None`, backward-compat). `__main__.main()` now combines per-package failures and health-write failures into one structured exit message (`winnow-collect: 2 package(s) failed: foo, bar; health file write failed: [Errno 28] No space left on device`). Adds `test_collect_health_write_oserror_recorded_not_raised` (monkeypatches `os.replace` to raise `ENOSPC` only on the health file) and `test_main_combines_package_and_health_failure_messages` for regression coverage. Closes #32. - `README.md` line 11: canonicalized the [shields.io](https://shields.io/badges/endpoint-badge) doc link to match the form already used at line 96. Both references now point at the same canonical URL instead of relying on `/endpoint` redirecting to `/badges/endpoint-badge`. Closes #16. - `deploy/README.md` gains an `## Alternative HTTPS exposure: diff --git a/src/pypi_winnow_downloads/__main__.py b/src/pypi_winnow_downloads/__main__.py index f67678c..1ad7255 100644 --- a/src/pypi_winnow_downloads/__main__.py +++ b/src/pypi_winnow_downloads/__main__.py @@ -37,9 +37,15 @@ def main( result = collector_fn(config) + problems: list[str] = [] if result.failures: names = ", ".join(f.package for f in result.failures) - sys.exit(f"winnow-collect: {len(result.failures)} package(s) failed: {names}") + problems.append(f"{len(result.failures)} package(s) failed: {names}") + if result.health_write_error: + problems.append(f"health file write failed: {result.health_write_error}") + + if problems: + sys.exit(f"winnow-collect: {'; '.join(problems)}") if __name__ == "__main__": diff --git a/src/pypi_winnow_downloads/collector.py b/src/pypi_winnow_downloads/collector.py index 1eeaccb..a5fe3ef 100644 --- a/src/pypi_winnow_downloads/collector.py +++ b/src/pypi_winnow_downloads/collector.py @@ -85,6 +85,7 @@ class CollectorResult: started: datetime finished: datetime outcomes: tuple[PackageOutcome, ...] + health_write_error: str | None = None @property def failures(self) -> tuple[PackageOutcome, ...]: @@ -207,8 +208,24 @@ def collect( outcomes.append(outcome) finished = clock() - _write_health(config.service.output_dir, started, finished, outcomes) - return CollectorResult(started=started, finished=finished, outcomes=tuple(outcomes)) + health_write_error: str | None = None + try: + _write_health(config.service.output_dir, started, finished, outcomes) + except OSError as e: + # Mirror the per-package isolation contract one level up: a write + # failure here (disk full, output dir not writable, cross-device + # atomic-replace, etc.) must not propagate as a raw traceback that + # bypasses the structured exit path in __main__. Surface via + # CollectorResult.health_write_error so the caller can fold it + # into the exit message. + logger.error("collector: failed to write _health.json: %s", e) + health_write_error = str(e) + return CollectorResult( + started=started, + finished=finished, + outcomes=tuple(outcomes), + health_write_error=health_write_error, + ) def _collect_one( diff --git a/tests/test_collector.py b/tests/test_collector.py index 5a09689..ce2f0b8 100644 --- a/tests/test_collector.py +++ b/tests/test_collector.py @@ -628,3 +628,43 @@ def test_collect_result_reports_no_failures_on_full_success(tmp_path: Path) -> N assert len(result.outcomes) == 1 assert result.outcomes[0].package == "mcp-clipboard" assert result.outcomes[0].count == 142 + assert result.health_write_error is None + + +def test_collect_health_write_oserror_recorded_not_raised( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A failure inside _write_health (disk full, perms, cross-device replace, + etc.) must surface via CollectorResult.health_write_error rather than + propagating as a raw OSError that bypasses the structured exit path in + __main__. Per-package outcomes must still be returned intact so the + operator can see what completed before the health-write step failed. + """ + config = _make_config(tmp_path, [PackageConfig(name="mcp-clipboard", window_days=30)]) + runner = _fake_runner_for({"mcp-clipboard": 142}) + + # Force os.replace to raise OSError as the final step of _write_health. + # The badge-write path uses Path.replace via badge.write_badge, so target + # the collector module's os.replace specifically. + real_replace = collector_module.os.replace + + def raising_replace(src: object, dst: object) -> None: + # Only raise for the health file; let badge writes use the real call. + if str(dst).endswith("_health.json"): + raise OSError(28, "No space left on device") + real_replace(src, dst) + + monkeypatch.setattr(collector_module.os, "replace", raising_replace) + + # Should NOT raise. + result = collect(config, runner=runner) + + # Per-package outcome still recorded. + assert len(result.outcomes) == 1 + assert result.outcomes[0].package == "mcp-clipboard" + assert result.outcomes[0].count == 142 + assert result.failures == () + + # Health-write failure surfaces structurally. + assert result.health_write_error is not None + assert "No space left on device" in result.health_write_error diff --git a/tests/test_main.py b/tests/test_main.py index 2a5b35c..25801f2 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -84,3 +84,46 @@ def failing_collect(_cfg): with pytest.raises(SystemExit, match="broken"): main([f"--config={cfg}"], collector_fn=failing_collect) + + +def test_main_exits_nonzero_when_health_file_write_fails(tmp_path: Path) -> None: + """A health-write failure must produce a structured exit message via + CollectorResult.health_write_error rather than letting an OSError escape + through to a raw traceback. Bug class from issue #32. + """ + cfg = _valid_config(tmp_path) + + def health_failed_collect(_cfg): + now = datetime(2026, 4, 24, 21, 0, 0, tzinfo=UTC) + return CollectorResult( + started=now, + finished=now, + outcomes=(PackageOutcome(package="ok-pkg", window_days=30, count=99),), + health_write_error="[Errno 28] No space left on device", + ) + + with pytest.raises(SystemExit, match=r"health file write failed.*No space left"): + main([f"--config={cfg}"], collector_fn=health_failed_collect) + + +def test_main_combines_package_and_health_failure_messages(tmp_path: Path) -> None: + """When both a per-package failure AND a health-write failure occur, the + exit message reports both so the operator sees the full picture. + """ + cfg = _valid_config(tmp_path) + + def both_failed_collect(_cfg): + now = datetime(2026, 4, 24, 21, 0, 0, tzinfo=UTC) + return CollectorResult( + started=now, + finished=now, + outcomes=( + PackageOutcome(package="broken-pkg", window_days=30, count=None, error="boom"), + ), + health_write_error="[Errno 13] Permission denied", + ) + + with pytest.raises( + SystemExit, match=r"broken-pkg.*health file write failed.*Permission denied" + ): + main([f"--config={cfg}"], collector_fn=both_failed_collect)