diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 467c175571..5c889f42dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ ci: # scripts with no CI counterpart, and letting pre-commit.ci enforce # them closes the gap where a PR from a contributor who skipped local # hooks would otherwise introduce a regression unchecked. - skip: [commitizen, gitleaks, hadolint-docker, caddy-validate, no-em-dashes, no-redundant-timeout, mypy, pytest-unit, golangci-lint, go-vet, go-test, eslint-web, check-push-rebased, check-single-migration-per-pr, check-no-modify-migration, forbidden-literals, persistence-boundary, provider-complete-chokepoint, no-new-logger-exception-str-exc, orphan-fixtures, doc-drift-counts, boundary-typed, setting-to-startup-trace] + skip: [commitizen, gitleaks, hadolint-docker, caddy-validate, no-em-dashes, no-redundant-timeout, mypy, pytest-unit, golangci-lint, go-vet, go-test, eslint-web, check-push-rebased, check-single-migration-per-pr, check-no-modify-migration, forbidden-literals, persistence-boundary, provider-complete-chokepoint, no-new-logger-exception-str-exc, orphan-fixtures, doc-drift-counts, boundary-typed, setting-to-startup-trace, domain-error-hierarchy] default_install_hook_types: [pre-commit, commit-msg, pre-push] @@ -274,6 +274,18 @@ repos: pass_filenames: false stages: [pre-push] + - id: domain-error-hierarchy + name: domain-error-hierarchy gate (DomainError-rooted exception classes) + entry: uv run python scripts/check_domain_error_hierarchy.py + language: system + # Trigger on any change to src/synthorg Python files, the gate + # script itself, or its baseline so a PR that introduces a new + # plain-Exception class (or weakens the gate by editing the + # script / baseline) cannot bypass the check. + files: ^(src/synthorg/.*\.py|scripts/check_domain_error_hierarchy\.py|scripts/domain_error_hierarchy_baseline\.txt|\.pre-commit-config\.yaml)$ + pass_filenames: false + stages: [pre-commit, pre-push] + - id: request-dto-forbid-extra name: request-DTO extra="forbid" gate (src/synthorg/api) entry: uv run python scripts/check_request_dto_forbid_extra.py diff --git a/CLAUDE.md b/CLAUDE.md index a8dca848e0..ad6e186fd0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -135,7 +135,7 @@ For every mutable setting: **DB > env (`SYNTHORG__`) > YAML > code defa - **HTML parsing (SEC-1)**: never call `lxml.html.fromstring` on attacker input; use `HTMLParseGuard`. See [sec-prompt-safety.md](docs/reference/sec-prompt-safety.md). - **Pluggable subsystems**: protocol + strategy + factory + config discriminator with safe defaults. Services (which wrap repositories) are a distinct pattern. See [pluggable-subsystems.md](docs/reference/pluggable-subsystems.md). - **Sizes**: line length 88 (ruff); functions <50 lines; files <800 lines. -- **Errors**: handle explicitly, never swallow. Domain error families register a base-class entry in `EXCEPTION_HANDLERS` (`src/synthorg/api/exception_handlers.py`). Use `Error` inheriting from `DomainError`; bare `Exception` / `RuntimeError` at domain boundaries is forbidden. See [errors.md](docs/reference/errors.md) + `src/synthorg/core/domain_errors.py`. +- **Errors**: handle explicitly, never swallow. Domain error families register a base-class entry in `EXCEPTION_HANDLERS` (`src/synthorg/api/exception_handlers.py`). Use `Error` inheriting from `DomainError`; any of `Exception` / `RuntimeError` / `LookupError` / `PermissionError` / `ValueError` / `TypeError` / `KeyError` / `IndexError` / `AttributeError` / `OSError` / `IOError` as a direct base in `src/synthorg/` is forbidden. Enforced by `scripts/check_domain_error_hierarchy.py` (pre-push); per-line opt-out: `# lint-allow: domain-error-hierarchy -- `. See [errors.md](docs/reference/errors.md) + `src/synthorg/core/domain_errors.py`. - **Repository CRUD**: `save(entity) -> None` (idempotent), `get(id) -> Entity | None`, `delete(id) -> bool`, `list_items(...) -> tuple[Entity, ...]`, `query(...) -> tuple[Entity, ...]`. Query methods always return tuples. See [conventions.md](docs/reference/conventions.md) §14. - **Validate** at system boundaries (user input, external APIs, config files). - **Datetime in persistence**: `parse_iso_utc` / `format_iso_utc` from `synthorg.persistence._shared` (both reject naive); `normalize_utc` for relaxed coercion on already-typed `datetime`. diff --git a/docs/reference/conventions.md b/docs/reference/conventions.md index 5c318b9b67..25da67cad3 100644 --- a/docs/reference/conventions.md +++ b/docs/reference/conventions.md @@ -99,6 +99,16 @@ Each domain owns `errors.py` with a base error class carrying change. The HTTP exception handler keys off the base class so a new subclass automatically inherits the correct status mapping. +Enforced at pre-push by `scripts/check_domain_error_hierarchy.py`, +which AST-walks every `class .*` definition under `src/synthorg/` and +fails the build if a class inherits directly from `Exception` / +`RuntimeError` / `LookupError` / `PermissionError` / `ValueError` / +`TypeError` / `KeyError` / `IndexError` / `AttributeError` / `OSError` +/ `IOError` without reaching `DomainError` via another base. Per-line +opt-out: `# lint-allow: domain-error-hierarchy -- `. See +[errors.md](errors.md#domain-error-hierarchy-gate) for the full gate +contract. + References: * `src/synthorg/budget/errors.py`: `BudgetExhaustedError` family. diff --git a/docs/reference/errors.md b/docs/reference/errors.md index a2f8b64a8c..39463dec39 100644 --- a/docs/reference/errors.md +++ b/docs/reference/errors.md @@ -175,6 +175,32 @@ When introducing a new domain error family: 4. Add tests in `tests/unit/api/test_exception_handlers.py` covering each branch and a regression test for the catch-all. +## Domain-error-hierarchy gate + +`scripts/check_domain_error_hierarchy.py` enforces the rule at pre-push +and in CI: every class definition under `src/synthorg/` whose direct +base is one of `Exception` / `RuntimeError` / `LookupError` / +`PermissionError` / `ValueError` / `TypeError` / `KeyError` / +`IndexError` / `AttributeError` / `OSError` / `IOError` is a violation +unless the class itself reaches `DomainError` via another base. + +Only the *root* of a stdlib-rooted chain is flagged; migrating the root +to `DomainError` automatically corrects every descendant. + +Per-line opt-out: + +```python +class TsaError(Exception): # lint-allow: domain-error-hierarchy -- RFC 3161 internals; observability stays stdlib-rooted + ... +``` + +The justification after `--` is mandatory and must be non-empty. The +gate also accepts a frozen baseline file +(`scripts/domain_error_hierarchy_baseline.txt`) listing pre-existing +violations a rollout has not yet reached. The baseline shrinks +monotonically: any entry that no longer maps to a real violation is +reported as drift, so the file cannot harbour stale rows. + ## Further reading - [Design: security](../design/security.md): the SEC-1 rules behind the categories diff --git a/scripts/check_domain_error_hierarchy.py b/scripts/check_domain_error_hierarchy.py new file mode 100644 index 0000000000..ce51ad1c71 --- /dev/null +++ b/scripts/check_domain_error_hierarchy.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python3 +"""Pre-push / CI domain-error-hierarchy gate. + +Enforces the rule: every exception class defined in ``src/synthorg/`` +inherits from :class:`synthorg.core.domain_errors.DomainError` (directly +or via an intermediate that itself reaches ``DomainError``). + +Plain stdlib bases (``Exception`` / ``RuntimeError`` / ``LookupError`` / +``PermissionError`` / ``ValueError`` / ``TypeError`` / ``KeyError`` / +``IndexError`` / ``AttributeError`` / ``OSError`` / ``IOError``) are +forbidden as a *direct* base for any class in ``src/synthorg/``. Only +the root of a bad inheritance chain is flagged; migrating the root +automatically fixes every transitive descendant. + +Sanctioned exceptions (e.g. the ``DomainError`` root itself, RFC 3161 +internals) opt out via a per-line trailing comment:: + + class Foo(Exception): # lint-allow: domain-error-hierarchy -- + +The justification after ``--`` is required and must be non-empty. + +The script also accepts a frozen baseline file +(``scripts/domain_error_hierarchy_baseline.txt``) listing pre-existing +violations the gate should ignore. Entries are formatted +``::``. A baseline entry that no longer +maps to a real violation is reported as drift -- the baseline must +shrink monotonically. + +Usage:: + + python scripts/check_domain_error_hierarchy.py + python scripts/check_domain_error_hierarchy.py --no-baseline + python scripts/check_domain_error_hierarchy.py --update-baseline +""" + +import argparse +import ast +import os +import re +import subprocess +import sys +from pathlib import Path +from typing import Final + +# ── Constants ──────────────────────────────────────────────────── + +FORBIDDEN_BASES: Final[frozenset[str]] = frozenset( + { + "Exception", + "RuntimeError", + "LookupError", + "PermissionError", + "ValueError", + "TypeError", + "KeyError", + "IndexError", + "AttributeError", + "OSError", + "IOError", + } +) + +SUPPRESSION_MARKER: Final[str] = "lint-allow: domain-error-hierarchy" + +_SUPPRESSION_RE: Final[re.Pattern[str]] = re.compile( + r"\blint-allow:\s*domain-error-hierarchy\s*--\s*\S", +) + +_DOMAIN_ERROR_MODULE: Final[str] = "synthorg.core.domain_errors" +_DOMAIN_ERROR_NAME: Final[str] = "DomainError" +_FORBIDDEN_PSEUDO_MODULE: Final[str] = "__forbidden__" + +_BASELINE_REL_PATH: Final[str] = "scripts/domain_error_hierarchy_baseline.txt" + +_BASELINE_HEADER = """\ +# Frozen baseline of pre-existing plain-Exception class definitions in +# src/synthorg/. Each line is `path:lineno:class_name` (POSIX path, +# 1-indexed line) sorted in deterministic order. +# +# scripts/check_domain_error_hierarchy.py reads this file to suppress +# violations at these exact entries. New violations NOT in this list +# will fail the pre-push hook. +# +# Regenerate (rare; requires explicit user approval) with: +# uv run python scripts/check_domain_error_hierarchy.py --update-baseline +""" + +_BASELINE_ENTRY_PATTERN: Final[re.Pattern[str]] = re.compile(r"^.+:\d+:\w+$") + +_SCAN_REL_DEFAULT: Final[str] = "src/synthorg" + + +# ── Suppression marker ─────────────────────────────────────────── + + +def _line_has_trailing_marker(line: str) -> bool: + """Return True iff *line* carries the marker as a trailing comment. + + The marker must be followed by ``--`` and non-empty justification + text -- ``# lint-allow: domain-error-hierarchy -- TSA RFC 3161 client``. + + Regex-based on purpose: ruff-format may split a long class header + across multiple physical lines, leaving the marker on a continuation + line that ``tokenize.generate_tokens`` can't parse in isolation + (``): # marker`` is not a valid Python statement). The full + comment-shape regex below works on any line fragment. + """ + return bool(_SUPPRESSION_RE.search(line)) + + +# ── Module path resolver ───────────────────────────────────────── + + +def _module_dotted_for_rel(rel: str) -> str: + """Return the dotted module path for a repo-relative POSIX path. + + ``src/synthorg/foo/bar.py`` -> ``synthorg.foo.bar`` + ``src/synthorg/foo/__init__.py`` -> ``synthorg.foo`` + ``src/synthorg/__init__.py`` -> ``synthorg`` + """ + posix = rel.replace("\\", "/") + posix = posix.removeprefix("src/") + posix = posix.removesuffix(".py") + parts = posix.split("/") + if parts and parts[-1] == "__init__": + parts = parts[:-1] + return ".".join(parts) + + +# ── AST resolution ─────────────────────────────────────────────── + + +def _build_alias_map(tree: ast.Module) -> dict[str, tuple[str, str | None]]: + """Return ``{local_name: (module_dotted, original_name_or_None)}``. + + ``original_name`` is ``None`` when the local name aliases a whole + module (``import X as Y`` or ``import X``); otherwise it is the + original symbol name imported from the module. + + Module-level imports only -- function-scoped imports are ignored. + """ + aliases: dict[str, tuple[str, str | None]] = {} + for node in tree.body: + if isinstance(node, ast.ImportFrom) and node.module: + for alias in node.names: + local = alias.asname or alias.name + aliases[local] = (node.module, alias.name) + elif isinstance(node, ast.Import): + for alias in node.names: + if alias.asname: + aliases[alias.asname] = (alias.name, None) + else: + head = alias.name.split(".")[0] + aliases.setdefault(head, (head, None)) + return aliases + + +def _resolve_base( # noqa: C901, PLR0911 -- AST base-class resolver covers Name, Attribute (head + dotted-tail), forbidden stdlib, alias-as-module, and aliased-symbol cases; collapsing the branches would obscure each case + node: ast.expr, + alias_map: dict[str, tuple[str, str | None]], + current_module: str, +) -> tuple[str, str] | None: + """Resolve a base-class AST node to ``(module, name)`` or ``None``. + + Returns: + ``(_FORBIDDEN_PSEUDO_MODULE, base_name)`` for forbidden stdlib + names that aren't shadowed by a local import. + ``(module_dotted, name)`` for cross-module references resolved + through *alias_map*. + ``(current_module, name)`` for local-module references with no + matching import. + ``None`` for unsupported expression forms (subscript, call, ...). + + Edge case: deeply chained attribute access through a module alias + (``import x.y as z; class Foo(z.a.b.C):`` -> ``("x.y.a.b", "C")``) + constructs a synthetic dotted module path that may not match an + indexed entry. The closure pass treats unmatched bases as + "non-rooted, non-forbidden" -- the right outcome: such a class + surfaces as unresolved but is not flagged unless one of its other + bases is also forbidden. The codebase doesn't use this shape; the + branch exists for completeness and the index miss is the safe + default. + """ + if isinstance(node, ast.Name): + if node.id in alias_map: + module, original = alias_map[node.id] + if original is None: + return None + return (module, original) + if node.id in FORBIDDEN_BASES: + return (_FORBIDDEN_PSEUDO_MODULE, node.id) + return (current_module, node.id) + if isinstance(node, ast.Attribute): + attrs: list[str] = [] + cur: ast.expr = node + while isinstance(cur, ast.Attribute): + attrs.append(cur.attr) + cur = cur.value + if not isinstance(cur, ast.Name): + return None + head = cur.id + attrs.reverse() + if head not in alias_map: + return None + module, original = alias_map[head] + if original is None: + if len(attrs) == 1: + return (module, attrs[0]) + return (module + "." + ".".join(attrs[:-1]), attrs[-1]) + return None + return None + + +# ── Scanning ───────────────────────────────────────────────────── + + +class _ClassEntry: + """Indexed metadata for one ``class`` definition in the source tree.""" + + __slots__ = ("bases", "lineno", "module", "name", "rel", "suppressed") + + def __init__( # noqa: PLR0913 -- frozen dataclass-style init; keyword-only would clutter every callsite + self, + rel: str, + lineno: int, + module: str, + name: str, + bases: list[tuple[str, str] | None], + *, + suppressed: bool, + ) -> None: + self.rel = rel + self.lineno = lineno + self.module = module + self.name = name + self.bases = bases + self.suppressed = suppressed + + +def _index_file( + path: Path, + rel: str, +) -> tuple[list[_ClassEntry], str | None]: + """Return every class-definition entry in *path* (and parse error, if any).""" + try: + text = path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as exc: + return [], f"{rel}:0: unable to read file: {exc}" + try: + tree = ast.parse(text, filename=str(path)) + except SyntaxError as exc: + return [], f"{rel}:{exc.lineno or 0}: unable to parse file: {exc.msg}" + lines = text.splitlines() + alias_map = _build_alias_map(tree) + module = _module_dotted_for_rel(rel) + entries: list[_ClassEntry] = [] + for node in ast.walk(tree): + if not isinstance(node, ast.ClassDef): + continue + bases = [_resolve_base(b, alias_map, module) for b in node.bases] + suppressed = _class_def_suppressed(node, lines) + entries.append( + _ClassEntry( + rel=rel, + lineno=node.lineno, + module=module, + name=node.name, + bases=bases, + suppressed=suppressed, + ) + ) + return entries, None + + +def _class_def_suppressed(node: ast.ClassDef, lines: list[str]) -> bool: + """Return True iff any line of the class header carries the marker. + + The class header spans from the ``class`` keyword line through the + first statement of the body; the marker may sit on any of those + lines. This mirrors check_persistence_boundary's two-line rule. + """ + start = node.lineno + end = node.body[0].lineno if node.body else start + for lineno in range(start, end + 1): + if 1 <= lineno <= len(lines) and _line_has_trailing_marker(lines[lineno - 1]): + return True + return False + + +def _compute_rooted( + entries: list[_ClassEntry], +) -> set[tuple[str, str]]: + """Return the closure of (module, class_name) pairs reaching DomainError.""" + rooted: set[tuple[str, str]] = {(_DOMAIN_ERROR_MODULE, _DOMAIN_ERROR_NAME)} + by_key: dict[tuple[str, str], _ClassEntry] = { + (e.module, e.name): e for e in entries + } + changed = True + while changed: + changed = False + for key, entry in by_key.items(): + if key in rooted: + continue + for base in entry.bases: + if base is None: + continue + if base in rooted: + rooted.add(key) + changed = True + break + return rooted + + +def _has_forbidden_direct_base(entry: _ClassEntry) -> bool: + """Return True iff *entry* directly inherits a forbidden stdlib base.""" + return any( + base is not None and base[0] == _FORBIDDEN_PSEUDO_MODULE for base in entry.bases + ) + + +def _format_baseline_entry(rel: str, lineno: int, class_name: str) -> str: + """Return the canonical baseline-entry string.""" + return f"{rel}:{lineno}:{class_name}" + + +def _git_tracked_python_files( + abs_root: Path, + project_root: Path, +) -> list[tuple[Path, str]]: + """Return every tracked ``*.py`` file under *abs_root* as ``(abs, rel)``.""" + rel_root = abs_root.relative_to(project_root).as_posix() or "." + try: + result = subprocess.run( + ["git", "ls-files", "-z", "--", f"{rel_root}/*.py"], + check=True, + capture_output=True, + cwd=project_root, + ) + except subprocess.CalledProcessError, FileNotFoundError: + return [ + (p, p.relative_to(project_root).as_posix()) + for p in sorted(abs_root.rglob("*.py")) + ] + out = result.stdout.decode("utf-8", errors="replace") + paths = [p for p in out.split("\0") if p] + return [((project_root / rel_path), rel_path) for rel_path in paths] + + +def _scan_tree( # noqa: C901 -- two-pass closure + violation collection + drift detection + project_root: Path, + scan_root: Path, + baseline: set[str] | None = None, +) -> list[str]: + """Run the full scan; return a sorted list of violation messages. + + Args: + project_root: Repo root used to compute relative paths. + scan_root: Directory under *project_root* to walk. + baseline: When provided, suppresses entries whose + ``path:lineno:class_name`` key matches. ``None`` means no + suppression. An empty set is the same as ``None`` for + scanning, but baseline drift is only reported when a + non-empty set is supplied. + """ + files = _git_tracked_python_files(scan_root, project_root) + all_entries: list[_ClassEntry] = [] + parse_errors: list[str] = [] + for path, rel in files: + entries, error = _index_file(path, rel) + if error is not None: + parse_errors.append(error) + continue + all_entries.extend(entries) + rooted = _compute_rooted(all_entries) + violations: list[tuple[str, int, str, str]] = [] + seen_keys: set[str] = set() + for entry in all_entries: + if entry.suppressed: + continue + if not _has_forbidden_direct_base(entry): + continue + if (entry.module, entry.name) in rooted: + continue + baseline_key = _format_baseline_entry(entry.rel, entry.lineno, entry.name) + seen_keys.add(baseline_key) + if baseline is not None and baseline_key in baseline: + continue + forbidden_names = sorted( + base[1] + for base in entry.bases + if base is not None and base[0] == _FORBIDDEN_PSEUDO_MODULE + ) + violations.append( + ( + entry.rel, + entry.lineno, + entry.name, + ", ".join(forbidden_names), + ) + ) + messages = sorted(parse_errors) + for rel, lineno, name, bases in sorted(violations): + messages.append( + f"{rel}:{lineno}: {name} inherits from plain " + f"{bases}; should inherit from DomainError. " + "See src/synthorg/core/domain_errors.py for the canonical " + "pattern (or add '# lint-allow: domain-error-hierarchy " + "-- ' if the stdlib base is genuinely intentional)." + ) + if baseline: + for stale in sorted(baseline - seen_keys): + messages.append( + f"{stale}: stale baseline entry (no matching violation " + "found; the class may have been migrated, renamed, or " + "deleted). Regenerate the baseline with " + "'uv run python scripts/check_domain_error_hierarchy.py " + "--update-baseline' to drop drifted rows." + ) + return messages + + +# ── Baseline I/O ───────────────────────────────────────────────── + + +def _load_baseline(baseline_path: Path) -> set[str]: + """Return the set of allowlisted ``path:lineno:class_name`` entries. + + Validates each non-empty, non-comment line; rejects duplicates and + malformed entries. A corrupted baseline silently dropping entries + would let real violations slip past the gate, so the loader fails + loud on validation errors. + """ + if not baseline_path.exists(): + return set() + entries: set[str] = set() + errors: list[str] = [] + rel_path = str(baseline_path) + for lineno, line in enumerate( + baseline_path.read_text(encoding="utf-8").splitlines(), + start=1, + ): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + if not _BASELINE_ENTRY_PATTERN.match(stripped): + errors.append( + f"{rel_path}:{lineno}: malformed entry (expected " + f"'path:lineno:class_name', got {stripped!r})" + ) + continue + if stripped in entries: + errors.append(f"{rel_path}:{lineno}: duplicate entry {stripped!r}") + continue + entries.add(stripped) + if errors: + for err in errors: + print(err, file=sys.stderr) + msg = ( + f"{rel_path}: baseline failed validation " + f"({len(errors)} error{'s' if len(errors) != 1 else ''}); " + "regenerate with 'uv run python " + "scripts/check_domain_error_hierarchy.py --update-baseline' " + "or fix by hand." + ) + raise ValueError(msg) + return entries + + +def _scan_for_baseline_entries( + project_root: Path, + scan_root: Path, +) -> list[str]: + """Return every violation in the tree as baseline entries (sorted).""" + files = _git_tracked_python_files(scan_root, project_root) + all_entries: list[_ClassEntry] = [] + for path, rel in files: + entries, _ = _index_file(path, rel) + all_entries.extend(entries) + rooted = _compute_rooted(all_entries) + found: list[tuple[str, int, str]] = [] + for entry in all_entries: + if entry.suppressed: + continue + if not _has_forbidden_direct_base(entry): + continue + if (entry.module, entry.name) in rooted: + continue + found.append((entry.rel, entry.lineno, entry.name)) + found.sort() + return [_format_baseline_entry(rel, lineno, name) for rel, lineno, name in found] + + +# ── CLI ───────────────────────────────────────────────────────── + + +class ProjectRootError( + Exception +): # lint-allow: domain-error-hierarchy -- gate-internal CLI error; never leaves this script + """Raised when ``--repo-root`` cannot be resolved.""" + + +def _resolve_project_root(repo_root: Path | None) -> Path: + """Resolve the project root from CLI arguments.""" + default_root = Path(__file__).resolve().parent.parent + if repo_root is None: + return default_root + try: + resolved = repo_root.resolve(strict=True) + except OSError as exc: + msg = f"--repo-root not accessible: {repo_root} ({exc})" + raise ProjectRootError(msg) from exc + if not resolved.is_dir(): + msg = f"--repo-root must be a directory: {resolved}" + raise ProjectRootError(msg) + return resolved + + +def _resolve_scan_root(project_root: Path, scan_path: str) -> Path | None: + """Resolve *scan_path* to an absolute path strictly under *project_root*.""" + candidate = Path(scan_path) + if not candidate.is_absolute(): + candidate = project_root / candidate + try: + resolved = candidate.resolve(strict=False) + except OSError: + return None + project_root_str = os.fspath(project_root.resolve(strict=False)) + resolved_str = os.fspath(resolved) + try: + common = os.path.commonpath([project_root_str, resolved_str]) + except ValueError: + return None + if common != project_root_str: + return None + return resolved + + +def main(argv: list[str] | None = None) -> int: + """CLI entry point.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--paths", + default=_SCAN_REL_DEFAULT, + help=( + "Root to scan (relative to repo root). The gate always walks " + "the full tree under this path because cross-module base " + "resolution requires it." + ), + ) + parser.add_argument( + "--repo-root", + type=Path, + default=None, + help=( + "Project root to anchor path resolution against. Defaults to " + "the ancestor directory of this script." + ), + ) + parser.add_argument( + "--update-baseline", + action="store_true", + help=( + "Regenerate scripts/domain_error_hierarchy_baseline.txt from " + "the current tree. Rare; requires explicit user approval." + ), + ) + parser.add_argument( + "--no-baseline", + action="store_true", + help=( + "Ignore the baseline file; report every violation. Useful " + "for measuring residual progress during the rollout." + ), + ) + args = parser.parse_args(argv) + + try: + project_root = _resolve_project_root(args.repo_root) + except ProjectRootError as exc: + print(str(exc), file=sys.stderr) + return 2 + + scan_root = _resolve_scan_root(project_root, args.paths) + if scan_root is None or not scan_root.exists(): + print( + f"refusing to scan path outside project root or missing: {args.paths}", + file=sys.stderr, + ) + return 2 + + baseline_path = project_root / _BASELINE_REL_PATH + + if args.update_baseline: + entries = _scan_for_baseline_entries(project_root, scan_root) + body = _BASELINE_HEADER + if entries: + body += "\n".join(entries) + "\n" + baseline_path.parent.mkdir(parents=True, exist_ok=True) + baseline_path.write_text(body, encoding="utf-8") + print( + f"Wrote {len(entries)} entries to {baseline_path}.", + file=sys.stderr, + ) + return 0 + + baseline: set[str] | None + if args.no_baseline: + baseline = None + else: + try: + baseline = _load_baseline(baseline_path) + except ValueError as exc: + print(str(exc), file=sys.stderr) + return 2 + + messages = _scan_tree(project_root, scan_root, baseline=baseline) + if messages: + for msg in messages: + print(msg) + print( + f"\n{len(messages)} domain-error-hierarchy violation(s) found. " + "See docs/reference/errors.md for the DomainError contract.", + file=sys.stderr, + ) + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/domain_error_hierarchy_baseline.txt b/scripts/domain_error_hierarchy_baseline.txt new file mode 100644 index 0000000000..3a262e62a5 --- /dev/null +++ b/scripts/domain_error_hierarchy_baseline.txt @@ -0,0 +1,10 @@ +# Frozen baseline of pre-existing plain-Exception class definitions in +# src/synthorg/. Each line is `path:lineno:class_name` (POSIX path, +# 1-indexed line) sorted in deterministic order. +# +# scripts/check_domain_error_hierarchy.py reads this file to suppress +# violations at these exact entries. New violations NOT in this list +# will fail the pre-push hook. +# +# Regenerate (rare; requires explicit user approval) with: +# uv run python scripts/check_domain_error_hierarchy.py --update-baseline diff --git a/scripts/mock_spec_baseline.txt b/scripts/mock_spec_baseline.txt index 1e361a0bab..c266cc0ae0 100644 --- a/scripts/mock_spec_baseline.txt +++ b/scripts/mock_spec_baseline.txt @@ -366,9 +366,6 @@ tests/unit/api/controllers/test_setup.py:646:17 tests/unit/api/controllers/test_setup.py:653:27 tests/unit/api/controllers/test_setup.py:656:16 tests/unit/api/controllers/test_setup.py:657:31 -tests/unit/api/controllers/test_setup.py:1186:33 -tests/unit/api/controllers/test_setup.py:1209:33 -tests/unit/api/controllers/test_setup.py:1256:33 tests/unit/api/controllers/test_setup_agent_ops.py:80:21 tests/unit/api/controllers/test_setup_agent_ops.py:83:31 tests/unit/api/controllers/test_setup_agent_ops.py:86:20 @@ -387,9 +384,6 @@ tests/unit/api/controllers/test_setup_has_gpu.py:65:23 tests/unit/api/controllers/test_setup_has_gpu.py:66:27 tests/unit/api/controllers/test_setup_has_gpu.py:75:23 tests/unit/api/controllers/test_setup_has_gpu.py:76:27 -tests/unit/api/controllers/test_setup_locales.py:246:33 -tests/unit/api/controllers/test_setup_locales.py:269:33 -tests/unit/api/controllers/test_setup_locales.py:316:33 tests/unit/api/controllers/test_sse_keepalive_setting.py:28:31 tests/unit/api/controllers/test_sse_keepalive_setting.py:29:41 tests/unit/api/controllers/test_sse_revalidate.py:44:21 diff --git a/src/synthorg/a2a/client.py b/src/synthorg/a2a/client.py index 7cc370863f..0c5bd13368 100644 --- a/src/synthorg/a2a/client.py +++ b/src/synthorg/a2a/client.py @@ -5,7 +5,7 @@ outbound URLs against SSRF rules. """ -from typing import Any +from typing import Any, ClassVar from uuid import uuid4 import httpx @@ -15,6 +15,8 @@ JsonRpcRequest, JsonRpcResponse, ) +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.observability import get_logger, safe_error_description from synthorg.observability.events.a2a import ( A2A_OUTBOUND_FAILED, @@ -26,9 +28,14 @@ logger = get_logger(__name__) -class A2AClientError(Exception): +class A2AClientError(DomainError): """Error raised by the outbound A2A client.""" + default_message: ClassVar[str] = "A2A client request failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.PROVIDER_ERROR + error_code: ClassVar[ErrorCode] = ErrorCode.PROVIDER_ERROR + status_code: ClassVar[int] = 502 + def __init__(self, message: str, *, peer_name: str = "") -> None: super().__init__(message) self.peer_name = peer_name diff --git a/src/synthorg/a2a/gateway.py b/src/synthorg/a2a/gateway.py index 060c74e5ee..578eb4c594 100644 --- a/src/synthorg/a2a/gateway.py +++ b/src/synthorg/a2a/gateway.py @@ -9,7 +9,7 @@ import asyncio import hmac import json -from typing import Any +from typing import Any, ClassVar from litestar import Controller, Request, post from litestar.datastructures import State # noqa: TC002 @@ -40,6 +40,8 @@ from synthorg.a2a.security import validate_peer from synthorg.a2a.task_mapper import to_a2a from synthorg.api.rate_limits.policies import per_op_rate_limit_from_policy +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.observability import get_logger, safe_error_description from synthorg.observability.events.a2a import ( A2A_INBOUND_AUTH_FAILED, @@ -612,9 +614,14 @@ def _extract_peer_name( return None -class _A2AMethodError(Exception): +class _A2AMethodError(DomainError): """Error raised by method handlers for JSON-RPC error responses.""" + default_message: ClassVar[str] = "A2A method dispatch failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + def __init__( self, code: int, diff --git a/src/synthorg/api/auth/service.py b/src/synthorg/api/auth/service.py index 403b6b35dd..29f1f00cfa 100644 --- a/src/synthorg/api/auth/service.py +++ b/src/synthorg/api/auth/service.py @@ -6,7 +6,7 @@ import secrets import uuid from datetime import UTC, datetime, timedelta -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar import argon2 import jwt @@ -17,6 +17,8 @@ from synthorg.api.boundary import parse_typed from synthorg.core.auth.models import User # noqa: TC001 from synthorg.core.auth.roles import HumanRole +from synthorg.core.domain_errors import ServiceUnavailableError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.observability import get_logger, safe_error_description from synthorg.observability.events.security import ( SECURITY_AUTH_FAILED, @@ -29,9 +31,14 @@ logger = get_logger(__name__) -class SecretNotConfiguredError(RuntimeError): +class SecretNotConfiguredError(ServiceUnavailableError): """Raised when the JWT secret is required but not configured.""" + default_message: ClassVar[str] = "JWT secret not configured" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.SERVICE_UNAVAILABLE + status_code: ClassVar[int] = 503 + _hasher = argon2.PasswordHasher( time_cost=3, diff --git a/src/synthorg/api/auth/ticket_store.py b/src/synthorg/api/auth/ticket_store.py index f70bc2fdaf..60562aeb5e 100644 --- a/src/synthorg/api/auth/ticket_store.py +++ b/src/synthorg/api/auth/ticket_store.py @@ -24,12 +24,15 @@ import math import secrets import threading +from typing import ClassVar from pydantic import BaseModel, ConfigDict from synthorg.api.auth.token_size import get_auth_token_bytes from synthorg.core.auth.models import AuthenticatedUser # noqa: TC001 from synthorg.core.clock import Clock, SystemClock +from synthorg.core.domain_errors import PerOperationRateLimitError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.observability import get_logger from synthorg.observability.events.api import ( API_WS_TICKET_CLEANUP, @@ -43,9 +46,14 @@ logger = get_logger(__name__) -class TicketLimitExceededError(Exception): +class TicketLimitExceededError(PerOperationRateLimitError): """Raised when a user exceeds the per-user pending ticket cap.""" + default_message: ClassVar[str] = "WebSocket ticket cap exceeded" + error_category: ClassVar[ErrorCategory] = ErrorCategory.RATE_LIMIT + error_code: ClassVar[ErrorCode] = ErrorCode.PER_OPERATION_RATE_LIMITED + status_code: ClassVar[int] = 429 + # Internal constant by design: boot-time snapshot of # ``api.ws_ticket_max_pending_per_user``. The store reads its cap @@ -162,8 +170,11 @@ def create(self, user: AuthenticatedUser) -> str: pending=user_pending, cap=self._max_pending, ) - msg = f"Ticket limit exceeded for user {user.user_id}" - raise TicketLimitExceededError(msg) + # Empty message lets the centralised handler fall back to + # the class default ("WebSocket ticket cap exceeded") so + # the 429 response carries no caller-identifying detail + # (the user_id is logged server-side just above). + raise TicketLimitExceededError ticket = secrets.token_urlsafe(get_auth_token_bytes()) entry = _TicketEntry( diff --git a/src/synthorg/api/controllers/budget_config_versions.py b/src/synthorg/api/controllers/budget_config_versions.py index d9693d3077..762a83d036 100644 --- a/src/synthorg/api/controllers/budget_config_versions.py +++ b/src/synthorg/api/controllers/budget_config_versions.py @@ -16,6 +16,7 @@ encode_repo_seek_meta, ) from synthorg.budget.config import BudgetConfig +from synthorg.core.domain_errors import NotFoundError from synthorg.observability import get_logger from synthorg.observability.events.versioning import ( VERSION_LISTED, @@ -91,12 +92,8 @@ async def get_version( entity_id=_ENTITY_ID, version=version_num, ) - return Response( - content=ApiResponse[SnapshotT]( - error=f"Version {version_num} not found", - ), - status_code=404, - ) + msg = f"Version {version_num} not found" + raise NotFoundError(msg) return Response( content=ApiResponse[SnapshotT](data=version), ) diff --git a/src/synthorg/api/controllers/company_versions.py b/src/synthorg/api/controllers/company_versions.py index 541e55caa8..185febf7c8 100644 --- a/src/synthorg/api/controllers/company_versions.py +++ b/src/synthorg/api/controllers/company_versions.py @@ -16,6 +16,7 @@ encode_repo_seek_meta, ) from synthorg.core.company import Company +from synthorg.core.domain_errors import NotFoundError from synthorg.observability import get_logger from synthorg.observability.events.versioning import ( VERSION_LISTED, @@ -91,12 +92,8 @@ async def get_version( entity_id=_ENTITY_ID, version=version_num, ) - return Response( - content=ApiResponse[SnapshotT]( - error=f"Version {version_num} not found", - ), - status_code=404, - ) + msg = f"Version {version_num} not found" + raise NotFoundError(msg) return Response( content=ApiResponse[SnapshotT](data=version), ) diff --git a/src/synthorg/api/controllers/evaluation_config_versions.py b/src/synthorg/api/controllers/evaluation_config_versions.py index 7c0b0d8c6c..f71ae703a5 100644 --- a/src/synthorg/api/controllers/evaluation_config_versions.py +++ b/src/synthorg/api/controllers/evaluation_config_versions.py @@ -15,6 +15,7 @@ CursorParam, encode_repo_seek_meta, ) +from synthorg.core.domain_errors import NotFoundError from synthorg.hr.evaluation.config import EvaluationConfig from synthorg.observability import get_logger from synthorg.observability.events.versioning import ( @@ -91,12 +92,8 @@ async def get_version( entity_id=_ENTITY_ID, version=version_num, ) - return Response( - content=ApiResponse[SnapshotT]( - error=f"Version {version_num} not found", - ), - status_code=404, - ) + msg = f"Version {version_num} not found" + raise NotFoundError(msg) return Response( content=ApiResponse[SnapshotT](data=version), ) diff --git a/src/synthorg/api/controllers/role_versions.py b/src/synthorg/api/controllers/role_versions.py index befadc996e..39ace17b2c 100644 --- a/src/synthorg/api/controllers/role_versions.py +++ b/src/synthorg/api/controllers/role_versions.py @@ -15,6 +15,7 @@ CursorParam, encode_repo_seek_meta, ) +from synthorg.core.domain_errors import NotFoundError from synthorg.core.role import Role from synthorg.observability import get_logger from synthorg.observability.events.versioning import ( @@ -90,12 +91,8 @@ async def get_version( entity_id=role_name, version=version_num, ) - return Response( - content=ApiResponse[SnapshotT]( - error=f"Version {version_num} not found for role {role_name!r}", - ), - status_code=404, - ) + msg = f"Version {version_num} not found for role {role_name!r}" + raise NotFoundError(msg) return Response( content=ApiResponse[SnapshotT](data=version), ) diff --git a/src/synthorg/api/controllers/workflow_executions.py b/src/synthorg/api/controllers/workflow_executions.py index 2c4a080fd2..e73207ea3b 100644 --- a/src/synthorg/api/controllers/workflow_executions.py +++ b/src/synthorg/api/controllers/workflow_executions.py @@ -20,7 +20,11 @@ from synthorg.api.responses import require_resource_or_404 from synthorg.core.domain_errors import NotFoundError from synthorg.core.error_taxonomy import ErrorCode -from synthorg.core.persistence_errors import PersistenceError, VersionConflictError +from synthorg.core.persistence_errors import ( + PersistenceError, + PersistenceVersionConflictError, + RecordNotFoundError, +) from synthorg.engine.errors import ( WorkflowConditionEvalError, WorkflowDefinitionInvalidError, @@ -263,14 +267,14 @@ async def cancel_execution( execution_id, cancelled_by=cancelled_by, ) - except WorkflowExecutionNotFoundError: + except WorkflowExecutionNotFoundError, RecordNotFoundError: logger.warning( WORKFLOW_EXEC_NOT_FOUND, execution_id=execution_id, ) msg = f"Workflow execution {execution_id!r} not found" raise NotFoundError(msg) from None - except (WorkflowExecutionError, VersionConflictError) as exc: + except (WorkflowExecutionError, PersistenceVersionConflictError) as exc: # Cancel was rejected (already-terminal status, version # mismatch). Emit a distinct conflict event so audit # streams and dashboards do NOT mistake a failed cancel diff --git a/src/synthorg/api/controllers/workflow_versions.py b/src/synthorg/api/controllers/workflow_versions.py index 36faa4366e..14df66af00 100644 --- a/src/synthorg/api/controllers/workflow_versions.py +++ b/src/synthorg/api/controllers/workflow_versions.py @@ -21,7 +21,14 @@ encode_repo_seek_meta, ) from synthorg.api.path_params import PathId # noqa: TC001 -from synthorg.core.persistence_errors import VersionConflictError +from synthorg.core.domain_errors import ( + NotFoundError, + ValidationError, + VersionConflictError, +) +from synthorg.core.persistence_errors import ( + PersistenceVersionConflictError, +) from synthorg.engine.workflow.definition import ( WorkflowDefinition, ) @@ -50,8 +57,8 @@ async def _fetch_version_pair( workflow_id: str, from_version: int, to_version: int, -) -> tuple[SnapshotT, SnapshotT] | Response[ApiResponse[WorkflowDiff]]: - """Fetch two version snapshots, returning an error response on failure. +) -> tuple[SnapshotT, SnapshotT]: + """Fetch two version snapshots, raising :class:`NotFoundError` if absent. Args: version_repo: The workflow version repository. @@ -60,8 +67,10 @@ async def _fetch_version_pair( to_version: Target version number. Returns: - A tuple ``(old, new)`` on success, or a ``Response`` error if - either version is not found. + A tuple ``(old, new)`` with both snapshots. + + Raises: + NotFoundError: If either version snapshot is absent. """ old = await version_repo.get_version(workflow_id, from_version) if old is None: @@ -70,12 +79,8 @@ async def _fetch_version_pair( definition_id=workflow_id, version=from_version, ) - return Response( - content=ApiResponse[WorkflowDiff]( - error=f"Version {from_version} not found", - ), - status_code=404, - ) + msg = f"Version {from_version} not found" + raise NotFoundError(msg) new = await version_repo.get_version(workflow_id, to_version) if new is None: logger.warning( @@ -83,12 +88,8 @@ async def _fetch_version_pair( definition_id=workflow_id, version=to_version, ) - return Response( - content=ApiResponse[WorkflowDiff]( - error=f"Version {to_version} not found", - ), - status_code=404, - ) + msg = f"Version {to_version} not found" + raise NotFoundError(msg) return old, new @@ -97,7 +98,7 @@ async def _fetch_rollback_target( version_repo: VersionRepository[WorkflowDefinition], workflow_id: str, data: RollbackWorkflowRequest, -) -> tuple[WorkflowDefinition, SnapshotT] | Response[ApiResponse[WorkflowDefinition]]: +) -> tuple[WorkflowDefinition, SnapshotT]: """Look up the definition and target version for a rollback. Validates that the definition exists, the expected version matches, @@ -110,8 +111,12 @@ async def _fetch_rollback_target( data: The rollback request payload. Returns: - A tuple ``(existing, target)`` on success, or a ``Response`` - error on any validation failure. + A tuple ``(existing, target)`` on success. + + Raises: + NotFoundError: If the definition or the target version is absent. + VersionConflictError: If ``data.expected_revision`` does not match + the current persisted revision. """ existing = await repo.get(workflow_id) if existing is None: @@ -119,12 +124,8 @@ async def _fetch_rollback_target( WORKFLOW_DEF_NOT_FOUND, definition_id=workflow_id, ) - return Response( - content=ApiResponse[WorkflowDefinition]( - error="Workflow definition not found", - ), - status_code=404, - ) + msg = "Workflow definition not found" + raise NotFoundError(msg) if data.expected_revision != existing.revision: logger.warning( @@ -133,14 +134,8 @@ async def _fetch_rollback_target( expected=data.expected_revision, actual=existing.revision, ) - return Response( - content=ApiResponse[WorkflowDefinition]( - error=( - "Version conflict: the workflow was modified. Reload and retry." - ), - ), - status_code=409, - ) + msg = "Version conflict: the workflow was modified. Reload and retry." + raise VersionConflictError(msg) target = await version_repo.get_version( workflow_id, @@ -152,12 +147,8 @@ async def _fetch_rollback_target( definition_id=workflow_id, version=data.target_version, ) - return Response( - content=ApiResponse[WorkflowDefinition]( - error=f"Target version {data.target_version} not found", - ), - status_code=404, - ) + msg = f"Target version {data.target_version} not found" + raise NotFoundError(msg) return existing, target @@ -263,12 +254,8 @@ async def get_version( definition_id=workflow_id, version=version_num, ) - return Response( - content=ApiResponse[SnapshotT]( - error=f"Version {version_num} not found", - ), - status_code=404, - ) + msg = f"Version {version_num} not found" + raise NotFoundError(msg) return Response( content=ApiResponse[SnapshotT](data=version), ) @@ -302,23 +289,16 @@ async def get_diff( definition_id=workflow_id, error="from_version and to_version must differ", ) - return Response( - content=ApiResponse[WorkflowDiff]( - error="from_version and to_version must differ", - ), - status_code=400, - ) + msg = "from_version and to_version must differ" + raise ValidationError(msg) version_repo = state.app_state.persistence.workflow_versions - result = await _fetch_version_pair( + old, new = await _fetch_version_pair( version_repo, workflow_id, from_version, to_version, ) - if isinstance(result, Response): - return result - old, new = result diff = compute_diff(old, new) logger.debug( @@ -347,15 +327,12 @@ async def rollback_workflow( repo = state.app_state.persistence.workflow_definitions version_repo = state.app_state.persistence.workflow_versions - result = await _fetch_rollback_target( + existing, target = await _fetch_rollback_target( repo, version_repo, workflow_id, data, ) - if isinstance(result, Response): - return result - existing, target = result updater = get_auth_user_id(request) rolled_back = _build_rolled_back_definition( existing, @@ -363,12 +340,6 @@ async def rollback_workflow( datetime.now(UTC), ) - # Route the durable save + post-rollback snapshot through the - # AppState-wired ``WorkflowRollbackService`` so audit logging - # cannot regress when a new write path lands in the rollback - # contract. The service raises ``VersionConflictError`` on - # optimistic-concurrency mismatch; the 409 translation stays - # here so the controller owns the HTTP response shape. rollback_service = state.app_state.workflow_rollback_service try: await rollback_service.rollback( @@ -376,19 +347,15 @@ async def rollback_workflow( target_version=data.target_version, saved_by=updater, ) - except VersionConflictError as exc: + except PersistenceVersionConflictError as exc: logger.warning( WORKFLOW_DEF_VERSION_CONFLICT, definition_id=workflow_id, error_type=type(exc).__name__, error=safe_error_description(exc), ) - return Response( - content=ApiResponse[WorkflowDefinition]( - error=("Version conflict during rollback. Reload and retry."), - ), - status_code=409, - ) + msg = "Version conflict during rollback. Reload and retry." + raise VersionConflictError(msg) from exc return Response( content=ApiResponse[WorkflowDefinition](data=rolled_back), diff --git a/src/synthorg/api/controllers/workflows.py b/src/synthorg/api/controllers/workflows.py index e5c129f603..05ec92dbb8 100644 --- a/src/synthorg/api/controllers/workflows.py +++ b/src/synthorg/api/controllers/workflows.py @@ -32,7 +32,6 @@ from synthorg.api.rate_limits import per_op_rate_limit_from_policy from synthorg.core.domain_errors import NotFoundError from synthorg.core.enums import WorkflowType -from synthorg.core.persistence_errors import VersionConflictError from synthorg.core.types import NotBlankStr from synthorg.engine.workflow.blueprint_loader import list_blueprints from synthorg.engine.workflow.definition import ( @@ -467,7 +466,7 @@ async def update_workflow( # noqa: PLR0911 -- enumerate distinct HTTP error pat ), status_code=404, ) - except VersionConflictError as exc: + except WorkflowDefinitionRevisionMismatchError as exc: logger.warning( WORKFLOW_DEF_VERSION_CONFLICT, definition_id=updated.id, diff --git a/src/synthorg/api/cursor.py b/src/synthorg/api/cursor.py index daa108d4b9..6c8c584c9e 100644 --- a/src/synthorg/api/cursor.py +++ b/src/synthorg/api/cursor.py @@ -17,18 +17,27 @@ import hmac import json import secrets -from typing import Self +from typing import ClassVar, Self from synthorg.api.cursor_config import CursorConfig # noqa: TC001 +from synthorg.core.domain_errors import ValidationError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode -class InvalidCursorError(ValueError): +class InvalidCursorError(ValidationError): """Raised when a cursor token is malformed, tampered, or unsigned. - Controllers should translate this to HTTP 400 with a structured - ``ErrorDetail`` (``error_category=validation``). + Renders as HTTP 422 Unprocessable Entity with a structured + ``ErrorDetail`` (``error_category=validation``, + ``error_code=VALIDATION_ERROR``) via the centralised RFC 9457 + dispatch. """ + default_message: ClassVar[str] = "Invalid cursor token" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 + # Minimum key length (bytes) enforced on HMAC secrets. 16 bytes = 128 # bits of entropy; below this the signing strength falls off fast. diff --git a/src/synthorg/api/exception_handlers.py b/src/synthorg/api/exception_handlers.py index 3d3c2e0a21..6ea1a43060 100644 --- a/src/synthorg/api/exception_handlers.py +++ b/src/synthorg/api/exception_handlers.py @@ -28,14 +28,10 @@ ValidationException, ) +from synthorg.a2a.client import A2AClientError from synthorg.api.auth_response_discriminator import discriminate_unauthorized from synthorg.api.cursor import InvalidCursorError from synthorg.api.dto import ApiResponse, ErrorDetail, ProblemDetail -from synthorg.backup.errors import ( - BackupError, - BackupInProgressError, - BackupNotFoundError, -) from synthorg.budget.errors import ( BudgetExhaustedError, MixedCurrencyAggregationError, @@ -446,52 +442,6 @@ def handle_persistence_integrity_error( ) -def handle_backup_error( - request: Request[Any, Any, Any], - exc: BackupError, -) -> Response[ApiResponse[None]] | Response[ProblemDetail]: - """Map ``BackupError`` subclasses to structured HTTP responses. - - ``BackupNotFoundError`` and ``BackupInProgressError`` carry semantic - HTTP analogues (404 / 409); every other ``BackupError`` subtype - surfaces as a structured 5xx instead of falling through to - ``handle_unexpected`` and producing an unstructured generic 500. - - The 4xx branches pass the exception's message through to the client - because ``BackupError`` instances raised by ``synthorg.backup`` carry - user-safe identifiers only (backup_id, the in-progress reason); - they do not embed filesystem paths, secrets, or internal state. - The 5xx branch scrubs the message via the standard server-error - convention (the original is logged server-side via ``_log_error``). - """ - if isinstance(exc, BackupNotFoundError): - _log_error(request, exc, status=404) - return _build_response( - request, - detail=str(exc) or "Backup not found", - error_code=ErrorCode.RECORD_NOT_FOUND, - error_category=ErrorCategory.NOT_FOUND, - status_code=404, - ) - if isinstance(exc, BackupInProgressError): - _log_error(request, exc, status=409) - return _build_response( - request, - detail=str(exc) or "Backup operation already in progress", - error_code=ErrorCode.RESOURCE_CONFLICT, - error_category=ErrorCategory.CONFLICT, - status_code=409, - ) - _log_error(request, exc, status=500) - return _build_response( - request, - detail="Backup operation failed", - error_code=ErrorCode.INTERNAL_ERROR, - error_category=ErrorCategory.INTERNAL, - status_code=500, - ) - - def _normalize_status_code(raw: object) -> int: """Coerce a raw ``status_code`` attribute to a valid HTTP error code. @@ -856,8 +806,8 @@ def handle_http_exception( (CommunicationError, handle_domain_error), (IntegrationError, handle_domain_error), (ToolError, handle_domain_error), + (A2AClientError, handle_domain_error), (DomainError, handle_domain_error), - (BackupError, handle_backup_error), (Exception, handle_unexpected), ) diff --git a/src/synthorg/api/services/workflow_rollback_service.py b/src/synthorg/api/services/workflow_rollback_service.py index 4ee0f54ca2..f08bb8b704 100644 --- a/src/synthorg/api/services/workflow_rollback_service.py +++ b/src/synthorg/api/services/workflow_rollback_service.py @@ -1,25 +1,20 @@ """WorkflowRollbackService -- audit-aware facade for rollback persistence. -The :class:`WorkflowVersionController.rollback_workflow` handler -previously called ``repo.save(rolled_back)`` directly on the -workflow_definitions repository, then constructed a fresh -:class:`VersioningService` to record the post-rollback snapshot. -Routing both writes through one cohesive service centralises: - -1. The durable definition save (raises :class:`VersionConflictError` - on optimistic-concurrency mismatch -- the controller still owns - the 409 translation). -2. The best-effort post-rollback snapshot via - :class:`VersioningService.snapshot_if_changed`. A snapshot failure - is logged at WARNING and swallowed -- the rollback itself has - already been persisted, so dropping the audit row keeps service - availability while operators receive the WARN signal. +Routes the rollback save and the post-rollback snapshot through one +cohesive service so callers get: + +1. The durable definition save, which may raise + :class:`PersistenceVersionConflictError` on optimistic-concurrency + mismatch. +2. A best-effort post-rollback snapshot via + :class:`VersioningService.snapshot_if_changed`. Snapshot failures + are logged at WARNING and swallowed so a snapshot write failure + cannot fail the whole rollback after the durable save committed. 3. The audit-grade :data:`WORKFLOW_DEF_ROLLED_BACK` event emitted on success. -The service is constructed per request from the controller (it is a -thin two-method object whose lifecycle is bound to the request, not -the process); no AppState wiring is required. +The service is a thin two-method object constructed per request; no +AppState wiring is required. """ from typing import TYPE_CHECKING @@ -79,9 +74,8 @@ async def rollback( """Persist ``rolled_back`` and snapshot the new revision. Raises: - VersionConflictError: When the optimistic-concurrency - guard on ``definition_repo.save`` rejects the write - (the controller catches this and returns 409). + PersistenceVersionConflictError: When the optimistic-concurrency + guard on ``definition_repo.save`` rejects the write. Returns ``rolled_back`` unchanged so the caller can serialise it onto the response without re-fetching. diff --git a/src/synthorg/backup/errors.py b/src/synthorg/backup/errors.py index 096cf9f11d..22ce45f8dd 100644 --- a/src/synthorg/backup/errors.py +++ b/src/synthorg/backup/errors.py @@ -1,21 +1,35 @@ """Backup error hierarchy. -All backup-related errors inherit from ``BackupError`` so callers -can catch the entire family with a single except clause. +All backup-related errors inherit from :class:`BackupError` (which is +itself a :class:`DomainError` subclass) so callers can catch the entire +family with a single except clause and the API layer maps every +subclass to a structured RFC 9457 response via the centralised +``handle_domain_error`` dispatch. """ from typing import ClassVar -from synthorg.core.domain_errors import ConflictError +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode -class BackupError(Exception): +class BackupError(DomainError): """Base exception for all backup operations.""" + default_message: ClassVar[str] = "Backup operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + class BackupInProgressError(BackupError): """Raised when a backup is attempted while another is in progress.""" + default_message: ClassVar[str] = "Backup operation already in progress" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + class RestoreError(BackupError): """Raised when a restore operation fails.""" @@ -36,17 +50,23 @@ class RetentionError(BackupError): class BackupNotFoundError(BackupError): """Raised when a requested backup ID does not exist.""" + default_message: ClassVar[str] = "Backup not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RECORD_NOT_FOUND + status_code: ClassVar[int] = 404 + -class BackupUnrestartableError(ConflictError): +class BackupUnrestartableError(BackupError): """Raised when ``BackupScheduler.start()`` is called after a timed-out stop. The scheduler refuses to spawn a fresh loop on top of an orphan task that may still own the backup lock, so the request is - rejected with HTTP 409. Inherits :class:`ConflictError` so the - centralised ``EXCEPTION_HANDLERS`` routing produces the right - RFC 9457 response. + rejected with HTTP 409. """ default_message: ClassVar[str] = ( "Backup scheduler is unrestartable after a timed-out stop" ) + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 diff --git a/src/synthorg/client/factory.py b/src/synthorg/client/factory.py index ca48add390..36d7548e86 100644 --- a/src/synthorg/client/factory.py +++ b/src/synthorg/client/factory.py @@ -8,6 +8,7 @@ """ from pathlib import Path +from typing import ClassVar from synthorg.client.adapters import ( DirectAdapter, @@ -31,6 +32,8 @@ from synthorg.client.report.json_export import JsonExportReport from synthorg.client.report.metrics_only import MetricsOnlyReport from synthorg.client.report.summary import SummaryReport +from synthorg.core.domain_errors import ValidationError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.core.types import NotBlankStr from synthorg.observability import get_logger from synthorg.observability.events.client import ( @@ -78,9 +81,14 @@ ) -class UnknownStrategyError(ValueError): +class UnknownStrategyError(ValidationError): """Raised when a config discriminator does not map to any strategy.""" + default_message: ClassVar[str] = "Unknown strategy discriminator" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 + def _require_non_blank( value: object, diff --git a/src/synthorg/communication/event_stream/stream.py b/src/synthorg/communication/event_stream/stream.py index 894ddfacf3..226b84c7ae 100644 --- a/src/synthorg/communication/event_stream/stream.py +++ b/src/synthorg/communication/event_stream/stream.py @@ -20,6 +20,7 @@ from collections import OrderedDict from dataclasses import dataclass, field from datetime import UTC, datetime +from typing import ClassVar from uuid import uuid4 from synthorg.communication.event_stream.types import ( @@ -27,6 +28,8 @@ StreamEvent, ) from synthorg.core.clock import Clock, SystemClock +from synthorg.core.domain_errors import ConflictError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.observability import get_logger, safe_error_description from synthorg.observability.events.event_stream import ( EVENT_STREAM_HUB_JANITOR_FAILED, @@ -48,7 +51,7 @@ _DEFAULT_JANITOR_STOP_TIMEOUT_SECONDS = 10.0 -class EventStreamHubUnrestartableError(RuntimeError): +class EventStreamHubUnrestartableError(ConflictError): """Raised when ``start()`` is called on a hub that timed out during ``stop()``. Per the lifecycle-sync contract, a service whose ``stop()`` drain hits its @@ -57,6 +60,13 @@ class EventStreamHubUnrestartableError(RuntimeError): Operators must construct a fresh hub instead. """ + default_message: ClassVar[str] = ( + "Event stream hub is unrestartable after a timed-out stop" + ) + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + # Intentionally NOT frozen: ``last_active`` is mutated in-place under # ``EventStreamHub._lock`` per the docstring below. CLAUDE.md "Frozen diff --git a/src/synthorg/communication/mcp_errors.py b/src/synthorg/communication/mcp_errors.py index 2b1c984ab1..a8c5ac0f66 100644 --- a/src/synthorg/communication/mcp_errors.py +++ b/src/synthorg/communication/mcp_errors.py @@ -8,8 +8,13 @@ real service before the gap was detected. """ +from typing import ClassVar -class CapabilityNotSupportedError(RuntimeError): +from synthorg.core.domain_errors import FeatureNotImplementedError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class CapabilityNotSupportedError(FeatureNotImplementedError): """Raised when a facade method's underlying primitive cannot satisfy the op. Attributes: @@ -18,7 +23,11 @@ class CapabilityNotSupportedError(RuntimeError): error message for operator observability. """ - domain_code = "not_supported" + domain_code: ClassVar[str] = "not_supported" + default_message: ClassVar[str] = "Capability not supported by underlying primitive" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.FEATURE_NOT_IMPLEMENTED + status_code: ClassVar[int] = 501 def __init__(self, capability: str, detail: str) -> None: """Initialise with a capability name and reason. diff --git a/src/synthorg/communication/meeting/agent_caller.py b/src/synthorg/communication/meeting/agent_caller.py index 6d22c1b066..53514a6b67 100644 --- a/src/synthorg/communication/meeting/agent_caller.py +++ b/src/synthorg/communication/meeting/agent_caller.py @@ -19,7 +19,7 @@ module only runs a single agent's inference. """ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar from synthorg.budget.call_category import LLMCallCategory @@ -32,6 +32,8 @@ from synthorg.budget.tracker import CostTracker # noqa: TC001 from synthorg.communication.meeting.models import AgentResponse from synthorg.communication.meeting.protocol import AgentCaller # noqa: TC001 +from synthorg.core.domain_errors import DomainError, NotFoundError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.core.types import NotBlankStr from synthorg.engine.prompt_safety import ( TAG_PEER_CONTRIBUTION, @@ -56,13 +58,18 @@ logger = get_logger(__name__) -class UnknownMeetingAgentError(LookupError): +class UnknownMeetingAgentError(NotFoundError): """Raised when the meeting orchestrator invokes an unregistered agent. Attributes: agent_id: The agent identifier that was not found in the registry. """ + default_message: ClassVar[str] = "Meeting agent not registered" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 + def __init__(self, agent_id: NotBlankStr) -> None: super().__init__( f"Meeting agent {agent_id!r} is not registered in the " @@ -216,7 +223,7 @@ def _render_system_prompt(identity: AgentIdentity) -> str: return f"{body}\n\n{directive}" -class MeetingAgentCallerNotConfiguredError(RuntimeError): +class MeetingAgentCallerNotConfiguredError(DomainError): """Raised when a meeting runs without an agent + provider registry. The meeting orchestrator is structurally wired during Phase 1 so @@ -238,6 +245,13 @@ class MeetingAgentCallerNotConfiguredError(RuntimeError): only meaningful when at least one dependency is missing. """ + default_message: ClassVar[str] = ( + "Meeting agent caller missing wire-time dependencies" + ) + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + def __init__( self, *, diff --git a/src/synthorg/config/errors.py b/src/synthorg/config/errors.py index 9b2774bfcf..4afcf50d7f 100644 --- a/src/synthorg/config/errors.py +++ b/src/synthorg/config/errors.py @@ -1,6 +1,10 @@ """Custom exception hierarchy for configuration errors.""" from dataclasses import dataclass +from typing import ClassVar + +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode @dataclass(frozen=True) @@ -21,7 +25,7 @@ class ConfigLocation: column: int | None = None -class ConfigError(Exception): +class ConfigError(DomainError): """Base exception for configuration errors. Attributes: @@ -29,6 +33,14 @@ class ConfigError(Exception): locations: Source locations associated with this error. """ + default_message: ClassVar[str] = "Configuration error" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + + message: str + locations: tuple[ConfigLocation, ...] + def __init__( self, message: str, @@ -62,10 +74,20 @@ def __str__(self) -> str: class ConfigFileNotFoundError(ConfigError): """Raised when a configuration file does not exist.""" + default_message: ClassVar[str] = "Configuration file not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 + class ConfigParseError(ConfigError): """Raised when YAML parsing fails.""" + default_message: ClassVar[str] = "Configuration file parse error" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 400 + class ConfigValidationError(ConfigError): """Raised when Pydantic validation fails. @@ -75,6 +97,13 @@ class ConfigValidationError(ConfigError): ``(key_path, message)`` pairs. """ + default_message: ClassVar[str] = "Configuration validation error" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 + + field_errors: tuple[tuple[str, str], ...] + def __init__( self, message: str, diff --git a/src/synthorg/core/persistence_errors.py b/src/synthorg/core/persistence_errors.py index 7798cda5ed..d23d5b6b1c 100644 --- a/src/synthorg/core/persistence_errors.py +++ b/src/synthorg/core/persistence_errors.py @@ -2,25 +2,36 @@ All persistence-related errors inherit from :class:`PersistenceError` so callers can catch the entire family with a single except clause. +``PersistenceError`` itself is rooted in :class:`DomainError` so the API +layer's centralised RFC 9457 dispatch picks up every subtype; the +existing ``handle_record_not_found`` / ``handle_persistence_integrity_error`` +/ ``handle_duplicate_record`` / ``handle_persistence_error`` handlers +remain registered so persistence-layer 4xx responses keep their fixed +public messages (``"Resource not found"``, etc.) instead of leaking +record identifiers from ``str(exc)``. Each concrete exception carries an ``is_retryable`` class attribute mirroring the provider-layer convention in -:mod:`synthorg.providers.errors`. Callers that implement bounded +:mod:`synthorg.providers.errors`. Callers that implement bounded retry/backoff (e.g. a repository middleware) can branch on this flag -without string-matching the driver exception. Default: ``False``. +without string-matching the driver exception. Default: ``False``. Transient I/O failures override to ``True``. - -This module is dependency-free apart from stdlib so the persistence -implementation (SQLite/Postgres repositories) and any consumer -(controllers, services, engine) can import it without pulling in the -HTTP layer. """ +from typing import ClassVar + +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode -class PersistenceError(Exception): + +class PersistenceError(DomainError): """Base exception for all persistence operations.""" is_retryable: bool = False + default_message: ClassVar[str] = "Persistence operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.PERSISTENCE_ERROR + status_code: ClassVar[int] = 500 class PersistenceConnectionError(PersistenceError): @@ -47,26 +58,35 @@ class RecordNotFoundError(PersistenceError): """Raised when a requested record does not exist. Used by ``ArtifactStorageBackend.retrieve()`` when no content - exists for the given artifact ID. Repository ``get()`` methods + exists for the given artifact ID. Repository ``get()`` methods return ``None`` on miss instead of raising. """ is_retryable: bool = False + default_message: ClassVar[str] = "Resource not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RECORD_NOT_FOUND + status_code: ClassVar[int] = 404 class DuplicateRecordError(PersistenceError): """Raised when inserting a record that already exists.""" is_retryable: bool = False + default_message: ClassVar[str] = "Duplicate record" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.DUPLICATE_RECORD + status_code: ClassVar[int] = 409 class QueryError(PersistenceError): """Raised when a query fails due to invalid parameters or backend issues. Transient by default: connection drops and deadlocks during a - query surface here and are safe to retry. Deterministic failures + query surface here and are safe to retry. Deterministic failures (bad SQL, invalid params) use :class:`ConstraintViolationError` - or :class:`VersionConflictError` which override to non-retryable. + or :class:`PersistenceVersionConflictError` which override to + non-retryable. """ is_retryable: bool = True @@ -77,7 +97,7 @@ class ConstraintViolationError(QueryError): Carries a ``constraint`` attribute that identifies the violated constraint by its DB-side name (for Postgres) or by a stable - token parsed from the error message (for SQLite). Callers can + token parsed from the error message (for SQLite). Callers can check this attribute to map the violation to a domain error without parsing error strings. @@ -85,7 +105,7 @@ class ConstraintViolationError(QueryError): given input and will not succeed on a bare retry. Blank ``constraint`` (empty / whitespace-only) is normalised to the - sentinel ``""`` rather than raising. Raising + sentinel ``""`` rather than raising. Raising :class:`ValueError` from ``__init__`` would bypass downstream ``except PersistenceError`` handlers; the sentinel keeps the construction inside the persistence-error family so callers that @@ -95,6 +115,17 @@ class ConstraintViolationError(QueryError): UNKNOWN_CONSTRAINT: str = "" is_retryable: bool = False + default_message: ClassVar[str] = "Database constraint violated" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + # 400 instead of 422: a DB-level constraint violation is a + # malformed-request condition surfaced after the request reached the + # data layer (e.g. unique-key collision under concurrent insert), not + # a Pydantic-style schema-validation miss caught at the API boundary. + # The dedicated ``handle_persistence_integrity_error`` handler also + # hardcodes 400 for the same reason; this ClassVar matches that + # mapping so the two paths stay in lockstep. + status_code: ClassVar[int] = 400 def __init__(self, message: str, *, constraint: str) -> None: super().__init__(message) @@ -102,15 +133,24 @@ def __init__(self, message: str, *, constraint: str) -> None: self.constraint: str = stripped or self.UNKNOWN_CONSTRAINT -class VersionConflictError(QueryError): +class PersistenceVersionConflictError(QueryError): """Raised when an optimistic concurrency version check fails. Non-retryable at this layer: the caller must re-read, re-apply - its intended change, and resubmit with the fresh version. A + its intended change, and resubmit with the fresh version. A blind retry would just lose the racing write. + + The API layer translates this to + :class:`synthorg.core.domain_errors.VersionConflictError` (the + HTTP-aware sibling) so that controllers raise / catch consistently + with other 409 paths. """ is_retryable: bool = False + default_message: ClassVar[str] = "Optimistic concurrency conflict" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.VERSION_CONFLICT + status_code: ClassVar[int] = 409 class MalformedRowError(QueryError): @@ -118,7 +158,7 @@ class MalformedRowError(QueryError): JSON decode failures, validation errors, and missing-key errors on rows already committed to the database are deterministic - data-integrity problems, not transient query failures. Retrying + data-integrity problems, not transient query failures. Retrying the same read returns the same corrupt row -- it just burns the budget and obscures the underlying integrity issue. @@ -132,9 +172,17 @@ class ArtifactTooLargeError(PersistenceError): """Raised when a single artifact exceeds the maximum allowed size.""" is_retryable: bool = False + default_message: ClassVar[str] = "Artifact exceeds maximum size" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.ARTIFACT_TOO_LARGE + status_code: ClassVar[int] = 413 class ArtifactStorageFullError(PersistenceError): """Raised when total artifact storage exceeds capacity.""" is_retryable: bool = False + default_message: ClassVar[str] = "Artifact storage at capacity" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.ARTIFACT_STORAGE_FULL + status_code: ClassVar[int] = 507 diff --git a/src/synthorg/core/registry/errors.py b/src/synthorg/core/registry/errors.py index cef3a4a45d..ea613b08ec 100644 --- a/src/synthorg/core/registry/errors.py +++ b/src/synthorg/core/registry/errors.py @@ -2,12 +2,20 @@ import copy from types import MappingProxyType -from typing import Any +from typing import Any, ClassVar +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode -class StrategyFactoryError(LookupError): + +class StrategyFactoryError(DomainError): """Base class for strategy registry lookup failures.""" + default_message: ClassVar[str] = "Strategy factory error" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + def __init__( self, message: str, @@ -16,8 +24,6 @@ def __init__( ) -> None: """Store *message* and an immutable *context* mapping.""" self.message = message - # Deep-copy to insulate the stored context from later mutation of - # nested values supplied by the caller (e.g. a list of names). self.context: MappingProxyType[str, Any] = MappingProxyType( copy.deepcopy(context) if context else {}, ) @@ -33,3 +39,8 @@ def __str__(self) -> str: class StrategyFactoryNotFoundError(StrategyFactoryError): """No factory registered for the requested discriminator value.""" + + default_message: ClassVar[str] = "Strategy factory not registered" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 diff --git a/src/synthorg/engine/quality/decomposers/llm.py b/src/synthorg/engine/quality/decomposers/llm.py index 677a35da47..d80c6b2c55 100644 --- a/src/synthorg/engine/quality/decomposers/llm.py +++ b/src/synthorg/engine/quality/decomposers/llm.py @@ -14,7 +14,7 @@ class of parse errors, retries on malformed JSON, and prompt-injection import json from collections.abc import Mapping -from typing import Any, Final, NoReturn +from typing import Any, ClassVar, Final, NoReturn from synthorg.budget.call_category import LLMCallCategory @@ -22,6 +22,8 @@ class of parse errors, retries on malformed JSON, and prompt-injection # annotation, so it must resolve at runtime when downstream tooling # evaluates type hints (DI containers, doc generators). from synthorg.budget.tracker import CostTracker # noqa: TC001 +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.core.task import AcceptanceCriterion # noqa: TC001 from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.engine.prompt_safety import ( @@ -89,9 +91,14 @@ class of parse errors, retries on malformed JSON, and prompt-injection _DECOMPOSER_TOOL_REQUIRED_KEYS: Final[frozenset[str]] = frozenset({"probes"}) -class LLMDecompositionError(RuntimeError): +class LLMDecompositionError(DomainError): """Raised when the LLM decomposer cannot obtain a valid probe list.""" + default_message: ClassVar[str] = "LLM decomposition failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + def _decomposer_instructions(max_probes: int) -> str: """Build the instruction block passed in the user prompt.""" diff --git a/src/synthorg/engine/strategy/principles.py b/src/synthorg/engine/strategy/principles.py index ff690131b3..cb174af1d4 100644 --- a/src/synthorg/engine/strategy/principles.py +++ b/src/synthorg/engine/strategy/principles.py @@ -9,11 +9,13 @@ from importlib import resources from pathlib import Path from types import MappingProxyType -from typing import Any +from typing import Any, ClassVar import yaml -from pydantic import ValidationError +from pydantic import ValidationError as PydanticValidationError +from synthorg.core.domain_errors import NotFoundError, ValidationError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.engine.strategy.models import ( ConstitutionalPrinciple, ConstitutionalPrincipleConfig, @@ -42,13 +44,23 @@ ) -class StrategyPackNotFoundError(Exception): +class StrategyPackNotFoundError(NotFoundError): """Raised when a requested principle pack cannot be found.""" + default_message: ClassVar[str] = "Strategy pack not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 -class StrategyPackValidationError(Exception): + +class StrategyPackValidationError(ValidationError): """Raised when a principle pack fails schema validation.""" + default_message: ClassVar[str] = "Strategy pack validation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 + def _validate_pack_name(name: str) -> str: """Normalize and validate a pack name. @@ -117,7 +129,7 @@ def _parse_pack_yaml( description=data.get("description", ""), principles=principles, ) - except (TypeError, ValueError, KeyError, ValidationError) as exc: + except (TypeError, ValueError, KeyError, PydanticValidationError) as exc: msg = f"Validation failed for pack from {source_name}: {exc}" logger.warning( STRATEGY_PACK_INVALID, @@ -281,8 +293,15 @@ def load_and_merge( for i, raw in enumerate(config.custom): try: principle = ConstitutionalPrinciple(**raw) - except (TypeError, ValidationError) as exc: + except (TypeError, PydanticValidationError) as exc: msg = f"Invalid custom principle at index {i}: {exc}" + logger.warning( + STRATEGY_PACK_INVALID, + source="custom", + index=i, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) raise StrategyPackValidationError(msg) from exc if principle.id not in existing_ids: principles.append(principle) diff --git a/src/synthorg/engine/workflow/blueprint_errors.py b/src/synthorg/engine/workflow/blueprint_errors.py index 7ef7334db6..b2b5e1b9b5 100644 --- a/src/synthorg/engine/workflow/blueprint_errors.py +++ b/src/synthorg/engine/workflow/blueprint_errors.py @@ -1,9 +1,24 @@ """Blueprint-specific error types.""" +from typing import ClassVar -class BlueprintNotFoundError(Exception): +from synthorg.core.domain_errors import NotFoundError, ValidationError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class BlueprintNotFoundError(NotFoundError): """Raised when a workflow blueprint cannot be found.""" + default_message: ClassVar[str] = "Workflow blueprint not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 -class BlueprintValidationError(Exception): + +class BlueprintValidationError(ValidationError): """Raised when a blueprint YAML fails schema validation.""" + + default_message: ClassVar[str] = "Workflow blueprint validation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 diff --git a/src/synthorg/engine/workflow/execution_lifecycle.py b/src/synthorg/engine/workflow/execution_lifecycle.py index 5d9f05c3aa..3d5b5919a9 100644 --- a/src/synthorg/engine/workflow/execution_lifecycle.py +++ b/src/synthorg/engine/workflow/execution_lifecycle.py @@ -13,7 +13,10 @@ WorkflowNodeExecutionStatus, WorkflowNodeType, ) -from synthorg.core.persistence_errors import VersionConflictError +from synthorg.core.persistence_errors import ( + PersistenceVersionConflictError, + RecordNotFoundError, +) from synthorg.engine.errors import ( WorkflowExecutionError, WorkflowExecutionNotFoundError, @@ -282,6 +285,89 @@ async def fail_execution( # -- Task-event handling --------------------------------------------------- +def _retry_event_for(event: TaskStateChanged) -> str: + """Return the WORKFLOW_EXEC_NODE_TASK_* event for a retry log line.""" + if event.new_status in {TaskStatus.FAILED, TaskStatus.CANCELLED}: + return WORKFLOW_EXEC_NODE_TASK_FAILED + return WORKFLOW_EXEC_NODE_TASK_COMPLETED + + +async def _dispatch_task_handler( + repo: WorkflowExecutionRepository, + execution: WorkflowExecution, + event: TaskStateChanged, +) -> None: + """Route a terminal task event to the matching lifecycle handler.""" + if event.new_status in {TaskStatus.FAILED, TaskStatus.CANCELLED}: + await _handle_task_failed(repo, execution, event) + else: + await _handle_task_completed(repo, execution, event) + + +def _expected_terminal_node_status( + event: TaskStateChanged, +) -> WorkflowNodeExecutionStatus: + """Return the node status the lifecycle handler would set for ``event``.""" + if event.new_status in {TaskStatus.FAILED, TaskStatus.CANCELLED}: + return WorkflowNodeExecutionStatus.TASK_FAILED + return WorkflowNodeExecutionStatus.TASK_COMPLETED + + +async def _retry_after_version_conflict( + repo: WorkflowExecutionRepository, + execution: WorkflowExecution, + event: TaskStateChanged, +) -> None: + """Refresh the execution after a conflict and re-dispatch idempotently.""" + retry_event = _retry_event_for(event) + logger.warning( + retry_event, + execution_id=execution.id, + task_id=event.task_id, + error="Concurrent modification; re-fetching execution", + ) + refreshed = await repo.get(execution.id) + if refreshed is None: + logger.warning( + retry_event, + execution_id=execution.id, + task_id=event.task_id, + error="Execution not found after version conflict", + ) + return + if refreshed.status in { + WorkflowExecutionStatus.COMPLETED, + WorkflowExecutionStatus.FAILED, + WorkflowExecutionStatus.CANCELLED, + }: + return + # Idempotency: if the winning writer already applied the same + # node transition this event would set, re-dispatching would + # bump version + emit a bogus same-status hop. + if _node_status_for_task(refreshed, event.task_id) is ( + _expected_terminal_node_status(event) + ): + return + try: + await _dispatch_task_handler(repo, refreshed, event) + except MemoryError, RecursionError: + raise + except RecordNotFoundError: + logger.warning( + retry_event, + execution_id=execution.id, + task_id=event.task_id, + error="Execution deleted during retry", + ) + except PersistenceVersionConflictError: + logger.warning( + retry_event, + execution_id=execution.id, + task_id=event.task_id, + error="Concurrent modification during retry", + ) + + async def handle_task_state_changed( repo: WorkflowExecutionRepository, event: TaskStateChanged, @@ -302,52 +388,18 @@ async def handle_task_state_changed( return try: - if event.new_status in {TaskStatus.FAILED, TaskStatus.CANCELLED}: - await _handle_task_failed(repo, execution, event) - else: - await _handle_task_completed(repo, execution, event) - except VersionConflictError: - retry_event = ( - WORKFLOW_EXEC_NODE_TASK_FAILED - if event.new_status in {TaskStatus.FAILED, TaskStatus.CANCELLED} - else WORKFLOW_EXEC_NODE_TASK_COMPLETED - ) + await _dispatch_task_handler(repo, execution, event) + except RecordNotFoundError: + # Execution row was deleted between read and update; retry + # would only fail the same way, so log and drop. logger.warning( - retry_event, + _retry_event_for(event), execution_id=execution.id, task_id=event.task_id, - error="Concurrent modification; re-fetching execution", - ) - refreshed = await repo.get(execution.id) - if refreshed is None: - logger.warning( - retry_event, - execution_id=execution.id, - task_id=event.task_id, - error="Execution not found after version conflict", - ) - return - if refreshed.status in { - WorkflowExecutionStatus.COMPLETED, - WorkflowExecutionStatus.FAILED, - WorkflowExecutionStatus.CANCELLED, - }: - return - try: - if event.new_status in { - TaskStatus.FAILED, - TaskStatus.CANCELLED, - }: - await _handle_task_failed(repo, refreshed, event) - else: - await _handle_task_completed(repo, refreshed, event) - except Exception: - logger.exception( - retry_event, - execution_id=execution.id, - task_id=event.task_id, - error="Retry after version conflict also failed", - ) + error="Execution deleted before lifecycle update could persist", + ) + except PersistenceVersionConflictError: + await _retry_after_version_conflict(repo, execution, event) # -- Private helpers ------------------------------------------------------- diff --git a/src/synthorg/engine/workflow/service.py b/src/synthorg/engine/workflow/service.py index 281f2c938b..84b24a538f 100644 --- a/src/synthorg/engine/workflow/service.py +++ b/src/synthorg/engine/workflow/service.py @@ -7,10 +7,16 @@ place so the audit trail stays consistent. """ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar +from synthorg.core.domain_errors import ( + ConflictError, + NotFoundError, + VersionConflictError, +) from synthorg.core.enums import WorkflowType # noqa: TC001 -from synthorg.core.persistence_errors import VersionConflictError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode +from synthorg.core.persistence_errors import PersistenceVersionConflictError from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.engine.workflow.definition import WorkflowDefinition # noqa: TC001 from synthorg.observability import get_logger, safe_error_description @@ -39,17 +45,32 @@ logger = get_logger(__name__) -class WorkflowDefinitionExistsError(Exception): +class WorkflowDefinitionExistsError(ConflictError): """Raised when ``create_definition`` targets an id that already exists.""" + default_message: ClassVar[str] = "Workflow definition already exists" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + -class WorkflowDefinitionNotFoundError(Exception): +class WorkflowDefinitionNotFoundError(NotFoundError): """Raised when ``fetch_for_update`` / update targets a missing id.""" + default_message: ClassVar[str] = "Workflow definition not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 -class WorkflowDefinitionRevisionMismatchError(Exception): + +class WorkflowDefinitionRevisionMismatchError(VersionConflictError): """Raised when an optimistic-concurrency revision check fails. + Inherits the HTTP 409 / ``VERSION_CONFLICT`` ClassVar mapping from + :class:`VersionConflictError`; the centralised RFC 9457 dispatch + therefore produces a 409 response without any controller-side + translation. + ``actual`` is ``None`` when the persistence layer surfaces a conflict without a usable stored-revision read (e.g. the follow-up lookup raced with a delete). Callers should treat ``None`` as @@ -68,7 +89,7 @@ def __init__( Args: message: Human-readable description (passed to - ``Exception.__init__``). + ``DomainError.__init__``). definition_id: Workflow definition identifier that hit the conflict. expected: Stored revision the caller was asserting against @@ -273,8 +294,8 @@ async def update_definition( its stored ``revision`` does not match ``definition.revision - 1`` (optimistic-concurrency failure). Translated from the persistence layer's - ``VersionConflictError`` so callers of this service - never depend on a persistence-level exception type. + ``PersistenceVersionConflictError`` so callers of this + service never depend on a persistence-level exception type. MemoryError: Propagated from either the stored-revision lookup probe or the best-effort snapshot; never swallowed. @@ -284,7 +305,7 @@ async def update_definition( """ try: updated = await self._definitions.update_if_exists(definition) - except VersionConflictError as exc: + except PersistenceVersionConflictError as exc: # ``update_if_exists`` applies the UPDATE only when the # stored row's ``revision`` equals ``definition.revision - 1``, # so the "expected stored revision" the caller is asserting diff --git a/src/synthorg/hr/errors.py b/src/synthorg/hr/errors.py index 400dbfffcb..05a1d9e7ff 100644 --- a/src/synthorg/hr/errors.py +++ b/src/synthorg/hr/errors.py @@ -10,10 +10,11 @@ from typing import ClassVar -from synthorg.core.domain_errors import ConflictError +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode -class HRError(Exception): +class HRError(DomainError): """Base error for all HR operations. ``is_retryable`` defaults to ``False`` so the provider-retry layer @@ -22,6 +23,10 @@ class HRError(Exception): """ is_retryable: bool = False + default_message: ClassVar[str] = "HR operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 # ── Hiring ──────────────────────────────────────────────────────── @@ -34,14 +39,29 @@ class HiringError(HRError): class HiringApprovalRequiredError(HiringError): """Hiring request requires approval before instantiation.""" + default_message: ClassVar[str] = "Hiring request requires approval" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + class HiringRejectedError(HiringError): """Hiring request was rejected.""" + default_message: ClassVar[str] = "Hiring request was rejected" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + class InvalidCandidateError(HiringError): """Candidate card is invalid or does not exist on the request.""" + default_message: ClassVar[str] = "Invalid candidate card" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 + # ── Firing / Offboarding ───────────────────────────────────────── @@ -79,10 +99,20 @@ class AgentRegistryError(HRError): class AgentNotFoundError(AgentRegistryError): """Agent not found in the registry.""" + default_message: ClassVar[str] = "Agent not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 + class AgentAlreadyRegisteredError(AgentRegistryError): """Agent is already registered.""" + default_message: ClassVar[str] = "Agent already registered" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + # ── Performance ────────────────────────────────────────────────── @@ -105,10 +135,20 @@ class PromotionError(HRError): class PromotionCooldownError(PromotionError): """Promotion is blocked by the cooldown period.""" + default_message: ClassVar[str] = "Promotion blocked by cooldown" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + class PromotionApprovalRequiredError(PromotionError): """Promotion requires human approval before proceeding.""" + default_message: ClassVar[str] = "Promotion requires approval" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 + # ── Pruning ──────────────────────────────────────────────────── @@ -117,7 +157,7 @@ class PruningError(HRError): """Error during the pruning process.""" -class PruningUnrestartableError(ConflictError): +class PruningUnrestartableError(PruningError): """Raised when ``PruningService.start()`` is called after a timed-out stop. Mirrors :class:`BackupUnrestartableError`: a stuck drain leaves an @@ -129,6 +169,9 @@ class PruningUnrestartableError(ConflictError): default_message: ClassVar[str] = ( "Pruning service is unrestartable after a timed-out stop" ) + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 # ── Personalities ─────────────────────────────────────────────── @@ -141,6 +184,11 @@ class PersonalityError(HRError): class PersonalityNotFoundError(PersonalityError): """Personality preset not found in the catalogue.""" + default_message: ClassVar[str] = "Personality preset not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 + # ── Training ──────────────────────────────────────────────────── @@ -151,3 +199,8 @@ class TrainingError(HRError): class TrainingSessionNotFoundError(TrainingError): """Training session not found in the session store.""" + + default_message: ClassVar[str] = "Training session not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 diff --git a/src/synthorg/hr/scaling/errors.py b/src/synthorg/hr/scaling/errors.py index 55a1a0354d..e984b22b60 100644 --- a/src/synthorg/hr/scaling/errors.py +++ b/src/synthorg/hr/scaling/errors.py @@ -1,9 +1,19 @@ """Scaling domain exceptions.""" +from typing import ClassVar -class ScalingError(Exception): +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class ScalingError(DomainError): """Base exception for scaling operations.""" + default_message: ClassVar[str] = "Scaling operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + class ScalingStrategyError(ScalingError): """A strategy evaluation failed.""" @@ -19,3 +29,8 @@ class ScalingExecutionError(ScalingError): class ScalingCooldownActiveError(ScalingError): """Action blocked by an active cooldown window.""" + + default_message: ClassVar[str] = "Scaling blocked by active cooldown" + error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + status_code: ClassVar[int] = 409 diff --git a/src/synthorg/meta/appliers/_validation.py b/src/synthorg/meta/appliers/_validation.py index 94ebdfe9b4..db20989625 100644 --- a/src/synthorg/meta/appliers/_validation.py +++ b/src/synthorg/meta/appliers/_validation.py @@ -22,7 +22,9 @@ ] -class DottedPathError(ValueError): +class DottedPathError( + ValueError, +): # lint-allow: domain-error-hierarchy -- internal path-parse precondition """Raised when a dotted config path cannot be parsed or resolved.""" diff --git a/src/synthorg/meta/appliers/code_applier.py b/src/synthorg/meta/appliers/code_applier.py index 0a15318caf..2747760b3b 100644 --- a/src/synthorg/meta/appliers/code_applier.py +++ b/src/synthorg/meta/appliers/code_applier.py @@ -10,8 +10,10 @@ import asyncio from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, ClassVar +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode from synthorg.meta.models import ( ApplyResult, CIValidationResult, @@ -41,7 +43,7 @@ logger = get_logger(__name__) -class PartialWriteError(RuntimeError): +class PartialWriteError(DomainError): """Raised by ``_write_changes`` on partial application failure. Carries the ordered subset of ``CodeChange`` instances that were @@ -51,6 +53,11 @@ class PartialWriteError(RuntimeError): were never made and could clobber files the proposal never touched. """ + default_message: ClassVar[str] = "Partial code write failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + def __init__(self, message: str, *, applied: tuple[CodeChange, ...]) -> None: super().__init__(message) self.applied = applied @@ -423,7 +430,9 @@ def _write_changes( changes that were successfully written before the error. Raises: - RuntimeError: If a file write or delete fails. + PartialWriteError: If applying a change fails after one or + more changes were written; carries the applied subset + so the outer ``apply()`` can revert what already landed. """ changed: list[str] = [] applied: list[CodeChange] = [] diff --git a/src/synthorg/meta/appliers/config_applier.py b/src/synthorg/meta/appliers/config_applier.py index 47c43bcb13..09af36b5fc 100644 --- a/src/synthorg/meta/appliers/config_applier.py +++ b/src/synthorg/meta/appliers/config_applier.py @@ -46,7 +46,9 @@ """Zero-arg callable returning the current ``RootConfig`` snapshot.""" -class _PathResolutionError(ValueError): +class _PathResolutionError( + ValueError, +): # lint-allow: domain-error-hierarchy -- internal config-path precondition """Raised when a dotted path does not resolve on the given model.""" diff --git a/src/synthorg/meta/appliers/github_client.py b/src/synthorg/meta/appliers/github_client.py index 0c02ad048b..0d3973b5aa 100644 --- a/src/synthorg/meta/appliers/github_client.py +++ b/src/synthorg/meta/appliers/github_client.py @@ -7,10 +7,12 @@ import base64 import re -from typing import Any, Self +from typing import Any, ClassVar, Self import httpx +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode +from synthorg.integrations.errors import IntegrationError from synthorg.meta.models import CodeChange, CodeOperation from synthorg.observability import get_logger from synthorg.observability.events.meta import ( @@ -23,17 +25,19 @@ # ── Custom exception types ─────────────────────────────────────── -class GitHubAPIError(Exception): +class GitHubAPIError(IntegrationError): """Raised on non-auth GitHub API failures. Attributes: - status_code: HTTP status code from the response. + github_status_code: HTTP status code from the upstream response. action: Human-readable description of the attempted action. body: Sanitized response body snippet. """ + default_message: ClassVar[str] = "GitHub API request failed" + def __init__(self, *, status_code: int, action: str, body: str) -> None: - self.status_code = status_code + self.github_status_code = status_code self.action = action self.body = body super().__init__( @@ -47,6 +51,11 @@ class GitHubAuthError(GitHubAPIError): Indicates invalid, expired, or insufficiently scoped credentials. """ + default_message: ClassVar[str] = "GitHub API authentication failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.AUTH + error_code: ClassVar[ErrorCode] = ErrorCode.UNAUTHORIZED + status_code: ClassVar[int] = 401 + logger = get_logger(__name__) diff --git a/src/synthorg/meta/errors.py b/src/synthorg/meta/errors.py index a9c5db1fc7..e9a512e60b 100644 --- a/src/synthorg/meta/errors.py +++ b/src/synthorg/meta/errors.py @@ -6,10 +6,20 @@ internal config state. """ +from typing import ClassVar -class SelfImprovementError(Exception): +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class SelfImprovementError(DomainError): """Base class for self-improvement service domain errors.""" + default_message: ClassVar[str] = "Self-improvement operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + class SelfImprovementTriggerError(SelfImprovementError): """Raised when ``SelfImprovementService.trigger_cycle`` cannot run. diff --git a/src/synthorg/meta/mcp/errors.py b/src/synthorg/meta/mcp/errors.py index 1e1eacfa5b..2dfebc73b6 100644 --- a/src/synthorg/meta/mcp/errors.py +++ b/src/synthorg/meta/mcp/errors.py @@ -4,19 +4,24 @@ when caller input is malformed or a guardrail has not been satisfied. The handler is expected to catch these and return an ``err(...)`` envelope to the invoker; they are intentionally *not* system errors. + +The ``domain_code`` class attribute carries the stable wire identifier used +in MCP error envelopes (``"invalid_argument"`` / ``"guardrail_violated"``). +This is the MCP-layer dispatch token consumed by callers; it sits alongside +the RFC 9457 ``error_code`` ClassVar (used by the HTTP layer) without +overlap. """ -from typing import Literal +from typing import ClassVar, Literal + +from synthorg.core.domain_errors import ForbiddenError, ValidationError GuardrailCode = Literal["missing_confirm", "missing_reason", "missing_actor"] -class ArgumentValidationError(ValueError): +class ArgumentValidationError(ValidationError): """Raised when a required handler argument is missing or wrongly typed. - The ``domain_code`` attribute carries the stable wire identifier used - in error envelopes so callers can dispatch programmatically. - Call sites use the module-level factory ``invalid_argument(name, expected)`` rather than instantiating this class directly; the factory keeps the raise statement free of string literals so ruff's ``EM101`` rule passes. @@ -27,7 +32,7 @@ class ArgumentValidationError(ValueError): domain_code: Stable wire identifier (``"invalid_argument"``). """ - domain_code = "invalid_argument" + domain_code: ClassVar[str] = "invalid_argument" def __init__(self, argument: str, expected: str) -> None: """Initialise with argument name and expected-type description. @@ -42,7 +47,7 @@ def __init__(self, argument: str, expected: str) -> None: self.expected = expected -class GuardrailViolationError(PermissionError): +class GuardrailViolationError(ForbiddenError): """Raised when a destructive-op call fails its guardrails. Guardrails are: ``confirm=True`` set, non-blank ``reason``, and a @@ -56,7 +61,7 @@ class GuardrailViolationError(PermissionError): domain_code: Stable wire identifier (``"guardrail_violated"``). """ - domain_code = "guardrail_violated" + domain_code: ClassVar[str] = "guardrail_violated" def __init__(self, violation: GuardrailCode, message: str) -> None: """Initialise with a violation code and human-readable message. diff --git a/src/synthorg/meta/mcp/handlers/approvals.py b/src/synthorg/meta/mcp/handlers/approvals.py index 63da7d1219..c482228452 100644 --- a/src/synthorg/meta/mcp/handlers/approvals.py +++ b/src/synthorg/meta/mcp/handlers/approvals.py @@ -63,7 +63,9 @@ logger = get_logger(__name__) -class _NotFoundError(LookupError): +class _NotFoundError( + LookupError, +): # lint-allow: domain-error-hierarchy -- MCP handler-local; no HTTP layer """Handler-local not-found signal. Raised inside the try block so the ``err()`` envelope picks up @@ -75,7 +77,9 @@ class _NotFoundError(LookupError): domain_code = "not_found" -class _ConflictError(RuntimeError): +class _ConflictError( + RuntimeError, +): # lint-allow: domain-error-hierarchy -- MCP handler-local; no HTTP layer """Handler-local conflict signal (approve/reject race).""" domain_code = "conflict" diff --git a/src/synthorg/meta/mcp/handlers/budget.py b/src/synthorg/meta/mcp/handlers/budget.py index b0c0a0c182..9169a7a24a 100644 --- a/src/synthorg/meta/mcp/handlers/budget.py +++ b/src/synthorg/meta/mcp/handlers/budget.py @@ -72,7 +72,9 @@ def _versions_service(app_state: Any) -> BudgetConfigVersionsService: ) -class _NotFoundError(LookupError): +class _NotFoundError( + LookupError +): # lint-allow: domain-error-hierarchy -- MCP handler-local; no HTTP layer """Handler-local not-found signal (budget config version missing).""" domain_code = "not_found" diff --git a/src/synthorg/meta/rollout/inverse_dispatch.py b/src/synthorg/meta/rollout/inverse_dispatch.py index 213aa2a1d5..8ccaa2853f 100644 --- a/src/synthorg/meta/rollout/inverse_dispatch.py +++ b/src/synthorg/meta/rollout/inverse_dispatch.py @@ -28,7 +28,9 @@ logger = get_logger(__name__) -class UnknownRollbackOperationError(ValueError): +class UnknownRollbackOperationError( + ValueError, +): # lint-allow: domain-error-hierarchy -- internal dispatch precondition """Raised when a ``RollbackOperation`` has no registered handler.""" diff --git a/src/synthorg/meta/rollout/regression/welch.py b/src/synthorg/meta/rollout/regression/welch.py index c35fe8c6ed..01af01aead 100644 --- a/src/synthorg/meta/rollout/regression/welch.py +++ b/src/synthorg/meta/rollout/regression/welch.py @@ -27,11 +27,15 @@ _MIN_SAMPLES_PER_ARM = 2 -class InsufficientDataError(ValueError): +class InsufficientDataError( + ValueError, +): # lint-allow: domain-error-hierarchy -- statistical precondition """Raised when either arm has fewer than two observations.""" -class ZeroVarianceError(ValueError): +class ZeroVarianceError( + ValueError, +): # lint-allow: domain-error-hierarchy -- statistical precondition """Raised when the combined Welch statistics are undefined. The guard fires when ``se_sq <= 0`` (both arms flat, so the diff --git a/src/synthorg/meta/telemetry/emitter.py b/src/synthorg/meta/telemetry/emitter.py index a467cc6b46..cf965149ee 100644 --- a/src/synthorg/meta/telemetry/emitter.py +++ b/src/synthorg/meta/telemetry/emitter.py @@ -51,7 +51,9 @@ _LOG_BODY_MAX_LEN = 500 -class _TransientPostError(Exception): +class _TransientPostError( + Exception, +): # lint-allow: domain-error-hierarchy -- internal retry sentinel """Internal sentinel: retryable HTTP failure (network exception, 3xx, 5xx). Carries either an HTTP status (3xx / 5xx response) or ``None`` diff --git a/src/synthorg/observability/audit_chain/tsa_client.py b/src/synthorg/observability/audit_chain/tsa_client.py index 13dbdf3c2c..17d445ac8f 100644 --- a/src/synthorg/observability/audit_chain/tsa_client.py +++ b/src/synthorg/observability/audit_chain/tsa_client.py @@ -68,7 +68,9 @@ _RESP_CONTENT_TYPE = "application/timestamp-reply" -class TsaError(Exception): +class TsaError( + Exception, +): # lint-allow: domain-error-hierarchy -- RFC 3161 client; obs internals stdlib-rooted """Base class for TSA client failures. Every subclass signals a specific failure mode so the audit diff --git a/src/synthorg/persistence/postgres/refresh_repo.py b/src/synthorg/persistence/postgres/refresh_repo.py index 77ea7838cf..a9bc05c84e 100644 --- a/src/synthorg/persistence/postgres/refresh_repo.py +++ b/src/synthorg/persistence/postgres/refresh_repo.py @@ -44,7 +44,9 @@ def _import_dict_row() -> Any: logger = get_logger(__name__) -class _SessionRevokedError(Exception): +class _SessionRevokedError( + Exception, +): # lint-allow: domain-error-hierarchy -- internal txn-rollback sentinel """Internal sentinel: rollback the consume() transaction. Raised inside the ``conn.transaction()`` context when the diff --git a/src/synthorg/persistence/postgres/settings_repo.py b/src/synthorg/persistence/postgres/settings_repo.py index 954755406f..96b3183a22 100644 --- a/src/synthorg/persistence/postgres/settings_repo.py +++ b/src/synthorg/persistence/postgres/settings_repo.py @@ -31,7 +31,9 @@ logger = get_logger(__name__) -class _CASConflict(Exception): # noqa: N818 +class _CASConflictError( + Exception, +): # lint-allow: domain-error-hierarchy -- internal CAS-miss sentinel """Internal sentinel -- raised inside transactions to signal CAS miss. Caught immediately by ``set_many`` to convert the exception into a @@ -337,7 +339,7 @@ async def set_many( (namespace, key, value, updated_at_dt), ) if cur.rowcount == 0: - raise _CASConflict # noqa: TRY301 + raise _CASConflictError # noqa: TRY301 continue expected_dt = self._safe_parse_iso( expected, @@ -358,8 +360,8 @@ async def set_many( ), ) if cur.rowcount == 0: - raise _CASConflict # noqa: TRY301 - except _CASConflict: + raise _CASConflictError # noqa: TRY301 + except _CASConflictError: return False except psycopg.Error as exc: msg = "Failed to set_many settings" diff --git a/src/synthorg/persistence/postgres/workflow_definition_repo.py b/src/synthorg/persistence/postgres/workflow_definition_repo.py index 8ce7caa62f..15fae1afd3 100644 --- a/src/synthorg/persistence/postgres/workflow_definition_repo.py +++ b/src/synthorg/persistence/postgres/workflow_definition_repo.py @@ -14,7 +14,10 @@ from pydantic import ValidationError from synthorg.core.enums import WorkflowType -from synthorg.core.persistence_errors import QueryError, VersionConflictError +from synthorg.core.persistence_errors import ( + PersistenceVersionConflictError, + QueryError, +) from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.engine.workflow.definition import ( WorkflowDefinition, @@ -189,7 +192,7 @@ async def update_if_exists(self, definition: WorkflowDefinition) -> bool: definition_id=definition.id, error=msg, ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) await conn.commit() except psycopg.Error as exc: msg = f"Failed to update workflow definition {definition.id!r}" @@ -267,7 +270,7 @@ async def save(self, definition: WorkflowDefinition) -> None: Raises: QueryError: If the database operation fails. - VersionConflictError: If optimistic concurrency check fails. + PersistenceVersionConflictError: If optimistic concurrency check fails. """ self._require_valid_revision(definition) @@ -355,7 +358,7 @@ async def save(self, definition: WorkflowDefinition) -> None: definition_id=definition.id, error=msg, ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) await conn.commit() except psycopg.Error as exc: msg = f"Failed to save workflow definition {definition.id!r}" diff --git a/src/synthorg/persistence/postgres/workflow_execution_repo.py b/src/synthorg/persistence/postgres/workflow_execution_repo.py index fa2633455c..005042f1f2 100644 --- a/src/synthorg/persistence/postgres/workflow_execution_repo.py +++ b/src/synthorg/persistence/postgres/workflow_execution_repo.py @@ -20,8 +20,9 @@ ) from synthorg.core.persistence_errors import ( DuplicateRecordError, + PersistenceVersionConflictError, QueryError, - VersionConflictError, + RecordNotFoundError, ) from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.engine.workflow.execution_models import ( @@ -138,7 +139,10 @@ async def save(self, execution: WorkflowExecution) -> None: Raises: DuplicateRecordError: If inserting a duplicate ID. - VersionConflictError: If optimistic concurrency check fails. + PersistenceVersionConflictError: If the row exists but its + stored version differs from ``execution.version - 1``. + RecordNotFoundError: If updating a row that no longer + exists (delete race between read and update). QueryError: If the database operation fails. """ if execution.version == 1: @@ -226,17 +230,33 @@ async def _update(self, execution: WorkflowExecution) -> None: ), ) if cur.rowcount == 0: + await cur.execute( + "SELECT version FROM workflow_executions WHERE id = %s", + (execution.id,), + ) + row = await cur.fetchone() + if row is None: + msg = ( + f"Workflow execution {execution.id!r} not found" + f" (deleted between read and update)" + ) + logger.warning( + PERSISTENCE_WORKFLOW_EXEC_SAVE_FAILED, + execution_id=execution.id, + error=msg, + ) + raise RecordNotFoundError(msg) msg = ( f"Version conflict saving workflow execution" f" {execution.id!r}: expected version" - f" {execution.version - 1}, not found" + f" {execution.version - 1}, current is {row[0]}" ) logger.warning( PERSISTENCE_WORKFLOW_EXEC_SAVE_FAILED, execution_id=execution.id, error=msg, ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) await conn.commit() except psycopg.Error as exc: msg = f"Failed to save workflow execution {execution.id!r}" diff --git a/src/synthorg/persistence/sqlite/backup_utils.py b/src/synthorg/persistence/sqlite/backup_utils.py index 18eb0c1a68..df68218c91 100644 --- a/src/synthorg/persistence/sqlite/backup_utils.py +++ b/src/synthorg/persistence/sqlite/backup_utils.py @@ -47,7 +47,9 @@ def vacuum_into(source_path: str, target_path: str) -> int: return Path(target_path).stat().st_size -class IntegrityCheckError(RuntimeError): +class IntegrityCheckError( + RuntimeError, +): # lint-allow: domain-error-hierarchy -- CLI/operator flow; not HTTP-exposed """Raised when ``PRAGMA integrity_check`` cannot run. Distinct from the check itself reporting corruption (which returns diff --git a/src/synthorg/persistence/sqlite/workflow_definition_repo.py b/src/synthorg/persistence/sqlite/workflow_definition_repo.py index ef352d3a28..546c586e8a 100644 --- a/src/synthorg/persistence/sqlite/workflow_definition_repo.py +++ b/src/synthorg/persistence/sqlite/workflow_definition_repo.py @@ -12,7 +12,10 @@ import aiosqlite from synthorg.core.enums import WorkflowType -from synthorg.core.persistence_errors import QueryError, VersionConflictError +from synthorg.core.persistence_errors import ( + PersistenceVersionConflictError, + QueryError, +) from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.engine.workflow.definition import ( WorkflowDefinition, @@ -165,8 +168,9 @@ async def update_if_exists(self, definition: WorkflowDefinition) -> bool: Enforces the same optimistic-concurrency rule as :meth:`save`: the UPDATE only applies when the stored row's ``revision`` equals ``definition.revision - 1``; otherwise a - ``VersionConflictError`` is raised so callers distinguish - "row missing" (``False``) from "row changed concurrently". + ``PersistenceVersionConflictError`` is raised so callers + distinguish "row missing" (``False``) from "row changed + concurrently". """ self._require_valid_revision(definition) nodes_json = json.dumps( @@ -227,7 +231,7 @@ async def update_if_exists(self, definition: WorkflowDefinition) -> bool: definition_id=definition.id, error=msg, ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) await self._db.commit() except sqlite3.Error as exc: # Roll back the aiosqlite transaction so the shared @@ -390,7 +394,7 @@ async def save(self, definition: WorkflowDefinition) -> None: definition_id=definition.id, error=msg, ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) await self._db.commit() except sqlite3.Error as exc: await _rollback_quietly(self._db) diff --git a/src/synthorg/persistence/sqlite/workflow_execution_repo.py b/src/synthorg/persistence/sqlite/workflow_execution_repo.py index e5c5b5a122..0ae805201d 100644 --- a/src/synthorg/persistence/sqlite/workflow_execution_repo.py +++ b/src/synthorg/persistence/sqlite/workflow_execution_repo.py @@ -18,8 +18,9 @@ ) from synthorg.core.persistence_errors import ( DuplicateRecordError, + PersistenceVersionConflictError, QueryError, - VersionConflictError, + RecordNotFoundError, ) from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.engine.workflow.execution_models import ( @@ -161,7 +162,10 @@ async def save(self, execution: WorkflowExecution) -> None: Raises: DuplicateRecordError: If inserting a duplicate ID. - VersionConflictError: If optimistic concurrency check fails. + PersistenceVersionConflictError: If the row exists but its + stored version differs from ``execution.version - 1``. + RecordNotFoundError: If updating a row that no longer + exists (delete race between read and update). QueryError: If the database operation fails. """ if execution.version == 1: @@ -273,18 +277,34 @@ async def _update(self, execution: WorkflowExecution) -> None: ), ) if cursor.rowcount == 0: + probe = await self._db.execute( + "SELECT version FROM workflow_executions WHERE id = ?", + (execution.id,), + ) + row = await probe.fetchone() await self._db.rollback() + if row is None: + msg = ( + f"Workflow execution {execution.id!r} not found" + f" (deleted between read and update)" + ) + logger.warning( + PERSISTENCE_WORKFLOW_EXEC_SAVE_FAILED, + execution_id=execution.id, + error=msg, + ) + raise RecordNotFoundError(msg) msg = ( f"Version conflict saving workflow execution" f" {execution.id!r}: expected version" - f" {execution.version - 1}, not found" + f" {execution.version - 1}, current is {row[0]}" ) logger.warning( PERSISTENCE_WORKFLOW_EXEC_SAVE_FAILED, execution_id=execution.id, error=msg, ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) await self._db.commit() except sqlite3.Error as exc: await self._db.rollback() diff --git a/src/synthorg/persistence/workflow_definition_repo.py b/src/synthorg/persistence/workflow_definition_repo.py index 4fcd6d7e2f..33d9436031 100644 --- a/src/synthorg/persistence/workflow_definition_repo.py +++ b/src/synthorg/persistence/workflow_definition_repo.py @@ -69,8 +69,8 @@ async def update_if_exists(self, definition: WorkflowDefinition) -> bool: Raises: PersistenceError: If the operation fails. - VersionConflictError: If optimistic-concurrency fields do - not match (backends that enforce ``revision``-based + PersistenceVersionConflictError: If optimistic-concurrency + fields do not match (backends that enforce ``revision``-based concurrency should raise). """ ... diff --git a/src/synthorg/persistence/workflow_execution_repo.py b/src/synthorg/persistence/workflow_execution_repo.py index 38c979b999..70ca2ff538 100644 --- a/src/synthorg/persistence/workflow_execution_repo.py +++ b/src/synthorg/persistence/workflow_execution_repo.py @@ -26,7 +26,10 @@ async def save(self, execution: WorkflowExecution) -> None: Raises: DuplicateRecordError: If inserting a duplicate ID. - VersionConflictError: If optimistic concurrency check fails. + PersistenceVersionConflictError: If the row exists but its + stored version differs from ``execution.version - 1``. + RecordNotFoundError: If updating a row that no longer + exists (delete race between read and update). PersistenceError: If the operation fails. """ ... diff --git a/src/synthorg/security/trust/errors.py b/src/synthorg/security/trust/errors.py index 380f352bc2..1b4c433350 100644 --- a/src/synthorg/security/trust/errors.py +++ b/src/synthorg/security/trust/errors.py @@ -1,9 +1,19 @@ """Trust domain error hierarchy.""" +from typing import ClassVar -class TrustError(Exception): +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class TrustError(DomainError): """Base error for all trust operations.""" + default_message: ClassVar[str] = "Trust operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + class TrustEvaluationError(TrustError): """Error during trust evaluation.""" diff --git a/src/synthorg/settings/errors.py b/src/synthorg/settings/errors.py index 598f98e4c5..c397e47c95 100644 --- a/src/synthorg/settings/errors.py +++ b/src/synthorg/settings/errors.py @@ -1,17 +1,37 @@ """Error hierarchy for the settings persistence layer.""" +from typing import ClassVar -class SettingsError(Exception): +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class SettingsError(DomainError): """Base exception for all settings-related errors.""" + default_message: ClassVar[str] = "Settings operation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 + class SettingNotFoundError(SettingsError): """Raised when a setting key is not found in the registry.""" + default_message: ClassVar[str] = "Setting not found" + error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_NOT_FOUND + status_code: ClassVar[int] = 404 + class SettingValidationError(SettingsError): """Raised when a setting value fails type, range, or pattern validation.""" + default_message: ClassVar[str] = "Setting validation failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.VALIDATION + error_code: ClassVar[ErrorCode] = ErrorCode.VALIDATION_ERROR + status_code: ClassVar[int] = 422 + class SettingReadOnlyError(SettingValidationError): """Raised when a write is attempted on a ``read_only_post_init`` setting. diff --git a/src/synthorg/telemetry/privacy.py b/src/synthorg/telemetry/privacy.py index b0419857fc..693993a490 100644 --- a/src/synthorg/telemetry/privacy.py +++ b/src/synthorg/telemetry/privacy.py @@ -121,7 +121,9 @@ ) -class PrivacyViolationError(Exception): +class PrivacyViolationError( + Exception, +): # lint-allow: domain-error-hierarchy -- internal scrubber sentinel """Raised when a telemetry event fails privacy validation.""" diff --git a/src/synthorg/telemetry/reporters/errors.py b/src/synthorg/telemetry/reporters/errors.py index 2dbab0ca91..18a16e04b7 100644 --- a/src/synthorg/telemetry/reporters/errors.py +++ b/src/synthorg/telemetry/reporters/errors.py @@ -8,10 +8,25 @@ silent fallback to ``NoopReporter`` never hides a programming bug. """ +from typing import ClassVar -class LogfireTokenMissingError(RuntimeError): +from synthorg.core.domain_errors import DomainError +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode + + +class LogfireTokenMissingError(DomainError): """Raised when the build artifact ships the sentinel token.""" + default_message: ClassVar[str] = "Logfire token missing" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 -class LogfireConfigureError(RuntimeError): + +class LogfireConfigureError(DomainError): """Raised when ``logfire.configure()`` fails at init time.""" + + default_message: ClassVar[str] = "Logfire configure failed" + error_category: ClassVar[ErrorCategory] = ErrorCategory.INTERNAL + error_code: ClassVar[ErrorCode] = ErrorCode.INTERNAL_ERROR + status_code: ClassVar[int] = 500 diff --git a/src/synthorg/tools/html_parse_guard.py b/src/synthorg/tools/html_parse_guard.py index 397deac174..1c26c24e31 100644 --- a/src/synthorg/tools/html_parse_guard.py +++ b/src/synthorg/tools/html_parse_guard.py @@ -70,12 +70,15 @@ ) -class XXEDetectedError(ValueError): +class XXEDetectedError( + ValueError, +): # lint-allow: domain-error-hierarchy -- caught by HTMLParseGuard.sanitize """Pre-parse detection of an XXE payload. - Subclass of ``ValueError`` so the ``except Exception`` branch in - :meth:`HTMLParseGuard.sanitize` catches it naturally and routes - to the safe-empty fallback. + Subclass of ``ValueError``. :meth:`HTMLParseGuard.sanitize` catches + this explicitly (ahead of its generic ``except Exception`` branch) + and returns a safe-empty :class:`HTMLSanitizeResult` so the XXE + rejection event is not double-emitted. ``is_retryable = False`` so the resilience layer's retry-classifier (see ``BaseCompletionProvider`` / ``api/exception_handlers.py``) diff --git a/tests/conformance/persistence/test_workflow_definition_repository.py b/tests/conformance/persistence/test_workflow_definition_repository.py index 63478e827b..bf27de1b05 100644 --- a/tests/conformance/persistence/test_workflow_definition_repository.py +++ b/tests/conformance/persistence/test_workflow_definition_repository.py @@ -9,7 +9,7 @@ import pytest from synthorg.core.enums import WorkflowEdgeType, WorkflowNodeType, WorkflowType -from synthorg.core.persistence_errors import VersionConflictError +from synthorg.core.persistence_errors import PersistenceVersionConflictError from synthorg.engine.workflow.definition import ( WorkflowDefinition, WorkflowEdge, @@ -201,7 +201,7 @@ async def test_version_conflict_on_stale_update( revision=5, ) - with pytest.raises(VersionConflictError): + with pytest.raises(PersistenceVersionConflictError): await repo.save(stale) async def test_delete_workflow_definition( diff --git a/tests/conformance/persistence/test_workflow_execution_repository.py b/tests/conformance/persistence/test_workflow_execution_repository.py index 2bd8d3b2eb..d8c16b2b84 100644 --- a/tests/conformance/persistence/test_workflow_execution_repository.py +++ b/tests/conformance/persistence/test_workflow_execution_repository.py @@ -15,7 +15,11 @@ WorkflowNodeType, WorkflowType, ) -from synthorg.core.persistence_errors import DuplicateRecordError, VersionConflictError +from synthorg.core.persistence_errors import ( + DuplicateRecordError, + PersistenceVersionConflictError, + RecordNotFoundError, +) from synthorg.engine.workflow.definition import ( WorkflowDefinition, WorkflowEdge, @@ -282,7 +286,7 @@ async def test_update_with_version_conflict( self, backend: PersistenceBackend, ) -> None: - """Update with wrong version raises VersionConflictError.""" + """Update with wrong version raises PersistenceVersionConflictError.""" defn_repo = backend.workflow_definitions exec_repo = backend.workflow_executions @@ -305,9 +309,41 @@ async def test_update_with_version_conflict( version=5, ) - with pytest.raises(VersionConflictError): + with pytest.raises(PersistenceVersionConflictError): await exec_repo.save(stale) + async def test_update_after_delete_raises_record_not_found( + self, + backend: PersistenceBackend, + ) -> None: + # Differentiates a delete race from a true version mismatch + # so the API surface keeps the 404 (not-found) path distinct + # from the 409 (conflict) path. + defn_repo = backend.workflow_definitions + exec_repo = backend.workflow_executions + + defn = _make_workflow_definition("wf-delete-race", "Test") + await defn_repo.save(defn) + + execution = _make_workflow_execution( + execution_id="exec-delete-race", + definition_id="wf-delete-race", + ) + await exec_repo.save(execution) + deleted = await exec_repo.delete("exec-delete-race") + assert deleted is True + + updated = _make_workflow_execution( + execution_id="exec-delete-race", + definition_id="wf-delete-race", + status=WorkflowExecutionStatus.COMPLETED, + completed_at=datetime.now(UTC), + version=2, + ) + + with pytest.raises(RecordNotFoundError): + await exec_repo.save(updated) + async def test_delete_execution( self, backend: PersistenceBackend, diff --git a/tests/unit/api/controllers/test_company.py b/tests/unit/api/controllers/test_company.py index edbe660df6..b44462dc68 100644 --- a/tests/unit/api/controllers/test_company.py +++ b/tests/unit/api/controllers/test_company.py @@ -105,11 +105,13 @@ async def test_taskgroup_error_returns_clean_error_response( with TestClient(app) as client: client.headers.update(make_auth_headers("ceo")) resolver = app.state.app_state.config_resolver + original_get_str = resolver.get_str resolver.get_str = AsyncMock( + spec=original_get_str, side_effect=SettingNotFoundError("company/company_name"), ) resp = client.get("/api/v1/company") - assert resp.status_code == 500 + assert resp.status_code == 404 body = resp.json() assert body["success"] is False assert body["error"] is not None diff --git a/tests/unit/api/controllers/test_setup.py b/tests/unit/api/controllers/test_setup.py index c8c1ad936f..a0961ff34b 100644 --- a/tests/unit/api/controllers/test_setup.py +++ b/tests/unit/api/controllers/test_setup.py @@ -1184,6 +1184,7 @@ async def test_returns_false_on_generic_exception( original = settings_svc.get_entry settings_svc.get_entry = AsyncMock( + spec=original, side_effect=RuntimeError("db connection lost"), ) try: @@ -1207,7 +1208,8 @@ async def test_returns_false_on_setting_not_found_error( original = settings_svc.get_entry settings_svc.get_entry = AsyncMock( - side_effect=SettingNotFoundError("company", "name_locales"), + spec=original, + side_effect=SettingNotFoundError("company/name_locales"), ) try: result = await _check_has_name_locales(settings_svc) @@ -1254,7 +1256,8 @@ async def test_returns_none_when_setting_not_found( original = settings_svc.get_entry settings_svc.get_entry = AsyncMock( - side_effect=SettingNotFoundError("company", "name_locales"), + spec=original, + side_effect=SettingNotFoundError("company/name_locales"), ) try: result = await _read_name_locales(settings_svc) diff --git a/tests/unit/api/controllers/test_setup_locales.py b/tests/unit/api/controllers/test_setup_locales.py index 6a685c9b36..6fb465d3bd 100644 --- a/tests/unit/api/controllers/test_setup_locales.py +++ b/tests/unit/api/controllers/test_setup_locales.py @@ -244,6 +244,7 @@ async def test_returns_false_on_generic_exception( original = settings_svc.get_entry settings_svc.get_entry = AsyncMock( + spec=original, side_effect=RuntimeError("db connection lost"), ) try: @@ -267,7 +268,8 @@ async def test_returns_false_on_setting_not_found_error( original = settings_svc.get_entry settings_svc.get_entry = AsyncMock( - side_effect=SettingNotFoundError("company", "name_locales"), + spec=original, + side_effect=SettingNotFoundError("company/name_locales"), ) try: result = await _check_has_name_locales(settings_svc) @@ -314,7 +316,8 @@ async def test_returns_none_when_setting_not_found( original = settings_svc.get_entry settings_svc.get_entry = AsyncMock( - side_effect=SettingNotFoundError("company", "name_locales"), + spec=original, + side_effect=SettingNotFoundError("company/name_locales"), ) try: result = await _read_name_locales(settings_svc) diff --git a/tests/unit/api/controllers/test_workflow_versions.py b/tests/unit/api/controllers/test_workflow_versions.py index 2e1413f38e..50a3d2eede 100644 --- a/tests/unit/api/controllers/test_workflow_versions.py +++ b/tests/unit/api/controllers/test_workflow_versions.py @@ -5,6 +5,9 @@ import pytest from litestar.testing import TestClient +from synthorg.api.services.workflow_rollback_service import WorkflowRollbackService +from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode +from synthorg.core.persistence_errors import PersistenceVersionConflictError from tests.unit.api.conftest import make_auth_headers # ── Helpers ────────────────────────────────────────────────────── @@ -241,14 +244,28 @@ def test_diff_between_versions(self, test_client: TestClient[Any]) -> None: assert "name" in meta_fields @pytest.mark.unit - def test_diff_same_version_400(self, test_client: TestClient[Any]) -> None: + def test_diff_same_version_returns_validation_error( + self, + test_client: TestClient[Any], + ) -> None: wf = _create_workflow(test_client) resp = test_client.get( f"/api/v1/workflows/{wf['id']}/diff", params={"from_version": 1, "to_version": 1}, headers=make_auth_headers("ceo"), ) - assert resp.status_code == 400 + assert resp.status_code == 422 + body = resp.json() + assert body["success"] is False + detail = body["error_detail"] + from synthorg.core.error_taxonomy import ( + ErrorCategory, + ErrorCode, + ) + + assert detail["error_code"] == ErrorCode.VALIDATION_ERROR + assert detail["error_category"] == ErrorCategory.VALIDATION + assert detail["retryable"] is False @pytest.mark.unit def test_diff_version_not_found(self, test_client: TestClient[Any]) -> None: @@ -315,3 +332,51 @@ def test_rollback_definition_not_found(self, test_client: TestClient[Any]) -> No headers=make_auth_headers("ceo"), ) assert resp.status_code == 404 + + @pytest.mark.unit + def test_rollback_persistence_version_conflict_translates_to_409( + self, + test_client: TestClient[Any], + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """A late persistence-layer concurrency miss surfaces as 409. + + The controller catches ``PersistenceVersionConflictError`` from + the rollback service and re-raises the HTTP-aware + ``VersionConflictError`` so the centralised RFC 9457 dispatch + produces a 409 response. Without that translation the persistence + error would escape uncaught and become a generic 500. + """ + wf = _create_workflow(test_client, name="Original") + wf_id = wf["id"] + _update_workflow(test_client, wf_id, 1, name="Updated") + + # Patch the ``rollback`` method at the class level so the + # AppState-wired instance picks up the stub via normal attribute + # lookup. ``WorkflowRollbackService`` uses ``__slots__``, which + # makes per-instance method substitution a no-go; + # ``workflow_rollback_service`` is also a read-only property on + # ``AppState``. Class-level patching is the supported path. + async def _raise_conflict( + self: WorkflowRollbackService, + rolled_back: object, + *, + target_version: int, + saved_by: object, + ) -> None: + msg = "racing write" + raise PersistenceVersionConflictError(msg) + + monkeypatch.setattr(WorkflowRollbackService, "rollback", _raise_conflict) + + resp = test_client.post( + f"/api/v1/workflows/{wf_id}/rollback", + json={"target_version": 1, "expected_revision": 2}, + headers=make_auth_headers("ceo"), + ) + assert resp.status_code == 409 + body = resp.json() + assert body["success"] is False + detail = body["error_detail"] + assert detail["error_code"] == ErrorCode.VERSION_CONFLICT + assert detail["error_category"] == ErrorCategory.CONFLICT diff --git a/tests/unit/api/fakes_workflow.py b/tests/unit/api/fakes_workflow.py index 5a24001e11..000010fb13 100644 --- a/tests/unit/api/fakes_workflow.py +++ b/tests/unit/api/fakes_workflow.py @@ -6,7 +6,10 @@ from packaging.version import InvalidVersion, Version from synthorg.core.enums import WorkflowExecutionStatus, WorkflowNodeType -from synthorg.core.persistence_errors import DuplicateRecordError, VersionConflictError +from synthorg.core.persistence_errors import ( + DuplicateRecordError, + PersistenceVersionConflictError, +) from synthorg.persistence.subworkflow_repo import ( ParentReference, SubworkflowSummary, @@ -73,7 +76,7 @@ async def save(self, execution: WorkflowExecution) -> None: f"Cannot insert execution {execution.id!r}" f" with version {execution.version}" ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) else: if execution.version == 1: msg = f"Execution {execution.id!r} already exists" @@ -83,7 +86,7 @@ async def save(self, execution: WorkflowExecution) -> None: f"Version conflict: expected {stored.version + 1}," f" got {execution.version}" ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) self._executions[execution.id] = copy.deepcopy(execution) async def get(self, execution_id: str) -> WorkflowExecution | None: diff --git a/tests/unit/api/services/test_workflow_rollback_service.py b/tests/unit/api/services/test_workflow_rollback_service.py index e30058d990..fd210782b5 100644 --- a/tests/unit/api/services/test_workflow_rollback_service.py +++ b/tests/unit/api/services/test_workflow_rollback_service.py @@ -13,7 +13,7 @@ constructed from the supplied ``version_repo``. - A ``PersistenceError`` from the snapshot is swallowed (best-effort) so the rollback itself stays committed. -- A ``VersionConflictError`` from ``definition_repo.save`` propagates +- A ``PersistenceVersionConflictError`` from ``definition_repo.save`` propagates to the caller (the controller catches it and returns 409). """ @@ -23,7 +23,10 @@ import pytest from synthorg.api.services.workflow_rollback_service import WorkflowRollbackService -from synthorg.core.persistence_errors import PersistenceError, VersionConflictError +from synthorg.core.persistence_errors import ( + PersistenceError, + PersistenceVersionConflictError, +) from synthorg.core.types import NotBlankStr from synthorg.engine.workflow.definition import WorkflowDefinition from synthorg.persistence.version_repo import VersionRepository @@ -116,7 +119,9 @@ async def test_propagates_version_conflict_from_definition_save( ) -> None: """Optimistic-concurrency mismatch surfaces to the controller.""" definition_repo = AsyncMock(spec=WorkflowDefinitionRepository) - definition_repo.save.side_effect = VersionConflictError("revision moved") + definition_repo.save.side_effect = PersistenceVersionConflictError( + "revision moved" + ) version_repo = AsyncMock(spec=VersionRepository) version_repo.get_latest_version = AsyncMock( spec=VersionRepository.get_latest_version, @@ -125,7 +130,7 @@ async def test_propagates_version_conflict_from_definition_save( definition_repo=definition_repo, version_repo=version_repo, ) - with pytest.raises(VersionConflictError): + with pytest.raises(PersistenceVersionConflictError): await service.rollback( _definition_stub(), target_version=2, diff --git a/tests/unit/api/test_exception_handlers.py b/tests/unit/api/test_exception_handlers.py index 37818ec9b9..8f6c0ef954 100644 --- a/tests/unit/api/test_exception_handlers.py +++ b/tests/unit/api/test_exception_handlers.py @@ -1634,3 +1634,160 @@ async def handler(project_id: str) -> None: error_category=ErrorCategory.NOT_FOUND, retryable=False, ) + + +class TestDomainErrorFamilyClassVarHttpMapping: + """End-to-end ClassVar -> HTTP mapping for DomainError-rooted families. + + Catches typos in ``status_code`` / ``error_code`` / ``error_category`` + on any class in the domain-error tree. Without these, a refactor + that flips ``status_code = 404`` to ``500`` on (for example) + ``AgentNotFoundError`` would silently ship: the AST gate only + checks that the class roots in ``DomainError``, not that its HTTP + metadata is sensible. + """ + + @pytest.mark.parametrize( + ("import_path", "exc_name", "status_code", "error_code", "error_category"), + [ + ( + "synthorg.hr.errors", + "AgentNotFoundError", + 404, + ErrorCode.RESOURCE_NOT_FOUND, + ErrorCategory.NOT_FOUND, + ), + ( + "synthorg.hr.errors", + "AgentAlreadyRegisteredError", + 409, + ErrorCode.RESOURCE_CONFLICT, + ErrorCategory.CONFLICT, + ), + ( + "synthorg.hr.errors", + "InvalidCandidateError", + 422, + ErrorCode.VALIDATION_ERROR, + ErrorCategory.VALIDATION, + ), + ( + "synthorg.hr.errors", + "PromotionCooldownError", + 409, + ErrorCode.RESOURCE_CONFLICT, + ErrorCategory.CONFLICT, + ), + ( + "synthorg.hr.errors", + "HRError", + 500, + ErrorCode.INTERNAL_ERROR, + ErrorCategory.INTERNAL, + ), + ( + "synthorg.hr.scaling.errors", + "ScalingCooldownActiveError", + 409, + ErrorCode.RESOURCE_CONFLICT, + ErrorCategory.CONFLICT, + ), + ( + "synthorg.hr.scaling.errors", + "ScalingError", + 500, + ErrorCode.INTERNAL_ERROR, + ErrorCategory.INTERNAL, + ), + ( + "synthorg.security.trust.errors", + "TrustError", + 500, + ErrorCode.INTERNAL_ERROR, + ErrorCategory.INTERNAL, + ), + ( + "synthorg.settings.errors", + "SettingNotFoundError", + 404, + ErrorCode.RESOURCE_NOT_FOUND, + ErrorCategory.NOT_FOUND, + ), + ( + "synthorg.settings.errors", + "SettingValidationError", + 422, + ErrorCode.VALIDATION_ERROR, + ErrorCategory.VALIDATION, + ), + ( + "synthorg.settings.errors", + "SettingsError", + 500, + ErrorCode.INTERNAL_ERROR, + ErrorCategory.INTERNAL, + ), + ( + "synthorg.config.errors", + "ConfigError", + 500, + ErrorCode.INTERNAL_ERROR, + ErrorCategory.INTERNAL, + ), + ( + "synthorg.core.registry.errors", + "StrategyFactoryNotFoundError", + 404, + ErrorCode.RESOURCE_NOT_FOUND, + ErrorCategory.NOT_FOUND, + ), + ( + "synthorg.core.registry.errors", + "StrategyFactoryError", + 500, + ErrorCode.INTERNAL_ERROR, + ErrorCategory.INTERNAL, + ), + ], + ) + def test_migrated_class_classvars_drive_response( + self, + import_path: str, + exc_name: str, + status_code: int, + error_code: int, + error_category: ErrorCategory, + ) -> None: + """Each class's ClassVars produce the right HTTP envelope.""" + import importlib + + module = importlib.import_module(import_path) + exc_cls: type[Exception] = getattr(module, exc_name) + + # Most DomainError subclasses accept ``()`` because the base + # ``__init__`` defaults ``message=None``; the fallback path + # below keeps the metadata-routing assertion alive even if a + # class ever requires a positional message argument. + @get("/test") + async def handler() -> None: + try: + raise exc_cls + except TypeError: + # Class needs an arg; pass an empty marker so the test + # still verifies metadata routing. + msg = "" + raise exc_cls(msg) # noqa: B904 -- intentional re-raise + + with TestClient(make_exception_handler_app(handler)) as client: + resp = client.get("/test") + assert resp.status_code == status_code, ( + f"{exc_name}: expected {status_code}, got {resp.status_code}" + ) + body = resp.json() + assert body["success"] is False + _assert_error_detail( + body, + error_code=error_code, + error_category=error_category, + retryable=False, + ) diff --git a/tests/unit/communication/meeting/test_agent_caller.py b/tests/unit/communication/meeting/test_agent_caller.py index 729509cbd0..0d7ab86388 100644 --- a/tests/unit/communication/meeting/test_agent_caller.py +++ b/tests/unit/communication/meeting/test_agent_caller.py @@ -148,9 +148,11 @@ async def test_unknown_agent_raises(self) -> None: pytest.raises(UnknownMeetingAgentError) as exc_info, ): await caller(_AGENT_ID, "prompt", 100, _MEETING_ID) - # LookupError-compatible so callers can catch with existing - # lookup-failure handlers. - assert isinstance(exc_info.value, LookupError) + # NotFoundError-rooted so the API layer's RFC 9457 dispatch + # produces a structured 404 instead of an opaque 500. + from synthorg.core.domain_errors import NotFoundError + + assert isinstance(exc_info.value, NotFoundError) # agent_id must be available as a typed attribute for # programmatic handling (logging, retries, metric tagging). assert exc_info.value.agent_id == _AGENT_ID diff --git a/tests/unit/core/test_persistence_errors.py b/tests/unit/core/test_persistence_errors.py index 15c2f1470b..2b0c3d6614 100644 --- a/tests/unit/core/test_persistence_errors.py +++ b/tests/unit/core/test_persistence_errors.py @@ -11,9 +11,9 @@ MigrationError, PersistenceConnectionError, PersistenceError, + PersistenceVersionConflictError, QueryError, RecordNotFoundError, - VersionConflictError, ) pytestmark = pytest.mark.unit @@ -30,7 +30,7 @@ def test_concrete_classes_inherit_persistence_error(self) -> None: DuplicateRecordError, QueryError, ConstraintViolationError, - VersionConflictError, + PersistenceVersionConflictError, MalformedRowError, ArtifactTooLargeError, ArtifactStorageFullError, @@ -41,7 +41,7 @@ def test_constraint_violation_is_query_error(self) -> None: assert issubclass(ConstraintViolationError, QueryError) def test_version_conflict_is_query_error(self) -> None: - assert issubclass(VersionConflictError, QueryError) + assert issubclass(PersistenceVersionConflictError, QueryError) def test_malformed_row_is_query_error(self) -> None: assert issubclass(MalformedRowError, QueryError) @@ -59,7 +59,7 @@ class TestRetryabilityFlags: (RecordNotFoundError, False), (DuplicateRecordError, False), (QueryError, True), - (VersionConflictError, False), + (PersistenceVersionConflictError, False), (MalformedRowError, False), (ArtifactTooLargeError, False), (ArtifactStorageFullError, False), diff --git a/tests/unit/engine/workflow/test_execution_lifecycle.py b/tests/unit/engine/workflow/test_execution_lifecycle.py index 2c214c3095..fd5bc35bdd 100644 --- a/tests/unit/engine/workflow/test_execution_lifecycle.py +++ b/tests/unit/engine/workflow/test_execution_lifecycle.py @@ -14,7 +14,11 @@ WorkflowExecutionStatus, WorkflowNodeExecutionStatus, ) -from synthorg.core.persistence_errors import DuplicateRecordError, VersionConflictError +from synthorg.core.persistence_errors import ( + DuplicateRecordError, + PersistenceVersionConflictError, + RecordNotFoundError, +) from synthorg.core.task import Task from synthorg.engine.errors import ( WorkflowExecutionError, @@ -85,8 +89,8 @@ async def save(self, execution: WorkflowExecution) -> None: stored = self._store.get(execution.id) if stored is None: if execution.version != 1: - msg = f"Cannot insert with version {execution.version}" - raise VersionConflictError(msg) + msg = f"Execution {execution.id!r} not found" + raise RecordNotFoundError(msg) else: if execution.version == 1: msg = f"Execution {execution.id!r} already exists" @@ -96,7 +100,7 @@ async def save(self, execution: WorkflowExecution) -> None: f"Version conflict: expected {stored.version + 1}," f" got {execution.version}" ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) self._store[execution.id] = copy.deepcopy(execution) async def get(self, execution_id: str) -> WorkflowExecution | None: diff --git a/tests/unit/engine/workflow/test_execution_service.py b/tests/unit/engine/workflow/test_execution_service.py index e212ba0781..1bb57dc0cf 100644 --- a/tests/unit/engine/workflow/test_execution_service.py +++ b/tests/unit/engine/workflow/test_execution_service.py @@ -11,7 +11,10 @@ WorkflowExecutionStatus, WorkflowNodeExecutionStatus, ) -from synthorg.core.persistence_errors import DuplicateRecordError, VersionConflictError +from synthorg.core.persistence_errors import ( + DuplicateRecordError, + PersistenceVersionConflictError, +) from synthorg.core.task import Task from synthorg.engine.errors import ( WorkflowDefinitionInvalidError, @@ -87,7 +90,7 @@ async def save(self, execution: WorkflowExecution) -> None: if stored is None: if execution.version != 1: msg = f"Cannot insert with version {execution.version}" - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) else: if execution.version == 1: msg = f"Execution {execution.id!r} already exists" @@ -97,7 +100,7 @@ async def save(self, execution: WorkflowExecution) -> None: f"Version conflict: expected {stored.version + 1}," f" got {execution.version}" ) - raise VersionConflictError(msg) + raise PersistenceVersionConflictError(msg) self._store[execution.id] = copy.deepcopy(execution) async def get(self, execution_id: str) -> WorkflowExecution | None: diff --git a/tests/unit/meta/test_code_applier.py b/tests/unit/meta/test_code_applier.py index c4dfcf420e..3e60797b77 100644 --- a/tests/unit/meta/test_code_applier.py +++ b/tests/unit/meta/test_code_applier.py @@ -439,7 +439,7 @@ class TestGitHubExceptions: def test_github_api_error_attributes(self) -> None: err = GitHubAPIError(status_code=500, action="push", body="oops") - assert err.status_code == 500 + assert err.github_status_code == 500 assert err.action == "push" assert err.body == "oops" assert "500" in str(err) diff --git a/tests/unit/persistence/sqlite/test_workflow_definition_repo.py b/tests/unit/persistence/sqlite/test_workflow_definition_repo.py index f72bf5d5a1..0b622aebbb 100644 --- a/tests/unit/persistence/sqlite/test_workflow_definition_repo.py +++ b/tests/unit/persistence/sqlite/test_workflow_definition_repo.py @@ -10,7 +10,7 @@ WorkflowNodeType, WorkflowType, ) -from synthorg.core.persistence_errors import VersionConflictError +from synthorg.core.persistence_errors import PersistenceVersionConflictError from synthorg.engine.workflow.definition import ( WorkflowDefinition, WorkflowEdge, @@ -181,7 +181,7 @@ async def test_save_rejects_on_revision_mismatch( name="Skipped revision", revision=3, ) - with pytest.raises(VersionConflictError, match="Version conflict"): + with pytest.raises(PersistenceVersionConflictError, match="Version conflict"): await repo.save(defn_v3) # Original revision 1 should still be stored @@ -194,7 +194,7 @@ async def test_save_rejects_duplicate_revision_1( self, repo: SQLiteWorkflowDefinitionRepository, ) -> None: - """Saving revision=1 twice must raise VersionConflictError. + """Saving revision=1 twice must raise PersistenceVersionConflictError. Regression test: previously the ``definition.revision > 1`` guard silently treated this as success. @@ -207,7 +207,7 @@ async def test_save_rejects_duplicate_revision_1( name="Duplicate", revision=1, ) - with pytest.raises(VersionConflictError, match="Version conflict"): + with pytest.raises(PersistenceVersionConflictError, match="Version conflict"): await repo.save(defn_v1_again) # Original should be unchanged diff --git a/tests/unit/persistence/sqlite/test_workflow_execution_repo.py b/tests/unit/persistence/sqlite/test_workflow_execution_repo.py index e9d6f13162..d4a817c692 100644 --- a/tests/unit/persistence/sqlite/test_workflow_execution_repo.py +++ b/tests/unit/persistence/sqlite/test_workflow_execution_repo.py @@ -10,7 +10,10 @@ WorkflowNodeExecutionStatus, WorkflowNodeType, ) -from synthorg.core.persistence_errors import VersionConflictError +from synthorg.core.persistence_errors import ( + PersistenceVersionConflictError, + RecordNotFoundError, +) from synthorg.engine.workflow.execution_models import ( WorkflowExecution, WorkflowNodeExecution, @@ -154,7 +157,7 @@ async def test_version_conflict_on_update( stale = WorkflowExecution.model_validate( {**exe.model_dump(mode="json"), "version": 3}, ) - with pytest.raises(VersionConflictError): + with pytest.raises(PersistenceVersionConflictError): await repo.save(stale) @pytest.mark.unit @@ -179,6 +182,26 @@ async def test_version_increment_succeeds( assert loaded.version == 2 assert loaded.status is WorkflowExecutionStatus.COMPLETED + @pytest.mark.unit + async def test_update_after_delete_raises_record_not_found( + self, + repo: SQLiteWorkflowExecutionRepository, + ) -> None: + # Differentiates a delete race (404 semantics) from a true + # version mismatch (409 semantics). Without the SELECT probe + # both collapse into PersistenceVersionConflictError and the + # API surface loses the not-found path. + exe = _make_execution() + await repo.save(exe) + deleted = await repo.delete("wfexec-test001") + assert deleted is True + + updated = WorkflowExecution.model_validate( + {**exe.model_dump(mode="json"), "version": 2}, + ) + with pytest.raises(RecordNotFoundError): + await repo.save(updated) + class TestListByDefinition: """List executions by definition ID.""" diff --git a/tests/unit/scripts/test_check_domain_error_hierarchy.py b/tests/unit/scripts/test_check_domain_error_hierarchy.py new file mode 100644 index 0000000000..d12b10a2af --- /dev/null +++ b/tests/unit/scripts/test_check_domain_error_hierarchy.py @@ -0,0 +1,473 @@ +"""Tests for the domain-error-hierarchy AST gate.""" + +import importlib.util +from pathlib import Path +from typing import Protocol, cast + +import pytest + +pytestmark = pytest.mark.unit + + +class WriteFile(Protocol): + """Callable signature for the ``write_file`` fixture.""" + + def __call__(self, rel: str, content: str) -> Path: ... + + +class _GateModule(Protocol): + """Subset of ``scripts/check_domain_error_hierarchy.py`` the tests exercise.""" + + FORBIDDEN_BASES: frozenset[str] + SUPPRESSION_MARKER: str + + @staticmethod + def _line_has_trailing_marker(line: str) -> bool: ... + @staticmethod + def _module_dotted_for_rel(rel: str) -> str: ... + @staticmethod + def _scan_tree( + project_root: Path, + scan_root: Path, + baseline: set[str] | None = None, + ) -> list[str]: ... + @staticmethod + def _format_baseline_entry(rel: str, lineno: int, class_name: str) -> str: ... + @staticmethod + def _load_baseline(baseline_path: Path) -> set[str]: ... + @staticmethod + def main(argv: list[str] | None = None) -> int: ... + + +def _load_module() -> _GateModule: + repo_root = Path(__file__).resolve().parents[3] + script_path = repo_root / "scripts" / "check_domain_error_hierarchy.py" + spec = importlib.util.spec_from_file_location( + "check_domain_error_hierarchy", + script_path, + ) + if spec is None or spec.loader is None: + msg = f"could not load module spec for {script_path}" + raise RuntimeError(msg) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return cast(_GateModule, module) + + +_MODULE = _load_module() + + +def _make_project( + tmp_path: Path, + files: dict[str, str], +) -> tuple[Path, Path]: + """Materialise a synthetic ``src/synthorg/`` tree under *tmp_path*. + + Always seeds ``synthorg/core/domain_errors.py`` with a minimal + ``DomainError`` (and the standard intermediates the migration uses + as bases) so cross-module resolution has a real target to follow. + """ + project_root = tmp_path + src_root = project_root / "src" / "synthorg" + src_root.mkdir(parents=True) + (src_root / "__init__.py").write_text("", encoding="utf-8") + (src_root / "core").mkdir() + (src_root / "core" / "__init__.py").write_text("", encoding="utf-8") + (src_root / "core" / "domain_errors.py").write_text( + "class DomainError(Exception): " + "# lint-allow: domain-error-hierarchy -- root of the hierarchy\n" + " pass\n" + "\n" + "class NotFoundError(DomainError):\n" + " pass\n" + "\n" + "class ConflictError(DomainError):\n" + " pass\n" + "\n" + "class ValidationError(DomainError):\n" + " pass\n", + encoding="utf-8", + ) + for rel, content in files.items(): + target = project_root / rel + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(content, encoding="utf-8") + # touch __init__.py up the tree so the module path resolves. + for parent in target.parents: + if parent == project_root: + break + init = parent / "__init__.py" + if not init.exists(): + init.write_text("", encoding="utf-8") + return project_root, src_root + + +@pytest.fixture +def write_file(tmp_path: Path) -> WriteFile: + """Return a helper that writes a file under *tmp_path*.""" + + def _write(rel: str, content: str) -> Path: + path = tmp_path / rel + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content, encoding="utf-8") + return path + + return _write + + +# ── module-path resolver ───────────────────────────────────────── + + +def test_module_dotted_for_src_synthorg() -> None: + assert ( + _MODULE._module_dotted_for_rel("src/synthorg/foo/bar.py") == "synthorg.foo.bar" + ) + + +def test_module_dotted_for_init() -> None: + assert ( + _MODULE._module_dotted_for_rel("src/synthorg/foo/__init__.py") == "synthorg.foo" + ) + + +# ── suppression marker ─────────────────────────────────────────── + + +def test_suppression_marker_present() -> None: + line = ( + "class Foo(Exception): # lint-allow: domain-error-hierarchy " + "-- TSA RFC 3161 client carve-out" + ) + assert _MODULE._line_has_trailing_marker(line) is True + + +def test_suppression_marker_requires_justification() -> None: + line = "class Foo(Exception): # lint-allow: domain-error-hierarchy --" + assert _MODULE._line_has_trailing_marker(line) is False + + +def test_suppression_marker_requires_double_dash() -> None: + line = "class Foo(Exception): # lint-allow: domain-error-hierarchy" + assert _MODULE._line_has_trailing_marker(line) is False + + +def test_suppression_marker_other_marker_ignored() -> None: + line = "class Foo(Exception): # lint-allow: persistence-boundary -- ok" + assert _MODULE._line_has_trailing_marker(line) is False + + +# ── positive cases (must NOT flag) ─────────────────────────────── + + +def test_direct_domain_error_subclass_ok(tmp_path: Path) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "from synthorg.core.domain_errors import DomainError\n" + "\n" + "class FooError(DomainError):\n" + " pass\n" + ), + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert issues == [] + + +def test_intermediate_subclass_ok(tmp_path: Path) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "from synthorg.core.domain_errors import NotFoundError\n" + "\n" + "class FooNotFoundError(NotFoundError):\n" + " pass\n" + ), + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert issues == [] + + +def test_deeper_mro_ok(tmp_path: Path) -> None: + """Cross-file transitive: Foo -> Bar -> DomainError must not flag.""" + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "from synthorg.core.domain_errors import DomainError\n" + "\n" + "class BarError(DomainError):\n" + " pass\n" + ), + "src/synthorg/foo/sub/errors.py": ( + "from synthorg.foo.errors import BarError\n" + "\n" + "class FooError(BarError):\n" + " pass\n" + ), + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert issues == [] + + +def test_per_line_marker_ok(tmp_path: Path) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/observability/audit_chain/tsa_client.py": ( + "class TsaError(Exception): " + "# lint-allow: domain-error-hierarchy -- RFC 3161 internals\n" + " pass\n" + ), + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert issues == [] + + +def test_module_import_alias_ok(tmp_path: Path) -> None: + """``import x.y as z`` followed by ``class Foo(z.DomainError)``.""" + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "import synthorg.core.domain_errors as dom\n" + "\n" + "class FooError(dom.DomainError):\n" + " pass\n" + ), + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert issues == [] + + +def test_baseline_suppresses(tmp_path: Path) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ("class FooError(Exception):\n pass\n"), + }, + ) + baseline = {"src/synthorg/foo/errors.py:1:FooError"} + issues = _MODULE._scan_tree( + project_root, + project_root / "src" / "synthorg", + baseline=baseline, + ) + assert issues == [] + + +# ── negative cases (must flag) ─────────────────────────────────── + + +@pytest.mark.parametrize( + "base", + [ + "Exception", + "RuntimeError", + "LookupError", + "PermissionError", + "ValueError", + ], +) +def test_direct_stdlib_base_flagged(tmp_path: Path, base: str) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": f"class FooError({base}):\n pass\n", + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert len(issues) == 1 + assert "FooError" in issues[0] + assert "src/synthorg/foo/errors.py" in issues[0] + + +def test_only_root_of_bad_chain_is_flagged(tmp_path: Path) -> None: + """Only the class whose DIRECT base is a forbidden stdlib name flags. + + `class BarError(Exception)` is flagged. `class FooError(BarError)` + inherits from BarError -- not a forbidden stdlib base, so FooError + itself does NOT flag. Migrating BarError to DomainError fixes the + whole chain in one edit; subclasses don't double-count. + """ + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "class BarError(Exception):\n" + " pass\n" + "\n" + "class FooError(BarError):\n" + " pass\n" + ), + }, + ) + issues = _MODULE._scan_tree(project_root, project_root / "src" / "synthorg") + assert len(issues) == 1 + assert "BarError" in issues[0] + + +def test_baseline_drift_warns(tmp_path: Path) -> None: + """Baseline lists a class that no longer exists -> gate flags drift.""" + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "from synthorg.core.domain_errors import DomainError\n" + "\n" + "class FooError(DomainError):\n" + " pass\n" + ), + }, + ) + baseline = {"src/synthorg/foo/errors.py:1:GhostError"} + issues = _MODULE._scan_tree( + project_root, + project_root / "src" / "synthorg", + baseline=baseline, + ) + assert len(issues) == 1 + assert "GhostError" in issues[0] + assert "stale" in issues[0].lower() or "drift" in issues[0].lower() + + +# ── baseline file format ───────────────────────────────────────── + + +def test_format_baseline_entry() -> None: + entry = _MODULE._format_baseline_entry( + "src/synthorg/foo/errors.py", + 42, + "FooError", + ) + assert entry == "src/synthorg/foo/errors.py:42:FooError" + + +def test_load_baseline_skips_comments_and_blanks(tmp_path: Path) -> None: + baseline_path = tmp_path / "baseline.txt" + baseline_path.write_text( + "# header line\n" + "\n" + "src/synthorg/foo.py:1:FooError\n" + "src/synthorg/bar.py:2:BarError\n", + encoding="utf-8", + ) + entries = _MODULE._load_baseline(baseline_path) + assert entries == { + "src/synthorg/foo.py:1:FooError", + "src/synthorg/bar.py:2:BarError", + } + + +def test_load_baseline_missing_file_returns_empty(tmp_path: Path) -> None: + assert _MODULE._load_baseline(tmp_path / "absent.txt") == set() + + +def test_load_baseline_rejects_malformed( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + baseline_path = tmp_path / "baseline.txt" + baseline_path.write_text("not-a-valid-entry\n", encoding="utf-8") + with pytest.raises(ValueError, match="validation"): + _MODULE._load_baseline(baseline_path) + captured = capsys.readouterr() + assert "malformed entry" in captured.err + + +def test_load_baseline_rejects_duplicates( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + baseline_path = tmp_path / "baseline.txt" + baseline_path.write_text( + "src/synthorg/foo.py:1:FooError\nsrc/synthorg/foo.py:1:FooError\n", + encoding="utf-8", + ) + with pytest.raises(ValueError, match="validation"): + _MODULE._load_baseline(baseline_path) + captured = capsys.readouterr() + assert "duplicate entry" in captured.err + + +# ── CLI ───────────────────────────────────────────────────────── + + +def test_no_baseline_flag_reports_all( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], + monkeypatch: pytest.MonkeyPatch, +) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ("class FooError(Exception):\n pass\n"), + "scripts/domain_error_hierarchy_baseline.txt": ( + "# baseline\nsrc/synthorg/foo/errors.py:1:FooError\n" + ), + }, + ) + monkeypatch.chdir(tmp_path.parent) + rc = _MODULE.main( + [ + "--repo-root", + str(project_root), + "--no-baseline", + ] + ) + captured = capsys.readouterr() + assert rc == 1 + assert "FooError" in captured.out + + +def test_clean_tree_returns_zero( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/foo/errors.py": ( + "from synthorg.core.domain_errors import DomainError\n" + "\n" + "class FooError(DomainError):\n" + " pass\n" + ), + }, + ) + monkeypatch.chdir(tmp_path.parent) + rc = _MODULE.main(["--repo-root", str(project_root), "--no-baseline"]) + assert rc == 0 + + +def test_update_baseline_writes_sorted( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + project_root, _ = _make_project( + tmp_path, + { + "src/synthorg/zeta/errors.py": ("class ZetaError(Exception):\n pass\n"), + "src/synthorg/alpha/errors.py": ( + "class AlphaError(Exception):\n pass\n" + ), + }, + ) + baseline_path = project_root / "scripts" / "domain_error_hierarchy_baseline.txt" + baseline_path.parent.mkdir(parents=True, exist_ok=True) + monkeypatch.chdir(tmp_path.parent) + rc = _MODULE.main(["--repo-root", str(project_root), "--update-baseline"]) + assert rc == 0 + body = baseline_path.read_text(encoding="utf-8") + # Alpha sorts before Zeta; both must be present. + alpha_idx = body.find("src/synthorg/alpha/errors.py") + zeta_idx = body.find("src/synthorg/zeta/errors.py") + assert alpha_idx >= 0 + assert zeta_idx >= 0 + assert alpha_idx < zeta_idx