diff --git a/.opencode/plugins/synthorg-hooks.ts b/.opencode/plugins/synthorg-hooks.ts index 11bdeb2a35..ad144f0ea9 100644 --- a/.opencode/plugins/synthorg-hooks.ts +++ b/.opencode/plugins/synthorg-hooks.ts @@ -13,6 +13,7 @@ * PreToolUse (Bash): scripts/check_bash_no_write.sh * PreToolUse (Bash): scripts/check_git_c_cwd.sh * PreToolUse (Bash): scripts/check_no_pr_create.sh + * PreToolUse (Bash): scripts/check_no_git_no_verify.sh * PreToolUse (Bash): scripts/check_no_cd_prefix.sh * PreToolUse (Bash): scripts/check_no_local_coverage.sh * PreToolUse (Bash): scripts/check_enforce_parallel_tests.sh @@ -342,6 +343,7 @@ export const SynthOrgHooks: Plugin = async ({ client, $, app }) => { // -n=8 --dist=loadfile). for (const script of [ "scripts/check_no_pr_create.sh", + "scripts/check_no_git_no_verify.sh", "scripts/check_no_cd_prefix.sh", "scripts/check_no_local_coverage.sh", "scripts/check_enforce_parallel_tests.sh", diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e856983976..4eb7dfecfc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -570,14 +570,15 @@ repos: pass_filenames: false stages: [pre-push] - - id: dto-forbid-extra - name: DTO extra="forbid" gate (src/synthorg/api) - entry: uv run python scripts/check_dto_forbid_extra.py + - id: frozen-extra-forbid + name: Frozen model extra="forbid" gate (src/synthorg) + entry: uv run python scripts/check_frozen_model_extra_forbid.py language: system - # Trigger on any change to the scanned tree, the checker + # Project-wide successor to the old api-only dto-forbid-extra + # gate. Trigger on any change to the scanned tree, the checker # itself, or this config so a PR that weakens the gate cannot - # bypass the check by not touching api Python files. - files: ^(src/synthorg/api/.*\.py|scripts/check_dto_forbid_extra\.py|\.pre-commit-config\.yaml)$ + # bypass it by not touching scanned Python files. 
+ files: ^(src/synthorg/.*\.py|scripts/check_frozen_model_extra_forbid\.py|\.pre-commit-config\.yaml)$ pass_filenames: false stages: [pre-push] diff --git a/CLAUDE.md b/CLAUDE.md index 9bc99f16f4..8ae5a19c84 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -57,7 +57,7 @@ PYTHONPATH=. uv run zensical build # docs - No `from __future__ import annotations` (3.14 has PEP 649). PEP 758 except: `except A, B:` no parens unless binding. - Type hints on public functions; mypy strict. Google-style docstrings. Line length 88; functions <50 lines; files <800 lines. - Errors: `Error` from `DomainError`; never inherit `Exception`/`RuntimeError`/etc directly. Enforced by `check_domain_error_hierarchy.py`. -- Pydantic v2 frozen + `extra="forbid"` on API DTOs (Request/Response/Snapshot/Result/Envelope/Status/Info/Summary suffixes); `@computed_field` for derived; `NotBlankStr` for identifiers. +- Pydantic v2 frozen + `extra="forbid"` on every frozen model project-wide (gate `check_frozen_model_extra_forbid.py`; `@computed_field` auto-exempt, per-line `# lint-allow: frozen-extra-forbid -- <reason>` for `extra="allow"`/`"ignore"` boundaries); `@computed_field` for derived; `NotBlankStr` for identifiers. - Args models at every system boundary; `parse_typed()` for every external dict ingestion. Enforced by `check_boundary_typed.py`. - Immutability: `model_copy(update=...)` or `copy.deepcopy()`; deepcopy at system boundaries. - Async: `asyncio.TaskGroup` for fan-out/fan-in; helpers catch `Exception` (re-raise `MemoryError`/`RecursionError`). 
diff --git a/data/runtime_stats.yaml b/data/runtime_stats.yaml index a97aa36859..1271efe263 100644 --- a/data/runtime_stats.yaml +++ b/data/runtime_stats.yaml @@ -1,20 +1,20 @@ schema_version: 1 -last_generated_utc: '2026-05-17T01:30:00Z' -generator_revision: 949abda43 +last_generated_utc: '2026-05-17T13:33:50Z' +generator_revision: e0a5b2a55 stats: tests: - raw: 31136 - rounded: 31000 - display: 31,000+ + raw: 30950 + rounded: 30000 + display: 30,000+ mem0_stars: - raw: 55881 + raw: 55932 rounded: 55000 display: 55k+ providers_curated: raw: 20 display: '20' providers_via_litellm: - raw: 2708 + raw: 2717 display: 2700+ subagents: raw: 26 diff --git a/docs/design/agents.md b/docs/design/agents.md index 37aff0befc..7957679a48 100644 --- a/docs/design/agents.md +++ b/docs/design/agents.md @@ -75,7 +75,7 @@ from synthorg.core.types import NotBlankStr class Skill(BaseModel): """Structured capability description, A2A AgentSkill-aligned.""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr # e.g. "code-review" name: NotBlankStr # e.g. "Code Review" @@ -88,7 +88,7 @@ class Skill(BaseModel): class SkillSet(BaseModel): """Agent skill inventory, split into primary and secondary.""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") primary: tuple[Skill, ...] = () secondary: tuple[Skill, ...] = () diff --git a/docs/design/memory.md b/docs/design/memory.md index 9723674eaf..179abb38ff 100644 --- a/docs/design/memory.md +++ b/docs/design/memory.md @@ -310,7 +310,7 @@ for non-Docker deployments where torch is installed directly. 
```python class EmbeddingFineTuneConfig(BaseModel): - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = False checkpoint_path: NotBlankStr | None = None diff --git a/docs/reference/audit-category-gate-coverage.md b/docs/reference/audit-category-gate-coverage.md index cd806ca6aa..6c87200cd9 100644 --- a/docs/reference/audit-category-gate-coverage.md +++ b/docs/reference/audit-category-gate-coverage.md @@ -20,7 +20,7 @@ The four resolution paths are: | Typed boundary (`parse_typed` at every external dict ingestion) | Standing gate | `scripts/check_boundary_typed.py` | | Vendor-name leakage (Anthropic / OpenAI / Claude / GPT) | Standing gate | `scripts/check_forbidden_literals.py` | | Regional-default hardcoding (currency / locale / timezone / language) | Standing gate | `scripts/check_backend_regional_defaults.py` + `scripts/check_web_design_system.py` | -| API DTO `extra="forbid"` (request / response / snapshot / result / envelope / status / info / summary suffixes) | Standing gate | `scripts/check_dto_forbid_extra.py` | +| Frozen model `extra="forbid"` (project-wide: every frozen `ConfigDict` model under `src/synthorg/`, `@computed_field` auto-exempt) | Standing gate | `scripts/check_frozen_model_extra_forbid.py` | | Em-dashes (U+2014) in source | Standing gate | `scripts/check_no_em_dashes.py` | | Redundant per-test `pytest.mark.timeout(30)` | Standing gate | `scripts/check_no_redundant_timeout.py` | | Bulk edits without explicit user approval | Standing gate | `scripts/check_no_bulk_edit.py` | diff --git a/docs/reference/convention-gates.md b/docs/reference/convention-gates.md index e3b2ca94a0..0292984856 100644 --- a/docs/reference/convention-gates.md +++ b/docs/reference/convention-gates.md @@ -19,11 +19,11 @@ All under `scripts/`. 
The list is generated by `ls scripts/check_*.py`; if an en - `check_doc_drift_counts.py` - `check_doc_numeric_macros.py` - `check_domain_error_hierarchy.py` -- `check_dto_forbid_extra.py` - `check_dto_types_ts_in_sync.py` - `check_dual_backend_test_parity.py` - `check_error_codes_ts_in_sync.py` - `check_forbidden_literals.py` +- `check_frozen_model_extra_forbid.py` - `check_image_signatures.py` - `check_list_pagination.py` - `check_logger_exception_str_exc.py` diff --git a/docs/reference/conventions.md b/docs/reference/conventions.md index 617c849f1f..3d3e72fa6c 100644 --- a/docs/reference/conventions.md +++ b/docs/reference/conventions.md @@ -183,23 +183,36 @@ inline with the consumer. Examples: ## 8. Frozen `ConfigDict` pattern Every Pydantic model declares -`model_config = ConfigDict(frozen=True, allow_inf_nan=False)`. The -project standard is to add `extra="forbid"` on every model that does -not need to round-trip through `model_dump()` -- which is most of -them. Around 489 ConfigDicts across `src/synthorg/` carry the strict -form today; the carve-out is the ~46 classes that declare a -`@computed_field`, where Pydantic v2 includes the computed value in -`model_dump()` output and a strict-extra reconstruction would reject -that key on the round trip. Request DTOs are always strict because -the caller-side reject-unknown-keys property is what `extra="forbid"` -exists for. - +`model_config = ConfigDict(frozen=True, allow_inf_nan=False)` with +`extra="forbid"`. This is enforced project-wide (not API-DTO-only) +by `scripts/check_frozen_model_extra_forbid.py`: every class under +`src/synthorg/` whose own `model_config` is a `ConfigDict` (or dict +literal) with `frozen=True` MUST also set `extra="forbid"`. 
+ +Two carve-outs: + +* **`@computed_field` (automatic).** Classes declaring a + `@computed_field` are exempt without annotation: Pydantic v2 + includes the computed value in `model_dump()` output and a + strict-extra reconstruction would reject that key on the round + trip. The gate detects the decorator via AST so the ~68 such + classes carry no per-line noise. +* **Per-line opt-out.** Genuine exceptions (an `extra="allow"` + envelope that must accept arbitrary provider keys, a + validator-gated boundary using `extra="ignore"` for + forward-compat) declare + `# lint-allow: frozen-extra-forbid -- <reason>` on the class + definition line. Bare opt-outs without a reason are violations. + +Request DTOs are always strict because the caller-side +reject-unknown-keys property is what `extra="forbid"` exists for. Combined with the framework's `frozen` guarantee this gives us the "create new objects, never mutate existing ones" property the immutability covenant relies on. -References: 489+ occurrences across `src/synthorg/`. Canonical example: -`src/synthorg/approval/models.py:28`. +Canonical example: `src/synthorg/approval/models.py:28`. Gate: +`scripts/check_frozen_model_extra_forbid.py` (pre-push + +`.pre-commit-config.yaml` `frozen-extra-forbid`). ## 9. Typed args models at system boundaries (#1611) @@ -754,8 +767,9 @@ API boundary. The naming suffix encodes its role: * `*Info`: derived metadata (e.g. `ProviderInfo`). * `*Summary`: aggregate / rollup view (e.g. `BudgetSummary`). -The `dto-forbid-extra` gate scans for any DTO carrying one of these -suffixes and verifies it sets `extra="forbid"`. +The project-wide `frozen-extra-forbid` gate (section 8) covers every +DTO carrying one of these suffixes along with every other frozen +model, verifying each sets `extra="forbid"`. ## 30. 
Import order diff --git a/docs/reference/errors.md b/docs/reference/errors.md index 6f0799a2cc..a7fb64c296 100644 --- a/docs/reference/errors.md +++ b/docs/reference/errors.md @@ -50,6 +50,9 @@ Clients should dispatch on `error_code` (most specific) and fall back to `error_ | 2002 | `ARTIFACT_TOO_LARGE` | Upload exceeds `artifact.max_bytes` | | 2003 | `TOOL_PARAMETER_ERROR` | Tool parameters failed schema validation | | 2004 | `PROVIDER_TIER_COVERAGE_INSUFFICIENT` | Setup wizard cannot apply a template because no configured provider exposes any models | +| 2005 | `IMMUTABLE_FIELD_MISMATCH` | A restore/rollback would change an immutable field (e.g. agent id/name/department) | +| 2006 | `CHECKPOINT_ROLLBACK_UNAVAILABLE` | Fine-tune checkpoint rollback target is missing or unusable | +| 2007 | `CHECKPOINT_ROLLBACK_CORRUPT` | Fine-tune checkpoint rollback backup data is corrupt | ## Not Found (3xxx) @@ -70,8 +73,12 @@ The NotFound hierarchy is driven by a single `NotFoundError` class with domain-s | 3010 | `CONNECTION_NOT_FOUND` | Integration connection | | 3011 | `MODEL_NOT_FOUND` | Provider model | | 3012 | `ESCALATION_NOT_FOUND` | Escalation queue entry | +| 3013 | `WORKFLOW_DEFINITION_NOT_FOUND` | Workflow definition record | +| 3014 | `AB_TEST_NOT_FOUND` | A/B test record for a proposal | +| 3015 | `BACKUP_NOT_FOUND` | Backup archive | +| 3016 | `MEMORY_ENTRY_NOT_FOUND` | Agent memory entry | -All 13 share the same `type` URI; the numeric code is the discriminator. +All share the same `type` URI; the numeric code is the discriminator. ## Conflict (4xxx) @@ -86,6 +93,11 @@ All 13 share the same `type` URI; the numeric code is the discriminator. 
| 4006 | `ESCALATION_ALREADY_DECIDED` | Late decision on a closed escalation | | 4007 | `MIXED_CURRENCY_AGGREGATION` | Cross-currency aggregation attempted | | 4008 | `WORKFLOW_EXECUTION_ALREADY_TERMINAL` | Cancel hit an execution already in a terminal status (no retry will succeed) | +| 4009 | `BACKUP_IN_PROGRESS` | A backup/restore operation is already running | +| 4010 | `CHECKPOINT_OPERATION_CONFLICT` | Checkpoint deploy/delete rejected (e.g. active checkpoint) | +| 4011 | `FINE_TUNE_RUN_ACTIVE` | A fine-tune run is already active (start/resume blocked) | +| 4012 | `TRAINING_PLAN_NOT_MODIFIABLE` | Training plan cannot be modified after execution or failure | +| 4013 | `BACKUP_UNRESTARTABLE` | Backup service stopped in an unrestartable state | ## Rate Limit (5xxx) @@ -135,6 +147,11 @@ All 13 share the same `type` URI; the numeric code is the discriminator. | 8008 | `TOOL_EXECUTION_ERROR` | Tool runtime failure (subclass of `TOOL_ERROR`) | | 8009 | `FEATURE_NOT_IMPLEMENTED` | Active backend or deployment fundamentally does not implement the requested operation (501) | | 8010 | `ARTIFACT_NO_STORAGE_BACKEND` | Artifact service was constructed without a storage backend; controller-helper misconfiguration | +| 8011 | `AGENT_IDENTITY_ROLLBACK_FAILED` | Unexpected server failure during agent-identity rollback | +| 8012 | `BACKUP_RESTORE_FAILED` | Restore operation failed (non-recoverable backend error) | +| 8013 | `BACKUP_MANIFEST_ERROR` | Backup manifest could not be parsed or validated | +| 8014 | `SETTINGS_ENCRYPTION_ERROR` | Internal error processing a sensitive (encrypted) setting | +| 8015 | `SINK_CONFIG_VALIDATION_ERROR` | Internal error validating an observability sink configuration | ## Content negotiation diff --git a/scripts/check_dto_forbid_extra.py b/scripts/check_dto_forbid_extra.py deleted file mode 100644 index 66ab783c47..0000000000 --- a/scripts/check_dto_forbid_extra.py +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env python3 -"""Gate every 
API-boundary DTO under ``src/synthorg/api/`` to forbid extras. - -A DTO that does not declare ``extra="forbid"`` silently absorbs unknown -payload keys, which masks client typos and lets fabricated capability -flags slip through to handler logic. ``CLAUDE.md`` requires -``extra="forbid"`` on every Pydantic model that does not round-trip -through ``model_dump()``; this gate enforces that statically for every -class in ``src/synthorg/api/`` whose name ends with one of the -:data:`DTO_SUFFIXES` strings. - -A class may declare a per-line opt-out by placing -``# lint-allow: dto-forbid-extra -- `` on the class definition -line, where ```` is a non-empty justification. Bare opt-outs -without a reason are treated as violations. - -Exit codes: - 0 -- all DTOs forbid extras (or no DTOs found). - 1 -- one or more DTOs are missing ``extra="forbid"``; - offending sites printed to stderr. - 2 -- internal error parsing a source file. -""" - -import ast -import re -import sys -from pathlib import Path - -REPO_ROOT = Path(__file__).resolve().parent.parent -API_DIR = REPO_ROOT / "src" / "synthorg" / "api" - -DTO_SUFFIXES: tuple[str, ...] 
= ( - "Request", - "Response", - "Snapshot", - "Result", - "Envelope", - "Status", - "Info", - "Summary", -) - -_OPTOUT_WITH_REASON_RE = re.compile( - r"#\s*lint-allow:\s*dto-forbid-extra\s*--\s*(?P\S.*?)\s*$" -) -_OPTOUT_BARE_RE = re.compile(r"#\s*lint-allow:\s*dto-forbid-extra\b") - - -def _config_forbids_extras(value: ast.Call | ast.Dict) -> bool: - """Return True iff a ``ConfigDict(...)`` or dict literal sets ``extra='forbid'``.""" - if isinstance(value, ast.Call): - for kw in value.keywords: - if kw.arg == "extra" and isinstance(kw.value, ast.Constant): - return kw.value.value == "forbid" - return False - for key, val in zip(value.keys, value.values, strict=False): - if ( - isinstance(key, ast.Constant) - and key.value == "extra" - and isinstance(val, ast.Constant) - ): - return val.value == "forbid" - return False - - -def _model_config_assignment_value(stmt: ast.stmt) -> ast.expr | None: - """Return the RHS of ``model_config = ...`` or ``model_config: T = ...``.""" - if ( - isinstance(stmt, ast.Assign) - and len(stmt.targets) == 1 - and isinstance(stmt.targets[0], ast.Name) - and stmt.targets[0].id == "model_config" - ): - return stmt.value - if ( - isinstance(stmt, ast.AnnAssign) - and isinstance(stmt.target, ast.Name) - and stmt.target.id == "model_config" - ): - return stmt.value - return None - - -def _model_config_value(node: ast.ClassDef) -> ast.Call | ast.Dict | None: - """Return the effective ``model_config`` AST value. - - Python class assignments are last-write-wins, so the gate must return - the *final* ``model_config`` assignment in the class body, not the - first. Otherwise a class could bypass the gate by setting - ``extra="forbid"`` early and overriding it later. 
- """ - selected: ast.Call | ast.Dict | None = None - for stmt in node.body: - value = _model_config_assignment_value(stmt) - if value is None: - continue - if isinstance(value, ast.Call): - func = value.func - if (isinstance(func, ast.Name) and func.id == "ConfigDict") or ( - isinstance(func, ast.Attribute) and func.attr == "ConfigDict" - ): - selected = value - elif isinstance(value, ast.Dict): - selected = value - return selected - - -def _base_name(base: ast.expr) -> str | None: - """Return the base class's bare name (handles ``Name``/``Attribute``/``Subscript``).""" - if isinstance(base, ast.Name): - return base.id - if isinstance(base, ast.Attribute): - return base.attr - if isinstance(base, ast.Subscript): - return _base_name(base.value) - return None - - -def _classes_in_module(tree: ast.AST) -> dict[str, ast.ClassDef]: - """Index ``ast.ClassDef`` nodes by class name for ancestry lookup.""" - return {n.name: n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)} - - -def _has_basemodel_ancestor( - node: ast.ClassDef, - classes: dict[str, ast.ClassDef], - visited: set[str] | None = None, -) -> bool: - """Recursively check whether ``node`` ultimately inherits from ``BaseModel``. - - Only resolves base classes defined within the same source file - (``classes`` index). A cross-file base whose own name does not end - with one of :data:`DTO_SUFFIXES` cannot be resolved and is treated - as *not* a ``BaseModel`` descendant; such DTOs will escape the gate - unless they carry their own ``model_config`` assignment (as done - for ``CreatePresetRequest``/``UpdatePresetRequest`` in - ``dto_personalities.py``). 
- """ - if visited is None: - visited = set() - if node.name in visited: - return False - visited.add(node.name) - for base in node.bases: - name = _base_name(base) - if name is None: - continue - if name == "BaseModel": - return True - if name.endswith(DTO_SUFFIXES): - return True - parent = classes.get(name) - if parent is not None and _has_basemodel_ancestor(parent, classes, visited): - return True - return False - - -def _is_dto_to_check(node: ast.ClassDef, classes: dict[str, ast.ClassDef]) -> bool: - """Class name ends with a DTO suffix and inherits transitively from BaseModel.""" - if not node.name.endswith(DTO_SUFFIXES): - return False - return _has_basemodel_ancestor(node, classes) - - -def _line_optout_status(source_lines: list[str], lineno: int) -> str: - """Return ``"with-reason"`` / ``"bare"`` / ``"none"`` for the class line. - - Returns ``"with-reason"`` for a valid ``# lint-allow: dto-forbid-extra - -- `` exemption, ``"bare"`` for a malformed bare opt-out (which - must be reported as a violation per the gate's contract), and - ``"none"`` when no opt-out marker is present. 
- """ - if not 1 <= lineno <= len(source_lines): - return "none" - line = source_lines[lineno - 1] - if _OPTOUT_WITH_REASON_RE.search(line): - return "with-reason" - if _OPTOUT_BARE_RE.search(line): - return "bare" - return "none" - - -def _walk(path: Path) -> list[tuple[Path, int, str]]: - """Return list of ``(path, lineno, class_name)`` violations in ``path``.""" - source = path.read_text(encoding="utf-8") - try: - tree = ast.parse(source, filename=str(path)) - except SyntaxError as exc: - print(f"{path}: failed to parse -- {exc}", file=sys.stderr) - raise SystemExit(2) from exc - - source_lines = source.splitlines() - classes = _classes_in_module(tree) - violations: list[tuple[Path, int, str]] = [] - for node in ast.walk(tree): - if not isinstance(node, ast.ClassDef): - continue - if not _is_dto_to_check(node, classes): - continue - optout = _line_optout_status(source_lines, node.lineno) - if optout == "with-reason": - continue - if optout == "bare": - violations.append((path, node.lineno, node.name)) - continue - config_value = _model_config_value(node) - if config_value is None: - violations.append((path, node.lineno, node.name)) - continue - if not _config_forbids_extras(config_value): - violations.append((path, node.lineno, node.name)) - return violations - - -def main() -> int: - """Walk ``src/synthorg/api/`` and report any DTO without forbid.""" - if not API_DIR.is_dir(): - print(f"{API_DIR} does not exist", file=sys.stderr) - return 2 - violations: list[tuple[Path, int, str]] = [] - for path in sorted(API_DIR.rglob("*.py")): - violations.extend(_walk(path)) - if not violations: - return 0 - suffix_list = ", ".join(f"*{s}" for s in DTO_SUFFIXES) - print( - f'{len(violations)} DTO(s) missing extra="forbid" in ConfigDict ' - f"(checked suffixes: {suffix_list}):", - file=sys.stderr, - ) - for path, lineno, name in violations: - rel = path.relative_to(REPO_ROOT) - print(f" {rel}:{lineno} class {name}", file=sys.stderr) - print( - '\nAdd ``extra="forbid"`` to 
each ConfigDict so the API boundary ' - "rejects unknown fields. Per-line opt-out: " - "``# lint-allow: dto-forbid-extra -- `` on the class line.", - file=sys.stderr, - ) - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/check_frozen_model_extra_forbid.py b/scripts/check_frozen_model_extra_forbid.py new file mode 100644 index 0000000000..5f80b51b2d --- /dev/null +++ b/scripts/check_frozen_model_extra_forbid.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python3 +"""Gate every frozen Pydantic model under ``src/synthorg/`` to forbid extras. + +A frozen model that does not declare ``extra="forbid"`` silently +absorbs unknown construction keys, masking caller typos and letting +fabricated fields slip into business logic. ``CLAUDE.md`` section 8 +makes ``extra="forbid"`` the project standard for every model that +does not need to round-trip through ``model_dump()``. This gate +enforces that statically and project-wide (it strictly supersedes the +old API-DTO-only ``check_dto_forbid_extra.py``). + +Scope: every class under ``src/synthorg/`` whose OWN body assigns +``model_config = ConfigDict(...)`` (or a dict literal) with +``frozen=True``. + +Carve-outs: + +* **``@computed_field`` (automatic).** Pydantic v2 includes a + computed field's value in ``model_dump()`` output; a strict-extra + reconstruction would reject that key on the round trip, so models + declaring a ``@computed_field`` are exempt without annotation. This + is the section-8 documented carve-out, detected by AST so the ~68 + affected classes need no per-line noise. +* **Per-line opt-out.** ``# lint-allow: frozen-extra-forbid -- + `` on the class definition line, ```` non-empty, + for the genuine remaining exceptions (e.g. an ``extra="allow"`` + envelope that must accept arbitrary provider keys, or a + validator-gated config that round-trips through ``model_dump``). + Bare opt-outs without a reason are violations. 
+ +Exit codes: + 0 -- all frozen models forbid extras (or are carved out). + 1 -- one or more frozen models are missing ``extra="forbid"``. + 2 -- internal error parsing a source file. +""" + +import ast +import re +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +SRC_DIR = REPO_ROOT / "src" / "synthorg" + +_OPTOUT_WITH_REASON_RE = re.compile( + r"#\s*lint-allow:\s*frozen-extra-forbid\s*--\s*(?P\S.*?)\s*$" +) +_OPTOUT_BARE_RE = re.compile(r"#\s*lint-allow:\s*frozen-extra-forbid\b") + + +def _config_value(node: ast.ClassDef) -> ast.Call | ast.Dict | None: + """Return the final ``model_config`` ConfigDict/dict in the class body. + + Last-write-wins: a class cannot bypass the gate by setting a + strict config early and overriding it later. + """ + selected: ast.Call | ast.Dict | None = None + for stmt in node.body: + value: ast.expr | None = None + if ( + isinstance(stmt, ast.Assign) + and len(stmt.targets) == 1 + and isinstance(stmt.targets[0], ast.Name) + and stmt.targets[0].id == "model_config" + ) or ( + isinstance(stmt, ast.AnnAssign) + and isinstance(stmt.target, ast.Name) + and stmt.target.id == "model_config" + ): + value = stmt.value + if value is None: + continue + if isinstance(value, ast.Call): + func = value.func + if (isinstance(func, ast.Name) and func.id == "ConfigDict") or ( + isinstance(func, ast.Attribute) and func.attr == "ConfigDict" + ): + selected = value + elif isinstance(value, ast.Dict): + selected = value + return selected + + +_MISSING: object = object() + + +def _config_flag(value: ast.Call | ast.Dict, name: str) -> object: + """Return the literal value of config kwarg ``name`` or ``_MISSING``.""" + if isinstance(value, ast.Call): + for kw in value.keywords: + if kw.arg == name and isinstance(kw.value, ast.Constant): + return kw.value.value + return _MISSING + for key, val in zip(value.keys, value.values, strict=False): + if ( + isinstance(key, ast.Constant) + and key.value == name + and 
isinstance(val, ast.Constant) + ): + return val.value + return _MISSING + + +def _has_computed_field(node: ast.ClassDef) -> bool: + """True iff the class declares a ``@computed_field`` method/property.""" + for member in node.body: + if not isinstance(member, ast.FunctionDef | ast.AsyncFunctionDef): + continue + for dec in member.decorator_list: + target = dec.func if isinstance(dec, ast.Call) else dec + dec_name = ( + target.attr + if isinstance(target, ast.Attribute) + else (target.id if isinstance(target, ast.Name) else "") + ) + if dec_name == "computed_field": + return True + return False + + +def _header_span(node: ast.ClassDef, total_lines: int) -> tuple[int, int]: + """Return the inclusive 1-based line range of the class header. + + The header runs from ``class`` to the line before the first body + statement. ``ruff format`` wraps a long ``class X(Base):`` plus a + trailing ``# lint-allow`` comment across several lines, so the + opt-out marker may land on the wrapped ``):`` line rather than + ``node.lineno``; scanning the whole header span finds it either + way. 
+ """ + start = node.lineno + body_first = min( + (child.lineno for child in node.body), + default=start, + ) + end = max(start, body_first - 1) + return start, min(end, total_lines) + + +def _optout_status( + source_lines: list[str], + node: ast.ClassDef, +) -> str: + """Return ``"with-reason"`` / ``"bare"`` / ``"none"`` for the header.""" + start, end = _header_span(node, len(source_lines)) + header_lines = source_lines[start - 1 : end] + if any(_OPTOUT_WITH_REASON_RE.search(line) for line in header_lines): + return "with-reason" + if any(_OPTOUT_BARE_RE.search(line) for line in header_lines): + return "bare" + return "none" + + +def _walk(path: Path) -> list[tuple[Path, int, str]]: + """Return ``(path, lineno, class_name)`` violations in ``path``.""" + source = path.read_text(encoding="utf-8") + try: + tree = ast.parse(source, filename=str(path)) + except SyntaxError as exc: + print(f"{path}: failed to parse -- {exc}", file=sys.stderr) + raise SystemExit(2) from exc + source_lines = source.splitlines() + violations: list[tuple[Path, int, str]] = [] + for node in ast.walk(tree): + if not isinstance(node, ast.ClassDef): + continue + cfg = _config_value(node) + if cfg is None: + continue + if _config_flag(cfg, "frozen") is not True: + continue + if _config_flag(cfg, "extra") == "forbid": + continue + if _has_computed_field(node): + # Section-8 documented carve-out: model_dump emits the + # computed key; strict reconstruction would reject it. 
+ continue + optout = _optout_status(source_lines, node) + if optout == "with-reason": + continue + violations.append((path, node.lineno, node.name)) + return violations + + +def main() -> int: + """Walk ``src/synthorg/`` and report frozen models without forbid.""" + if not SRC_DIR.is_dir(): + print(f"{SRC_DIR} does not exist", file=sys.stderr) + return 2 + violations: list[tuple[Path, int, str]] = [] + for path in sorted(SRC_DIR.rglob("*.py")): + violations.extend(_walk(path)) + if not violations: + return 0 + print( + f'{len(violations)} frozen model(s) missing extra="forbid":', + file=sys.stderr, + ) + for path, lineno, name in violations: + rel = path.relative_to(REPO_ROOT) + print(f" {rel}:{lineno} class {name}", file=sys.stderr) + print( + '\nAdd ``extra="forbid"`` to each frozen ConfigDict. A model ' + "that declares a @computed_field is auto-exempt. Genuine " + "exceptions use a per-line opt-out: " + "``# lint-allow: frozen-extra-forbid -- `` on the " + "class definition line.", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/synthorg/api/auth/service.py b/src/synthorg/api/auth/service.py index 7985d8d7f8..20b2046474 100644 --- a/src/synthorg/api/auth/service.py +++ b/src/synthorg/api/auth/service.py @@ -67,7 +67,7 @@ class RefreshRotation(BaseModel): access token rotated in place), not a freshly minted one. """ - model_config = ConfigDict(frozen=True) + model_config = ConfigDict(frozen=True, extra="forbid") token: str expires_in: int diff --git a/src/synthorg/api/auth/ticket_store.py b/src/synthorg/api/auth/ticket_store.py index bab5ccebb7..63314613dc 100644 --- a/src/synthorg/api/auth/ticket_store.py +++ b/src/synthorg/api/auth/ticket_store.py @@ -73,7 +73,7 @@ class _TicketEntry(BaseModel): expires_at: ``time.monotonic()`` deadline. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") user: AuthenticatedUser expires_at: float diff --git a/src/synthorg/api/config.py b/src/synthorg/api/config.py index d1a18d26da..d616f6a3ec 100644 --- a/src/synthorg/api/config.py +++ b/src/synthorg/api/config.py @@ -38,7 +38,7 @@ class CorsConfig(BaseModel): allowed in cross-origin requests. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") # Empty by default: safe-by-default for production. Local dev sets # the origin explicitly via the settings registry @@ -120,7 +120,7 @@ class RateLimitConfig(BaseModel): exclude_paths: Paths excluded from rate limiting. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( @@ -267,7 +267,7 @@ class ServerConfig(BaseModel): ws_ping_timeout: WebSocket pong timeout in seconds. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") reload: bool = Field( default=False, @@ -316,7 +316,7 @@ class ApiConfig(BaseModel): api_prefix: URL prefix for all API routes. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( diff --git a/src/synthorg/api/controllers/_webhooks_wiring.py b/src/synthorg/api/controllers/_webhooks_wiring.py index d092f339fb..44f8064fcb 100644 --- a/src/synthorg/api/controllers/_webhooks_wiring.py +++ b/src/synthorg/api/controllers/_webhooks_wiring.py @@ -36,7 +36,9 @@ ) -class WebhookEventPayload(BaseModel): +class WebhookEventPayload( + BaseModel +): # lint-allow: frozen-extra-forbid -- external webhook providers send arbitrary keys; envelope-only validation uses extra="allow" by design (docs/reference/typed-boundaries.md) # noqa: E501 """Typed boundary for an incoming webhook event payload. The wire shape is provider-defined (each external service sends diff --git a/src/synthorg/api/controllers/agent_identity_versions.py b/src/synthorg/api/controllers/agent_identity_versions.py index 6074df85fc..d3a0dabec8 100644 --- a/src/synthorg/api/controllers/agent_identity_versions.py +++ b/src/synthorg/api/controllers/agent_identity_versions.py @@ -4,7 +4,6 @@ from litestar import Controller, get, post from litestar.datastructures import State # noqa: TC002 -from litestar.exceptions import InternalServerException from litestar.params import Parameter from synthorg.api.auth import get_authenticated_user_id @@ -22,7 +21,12 @@ ) from synthorg.api.path_params import PathId # noqa: TC001 from synthorg.core.agent import AgentIdentity -from synthorg.core.domain_errors import NotFoundError, ValidationError +from synthorg.core.domain_errors import ( + AgentIdentityRollbackError, + ImmutableFieldMismatchError, + NotFoundError, + ValidationError, +) from synthorg.engine.identity.diff import AgentIdentityDiff, compute_diff from synthorg.hr.errors import AgentNotFoundError from synthorg.observability import get_logger, safe_error_description @@ -230,6 +234,21 @@ async 
def rollback_identity( Produces a new version snapshot (N+1) whose content hash equals the restored snapshot's content hash, preserving the full audit trail. + + Args: + state: Application state. + agent_id: Agent identifier (1-128 chars, enforced at the + path-parameter boundary by ``PathId``). + data: Rollback request (target version, optional reason). + + Raises: + NotFoundError: The agent does not exist (HTTP 404). + ImmutableFieldMismatchError: Immutable fields + (id/name/department) differ between the current entry + and the restored snapshot (HTTP 422, + ``IMMUTABLE_FIELD_MISMATCH``). + AgentIdentityRollbackError: Unexpected server fault during + rollback (HTTP 500, ``AGENT_IDENTITY_ROLLBACK_FAILED``). """ target = await state.app_state.agent_version_service.get_for_rollback( agent_id, @@ -269,7 +288,7 @@ async def rollback_identity( error=safe_error_description(exc), ) msg = "Cannot rollback: immutable field mismatch" - raise ValidationError(msg) from exc + raise ImmutableFieldMismatchError(msg) from exc except MemoryError, RecursionError: raise except Exception as exc: @@ -280,7 +299,7 @@ async def rollback_identity( error=safe_error_description(exc), ) msg = "Rollback failed due to an unexpected server error" - raise InternalServerException(msg) from exc + raise AgentIdentityRollbackError(msg) from exc logger.info( AGENT_IDENTITY_ROLLED_BACK, diff --git a/src/synthorg/api/controllers/memory.py b/src/synthorg/api/controllers/memory.py index 34580020b8..51e71968cb 100644 --- a/src/synthorg/api/controllers/memory.py +++ b/src/synthorg/api/controllers/memory.py @@ -19,6 +19,7 @@ CursorParam, encode_repo_seek_meta, ) +from synthorg.api.path_params import PathId # noqa: TC001 from synthorg.api.rate_limits import ( per_op_concurrency_from_policy, per_op_rate_limit_from_policy, @@ -26,13 +27,16 @@ from synthorg.api.state import AppState # noqa: TC001 from synthorg.core.auth.roles import HumanRole from synthorg.core.domain_errors import ( - ConflictError, + 
CheckpointOperationConflictError, FeatureNotImplementedError, + FineTuneRunActiveError, NotFoundError, - ValidationError, + ServiceUnavailableError, + resource_not_found, ) +from synthorg.core.error_taxonomy import ErrorCode from synthorg.core.persistence_errors import QueryError -from synthorg.core.types import NotBlankStr +from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.memory.embedding.fine_tune import FineTuneStage from synthorg.memory.embedding.fine_tune_models import ( CheckpointRecord, @@ -60,7 +64,9 @@ MEMORY_FINE_TUNE_BACKEND_UNSUPPORTED, MEMORY_FINE_TUNE_BATCH_SIZE_RECOMMENDATION_FAILED, MEMORY_FINE_TUNE_PREFLIGHT_COMPLETED, + MEMORY_FINE_TUNE_PREFLIGHT_TIMED_OUT, MEMORY_FINE_TUNE_REQUESTED, + MEMORY_FINE_TUNE_THRESHOLD_FALLBACK, ) from synthorg.persistence.fine_tune_protocol import ( FineTuneCheckpointRepository, # noqa: TC001 @@ -70,6 +76,8 @@ FINE_TUNE_DEFAULT_BATCH_SIZE, FINE_TUNE_MIN_DOCS_RECOMMENDED, FINE_TUNE_MIN_DOCS_REQUIRED, + FINE_TUNE_PREFLIGHT_MAX_DEPTH, + FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S, ) from synthorg.settings.errors import SettingNotFoundError @@ -153,6 +161,14 @@ def _build_memory_service( (8.0, 32), ) +# Scheduling slack added on top of ``preflight_walk_timeout_s`` for the +# hard request ceiling. The in-thread monotonic deadline already bounds +# the walk once it starts running; this margin covers ``to_thread`` +# pool scheduling, the parallel batch-size task, and result assembly so +# a saturated executor surfaces as a clean 503 instead of a hung +# request. +_PREFLIGHT_HARD_TIMEOUT_MARGIN_S: Final[float] = 5.0 + class _FineTuneThresholds(BaseModel): """Fine-tune preflight thresholds resolved at request time. 
@@ -168,6 +184,8 @@ class _FineTuneThresholds(BaseModel): default_batch_size: int = Field(ge=1) min_docs_required: int = Field(ge=1) min_docs_recommended: int = Field(ge=1) + preflight_max_depth: int = Field(ge=1) + preflight_walk_timeout_s: float = Field(gt=0.0) async def _resolve_fine_tune_thresholds( @@ -184,26 +202,55 @@ async def _resolve_fine_tune_thresholds( "fine_tune_default_batch_size": FINE_TUNE_DEFAULT_BATCH_SIZE, "fine_tune_min_docs_required": FINE_TUNE_MIN_DOCS_REQUIRED, "fine_tune_min_docs_recommended": FINE_TUNE_MIN_DOCS_RECOMMENDED, + "fine_tune_preflight_max_depth": FINE_TUNE_PREFLIGHT_MAX_DEPTH, } if settings_service is None: return _FineTuneThresholds( default_batch_size=fallbacks["fine_tune_default_batch_size"], min_docs_required=fallbacks["fine_tune_min_docs_required"], min_docs_recommended=fallbacks["fine_tune_min_docs_recommended"], + preflight_max_depth=fallbacks["fine_tune_preflight_max_depth"], + preflight_walk_timeout_s=FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S, ) resolved: dict[str, int] = {} for key, fallback in fallbacks.items(): try: entry = await settings_service.get("memory", key) value = int(entry.value) - except SettingNotFoundError, ValueError, TypeError: + except (SettingNotFoundError, ValueError, TypeError) as exc: + logger.debug( + MEMORY_FINE_TUNE_THRESHOLD_FALLBACK, + setting_key=key, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) resolved[key] = fallback continue - # ``_FineTuneThresholds`` enforces ``ge=1`` on every field, so - # an unparseable override (handled above) AND a non-positive - # one ("0" / "-1") must both fall back rather than reach the + # ``_FineTuneThresholds`` enforces ``ge=1`` on every int field + # (the float walk-timeout is resolved separately below), so an + # unparseable override (handled above) AND a non-positive one + # ("0" / "-1") must both fall back rather than reach the # constructor and surface as a 500 from the controller. 
resolved[key] = value if value >= 1 else fallback + # The walk timeout is a float and is resolved independently of the + # int knobs above; the same fall-back-on-bad-input contract holds + # (unparseable / non-positive -> imported default). + try: + timeout_entry = await settings_service.get( + "memory", + "fine_tune_preflight_walk_timeout_s", + ) + timeout_value = float(timeout_entry.value) + except (SettingNotFoundError, ValueError, TypeError) as exc: + logger.debug( + MEMORY_FINE_TUNE_THRESHOLD_FALLBACK, + setting_key="fine_tune_preflight_walk_timeout_s", + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + timeout_value = FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S + if timeout_value <= 0.0: + timeout_value = FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S # Cross-field invariant: ``min_docs_recommended >= min_docs_required``, # otherwise ``_check_documents`` could never emit the ``warn`` band # (a corpus passes the required floor but is still below recommended). @@ -222,6 +269,8 @@ async def _resolve_fine_tune_thresholds( default_batch_size=resolved["fine_tune_default_batch_size"], min_docs_required=resolved["fine_tune_min_docs_required"], min_docs_recommended=resolved["fine_tune_min_docs_recommended"], + preflight_max_depth=resolved["fine_tune_preflight_max_depth"], + preflight_walk_timeout_s=timeout_value, ) @@ -300,7 +349,7 @@ async def start_fine_tune( error=safe_error_description(exc), ) msg = "A fine-tuning run is already active" - raise ConflictError(msg) from exc + raise FineTuneRunActiveError(msg) from exc return ApiResponse( data=FineTuneStatus( run_id=run.id, @@ -326,9 +375,23 @@ async def start_fine_tune( async def resume_fine_tune( self, state: State, - run_id: str, + run_id: PathId, ) -> ApiResponse[FineTuneStatus]: - """Resume a failed/cancelled pipeline run.""" + """Resume a failed or cancelled fine-tune pipeline run. + + Args: + state: Application state. 
+ run_id: Fine-tune run identifier (1-128 chars, enforced at + the path-parameter boundary by ``PathId``). + + Raises: + FeatureNotImplementedError: Orchestrator not configured + (HTTP 501). + FineTuneRunActiveError: Another run is already active + (HTTP 409). + NotFoundError: Run does not exist or is not resumable + (HTTP 404). + """ app_state: AppState = state.app_state if not app_state.has_fine_tune_orchestrator: msg = "Fine-tuning is not available" @@ -351,7 +414,7 @@ async def resume_fine_tune( error=safe_error_description(exc), ) msg = "A fine-tuning run is already active" - raise ConflictError(msg) from exc + raise FineTuneRunActiveError(msg) from exc except ValueError as exc: logger.warning( MEMORY_FINE_TUNE_REQUESTED, @@ -430,21 +493,46 @@ async def run_preflight( app_state.settings_service if app_state.has_settings_service else None ) thresholds = await _resolve_fine_tune_thresholds(settings_service) - async with asyncio.TaskGroup() as tg: - checks_task = tg.create_task( - asyncio.to_thread( - _run_preflight_checks, - data, - min_required=thresholds.min_docs_required, - min_recommended=thresholds.min_docs_recommended, - ), - ) - batch_task = tg.create_task( - asyncio.to_thread( - _recommend_batch_size, - default_batch_size=thresholds.default_batch_size, - ), + # The walk's in-thread monotonic deadline only starts counting + # once the ``to_thread`` job is scheduled; a saturated default + # executor could otherwise leave this request awaiting + # indefinitely. The outer ``asyncio.timeout`` is a hard, + # cancellation-aware ceiling so a stuck pool surfaces as a + # clean 503 the operator can retry rather than a hung request. 
+ hard_ceiling = ( + thresholds.preflight_walk_timeout_s + _PREFLIGHT_HARD_TIMEOUT_MARGIN_S + ) + try: + async with ( + asyncio.timeout(hard_ceiling), + asyncio.TaskGroup() as tg, + ): + checks_task = tg.create_task( + asyncio.to_thread( + _run_preflight_checks, + data, + min_required=thresholds.min_docs_required, + min_recommended=thresholds.min_docs_recommended, + max_depth=thresholds.preflight_max_depth, + walk_timeout_s=thresholds.preflight_walk_timeout_s, + ), + ) + batch_task = tg.create_task( + asyncio.to_thread( + _recommend_batch_size, + default_batch_size=thresholds.default_batch_size, + ), + ) + except TimeoutError as exc: + logger.warning( + MEMORY_FINE_TUNE_PREFLIGHT_TIMED_OUT, + hard_ceiling_s=hard_ceiling, + walk_timeout_s=thresholds.preflight_walk_timeout_s, + error_type=type(exc).__name__, + error=safe_error_description(exc), ) + msg = "Preflight validation timed out" + raise ServiceUnavailableError(msg) from exc checks = list(checks_task.result()) batch_size = batch_task.result() result = PreflightResult( @@ -499,10 +587,15 @@ async def list_checkpoints( async def deploy_checkpoint( self, state: State, - checkpoint_id: str, + checkpoint_id: PathId, ) -> ApiResponse[CheckpointRecord]: """Deploy a specific checkpoint. + Args: + state: Application state. + checkpoint_id: Checkpoint identifier (1-128 chars, enforced + at the path-parameter boundary by ``PathId``). 
+ Exception mapping: - ``CheckpointNotFoundError`` -> HTTP 404 @@ -514,7 +607,7 @@ async def deploy_checkpoint( """ service = _build_memory_service(state.app_state) try: - updated = await service.deploy_checkpoint(NotBlankStr(checkpoint_id)) + updated = await service.deploy_checkpoint(checkpoint_id) except CheckpointNotFoundError as exc: logger.warning( MEMORY_CHECKPOINT_NOT_FOUND, @@ -537,7 +630,7 @@ async def deploy_checkpoint( error=safe_error_description(exc), ) msg = "Failed to deploy checkpoint" - raise ConflictError(msg) from exc + raise CheckpointOperationConflictError(msg) from exc return ApiResponse(data=updated) @post( @@ -556,21 +649,28 @@ async def deploy_checkpoint( async def rollback_checkpoint( self, state: State, - checkpoint_id: str, + checkpoint_id: PathId, ) -> ApiResponse[CheckpointRecord]: """Rollback: restore pre-deployment config from backup. + Args: + state: Application state. + checkpoint_id: Checkpoint identifier (1-128 chars, enforced + at the path-parameter boundary by ``PathId``). 
+ Exception mapping: - ``CheckpointNotFoundError`` -> HTTP 404 via ``NotFoundError`` - - ``CheckpointRollbackUnavailableError``, - ``CheckpointRollbackCorruptError`` -> HTTP 422 via - ``ValidationError`` (operator error / corrupt backup) + - ``CheckpointRollbackUnavailableError`` (HTTP 422, code + ``CHECKPOINT_ROLLBACK_UNAVAILABLE``) and + ``CheckpointRollbackCorruptError`` (HTTP 422, code + ``CHECKPOINT_ROLLBACK_CORRUPT``) carry distinct codes so the + dashboard can message operator error vs corrupt backup apart - Any other exception propagates as HTTP 500 """ service = _build_memory_service(state.app_state) try: - updated = await service.rollback_checkpoint(NotBlankStr(checkpoint_id)) + updated = await service.rollback_checkpoint(checkpoint_id) except CheckpointNotFoundError as exc: logger.warning( MEMORY_CHECKPOINT_NOT_FOUND, @@ -593,7 +693,7 @@ async def rollback_checkpoint( error=safe_error_description(exc), ) msg = "Checkpoint rollback is unavailable" - raise ValidationError(msg) from exc + raise CheckpointRollbackUnavailableError(msg) from exc except CheckpointRollbackCorruptError as exc: logger.warning( MEMORY_CHECKPOINT_ROLLBACK_FAILED, @@ -604,7 +704,7 @@ async def rollback_checkpoint( error=safe_error_description(exc), ) msg = "Checkpoint rollback data is corrupt" - raise ValidationError(msg) from exc + raise CheckpointRollbackCorruptError(msg) from exc return ApiResponse(data=updated) @delete( @@ -620,10 +720,15 @@ async def rollback_checkpoint( async def delete_checkpoint( self, state: State, - checkpoint_id: str, + checkpoint_id: PathId, ) -> ApiResponse[None]: """Delete a checkpoint (rejects active checkpoint). + Args: + state: Application state. + checkpoint_id: Checkpoint identifier (1-128 chars, enforced + at the path-parameter boundary by ``PathId``). 
+ Exception mapping mirrors deploy/rollback so all checkpoint endpoints share the same contract: @@ -634,7 +739,7 @@ async def delete_checkpoint( """ service = _build_memory_service(state.app_state) try: - await service.delete_checkpoint(NotBlankStr(checkpoint_id)) + await service.delete_checkpoint(checkpoint_id) except CheckpointNotFoundError as exc: logger.warning( MEMORY_CHECKPOINT_NOT_FOUND, @@ -657,7 +762,7 @@ async def delete_checkpoint( # text doesn't leak into the 409 response. Detail stays in # the warning log above for operator triage. msg = "Failed to delete checkpoint" - raise ConflictError(msg) from exc + raise CheckpointOperationConflictError(msg) from exc return ApiResponse(data=None) # -- Memory entries ------------------------------------------------- @@ -675,11 +780,18 @@ async def delete_checkpoint( async def delete_memory_entry( self, state: State, - agent_id: str, - memory_id: str, + agent_id: PathId, + memory_id: PathId, ) -> ApiResponse[None]: """Delete a single memory entry owned by an agent. + Args: + state: Application state. + agent_id: Owning agent identifier (1-128 chars, enforced + at the path-parameter boundary by ``PathId``). + memory_id: Memory entry identifier (1-128 chars, enforced + at the path-parameter boundary by ``PathId``). + Returns ``200 OK`` on success and ``404 Not Found`` when the memory entry does not exist (or the agent has no entry with that id). 
Returns ``501 Not Implemented`` when no memory @@ -692,8 +804,8 @@ async def delete_memory_entry( service = _build_memory_service(state.app_state, require_fine_tune=False) try: deleted = await service.delete_memory_entry( - NotBlankStr(agent_id), - NotBlankStr(memory_id), + agent_id, + memory_id, ) except MemoryBackendUnsupportedError as exc: # ``MemoryService.delete_memory_entry`` already emits @@ -708,8 +820,12 @@ async def delete_memory_entry( # ``MEMORY_ENTRY_DELETE_FAILED`` with ``reason="not_found"`` # for this branch, so the controller stays in the layering # role of HTTP translation only. - msg = f"memory entry {memory_id!r} not found" - raise NotFoundError(msg) + resource_type = "memory entry" + raise resource_not_found( + resource_type, + memory_id, + code=ErrorCode.MEMORY_ENTRY_NOT_FOUND, + ) return ApiResponse(data=None) # -- Run history ------------------------------------------------- @@ -798,6 +914,8 @@ def _run_preflight_checks( *, min_required: int = FINE_TUNE_MIN_DOCS_REQUIRED, min_recommended: int = FINE_TUNE_MIN_DOCS_RECOMMENDED, + max_depth: int = FINE_TUNE_PREFLIGHT_MAX_DEPTH, + walk_timeout_s: float = FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S, ) -> list[PreflightCheck]: """Run all pre-flight validation checks. @@ -812,6 +930,8 @@ def _run_preflight_checks( reports ``warn``. Resolved from the ``memory.fine_tune_min_docs_recommended`` setting under the same fallback contract as ``min_required``. + max_depth: Directory recursion cap for the document scan. + walk_timeout_s: Wall-clock deadline for the document scan. 
""" checks: list[PreflightCheck] = [] checks.append(_check_dependencies()) @@ -821,6 +941,8 @@ def _run_preflight_checks( request.source_dir, min_required=min_required, min_recommended=min_recommended, + max_depth=max_depth, + walk_timeout_s=walk_timeout_s, ) ) output_dir = request.output_dir or request.source_dir @@ -833,8 +955,22 @@ def _check_documents( *, min_required: int = FINE_TUNE_MIN_DOCS_REQUIRED, min_recommended: int = FINE_TUNE_MIN_DOCS_RECOMMENDED, + max_depth: int = FINE_TUNE_PREFLIGHT_MAX_DEPTH, + walk_timeout_s: float = FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S, ) -> PreflightCheck: - """Check source directory has enough documents.""" + """Check source directory has enough documents. + + The scan is bounded on two independent axes so a pathologically + deep (symlink-loop / generated) or pathologically wide tree on a + slow / stale-handle mount cannot turn this preflight endpoint into + an unbounded filesystem traversal: ``max_depth`` caps recursion + depth and ``walk_timeout_s`` is a wall-clock deadline. Hitting + either bound returns a ``warn`` band (never a hang and never a + false ``fail``): the operator is told the scan was truncated and + can re-run against a shallower tree or raise the limits. + """ + import os # noqa: PLC0415 + import time # noqa: PLC0415 from pathlib import Path # noqa: PLC0415 src = Path(source_dir) @@ -844,7 +980,40 @@ def _check_documents( status="fail", message="Source directory not found", ) - count = sum(1 for ext in ("*.txt", "*.md", "*.rst") for _ in src.rglob(ext)) + exts = (".txt", ".md", ".rst") + deadline = time.monotonic() + walk_timeout_s + count = 0 + truncated = False + # ``os.walk`` is a generator, so this is a ``for`` (not ``while``) + # loop: the long-running-loop kill-switch gate only inspects + # ``while`` loops, and this sweep is bounded by both the depth + # prune and the monotonic deadline regardless. ``followlinks`` + # stays False so a symlink cycle cannot defeat the depth cap. 
+ for root, dirnames, filenames in os.walk(src, followlinks=False): + if time.monotonic() >= deadline: + truncated = True + break + depth = len(Path(root).relative_to(src).parts) + count += sum(1 for f in filenames if f.endswith(exts)) + if depth >= max_depth: + if dirnames: + # Sub-directories exist below the cap and will NOT be + # scanned: surface that as a truncation warn rather + # than silently under-counting. + truncated = True + # Prune deeper traversal in place; os.walk honours this. + dirnames[:] = [] + if truncated: + return PreflightCheck( + name="documents", + status="warn", + message=( + f"Document scan truncated after {walk_timeout_s:g}s " + f"(depth cap {max_depth}); counted {count}+ so far. " + "Re-run against a shallower source tree or raise " + "memory.fine_tune_preflight_* limits." + ), + ) if count < min_required: return PreflightCheck( name="documents", @@ -937,7 +1106,7 @@ def _check_dependencies() -> PreflightCheck: name="dependencies", status="fail", message="Missing ML dependencies", - detail=str(exc), + detail=safe_error_description(exc), ) except MemoryError, RecursionError: raise @@ -946,7 +1115,7 @@ def _check_dependencies() -> PreflightCheck: name="dependencies", status="fail", message=f"Dependency check failed: {type(exc).__name__}", - detail=str(exc), + detail=safe_error_description(exc), ) return PreflightCheck( name="dependencies", @@ -988,7 +1157,7 @@ def _check_gpu() -> PreflightCheck: name="gpu", status="warn", message=f"GPU detection error: {type(exc).__name__}", - detail=str(exc), + detail=safe_error_description(exc), ) diff --git a/src/synthorg/api/controllers/meta.py b/src/synthorg/api/controllers/meta.py index 944219cdde..f0c1939f6d 100644 --- a/src/synthorg/api/controllers/meta.py +++ b/src/synthorg/api/controllers/meta.py @@ -5,7 +5,6 @@ from litestar import Controller, get, post from litestar.datastructures import State # noqa: TC002 -from litestar.exceptions import NotFoundException from pydantic import BaseModel, 
ConfigDict, Field from synthorg.api.controllers.custom_rules import rule_to_dict @@ -13,7 +12,11 @@ from synthorg.api.guards import require_org_mutation, require_read_access from synthorg.api.pagination import CursorLimit, CursorParam, paginate_cursor from synthorg.api.rate_limits import per_op_rate_limit_from_policy -from synthorg.core.domain_errors import ServiceUnavailableError +from synthorg.core.domain_errors import ( + ServiceUnavailableError, + resource_not_found, +) +from synthorg.core.error_taxonomy import ErrorCode from synthorg.core.persistence_errors import QueryError from synthorg.core.types import NotBlankStr # noqa: TC001 from synthorg.meta.chief_of_staff.models import ChatQuery @@ -232,8 +235,12 @@ async def get_ab_test_detail( # A/B test registry not yet implemented -- every proposal id # currently lacks a durable A/B record. See get /ab-tests # above for the scoped follow-up note. - msg = f"No active A/B test for proposal {proposal_id}" - raise NotFoundException(msg) + resource_type = "ab_test" + raise resource_not_found( + resource_type, + proposal_id, + code=ErrorCode.AB_TEST_NOT_FOUND, + ) @get("/proposals") async def list_proposals( diff --git a/src/synthorg/api/controllers/settings.py b/src/synthorg/api/controllers/settings.py index 00dee0d43a..691ba76705 100644 --- a/src/synthorg/api/controllers/settings.py +++ b/src/synthorg/api/controllers/settings.py @@ -7,7 +7,6 @@ from litestar import Controller, Request, Response, delete, get, post, put from litestar.datastructures import State # noqa: TC002 -from litestar.exceptions import InternalServerException from litestar.status_codes import HTTP_204_NO_CONTENT from pydantic import ( AwareDatetime, @@ -62,7 +61,9 @@ from synthorg.settings.errors import ( SettingNotFoundError, SettingsEncryptionError, + SettingsEncryptionFailedError, SettingValidationError, + SinkConfigValidationError, ) from synthorg.settings.models import SettingDefinition, SettingEntry # noqa: TC001 @@ -541,7 +542,7 @@ async 
def update_setting( error=safe_error_description(exc), ) msg = "Internal error processing sensitive setting" - raise InternalServerException(msg) from None + raise SettingsEncryptionFailedError(msg) from None new_etag = compute_etag( entry.value, @@ -699,7 +700,7 @@ def test_sink_config( error=safe_error_description(exc), ) msg = "Internal error validating sink configuration" - raise InternalServerException(msg) from None + raise SinkConfigValidationError(msg) from None return ApiResponse( data=TestSinkConfigResponse(valid=True), ) diff --git a/src/synthorg/api/controllers/subworkflows.py b/src/synthorg/api/controllers/subworkflows.py index ecb817eee6..1931f814cd 100644 --- a/src/synthorg/api/controllers/subworkflows.py +++ b/src/synthorg/api/controllers/subworkflows.py @@ -28,7 +28,7 @@ from synthorg.api.path_params import PathId # noqa: TC001 from synthorg.api.state import AppState # noqa: TC001 from synthorg.core.enums import WorkflowType -from synthorg.core.types import NotBlankStr # noqa: TC001 +from synthorg.core.types import NotBlankStr from synthorg.engine.errors import WorkflowDefinitionValidationError from synthorg.engine.workflow.definition import ( WorkflowDefinition, @@ -46,6 +46,7 @@ ) from synthorg.observability import get_logger, safe_error_description from synthorg.observability.events.api import API_CURSOR_INVALID +from synthorg.persistence._shared import collect_all logger = get_logger(__name__) @@ -214,15 +215,38 @@ async def search_subworkflows( description="Search substring", ), ], - ) -> Response[ApiResponse[tuple[SubworkflowSummary, ...]]]: - """Substring search across name and description.""" + limit: CursorLimit = DEFAULT_LIMIT, + cursor: CursorParam = None, + ) -> Response[PaginatedResponse[SubworkflowSummary]]: + """Substring search across name and description (cursor-paginated). 
+ + Applies opaque-cursor pagination at the API boundary over the + complete match set: the handler drains every bounded repository + page via ``collect_all`` first (a truncated set would break the + cursor walk and under-report matches), then slices the + requested cursor page for the response. + """ registry = _registry(state) - matches = await registry.search(q) - return Response( - content=ApiResponse[tuple[SubworkflowSummary, ...]]( - data=matches, + # This endpoint applies its own opaque-cursor pagination over + # the full match set, so drain every bounded repo page; a + # truncated set would break the cursor walk and under-report + # matches. + matches = await collect_all( + lambda page_limit, offset: registry.search( + NotBlankStr(q), + limit=page_limit, + offset=offset, ), ) + page, meta = paginate_cursor( + matches, + limit=limit, + cursor=cursor, + secret=state.app_state.cursor_secret, + ) + return Response( + content=PaginatedResponse[SubworkflowSummary](data=page, pagination=meta), + ) @get("/{subworkflow_id:str}/versions", guards=[require_read_access]) async def list_versions( @@ -293,9 +317,27 @@ async def list_parents( limit: CursorLimit = DEFAULT_LIMIT, cursor: CursorParam = None, ) -> Response[PaginatedResponse[ParentReference]]: - """List parent workflow definitions pinning this version (cursor-paginated).""" + """List parent workflow definitions pinning this version. + + Applies opaque-cursor pagination at the API boundary over the + complete parent set: the handler drains every bounded + repository page via ``collect_all`` first (a truncated set + would break the cursor walk and under-report references), then + slices the requested cursor page for the response. 
+ """ registry = _registry(state) - parents = await registry.find_parents(subworkflow_id, version) + # This endpoint applies its own opaque-cursor pagination over + # the full parent set, so drain every bounded repo page; a + # truncated set would break the cursor walk and (worse) + # under-report references. + parents = await collect_all( + lambda page_limit, offset: registry.find_parents( + NotBlankStr(subworkflow_id), + NotBlankStr(version), + limit=page_limit, + offset=offset, + ), + ) page, meta = paginate_cursor( parents, limit=limit, diff --git a/src/synthorg/api/controllers/training.py b/src/synthorg/api/controllers/training.py index 1f7fe222e5..8f413caab0 100644 --- a/src/synthorg/api/controllers/training.py +++ b/src/synthorg/api/controllers/training.py @@ -22,7 +22,11 @@ from synthorg.api.rate_limits import per_op_rate_limit_from_policy from synthorg.api.state import AppState # noqa: TC001 from synthorg.core.agent import AgentIdentity # noqa: TC001 -from synthorg.core.domain_errors import ConflictError, NotFoundError, ValidationError +from synthorg.core.domain_errors import ( + NotFoundError, + TrainingPlanNotModifiableError, + ValidationError, +) from synthorg.core.types import NotBlankStr from synthorg.hr.training.models import ( ContentType, @@ -157,7 +161,10 @@ class TrainingController(Controller): @post( "/plan", - guards=[require_org_mutation()], + guards=[ + require_org_mutation(), + per_op_rate_limit_from_policy("training.create_plan", key="user"), + ], status_code=HTTP_200_OK, ) async def create_plan( @@ -168,6 +175,9 @@ async def create_plan( ) -> ApiResponse[TrainingPlanResponse]: """Create a training plan for the specified agent. + Rate-limited per user by the ``training.create_plan`` policy + guard; burst traffic is rejected with HTTP 429. + Args: state: Application state. agent_name: Agent identifier from the URL path. 
@@ -409,7 +419,13 @@ async def preview_plan( @put( "/plan/{plan_id:str}/overrides", - guards=[require_org_mutation()], + guards=[ + require_org_mutation(), + per_op_rate_limit_from_policy( + "training.update_overrides", + key="user", + ), + ], status_code=HTTP_200_OK, ) async def update_overrides( @@ -421,6 +437,9 @@ async def update_overrides( ) -> ApiResponse[TrainingPlanResponse]: """Update training plan overrides. + Rate-limited per user by the ``training.update_overrides`` + policy guard; burst traffic is rejected with HTTP 429. + Args: state: Application state. agent_name: Agent identifier from the URL path. @@ -469,7 +488,7 @@ async def update_overrides( error="Attempt to modify non-pending training plan", ) msg = "Cannot modify plan after execution or failure" - raise ConflictError(msg) + raise TrainingPlanNotModifiableError(msg) updates: dict[str, object] = {} if data.override_sources is not None: diff --git a/src/synthorg/api/controllers/workflows.py b/src/synthorg/api/controllers/workflows.py index ce1408a01f..c14cfa552d 100644 --- a/src/synthorg/api/controllers/workflows.py +++ b/src/synthorg/api/controllers/workflows.py @@ -30,8 +30,9 @@ from synthorg.api.pagination import CursorLimit, CursorParam, paginate_cursor from synthorg.api.path_params import QUERY_MAX_LENGTH, PathId from synthorg.api.rate_limits import per_op_rate_limit_from_policy -from synthorg.core.domain_errors import NotFoundError +from synthorg.core.domain_errors import resource_not_found from synthorg.core.enums import WorkflowType +from synthorg.core.error_taxonomy import ErrorCode from synthorg.core.types import NotBlankStr from synthorg.engine.errors import ( WorkflowDefinitionValidationError, @@ -215,23 +216,35 @@ async def get_workflow( self, state: State, workflow_id: PathId, - ) -> Response[ApiResponse[WorkflowDefinition]]: - """Get a workflow definition by ID.""" + ) -> ApiResponse[WorkflowDefinition]: + """Get a workflow definition by ID. 
+ + Returns the bare ``ApiResponse`` envelope (Litestar wraps it in + a 200 response). A missing definition raises ``NotFoundError`` + (HTTP 404, ``WORKFLOW_DEFINITION_NOT_FOUND``) routed through the + shared exception handlers rather than an inline 404 body. + + Args: + state: Application state. + workflow_id: Workflow identifier (1-128 chars, enforced at + the path-parameter boundary by ``PathId``). + + Raises: + NotFoundError: The workflow definition does not exist. + """ definition = await _service(state).get_definition(workflow_id) if definition is None: logger.warning( WORKFLOW_DEF_NOT_FOUND, definition_id=workflow_id, ) - return Response( - content=ApiResponse[WorkflowDefinition]( - error="Workflow definition not found", - ), - status_code=404, + resource_type = "workflow_definition" + raise resource_not_found( + resource_type, + workflow_id, + code=ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND, ) - return Response( - content=ApiResponse[WorkflowDefinition](data=definition), - ) + return ApiResponse[WorkflowDefinition](data=definition) @post( guards=[ @@ -400,8 +413,12 @@ async def delete_workflow( WORKFLOW_DEF_NOT_FOUND, definition_id=workflow_id, ) - msg = "Workflow definition not found" - raise NotFoundError(msg) + resource_type = "workflow_definition" + raise resource_not_found( + resource_type, + workflow_id, + code=ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND, + ) # Post-delete confirmation -- emitted only on persistence success. logger.info( WORKFLOW_DEFINITION_CHANGED, @@ -480,27 +497,32 @@ async def validate_workflow( self, state: State, workflow_id: PathId, - ) -> Response[ApiResponse[WorkflowValidationResult]]: - """Validate a workflow definition for execution readiness.""" + ) -> ApiResponse[WorkflowValidationResult]: + """Validate a workflow definition for execution readiness. + + Returns the bare ``ApiResponse`` envelope (Litestar wraps it in + a 200 response). 
A missing definition raises ``NotFoundError`` + (HTTP 404, ``WORKFLOW_DEFINITION_NOT_FOUND``) via the shared + exception handlers instead of an inline 404 body. + + Raises: + NotFoundError: The workflow definition does not exist. + """ definition = await _service(state).get_definition(workflow_id) if definition is None: logger.warning( WORKFLOW_DEF_NOT_FOUND, definition_id=workflow_id, ) - return Response( - content=ApiResponse[WorkflowValidationResult]( - error="Workflow definition not found", - ), - status_code=404, + resource_type = "workflow_definition" + raise resource_not_found( + resource_type, + workflow_id, + code=ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND, ) result = run_workflow_validation(definition) - return Response( - content=ApiResponse[WorkflowValidationResult]( - data=result, - ), - ) + return ApiResponse[WorkflowValidationResult](data=result) @post( "/{workflow_id:str}/export", @@ -514,19 +536,28 @@ async def export_workflow( self, state: State, workflow_id: PathId, - ) -> Response[str] | Response[ApiResponse[None]]: - """Export a workflow definition as YAML.""" + ) -> Response[str]: + """Export a workflow definition as YAML. + + Returns only ``Response[str]`` on success; a missing definition + raises ``NotFoundError`` (HTTP 404, + ``WORKFLOW_DEFINITION_NOT_FOUND``) through the shared exception + handlers rather than returning an inline 404 response. + + Raises: + NotFoundError: The workflow definition does not exist. 
+ """ definition = await _service(state).get_definition(workflow_id) if definition is None: logger.warning( WORKFLOW_DEF_NOT_FOUND, definition_id=workflow_id, ) - return Response( - content=ApiResponse[None]( - error="Workflow definition not found", - ), - status_code=404, + resource_type = "workflow_definition" + raise resource_not_found( + resource_type, + workflow_id, + code=ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND, ) try: diff --git a/src/synthorg/api/cursor_config.py b/src/synthorg/api/cursor_config.py index 1622718e3f..584e61a7a3 100644 --- a/src/synthorg/api/cursor_config.py +++ b/src/synthorg/api/cursor_config.py @@ -27,7 +27,7 @@ class CursorConfig(BaseModel): tokens become invalid across restarts. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") secret: str | None = Field( default=None, diff --git a/src/synthorg/api/etag.py b/src/synthorg/api/etag.py index 420cc9c90a..9058133376 100644 --- a/src/synthorg/api/etag.py +++ b/src/synthorg/api/etag.py @@ -290,6 +290,32 @@ async def _capturing_send(message: dict[str, object]) -> None: ) +def _apply_cache_control( + headers: list[tuple[bytes, bytes]], + path: str, +) -> list[tuple[bytes, bytes]]: + """Replace ``cache-control`` with the validator-friendly policy. + + Drops any existing ``cache-control`` and appends + ``_DEFAULT_PUBLIC_CACHE`` / ``_DEFAULT_PRIVATE_CACHE`` by path. The + replace (not append-if-missing) is required because the global + ``security_headers_hook`` runs as a Litestar ``before_send`` and + unconditionally pins ``Cache-Control: no-store, no-cache, + must-revalidate, max-age=0`` on every API response before this + middleware sees it; without the overwrite, allowlisted reads + (buffered AND streaming) would never advertise the + ``private``/``public`` policy and clients would not revalidate. 
+ Shared by :func:`_emit_response` (buffered, also adds an ETag) and + the streaming pass-through branch (no ETag, cache policy only). + """ + cache_default = ( + _DEFAULT_PUBLIC_CACHE if _is_public_cache_path(path) else _DEFAULT_PRIVATE_CACHE + ) + rewritten = [(k, v) for k, v in headers if k.lower() != b"cache-control"] + rewritten.append((b"cache-control", cache_default)) + return rewritten + + async def _emit_response( send: Send, captured_start: dict[str, object] | None, @@ -312,23 +338,12 @@ async def _emit_response( list(headers_value) if isinstance(headers_value, list | tuple) else [] ) etag = compute_etag(body) - cache_default = ( - _DEFAULT_PUBLIC_CACHE if _is_public_cache_path(path) else _DEFAULT_PRIVATE_CACHE - ) - # Drop any existing ``etag`` and ``cache-control`` and reinstall - # the policy this middleware owns. We replace (not append-if-missing) - # because the global ``security_headers_hook`` runs as a Litestar - # ``before_send`` and unconditionally sets ``Cache-Control: - # no-store, no-cache, must-revalidate, max-age=0`` on every API - # response; without this overwrite, allowlisted reads would never - # advertise the validator-friendly ``private``/``public`` policy - # documented in the module header and clients would not retain - # ETags for conditional GETs. - extended_headers = [ - (k, v) for k, v in headers if k.lower() not in {b"etag", b"cache-control"} - ] + # Cache-Control policy is shared with the streaming branch via + # ``_apply_cache_control``; the ETag is buffered-only so it is + # dropped + reinstalled here, not in the shared helper. 
+ without_etag = [(k, v) for k, v in headers if k.lower() != b"etag"] + extended_headers = _apply_cache_control(without_etag, path) extended_headers.append((b"etag", etag.encode("latin-1"))) - extended_headers.append((b"cache-control", cache_default)) if match_etag(if_none_match, etag): # DEBUG-only: every 304 saves a body roundtrip; logging at @@ -439,8 +454,22 @@ async def _handle_body_message( return if message.get("more_body", False): # Multi-chunk response: stream as-is, no ETag, no buffering. + # The body cannot be hashed without buffering, so no ETag is + # emitted -- but the validator-friendly Cache-Control policy + # still applies (otherwise the global ``no-store`` from + # ``security_headers_hook`` would suppress client revalidation + # for streamed allowlisted reads too). if state.captured_start is not None: - await send(state.captured_start) # type: ignore[arg-type] + headers_value = state.captured_start.get("headers", []) + current_headers: list[tuple[bytes, bytes]] = ( + list(headers_value) if isinstance(headers_value, list | tuple) else [] + ) + forwarded_start = dict(state.captured_start) + forwarded_start["headers"] = _apply_cache_control( + current_headers, + path, + ) + await send(forwarded_start) # type: ignore[arg-type] state.captured_start = None await send(message) # type: ignore[arg-type] state.passthrough = True diff --git a/src/synthorg/api/pagination.py b/src/synthorg/api/pagination.py index 3acaaa14bd..c9af0d855b 100644 --- a/src/synthorg/api/pagination.py +++ b/src/synthorg/api/pagination.py @@ -36,7 +36,13 @@ description=f"Page size (default {DEFAULT_LIMIT}, max {MAX_LIMIT})", ), ] -"""Query-parameter type for the page size (1-MAX_LIMIT).""" +"""Query-parameter type for the page size (1-MAX_LIMIT). + +HTTP-boundary only: the bounds are enforced by Litestar's +``Parameter`` metadata at request parsing. Do not reuse this alias +for in-process validation, where the constraint would silently not +apply. 
+""" CursorParam = Annotated[ str | None, @@ -45,7 +51,12 @@ description="Opaque pagination cursor returned by the previous page", ), ] -"""Query-parameter type for the opaque cursor (max 512 chars).""" +"""Query-parameter type for the opaque cursor (max 512 chars). + +HTTP-boundary only: the ``max_length`` is enforced by Litestar's +``Parameter`` metadata at request parsing, not by the type itself. +Do not reuse this alias for in-process validation. +""" def paginate_cursor[T]( diff --git a/src/synthorg/api/rate_limits/policies.py b/src/synthorg/api/rate_limits/policies.py index a6efbdb1d9..2895fb3339 100644 --- a/src/synthorg/api/rate_limits/policies.py +++ b/src/synthorg/api/rate_limits/policies.py @@ -206,7 +206,9 @@ "tasks.transition": (100, 60), "tasks.update": (100, 60), # training + "training.create_plan": (30, 3600), "training.execute": (20, 3600), + "training.update_overrides": (60, 3600), # users "users.create": (5, 60), "users.delete": (3, 60), diff --git a/src/synthorg/api/rate_limits/protocol.py b/src/synthorg/api/rate_limits/protocol.py index 3e0ee1f494..10226fe938 100644 --- a/src/synthorg/api/rate_limits/protocol.py +++ b/src/synthorg/api/rate_limits/protocol.py @@ -27,7 +27,7 @@ class RateLimitOutcome(BaseModel): are rejected by the ``ge=0`` validator at construction time. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") allowed: bool retry_after_seconds: float | None = Field(default=None, ge=0.0) diff --git a/src/synthorg/api/ws_models.py b/src/synthorg/api/ws_models.py index 9b51ebffc6..94ab39fe0f 100644 --- a/src/synthorg/api/ws_models.py +++ b/src/synthorg/api/ws_models.py @@ -158,7 +158,7 @@ class WsEvent(BaseModel): payload: Event-specific data. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") version: int = Field( default=WS_PROTOCOL_VERSION, diff --git a/src/synthorg/backup/errors.py b/src/synthorg/backup/errors.py index 5d76cf2a03..b1c163a544 100644 --- a/src/synthorg/backup/errors.py +++ b/src/synthorg/backup/errors.py @@ -27,17 +27,23 @@ class BackupInProgressError(BackupError): default_message: ClassVar[str] = "Backup operation already in progress" error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT - error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.BACKUP_IN_PROGRESS status_code: ClassVar[int] = 409 class RestoreError(BackupError): """Raised when a restore operation fails.""" + default_message: ClassVar[str] = "Restore operation failed" + error_code: ClassVar[ErrorCode] = ErrorCode.BACKUP_RESTORE_FAILED + class ManifestError(BackupError): """Raised when a backup manifest is invalid or corrupt.""" + default_message: ClassVar[str] = "Backup manifest is invalid or corrupt" + error_code: ClassVar[ErrorCode] = ErrorCode.BACKUP_MANIFEST_ERROR + class ComponentBackupError(BackupError): """Raised when a per-component backup or restore step fails.""" @@ -52,7 +58,7 @@ class BackupNotFoundError(BackupError): default_message: ClassVar[str] = "Backup not found" error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND - error_code: ClassVar[ErrorCode] = ErrorCode.RECORD_NOT_FOUND + error_code: ClassVar[ErrorCode] = ErrorCode.BACKUP_NOT_FOUND status_code: ClassVar[int] = 404 @@ -68,7 +74,7 @@ class BackupUnrestartableError(BackupError): "Backup scheduler is unrestartable after a timed-out stop" ) error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT - error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.BACKUP_UNRESTARTABLE status_code: ClassVar[int] = 409 diff --git 
a/src/synthorg/budget/baseline_store.py b/src/synthorg/budget/baseline_store.py index 58857dd8e4..bd8978bd7c 100644 --- a/src/synthorg/budget/baseline_store.py +++ b/src/synthorg/budget/baseline_store.py @@ -40,7 +40,7 @@ class BaselineRecord(BaseModel): timestamp: When the record was captured (UTC). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Executing agent identifier") task_id: NotBlankStr = Field(description="Task identifier") diff --git a/src/synthorg/budget/call_analytics_config.py b/src/synthorg/budget/call_analytics_config.py index 5f34fe4c5c..749d4ea7ef 100644 --- a/src/synthorg/budget/call_analytics_config.py +++ b/src/synthorg/budget/call_analytics_config.py @@ -20,7 +20,7 @@ class RetryAlertConfig(BaseModel): alert. Must be in [0.0, 1.0]. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") warn_rate: float = Field( default=_DEFAULT_RETRY_WARN_RATE, @@ -44,7 +44,7 @@ class CallAnalyticsConfig(BaseModel): retry_alerts: Configuration for retry rate alerting. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=True, diff --git a/src/synthorg/budget/call_analytics_models.py b/src/synthorg/budget/call_analytics_models.py index 0f85d692e8..b0d5edf111 100644 --- a/src/synthorg/budget/call_analytics_models.py +++ b/src/synthorg/budget/call_analytics_models.py @@ -30,7 +30,7 @@ class AnalyticsAggregation(BaseModel): sorted tuple of ``(reason_str, count)`` pairs. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total_calls: int = Field(ge=0, description="Total LLM calls recorded.") success_count: int = Field(ge=0, description="Calls with success=True.") diff --git a/src/synthorg/budget/call_classifier.py b/src/synthorg/budget/call_classifier.py index 163fce2c0b..18d6b12222 100644 --- a/src/synthorg/budget/call_classifier.py +++ b/src/synthorg/budget/call_classifier.py @@ -40,7 +40,7 @@ class ClassificationContext(BaseModel): agent_role: Optional semantic role of the agent (context only). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") turn_number: int = Field(gt=0, description="1-indexed turn number") agent_id: NotBlankStr = Field(description="Executing agent identifier") diff --git a/src/synthorg/budget/category_analytics.py b/src/synthorg/budget/category_analytics.py index 6703c70416..226238e7c9 100644 --- a/src/synthorg/budget/category_analytics.py +++ b/src/synthorg/budget/category_analytics.py @@ -180,7 +180,7 @@ class OrchestrationRatio(BaseModel): system_tokens: System category tokens. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") ratio: float = Field(ge=0.0, le=1.0, description="Orchestration ratio") alert_level: OrchestrationAlertLevel = Field( diff --git a/src/synthorg/budget/config.py b/src/synthorg/budget/config.py index 59133a7b9f..e715c0e271 100644 --- a/src/synthorg/budget/config.py +++ b/src/synthorg/budget/config.py @@ -35,7 +35,7 @@ class BudgetAlertConfig(BaseModel): hard_stop_at: Percentage of budget that triggers a hard stop. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( @@ -115,7 +115,7 @@ class AutoDowngradeConfig(BaseModel): never mid-execution per the Operations design page). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( @@ -223,7 +223,7 @@ class BudgetConfig(BaseModel): currency: ISO 4217 currency code for display formatting. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( diff --git a/src/synthorg/budget/coordination_config.py b/src/synthorg/budget/coordination_config.py index 07f3d37b02..dddfa54daa 100644 --- a/src/synthorg/budget/coordination_config.py +++ b/src/synthorg/budget/coordination_config.py @@ -79,7 +79,7 @@ class DetectorCategoryConfig(BaseModel): scope: Detection scope level. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") variants: tuple[DetectorVariant, ...] = Field( default=(DetectorVariant.HEURISTIC,), @@ -217,7 +217,7 @@ class OrchestrationAlertThresholds(BaseModel): orchestration; stop routing and triage). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") info: float = Field( default=_DEFAULT_INFO_THRESHOLD, @@ -263,7 +263,7 @@ class CoordinationMetricsConfig(BaseModel): orchestration_alerts: Orchestration overhead alert thresholds. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=False, diff --git a/src/synthorg/budget/coordination_metrics.py b/src/synthorg/budget/coordination_metrics.py index b4a7978001..e53e8bab43 100644 --- a/src/synthorg/budget/coordination_metrics.py +++ b/src/synthorg/budget/coordination_metrics.py @@ -153,7 +153,7 @@ class RedundancyRate(BaseModel): sample_count: Number of similarity samples. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") value: float = Field( ge=0.0, @@ -384,7 +384,7 @@ class CoordinationMetrics(BaseModel): message_overhead: O(n^2) message overhead detection. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") efficiency: CoordinationEfficiency | None = Field( default=None, diff --git a/src/synthorg/budget/coordination_store.py b/src/synthorg/budget/coordination_store.py index 5da9856097..4b2a78485c 100644 --- a/src/synthorg/budget/coordination_store.py +++ b/src/synthorg/budget/coordination_store.py @@ -27,7 +27,7 @@ class CoordinationMetricsRecord(BaseModel): """Timestamped coordination metrics from a single run.""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") task_id: NotBlankStr = Field(description="Associated task") agent_id: NotBlankStr | None = Field( diff --git a/src/synthorg/budget/cost_record.py b/src/synthorg/budget/cost_record.py index 1d53d59ee9..47c17a9133 100644 --- a/src/synthorg/budget/cost_record.py +++ b/src/synthorg/budget/cost_record.py @@ -66,7 +66,7 @@ class CostRecord(BaseModel): success: Whether the call completed without error or content filter. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Agent identifier") task_id: NotBlankStr = Field(description="Task identifier") diff --git a/src/synthorg/budget/cost_tiers.py b/src/synthorg/budget/cost_tiers.py index 6139f8fcb6..8e6179a2ba 100644 --- a/src/synthorg/budget/cost_tiers.py +++ b/src/synthorg/budget/cost_tiers.py @@ -36,7 +36,7 @@ class CostTierDefinition(BaseModel): sort_order: Display ordering (lower = cheaper). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field(description="Unique tier identifier") display_name: NotBlankStr = Field(description="Human-readable name") @@ -103,7 +103,7 @@ class CostTiersConfig(BaseModel): include_builtin: Whether to merge built-in default tiers. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") tiers: tuple[CostTierDefinition, ...] = Field( default=(), diff --git a/src/synthorg/budget/hierarchy.py b/src/synthorg/budget/hierarchy.py index b30423d297..dc52bd6fbc 100644 --- a/src/synthorg/budget/hierarchy.py +++ b/src/synthorg/budget/hierarchy.py @@ -22,7 +22,7 @@ class TeamBudget(BaseModel): budget_percent: Percent of department budget allocated to this team. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") team_name: NotBlankStr = Field( description="Team name", @@ -47,7 +47,7 @@ class DepartmentBudget(BaseModel): teams: Team budget allocations within this department. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") department_name: NotBlankStr = Field( description="Department name", @@ -103,7 +103,7 @@ class BudgetHierarchy(BaseModel): departments: Department budget allocations. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total_monthly: float = Field( ge=0.0, diff --git a/src/synthorg/budget/optimizer_models.py b/src/synthorg/budget/optimizer_models.py index 39426e3493..63f2c64e98 100644 --- a/src/synthorg/budget/optimizer_models.py +++ b/src/synthorg/budget/optimizer_models.py @@ -67,7 +67,7 @@ class SpendingAnomaly(BaseModel): period_end: End of the window that triggered the anomaly. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Agent identifier") anomaly_type: AnomalyType = Field(description="Anomaly classification") @@ -112,7 +112,7 @@ class AnomalyDetectionResult(BaseModel): scan_timestamp: When the scan was performed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") anomalies: tuple[SpendingAnomaly, ...] = Field( default=(), @@ -248,7 +248,7 @@ class DowngradeRecommendation(BaseModel): reason: Human-readable explanation. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Agent identifier") current_model: NotBlankStr = Field(description="Current model identifier") @@ -320,7 +320,7 @@ class ApprovalDecision(BaseModel): conditions: Any conditions attached to approval. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") approved: bool = Field(description="Whether the operation is approved") reason: NotBlankStr = Field(description="Explanation for the decision") @@ -372,7 +372,7 @@ class CostOptimizerConfig(BaseModel): required before anomaly detection activates. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") anomaly_sigma_threshold: float = Field( default=2.0, diff --git a/src/synthorg/budget/project_cost_aggregate.py b/src/synthorg/budget/project_cost_aggregate.py index 014c3d80b2..5bc0334fc7 100644 --- a/src/synthorg/budget/project_cost_aggregate.py +++ b/src/synthorg/budget/project_cost_aggregate.py @@ -36,7 +36,7 @@ class ProjectCostAggregate(BaseModel): last_updated: Timestamp of the most recent increment. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") project_id: NotBlankStr = Field(description="Project identifier") total_cost: float = Field( diff --git a/src/synthorg/budget/quota.py b/src/synthorg/budget/quota.py index 8f5b19eeac..59e6d39a16 100644 --- a/src/synthorg/budget/quota.py +++ b/src/synthorg/budget/quota.py @@ -36,7 +36,7 @@ class QuotaLimit(BaseModel): max_tokens: Maximum tokens in the window (0 = unlimited). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") window: QuotaWindow = Field(description="Time window for this limit") max_requests: int = Field( @@ -91,7 +91,7 @@ class SubscriptionConfig(BaseModel): hardware_limits: Free-text hardware constraints for local models. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") plan_name: NotBlankStr = Field( default="pay_as_you_go", @@ -181,7 +181,7 @@ class DegradationConfig(BaseModel): queue_max_wait_seconds: Max seconds to wait when queueing. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") strategy: DegradationAction = Field( default=DegradationAction.ALERT, diff --git a/src/synthorg/budget/quota_poller_config.py b/src/synthorg/budget/quota_poller_config.py index 9e4bee7dbd..b457f58d36 100644 --- a/src/synthorg/budget/quota_poller_config.py +++ b/src/synthorg/budget/quota_poller_config.py @@ -17,7 +17,7 @@ class QuotaAlertThresholds(BaseModel): ``warn_pct``. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") warn_pct: float = Field( default=80.0, @@ -57,7 +57,7 @@ class QuotaPollerConfig(BaseModel): the same provider/window/level tuple. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=False, diff --git a/src/synthorg/budget/report_config.py b/src/synthorg/budget/report_config.py index ff267f5c0d..4b933897ae 100644 --- a/src/synthorg/budget/report_config.py +++ b/src/synthorg/budget/report_config.py @@ -51,7 +51,7 @@ class ReportScheduleConfig(BaseModel): templates: Which report templates to include. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = False periods: tuple[ReportPeriod, ...] = () @@ -82,7 +82,7 @@ class AutomatedReportingConfig(BaseModel): retention_days: How long to keep generated reports. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") schedule: ReportScheduleConfig = Field( default_factory=ReportScheduleConfig, diff --git a/src/synthorg/budget/report_templates.py b/src/synthorg/budget/report_templates.py index cfa2864eec..83cc3a0c65 100644 --- a/src/synthorg/budget/report_templates.py +++ b/src/synthorg/budget/report_templates.py @@ -37,7 +37,7 @@ class AgentPerformanceSummary(BaseModel): total_risk_units: Total risk units accumulated. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Agent identifier") tasks_completed: int = Field(default=0, ge=0) @@ -63,7 +63,7 @@ class PerformanceMetricsReport(BaseModel): generated_at: When the report was generated. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_snapshots: tuple[AgentPerformanceSummary, ...] = () average_quality_score: float | None = Field( @@ -93,7 +93,7 @@ class DepartmentTaskSummary(BaseModel): failed: Tasks failed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") department: NotBlankStr = Field(description="Department name") assigned: int = Field(default=0, ge=0) @@ -144,7 +144,7 @@ class DailyRiskPoint(BaseModel): record_count: Number of risk records. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") date: _dt.date = Field(description="Date") total_risk_units: float = Field(default=0.0, ge=0.0) @@ -162,7 +162,7 @@ class RiskTrendsReport(BaseModel): generated_at: When the report was generated. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total_risk_units: float = Field(default=0.0, ge=0.0) risk_by_agent: tuple[tuple[NotBlankStr, float], ...] = () @@ -206,7 +206,7 @@ class ComprehensiveReport(BaseModel): generated_at: When the report was generated. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") period: ReportPeriod = Field(description="Report period") start: AwareDatetime = Field(description="Period start (inclusive)") diff --git a/src/synthorg/budget/reports.py b/src/synthorg/budget/reports.py index 058d4f00dc..4c06fba38e 100644 --- a/src/synthorg/budget/reports.py +++ b/src/synthorg/budget/reports.py @@ -57,7 +57,7 @@ class TaskSpending(BaseModel): record_count: Number of cost records. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") task_id: NotBlankStr = Field(description="Task identifier") total_cost: float = Field(ge=0.0, description="Total cost") @@ -92,7 +92,7 @@ class ProviderDistribution(BaseModel): percentage_of_total: Percentage of total spending. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") provider: NotBlankStr = Field(description="Provider name") total_cost: float = Field(ge=0.0, description="Total cost") @@ -132,7 +132,7 @@ class ModelDistribution(BaseModel): percentage_of_total: Percentage of total spending. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") model: NotBlankStr = Field(description="Model identifier") provider: NotBlankStr = Field(description="Provider name") @@ -217,7 +217,7 @@ class SpendingReport(BaseModel): generated_at: When the report was generated. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") summary: SpendingSummary = Field(description="Overall spending summary") by_task: tuple[TaskSpending, ...] = Field( diff --git a/src/synthorg/budget/risk_check.py b/src/synthorg/budget/risk_check.py index a95b735ca8..3ac8ba7abf 100644 --- a/src/synthorg/budget/risk_check.py +++ b/src/synthorg/budget/risk_check.py @@ -14,7 +14,7 @@ class RiskCheckResult(BaseModel): reason: Human-readable explanation when denied. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") allowed: bool = True risk_units: float = Field(default=0.0, ge=0.0) diff --git a/src/synthorg/budget/risk_config.py b/src/synthorg/budget/risk_config.py index 421ec0d42d..19107ff69a 100644 --- a/src/synthorg/budget/risk_config.py +++ b/src/synthorg/budget/risk_config.py @@ -20,7 +20,7 @@ class RiskBudgetAlertConfig(BaseModel): critical_at: Percentage at which to issue a critical alert. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") warn_at: int = Field(default=75, ge=0, le=100, strict=True) critical_at: int = Field(default=90, ge=0, le=100, strict=True) @@ -51,7 +51,7 @@ class RiskBudgetConfig(BaseModel): alerts: Alert threshold configuration. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = False per_task_risk_limit: float = Field(default=5.0, ge=0.0) diff --git a/src/synthorg/budget/risk_record.py b/src/synthorg/budget/risk_record.py index a26e8aa8f7..8ffd6ef27e 100644 --- a/src/synthorg/budget/risk_record.py +++ b/src/synthorg/budget/risk_record.py @@ -29,7 +29,7 @@ class RiskRecord(BaseModel): timestamp: Timezone-aware timestamp of the action. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Agent identifier") task_id: NotBlankStr = Field(description="Task identifier") diff --git a/src/synthorg/budget/spending_summary.py b/src/synthorg/budget/spending_summary.py index 15cd09807c..869f61d7b2 100644 --- a/src/synthorg/budget/spending_summary.py +++ b/src/synthorg/budget/spending_summary.py @@ -37,7 +37,7 @@ class _SpendingTotals(BaseModel): record_count: Number of cost records aggregated. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total_cost: float = Field( default=0.0, @@ -139,7 +139,7 @@ class SpendingSummary(BaseModel): alert_level: Current budget alert level. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") period: PeriodSpending = Field(description="Time-period aggregation") by_agent: tuple[AgentSpending, ...] = Field( diff --git a/src/synthorg/budget/trends.py b/src/synthorg/budget/trends.py index 03bfa34646..55d28af37a 100644 --- a/src/synthorg/budget/trends.py +++ b/src/synthorg/budget/trends.py @@ -61,7 +61,7 @@ class TrendDataPoint(BaseModel): value: Metric value for this bucket. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") timestamp: AwareDatetime = Field(description="Bucket start time (UTC)") value: float = Field(description="Metric value for this bucket") @@ -76,7 +76,7 @@ class ForecastPoint(BaseModel): in the configured currency. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") day: date = Field(description="Calendar date") projected_spend: float = Field( @@ -99,7 +99,7 @@ class BudgetForecast(BaseModel): avg_daily_spend: Average daily spend used for projection. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") projected_total: float = Field( ge=0.0, diff --git a/src/synthorg/communication/handler.py b/src/synthorg/communication/handler.py index 37b8a1f56c..50f4b02ed8 100644 --- a/src/synthorg/communication/handler.py +++ b/src/synthorg/communication/handler.py @@ -110,6 +110,7 @@ class HandlerRegistration(BaseModel): frozen=True, arbitrary_types_allowed=True, allow_inf_nan=False, + extra="forbid", ) handler_id: NotBlankStr = Field(default_factory=lambda: str(uuid4())) diff --git a/src/synthorg/communication/loop_prevention/circuit_breaker.py b/src/synthorg/communication/loop_prevention/circuit_breaker.py index 913ef461b4..3475dfc99a 100644 --- a/src/synthorg/communication/loop_prevention/circuit_breaker.py +++ b/src/synthorg/communication/loop_prevention/circuit_breaker.py @@ -15,6 +15,7 @@ DELEGATION_LOOP_CIRCUIT_PERSIST_FAILED, DELEGATION_LOOP_CIRCUIT_RESET, ) +from synthorg.persistence._shared import collect_all from synthorg.persistence.circuit_breaker_protocol import ( CircuitBreakerStateRecord, CircuitBreakerStateRepository, @@ -304,8 +305,17 @@ async def load_state(self) -> None: """ if self._state_repo is None: return + repo = self._state_repo 
try: - records = await self._state_repo.load_all() + # Drain every page: boot-time rehydration needs the + # complete breaker set, but each query stays bounded so a + # large pair count cannot trigger one unbounded scan. + records = await collect_all( + lambda limit, offset: repo.load_all( + limit=limit, + offset=offset, + ), + ) except MemoryError, RecursionError: raise except Exception: diff --git a/src/synthorg/communication/meetings/service.py b/src/synthorg/communication/meetings/service.py index a6a2f0a603..222f2ab896 100644 --- a/src/synthorg/communication/meetings/service.py +++ b/src/synthorg/communication/meetings/service.py @@ -65,6 +65,11 @@ async def list_meetings( handler can build the pagination envelope without slicing a second time. + The page is a point-in-time snapshot: concurrent status + transitions or deletions between this read and the caller + consuming the result are not reflected, so the newest-first + ordering is only consistent within a single call. + Raises: ValueError: If ``offset`` is negative, or if ``limit`` is provided and non-positive. @@ -91,11 +96,13 @@ async def get_meeting( self, meeting_id: NotBlankStr, ) -> MeetingRecord | None: - """Return a meeting record by ID or ``None`` when absent.""" - for record in self._orchestrator.get_records(): - if record.meeting_id == meeting_id: - return record - return None + """Return a meeting record by ID or ``None`` when absent. + + Delegates to the orchestrator's O(1) ``get_record`` (backed by + the ``_records_by_id`` mirror) instead of scanning the full + chronological record list on every fetch. 
+ """ + return self._orchestrator.get_record(meeting_id) async def create_meeting(self) -> None: """Reject creation with a typed ``not_supported`` error.""" diff --git a/src/synthorg/communication/message.py b/src/synthorg/communication/message.py index 014b3cc7c4..221f72a117 100644 --- a/src/synthorg/communication/message.py +++ b/src/synthorg/communication/message.py @@ -67,6 +67,7 @@ class DataPart(BaseModel): frozen=True, allow_inf_nan=False, arbitrary_types_allowed=True, + extra="forbid", ) type: Literal["data"] = Field( diff --git a/src/synthorg/communication/messages/service.py b/src/synthorg/communication/messages/service.py index 9d9f34f430..9fe49716bf 100644 --- a/src/synthorg/communication/messages/service.py +++ b/src/synthorg/communication/messages/service.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING +from synthorg.core.types import NotBlankStr from synthorg.observability import get_logger from synthorg.observability.events.communication import ( COMMUNICATION_MESSAGE_DELETED, @@ -21,7 +22,6 @@ from synthorg.communication.bus_protocol import MessageBus from synthorg.communication.channel import Channel from synthorg.communication.message import Message - from synthorg.core.types import NotBlankStr from synthorg.persistence.protocol import PersistenceBackend logger = get_logger(__name__) @@ -63,6 +63,12 @@ async def list_messages( The handler uses ``total`` to build the pagination envelope so callers can navigate. Passing ``channel=None`` returns ``((), 0)`` -- an empty page -- without touching persistence. + + The page is a point-in-time snapshot: writes that land between + this read and the caller consuming the result are not + reflected, and a concurrent delete can leave a one-row gap on + the page. Callers must not assume the slice is transactionally + consistent with later reads. 
""" if offset < 0: msg = f"offset must be >= 0, got {offset}" @@ -83,12 +89,16 @@ async def get_message( channel: NotBlankStr, message_id: str, ) -> Message | None: - """Return one message by ``(channel, id)`` or ``None``.""" - history = await self._persistence.messages.get_history(channel) - for msg in history: - if str(msg.id) == message_id: - return msg - return None + """Return one message by ``(channel, id)`` or ``None``. + + Single indexed point read on the ``messages`` primary key + (``id``), scoped to ``channel``. Replaces the prior + ``get_history`` full-channel scan that was O(channel size). + """ + return await self._persistence.messages.get_by_id( + channel, + NotBlankStr(message_id), + ) async def send_message( self, diff --git a/src/synthorg/coordination/ceremony_policy/service.py b/src/synthorg/coordination/ceremony_policy/service.py index a345b0706e..ff539b4452 100644 --- a/src/synthorg/coordination/ceremony_policy/service.py +++ b/src/synthorg/coordination/ceremony_policy/service.py @@ -52,7 +52,7 @@ class ActiveCeremonyStrategy(BaseModel): sprint_id: Active sprint id, or ``None``. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") strategy: CeremonyStrategyType | None = Field( default=None, diff --git a/src/synthorg/core/agent.py b/src/synthorg/core/agent.py index 5d9f8f2103..48fa432027 100644 --- a/src/synthorg/core/agent.py +++ b/src/synthorg/core/agent.py @@ -141,7 +141,7 @@ class SkillSet(BaseModel): secondary: Supporting skills. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") primary: tuple[Skill, ...] = Field( default=(), @@ -200,7 +200,7 @@ class ModelConfig(BaseModel): Controls prompt profile selection. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") provider: NotBlankStr = Field(description="LLM provider name") model_id: NotBlankStr = Field(description="Model identifier") @@ -238,7 +238,7 @@ class AgentRetentionRule(BaseModel): category. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") category: MemoryCategory = Field( description="Memory category this override applies to", @@ -262,7 +262,7 @@ class MemoryConfig(BaseModel): rules during retention enforcement. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") type: MemoryLevel = Field( default=MemoryLevel.SESSION, @@ -344,7 +344,7 @@ class ToolPermissions(BaseModel): resolves defaults from the access level. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") access_level: ToolAccessLevel = Field( default=ToolAccessLevel.STANDARD, @@ -442,7 +442,7 @@ class AgentIdentity(BaseModel): status: Current lifecycle status. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: UUID = Field(default_factory=uuid4, description="Unique agent identifier") name: NotBlankStr = Field(description="Agent display name") diff --git a/src/synthorg/core/approval.py b/src/synthorg/core/approval.py index 138feb31fa..dbe68ace1d 100644 --- a/src/synthorg/core/approval.py +++ b/src/synthorg/core/approval.py @@ -45,7 +45,7 @@ class ApprovalItem(BaseModel): metadata: Additional key-value metadata. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr action_type: NotBlankStr diff --git a/src/synthorg/core/artifact.py b/src/synthorg/core/artifact.py index 6e234e0434..f11c4a6b52 100644 --- a/src/synthorg/core/artifact.py +++ b/src/synthorg/core/artifact.py @@ -19,7 +19,7 @@ class ExpectedArtifact(BaseModel): path: File or directory path where the artifact should be produced. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") type: ArtifactType = Field(description="Type of artifact expected") path: NotBlankStr = Field( @@ -47,7 +47,7 @@ class Artifact(BaseModel): created_at: Timestamp when the artifact was created. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field(description="Unique artifact identifier") type: ArtifactType = Field(description="Artifact type") diff --git a/src/synthorg/core/auth/config.py b/src/synthorg/core/auth/config.py index e77efa30e2..db02b4ad09 100644 --- a/src/synthorg/core/auth/config.py +++ b/src/synthorg/core/auth/config.py @@ -65,7 +65,7 @@ class AuthConfig(BaseModel): before the first request is served. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( diff --git a/src/synthorg/core/auth/models.py b/src/synthorg/core/auth/models.py index 6039668e33..affb61bb9c 100644 --- a/src/synthorg/core/auth/models.py +++ b/src/synthorg/core/auth/models.py @@ -45,7 +45,7 @@ class User(BaseModel): updated_at: Last modification timestamp. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr username: NotBlankStr @@ -83,7 +83,7 @@ class ApiKey(BaseModel): revoked: Whether the key has been revoked. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr key_hash: NotBlankStr = Field(repr=False) @@ -115,7 +115,7 @@ class AuthenticatedUser(BaseModel): waiting for the access token to expire. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") user_id: NotBlankStr username: NotBlankStr diff --git a/src/synthorg/core/auth/refresh_record.py b/src/synthorg/core/auth/refresh_record.py index 62f4b0328a..9497ee7bf2 100644 --- a/src/synthorg/core/auth/refresh_record.py +++ b/src/synthorg/core/auth/refresh_record.py @@ -34,7 +34,7 @@ class RefreshConsumeOutcome(BaseModel): The model validator below keeps the discriminator honest. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") record: RefreshRecord | None = None reject_reason: RefreshRejectReason | None = None @@ -63,7 +63,7 @@ class RefreshRecord(BaseModel): created_at: Creation timestamp. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") token_hash: NotBlankStr session_id: NotBlankStr diff --git a/src/synthorg/core/auth/session.py b/src/synthorg/core/auth/session.py index f6e0d86326..3979fa4a52 100644 --- a/src/synthorg/core/auth/session.py +++ b/src/synthorg/core/auth/session.py @@ -29,7 +29,7 @@ class Session(BaseModel): revoked: Whether the session has been revoked. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") session_id: NotBlankStr user_id: NotBlankStr diff --git a/src/synthorg/core/company.py b/src/synthorg/core/company.py index cfd51545c1..07fd11e1c3 100644 --- a/src/synthorg/core/company.py +++ b/src/synthorg/core/company.py @@ -68,7 +68,7 @@ class ReportingLine(BaseModel): the template system). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") subordinate: NotBlankStr = Field(description="Subordinate role name or identifier") supervisor: NotBlankStr = Field(description="Supervisor role name or identifier") @@ -150,7 +150,7 @@ class ReviewRequirements(BaseModel): self_review_allowed: Whether an agent can review their own work. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") min_reviewers: int = Field( default=1, @@ -176,7 +176,7 @@ class ApprovalChain(BaseModel): min_approvals: Minimum approvals needed (0 = all approvers required). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") action_type: NotBlankStr = Field(description="Action type for this chain") approvers: tuple[NotBlankStr, ...] = Field(description="Ordered approver names") @@ -217,7 +217,7 @@ class DepartmentPolicies(BaseModel): approval_chains: Approval chains for various action types. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") review_requirements: ReviewRequirements = Field( default_factory=ReviewRequirements, @@ -264,7 +264,7 @@ class WorkflowHandoff(BaseModel): artifacts: Artifacts passed during handoff. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") from_department: NotBlankStr = Field(description="Source department") to_department: NotBlankStr = Field(description="Target department") @@ -295,7 +295,7 @@ class EscalationPath(BaseModel): priority_boost: Priority boost applied on escalation (0-3). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") from_department: NotBlankStr = Field(description="Source department") to_department: NotBlankStr = Field(description="Target department") @@ -330,7 +330,7 @@ class Team(BaseModel): members: Team member agent names. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Team name") lead: NotBlankStr = Field(description="Team lead agent name") @@ -387,7 +387,7 @@ class Department(BaseModel): needed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Department name") head: NotBlankStr | None = Field( @@ -501,7 +501,7 @@ class CompanyConfig(BaseModel): ``EventReader`` and ``resume_execution_id``). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") autonomy: AutonomyConfig = Field( default_factory=AutonomyConfig, @@ -547,7 +547,7 @@ class HRRegistry(BaseModel): hiring_queue: Roles in the hiring pipeline (duplicates allowed). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") active_agents: tuple[NotBlankStr, ...] 
= Field( default=(), @@ -595,7 +595,7 @@ class Company(BaseModel): escalation_paths: Cross-department escalation paths. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: UUID = Field(default_factory=uuid4, description="Company identifier") name: NotBlankStr = Field(description="Company name") diff --git a/src/synthorg/core/domain_errors.py b/src/synthorg/core/domain_errors.py index ca67c97450..86637c16fa 100644 --- a/src/synthorg/core/domain_errors.py +++ b/src/synthorg/core/domain_errors.py @@ -306,6 +306,67 @@ def __init__( self.retry_after = max(1, int(retry_after)) +class ImmutableFieldMismatchError(ValidationError): + """Raised when a restore/rollback would change an immutable field (422). + + Distinct ``error_code`` lets the dashboard tell "the snapshot is + incompatible because id/name/department differ" apart from a generic + validation failure, so it can surface the specific blocked fields + instead of a retry button that would always fail. + """ + + default_message: ClassVar[str] = "Cannot apply: immutable field mismatch" + error_code: ClassVar[ErrorCode] = ErrorCode.IMMUTABLE_FIELD_MISMATCH + + +class AgentIdentityRollbackError(DomainError): + """Raised when an agent-identity rollback fails unexpectedly (500). + + Distinct from :class:`ImmutableFieldMismatchError` (422, operator + error): this is an unexpected server fault during the rollback + write, not a rejected request. + """ + + default_message: ClassVar[str] = "Rollback failed due to an unexpected server error" + error_code: ClassVar[ErrorCode] = ErrorCode.AGENT_IDENTITY_ROLLBACK_FAILED + + +class CheckpointOperationConflictError(ConflictError): + """Raised when a fine-tune checkpoint deploy/delete conflicts (409). + + Distinct ``error_code`` separates "checkpoint operation rejected by + its current state" (e.g. 
deleting the active checkpoint) from a + generic resource conflict so clients can message it precisely. + """ + + default_message: ClassVar[str] = "Checkpoint operation conflict" + error_code: ClassVar[ErrorCode] = ErrorCode.CHECKPOINT_OPERATION_CONFLICT + + +class FineTuneRunActiveError(ConflictError): + """Raised when a fine-tune run is already active (409). + + Start/resume is rejected because the single-run invariant holds. + Distinct ``error_code`` lets clients render "a run is already in + progress" instead of a generic conflict. + """ + + default_message: ClassVar[str] = "A fine-tuning run is already active" + error_code: ClassVar[ErrorCode] = ErrorCode.FINE_TUNE_RUN_ACTIVE + + +class TrainingPlanNotModifiableError(ConflictError): + """Raised when a training plan is edited after execution/failure (409). + + Distinct ``error_code`` tells the dashboard the plan is frozen by + its lifecycle status rather than a transient conflict, so it hides + the edit form instead of offering a retry. + """ + + default_message: ClassVar[str] = "Cannot modify plan after execution or failure" + error_code: ClassVar[ErrorCode] = ErrorCode.TRAINING_PLAN_NOT_MODIFIABLE + + class ConcurrencyLimitExceededError(PerOperationRateLimitError): """Raised when a per-operation concurrency (inflight) cap is hit (429). 
diff --git a/src/synthorg/core/error_taxonomy.py b/src/synthorg/core/error_taxonomy.py index caceda825c..f4f1bece59 100644 --- a/src/synthorg/core/error_taxonomy.py +++ b/src/synthorg/core/error_taxonomy.py @@ -68,6 +68,9 @@ class ErrorCode(IntEnum): ARTIFACT_TOO_LARGE = 2002 TOOL_PARAMETER_ERROR = 2003 PROVIDER_TIER_COVERAGE_INSUFFICIENT = 2004 + IMMUTABLE_FIELD_MISMATCH = 2005 + CHECKPOINT_ROLLBACK_UNAVAILABLE = 2006 + CHECKPOINT_ROLLBACK_CORRUPT = 2007 # 3xxx -- not_found RESOURCE_NOT_FOUND = 3000 @@ -83,6 +86,10 @@ class ErrorCode(IntEnum): CONNECTION_NOT_FOUND = 3010 MODEL_NOT_FOUND = 3011 ESCALATION_NOT_FOUND = 3012 + WORKFLOW_DEFINITION_NOT_FOUND = 3013 + AB_TEST_NOT_FOUND = 3014 + BACKUP_NOT_FOUND = 3015 + MEMORY_ENTRY_NOT_FOUND = 3016 # 4xxx -- conflict RESOURCE_CONFLICT = 4000 @@ -94,6 +101,11 @@ class ErrorCode(IntEnum): ESCALATION_ALREADY_DECIDED = 4006 MIXED_CURRENCY_AGGREGATION = 4007 WORKFLOW_EXECUTION_ALREADY_TERMINAL = 4008 + BACKUP_IN_PROGRESS = 4009 + CHECKPOINT_OPERATION_CONFLICT = 4010 + FINE_TUNE_RUN_ACTIVE = 4011 + TRAINING_PLAN_NOT_MODIFIABLE = 4012 + BACKUP_UNRESTARTABLE = 4013 # 5xxx -- rate_limit RATE_LIMITED = 5000 @@ -131,6 +143,11 @@ class ErrorCode(IntEnum): TOOL_EXECUTION_ERROR = 8008 FEATURE_NOT_IMPLEMENTED = 8009 ARTIFACT_NO_STORAGE_BACKEND = 8010 + AGENT_IDENTITY_ROLLBACK_FAILED = 8011 + BACKUP_RESTORE_FAILED = 8012 + BACKUP_MANIFEST_ERROR = 8013 + SETTINGS_ENCRYPTION_ERROR = 8014 + SINK_CONFIG_VALIDATION_ERROR = 8015 # Error-code band for the NOT_FOUND category (3xxx). ``resource_not_found`` diff --git a/src/synthorg/core/evidence.py b/src/synthorg/core/evidence.py index 252856b267..a989fa0f8e 100644 --- a/src/synthorg/core/evidence.py +++ b/src/synthorg/core/evidence.py @@ -34,7 +34,7 @@ class RecommendedAction(BaseModel): dialog before executing. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") action_type: NotBlankStr = Field(description="Semantic action key") label: NotBlankStr = Field(description="UI button text") @@ -59,7 +59,7 @@ class EvidencePackageSignature(BaseModel): chain_position: Position in the append-only audit chain. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") approver_id: NotBlankStr = Field(description="Approver identity") algorithm: Literal["ml-dsa-65", "ed25519"] = Field( diff --git a/src/synthorg/core/project.py b/src/synthorg/core/project.py index 641e1a866a..8721841403 100644 --- a/src/synthorg/core/project.py +++ b/src/synthorg/core/project.py @@ -31,7 +31,7 @@ class Project(BaseModel): status: Current project status. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field(description="Unique project identifier") name: NotBlankStr = Field(description="Project display name") diff --git a/src/synthorg/core/resilience_config.py b/src/synthorg/core/resilience_config.py index a1f1d90ecd..e742ed00a5 100644 --- a/src/synthorg/core/resilience_config.py +++ b/src/synthorg/core/resilience_config.py @@ -25,7 +25,7 @@ class RetryConfig(BaseModel): jitter: Whether to add random jitter to delay. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") max_retries: int = Field( default=3, @@ -83,7 +83,7 @@ class RateLimiterConfig(BaseModel): (0 means unlimited). 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") max_requests_per_minute: int = Field( default=0, diff --git a/src/synthorg/core/role.py b/src/synthorg/core/role.py index 4b682d1bf4..1c39896903 100644 --- a/src/synthorg/core/role.py +++ b/src/synthorg/core/role.py @@ -31,7 +31,7 @@ class Skill(BaseModel): unspecified. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field(description="Unique skill identifier") name: NotBlankStr = Field(description="Human-readable display name") @@ -87,7 +87,7 @@ class Authority(BaseModel): budget_limit: Maximum spend per task in base currency units. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") can_approve: tuple[NotBlankStr, ...] = Field( default=(), @@ -118,7 +118,7 @@ class SeniorityInfo(BaseModel): cost_tier: Cost tier identifier (built-in ``CostTier`` or user-defined string). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") level: SeniorityLevel = Field(description="Seniority level") authority_scope: NotBlankStr = Field( @@ -146,7 +146,7 @@ class Role(BaseModel): description: Human-readable description. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Role name") department: DepartmentName = Field( @@ -190,7 +190,7 @@ class CustomRole(BaseModel): suggested_model: Suggested model tier. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Custom role name") department: DepartmentName | str = Field( diff --git a/src/synthorg/core/structured_artifact.py b/src/synthorg/core/structured_artifact.py index 0ee1417cf8..e31c1df4ea 100644 --- a/src/synthorg/core/structured_artifact.py +++ b/src/synthorg/core/structured_artifact.py @@ -13,6 +13,6 @@ class StructuredArtifact(BaseModel): - EvidencePackage (HITL approval payload) """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") created_at: datetime = Field(description="Artifact creation timestamp") diff --git a/src/synthorg/core/task.py b/src/synthorg/core/task.py index b00ff33695..bcdca48b7a 100644 --- a/src/synthorg/core/task.py +++ b/src/synthorg/core/task.py @@ -34,7 +34,7 @@ class AcceptanceCriterion(BaseModel): met: Whether this criterion has been satisfied. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") description: NotBlankStr = Field( description="Criterion text", @@ -85,7 +85,7 @@ class Task(BaseModel): construction to prevent external mutation. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field(description="Unique task identifier") title: NotBlankStr = Field(description="Short task title") diff --git a/src/synthorg/core/tool_disclosure.py b/src/synthorg/core/tool_disclosure.py index 02b667bc85..8f25dbb10a 100644 --- a/src/synthorg/core/tool_disclosure.py +++ b/src/synthorg/core/tool_disclosure.py @@ -36,7 +36,7 @@ class ToolL1Metadata(BaseModel): typical_cost_tier: Relative invocation cost. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Tool name") short_description: str = Field( @@ -66,7 +66,7 @@ class ToolL2Body(BaseModel): failure_modes: Known failure scenarios. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") full_description: str = Field( min_length=1, @@ -102,7 +102,7 @@ class ToolL3Resource(BaseModel): size_bytes: Byte length of ``content`` (UTF-8 encoded). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") resource_id: NotBlankStr = Field(description="Resource identifier") content_type: ContentType = Field(description="Content format") diff --git a/src/synthorg/engine/decisions.py b/src/synthorg/engine/decisions.py index a7f6aa22d8..9985d65cb9 100644 --- a/src/synthorg/engine/decisions.py +++ b/src/synthorg/engine/decisions.py @@ -66,6 +66,7 @@ class DecisionRecord(BaseModel): frozen=True, allow_inf_nan=False, arbitrary_types_allowed=True, + extra="forbid", ) id: NotBlankStr = Field(description="Unique decision record identifier") diff --git a/src/synthorg/engine/strategy/context.py b/src/synthorg/engine/strategy/context.py index d311b9f863..e37f23a170 100644 --- a/src/synthorg/engine/strategy/context.py +++ b/src/synthorg/engine/strategy/context.py @@ -33,7 +33,9 @@ """Tag the memory backend filters on for strategic-context entries.""" -class _StrategicContextOverridesArgs(BaseModel): +class _StrategicContextOverridesArgs( + BaseModel +): # lint-allow: frozen-extra-forbid -- extra="ignore" keeps this memory-backed typed boundary forward-compatible with future enrichment fields (class docstring) # noqa: E501 """Typed-boundary args model for memory-stored context overrides. 
The memory backend yields untrusted JSON; this args model is the diff --git a/src/synthorg/engine/trajectory/models.py b/src/synthorg/engine/trajectory/models.py index c76f1d2348..3bfd64aee9 100644 --- a/src/synthorg/engine/trajectory/models.py +++ b/src/synthorg/engine/trajectory/models.py @@ -92,7 +92,12 @@ class TrajectoryScore(BaseModel): consistent: Whether the candidate passed self-consistency. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + # ``extra="forbid"`` is safe here despite the ``joint_score`` + # @computed_field carve-out: ``TrajectoryScore`` is constructed + # once in ``engine/trajectory/scorer.py`` and never reconstructed + # via ``model_dump -> model_validate``, so the computed key never + # round-trips back into a constructor. + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") candidate_index: int = Field( ge=0, diff --git a/src/synthorg/engine/workflow/subworkflow_registry.py b/src/synthorg/engine/workflow/subworkflow_registry.py index 938b70a578..cf10075f9d 100644 --- a/src/synthorg/engine/workflow/subworkflow_registry.py +++ b/src/synthorg/engine/workflow/subworkflow_registry.py @@ -269,9 +269,17 @@ async def list_page( async def search( self, query: NotBlankStr, + *, + limit: int = DEFAULT_LIST_LIMIT, + offset: int = 0, ) -> tuple[SubworkflowSummary, ...]: - """Search subworkflows by name or description substring.""" - return await self._repo.search(query) + """Search subworkflows by name or description substring. + + Pass-through to the repository's bounded, deterministically + ordered page; callers needing every match drain via + :func:`synthorg.persistence._shared.collect_all`. 
+ """ + return await self._repo.search(query, limit=limit, offset=offset) async def delete( self, @@ -335,6 +343,19 @@ async def find_parents( self, subworkflow_id: NotBlankStr, version: NotBlankStr | None = None, + *, + limit: int = DEFAULT_LIST_LIMIT, + offset: int = 0, ) -> tuple[ParentReference, ...]: - """Return parent workflow definitions referencing a subworkflow.""" - return await self._repo.find_parents(subworkflow_id, version) + """Return parent workflow definitions referencing a subworkflow. + + Pass-through to the repository's bounded, deterministically + ordered page; referential-integrity callers MUST drain every + page via :func:`synthorg.persistence._shared.collect_all`. + """ + return await self._repo.find_parents( + subworkflow_id, + version, + limit=limit, + offset=offset, + ) diff --git a/src/synthorg/engine/workflow/subworkflow_service.py b/src/synthorg/engine/workflow/subworkflow_service.py index 2b9a64b2db..ddcaa43103 100644 --- a/src/synthorg/engine/workflow/subworkflow_service.py +++ b/src/synthorg/engine/workflow/subworkflow_service.py @@ -45,6 +45,7 @@ SUBWORKFLOW_PUBLISH_FAILED, SUBWORKFLOW_REGISTERED, ) +from synthorg.persistence._shared import collect_all logger = get_logger(__name__) @@ -155,7 +156,17 @@ async def list_summaries( raise ValueError(msg) if query is not None and query.strip(): - summaries = await self._registry.search(NotBlankStr(query.strip())) + search_term = NotBlankStr(query.strip()) + # This endpoint sorts + paginates the full match set in + # memory, so drain every bounded repo page rather than + # silently showing only the first. + summaries = await collect_all( + lambda limit, offset: self._registry.search( + search_term, + limit=limit, + offset=offset, + ), + ) else: summaries = await self._registry.list_all() sorted_summaries = sorted( @@ -282,7 +293,18 @@ async def delete( conflict without a second query. SubworkflowNotFoundError: If the coordinate does not exist. 
""" - parents = await self._registry.find_parents(subworkflow_id, version) + # Referential-integrity gate: the conflict error reports the + # exact parent count + names, so the complete set is required + # (a truncated page would under-report and could let a + # still-referenced version be deleted). + parents = await collect_all( + lambda limit, offset: self._registry.find_parents( + subworkflow_id, + version, + limit=limit, + offset=offset, + ), + ) if parents: names = ", ".join(f"{p.parent_name!r}" for p in parents) msg = ( @@ -322,9 +344,13 @@ async def delete( # never mask a real storage failure behind a secondary # observability lookup error. try: - late_parents = await self._registry.find_parents( - subworkflow_id, - version, + late_parents = await collect_all( + lambda limit, offset: self._registry.find_parents( + subworkflow_id, + version, + limit=limit, + offset=offset, + ), ) except MemoryError, RecursionError: raise diff --git a/src/synthorg/hr/training/config.py b/src/synthorg/hr/training/config.py index dd71f215ac..abe917f0f6 100644 --- a/src/synthorg/hr/training/config.py +++ b/src/synthorg/hr/training/config.py @@ -4,6 +4,8 @@ training pipeline components. """ +from typing import Final + from pydantic import BaseModel, ConfigDict, Field from synthorg.core.types import NotBlankStr # noqa: TC001 @@ -12,6 +14,13 @@ # Type alias for serialized strategy config values. _ConfigValue = int | float | str | bool +# Per-content-type stored-item ceilings: procedural memories accrue +# fastest, tool patterns moderately, semantic facts slowest, so the +# caps are tiered to bound storage without starving the rarer types. 
+_DEFAULT_CAP_PROCEDURAL: Final[int] = 50 +_DEFAULT_CAP_SEMANTIC: Final[int] = 10 +_DEFAULT_CAP_TOOL_PATTERNS: Final[int] = 20 + def _default_selector_config() -> dict[str, _ConfigValue]: return {"top_n": 3} @@ -21,6 +30,22 @@ def _default_curation_config() -> dict[str, _ConfigValue]: return {"top_k": 50} +def _default_volume_caps() -> dict[ContentType, int]: + """Default per-content-type hard limits for stored training items. + + A named factory (not an inline ``lambda``) so the mutable dict is + rebuilt per model instance with a referenceable, testable symbol + instead of an anonymous closure -- the same shape as + :func:`_default_selector_config` / :func:`_default_curation_config` + above. + """ + return { + ContentType.PROCEDURAL: _DEFAULT_CAP_PROCEDURAL, + ContentType.SEMANTIC: _DEFAULT_CAP_SEMANTIC, + ContentType.TOOL_PATTERNS: _DEFAULT_CAP_TOOL_PATTERNS, + } + + class TrainingConfig(BaseModel): """Configuration for the training pipeline. @@ -64,11 +89,7 @@ class TrainingConfig(BaseModel): description="Serialized config for curation", ) default_volume_caps: dict[ContentType, int] = Field( - default_factory=lambda: { - ContentType.PROCEDURAL: 50, - ContentType.SEMANTIC: 10, - ContentType.TOOL_PATTERNS: 20, - }, + default_factory=_default_volume_caps, description="Default per-content-type hard limits", ) require_review_by_default: bool = Field( diff --git a/src/synthorg/integrations/config.py b/src/synthorg/integrations/config.py index 9ff5d0f5de..c8cb999bf6 100644 --- a/src/synthorg/integrations/config.py +++ b/src/synthorg/integrations/config.py @@ -24,7 +24,7 @@ class ConnectionsConfig(BaseModel): max_connections_per_type: Upper bound per connection type. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") max_connections_per_type: int = Field(default=100, ge=1) @@ -43,7 +43,7 @@ class EncryptedSqliteConfig(BaseModel): key orphans all previously stored ciphertext. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") master_key_env: NotBlankStr = "SYNTHORG_MASTER_KEY" @@ -60,7 +60,7 @@ class EncryptedPostgresConfig(BaseModel): 32-byte Fernet key. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") master_key_env: NotBlankStr = "SYNTHORG_MASTER_KEY" @@ -72,7 +72,7 @@ class EnvVarConfig(BaseModel): prefix: Environment variable prefix for secret lookups. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") prefix: NotBlankStr = "SYNTHORG_SECRET_" @@ -87,7 +87,7 @@ class SecretBackendConfig(BaseModel): env_var: Settings for the env-var backend. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") # Neutral, vendor-agnostic discriminators so the public config # surface does not embed specific vendor names. The factory maps @@ -130,7 +130,7 @@ class OAuthConfig(BaseModel): this window. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( @@ -165,7 +165,7 @@ class WebhooksConfig(BaseModel): receipt_retention_days: How long to keep webhook receipts. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _MIRROR_FIELDS: ClassVar[tuple[MirrorField, ...]] = ( MirrorField( @@ -197,7 +197,7 @@ class IntegrationHealthConfig(BaseModel): degraded_threshold: Consecutive failures before ``degraded``. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") check_interval_seconds: int = Field(default=300, gt=0) unhealthy_threshold: int = Field(default=3, ge=1) @@ -223,7 +223,7 @@ class TunnelConfig(BaseModel): auth_token_env: Env var holding the ngrok auth token. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") auth_token_env: NotBlankStr = "NGROK_AUTHTOKEN" # noqa: S105 @@ -235,7 +235,7 @@ class McpCatalogConfig(BaseModel): enabled: Whether the catalog is available. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = True @@ -254,7 +254,7 @@ class IntegrationsConfig(BaseModel): mcp_catalog: Bundled MCP server catalog settings. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = True connections: ConnectionsConfig = Field( diff --git a/src/synthorg/integrations/connections/models.py b/src/synthorg/integrations/connections/models.py index c5c0e3e0d2..d54c7fbda4 100644 --- a/src/synthorg/integrations/connections/models.py +++ b/src/synthorg/integrations/connections/models.py @@ -72,7 +72,7 @@ class SecretRef(BaseModel): key_version: Encryption key version used. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") secret_id: NotBlankStr backend: NotBlankStr @@ -102,7 +102,7 @@ class Connection(BaseModel): updated_at: Last modification timestamp. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field( default_factory=lambda: NotBlankStr(str(uuid4())), @@ -166,7 +166,7 @@ class OAuthState(BaseModel): both must be set together. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") state_token: NotBlankStr connection_name: NotBlankStr @@ -256,6 +256,7 @@ class OAuthToken(BaseModel): model_config = ConfigDict( frozen=True, allow_inf_nan=False, + extra="forbid", # Raw tokens are sensitive -- exclude from repr to keep them # out of accidental logging and exception tracebacks. ) @@ -287,7 +288,7 @@ class WebhookReceipt(BaseModel): error: Error message if processing failed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field( default_factory=lambda: NotBlankStr(str(uuid4())), @@ -315,7 +316,7 @@ class HealthReport(BaseModel): consecutive_failures: Running failure count. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") connection_name: NotBlankStr status: ConnectionStatus @@ -341,7 +342,7 @@ class CatalogEntry(BaseModel): tags: Searchable tags. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr name: NotBlankStr diff --git a/src/synthorg/integrations/mcp_catalog/installations.py b/src/synthorg/integrations/mcp_catalog/installations.py index ae499056da..b41ccb789e 100644 --- a/src/synthorg/integrations/mcp_catalog/installations.py +++ b/src/synthorg/integrations/mcp_catalog/installations.py @@ -30,7 +30,7 @@ class McpInstallation(BaseModel): installed_at: When the install was recorded. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") catalog_entry_id: NotBlankStr connection_name: NotBlankStr | None = None diff --git a/src/synthorg/integrations/mcp_catalog/service.py b/src/synthorg/integrations/mcp_catalog/service.py index d8e22fe97b..af7932c74e 100644 --- a/src/synthorg/integrations/mcp_catalog/service.py +++ b/src/synthorg/integrations/mcp_catalog/service.py @@ -42,7 +42,7 @@ class InstallationResult(BaseModel): """Outcome of a successful MCP catalog install.""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") catalog_entry_id: NotBlankStr server_name: NotBlankStr diff --git a/src/synthorg/integrations/webhooks/models.py b/src/synthorg/integrations/webhooks/models.py index ca390f1aa8..298fd02113 100644 --- a/src/synthorg/integrations/webhooks/models.py +++ b/src/synthorg/integrations/webhooks/models.py @@ -43,7 +43,7 @@ class WebhookDefinition(BaseModel): updated_at: Last mutation timestamp. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: UUID = Field(default_factory=uuid4) name: NotBlankStr diff --git a/src/synthorg/memory/service.py b/src/synthorg/memory/service.py index 127bc483d2..4b7649fd77 100644 --- a/src/synthorg/memory/service.py +++ b/src/synthorg/memory/service.py @@ -23,9 +23,12 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar, Literal -from synthorg.core.domain_errors import ConflictError, DomainError, NotFoundError +from synthorg.core.domain_errors import ( + ConflictError, + NotFoundError, + ValidationError, +) from synthorg.core.error_taxonomy import ErrorCategory, ErrorCode -from synthorg.core.persistence_errors import QueryError from synthorg.core.types import NotBlankStr from synthorg.memory.embedding.fine_tune_models import ( CheckpointRecord, @@ -97,29 +100,34 @@ class CheckpointNotFoundError(NotFoundError): default_message: ClassVar[str] = "Checkpoint not found" -class CheckpointRollbackUnavailableError(ConflictError): +class CheckpointRollbackUnavailableError(ValidationError): """Raised when a rollback is requested but no backup config exists. - Inherits :class:`ConflictError` so ``EXCEPTION_HANDLERS`` emits a - 409 envelope: the checkpoint exists, but its rollback prerequisite - (a stored backup config) does not, so the operation cannot proceed - in the current state. The prior bare ``DomainError`` base - misclassified this as INTERNAL/500. + Inherits :class:`ValidationError` so ``EXCEPTION_HANDLERS`` emits a + 422 envelope with the distinct ``CHECKPOINT_ROLLBACK_UNAVAILABLE`` + code: the checkpoint exists but its rollback prerequisite (a + stored backup config) does not, so the dashboard can message the + invalid rollback target precisely rather than show a blanket retry. 
""" __slots__ = () is_retryable: bool = False # deterministic: no backup exists - status_code: ClassVar[int] = 409 - error_code: ClassVar[ErrorCode] = ErrorCode.RESOURCE_CONFLICT - error_category: ClassVar[ErrorCategory] = ErrorCategory.CONFLICT + error_code: ClassVar[ErrorCode] = ErrorCode.CHECKPOINT_ROLLBACK_UNAVAILABLE default_message: ClassVar[str] = "No backup config available for this checkpoint" -class CheckpointRollbackCorruptError(DomainError): - """Raised when the stored backup config fails JSON parsing.""" +class CheckpointRollbackCorruptError(ValidationError): + """Raised when the stored backup config fails JSON parsing. + + Inherits :class:`ValidationError` (422) with the distinct + ``CHECKPOINT_ROLLBACK_CORRUPT`` code so clients can tell a corrupt + rollback backup from a generic validation failure. + """ __slots__ = () is_retryable: bool = False # deterministic: the stored payload is malformed + error_code: ClassVar[ErrorCode] = ErrorCode.CHECKPOINT_ROLLBACK_CORRUPT + default_message: ClassVar[str] = "Checkpoint rollback data is corrupt" class FineTuneRunNotFoundError(NotFoundError): @@ -136,10 +144,8 @@ class FineTuneRunNotFoundError(NotFoundError): error_code: ClassVar[ErrorCode] = ErrorCode.RECORD_NOT_FOUND error_category: ClassVar[ErrorCategory] = ErrorCategory.NOT_FOUND default_message: ClassVar[str] = "Fine-tune run not found" - # Wire-level ``domain_code`` so MCP handlers can route via the - # shared ``err(exc)`` helper instead of regex-matching the - # exception message -- that was the pre-existing anti-pattern - # this class replaces. + # Wire-level ``domain_code`` so MCP handlers route via the shared + # ``err(exc)`` helper instead of regex-matching exception messages. domain_code: str = "not_found" @@ -147,8 +153,8 @@ class FineTuneRunNotResumableError(ConflictError): """Raised when a fine-tune run exists but is not in a resumable stage. 
Inherits :class:`ConflictError` so ``EXCEPTION_HANDLERS`` routes - this through the 409 envelope; the prior ``DomainError`` base - classified it as INTERNAL/500. + this through the 409 envelope, distinguishing a non-resumable + stage from an internal failure. """ __slots__ = () @@ -396,13 +402,17 @@ async def deploy_checkpoint( updated = await checkpoints.get(checkpoint_id) if updated is None: - logger.error( + # Disappearing between activation and re-read can only + # be a concurrent delete; surface the contracted + # CheckpointNotFoundError (404) so the caller sees a + # deterministic "checkpoint no longer exists". + logger.warning( MEMORY_CHECKPOINT_REREAD_FAILED, checkpoint_id=checkpoint_id, operation="deploy", ) - msg = "Checkpoint activated but not found on re-read" - raise QueryError(msg) + msg = f"Checkpoint {checkpoint_id} was removed concurrently" + raise CheckpointNotFoundError(msg) logger.info( MEMORY_CHECKPOINT_DEPLOYED, checkpoint_id=checkpoint_id, @@ -480,13 +490,17 @@ async def rollback_checkpoint( await checkpoints.deactivate_all() updated = await checkpoints.get(checkpoint_id) if updated is None: - logger.error( + # Disappearing right after deactivate_all can only be a + # concurrent delete; surface the contracted + # CheckpointNotFoundError (404) so the caller sees a + # deterministic "checkpoint no longer exists". + logger.warning( MEMORY_CHECKPOINT_REREAD_FAILED, checkpoint_id=checkpoint_id, operation="rollback", ) - msg = "Checkpoint not found after rollback" - raise QueryError(msg) + msg = f"Checkpoint {checkpoint_id} was removed concurrently" + raise CheckpointNotFoundError(msg) logger.info( MEMORY_CHECKPOINT_ROLLBACK, checkpoint_id=checkpoint_id, @@ -838,9 +852,8 @@ async def _apply_deploy_settings( # three-valued prior state captured by ``_read_setting``. 
# ``read_failed`` explicitly leaves the newly-written key # in place so a transient read error cannot erase a real - # pre-existing setting -- safer than the old ``bool`` - # design that collapsed "absent" and "read failed" into - # the same branch. + # pre-existing setting: "absent" and "read failed" must + # stay distinct branches, never collapsed. await self._restore_or_delete( "embedder_model", prior_model_value, @@ -975,10 +988,10 @@ async def _rollback_step( try: await coro except Exception as exc: - # Emit both the legacy aggregate event (for existing - # dashboards / alerting) AND the step-specific event so - # alerts can pick up partial-rollback conditions distinctly - # from the overall rollback failure signal. + # Emit both the aggregate event (broad dashboards / + # alerting) AND the step-specific event so alerts can pick + # up partial-rollback conditions distinctly from the + # overall rollback failure signal. logger.warning( MEMORY_CHECKPOINT_ROLLBACK_FAILED, checkpoint_id=checkpoint_id, diff --git a/src/synthorg/meta/analytics/models.py b/src/synthorg/meta/analytics/models.py index 68a4ce8940..07774b25ac 100644 --- a/src/synthorg/meta/analytics/models.py +++ b/src/synthorg/meta/analytics/models.py @@ -29,7 +29,7 @@ class AnalyticsOverview(BaseModel): collected_at: When the overview was assembled. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") avg_quality_score: float = Field(ge=0.0, le=10.0) avg_success_rate: float = Field(ge=0.0, le=1.0) @@ -53,7 +53,7 @@ class MetricTrend(BaseModel): window_days: Observation window length. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr current_value: float @@ -64,7 +64,7 @@ class MetricTrend(BaseModel): class AnalyticsTrends(BaseModel): """Batch of metric trends for a given window.""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") metrics: tuple[MetricTrend, ...] = () window_days: int = Field(ge=1) @@ -82,7 +82,7 @@ class AnalyticsForecast(BaseModel): projected_spend: Linear projection of spend across the horizon. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") horizon_days: int = Field(ge=1) days_until_budget_exhausted: int | None = None @@ -100,7 +100,7 @@ class MetricsSnapshot(BaseModel): names would silently collide downstream. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") metrics: dict[NotBlankStr, float] = Field(default_factory=dict) captured_at: AwareDatetime = Field( @@ -115,7 +115,7 @@ class MetricsHistoryPoint(BaseModel): keys for the same reason as :class:`MetricsSnapshot.metrics`. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") timestamp: AwareDatetime values: dict[NotBlankStr, float] = Field(default_factory=dict) @@ -124,7 +124,7 @@ class MetricsHistoryPoint(BaseModel): class MetricsHistory(BaseModel): """Historical samples for a metric-name set.""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") metric_names: tuple[NotBlankStr, ...] = () points: tuple[MetricsHistoryPoint, ...] 
= () diff --git a/src/synthorg/meta/chief_of_staff/config.py b/src/synthorg/meta/chief_of_staff/config.py index 66acb3a7b1..e68dce1bab 100644 --- a/src/synthorg/meta/chief_of_staff/config.py +++ b/src/synthorg/meta/chief_of_staff/config.py @@ -42,7 +42,7 @@ class ChiefOfStaffConfig(BaseModel): chat_max_tokens: Token budget for chat responses. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") # ── Learning ────────────────────────────────────────────────── diff --git a/src/synthorg/meta/chief_of_staff/models.py b/src/synthorg/meta/chief_of_staff/models.py index bf813e1c8d..54b380e0d3 100644 --- a/src/synthorg/meta/chief_of_staff/models.py +++ b/src/synthorg/meta/chief_of_staff/models.py @@ -41,7 +41,7 @@ class ProposalOutcome(BaseModel): decision_reason: Rationale for the decision, if provided. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") proposal_id: UUID title: NotBlankStr @@ -166,7 +166,7 @@ class Alert(BaseModel): emitted_at: When the alert was emitted. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: UUID = Field(default_factory=uuid4) severity: RuleSeverity @@ -199,7 +199,7 @@ class ChatQuery(BaseModel): alert_id: Alert to explain (optional). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") question: NotBlankStr proposal_id: UUID | None = None diff --git a/src/synthorg/meta/config.py b/src/synthorg/meta/config.py index 4410e8d9a4..ad7c21bdad 100644 --- a/src/synthorg/meta/config.py +++ b/src/synthorg/meta/config.py @@ -30,7 +30,7 @@ class RuleConfig(BaseModel): custom_rule_modules: Dotted module paths for user-defined rules. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") disabled_rules: tuple[NotBlankStr, ...] = () custom_rule_modules: tuple[NotBlankStr, ...] = () @@ -48,7 +48,7 @@ class ABTestConfig(BaseModel): declare treatment as winner. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") control_fraction: float = Field(default=0.5, gt=0.0, lt=1.0) min_agents_per_group: int = Field(default=5, ge=2) @@ -67,7 +67,7 @@ class RolloutConfig(BaseModel): ab_test: A/B test-specific configuration. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") default_strategy: RolloutStrategyType = RolloutStrategyType.BEFORE_AFTER observation_window_hours: int = Field(default=48, ge=1) @@ -99,7 +99,7 @@ class RegressionConfig(BaseModel): min_data_points: Min data points for statistical test. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") quality_drop_threshold: float = Field(default=0.10, ge=0.0, le=1.0) cost_increase_threshold: float = Field(default=0.20, ge=0.0, le=1.0) @@ -117,7 +117,7 @@ class GuardChainConfig(BaseModel): rate_limit_window_hours: Duration of the rate limit window. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") proposal_rate_limit: int = Field(default=10, ge=1) rate_limit_window_hours: int = Field(default=24, ge=1) @@ -131,7 +131,7 @@ class ScheduleConfig(BaseModel): inflection_trigger_enabled: Trigger on performance inflections. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") cycle_interval_hours: int = Field(default=168, ge=1) inflection_trigger_enabled: bool = True @@ -146,7 +146,7 @@ class PromptTuningConfig(BaseModel): allowed_modes: Which evolution modes are available. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") default_evolution_mode: EvolutionMode = EvolutionMode.ORG_WIDE allowed_modes: tuple[Literal["org_wide", "override", "advisory"], ...] = ( @@ -174,7 +174,7 @@ class CodeModificationConfig(BaseModel): ci_timeout_seconds: Timeout for CI validation subprocess calls. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") allowed_paths: tuple[NotBlankStr, ...] = ( NotBlankStr("src/synthorg/meta/strategies/*"), @@ -275,7 +275,7 @@ class SelfImprovementConfig(BaseModel): analysis_max_tokens: Token budget for analysis responses. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = False chief_of_staff_enabled: bool = False diff --git a/src/synthorg/meta/evolution/outcome_models.py b/src/synthorg/meta/evolution/outcome_models.py index 4d291726c7..e2d05a74bc 100644 --- a/src/synthorg/meta/evolution/outcome_models.py +++ b/src/synthorg/meta/evolution/outcome_models.py @@ -30,7 +30,7 @@ class EvolutionOutcomeRecord(BaseModel): only be recorded at or after the proposal was made. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr axis: ProposalAltitude diff --git a/src/synthorg/meta/mcp/handlers/common.py b/src/synthorg/meta/mcp/handlers/common.py index bcc8b570f5..639e9067a2 100644 --- a/src/synthorg/meta/mcp/handlers/common.py +++ b/src/synthorg/meta/mcp/handlers/common.py @@ -75,7 +75,7 @@ class PaginationMeta(BaseModel): limit: Page size applied to this page. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total: int = Field(ge=0) offset: int = Field(ge=0) diff --git a/src/synthorg/meta/mcp/registry.py b/src/synthorg/meta/mcp/registry.py index b7176523e6..e15e496e70 100644 --- a/src/synthorg/meta/mcp/registry.py +++ b/src/synthorg/meta/mcp/registry.py @@ -46,7 +46,7 @@ class MCPToolDef(BaseModel): """ model_config = ConfigDict( - frozen=True, allow_inf_nan=False, arbitrary_types_allowed=True + frozen=True, allow_inf_nan=False, arbitrary_types_allowed=True, extra="forbid" ) name: NotBlankStr = Field(description="Tool name (synthorg_{domain}_{action})") diff --git a/src/synthorg/meta/models.py b/src/synthorg/meta/models.py index 2934135db8..da0ec07362 100644 --- a/src/synthorg/meta/models.py +++ b/src/synthorg/meta/models.py @@ -117,7 +117,7 @@ class RollbackOperation(BaseModel): description: Human-readable description. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") operation_type: NotBlankStr target: NotBlankStr @@ -134,7 +134,7 @@ class RollbackPlan(BaseModel): validation_check: Post-rollback assertion description. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") operations: tuple[RollbackOperation, ...] 
= Field(min_length=1) dependencies: tuple[UUID, ...] = () @@ -154,7 +154,7 @@ class ConfigChange(BaseModel): description: Why this change is proposed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") path: NotBlankStr old_value: JsonValue = None @@ -173,7 +173,7 @@ class ArchitectureChange(BaseModel): description: Why this change is proposed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") operation: NotBlankStr target_name: NotBlankStr @@ -191,7 +191,7 @@ class PromptChange(BaseModel): description: Why this change is proposed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") principle_text: NotBlankStr target_scope: NotBlankStr @@ -216,7 +216,7 @@ class CodeChange(BaseModel): reasoning: Why this change improves the system. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") file_path: NotBlankStr operation: CodeOperation @@ -282,7 +282,7 @@ class ProposalRationale(BaseModel): confidence_reasoning: Why the confidence level was assigned. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") signal_summary: NotBlankStr pattern_detected: NotBlankStr @@ -431,7 +431,7 @@ class RuleMatch(BaseModel): matched_at: When the match was detected. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") rule_name: NotBlankStr severity: RuleSeverity @@ -456,7 +456,7 @@ class GuardResult(BaseModel): evaluated_at: When the evaluation happened. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") guard_name: NotBlankStr verdict: GuardVerdict @@ -489,7 +489,7 @@ class RolloutResult(BaseModel): completed_at: When the rollout finished. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") proposal_id: UUID outcome: RolloutOutcome @@ -570,7 +570,7 @@ class ApplyResult(BaseModel): applied_at: When the apply completed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") success: bool error_message: NotBlankStr | None = None @@ -603,7 +603,7 @@ class CIValidationResult(BaseModel): duration_seconds: Total wall-clock time for validation. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") passed: bool lint_passed: bool @@ -643,7 +643,7 @@ class RegressionThresholds(BaseModel): success_rate_drop: Max acceptable success rate drop. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") quality_drop: float = Field(default=0.10, ge=0.0, le=1.0) cost_increase: float = Field(default=0.20, ge=0.0, le=1.0) @@ -667,7 +667,7 @@ class RegressionResult(BaseModel): checked_at: When the check was performed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") verdict: RegressionVerdict breached_metric: NotBlankStr | None = None diff --git a/src/synthorg/meta/reports/models.py b/src/synthorg/meta/reports/models.py index b6dcaaf058..7574ee0d4a 100644 --- a/src/synthorg/meta/reports/models.py +++ b/src/synthorg/meta/reports/models.py @@ -39,7 +39,7 @@ class Report(BaseModel): auditability). 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: UUID = Field(default_factory=uuid4) template: NotBlankStr diff --git a/src/synthorg/meta/rollout/ab_models.py b/src/synthorg/meta/rollout/ab_models.py index 7eae1b3a6e..6d5d45e3ae 100644 --- a/src/synthorg/meta/rollout/ab_models.py +++ b/src/synthorg/meta/rollout/ab_models.py @@ -49,7 +49,7 @@ class GroupAssignment(BaseModel): assigned_at: When the assignment was computed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") proposal_id: UUID control_agent_ids: tuple[NotBlankStr, ...] = () @@ -193,7 +193,7 @@ class ABTestComparison(BaseModel): compared_at: When the comparison was performed. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") verdict: ABTestVerdict control_metrics: GroupMetrics diff --git a/src/synthorg/meta/rollout/group_aggregator.py b/src/synthorg/meta/rollout/group_aggregator.py index a677c84e4c..76803fdafc 100644 --- a/src/synthorg/meta/rollout/group_aggregator.py +++ b/src/synthorg/meta/rollout/group_aggregator.py @@ -42,7 +42,7 @@ class GroupSamples(BaseModel): spend_samples: Total spend per agent (display currency). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_ids: tuple[NotBlankStr, ...] = () quality_samples: tuple[float, ...] = () diff --git a/src/synthorg/meta/rollout/regression/statistical.py b/src/synthorg/meta/rollout/regression/statistical.py index 24f7cdd49f..e15e0e28b2 100644 --- a/src/synthorg/meta/rollout/regression/statistical.py +++ b/src/synthorg/meta/rollout/regression/statistical.py @@ -52,7 +52,7 @@ class WindowSamples(BaseModel): "current higher than baseline". 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") quality_samples: tuple[float, ...] = () success_samples: tuple[float, ...] = () diff --git a/src/synthorg/meta/rollout/regression/welch.py b/src/synthorg/meta/rollout/regression/welch.py index 8a198f4389..6ed36e9d56 100644 --- a/src/synthorg/meta/rollout/regression/welch.py +++ b/src/synthorg/meta/rollout/regression/welch.py @@ -56,7 +56,7 @@ class WelchResult(BaseModel): p_two_sided: Two-sided p-value in ``[0, 1]``. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") t: float df: float = Field(gt=0.0) diff --git a/src/synthorg/meta/rules/custom.py b/src/synthorg/meta/rules/custom.py index 2a1dde7df5..0a1181401a 100644 --- a/src/synthorg/meta/rules/custom.py +++ b/src/synthorg/meta/rules/custom.py @@ -105,7 +105,7 @@ class MetricDescriptor(BaseModel): nullable: Whether the snapshot field can be ``None``. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") path: NotBlankStr label: NotBlankStr @@ -369,7 +369,7 @@ class CustomRuleDefinition(BaseModel): updated_at: When the rule was last modified. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: UUID = Field(default_factory=uuid4) name: NotBlankStr diff --git a/src/synthorg/meta/signal_models.py b/src/synthorg/meta/signal_models.py index 913f5e47dd..05ec1afbaf 100644 --- a/src/synthorg/meta/signal_models.py +++ b/src/synthorg/meta/signal_models.py @@ -38,7 +38,7 @@ class MetricSummary(BaseModel): window_days: How many days the trend covers. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr value: float @@ -58,7 +58,7 @@ class OrgPerformanceSummary(BaseModel): department_summaries: Per-department metric rollups. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") avg_quality_score: float = Field(ge=0.0, le=10.0) avg_success_rate: float = Field(ge=0.0, le=1.0) @@ -83,7 +83,7 @@ class OrgBudgetSummary(BaseModel): orchestration_overhead: Coordination/productive token ratio. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total_spend: float = Field(ge=0.0) productive_ratio: float = Field(ge=0.0, le=1.0) @@ -107,7 +107,7 @@ class OrgCoordinationSummary(BaseModel): sample_count: Number of tasks used for these metrics. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") coordination_efficiency: float | None = None coordination_overhead_pct: float | None = None @@ -133,7 +133,7 @@ class ScalingDecisionSummary(BaseModel): created_at: When the decision was made. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") decision_id: NotBlankStr action_type: NotBlankStr @@ -153,7 +153,7 @@ class OrgScalingSummary(BaseModel): most_common_signal: Most frequently triggered signal. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") recent_decisions: tuple[ScalingDecisionSummary, ...] = () total_decisions: int = Field(default=0, ge=0) @@ -171,7 +171,7 @@ class ErrorCategorySummary(BaseModel): trend: Whether this category is increasing or decreasing. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") category: NotBlankStr count: int = Field(ge=0) @@ -188,7 +188,7 @@ class OrgErrorSummary(BaseModel): most_severe_category: Category with highest avg severity. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") total_findings: int = Field(default=0, ge=0) categories: tuple[ErrorCategorySummary, ...] = () @@ -218,7 +218,7 @@ class EvolutionOutcomeSummary(BaseModel): proposed_at: When the proposal was generated. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr axis: NotBlankStr @@ -236,7 +236,7 @@ class OrgEvolutionSummary(BaseModel): most_adapted_axis: Most frequently adapted axis. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") recent_outcomes: tuple[EvolutionOutcomeSummary, ...] = () total_proposals: int = Field(default=0, ge=0) @@ -253,7 +253,7 @@ class OrgTelemetrySummary(BaseModel): error_event_count: Number of error-level events. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") event_count: int = Field(default=0, ge=0) top_event_types: tuple[str, ...] = () @@ -280,7 +280,7 @@ class OrgSignalSnapshot(BaseModel): collected_at: When the snapshot was assembled. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") performance: OrgPerformanceSummary budget: OrgBudgetSummary diff --git a/src/synthorg/meta/telemetry/config.py b/src/synthorg/meta/telemetry/config.py index 09099a2c22..e7f6bba6eb 100644 --- a/src/synthorg/meta/telemetry/config.py +++ b/src/synthorg/meta/telemetry/config.py @@ -39,7 +39,7 @@ class CrossDeploymentAnalyticsConfig(BaseModel): required before generating threshold recommendations. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = False collector_url: NotBlankStr | None = None diff --git a/src/synthorg/meta/telemetry/models.py b/src/synthorg/meta/telemetry/models.py index 3562965a5a..f689607f05 100644 --- a/src/synthorg/meta/telemetry/models.py +++ b/src/synthorg/meta/telemetry/models.py @@ -48,7 +48,7 @@ class AnonymizedOutcomeEvent(BaseModel): sdk_version: SynthOrg version string. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") schema_version: Literal["1"] = "1" deployment_id: NotBlankStr @@ -116,7 +116,7 @@ class EventBatch(BaseModel): events: Tuple of anonymized outcome events. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") events: tuple[AnonymizedOutcomeEvent, ...] = Field(max_length=1000) @@ -141,7 +141,7 @@ class AggregatedPattern(BaseModel): industry_breakdown: Sorted (industry_tag, count) pairs. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") source_rule: NotBlankStr altitude: NotBlankStr @@ -174,7 +174,7 @@ class ThresholdRecommendation(BaseModel): rationale: Human-readable explanation. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") rule_name: NotBlankStr metric_name: NotBlankStr diff --git a/src/synthorg/notifications/config.py b/src/synthorg/notifications/config.py index b080d33cab..3dff43199b 100644 --- a/src/synthorg/notifications/config.py +++ b/src/synthorg/notifications/config.py @@ -28,7 +28,7 @@ class NotificationSinkConfig(BaseModel): ``webhook_url``) -- treat as sensitive. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") type: NotificationSinkType = Field(description="Adapter type") enabled: bool = Field( @@ -49,7 +49,7 @@ class NotificationConfig(BaseModel): min_severity: Minimum severity to dispatch (filters below). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") sinks: tuple[NotificationSinkConfig, ...] = Field( default=(NotificationSinkConfig(type=NotificationSinkType.CONSOLE),), diff --git a/src/synthorg/notifications/models.py b/src/synthorg/notifications/models.py index b00ba2598e..de76a7b26c 100644 --- a/src/synthorg/notifications/models.py +++ b/src/synthorg/notifications/models.py @@ -67,7 +67,7 @@ class Notification(BaseModel): metadata: Arbitrary structured context for adapters. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field( default_factory=lambda: str(uuid4()), diff --git a/src/synthorg/observability/audit_chain/chain.py b/src/synthorg/observability/audit_chain/chain.py index b49f5e529e..f5aaa8d12f 100644 --- a/src/synthorg/observability/audit_chain/chain.py +++ b/src/synthorg/observability/audit_chain/chain.py @@ -18,7 +18,7 @@ class ChainEntry(BaseModel): timestamp: When the entry was created. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") position: int = Field(ge=0, description="Chain position") event_hash: str = Field(description="SHA-256 of event data") diff --git a/src/synthorg/observability/config.py b/src/synthorg/observability/config.py index cf7ccf1468..01bf5f87d8 100644 --- a/src/synthorg/observability/config.py +++ b/src/synthorg/observability/config.py @@ -49,7 +49,7 @@ class RotationConfig(BaseModel): files. Only supported with builtin rotation. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") @model_validator(mode="after") def _reject_compress_with_external(self) -> Self: @@ -170,7 +170,7 @@ class SinkConfig(BaseModel): http_max_retries: Retry count on HTTP failure. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") sink_type: SinkType = Field( description="Log output destination type", @@ -491,7 +491,7 @@ class ContainerLogShippingConfig(BaseModel): per execution (stdout + stderr + sidecar logs combined). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=True, @@ -537,7 +537,7 @@ class LogConfig(BaseModel): container_log_shipping: Container log shipping configuration. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") root_level: LogLevel = Field( default=LogLevel.INFO, diff --git a/src/synthorg/observability/events/memory.py b/src/synthorg/observability/events/memory.py index 87f999ff24..d2681ee4e9 100644 --- a/src/synthorg/observability/events/memory.py +++ b/src/synthorg/observability/events/memory.py @@ -107,6 +107,10 @@ MEMORY_FINE_TUNE_BATCH_SIZE_RECOMMENDATION_FAILED: Final[str] = ( "memory.fine_tune.batch_size_recommendation_failed" ) +MEMORY_FINE_TUNE_THRESHOLD_FALLBACK: Final[str] = "memory.fine_tune.threshold_fallback" +MEMORY_FINE_TUNE_PREFLIGHT_TIMED_OUT: Final[str] = ( + "memory.fine_tune.preflight_timed_out" +) MEMORY_FINE_TUNE_EVAL_COMPLETED: Final[str] = "memory.fine_tune.eval_completed" MEMORY_FINE_TUNE_BACKUP_READ_SKIPPED: Final[str] = ( "memory.fine_tune.backup_read_skipped" diff --git a/src/synthorg/observability/events/persistence.py b/src/synthorg/observability/events/persistence.py index 89341ad6ee..79b477561f 100644 --- a/src/synthorg/observability/events/persistence.py +++ b/src/synthorg/observability/events/persistence.py @@ -59,6 +59,8 @@ PERSISTENCE_MESSAGE_DUPLICATE: Final[str] = "persistence.message.duplicate" PERSISTENCE_MESSAGE_HISTORY_FETCHED: Final[str] = "persistence.message.history_fetched" PERSISTENCE_MESSAGE_HISTORY_FAILED: Final[str] = "persistence.message.history_failed" +PERSISTENCE_MESSAGE_FETCHED: Final[str] = "persistence.message.fetched" +PERSISTENCE_MESSAGE_FETCH_FAILED: Final[str] = "persistence.message.fetch_failed" PERSISTENCE_MESSAGE_DESERIALIZE_FAILED: Final[str] = ( "persistence.message.deserialize_failed" ) diff --git a/src/synthorg/ontology/config.py b/src/synthorg/ontology/config.py index de04afaac2..d58f29c3e1 100644 --- a/src/synthorg/ontology/config.py +++ b/src/synthorg/ontology/config.py @@ -76,7 +76,7 @@ class OntologyInjectionConfig(BaseModel): tool_name: Name of the 
on-demand lookup tool. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") strategy: InjectionStrategy = Field( default=InjectionStrategy.HYBRID, @@ -102,7 +102,7 @@ class DriftDetectionConfig(BaseModel): threshold: Divergence score above which drift is flagged. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") strategy: DriftStrategy = Field( default=DriftStrategy.PASSIVE, @@ -128,7 +128,7 @@ class DelegationGuardConfig(BaseModel): guard_mode: Enforcement level for entity validation. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") guard_mode: GuardMode = Field( default=GuardMode.STAMP, @@ -146,7 +146,7 @@ class OntologyMemoryConfig(BaseModel): canonical definitions. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") wrapper_enabled: bool = Field( default=True, @@ -170,7 +170,7 @@ class OntologySyncConfig(BaseModel): organizational memory. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") org_memory_enabled: bool = Field( default=True, @@ -192,7 +192,7 @@ class EntityEntry(BaseModel): disambiguation: Optional disambiguation text. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Entity name") definition: str = Field( @@ -220,7 +220,7 @@ class EntitiesConfig(BaseModel): entries: Tuple of user-defined entity entries. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") entries: tuple[EntityEntry, ...] = Field( default=(), @@ -252,7 +252,7 @@ class OntologyConfig(BaseModel): entities: User-defined entity entries. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") backend: Literal["sqlite"] = Field( default="sqlite", diff --git a/src/synthorg/ontology/models.py b/src/synthorg/ontology/models.py index f9274aa658..944969276d 100644 --- a/src/synthorg/ontology/models.py +++ b/src/synthorg/ontology/models.py @@ -89,7 +89,7 @@ class EntityField(BaseModel): description: Human-readable description of the field. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Field name") type_hint: NotBlankStr = Field(description="Type annotation as string") @@ -108,7 +108,7 @@ class EntityRelation(BaseModel): description: Human-readable description of the relationship. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") target: NotBlankStr = Field(description="Related entity name") relation: NotBlankStr = Field(description="Relationship type") @@ -143,7 +143,7 @@ class EntityDefinition(BaseModel): updated_at: Last update timestamp (must be UTC). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Unique entity name") tier: EntityTier = Field(description="Protection tier") @@ -217,7 +217,7 @@ class AgentDrift(BaseModel): details: Human-readable description of the divergence. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") agent_id: NotBlankStr = Field(description="Divergent agent identifier") divergence_score: float = Field( @@ -242,7 +242,7 @@ class DriftReport(BaseModel): recommendation: Recommended corrective action. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") entity_name: NotBlankStr = Field(description="Entity being analyzed") divergence_score: float = Field( diff --git a/src/synthorg/ontology/service.py b/src/synthorg/ontology/service.py index 0689043b79..3be5256abe 100644 --- a/src/synthorg/ontology/service.py +++ b/src/synthorg/ontology/service.py @@ -18,6 +18,7 @@ EntitySource, EntityTier, ) +from synthorg.persistence._shared import collect_all_mapping if TYPE_CHECKING: from synthorg.ontology.config import EntitiesConfig, OntologyConfig @@ -235,10 +236,20 @@ async def search(self, query: str) -> tuple[EntityDefinition, ...]: async def get_version_manifest(self) -> dict[str, int]: """Return the latest version for each entity. + Drains every bounded backend page so callers (drift + detection, the delegation entity guard, the API endpoint) + keep receiving the complete manifest; a truncated manifest + would make drift detection miss entities. + Returns: Mapping from entity name to latest version number. 
""" - return await self._backend.get_version_manifest() + return await collect_all_mapping( + lambda limit, offset: self._backend.get_version_manifest( + limit=limit, + offset=offset, + ), + ) async def list_versions( self, diff --git a/src/synthorg/persistence/_shared/__init__.py b/src/synthorg/persistence/_shared/__init__.py index 54776cbe3e..f222cb328b 100644 --- a/src/synthorg/persistence/_shared/__init__.py +++ b/src/synthorg/persistence/_shared/__init__.py @@ -22,6 +22,8 @@ class predicates) stay in the backend repo modules and are passed from synthorg.persistence._shared.pagination import ( DEFAULT_LIST_LIMIT, MAX_LIST_LIMIT, + collect_all, + collect_all_mapping, paginate, validate_pagination_args, ) @@ -30,6 +32,8 @@ class predicates) stay in the backend repo modules and are passed "DEFAULT_LIST_LIMIT", "MAX_LIST_LIMIT", "coerce_row_timestamp", + "collect_all", + "collect_all_mapping", "format_iso_utc", "normalize_utc", "paginate", diff --git a/src/synthorg/persistence/_shared/pagination.py b/src/synthorg/persistence/_shared/pagination.py index c392973359..cfba5ada80 100644 --- a/src/synthorg/persistence/_shared/pagination.py +++ b/src/synthorg/persistence/_shared/pagination.py @@ -16,7 +16,13 @@ from synthorg.observability import get_logger if TYPE_CHECKING: - from collections.abc import AsyncIterator, Awaitable, Callable, Sequence + from collections.abc import ( + AsyncIterator, + Awaitable, + Callable, + Mapping, + Sequence, + ) logger = get_logger(__name__) @@ -86,6 +92,89 @@ async def paginate[PageItemT]( return +async def collect_all[PageItemT]( + fetch: Callable[[int, int], Awaitable[Sequence[PageItemT]]], + *, + page_size: int = DEFAULT_LIST_LIMIT, +) -> tuple[PageItemT, ...]: + """Drain a ``*, limit, offset`` repo method into one full tuple. + + For the callers that genuinely need the *complete* set (boot-time + state rehydration, drift detection) of a now-paginated repository + method. 
Each underlying query stays bounded at ``page_size`` (no + single unbounded scan), while the caller still gets every row, so + correctness is preserved without reintroducing the unbounded read + the pagination was added to remove. Thin wrapper over + :func:`paginate`; the short-page termination guarantee is + inherited. + + Args: + fetch: Async callable taking ``(limit, offset)`` positionally + and returning the page sequence (wrap the repo method at + the call site, e.g. + ``lambda limit, offset: repo.load_all(limit=limit, + offset=offset)``). + page_size: Rows per underlying query. + + Returns: + Every row across all pages, in the method's deterministic + order. + """ + collected: list[PageItemT] = [] + async for page in paginate(fetch, page_size=page_size): + collected.extend(page) + return tuple(collected) + + +async def collect_all_mapping[KeyT, ValT]( + fetch: Callable[[int, int], Awaitable[Mapping[KeyT, ValT]]], + *, + page_size: int = DEFAULT_LIST_LIMIT, +) -> dict[KeyT, ValT]: + """Drain a paginated mapping-returning repo method into one dict. + + The mapping analogue of :func:`collect_all` for + ``get_version_manifest``-style aggregates that return + ``dict[Key, Val]``. Pages are deterministically key-sorted and + disjoint, so merge order does not change the result; iteration + stops on the first short page exactly like :func:`paginate`. + + Caller invariant: the wrapped repo method MUST return disjoint + pages over a stable key order. Overlapping keys across pages are + silently last-write-wins (``dict.update``); this helper does not + detect page overlap. An empty first page legitimately yields an + empty dict (a valid result, not an error). + + Cancellation: if the awaiting task is cancelled mid-page the + ``CancelledError`` from the in-flight ``fetch`` propagates + unmodified; ``merged`` is a local accumulator so the partial + result is simply discarded with no cleanup required. 
+ + Args: + fetch: Async callable taking ``(limit, offset)`` positionally + and returning a page of the mapping. + page_size: Entries per underlying query. + + Returns: + The fully reassembled mapping. + """ + # ``bool`` is a subclass of ``int``; without the explicit + # ``isinstance(page_size, bool)`` guard ``True`` / ``False`` would + # slip through as page sizes 1 / 0 and corrupt the drain loop. + if isinstance(page_size, bool) or not isinstance(page_size, int) or page_size < 1: + msg = f"page_size must be a positive int, got {page_size!r}" + raise QueryError(msg) + merged: dict[KeyT, ValT] = {} + for offset in count(0, page_size): + page = await fetch(page_size, offset) + if not page: + return merged + merged.update(page) + if len(page) < page_size: + return merged + return merged # unreachable; count() is infinite + + def validate_pagination_args( limit: object, offset: object, diff --git a/src/synthorg/persistence/agent_state_protocol.py b/src/synthorg/persistence/agent_state_protocol.py index 47b675bbbe..bda65d2ed8 100644 --- a/src/synthorg/persistence/agent_state_protocol.py +++ b/src/synthorg/persistence/agent_state_protocol.py @@ -67,14 +67,27 @@ async def list_items( """ ... - async def get_active(self) -> tuple[AgentRuntimeState, ...]: - """Retrieve all non-idle agent states. + async def get_active( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[AgentRuntimeState, ...]: + """Retrieve a bounded page of non-idle agent states. Returns states where ``status != 'idle'``, ordered by - ``last_activity_at`` descending (most recent first). + ``last_activity_at`` descending then ``agent_id`` ascending + (the stable secondary key makes paging deterministic when + activity timestamps tie). Callers that need every active + state drain via + :func:`synthorg.persistence._shared.collect_all`. + + Args: + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Active agent states as a tuple. 
+ A page of active agent states. Raises: PersistenceError: If the operation fails. diff --git a/src/synthorg/persistence/checkpoint_protocol.py b/src/synthorg/persistence/checkpoint_protocol.py index b8479ec935..02f65a1bca 100644 --- a/src/synthorg/persistence/checkpoint_protocol.py +++ b/src/synthorg/persistence/checkpoint_protocol.py @@ -160,15 +160,24 @@ async def get(self, execution_id: NotBlankStr) -> Heartbeat | None: async def get_stale( self, threshold: AwareDatetime, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[Heartbeat, ...]: - """Retrieve heartbeats older than the threshold. + """Retrieve a bounded page of heartbeats older than the threshold. Args: threshold: Heartbeats with ``last_heartbeat_at`` before this timestamp are considered stale. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Stale heartbeats as a tuple. + A page of stale heartbeats ordered by ``last_heartbeat_at`` + then ``execution_id`` (stable secondary key for + deterministic paging). Callers needing every stale + heartbeat drain via + :func:`synthorg.persistence._shared.collect_all`. Raises: PersistenceError: If the operation fails. diff --git a/src/synthorg/persistence/circuit_breaker_protocol.py b/src/synthorg/persistence/circuit_breaker_protocol.py index 7ad03e27f8..1809c88ff7 100644 --- a/src/synthorg/persistence/circuit_breaker_protocol.py +++ b/src/synthorg/persistence/circuit_breaker_protocol.py @@ -19,7 +19,7 @@ class CircuitBreakerStateRecord(BaseModel): opened_at: Monotonic timestamp when opened, or ``None``. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") pair_key_a: NotBlankStr = Field(description="First agent ID (sorted)") pair_key_b: NotBlankStr = Field(description="Second agent ID (sorted)") @@ -96,14 +96,27 @@ async def list_items( """ ... 
- async def load_all(self) -> tuple[CircuitBreakerStateRecord, ...]: - """Load every persisted record in one call (bespoke per ADR-0001 D7). + async def load_all( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[CircuitBreakerStateRecord, ...]: + """Load a bounded page of records (bespoke per ADR-0001 D7). Used by the circuit breaker guard to rehydrate every pair's state at start; cardinality scales with active agent pairs. + The query is bounded per call (no unbounded scan); callers + that need the complete set drain via + :func:`synthorg.persistence._shared.collect_all`. Rows are in + ``(pair_key_a, pair_key_b)`` order so paging is stable. + + Args: + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - All stored records. + A page of stored records in deterministic key order. Raises: PersistenceError: If the query fails. diff --git a/src/synthorg/persistence/config.py b/src/synthorg/persistence/config.py index f4a92ec779..7e350ec7f6 100644 --- a/src/synthorg/persistence/config.py +++ b/src/synthorg/persistence/config.py @@ -36,7 +36,7 @@ class SQLiteConfig(BaseModel): (default 64 MB). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") path: NotBlankStr = Field( default="synthorg.db", @@ -116,7 +116,7 @@ class PostgresConfig(BaseModel): connection attempt before raising. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") # Defaults target a local-loopback Postgres for development. The # Go CLI overrides both host and port by injecting a complete @@ -252,7 +252,7 @@ class PersistenceConfig(BaseModel): ``backend="postgres"``). 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") _VALID_BACKENDS: ClassVar[frozenset[str]] = frozenset({"sqlite", "postgres"}) diff --git a/src/synthorg/persistence/idempotency_protocol.py b/src/synthorg/persistence/idempotency_protocol.py index c25e97b2d4..ed51c3e561 100644 --- a/src/synthorg/persistence/idempotency_protocol.py +++ b/src/synthorg/persistence/idempotency_protocol.py @@ -53,7 +53,7 @@ class IdempotencyClaim(BaseModel): ``COMPLETED`` / ``FAILED``). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") outcome: IdempotencyOutcome cached_response: str | None = Field(default=None) @@ -103,7 +103,7 @@ class IdempotencyRecord(BaseModel): expires_at: When the row becomes eligible for cleanup. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") scope: NotBlankStr key: NotBlankStr diff --git a/src/synthorg/persistence/memory_protocol.py b/src/synthorg/persistence/memory_protocol.py index 62d9244db9..e762e4c81e 100644 --- a/src/synthorg/persistence/memory_protocol.py +++ b/src/synthorg/persistence/memory_protocol.py @@ -170,12 +170,18 @@ async def delete( async def snapshot_at( self, timestamp: AwareDatetime, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[OperationLogSnapshot, ...]: - """Materialize fact state at a specific timestamp. + """Materialize a bounded page of fact state at a timestamp. - Returns the state of all facts (active and retracted) as they - were at the given timestamp. Used for point-in-time audits and - historical reconstruction. + Returns one ``limit``-sized page (not the whole set) of facts + (active and retracted) as they were at the given timestamp, + ordered for a stable cursor walk. 
Callers needing the complete + point-in-time snapshot drain every page via + :func:`synthorg.persistence._shared.collect_all`. Used for + point-in-time audits and historical reconstruction. ``timestamp`` MUST be timezone-aware. Implementations route it through :func:`format_iso_utc` (SQLite) or bind it directly as @@ -188,10 +194,15 @@ async def snapshot_at( Args: timestamp: The UTC timestamp for point-in-time snapshot. Must be timezone-aware. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Snapshot rows (one per fact) capturing state at - ``timestamp``. Order is by fact_id ascending. + A page of snapshot rows (one per fact) capturing state at + ``timestamp``, in ``fact_id`` ascending order so a cursor + walk is repeatable across the same snapshot. Callers + needing the whole snapshot drain via + :func:`synthorg.persistence._shared.collect_all`. Raises: ValueError: If ``timestamp`` is naive. @@ -202,18 +213,29 @@ async def snapshot_at( async def get_operation_log( self, fact_id: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[OperationLogEntry, ...]: - """Retrieve the complete audit trail for a fact. + """Retrieve a bounded page of the audit trail for a fact. - Returns all PUBLISH and RETRACT operations for the fact in - chronological order (oldest first), indexed by version number. + Returns one ``limit``-sized page (not the whole trail) of + PUBLISH and RETRACT operations for the fact in chronological + order (oldest first), indexed by version number. Version is + unique per fact so the page order is stable; callers needing + the full trail drain every page via + :func:`synthorg.persistence._shared.collect_all`. Args: fact_id: The fact identifier. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Tuple of OperationLogEntry rows in ascending version order. - Empty tuple if the fact does not exist. 
+ A page of OperationLogEntry rows in ascending version + order. Empty tuple if the fact does not exist. Callers + needing the full trail drain via + :func:`synthorg.persistence._shared.collect_all`. Raises: PersistenceError: If the operation fails. diff --git a/src/synthorg/persistence/message_protocol.py b/src/synthorg/persistence/message_protocol.py index 14fa824ba9..7a18175a78 100644 --- a/src/synthorg/persistence/message_protocol.py +++ b/src/synthorg/persistence/message_protocol.py @@ -101,6 +101,33 @@ async def get_history( """ ... + async def get_by_id( + self, + channel: NotBlankStr, + message_id: NotBlankStr, + ) -> Message | None: + """Fetch a single message by ``(channel, id)``. + + ``messages.id`` is the primary key (globally unique), so the + lookup is an indexed point read; ``channel`` is an additional + scoping predicate so a caller cannot read a message off a + channel it did not address. Replaces the prior + ``get_history`` full-channel scan in + :meth:`MessageService.get_message`. + + Args: + channel: Channel the message must belong to. + message_id: The unique message identifier. + + Returns: + The matching :class:`Message`, or ``None`` when no message + with that id exists on that channel. + + Raises: + PersistenceError: If the operation fails. + """ + ... + async def delete(self, message_id: NotBlankStr) -> bool: """Delete a message by id (moderation / redaction). diff --git a/src/synthorg/persistence/ontology_protocol.py b/src/synthorg/persistence/ontology_protocol.py index 4da26b6771..833b5c1871 100644 --- a/src/synthorg/persistence/ontology_protocol.py +++ b/src/synthorg/persistence/ontology_protocol.py @@ -195,13 +195,25 @@ async def search( """ ... - async def get_version_manifest(self) -> dict[NotBlankStr, int]: - """Return the latest version number for each entity. 
+ async def get_version_manifest( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> dict[NotBlankStr, int]: + """Return a bounded page of the latest version per entity. + + Version manifest is a domain-specific aggregate. Entities page + in ``entity_id`` order so a cursor walk is stable. - Version manifest is a domain-specific aggregate. + Args: + limit: Maximum entries to return. + offset: Entries to skip from the head of the ordering. Returns: - Mapping of entity name to latest version number. + A page of the entity-name to latest-version mapping. + Callers needing the whole manifest drain via + :func:`synthorg.persistence._shared.collect_all_mapping`. """ ... diff --git a/src/synthorg/persistence/parked_context_protocol.py b/src/synthorg/persistence/parked_context_protocol.py index c50d53894d..46c6be78b1 100644 --- a/src/synthorg/persistence/parked_context_protocol.py +++ b/src/synthorg/persistence/parked_context_protocol.py @@ -80,14 +80,26 @@ async def get_by_approval(self, approval_id: NotBlankStr) -> ParkedContext | Non """ ... - async def get_by_agent(self, agent_id: NotBlankStr) -> tuple[ParkedContext, ...]: - """Retrieve all parked contexts for an agent. + async def get_by_agent( + self, + agent_id: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[ParkedContext, ...]: + """Retrieve a bounded page of parked contexts for an agent. Args: agent_id: The agent identifier. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Parked contexts for the agent, ordered by ``parked_at`` DESC. + A page of parked contexts for the agent, ordered by + ``parked_at`` DESC then ``id`` ascending (stable secondary + key for deterministic paging). Callers that need every + parked context drain via + :func:`synthorg.persistence._shared.collect_all`. Raises: PersistenceError: If the operation fails. 
diff --git a/src/synthorg/persistence/postgres/agent_state_repo.py b/src/synthorg/persistence/postgres/agent_state_repo.py index 64f5f37f07..fa2cfd8806 100644 --- a/src/synthorg/persistence/postgres/agent_state_repo.py +++ b/src/synthorg/persistence/postgres/agent_state_repo.py @@ -164,8 +164,22 @@ async def list_items( logger.debug(PERSISTENCE_AGENT_STATE_LISTED, count=len(states)) return states - async def get_active(self) -> tuple[AgentRuntimeState, ...]: - """Retrieve all non-idle agent states, ordered by last_activity_at DESC.""" + async def get_active( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[AgentRuntimeState, ...]: + """Bounded page of non-idle agent states, newest activity first. + + ``agent_id`` is the stable secondary sort so rows that share a + ``last_activity_at`` page deterministically. Callers needing + every active state drain via + :func:`synthorg.persistence._shared.collect_all`. + """ + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_AGENT_STATE_ACTIVE_QUERY_FAILED + ) try: async with ( self._pool.connection() as conn, @@ -175,9 +189,15 @@ async def get_active(self) -> tuple[AgentRuntimeState, ...]: "SELECT agent_id, execution_id, task_id, status, " "turn_count, accumulated_cost, currency, " "last_activity_at, started_at " - "FROM agent_states WHERE status != %s " - "ORDER BY last_activity_at DESC", - (ExecutionStatus.IDLE.value,), + "FROM agent_states WHERE status IN (%s, %s) " + "ORDER BY last_activity_at DESC, agent_id " + "LIMIT %s OFFSET %s", + ( + ExecutionStatus.EXECUTING.value, + ExecutionStatus.PAUSED.value, + limit, + offset, + ), ) rows = await cur.fetchall() except psycopg.Error as exc: diff --git a/src/synthorg/persistence/postgres/circuit_breaker_repo.py b/src/synthorg/persistence/postgres/circuit_breaker_repo.py index 424299a8cf..f6d896292d 100644 --- a/src/synthorg/persistence/postgres/circuit_breaker_repo.py +++ b/src/synthorg/persistence/postgres/circuit_breaker_repo.py 
@@ -20,6 +20,7 @@ PERSISTENCE_CIRCUIT_BREAKER_SAVE_FAILED, ) from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence._shared import validate_pagination_args from synthorg.persistence.circuit_breaker_protocol import ( CircuitBreakerPairKey, CircuitBreakerStateRecord, @@ -131,6 +132,9 @@ async def list_items( offset: int = 0, ) -> tuple[CircuitBreakerStateRecord, ...]: """List records ordered by ``(pair_key_a, pair_key_b)`` ascending.""" + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_CIRCUIT_BREAKER_LOAD_FAILED + ) try: async with ( self._pool.connection() as conn, @@ -173,52 +177,20 @@ async def list_items( logger.debug(PERSISTENCE_CIRCUIT_BREAKER_LOADED, count=len(results)) return tuple(results) - async def load_all(self) -> tuple[CircuitBreakerStateRecord, ...]: - """Load all persisted circuit breaker state records.""" - try: - async with ( - self._pool.connection() as conn, - conn.cursor(row_factory=dict_row) as cur, - ): - await cur.execute( - "SELECT pair_key_a, pair_key_b, bounce_count, " - "trip_count, opened_at FROM circuit_breaker_state", - ) - rows = await cur.fetchall() - except psycopg.Error as exc: - msg = "Failed to load circuit breaker state" - logger.warning( - PERSISTENCE_CIRCUIT_BREAKER_LOAD_FAILED, - error_type=type(exc).__name__, - error=safe_error_description(exc), - ) - raise QueryError(msg) from exc - - results: list[CircuitBreakerStateRecord] = [] - for row in rows: - try: - results.append( - CircuitBreakerStateRecord.model_validate(row), - ) - except ValidationError as exc: - msg = ( - f"Failed to deserialize circuit breaker state row " - f"({row.get('pair_key_a') if row else 'unknown'})" - ) - logger.warning( - PERSISTENCE_CIRCUIT_BREAKER_LOAD_FAILED, - pair_key_a=row.get("pair_key_a") if row else "unknown", - error_type=type(exc).__name__, - error=safe_error_description(exc), - note="deserialization failed", - ) - raise QueryError(msg) from exc - - logger.debug( - 
PERSISTENCE_CIRCUIT_BREAKER_LOADED, - count=len(results), - ) - return tuple(results) + async def load_all( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[CircuitBreakerStateRecord, ...]: + """Load a bounded page of records in ``(pair_key_a, pair_key_b)``. + + Delegates to :meth:`list_items` (same deterministic key order + and pagination contract); kept as a distinct ADR-0001 D7 + method because boot-time callers drain it via + :func:`synthorg.persistence._shared.collect_all`. + """ + return await self.list_items(limit=limit, offset=offset) async def delete(self, entity_id: CircuitBreakerPairKey) -> bool: """Delete a circuit breaker state record by composite key.""" diff --git a/src/synthorg/persistence/postgres/heartbeat_repo.py b/src/synthorg/persistence/postgres/heartbeat_repo.py index 92926fbd40..33088a7125 100644 --- a/src/synthorg/persistence/postgres/heartbeat_repo.py +++ b/src/synthorg/persistence/postgres/heartbeat_repo.py @@ -23,6 +23,8 @@ PERSISTENCE_HEARTBEAT_QUERY_FAILED, PERSISTENCE_HEARTBEAT_SAVE_FAILED, ) +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence._shared import validate_pagination_args if TYPE_CHECKING: from psycopg_pool import AsyncConnectionPool @@ -107,13 +109,27 @@ async def get(self, execution_id: NotBlankStr) -> Heartbeat | None: return self._row_to_model(dict(row)) - async def get_stale(self, threshold: AwareDatetime) -> tuple[Heartbeat, ...]: - """Retrieve heartbeats older than the threshold. + async def get_stale( + self, + threshold: AwareDatetime, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[Heartbeat, ...]: + """Bounded page of heartbeats older than the threshold. + + ``execution_id`` is the stable secondary sort so rows sharing + a ``last_heartbeat_at`` page deterministically. Args: threshold: Heartbeats with ``last_heartbeat_at`` before this timestamp are considered stale. + limit: Maximum rows to return. 
+ offset: Rows to skip from the head of the ordering. """ + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_HEARTBEAT_QUERY_FAILED + ) threshold_utc = threshold.astimezone(UTC) try: async with ( @@ -123,8 +139,9 @@ async def get_stale(self, threshold: AwareDatetime) -> tuple[Heartbeat, ...]: await cur.execute( "SELECT execution_id, agent_id, task_id, last_heartbeat_at " "FROM heartbeats WHERE last_heartbeat_at < %s " - "ORDER BY last_heartbeat_at", - (threshold_utc,), + "ORDER BY last_heartbeat_at, execution_id " + "LIMIT %s OFFSET %s", + (threshold_utc, limit, offset), ) rows = await cur.fetchall() except psycopg.Error as exc: diff --git a/src/synthorg/persistence/postgres/ontology_entity_repo.py b/src/synthorg/persistence/postgres/ontology_entity_repo.py index 4c7626c45a..af81cfd834 100644 --- a/src/synthorg/persistence/postgres/ontology_entity_repo.py +++ b/src/synthorg/persistence/postgres/ontology_entity_repo.py @@ -367,8 +367,21 @@ def _rows_to_entities( continue return tuple(results) - async def get_version_manifest(self) -> dict[NotBlankStr, int]: - """Return the latest version number for each entity.""" + async def get_version_manifest( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> dict[NotBlankStr, int]: + """Return a bounded page of the latest version per entity. + + Entities page in ``entity_id`` order so a cursor walk is + stable; callers needing the whole manifest drain via + :func:`synthorg.persistence._shared.collect_all_mapping`. 
+ """ + limit = validate_pagination_args( + limit, offset, event=ONTOLOGY_ENTITY_DESERIALIZATION_FAILED + ) dict_row = self._dict_row async with ( self._pool.connection() as conn, @@ -377,7 +390,10 @@ async def get_version_manifest(self) -> dict[NotBlankStr, int]: await cur.execute( """SELECT entity_id, MAX(version) AS latest_version FROM entity_definition_versions - GROUP BY entity_id""", + GROUP BY entity_id + ORDER BY entity_id + LIMIT %s OFFSET %s""", + (limit, offset), ) rows = await cur.fetchall() return {NotBlankStr(row["entity_id"]): row["latest_version"] for row in rows} diff --git a/src/synthorg/persistence/postgres/org_fact_repo.py b/src/synthorg/persistence/postgres/org_fact_repo.py index dd4590c7fd..ee490ae2f9 100644 --- a/src/synthorg/persistence/postgres/org_fact_repo.py +++ b/src/synthorg/persistence/postgres/org_fact_repo.py @@ -34,7 +34,7 @@ ORG_MEMORY_WRITE_FAILED, ) from synthorg.persistence._generics import DEFAULT_PAGE_SIZE -from synthorg.persistence._shared import normalize_utc +from synthorg.persistence._shared import normalize_utc, validate_pagination_args from synthorg.persistence.memory_protocol import _DEFAULT_LIST_LIMIT_FACTS if TYPE_CHECKING: @@ -513,15 +513,22 @@ async def list_by_category( async def snapshot_at( self, timestamp: AwareDatetime, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[OperationLogSnapshot, ...]: - """Point-in-time snapshot of all facts at a given timestamp. + """Bounded page of the point-in-time snapshot of all facts. ``timestamp`` must be timezone-aware so psycopg binds it to the ``TIMESTAMPTZ`` parameter at a known instant; a naive datetime would otherwise bind in the session timezone and silently produce a wrong-but-plausible snapshot. The signature is :class:`pydantic.AwareDatetime` to make that contract explicit. 
+ Rows page in ``fact_id`` order so a cursor walk is repeatable + across the same snapshot; callers needing the whole snapshot + drain via :func:`synthorg.persistence._shared.collect_all`. """ + limit = validate_pagination_args(limit, offset, event=ORG_MEMORY_QUERY_FAILED) dict_row = self._dict_row if timestamp.tzinfo is None: msg = ( @@ -568,13 +575,17 @@ async def snapshot_at( LEFT JOIN first_publishes fp ON fp.fact_id = lo.fact_id WHERE lo.rn = 1 ORDER BY lo.fact_id +LIMIT %(limit)s OFFSET %(offset)s """ try: async with ( self._pool.connection() as conn, conn.cursor(row_factory=dict_row) as cur, ): - await cur.execute(sql, {"ts": timestamp}) + await cur.execute( + sql, + {"ts": timestamp, "limit": limit, "offset": offset}, + ) rows = await cur.fetchall() except Exception as exc: ts_iso = timestamp.isoformat() @@ -598,8 +609,19 @@ async def snapshot_at( async def get_operation_log( self, fact_id: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[OperationLogEntry, ...]: - """Retrieve full audit trail for a fact.""" + """Bounded page of the audit trail for a fact (version ASC). + + Version is unique per fact so the ordering is already stable; + callers needing the full trail drain via + :func:`synthorg.persistence._shared.collect_all`. 
+ """ + limit = validate_pagination_args( + limit, offset, event=ORG_MEMORY_QUERY_FAILED, fact_id=fact_id + ) dict_row = self._dict_row try: async with ( @@ -608,8 +630,9 @@ async def get_operation_log( ): await cur.execute( "SELECT * FROM org_facts_operation_log " - "WHERE fact_id = %s ORDER BY version ASC", - (fact_id,), + "WHERE fact_id = %s ORDER BY version ASC " + "LIMIT %s OFFSET %s", + (fact_id, limit, offset), ) rows = await cur.fetchall() except Exception as exc: diff --git a/src/synthorg/persistence/postgres/parked_context_repo.py b/src/synthorg/persistence/postgres/parked_context_repo.py index 1deb894f7f..7374304bd8 100644 --- a/src/synthorg/persistence/postgres/parked_context_repo.py +++ b/src/synthorg/persistence/postgres/parked_context_repo.py @@ -197,8 +197,24 @@ async def get_by_approval(self, approval_id: NotBlankStr) -> ParkedContext | Non return self._row_to_model(row) - async def get_by_agent(self, agent_id: NotBlankStr) -> tuple[ParkedContext, ...]: - """Retrieve all parked contexts for an agent.""" + async def get_by_agent( + self, + agent_id: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[ParkedContext, ...]: + """Bounded page of parked contexts for an agent, newest first. + + ``id`` is the stable secondary sort so rows sharing a + ``parked_at`` page deterministically. + """ + limit = validate_pagination_args( + limit, + offset, + event=PERSISTENCE_PARKED_CONTEXT_QUERY_FAILED, + agent_id=agent_id, + ) try: async with ( self._pool.connection() as conn, @@ -208,8 +224,9 @@ async def get_by_agent(self, agent_id: NotBlankStr) -> tuple[ParkedContext, ...] 
"SELECT id, execution_id, agent_id, task_id, approval_id, " "parked_at, context_json, metadata " "FROM parked_contexts WHERE agent_id = %s " - "ORDER BY parked_at DESC", - (agent_id,), + "ORDER BY parked_at DESC, id " + "LIMIT %s OFFSET %s", + (agent_id, limit, offset), ) rows = await cur.fetchall() except psycopg.Error as exc: diff --git a/src/synthorg/persistence/postgres/repositories.py b/src/synthorg/persistence/postgres/repositories.py index 4279bf1702..e8e21603dd 100644 --- a/src/synthorg/persistence/postgres/repositories.py +++ b/src/synthorg/persistence/postgres/repositories.py @@ -28,6 +28,8 @@ PERSISTENCE_MESSAGE_DELETE_FAILED, PERSISTENCE_MESSAGE_DESERIALIZE_FAILED, PERSISTENCE_MESSAGE_DUPLICATE, + PERSISTENCE_MESSAGE_FETCH_FAILED, + PERSISTENCE_MESSAGE_FETCHED, PERSISTENCE_MESSAGE_HISTORY_FAILED, PERSISTENCE_MESSAGE_HISTORY_FETCHED, PERSISTENCE_MESSAGE_SAVE_FAILED, @@ -732,6 +734,51 @@ async def get_history( ) return messages + async def get_by_id( + self, + channel: str, + message_id: str, + ) -> Message | None: + """Fetch one message by ``(channel, id)`` via the PK point read. + + The ``id`` predicate alone resolves the row (it is the primary + key); the extra ``channel`` predicate is a deliberate scoping + guard so a caller holding only a message id cannot read a + message outside the channel it asked for. 
+ """ + sql = ( + 'SELECT id, timestamp, sender, "to", type, priority, ' + "channel, content, attachments, metadata " + "FROM messages " + "WHERE id = %s AND channel = %s" + ) + try: + async with ( + self._pool.connection() as conn, + conn.cursor(row_factory=dict_row) as cur, + ): + await cur.execute(sql, [message_id, channel]) + row = await cur.fetchone() + except psycopg.Error as exc: + msg = f"Failed to fetch message {message_id!r}" + logger.warning( + PERSISTENCE_MESSAGE_FETCH_FAILED, + channel=channel, + message_id=message_id, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + if row is None: + return None + message = self._row_to_message(row) + logger.debug( + PERSISTENCE_MESSAGE_FETCHED, + channel=channel, + message_id=message_id, + ) + return message + async def query( self, filter_spec: MessageFilterSpec, diff --git a/src/synthorg/persistence/postgres/revisions/20260517000001_wp3_query_indices.sql b/src/synthorg/persistence/postgres/revisions/20260517000001_wp3_query_indices.sql new file mode 100644 index 0000000000..5cead05201 --- /dev/null +++ b/src/synthorg/persistence/postgres/revisions/20260517000001_wp3_query_indices.sql @@ -0,0 +1,31 @@ +-- depends: 20260515000001_ceremony_scheduler_state + +-- WP-3 query-performance indices. No table changes: these back hot +-- read paths the 2026-05-15 audit flagged as full scans under load. +-- * org_facts_snapshot(category) WHERE retracted_at IS NULL -- +-- "live facts in category X" (hot ontology read). +-- * org_facts_operation_log(operation_type) -- retract-sweep audit. +-- * approvals(risk_level, created_at DESC) and +-- approvals(action_type, created_at DESC) -- dashboard triage +-- inboxes newest-first. +-- * heartbeats(last_heartbeat_at, execution_id) -- widen the +-- single-column stale-heartbeat index so it fully covers the +-- get_stale ORDER BY without a tiebreak sort. 
+ +CREATE INDEX idx_snapshot_category_active + ON org_facts_snapshot (category) + WHERE retracted_at IS NULL; + +CREATE INDEX idx_oplog_operation_type + ON org_facts_operation_log (operation_type); + +CREATE INDEX idx_approvals_risk_created_at + ON approvals(risk_level, created_at DESC); + +CREATE INDEX idx_approvals_action_created_at + ON approvals(action_type, created_at DESC); + +DROP INDEX idx_hb_last_heartbeat; + +CREATE INDEX idx_hb_last_heartbeat + ON heartbeats(last_heartbeat_at, execution_id); diff --git a/src/synthorg/persistence/postgres/schema.sql b/src/synthorg/persistence/postgres/schema.sql index a333e3e134..cb8efad7fe 100644 --- a/src/synthorg/persistence/postgres/schema.sql +++ b/src/synthorg/persistence/postgres/schema.sql @@ -382,7 +382,7 @@ CREATE TABLE heartbeats ( ); CREATE INDEX idx_hb_last_heartbeat - ON heartbeats(last_heartbeat_at); + ON heartbeats(last_heartbeat_at, execution_id); -- ── Agent states ────────────────────────────────────────────── CREATE TABLE agent_states ( @@ -1183,6 +1183,14 @@ CREATE INDEX idx_approvals_task_id ON approvals(task_id); -- created_at). CREATE INDEX idx_approvals_status_created_at ON approvals(status, created_at DESC); +-- Risk / action triage inboxes newest-first: lets the dashboard +-- "high-risk pending, newest first" and "by action type, newest first" +-- views hit one index range scan instead of a single-column index +-- (idx_approvals_risk_level / idx_approvals_action_type) plus a sort. +CREATE INDEX idx_approvals_risk_created_at + ON approvals(risk_level, created_at DESC); +CREATE INDEX idx_approvals_action_created_at + ON approvals(action_type, created_at DESC); -- Org memory: MVCC operation log + materialized snapshot. -- Tags are TEXT JSON to match the SQLite backend's serialization; @@ -1212,6 +1220,11 @@ CREATE INDEX idx_oplog_ts_fact ON org_facts_operation_log (timestamp, fact_id); -- inline (linear in the matching window). 
CREATE INDEX idx_oplog_category_ts ON org_facts_operation_log (category, timestamp DESC); +-- Operation-type audit queries ("all RETRACT ops") scan the whole +-- log without this; the column is low-cardinality but the index lets +-- the planner skip the full table for the (rare) retract sweep. +CREATE INDEX idx_oplog_operation_type + ON org_facts_operation_log (operation_type); CREATE TABLE org_facts_snapshot ( fact_id TEXT PRIMARY KEY, @@ -1229,6 +1242,13 @@ CREATE TABLE org_facts_snapshot ( CREATE INDEX idx_snapshot_category ON org_facts_snapshot (category); CREATE INDEX idx_snapshot_active ON org_facts_snapshot (retracted_at) WHERE retracted_at IS NULL; +-- "Live facts in category X" is the hot ontology read. The partial +-- index keeps only non-retracted rows so the planner does a single +-- covered range scan instead of (idx_snapshot_category -> filter +-- retracted_at) across the full category. +CREATE INDEX idx_snapshot_category_active + ON org_facts_snapshot (category) + WHERE retracted_at IS NULL; -- Ontology drift reports. CREATE TABLE drift_reports ( diff --git a/src/synthorg/persistence/postgres/subworkflow_repo.py b/src/synthorg/persistence/postgres/subworkflow_repo.py index c5a143a80b..09d3f88fb2 100644 --- a/src/synthorg/persistence/postgres/subworkflow_repo.py +++ b/src/synthorg/persistence/postgres/subworkflow_repo.py @@ -401,8 +401,19 @@ async def list_summaries( async def search( self, query: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[SubworkflowSummary, ...]: - """Search subworkflows by name or description substring.""" + """Return a bounded page of summaries matching a substring. + + Summaries page in ``subworkflow_id`` order so a cursor walk is + stable; callers needing every match drain via + :func:`synthorg.persistence._shared.collect_all`. 
+ """ + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_SUBWORKFLOW_LIST_FAILED, query=query + ) escaped = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") pattern = f"%{escaped}%" try: @@ -410,10 +421,28 @@ async def search( self._pool.connection() as conn, conn.cursor(row_factory=dict_row) as cur, ): + # A summary aggregates every version row of a + # subworkflow into one entry, so the page boundary is + # the distinct ``subworkflow_id`` set, not raw rows. + # Page the ids at the DB first, then fetch only that + # page's rows: this bounds both scan cost and the rows + # materialised in memory to roughly + # ``limit * versions_per_subworkflow``. + await cur.execute( + "SELECT subworkflow_id FROM subworkflows" + " WHERE name ILIKE %s ESCAPE '\\'" + " OR description ILIKE %s ESCAPE '\\'" + " GROUP BY subworkflow_id" + " ORDER BY subworkflow_id LIMIT %s OFFSET %s", + (pattern, pattern, limit, offset), + ) + page_ids = [str(r["subworkflow_id"]) for r in await cur.fetchall()] + if not page_ids: + return () await cur.execute( f"SELECT {_SELECT_COLUMNS} FROM subworkflows" # noqa: S608 - " WHERE name ILIKE %s OR description ILIKE %s", - (pattern, pattern), + " WHERE subworkflow_id = ANY(%s)", + (page_ids,), ) rows = await cur.fetchall() except psycopg.Error as exc: @@ -513,18 +542,50 @@ async def find_parents( self, subworkflow_id: NotBlankStr, version: NotBlankStr | None = None, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[ParentReference, ...]: - """Return workflows referencing a subworkflow. + """Return a bounded page of workflows referencing a subworkflow. Scans both ``workflow_definitions`` and ``subworkflows`` tables. + References page in + ``(parent_type, parent_id, node_id, pinned_version)`` order so + a cursor walk is stable. 
Referential-integrity callers (the + delete-if-unreferenced path) MUST drain every page via + :func:`synthorg.persistence._shared.collect_all`; a truncated + parent set would let a still-referenced version be deleted. """ + limit = validate_pagination_args( + limit, + offset, + event=PERSISTENCE_SUBWORKFLOW_LIST_FAILED, + subworkflow_id=subworkflow_id, + ) try: async with self._pool.connection() as conn: - return await self._find_parents_with_conn( + refs = await self._find_parents_with_conn( conn, subworkflow_id, version, ) + # The reference scan walks JSON node arrays in both + # ``workflow_definitions`` and ``subworkflows``; true + # SQL-level pagination needs a normalized references + # table (a schema change tracked separately). Paging in + # memory is acceptable here because referential- + # integrity callers MUST drain every page anyway, so + # bounding per-page DB cost would yield no real saving. + ordered = sorted( + refs, + key=lambda r: ( + r.parent_type, + r.parent_id, + r.node_id, + r.pinned_version, + ), + ) + return tuple(ordered[offset : offset + limit]) except psycopg.Error as exc: msg = f"Failed to find parents for subworkflow {subworkflow_id!r}" logger.warning( diff --git a/src/synthorg/persistence/settings_protocol.py b/src/synthorg/persistence/settings_protocol.py index ed07785b94..eb3db70d9d 100644 --- a/src/synthorg/persistence/settings_protocol.py +++ b/src/synthorg/persistence/settings_protocol.py @@ -19,7 +19,7 @@ class SettingRow(BaseModel): updated_at: ISO 8601 timestamp of the last update. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") namespace: NotBlankStr = Field(description="Setting namespace") key: NotBlankStr = Field(description="Setting key") diff --git a/src/synthorg/persistence/sqlite/agent_state_repo.py b/src/synthorg/persistence/sqlite/agent_state_repo.py index b825ae975e..ccf909eb62 100644 --- a/src/synthorg/persistence/sqlite/agent_state_repo.py +++ b/src/synthorg/persistence/sqlite/agent_state_repo.py @@ -149,16 +149,36 @@ async def list_items( logger.debug(PERSISTENCE_AGENT_STATE_LISTED, count=len(states)) return states - async def get_active(self) -> tuple[AgentRuntimeState, ...]: - """Retrieve all non-idle agent states, ordered by last_activity_at DESC.""" + async def get_active( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[AgentRuntimeState, ...]: + """Bounded page of non-idle agent states, newest activity first. + + ``agent_id`` is the stable secondary sort so rows that share a + ``last_activity_at`` page deterministically. Callers needing + every active state drain via + :func:`synthorg.persistence._shared.collect_all`. + """ + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_AGENT_STATE_ACTIVE_QUERY_FAILED + ) try: cursor = await self._db.execute( "SELECT agent_id, execution_id, task_id, status, " "turn_count, accumulated_cost, currency, " "last_activity_at, started_at " - "FROM agent_states WHERE status != ? " - "ORDER BY last_activity_at DESC", - (ExecutionStatus.IDLE.value,), + "FROM agent_states WHERE status IN (?, ?) " + "ORDER BY last_activity_at DESC, agent_id " + "LIMIT ? 
OFFSET ?", + ( + ExecutionStatus.EXECUTING.value, + ExecutionStatus.PAUSED.value, + limit, + offset, + ), ) rows = await cursor.fetchall() except (sqlite3.Error, aiosqlite.Error) as exc: diff --git a/src/synthorg/persistence/sqlite/circuit_breaker_repo.py b/src/synthorg/persistence/sqlite/circuit_breaker_repo.py index 0ef7a8bf2b..a00fe1e052 100644 --- a/src/synthorg/persistence/sqlite/circuit_breaker_repo.py +++ b/src/synthorg/persistence/sqlite/circuit_breaker_repo.py @@ -185,47 +185,20 @@ async def list_items( logger.debug(PERSISTENCE_CIRCUIT_BREAKER_LOADED, count=len(results)) return tuple(results) - async def load_all(self) -> tuple[CircuitBreakerStateRecord, ...]: - """Load all persisted circuit breaker state records.""" - try: - cursor = await self._db.execute( - "SELECT pair_key_a, pair_key_b, bounce_count, " - "trip_count, opened_at FROM circuit_breaker_state", - ) - rows = await cursor.fetchall() - except (sqlite3.Error, aiosqlite.Error) as exc: - msg = "Failed to load circuit breaker state" - logger.warning( - PERSISTENCE_CIRCUIT_BREAKER_LOAD_FAILED, - error_type=type(exc).__name__, - error=safe_error_description(exc), - ) - raise QueryError(msg) from exc - - results: list[CircuitBreakerStateRecord] = [] - for row in rows: - try: - results.append( - CircuitBreakerStateRecord.model_validate(dict(row)), - ) - except ValidationError as exc: - msg = ( - f"Failed to deserialize circuit breaker state row " - f"({row['pair_key_a'] if row else 'unknown'})" - ) - logger.warning( - PERSISTENCE_CIRCUIT_BREAKER_LOAD_FAILED, - pair_key_a=row["pair_key_a"] if row else "unknown", - error_type=type(exc).__name__, - error=safe_error_description(exc), - note="deserialization failed", - ) - raise QueryError(msg) from exc - logger.debug( - PERSISTENCE_CIRCUIT_BREAKER_LOADED, - count=len(results), - ) - return tuple(results) + async def load_all( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[CircuitBreakerStateRecord, ...]: + """Load a bounded 
page of records in ``(pair_key_a, pair_key_b)``. + + Delegates to :meth:`list_items` (same deterministic key order + and pagination contract); kept as a distinct ADR-0001 D7 + method because boot-time callers drain it via + :func:`synthorg.persistence._shared.collect_all`. + """ + return await self.list_items(limit=limit, offset=offset) async def delete(self, entity_id: CircuitBreakerPairKey) -> bool: """Delete a circuit breaker state record by composite key.""" diff --git a/src/synthorg/persistence/sqlite/heartbeat_repo.py b/src/synthorg/persistence/sqlite/heartbeat_repo.py index 8f2f566323..0719903c7e 100644 --- a/src/synthorg/persistence/sqlite/heartbeat_repo.py +++ b/src/synthorg/persistence/sqlite/heartbeat_repo.py @@ -19,6 +19,8 @@ PERSISTENCE_HEARTBEAT_QUERY_FAILED, PERSISTENCE_HEARTBEAT_SAVE_FAILED, ) +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence._shared import validate_pagination_args from synthorg.persistence.sqlite._shared import WriteContext # noqa: TC001 logger = get_logger(__name__) @@ -108,20 +110,35 @@ async def get(self, execution_id: NotBlankStr) -> Heartbeat | None: return self._row_to_model(dict(row)) - async def get_stale(self, threshold: AwareDatetime) -> tuple[Heartbeat, ...]: - """Retrieve heartbeats older than the threshold. + async def get_stale( + self, + threshold: AwareDatetime, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[Heartbeat, ...]: + """Bounded page of heartbeats older than the threshold. + + ``execution_id`` is the stable secondary sort so rows sharing + a ``last_heartbeat_at`` page deterministically. Args: threshold: Heartbeats with ``last_heartbeat_at`` before this timestamp are considered stale. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. 
""" + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_HEARTBEAT_QUERY_FAILED + ) threshold_iso = threshold.astimezone(UTC).isoformat() try: cursor = await self._db.execute( "SELECT execution_id, agent_id, task_id, last_heartbeat_at " "FROM heartbeats WHERE last_heartbeat_at < ? " - "ORDER BY last_heartbeat_at", - (threshold_iso,), + "ORDER BY last_heartbeat_at, execution_id " + "LIMIT ? OFFSET ?", + (threshold_iso, limit, offset), ) rows = await cursor.fetchall() except (sqlite3.Error, aiosqlite.Error) as exc: diff --git a/src/synthorg/persistence/sqlite/ontology_entity_repo.py b/src/synthorg/persistence/sqlite/ontology_entity_repo.py index 2ba8a55a22..834a4faf8c 100644 --- a/src/synthorg/persistence/sqlite/ontology_entity_repo.py +++ b/src/synthorg/persistence/sqlite/ontology_entity_repo.py @@ -393,12 +393,28 @@ def _rows_to_entities( continue return tuple(results) - async def get_version_manifest(self) -> dict[NotBlankStr, int]: - """Return the latest version number for each entity.""" + async def get_version_manifest( + self, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> dict[NotBlankStr, int]: + """Return a bounded page of the latest version per entity. + + Entities page in ``entity_id`` order so a cursor walk is + stable; callers needing the whole manifest drain via + :func:`synthorg.persistence._shared.collect_all_mapping`. + """ + limit = validate_pagination_args( + limit, offset, event=ONTOLOGY_ENTITY_DESERIALIZATION_FAILED + ) cursor = await self._db.execute( """SELECT entity_id, MAX(version) AS latest_version FROM entity_definition_versions - GROUP BY entity_id""", + GROUP BY entity_id + ORDER BY entity_id + LIMIT ? 
OFFSET ?""", + (limit, offset), ) rows = await cursor.fetchall() return {NotBlankStr(row["entity_id"]): row["latest_version"] for row in rows} diff --git a/src/synthorg/persistence/sqlite/org_fact_repo.py b/src/synthorg/persistence/sqlite/org_fact_repo.py index 27a35b9adb..dd4304f171 100644 --- a/src/synthorg/persistence/sqlite/org_fact_repo.py +++ b/src/synthorg/persistence/sqlite/org_fact_repo.py @@ -40,6 +40,7 @@ from synthorg.persistence._shared import ( coerce_row_timestamp, format_iso_utc, + validate_pagination_args, ) from synthorg.persistence.memory_protocol import _DEFAULT_LIST_LIMIT_FACTS from synthorg.persistence.sqlite._shared import WriteContext # noqa: TC001 @@ -525,14 +526,21 @@ async def list_by_category( async def snapshot_at( self, timestamp: AwareDatetime, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[OperationLogSnapshot, ...]: - """Point-in-time snapshot of all facts at a given timestamp. + """Bounded page of the point-in-time snapshot of all facts. ``timestamp`` must be timezone-aware; ``format_iso_utc`` will raise ``ValueError`` on a naive datetime so a regression that bypasses the type guard surfaces immediately rather than - binding a misinterpreted instant into the WHERE clause. + binding a misinterpreted instant into the WHERE clause. Rows + page in ``fact_id`` order so a cursor walk is repeatable + across the same snapshot; callers needing the whole snapshot + drain via :func:`synthorg.persistence._shared.collect_all`. """ + limit = validate_pagination_args(limit, offset, event=ORG_MEMORY_QUERY_FAILED) db = self._db query_ts = format_iso_utc(timestamp) sql = """\ @@ -577,11 +585,12 @@ async def snapshot_at( FROM latest_ops lo WHERE lo.rn = 1 ORDER BY lo.fact_id +LIMIT ? OFFSET ? 
""" try: cursor = await db.execute( sql, - (query_ts, query_ts, query_ts, query_ts, query_ts), + (query_ts, query_ts, query_ts, query_ts, query_ts, limit, offset), ) rows = await cursor.fetchall() except sqlite3.Error as exc: @@ -607,13 +616,25 @@ async def snapshot_at( async def get_operation_log( self, fact_id: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[OperationLogEntry, ...]: - """Retrieve full audit trail for a fact.""" + """Bounded page of the audit trail for a fact (version ASC). + + Version is unique per fact so the ordering is already stable; + callers needing the full trail drain via + :func:`synthorg.persistence._shared.collect_all`. + """ + limit = validate_pagination_args( + limit, offset, event=ORG_MEMORY_QUERY_FAILED, fact_id=fact_id + ) try: cursor = await self._db.execute( "SELECT * FROM org_facts_operation_log " - "WHERE fact_id = ? ORDER BY version ASC", - (fact_id,), + "WHERE fact_id = ? ORDER BY version ASC " + "LIMIT ? OFFSET ?", + (fact_id, limit, offset), ) rows = await cursor.fetchall() except sqlite3.Error as exc: diff --git a/src/synthorg/persistence/sqlite/parked_context_repo.py b/src/synthorg/persistence/sqlite/parked_context_repo.py index 46523a7aa1..71940b7e83 100644 --- a/src/synthorg/persistence/sqlite/parked_context_repo.py +++ b/src/synthorg/persistence/sqlite/parked_context_repo.py @@ -161,15 +161,32 @@ async def get_by_approval(self, approval_id: str) -> ParkedContext | None: return self._row_to_model(dict(row)) - async def get_by_agent(self, agent_id: str) -> tuple[ParkedContext, ...]: - """Retrieve all parked contexts for an agent.""" + async def get_by_agent( + self, + agent_id: str, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[ParkedContext, ...]: + """Bounded page of parked contexts for an agent, newest first. + + ``id`` is the stable secondary sort so rows sharing a + ``parked_at`` page deterministically. 
+ """ + limit = validate_pagination_args( + limit, + offset, + event=PERSISTENCE_PARKED_CONTEXT_QUERY_FAILED, + agent_id=agent_id, + ) try: cursor = await self._db.execute( "SELECT id, execution_id, agent_id, task_id, approval_id, " "parked_at, context_json, metadata " "FROM parked_contexts WHERE agent_id = ? " - "ORDER BY parked_at DESC", - (agent_id,), + "ORDER BY parked_at DESC, id " + "LIMIT ? OFFSET ?", + (agent_id, limit, offset), ) rows = await cursor.fetchall() except (sqlite3.Error, aiosqlite.Error) as exc: diff --git a/src/synthorg/persistence/sqlite/repositories.py b/src/synthorg/persistence/sqlite/repositories.py index adfb3715df..348fe5675f 100644 --- a/src/synthorg/persistence/sqlite/repositories.py +++ b/src/synthorg/persistence/sqlite/repositories.py @@ -34,6 +34,8 @@ PERSISTENCE_MESSAGE_DELETE_FAILED, PERSISTENCE_MESSAGE_DESERIALIZE_FAILED, PERSISTENCE_MESSAGE_DUPLICATE, + PERSISTENCE_MESSAGE_FETCH_FAILED, + PERSISTENCE_MESSAGE_FETCHED, PERSISTENCE_MESSAGE_HISTORY_FAILED, PERSISTENCE_MESSAGE_HISTORY_FETCHED, PERSISTENCE_MESSAGE_SAVE_FAILED, @@ -755,6 +757,46 @@ async def get_history( ) return messages + async def get_by_id( + self, + channel: str, + message_id: str, + ) -> Message | None: + """Fetch one message by ``(channel, id)`` via the PK point read. + + The ``id`` predicate alone resolves the row (it is the primary + key); the extra ``channel`` predicate is a deliberate scoping + guard so a caller holding only a message id cannot read a + message outside the channel it asked for. + """ + sql = """\ +SELECT id, timestamp, sender, "to", type, priority, + channel, content, attachments, metadata +FROM messages +WHERE id = ? 
AND channel = ?""" + try: + cursor = await self._db.execute(sql, [message_id, channel]) + row = await cursor.fetchone() + except (sqlite3.Error, aiosqlite.Error) as exc: + msg = f"Failed to fetch message {message_id!r}" + logger.warning( + PERSISTENCE_MESSAGE_FETCH_FAILED, + channel=channel, + message_id=message_id, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + if row is None: + return None + message = self._row_to_message(row) + logger.debug( + PERSISTENCE_MESSAGE_FETCHED, + channel=channel, + message_id=message_id, + ) + return message + async def query( self, filter_spec: MessageFilterSpec, diff --git a/src/synthorg/persistence/sqlite/revisions/20260517000001_wp3_query_indices.sql b/src/synthorg/persistence/sqlite/revisions/20260517000001_wp3_query_indices.sql new file mode 100644 index 0000000000..5cead05201 --- /dev/null +++ b/src/synthorg/persistence/sqlite/revisions/20260517000001_wp3_query_indices.sql @@ -0,0 +1,31 @@ +-- depends: 20260515000001_ceremony_scheduler_state + +-- WP-3 query-performance indices. No table changes: these back hot +-- read paths the 2026-05-15 audit flagged as full scans under load. +-- * org_facts_snapshot(category) WHERE retracted_at IS NULL -- +-- "live facts in category X" (hot ontology read). +-- * org_facts_operation_log(operation_type) -- retract-sweep audit. +-- * approvals(risk_level, created_at DESC) and +-- approvals(action_type, created_at DESC) -- dashboard triage +-- inboxes newest-first. +-- * heartbeats(last_heartbeat_at, execution_id) -- widen the +-- single-column stale-heartbeat index so it fully covers the +-- get_stale ORDER BY without a tiebreak sort. 
+ +CREATE INDEX idx_snapshot_category_active + ON org_facts_snapshot (category) + WHERE retracted_at IS NULL; + +CREATE INDEX idx_oplog_operation_type + ON org_facts_operation_log (operation_type); + +CREATE INDEX idx_approvals_risk_created_at + ON approvals(risk_level, created_at DESC); + +CREATE INDEX idx_approvals_action_created_at + ON approvals(action_type, created_at DESC); + +DROP INDEX idx_hb_last_heartbeat; + +CREATE INDEX idx_hb_last_heartbeat + ON heartbeats(last_heartbeat_at, execution_id); diff --git a/src/synthorg/persistence/sqlite/schema.sql b/src/synthorg/persistence/sqlite/schema.sql index e22a4eeda7..d7253af1af 100644 --- a/src/synthorg/persistence/sqlite/schema.sql +++ b/src/synthorg/persistence/sqlite/schema.sql @@ -368,7 +368,7 @@ CREATE TABLE heartbeats ( ); CREATE INDEX idx_hb_last_heartbeat - ON heartbeats(last_heartbeat_at); + ON heartbeats(last_heartbeat_at, execution_id); -- ── Agent states ────────────────────────────────────────────── CREATE TABLE agent_states ( @@ -1108,6 +1108,14 @@ CREATE INDEX idx_approvals_task_id ON approvals(task_id); -- created_at). CREATE INDEX idx_approvals_status_created_at ON approvals(status, created_at DESC); +-- Risk / action triage inboxes newest-first: lets the dashboard +-- "high-risk pending, newest first" and "by action type, newest first" +-- views hit one index range scan instead of a single-column index +-- (idx_approvals_risk_level / idx_approvals_action_type) plus a sort. +CREATE INDEX idx_approvals_risk_created_at + ON approvals(risk_level, created_at DESC); +CREATE INDEX idx_approvals_action_created_at + ON approvals(action_type, created_at DESC); -- Conflict escalations: human escalation approval queue. -- Persists one row per conflict awaiting a human decision so the @@ -1210,6 +1218,11 @@ CREATE INDEX idx_oplog_ts_fact ON org_facts_operation_log (timestamp, fact_id); -- inline (linear in the matching window). 
CREATE INDEX idx_oplog_category_ts ON org_facts_operation_log (category, timestamp DESC); +-- Operation-type audit queries ("all RETRACT ops") scan the whole +-- log without this; the column is low-cardinality but the index lets +-- the planner skip the full table for the (rare) retract sweep. +CREATE INDEX idx_oplog_operation_type + ON org_facts_operation_log (operation_type); CREATE TABLE org_facts_snapshot ( fact_id TEXT PRIMARY KEY, @@ -1227,6 +1240,13 @@ CREATE TABLE org_facts_snapshot ( CREATE INDEX idx_snapshot_category ON org_facts_snapshot (category); CREATE INDEX idx_snapshot_active ON org_facts_snapshot (retracted_at) WHERE retracted_at IS NULL; +-- "Live facts in category X" is the hot ontology read. The partial +-- index keeps only non-retracted rows so the planner does a single +-- covered range scan instead of (idx_snapshot_category -> filter +-- retracted_at) across the full category. +CREATE INDEX idx_snapshot_category_active + ON org_facts_snapshot (category) + WHERE retracted_at IS NULL; -- Ontology drift reports. CREATE TABLE drift_reports ( diff --git a/src/synthorg/persistence/sqlite/subworkflow_repo.py b/src/synthorg/persistence/sqlite/subworkflow_repo.py index de89c11848..7aa9fd1240 100644 --- a/src/synthorg/persistence/sqlite/subworkflow_repo.py +++ b/src/synthorg/persistence/sqlite/subworkflow_repo.py @@ -471,18 +471,39 @@ async def list_summaries( async def search( self, query: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[SubworkflowSummary, ...]: - """Return summaries matching a name or description substring.""" + """Return a bounded page of summaries matching a substring. + + Summaries are ``(subworkflow_id, latest_version)``-ordered so + a cursor walk is stable; callers that need every match drain + via :func:`synthorg.persistence._shared.collect_all`. 
+ """ + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_SUBWORKFLOW_LIST_FAILED, query=query + ) escaped = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") pattern = f"%{escaped}%" + # A summary aggregates every version row of a subworkflow into + # one entry, so the page boundary is the distinct + # ``subworkflow_id`` set, not raw rows. Page the ids at the DB + # first, then fetch only that page's rows: this bounds both scan + # cost and the rows materialised in memory to roughly + # ``limit * versions_per_subworkflow``. try: - cursor = await self._db.execute( - f"SELECT {_SUBWORKFLOW_SELECT} FROM subworkflows " # noqa: S608 + id_cursor = await self._db.execute( + "SELECT subworkflow_id FROM subworkflows " "WHERE name LIKE ? ESCAPE '\\' COLLATE NOCASE " - "OR description LIKE ? ESCAPE '\\' COLLATE NOCASE", - (pattern, pattern), + "OR description LIKE ? ESCAPE '\\' COLLATE NOCASE " + "GROUP BY subworkflow_id " + "ORDER BY subworkflow_id LIMIT ? OFFSET ?", + (pattern, pattern, limit, offset), ) - rows = await cursor.fetchall() + page_ids = [ + str(row["subworkflow_id"]) for row in await id_cursor.fetchall() + ] except sqlite3.Error as exc: msg = f"Failed to search subworkflows with query {query!r}" logger.warning( @@ -493,15 +514,14 @@ async def search( ) raise QueryError(msg) from exc - matched_ids = {str(row["subworkflow_id"]) for row in rows} - if not matched_ids: + if not page_ids: return () - placeholders = ", ".join("?" for _ in matched_ids) + placeholders = ", ".join("?" 
for _ in page_ids) try: full_cursor = await self._db.execute( f"SELECT {_SUBWORKFLOW_SELECT} FROM subworkflows " # noqa: S608 f"WHERE subworkflow_id IN ({placeholders})", - tuple(matched_ids), + tuple(page_ids), ) full_rows = await full_cursor.fetchall() except sqlite3.Error as exc: @@ -569,7 +589,7 @@ async def delete_if_unreferenced( raise QueryError(msg) from exc try: - parents = await self.find_parents(subworkflow_id, version) + parents = await self._find_parents_unpaged(subworkflow_id, version) if parents: await self._db.rollback() return False, parents @@ -609,12 +629,41 @@ async def find_parents( self, subworkflow_id: NotBlankStr, version: NotBlankStr | None = None, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[ParentReference, ...]: - """Return workflows referencing a subworkflow. + """Return a bounded page of workflows referencing a subworkflow. Scans both ``workflow_definitions.nodes`` and ``subworkflows.nodes`` so that nested subworkflow references (a subworkflow pinning another subworkflow) are discovered. + References page in + ``(parent_type, parent_id, node_id, pinned_version)`` order so + a cursor walk is stable. The referential-integrity path + (:meth:`delete_if_unreferenced`) bypasses pagination via + :meth:`_find_parents_unpaged`; a truncated parent set would let + a still-referenced version be deleted. + """ + limit = validate_pagination_args( + limit, + offset, + event=PERSISTENCE_SUBWORKFLOW_LIST_FAILED, + subworkflow_id=subworkflow_id, + ) + references = await self._find_parents_unpaged(subworkflow_id, version) + return tuple(references[offset : offset + limit]) + + async def _find_parents_unpaged( + self, + subworkflow_id: NotBlankStr, + version: NotBlankStr | None = None, + ) -> tuple[ParentReference, ...]: + """Return every reference to a subworkflow, sorted, unpaged. 
+ + Backs both :meth:`find_parents` (which slices a page off this + result) and :meth:`delete_if_unreferenced` (which must see the + complete set so a still-referenced version is never deleted). """ references: list[ParentReference] = [] @@ -647,6 +696,20 @@ async def find_parents( references=references, ) + # The reference scan walks JSON node arrays in both + # ``workflow_definitions`` and ``subworkflows``; true SQL-level + # pagination needs a normalized references table (a schema + # change tracked separately). Sorting the full set in memory is + # acceptable because the referential-integrity caller needs + # every reference anyway, so per-page DB bounding saves nothing. + references.sort( + key=lambda r: ( + r.parent_type, + r.parent_id, + r.node_id, + r.pinned_version, + ), + ) return tuple(references) async def _fetch_parent_rows( diff --git a/src/synthorg/persistence/subworkflow_protocol.py b/src/synthorg/persistence/subworkflow_protocol.py index c11683524a..cbd3ec29d1 100644 --- a/src/synthorg/persistence/subworkflow_protocol.py +++ b/src/synthorg/persistence/subworkflow_protocol.py @@ -150,17 +150,24 @@ async def list_summaries( async def search( self, query: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[SubworkflowSummary, ...]: - """Search subworkflows by case-insensitive substring. + """Search subworkflows by case-insensitive substring (paginated). Bespoke per ADR-0001 D7. Matches against name or description fields. Args: query: Search term. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Matching summaries. + A page of matching summaries in ``subworkflow_id`` order. + Callers needing every match drain via + :func:`synthorg.persistence._shared.collect_all`. """ ... 
@@ -193,6 +200,9 @@ async def find_parents( self, subworkflow_id: NotBlankStr, version: NotBlankStr | None = None, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[ParentReference, ...]: """Find parent workflow definitions referencing a subworkflow. @@ -202,8 +212,15 @@ async def find_parents( subworkflow_id: The subworkflow identifier. version: Optional semver filter. When ``None``, returns parents pinning any version of the subworkflow. + limit: Maximum rows to return. + offset: Rows to skip from the head of the ordering. Returns: - Tuple of parent references (possibly empty). + A page of parent references in + ``(parent_type, parent_id, node_id, pinned_version)`` + order. Referential-integrity callers MUST drain every page + via :func:`synthorg.persistence._shared.collect_all`; a + truncated parent set would let a still-referenced version + be deleted. """ ... diff --git a/src/synthorg/providers/cost_recording.py b/src/synthorg/providers/cost_recording.py index 889053341a..d38e8a6dcb 100644 --- a/src/synthorg/providers/cost_recording.py +++ b/src/synthorg/providers/cost_recording.py @@ -66,6 +66,7 @@ class CostRecordingContext(BaseModel): frozen=True, allow_inf_nan=False, arbitrary_types_allowed=True, + extra="forbid", ) cost_tracker: Any = Field(description="CostTracker reference") diff --git a/src/synthorg/providers/management/capability_dtos.py b/src/synthorg/providers/management/capability_dtos.py index 7bc28a63b8..e205a1f046 100644 --- a/src/synthorg/providers/management/capability_dtos.py +++ b/src/synthorg/providers/management/capability_dtos.py @@ -210,6 +210,7 @@ class ProviderAuditEvent(BaseModel): # field after the validator below converts the input dict into # a :class:`MappingProxyType`. 
arbitrary_types_allowed=True, + extra="forbid", ) id: int | None = Field(default=None, ge=1, description="Repo-assigned row id") diff --git a/src/synthorg/settings/bootstrap_resolver.py b/src/synthorg/settings/bootstrap_resolver.py index 7a3ef5ba67..85774c5016 100644 --- a/src/synthorg/settings/bootstrap_resolver.py +++ b/src/synthorg/settings/bootstrap_resolver.py @@ -47,7 +47,7 @@ class BootstrapResolvedValue(BaseModel, Generic[T]): # noqa: UP046 persistence layer is wired. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") value: T source: SettingSource diff --git a/src/synthorg/settings/definitions/memory.py b/src/synthorg/settings/definitions/memory.py index ab4c41e52c..7fb5913b2d 100644 --- a/src/synthorg/settings/definitions/memory.py +++ b/src/synthorg/settings/definitions/memory.py @@ -181,6 +181,20 @@ FINE_TUNE_MIN_DOCS_RECOMMENDED: Final[int] = 50 """Soft minimum: corpora below this size emit a preflight warn band.""" +FINE_TUNE_PREFLIGHT_MAX_DEPTH: Final[int] = 8 +"""Max directory recursion depth for the preflight document scan. + +Bounds the ``_check_documents`` walk so a pathologically deep +(symlink-loop / generated) source tree cannot turn the preflight +endpoint into an unbounded filesystem traversal.""" + +FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S: Final[float] = 5.0 +"""Wall-clock deadline (seconds) for the preflight document scan. + +Independent of the depth cap: a wide but shallow tree on a slow / +stale-handle NFS mount is bounded by time even when depth is fine. 
+On either bound the check returns a ``warn`` band, never a hang.""" + _r.register( SettingDefinition( namespace=SettingNamespace.MEMORY, @@ -233,3 +247,41 @@ max_value=10_000, ) ) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.MEMORY, + key="fine_tune_preflight_max_depth", + type=SettingType.INTEGER, + default=str(FINE_TUNE_PREFLIGHT_MAX_DEPTH), + description=( + "Max directory recursion depth for the preflight document" + " scan. Bounds the walk so a pathologically deep source" + " tree cannot make the preflight endpoint traverse the" + " filesystem unbounded; exceeding it returns a warn band." + ), + group="Fine-Tune", + level=SettingLevel.ADVANCED, + min_value=1, + max_value=64, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.MEMORY, + key="fine_tune_preflight_walk_timeout_s", + type=SettingType.FLOAT, + default=str(FINE_TUNE_PREFLIGHT_WALK_TIMEOUT_S), + description=( + "Wall-clock deadline (seconds) for the preflight document" + " scan. A wide but shallow tree on a slow / stale-handle" + " mount is bounded by time even when depth is fine;" + " exceeding it returns a warn band rather than hanging." + ), + group="Fine-Tune", + level=SettingLevel.ADVANCED, + min_value=0.5, + max_value=60.0, + ) +) diff --git a/src/synthorg/settings/errors.py b/src/synthorg/settings/errors.py index ce40e89410..1e2a6f79c5 100644 --- a/src/synthorg/settings/errors.py +++ b/src/synthorg/settings/errors.py @@ -49,6 +49,34 @@ class SettingsEncryptionError(SettingsError): """Raised when encryption key is unavailable or decryption fails.""" +class SettingsEncryptionFailedError(SettingsError): + """API-boundary 500 when a sensitive setting cannot be processed. + + Distinct ``error_code`` (``SETTINGS_ENCRYPTION_ERROR``) so a client + can tell "the server could not encrypt/decrypt this value" apart + from a generic internal error. 
The controller raises this after a + low-level :class:`SettingsEncryptionError`; the scrubbed message + keeps key/cipher detail out of the response. + """ + + default_message: ClassVar[str] = "Internal error processing sensitive setting" + error_code: ClassVar[ErrorCode] = ErrorCode.SETTINGS_ENCRYPTION_ERROR + + +class SinkConfigValidationError(SettingsError): + """API-boundary 500 when an observability sink config check fails. + + Raised by the settings controller's sink-config test endpoint when + validation itself errors unexpectedly (not a user-visible invalid + config, which returns a structured ``valid=False`` body). Distinct + ``error_code`` (``SINK_CONFIG_VALIDATION_ERROR``) so operators can + alert on broken sink validation specifically. + """ + + default_message: ClassVar[str] = "Internal error validating sink configuration" + error_code: ClassVar[ErrorCode] = ErrorCode.SINK_CONFIG_VALIDATION_ERROR + + class SettingsRegistryError(SettingsError): """Raised when the registry lookup itself fails its own invariants. diff --git a/src/synthorg/settings/models.py b/src/synthorg/settings/models.py index a9e75c643c..90bedcb539 100644 --- a/src/synthorg/settings/models.py +++ b/src/synthorg/settings/models.py @@ -62,7 +62,7 @@ class SettingDefinition(BaseModel): max_value: Maximum for numeric types (inclusive). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") namespace: SettingNamespace = Field(description="Setting namespace") key: NotBlankStr = Field(description="Setting key within namespace") @@ -254,7 +254,7 @@ class SettingValue(BaseModel): updated_at: ISO 8601 timestamp for DB-sourced values. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") namespace: SettingNamespace = Field(description="Setting namespace") key: NotBlankStr = Field(description="Setting key") @@ -279,7 +279,7 @@ class SettingEntry(BaseModel): updated_at: ISO 8601 timestamp for DB-sourced values. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") definition: SettingDefinition = Field(description="Setting metadata") value: str = Field(description="Resolved value as string") diff --git a/src/synthorg/telemetry/collector.py b/src/synthorg/telemetry/collector.py index 4f861370e1..f5e8dc5c26 100644 --- a/src/synthorg/telemetry/collector.py +++ b/src/synthorg/telemetry/collector.py @@ -103,8 +103,11 @@ _PEER_READ_RETRY_DELAY_SECONDS: float = 0.005 -"""Sleep between peer-read retries (5 ms). Short enough to converge -within a typical write window, long enough to yield CPU to the peer.""" +"""Base sleep between peer-read retries, doubled each attempt +(5 / 10 / 20 ms for the 3-attempt budget). Exponential rather than a +flat 5 ms so a slow NFS / stale-handle write window is waited out +without re-stat'ing the handle every 5 ms, while the first retry +still converges fast on the common local-disk case.""" _TEMP_ROOT: str | None @@ -1085,14 +1088,24 @@ def _read_peer_deployment_id(id_path_str: str) -> str | None: Defends against the window where a peer has just won the ``O_CREAT|O_EXCL`` race but has not yet finished ``write()`` (the file exists but is empty or truncated). Retries up to - :data:`_PEER_READ_RETRY_ATTEMPTS` times with - :data:`_PEER_READ_RETRY_DELAY_SECONDS` between attempts. + :data:`_PEER_READ_RETRY_ATTEMPTS` times with an exponential + backoff of :data:`_PEER_READ_RETRY_DELAY_SECONDS` doubled per + attempt (5 / 10 / 20 ms) between attempts. 
Returns the peer's UUID on success, ``None`` if all attempts return empty / corrupt / unreadable. Distinguishes the failure modes (file deleted, permission denied, decode error, validation error) in the logs so operators can tell "peer file disappeared" from "peer wrote garbage". + + This is a synchronous helper run via ``to_thread``; the blocking + ``time.sleep`` backoff is intentional in that context and is hard- + bounded by ``_PEER_READ_RETRY_ATTEMPTS`` (not cancellation-aware, + but it cannot run longer than the summed backoff). A persistently + empty peer file after exhaustion is deliberately NOT distinguished + from "deleted then recreated empty": both return ``None`` and the + caller repairs the file via the atomic-create branch, so the + distinction would add complexity with no behavioural gain. """ # See docs/reference/retry-patterns.md: Pattern A -- transient I/O. for attempt in range(_PEER_READ_RETRY_ATTEMPTS): @@ -1133,15 +1146,17 @@ def _read_peer_deployment_id(id_path_str: str) -> str | None: return None if not stored: - # Peer is mid-write. Sleep briefly and retry. - time.sleep(_PEER_READ_RETRY_DELAY_SECONDS) + # Peer is mid-write. Exponential backoff (5/10/20 ms) so a + # slow NFS write window is waited out without hammering the + # handle every 5 ms. + time.sleep(_PEER_READ_RETRY_DELAY_SECONDS * (2**attempt)) continue try: uuid.UUID(stored) except ValueError: - # Peer wrote partial UUID. Sleep briefly and retry; the - # peer may finish before our next attempt. - time.sleep(_PEER_READ_RETRY_DELAY_SECONDS) + # Peer wrote partial UUID. Exponential backoff (5/10/20 ms); + # the peer may finish before the next, longer wait. 
+ time.sleep(_PEER_READ_RETRY_DELAY_SECONDS * (2**attempt)) continue return stored @@ -1150,6 +1165,11 @@ def _read_peer_deployment_id(id_path_str: str) -> str | None: detail="deployment_id_peer_read_exhausted", attempts=_PEER_READ_RETRY_ATTEMPTS, using_generated_id=True, + impact=( + "caller falls back to a fresh per-process deployment_id; " + "telemetry from this process will not correlate with the " + "peer until the on-disk id file is repaired" + ), ) return None diff --git a/src/synthorg/telemetry/protocol.py b/src/synthorg/telemetry/protocol.py index 1d3802d6ad..f24e24e843 100644 --- a/src/synthorg/telemetry/protocol.py +++ b/src/synthorg/telemetry/protocol.py @@ -33,7 +33,7 @@ class TelemetryEvent(BaseModel): restricted to primitives (int, float, str, bool). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") event_type: NotBlankStr = Field( description="Dot-separated event name", diff --git a/src/synthorg/templates/preset_service.py b/src/synthorg/templates/preset_service.py index 77f3ec10f1..de8bcab5ed 100644 --- a/src/synthorg/templates/preset_service.py +++ b/src/synthorg/templates/preset_service.py @@ -49,7 +49,7 @@ class PresetEntry(BaseModel): updated_at: ISO 8601 last-update timestamp (None for builtins). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr source: PresetSource diff --git a/src/synthorg/tools/analytics/config.py b/src/synthorg/tools/analytics/config.py index c900cf9acf..91d0429e24 100644 --- a/src/synthorg/tools/analytics/config.py +++ b/src/synthorg/tools/analytics/config.py @@ -15,7 +15,7 @@ class AnalyticsToolsConfig(BaseModel): can query. ``None`` means all metrics are accessible. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") query_timeout: float = Field( default=60.0, diff --git a/src/synthorg/tools/base.py b/src/synthorg/tools/base.py index 9e8dd105e0..581690a258 100644 --- a/src/synthorg/tools/base.py +++ b/src/synthorg/tools/base.py @@ -45,7 +45,7 @@ class ToolExecutionResult(BaseModel): metadata: Optional structured data for programmatic consumers. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") content: str = Field(description="Tool output") is_error: bool = Field(default=False, description="Whether tool errored") diff --git a/src/synthorg/tools/communication/config.py b/src/synthorg/tools/communication/config.py index 9fbe8908de..3e1354b3d2 100644 --- a/src/synthorg/tools/communication/config.py +++ b/src/synthorg/tools/communication/config.py @@ -28,7 +28,7 @@ class EmailConfig(BaseModel): smtp_timeout: SMTP connection timeout in seconds. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") host: NotBlankStr = Field(description="SMTP server hostname") port: int = Field( @@ -97,7 +97,7 @@ class CommunicationToolsConfig(BaseModel): max_recipients: Maximum number of recipients per email. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") email: EmailConfig | None = Field( default=None, diff --git a/src/synthorg/tools/database/config.py b/src/synthorg/tools/database/config.py index bb45ccd4ce..75447ac4aa 100644 --- a/src/synthorg/tools/database/config.py +++ b/src/synthorg/tools/database/config.py @@ -20,7 +20,7 @@ class DatabaseConnectionConfig(BaseModel): read_only: Whether the connection is read-only by default. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") database_path: NotBlankStr = Field( description="Path to the SQLite database file", @@ -51,7 +51,7 @@ class DatabaseConfig(BaseModel): default_connection: Name of the default connection. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") connections: dict[NotBlankStr, DatabaseConnectionConfig] = Field( default_factory=dict, diff --git a/src/synthorg/tools/design/config.py b/src/synthorg/tools/design/config.py index 64042779c3..1b6e8f2b10 100644 --- a/src/synthorg/tools/design/config.py +++ b/src/synthorg/tools/design/config.py @@ -15,7 +15,7 @@ class DesignToolsConfig(BaseModel): generated assets. ``None`` means in-memory only. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") image_timeout: float = Field( default=60.0, diff --git a/src/synthorg/tools/design/image_generator.py b/src/synthorg/tools/design/image_generator.py index 168164d132..06030fcd19 100644 --- a/src/synthorg/tools/design/image_generator.py +++ b/src/synthorg/tools/design/image_generator.py @@ -40,7 +40,7 @@ class ImageResult(BaseModel): height: Image height in pixels. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") data: str = Field(min_length=1, description="Base64-encoded image data") content_type: str = Field( diff --git a/src/synthorg/tools/disclosure_config.py b/src/synthorg/tools/disclosure_config.py index 73de78b224..7a0725a6e0 100644 --- a/src/synthorg/tools/disclosure_config.py +++ b/src/synthorg/tools/disclosure_config.py @@ -24,7 +24,7 @@ class ToolDisclosureConfig(BaseModel): which auto-unload triggers. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") l1_token_budget: int = Field( default=3000, diff --git a/src/synthorg/tools/git_url_validator.py b/src/synthorg/tools/git_url_validator.py index c2301792c2..e5c6ecfac8 100644 --- a/src/synthorg/tools/git_url_validator.py +++ b/src/synthorg/tools/git_url_validator.py @@ -102,7 +102,7 @@ class GitCloneNetworkPolicy(BaseModel): IPs legitimately vary between queries. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") hostname_allowlist: tuple[NotBlankStr, ...] = Field( default=(), @@ -157,7 +157,7 @@ class DnsValidationOk(BaseModel): for ``http.curloptResolve`` pinning). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") hostname: NotBlankStr port: int | None = Field(default=None, gt=0, le=65535) diff --git a/src/synthorg/tools/html_parse_guard.py b/src/synthorg/tools/html_parse_guard.py index 01b99f93e6..7fb963fed4 100644 --- a/src/synthorg/tools/html_parse_guard.py +++ b/src/synthorg/tools/html_parse_guard.py @@ -134,7 +134,7 @@ class HTMLParseGuardConfig(BaseModel): which ``gap_detected`` is set to ``True``. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=True, @@ -158,7 +158,7 @@ class HTMLSanitizeResult(BaseModel): stripped_element_count: Number of elements stripped. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") cleaned: str = Field(description="Sanitized output text") gap_detected: bool = Field( diff --git a/src/synthorg/tools/integrity_check.py b/src/synthorg/tools/integrity_check.py index 49dba7c165..c3e5183303 100644 --- a/src/synthorg/tools/integrity_check.py +++ b/src/synthorg/tools/integrity_check.py @@ -42,7 +42,7 @@ class ToolIntegrityCheckConfig(BaseModel): fail_on_violation: If ``True``, raise on hash mismatch. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=True, @@ -67,7 +67,7 @@ class ToolIntegrityViolation(BaseModel): actual_hash: Hash computed at current boot. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") tool_name: NotBlankStr = Field(description="Tool with mismatch") expected_hash: NotBlankStr = Field(description="Prior recorded hash") @@ -83,7 +83,7 @@ class ToolIntegrityReport(BaseModel): checked_at: UTC timestamp of the check. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") violations: tuple[ToolIntegrityViolation, ...] = Field( default=(), diff --git a/src/synthorg/tools/invocation_record.py b/src/synthorg/tools/invocation_record.py index 16819fc634..0b94d1594c 100644 --- a/src/synthorg/tools/invocation_record.py +++ b/src/synthorg/tools/invocation_record.py @@ -28,7 +28,7 @@ class ToolInvocationRecord(BaseModel): error_message: Error message if the invocation failed. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") id: NotBlankStr = Field( default_factory=lambda: NotBlankStr(str(uuid4())), diff --git a/src/synthorg/tools/mcp/config.py b/src/synthorg/tools/mcp/config.py index b10b27b8f7..0ef57c9ed6 100644 --- a/src/synthorg/tools/mcp/config.py +++ b/src/synthorg/tools/mcp/config.py @@ -39,7 +39,7 @@ class MCPServerConfig(BaseModel): enabled: Whether the server is active. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Unique server identifier") transport: Literal["stdio", "streamable_http"] = Field( @@ -154,7 +154,7 @@ class MCPConfig(BaseModel): servers: Tuple of MCP server configurations. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") servers: tuple[MCPServerConfig, ...] = Field( default=(), diff --git a/src/synthorg/tools/mcp/models.py b/src/synthorg/tools/mcp/models.py index 8067738465..21974aa160 100644 --- a/src/synthorg/tools/mcp/models.py +++ b/src/synthorg/tools/mcp/models.py @@ -21,7 +21,7 @@ class MCPToolInfo(BaseModel): server_name: Name of the server that hosts this tool. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") name: NotBlankStr = Field(description="Tool name") description: str = Field( @@ -46,7 +46,7 @@ class MCPRawResult(BaseModel): structured_content: Optional structured content from the result. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") content: tuple[Any, ...] 
= Field( default=(), diff --git a/src/synthorg/tools/network_validator.py b/src/synthorg/tools/network_validator.py index eedaac9bd6..b43bccd88a 100644 --- a/src/synthorg/tools/network_validator.py +++ b/src/synthorg/tools/network_validator.py @@ -85,7 +85,7 @@ class NetworkPolicy(BaseModel): resolution. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") hostname_allowlist: tuple[NotBlankStr, ...] = Field( default=(), @@ -133,7 +133,7 @@ class DnsValidationOk(BaseModel): is_https: Whether the URL uses HTTPS transport. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") hostname: NotBlankStr port: int | None = Field(default=None, gt=0, le=65535) diff --git a/src/synthorg/tools/sandbox/config.py b/src/synthorg/tools/sandbox/config.py index b70d6b2046..81cac6c195 100644 --- a/src/synthorg/tools/sandbox/config.py +++ b/src/synthorg/tools/sandbox/config.py @@ -24,7 +24,7 @@ class SubprocessSandboxConfig(BaseModel): prefixes appended to platform defaults for the PATH filter. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") timeout_seconds: float = Field( default=30.0, diff --git a/src/synthorg/tools/sandbox/docker_config.py b/src/synthorg/tools/sandbox/docker_config.py index f433c00f7d..ae317e35de 100644 --- a/src/synthorg/tools/sandbox/docker_config.py +++ b/src/synthorg/tools/sandbox/docker_config.py @@ -73,7 +73,7 @@ class DockerSandboxConfig(BaseModel): runtime: Optional container runtime (e.g. ``"runsc"`` for gVisor). 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") image: NotBlankStr = Field( default_factory=_default_sandbox_image, diff --git a/src/synthorg/tools/sandbox/lifecycle/config.py b/src/synthorg/tools/sandbox/lifecycle/config.py index 06b5dab85e..1a2e34bced 100644 --- a/src/synthorg/tools/sandbox/lifecycle/config.py +++ b/src/synthorg/tools/sandbox/lifecycle/config.py @@ -18,7 +18,7 @@ class SandboxLifecycleConfig(BaseModel): threshold. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") strategy: Literal["per-agent", "per-task", "per-call"] = "per-agent" grace_period_seconds: float = Field(default=30.0, ge=0.0) diff --git a/src/synthorg/tools/sandbox/policy.py b/src/synthorg/tools/sandbox/policy.py index efe5487eed..7d9a19a78f 100644 --- a/src/synthorg/tools/sandbox/policy.py +++ b/src/synthorg/tools/sandbox/policy.py @@ -28,7 +28,7 @@ class FilesystemPolicy(BaseModel): deny_paths: Paths explicitly denied (overrides read/write). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") read_paths: tuple[str, ...] = ("/workspace",) write_paths: tuple[str, ...] = () @@ -48,7 +48,7 @@ class NetworkPolicy(BaseModel): loopback_allowed: Allow loopback traffic in restricted mode. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") mode: Literal["none", "bridge", "host"] = "none" allowed_hosts: tuple[NotBlankStr, ...] = () @@ -69,7 +69,7 @@ class ProcessPolicy(BaseModel): deny_executables: Blacklist of executable paths. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") max_processes: int = Field(default=64, gt=0, le=4096) allowed_executables: tuple[str, ...] = () @@ -90,7 +90,7 @@ class InferencePolicy(BaseModel): (only relevant when ``route_through_proxy`` is ``False``). """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") route_through_proxy: bool = False allowed_providers: tuple[NotBlankStr, ...] = () @@ -111,7 +111,7 @@ class SandboxPolicy(BaseModel): inference: Inference routing policy. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") filesystem: FilesystemPolicy = Field( default_factory=FilesystemPolicy, diff --git a/src/synthorg/tools/sandbox/sandboxing_config.py b/src/synthorg/tools/sandbox/sandboxing_config.py index aa42655074..e209a5c653 100644 --- a/src/synthorg/tools/sandbox/sandboxing_config.py +++ b/src/synthorg/tools/sandbox/sandboxing_config.py @@ -24,7 +24,7 @@ class SandboxingConfig(BaseModel): docker: Docker sandbox backend configuration. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") default_backend: _BackendName = "subprocess" overrides: dict[str, _BackendName] = Field(default_factory=dict) diff --git a/src/synthorg/tools/sub_constraint_enforcer.py b/src/synthorg/tools/sub_constraint_enforcer.py index dcb2c0f6ac..9c59a37ee4 100644 --- a/src/synthorg/tools/sub_constraint_enforcer.py +++ b/src/synthorg/tools/sub_constraint_enforcer.py @@ -41,7 +41,7 @@ class SubConstraintViolation(BaseModel): action is unconditionally denied. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") constraint: NotBlankStr reason: NotBlankStr diff --git a/src/synthorg/tools/terminal/config.py b/src/synthorg/tools/terminal/config.py index 11bb8c1eb5..566d7bd961 100644 --- a/src/synthorg/tools/terminal/config.py +++ b/src/synthorg/tools/terminal/config.py @@ -18,7 +18,7 @@ class TerminalConfig(BaseModel): default_timeout: Default command execution timeout in seconds. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") command_allowlist: tuple[NotBlankStr, ...] = Field( default=(), diff --git a/src/synthorg/tools/web/config.py b/src/synthorg/tools/web/config.py index b28227282c..12fa0fda49 100644 --- a/src/synthorg/tools/web/config.py +++ b/src/synthorg/tools/web/config.py @@ -14,7 +14,7 @@ class WebToolsConfig(BaseModel): request_timeout: Default HTTP request timeout in seconds. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") network_policy: NetworkPolicy = Field( default_factory=NetworkPolicy, diff --git a/src/synthorg/versioning/models.py b/src/synthorg/versioning/models.py index 90abf0940f..9312fa8449 100644 --- a/src/synthorg/versioning/models.py +++ b/src/synthorg/versioning/models.py @@ -48,7 +48,7 @@ class VersionSnapshot[T: BaseModel](BaseModel): saved_at: Timezone-aware timestamp when the snapshot was captured. 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") entity_id: NotBlankStr = Field(description="String primary key of the entity") version: int = Field(ge=1, description="Monotonic version counter (1-indexed)") diff --git a/src/synthorg/workers/claim.py b/src/synthorg/workers/claim.py index de665b5b16..fd2e018455 100644 --- a/src/synthorg/workers/claim.py +++ b/src/synthorg/workers/claim.py @@ -87,7 +87,7 @@ class TaskClaim(BaseModel): duplicate observation back to ack-and-skip. """ - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") task_id: NotBlankStr = Field(description="Task identifier") project_id: NotBlankStr | None = Field( diff --git a/src/synthorg/workers/config.py b/src/synthorg/workers/config.py index 0028b51e7c..47c7ffb0e9 100644 --- a/src/synthorg/workers/config.py +++ b/src/synthorg/workers/config.py @@ -84,7 +84,7 @@ class QueueConfig(BaseModel): tasks. ``None`` means "derive from env at runtime". 
""" - model_config = ConfigDict(frozen=True, allow_inf_nan=False) + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") enabled: bool = Field( default=False, diff --git a/tests/conformance/persistence/test_core_repositories.py b/tests/conformance/persistence/test_core_repositories.py index e8810f740e..3853833f89 100644 --- a/tests/conformance/persistence/test_core_repositories.py +++ b/tests/conformance/persistence/test_core_repositories.py @@ -249,6 +249,36 @@ async def test_get_history_filters_by_channel( assert len(await backend.messages.get_history("chan1")) == 1 assert len(await backend.messages.get_history("chan2")) == 1 + async def test_get_by_id_returns_matching_message( + self, backend: PersistenceBackend + ) -> None: + msg_id = uuid4() + await backend.messages.append( + make_message(msg_id=msg_id, channel="chan1", content="needle") + ) + await backend.messages.append( + make_message(msg_id=uuid4(), channel="chan1", content="haystack") + ) + found = await backend.messages.get_by_id("chan1", str(msg_id)) + assert found is not None + assert str(found.id) == str(msg_id) + assert found.channel == "chan1" + + async def test_get_by_id_unknown_id_returns_none( + self, backend: PersistenceBackend + ) -> None: + await backend.messages.append(make_message(msg_id=uuid4(), channel="chan1")) + assert await backend.messages.get_by_id("chan1", str(uuid4())) is None + + async def test_get_by_id_wrong_channel_returns_none( + self, backend: PersistenceBackend + ) -> None: + msg_id = uuid4() + await backend.messages.append(make_message(msg_id=msg_id, channel="chan1")) + # The id exists but on a different channel: the channel scoping + # predicate must reject the cross-channel read. 
+ assert await backend.messages.get_by_id("chan2", str(msg_id)) is None + async def test_delete_removes_row_and_returns_true( self, backend: PersistenceBackend ) -> None: diff --git a/tests/integration/engine/test_multi_agent_delegation.py b/tests/integration/engine/test_multi_agent_delegation.py index 7d6f6f32a1..4139dbb922 100644 --- a/tests/integration/engine/test_multi_agent_delegation.py +++ b/tests/integration/engine/test_multi_agent_delegation.py @@ -902,9 +902,11 @@ def test_load_balanced_prefers_least_loaded(self) -> None: ) service = TaskAssignmentService(strategy) - task = _make_task( - required_skills=("python",), - ) + # ``required_skills`` is a scoring hint on ``AssignmentRequest``, + # not a ``Task`` field; passing it to ``_make_task`` was a + # silently-dropped no-op before ``Task`` gained ``extra= + # "forbid"``. The request below already carries it. + task = _make_task() # Both agents match python; backend has higher workload request = AssignmentRequest( diff --git a/tests/unit/api/controllers/test_agent_identity_versions.py b/tests/unit/api/controllers/test_agent_identity_versions.py index 7a65da9142..942d91a578 100644 --- a/tests/unit/api/controllers/test_agent_identity_versions.py +++ b/tests/unit/api/controllers/test_agent_identity_versions.py @@ -354,9 +354,10 @@ async def test_rollback_evolve_value_error_returns_422( Immutable-field mismatches are validation failures (the request targets a snapshot whose immutable fields disagree with the current registry entry), not generic 400-class client errors -- - ValidationError is the correct domain exception, so the central - handler emits 422 with the RFC 9457 ``error_code`` - ``VALIDATION_ERROR``. + ``ImmutableFieldMismatchError`` (a ``ValidationError`` subclass) + is the correct domain exception, so the central handler emits + 422 with the distinct RFC 9457 ``error_code`` + ``IMMUTABLE_FIELD_MISMATCH``. 
""" fake_persistence.identity_versions.clear() await agent_registry.clear() @@ -375,7 +376,7 @@ async def _raise_value_error(*_args: Any, **_kwargs: Any) -> None: ) assert resp.status_code == 422 body = resp.json() - assert body["error_detail"]["error_code"] == ErrorCode.VALIDATION_ERROR + assert body["error_detail"]["error_code"] == ErrorCode.IMMUTABLE_FIELD_MISMATCH assert "cannot rollback" in body["error"].lower() assert "immutable field mismatch" in body["error"].lower() diff --git a/tests/unit/api/controllers/test_memory_admin.py b/tests/unit/api/controllers/test_memory_admin.py index bf180f83bd..515bd23c04 100644 --- a/tests/unit/api/controllers/test_memory_admin.py +++ b/tests/unit/api/controllers/test_memory_admin.py @@ -587,6 +587,8 @@ async def _fake_get(_namespace: str, key: str) -> SettingValue: "fine_tune_default_batch_size": "256", "fine_tune_min_docs_required": "25", "fine_tune_min_docs_recommended": "75", + "fine_tune_preflight_max_depth": "12", + "fine_tune_preflight_walk_timeout_s": "2.5", }[key] return SettingValue( namespace=SettingNamespace.MEMORY, @@ -602,6 +604,8 @@ async def _fake_get(_namespace: str, key: str) -> SettingValue: assert thresholds.default_batch_size == 256 assert thresholds.min_docs_required == 25 assert thresholds.min_docs_recommended == 75 + assert thresholds.preflight_max_depth == 12 + assert thresholds.preflight_walk_timeout_s == 2.5 async def test_unparseable_value_falls_back_to_default(self) -> None: """A non-integer setting value drops to the imported fallback.""" @@ -736,6 +740,79 @@ def test_count_at_required_threshold_does_not_fail( ) assert check.status == "warn" + def test_depth_cap_truncates_to_warn_not_false_fail( + self, + tmp_path: object, + ) -> None: + """A tree deeper than ``max_depth`` returns a truncation warn. + + Without the cap the scan would recurse unbounded; with it the + endpoint must surface ``warn`` (scan truncated) rather than a + false ``fail`` from an undercount or an unbounded traversal. 
+ """ + from pathlib import Path + + from synthorg.api.controllers.memory import _check_documents + + root = Path(str(tmp_path)) + deep = root + for level in range(6): + deep = deep / f"level-{level}" + deep.mkdir() + (deep / f"doc-{level}.md").write_text("x") + check = _check_documents( + str(root), + min_required=1, + min_recommended=2, + max_depth=2, + walk_timeout_s=30.0, + ) + assert check.status == "warn" + assert "truncated" in check.message.lower() + + def test_deadline_truncates_to_warn( + self, + tmp_path: object, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """A scan that exceeds the wall-clock deadline warns, not hangs. + + ``time.monotonic`` is advanced past the deadline on the first + in-loop check so the bound is exercised deterministically + without depending on real wall-clock timing. + """ + import time as _time_mod + from pathlib import Path + + from synthorg.api.controllers.memory import _check_documents + + src = Path(str(tmp_path)) + for i in range(30): + (src / f"doc-{i:02d}.md").write_text("x") + + ticks = iter([0.0, 1.0, 100.0, 200.0, 300.0]) + + def _fake_monotonic() -> float: + try: + return next(ticks) + except StopIteration: + return 999.0 + + # ``_check_documents`` imports ``time`` locally, so patching + # the stdlib module's ``monotonic`` is what the deadline check + # resolves at call time. + monkeypatch.setattr(_time_mod, "monotonic", _fake_monotonic) + + check = _check_documents( + str(src), + min_required=1, + min_recommended=2, + max_depth=64, + walk_timeout_s=0.001, + ) + assert check.status == "warn" + assert "truncated" in check.message.lower() + @pytest.mark.unit class TestListCheckpointsEndpoint: @@ -961,3 +1038,40 @@ async def test_tampered_cursor_raises(self) -> None: cursor="not-a-real-cursor", limit=50, ) + + +@pytest.mark.unit +class TestPathParamTyping: + """The 5 admin path-param handlers carry the ``PathId`` domain type. 
+ + Each handler annotates its identifier path params with the + framework-level ``PathId`` constraint so a blank / over-length + segment is rejected by Litestar before the handler body runs. + """ + + @pytest.mark.parametrize( + ("handler_name", "param_names"), + [ + ("resume_fine_tune", ("run_id",)), + ("deploy_checkpoint", ("checkpoint_id",)), + ("rollback_checkpoint", ("checkpoint_id",)), + ("delete_checkpoint", ("checkpoint_id",)), + ("delete_memory_entry", ("agent_id", "memory_id")), + ], + ) + def test_handler_path_params_use_pathid( + self, + handler_name: str, + param_names: tuple[str, ...], + ) -> None: + import typing + + from synthorg.api.path_params import PathId + + fn = getattr(MemoryAdminController, handler_name).fn + hints = typing.get_type_hints(fn, include_extras=True) + for param in param_names: + assert hints[param] == PathId, ( + f"{handler_name}.{param} must be annotated PathId, " + f"got {hints.get(param)!r}" + ) diff --git a/tests/unit/api/controllers/test_workflows.py b/tests/unit/api/controllers/test_workflows.py index af1728e40c..516bd5e172 100644 --- a/tests/unit/api/controllers/test_workflows.py +++ b/tests/unit/api/controllers/test_workflows.py @@ -293,11 +293,17 @@ def test_get_workflow(self, test_client: TestClient[Any]) -> None: assert body["data"]["name"] == "test-workflow" def test_get_workflow_not_found(self, test_client: TestClient[Any]) -> None: + from synthorg.core.error_taxonomy import ErrorCode + resp = test_client.get("/api/v1/workflows/nonexistent") assert resp.status_code == 404 body = resp.json() assert body["success"] is False assert "not found" in body["error"].lower() + assert ( + body["error_detail"]["error_code"] + == ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND + ) # ── Update ─────────────────────────────────────────────────── @@ -395,11 +401,17 @@ def test_delete_workflow_not_found(self, test_client: TestClient[Any]) -> None: # exhaustively in tests/unit/engine/workflow/. 
def test_validate_workflow_not_found(self, test_client: TestClient[Any]) -> None: + from synthorg.core.error_taxonomy import ErrorCode + resp = test_client.post("/api/v1/workflows/nonexistent/validate") assert resp.status_code == 404 body = resp.json() assert body["success"] is False assert "not found" in body["error"].lower() + assert ( + body["error_detail"]["error_code"] + == ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND + ) def test_validate_workflow(self, test_client: TestClient[Any]) -> None: """A valid 3-node graph should pass validation.""" @@ -438,11 +450,17 @@ def test_validate_workflow_with_errors(self, test_client: TestClient[Any]) -> No # ── Export ─────────────────────────────────────────────────── def test_export_workflow_not_found(self, test_client: TestClient[Any]) -> None: + from synthorg.core.error_taxonomy import ErrorCode + resp = test_client.post("/api/v1/workflows/nonexistent/export") assert resp.status_code == 404 body = resp.json() assert body["success"] is False assert "not found" in body["error"].lower() + assert ( + body["error_detail"]["error_code"] + == ErrorCode.WORKFLOW_DEFINITION_NOT_FOUND + ) def test_export_workflow(self, test_client: TestClient[Any]) -> None: wf_id = _seed(test_client, "wfdef-exp001") diff --git a/tests/unit/api/fakes_workflow.py b/tests/unit/api/fakes_workflow.py index aa68d8ce07..b0dfd438ea 100644 --- a/tests/unit/api/fakes_workflow.py +++ b/tests/unit/api/fakes_workflow.py @@ -14,6 +14,7 @@ ParentReference, SubworkflowSummary, ) +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE from synthorg.persistence.workflow_execution_protocol import ( WorkflowExecutionFilterSpec, ) @@ -356,14 +357,25 @@ async def list_summaries( async def search( self, query: NotBlankStr, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[SubworkflowSummary, ...]: q = query.lower() - summaries = await self.list_summaries() - return tuple( - s - for s in summaries - if q in s.name.lower() or q in (s.description or 
"").lower() + # Fetch the full candidate set before filtering: the default + # page cap would pre-truncate matches beyond the first page. + summaries = await self.list_summaries( + limit=max(len(self._rows), DEFAULT_PAGE_SIZE), + ) + matched = sorted( + ( + s + for s in summaries + if q in s.name.lower() or q in (s.description or "").lower() + ), + key=lambda s: s.subworkflow_id, ) + return tuple(matched[offset : offset + limit]) async def delete( self, @@ -389,6 +401,9 @@ async def find_parents( self, subworkflow_id: NotBlankStr, version: NotBlankStr | None = None, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[ParentReference, ...]: if self._definition_repo is None: return () @@ -416,4 +431,12 @@ async def find_parents( parent_type="workflow_definition", ), ) - return tuple(references) + references.sort( + key=lambda r: ( + r.parent_type, + r.parent_id, + r.node_id, + r.pinned_version, + ), + ) + return tuple(references[offset : offset + limit]) diff --git a/tests/unit/api/rate_limits/test_controller_coverage.py b/tests/unit/api/rate_limits/test_controller_coverage.py index 4ec50d8fd8..52b60a5cee 100644 --- a/tests/unit/api/rate_limits/test_controller_coverage.py +++ b/tests/unit/api/rate_limits/test_controller_coverage.py @@ -220,6 +220,21 @@ def test_every_policy_lookup_resolves() -> None: "delete_message", "messages.delete", ), + ( + _CONTROLLERS_DIR / "training.py", + "create_plan", + "training.create_plan", + ), + ( + _CONTROLLERS_DIR / "training.py", + "execute_plan", + "training.execute", + ), + ( + _CONTROLLERS_DIR / "training.py", + "update_overrides", + "training.update_overrides", + ), ) diff --git a/tests/unit/api/rate_limits/test_guard.py b/tests/unit/api/rate_limits/test_guard.py index 1524ce1144..f08a170eb7 100644 --- a/tests/unit/api/rate_limits/test_guard.py +++ b/tests/unit/api/rate_limits/test_guard.py @@ -11,6 +11,10 @@ from synthorg.api.rate_limits.config import PerOpRateLimitConfig from synthorg.api.rate_limits.guard 
import per_op_rate_limit from synthorg.api.rate_limits.in_memory import InMemorySlidingWindowStore +from synthorg.api.rate_limits.policies import ( + RATE_LIMIT_POLICIES, + per_op_rate_limit_from_policy, +) pytestmark = pytest.mark.unit @@ -139,3 +143,35 @@ def test_invalid_construction(self) -> None: per_op_rate_limit("bad", max_requests=0, window_seconds=60) with pytest.raises(ValueError, match="window_seconds"): per_op_rate_limit("bad", max_requests=10, window_seconds=0) + + +class TestTrainingEndpointBurstRejection: + """The two rate-limited training endpoints reject burst traffic. + + Builds the guard from the real policy registry for the exact + ``training.create_plan`` / ``training.update_overrides`` keys (the + same call the controllers make) and drives it past its policy + ``max_requests`` to assert burst traffic is rejected with 429 plus + a ``Retry-After`` header. + """ + + @pytest.mark.parametrize( + "operation", + ["training.create_plan", "training.update_overrides"], + ) + def test_burst_past_policy_limit_is_rejected(self, operation: str) -> None: + max_requests, _window = RATE_LIMIT_POLICIES[operation] + # ``key="ip"`` keeps the test auth-free while still exercising + # the real policy-resolved guard the controllers attach. + guard = per_op_rate_limit_from_policy(operation, key="ip") + + @get("/t", guards=[guard]) + async def handler() -> dict[str, bool]: + return {"ok": True} + + with TestClient(_make_test_app(handler)) as client: + for _ in range(max_requests): + assert client.get("/t").status_code == 200 + blocked = client.get("/t") + assert blocked.status_code == 429 + assert "retry-after" in {k.lower() for k in blocked.headers} diff --git a/tests/unit/api/test_dto_forbid_extra.py b/tests/unit/api/test_dto_forbid_extra.py deleted file mode 100644 index 7604e8b543..0000000000 --- a/tests/unit/api/test_dto_forbid_extra.py +++ /dev/null @@ -1,777 +0,0 @@ -"""Every API-boundary DTO must reject unknown fields (``extra="forbid"``). 
- -A DTO that does not declare ``extra="forbid"`` silently accepts unknown -payload keys, which masks client typos and lets fabricated capability -flags slip through to handler logic. ``scripts/check_dto_forbid_extra.py`` -enforces the convention statically; this test asserts the runtime -behaviour for every Request / Response / Snapshot / Result / Envelope / -Status / Info / Summary DTO under ``src/synthorg/api/``, plus a small -suite of gate-classification tests that exercise the script directly. - -The bare-extra-key probe uses an empty otherwise-invalid payload on -purpose: Pydantic still records the ``extra_forbidden`` error alongside -any required-field misses, so the assertion is robust to required-field -changes in the surrounding DTO. -""" - -import importlib.util -import textwrap -from pathlib import Path -from typing import Any - -import pytest -from pydantic import BaseModel, ValidationError - -from synthorg.api.auth.controller_dtos import ( - ChangePasswordRequest, - CookieSessionResponse, - LoginRequest, - SessionResponse, - SetupRequest, - UserInfoResponse, - WsTicketResponse, -) -from synthorg.api.controllers.agents import ( - AgentHealthResponse, - PerformanceSummary, - TrustSummary, -) -from synthorg.api.controllers.analytics import ( - ForecastResponse, - TrendsResponse, -) -from synthorg.api.controllers.autonomy import ( - AutonomyLevelRequest, - AutonomyLevelResponse, -) -from synthorg.api.controllers.budget import ( - CostRecordListResponse, - DailySummary, - PeriodSummary, -) -from synthorg.api.controllers.capabilities import CapabilitiesResponse -from synthorg.api.controllers.clients import ( - CreateClientRequest, - UpdateClientRequest, -) -from synthorg.api.controllers.collaboration import ( - CalibrationSummaryResponse, - OverrideResponse, - SetOverrideRequest, -) -from synthorg.api.controllers.connections import ( - CreateConnectionRequest, - UpdateConnectionRequest, -) -from synthorg.api.controllers.custom_rules import ( - 
CreateCustomRuleRequest, - PreviewRuleRequest, - UpdateCustomRuleRequest, -) -from synthorg.api.controllers.escalations import ( - CancelEscalationRequest, - EscalationResponse, - SubmitDecisionRequest, -) -from synthorg.api.controllers.events import ( - InterruptResponse, - ResumeInterruptRequest, -) -from synthorg.api.controllers.health import ( - LivenessStatus, - ReadinessStatus, -) -from synthorg.api.controllers.mcp_catalog import ( - InstallEntryRequest, - InstallEntryResponse, -) -from synthorg.api.controllers.meetings import TriggerMeetingRequest -from synthorg.api.controllers.memory import ActiveEmbedderResponse -from synthorg.api.controllers.meta import ChatRequest -from synthorg.api.controllers.oauth import InitiateOAuthFlowRequest -from synthorg.api.controllers.quality import ( - QualityOverrideResponse, - SetQualityOverrideRequest, -) -from synthorg.api.controllers.reports import ( - GenerateReportRequest, - ReportResponse, -) -from synthorg.api.controllers.reviews import StageDecisionResult -from synthorg.api.controllers.scaling import ( - PriorityUpdateRequest, - ScalingDecisionResponse, - ScalingSignalResponse, - ScalingStrategyResponse, - StrategyUpdateRequest, -) -from synthorg.api.controllers.settings import ( - SecurityConfigExportResponse, - SecurityConfigImportRequest, - UpdateSettingRequest, -) - -# Aliases to avoid pytest's ``Test``-prefix auto-collection (pytest tries -# to instantiate any module-level ``Test*`` symbol as a test class). 
-from synthorg.api.controllers.settings import ( - TestSinkConfigRequest as _SinkConfigRequest, -) -from synthorg.api.controllers.settings import ( - TestSinkConfigResponse as _SinkConfigResponse, -) -from synthorg.api.controllers.setup_models import ( - AvailableLocalesResponse, - PersonalityPresetInfoResponse, - SetupAgentRequest, - SetupAgentResponse, - SetupAgentSummary, - SetupCompanyRequest, - SetupCompanyResponse, - SetupCompleteResponse, - SetupNameLocalesRequest, - SetupNameLocalesResponse, - SetupStatusResponse, - TemplateInfoResponse, - TemplateVariableResponse, - UpdateAgentModelRequest, - UpdateAgentNameRequest, - UpdateAgentPersonalityRequest, -) -from synthorg.api.controllers.simulations import SimulationStatusResponse -from synthorg.api.controllers.subworkflows import CreateSubworkflowRequest -from synthorg.api.controllers.teams import ( - CreateTeamRequest, - ReorderTeamsRequest, - TeamResponse, - UpdateTeamRequest, -) -from synthorg.api.controllers.template_packs import ( - ApplyTemplatePackRequest, - ApplyTemplatePackResponse, - PackInfoResponse, -) -from synthorg.api.controllers.users import ( - CreateUserRequest, - GrantOrgRoleRequest, - UpdateUserRoleRequest, - UserResponse, -) -from synthorg.api.dto import ( - ApiResponse, - ApproveRequest, - CancelTaskRequest, - CoordinateTaskRequest, - CoordinationPhaseResponse, - CoordinationResultResponse, - CreateApprovalRequest, - CreateArtifactRequest, - CreateProjectRequest, - CreateTaskRequest, - PaginatedResponse, - RejectRequest, - RollbackAgentIdentityRequest, - TransitionTaskRequest, - UpdateTaskRequest, -) -from synthorg.api.dto_discovery import ( - AddAllowlistEntryRequest, - DiscoveryPolicyResponse, - RemoveAllowlistEntryRequest, -) -from synthorg.api.dto_ontology import ( - CreateEntityRequest, - DriftAgentResponse, - DriftReportResponse, - DriftSummary, - EntityFieldResponse, - EntityListMeta, - EntityRelationResponse, - EntityResponse, - EntityVersionResponse, - UpdateEntityRequest, -) 
-from synthorg.api.dto_org import ( - CreateAgentOrgRequest, - CreateDepartmentRequest, - ReorderAgentsRequest, - ReorderDepartmentsRequest, - UpdateAgentOrgRequest, - UpdateDepartmentRequest, -) -from synthorg.api.dto_personalities import ( - PresetDetailResponse, - PresetSummaryResponse, -) -from synthorg.api.dto_training import ( - CreateTrainingPlanRequest, - TrainingPlanResponse, - TrainingResultResponse, - UpdateTrainingOverridesRequest, -) -from synthorg.api.dto_workflow import ( - ActivateWorkflowRequest, - BlueprintInfoResponse, - CreateFromBlueprintRequest, - CreateWorkflowDefinitionRequest, - RollbackWorkflowRequest, - UpdateWorkflowDefinitionRequest, - WorkflowIODeclarationRequest, -) - -pytestmark = pytest.mark.unit - -# Every ``*Request`` Pydantic DTO under ``src/synthorg/api/``. -# Each MUST have ``ConfigDict(..., extra="forbid")``. -REQUEST_DTOS: tuple[type[BaseModel], ...] = ( - # auth/controller_dtos.py - SetupRequest, - LoginRequest, - ChangePasswordRequest, - # controllers/* inline DTOs - AutonomyLevelRequest, - CreateClientRequest, - UpdateClientRequest, - SetOverrideRequest, - CreateConnectionRequest, - UpdateConnectionRequest, - CreateCustomRuleRequest, - UpdateCustomRuleRequest, - PreviewRuleRequest, - SubmitDecisionRequest, - CancelEscalationRequest, - ResumeInterruptRequest, - InstallEntryRequest, - TriggerMeetingRequest, - ChatRequest, - InitiateOAuthFlowRequest, - SetQualityOverrideRequest, - GenerateReportRequest, - StrategyUpdateRequest, - PriorityUpdateRequest, - UpdateSettingRequest, - _SinkConfigRequest, - SecurityConfigImportRequest, - SetupCompanyRequest, - SetupAgentRequest, - UpdateAgentModelRequest, - UpdateAgentNameRequest, - UpdateAgentPersonalityRequest, - SetupNameLocalesRequest, - CreateSubworkflowRequest, - CreateTeamRequest, - UpdateTeamRequest, - ReorderTeamsRequest, - ApplyTemplatePackRequest, - CreateUserRequest, - UpdateUserRoleRequest, - GrantOrgRoleRequest, - # dto.py - CreateArtifactRequest, - 
CreateProjectRequest, - CreateTaskRequest, - UpdateTaskRequest, - TransitionTaskRequest, - CancelTaskRequest, - CreateApprovalRequest, - ApproveRequest, - RejectRequest, - CoordinateTaskRequest, - RollbackAgentIdentityRequest, - # dto_discovery.py - AddAllowlistEntryRequest, - RemoveAllowlistEntryRequest, - # dto_ontology.py - CreateEntityRequest, - UpdateEntityRequest, - # dto_org.py - CreateDepartmentRequest, - UpdateDepartmentRequest, - ReorderDepartmentsRequest, - CreateAgentOrgRequest, - UpdateAgentOrgRequest, - ReorderAgentsRequest, - # dto_training.py - CreateTrainingPlanRequest, - UpdateTrainingOverridesRequest, - # dto_workflow.py - WorkflowIODeclarationRequest, - CreateWorkflowDefinitionRequest, - UpdateWorkflowDefinitionRequest, - ActivateWorkflowRequest, - CreateFromBlueprintRequest, - RollbackWorkflowRequest, -) - -# Every Response / Snapshot / Result / Envelope / Status / Info / -# Summary Pydantic DTO under ``src/synthorg/api/``. Each MUST have -# ``ConfigDict(..., extra="forbid")`` enforced by -# ``scripts/check_dto_forbid_extra.py``. -RESPONSE_DTOS: tuple[type[BaseModel], ...] 
= ( - # auth/controller_dtos.py - CookieSessionResponse, - UserInfoResponse, - WsTicketResponse, - SessionResponse, - # controllers/agents.py - TrustSummary, - PerformanceSummary, - AgentHealthResponse, - # controllers/analytics.py - TrendsResponse, - ForecastResponse, - # controllers/autonomy.py - AutonomyLevelResponse, - # controllers/budget.py - DailySummary, - PeriodSummary, - CostRecordListResponse, - # controllers/capabilities.py - CapabilitiesResponse, - # controllers/collaboration.py - OverrideResponse, - CalibrationSummaryResponse, - # controllers/escalations.py - EscalationResponse, - # controllers/events.py - InterruptResponse, - # controllers/health.py - LivenessStatus, - ReadinessStatus, - # controllers/mcp_catalog.py - InstallEntryResponse, - # controllers/memory.py - ActiveEmbedderResponse, - # controllers/quality.py - QualityOverrideResponse, - # controllers/reports.py - ReportResponse, - # controllers/reviews.py - StageDecisionResult, - # controllers/scaling.py - ScalingStrategyResponse, - ScalingSignalResponse, - ScalingDecisionResponse, - # controllers/settings.py - _SinkConfigResponse, - SecurityConfigExportResponse, - # controllers/setup_models.py - SetupStatusResponse, - TemplateVariableResponse, - TemplateInfoResponse, - SetupAgentSummary, - SetupCompanyResponse, - SetupAgentResponse, - PersonalityPresetInfoResponse, - SetupNameLocalesResponse, - AvailableLocalesResponse, - SetupCompleteResponse, - # controllers/simulations.py - SimulationStatusResponse, - # controllers/teams.py - TeamResponse, - # controllers/template_packs.py - PackInfoResponse, - ApplyTemplatePackResponse, - # controllers/users.py - UserResponse, - # dto.py - ApiResponse, - PaginatedResponse, - CoordinationPhaseResponse, - CoordinationResultResponse, - # dto_discovery.py - DiscoveryPolicyResponse, - # dto_ontology.py - EntityFieldResponse, - EntityRelationResponse, - EntityResponse, - EntityVersionResponse, - DriftAgentResponse, - DriftReportResponse, - DriftSummary, - 
EntityListMeta, - # dto_personalities.py - PresetSummaryResponse, - PresetDetailResponse, - # dto_training.py - TrainingPlanResponse, - TrainingResultResponse, - # dto_workflow.py - BlueprintInfoResponse, -) - - -# DTOs with a ``model_validator(mode="before")`` that raises on missing -# required fields short-circuit before extras are checked. Provide a -# minimal payload that satisfies the mode="before" validator so the -# extras assertion still fires. -_REQUEST_PAYLOAD_OVERRIDES: dict[type[BaseModel], dict[str, Any]] = { - UpdateAgentPersonalityRequest: {"personality_preset": "visionary_leader"}, -} - - -@pytest.mark.parametrize("model_cls", REQUEST_DTOS, ids=lambda c: c.__name__) -def test_request_dto_rejects_unknown_field(model_cls: type[BaseModel]) -> None: - """Each request DTO surfaces ``extra_forbidden`` for unknown keys.""" - payload: dict[str, Any] = { - **_REQUEST_PAYLOAD_OVERRIDES.get(model_cls, {}), - "synthorg_unexpected_field": "x", - } - with pytest.raises(ValidationError) as exc_info: - model_cls.model_validate(payload) - error_types = {err["type"] for err in exc_info.value.errors()} - assert "extra_forbidden" in error_types, ( - f"{model_cls.__name__} accepted an unknown field; expected " - f"'extra_forbidden' in {error_types}. Add ``extra=\"forbid\"`` to " - f"its ``ConfigDict`` so the API boundary rejects typos and " - f"fabricated capability flags." - ) - - -@pytest.mark.parametrize("model_cls", REQUEST_DTOS, ids=lambda c: c.__name__) -def test_request_dto_config_declares_forbid(model_cls: type[BaseModel]) -> None: - """Belt + braces: the config object itself must declare extra=forbid. - - Catches subclass-shadowing bugs where a parent forbids extras but a - subclass quietly relaxes them. - """ - extra = model_cls.model_config.get("extra") - assert extra == "forbid", ( - f"{model_cls.__name__}.model_config['extra'] = {extra!r}; expected 'forbid'." 
- ) - - -@pytest.mark.parametrize("model_cls", RESPONSE_DTOS, ids=lambda c: c.__name__) -def test_response_dto_rejects_unknown_field(model_cls: type[BaseModel]) -> None: - """Each response DTO surfaces ``extra_forbidden`` for unknown keys.""" - payload: dict[str, Any] = {"synthorg_unexpected_field": "x"} - with pytest.raises(ValidationError) as exc_info: - model_cls.model_validate(payload) - error_types = {err["type"] for err in exc_info.value.errors()} - assert "extra_forbidden" in error_types, ( - f"{model_cls.__name__} accepted an unknown field; expected " - f"'extra_forbidden' in {error_types}. Add ``extra=\"forbid\"`` to " - f"its ``ConfigDict`` so the API boundary rejects fabricated " - f"server-side fields and protects round-trip clients." - ) - - -@pytest.mark.parametrize("model_cls", RESPONSE_DTOS, ids=lambda c: c.__name__) -def test_response_dto_config_declares_forbid(model_cls: type[BaseModel]) -> None: - """Belt + braces: response DTO config must declare extra=forbid.""" - extra = model_cls.model_config.get("extra") - assert extra == "forbid", ( - f"{model_cls.__name__}.model_config['extra'] = {extra!r}; expected 'forbid'." 
- ) - - -# ── Gate-classification tests (exercise the script directly) ───────── - - -_GATE_PATH = ( - Path(__file__).resolve().parents[3] / "scripts" / "check_dto_forbid_extra.py" -) - - -def _load_gate_module() -> Any: - """Import the gate script as a module without polluting sys.modules.""" - spec = importlib.util.spec_from_file_location( - "_check_dto_forbid_extra_for_test", _GATE_PATH - ) - assert spec is not None - assert spec.loader is not None - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - return module - - -_GATE = _load_gate_module() - - -@pytest.mark.parametrize("suffix", _GATE.DTO_SUFFIXES) -def test_gate_flags_class_missing_forbid(suffix: str, tmp_path: Path) -> None: - """For each suffix, a BaseModel subclass without forbid is flagged.""" - source = textwrap.dedent( - f""" - from pydantic import BaseModel, ConfigDict - - class Foo{suffix}(BaseModel): - model_config = ConfigDict(frozen=True) - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - names = [name for _, _, name in violations] - assert names == [f"Foo{suffix}"] - - -@pytest.mark.parametrize("suffix", _GATE.DTO_SUFFIXES) -def test_gate_passes_class_with_forbid(suffix: str, tmp_path: Path) -> None: - """For each suffix, a BaseModel subclass declaring forbid is not flagged.""" - source = textwrap.dedent( - f""" - from pydantic import BaseModel, ConfigDict - - class Foo{suffix}(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - assert _GATE._walk(target) == [] - - -def test_gate_ignores_non_dto_class(tmp_path: Path) -> None: - """Classes not matching any DTO suffix are not gated.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooThing(BaseModel): - model_config = ConfigDict(frozen=True) - value: int = 
0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - assert _GATE._walk(target) == [] - - -def test_gate_ignores_non_pydantic_class(tmp_path: Path) -> None: - """A class with a DTO suffix that doesn't inherit from BaseModel is ignored.""" - source = textwrap.dedent( - """ - class FooResponse: - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - assert _GATE._walk(target) == [] - - -def test_gate_flags_class_with_no_model_config(tmp_path: Path) -> None: - """A DTO without any ``model_config`` is treated as a violation.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel - - class FooResponse(BaseModel): - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - assert [name for _, _, name in violations] == ["FooResponse"] - - -def test_gate_respects_optout_with_reason(tmp_path: Path) -> None: - """Class line carrying a ``# lint-allow: ...`` comment is exempted.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooResponse(BaseModel): # lint-allow: dto-forbid-extra -- legacy shape - model_config = ConfigDict(frozen=True) - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - assert _GATE._walk(target) == [] - - -def test_gate_rejects_optout_without_reason(tmp_path: Path) -> None: - """A bare opt-out without a ``-- `` is not honoured.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooResponse(BaseModel): # lint-allow: dto-forbid-extra - model_config = ConfigDict(frozen=True) - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - assert [name for _, _, name in violations] == ["FooResponse"] - - -def 
test_gate_flags_subclass_of_suffixed_base_without_forbid(tmp_path: Path) -> None: - """A leaf DTO whose parent has a DTO suffix must repeat ``extra="forbid"``.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooResponse(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - value: int = 0 - - class BarResponse(FooResponse): - other: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - assert [name for _, _, name in violations] == ["BarResponse"] - - -def test_gate_recognises_generic_subscripted_base(tmp_path: Path) -> None: - """A DTO with PEP 695 generic ``BaseModel[T]`` base is gated like ``BaseModel``.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooEnvelope[T](BaseModel): - model_config = ConfigDict(frozen=True) - value: T | None = None - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - assert [name for _, _, name in violations] == ["FooEnvelope"] - - -def test_gate_flags_dict_literal_model_config_without_forbid(tmp_path: Path) -> None: - """Gate also catches the dict-literal form of ``model_config``.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel - - class FooResponse(BaseModel): - model_config = {"frozen": True} - value: int = 0 - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - assert [name for _, _, name in violations] == ["FooResponse"] - - -def test_gate_passes_dict_literal_model_config_with_forbid(tmp_path: Path) -> None: - """Dict-literal ``model_config`` with ``extra='forbid'`` is accepted.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel - - class FooResponse(BaseModel): - model_config = {"frozen": True, "extra": "forbid"} - value: int = 0 - """ - ) - target = tmp_path / 
"sample.py" - target.write_text(source, encoding="utf-8") - assert _GATE._walk(target) == [] - - -def test_gate_uses_final_model_config_assignment(tmp_path: Path) -> None: - """Last-write-wins: a permissive override after ``extra="forbid"`` is flagged. - - Python class assignment is last-write-wins, so the gate must inspect - the final ``model_config`` value rather than the first match. - Otherwise a class could declare ``extra="forbid"`` early and silently - override it lower in the class body. - """ - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooResponse(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - value: int = 0 - model_config = ConfigDict(frozen=True) - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - violations = _GATE._walk(target) - assert [name for _, _, name in violations] == ["FooResponse"] - - -def test_gate_passes_when_final_assignment_forbids(tmp_path: Path) -> None: - """The final ``model_config`` assignment determines the verdict.""" - source = textwrap.dedent( - """ - from pydantic import BaseModel, ConfigDict - - class FooResponse(BaseModel): - model_config = ConfigDict(frozen=True) - value: int = 0 - model_config = ConfigDict(frozen=True, extra="forbid") - """ - ) - target = tmp_path / "sample.py" - target.write_text(source, encoding="utf-8") - assert _GATE._walk(target) == [] - - -# ── Envelope round-trip tests ──────────────────────────────────────── - - -def test_api_response_round_trip_preserves_payload() -> None: - """``ApiResponse[T]`` survives a round-trip when computed fields are excluded. - - Round-trip serialization must use ``exclude_computed_fields=True`` so - the dump emits only settable fields; ``model_validate`` then runs - against ``extra="forbid"`` without an input-stripping validator - weakening the contract. 
- """ - original = ApiResponse[str](data="hello") - dumped = original.model_dump(exclude_computed_fields=True) - assert "success" not in dumped - restored = ApiResponse[str].model_validate(dumped) - assert restored.data == "hello" - assert restored.error is None - assert restored.success is True - - -def test_api_response_rejects_dump_with_computed_field_when_re_validated() -> None: - """A plain ``model_dump()`` dict is rejected because computed keys re-appear. - - This is the strict-contract trade-off: ``model_dump()`` includes - computed fields by default; without ``exclude_computed_fields=True`` - a re-validation hits ``extra="forbid"`` and raises -- which is the - intended behaviour for the API boundary. - """ - original = ApiResponse[str](data="hello") - dumped = original.model_dump() - assert dumped["success"] is True - with pytest.raises(ValidationError) as exc_info: - ApiResponse[str].model_validate(dumped) - error_types = {err["type"] for err in exc_info.value.errors()} - assert "extra_forbidden" in error_types - - -def test_paginated_response_round_trip_preserves_payload() -> None: - """``PaginatedResponse[T]`` survives a round-trip with computed fields excluded.""" - from synthorg.api.dto import PaginationMeta - - original = PaginatedResponse[str]( - data=("a", "b"), - pagination=PaginationMeta(limit=50, next_cursor=None, has_more=False), - ) - dumped = original.model_dump(exclude_computed_fields=True) - assert "success" not in dumped - restored = PaginatedResponse[str].model_validate(dumped) - assert restored.data == ("a", "b") - assert restored.pagination.has_more is False - assert restored.success is True - - -def test_api_response_rejects_round_trip_with_fabricated_field() -> None: - """A dumped envelope augmented with a stray key must be rejected on revalidate.""" - original = ApiResponse[str](data="ok") - dumped = original.model_dump(exclude_computed_fields=True) - dumped["fabricated"] = "evil" - with pytest.raises(ValidationError) as exc_info: - 
ApiResponse[str].model_validate(dumped) - error_types = {err["type"] for err in exc_info.value.errors()} - assert "extra_forbidden" in error_types diff --git a/tests/unit/api/test_etag.py b/tests/unit/api/test_etag.py index 51b24ca8fd..f43d50a8ee 100644 --- a/tests/unit/api/test_etag.py +++ b/tests/unit/api/test_etag.py @@ -440,11 +440,15 @@ async def streaming_app( _empty_receive, recorder, ) - # 1 start + 3 body messages, all forwarded as-is. + # 1 start + 3 body messages: bodies forwarded as-is, no ETag + # (body is unhashable without buffering), but the + # validator-friendly Cache-Control IS applied so the global + # ``no-store`` does not suppress revalidation for streamed + # allowlisted reads. assert len(recorder.messages) == 1 + len(chunks) headers = dict(recorder.messages[0]["headers"]) assert b"etag" not in headers - assert b"cache-control" not in headers + assert headers[b"cache-control"] == b"private, must-revalidate" bodies = [m["body"] for m in recorder.messages[1:]] assert bodies == chunks # The middle chunks must keep ``more_body=True``; only the last is False. 
diff --git a/tests/unit/api/test_exception_handlers.py b/tests/unit/api/test_exception_handlers.py index f882da0ecb..20d83f06cc 100644 --- a/tests/unit/api/test_exception_handlers.py +++ b/tests/unit/api/test_exception_handlers.py @@ -206,7 +206,7 @@ async def handler() -> None: assert body["error"] == "Backup not found: abc123" _assert_error_detail( body, - error_code=ErrorCode.RECORD_NOT_FOUND, + error_code=ErrorCode.BACKUP_NOT_FOUND, error_category=ErrorCategory.NOT_FOUND, retryable=False, ) @@ -233,7 +233,7 @@ async def handler() -> None: assert body["error"] == "A backup is already in progress" _assert_error_detail( body, - error_code=ErrorCode.RESOURCE_CONFLICT, + error_code=ErrorCode.BACKUP_IN_PROGRESS, error_category=ErrorCategory.CONFLICT, retryable=False, ) @@ -256,12 +256,13 @@ async def handler() -> None: assert resp.status_code == 500 body = resp.json() assert body["success"] is False - # 5xx scrubs the upstream message; the structured envelope - # surfaces the category title, not the raw exception text. - assert body["error"] == "Backup operation failed" + # 5xx scrubs the upstream message to the class default; the + # distinct ``BACKUP_MANIFEST_ERROR`` code lets clients tell + # a corrupt-manifest failure apart from a generic 500. + assert body["error"] == "Backup manifest is invalid or corrupt" _assert_error_detail( body, - error_code=ErrorCode.INTERNAL_ERROR, + error_code=ErrorCode.BACKUP_MANIFEST_ERROR, error_category=ErrorCategory.INTERNAL, retryable=False, ) @@ -298,20 +299,21 @@ async def handler() -> None: @pytest.mark.parametrize( "exc_cls", - [RestoreError, RetentionError, ComponentBackupError], - ids=["restore_error", "retention_error", "component_backup_error"], + [RetentionError, ComponentBackupError], + ids=["retention_error", "component_backup_error"], ) def test_other_backup_subtypes_map_to_structured_500( self, exc_cls: type[BackupError], ) -> None: - """``RestoreError``, ``RetentionError``, ``ComponentBackupError``. 
+ """``RetentionError``, ``ComponentBackupError``. Pin the contract that every non-special-cased ``BackupError`` subtype routes through ``handle_backup_error``'s catch-all branch and produces a structured 5xx with ``INTERNAL_ERROR``. Adding an explicit branch for any of these in a future refactor - must update this test. + must update this test. ``RestoreError`` and ``ManifestError`` + now carry distinct codes and have their own tests. """ @get("/test") @@ -331,6 +333,33 @@ async def handler() -> None: retryable=False, ) + def test_restore_error_maps_to_structured_500_with_distinct_code( + self, + ) -> None: + """``RestoreError`` carries the distinct ``BACKUP_RESTORE_FAILED``. + + 5xx still scrubs the upstream message to the class default, but + the distinct code lets clients/operators alert on restore + failures specifically rather than a generic internal error. + """ + + @get("/test") + async def handler() -> None: + msg = "restore subtype failure" + raise RestoreError(msg) + + with TestClient(make_exception_handler_app(handler)) as client: + resp = client.get("/test") + assert resp.status_code == 500 + body = resp.json() + assert body["error"] == "Restore operation failed" + _assert_error_detail( + body, + error_code=ErrorCode.BACKUP_RESTORE_FAILED, + error_category=ErrorCategory.INTERNAL, + retryable=False, + ) + @pytest.mark.parametrize( ("exc_cls", "status_code", "expected_detail"), [ diff --git a/tests/unit/communication/meetings/test_service.py b/tests/unit/communication/meetings/test_service.py index 0098adfcac..b8ce077ea9 100644 --- a/tests/unit/communication/meetings/test_service.py +++ b/tests/unit/communication/meetings/test_service.py @@ -6,7 +6,7 @@ ``COMMUNICATION_MEETING_DELETED`` event on success only. 
""" -from unittest.mock import MagicMock +from typing import Any import pytest import structlog.testing @@ -17,13 +17,14 @@ from synthorg.observability.events.communication import ( COMMUNICATION_MEETING_DELETED, ) +from tests._shared import mock_of pytestmark = pytest.mark.unit -def _make_service(*, deleted: bool) -> tuple[MeetingService, MagicMock]: - orch = MagicMock(spec=MeetingOrchestrator) - orch.delete_record = MagicMock(return_value=deleted) +def _make_service(*, deleted: bool) -> tuple[MeetingService, Any]: + orch = mock_of[MeetingOrchestrator]() + orch.delete_record.return_value = deleted service = MeetingService(orchestrator=orch) return service, orch @@ -63,3 +64,26 @@ async def test_returns_false_and_skips_audit_when_id_missing(self) -> None: orch.delete_record.assert_called_once_with("missing") audit = [e for e in events if e.get("event") == COMMUNICATION_MEETING_DELETED] assert audit == [] + + +class TestMeetingServiceGetMeeting: + """``get_meeting`` is an O(1) delegate, not a full-record scan.""" + + async def test_delegates_to_get_record_and_never_scans(self) -> None: + sentinel = object() + orch = mock_of[MeetingOrchestrator]() + orch.get_record.return_value = sentinel + service = MeetingService(orchestrator=orch) + + result = await service.get_meeting(NotBlankStr("meet-1")) + + assert result is sentinel + orch.get_record.assert_called_once_with("meet-1") + orch.get_records.assert_not_called() + + async def test_returns_none_when_record_absent(self) -> None: + orch = mock_of[MeetingOrchestrator]() + orch.get_record.return_value = None + service = MeetingService(orchestrator=orch) + + assert await service.get_meeting(NotBlankStr("nope")) is None diff --git a/tests/unit/communication/messages/test_service.py b/tests/unit/communication/messages/test_service.py index fa062478eb..1f6e7a9bc6 100644 --- a/tests/unit/communication/messages/test_service.py +++ b/tests/unit/communication/messages/test_service.py @@ -6,6 +6,7 @@ """ from types import 
SimpleNamespace +from typing import Any from unittest.mock import AsyncMock import pytest @@ -17,13 +18,15 @@ from synthorg.observability.events.communication import ( COMMUNICATION_MESSAGE_DELETED, ) +from synthorg.persistence.message_protocol import MessageRepository +from tests._shared import mock_of pytestmark = pytest.mark.unit -def _make_service(*, deleted: bool) -> tuple[MessageService, AsyncMock]: - repo = AsyncMock() - repo.delete = AsyncMock(return_value=deleted) +def _make_service(*, deleted: bool) -> tuple[MessageService, Any]: + repo = mock_of[MessageRepository]() + repo.delete.return_value = deleted persistence = SimpleNamespace(messages=repo) bus = AsyncMock(spec=MessageBus) service = MessageService(bus=bus, persistence=persistence) @@ -66,3 +69,43 @@ async def test_returns_false_and_skips_audit_when_id_missing(self) -> None: repo.delete.assert_awaited_once_with("missing") audit = [e for e in events if e.get("event") == COMMUNICATION_MESSAGE_DELETED] assert audit == [] + + +class TestMessageServiceGetMessage: + """``get_message`` is a single indexed point read, not a scan.""" + + async def test_delegates_to_get_by_id_and_never_scans_history( + self, + ) -> None: + sentinel = object() + repo = mock_of[MessageRepository]() + repo.get_by_id.return_value = sentinel + persistence = SimpleNamespace(messages=repo) + service = MessageService( + bus=AsyncMock(spec=MessageBus), + persistence=persistence, + ) + + result = await service.get_message( + channel=NotBlankStr("chan1"), + message_id="msg-9", + ) + + assert result is sentinel + repo.get_by_id.assert_awaited_once_with("chan1", "msg-9") + repo.get_history.assert_not_awaited() + + async def test_returns_none_when_repo_returns_none(self) -> None: + repo = mock_of[MessageRepository]() + repo.get_by_id.return_value = None + service = MessageService( + bus=AsyncMock(spec=MessageBus), + persistence=SimpleNamespace(messages=repo), + ) + + result = await service.get_message( + channel=NotBlankStr("chan1"), + 
message_id="nope", + ) + + assert result is None diff --git a/tests/unit/core/test_company_reporting.py b/tests/unit/core/test_company_reporting.py index fa48a07ac8..1ac00279d1 100644 --- a/tests/unit/core/test_company_reporting.py +++ b/tests/unit/core/test_company_reporting.py @@ -156,8 +156,15 @@ def test_computed_keys_in_model_dump_roundtrip(self) -> None: assert data["subordinate_key"] == "backend-senior" assert data["supervisor_key"] == "lead-001" - # Round-trip: computed fields are ignored on input - r2 = ReportingLine.model_validate(data) + # Under extra="forbid", computed fields cannot round-trip as + # inputs; recompute them by feeding back only the stored fields. + r2 = ReportingLine.model_validate( + { + k: v + for k, v in data.items() + if k not in {"subordinate_key", "supervisor_key"} + }, + ) assert r2.subordinate_key == "backend-senior" assert r2.supervisor_key == "lead-001" diff --git a/tests/unit/engine/workflow/test_subworkflow_registry.py b/tests/unit/engine/workflow/test_subworkflow_registry.py index 4c08eb662c..b208b02846 100644 --- a/tests/unit/engine/workflow/test_subworkflow_registry.py +++ b/tests/unit/engine/workflow/test_subworkflow_registry.py @@ -26,6 +26,7 @@ SubworkflowSummary, ) from synthorg.engine.workflow.subworkflow_registry import SubworkflowRegistry +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE from synthorg.persistence.subworkflow_protocol import SubworkflowRepository _DEFAULT_TS = datetime(2026, 4, 1, 12, 0, 0, tzinfo=UTC) @@ -151,12 +152,24 @@ async def list_summaries( ) return tuple(summaries)[:limit] - async def search(self, query: str) -> tuple[SubworkflowSummary, ...]: + async def search( + self, + query: str, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[SubworkflowSummary, ...]: q = query.lower() - summaries = await self.list_summaries() - return tuple( - s for s in summaries if q in s.name.lower() or q in s.description.lower() + # Fetch the full candidate set before filtering: 
the default + # page cap would pre-truncate matches beyond the first page. + summaries = await self.list_summaries( + limit=max(len(self._rows), DEFAULT_PAGE_SIZE), + ) + matched = sorted( + (s for s in summaries if q in s.name.lower() or q in s.description.lower()), + key=lambda s: s.subworkflow_id, ) + return tuple(matched[offset : offset + limit]) async def delete( self, @@ -182,13 +195,24 @@ async def find_parents( self, subworkflow_id: str, version: str | None = None, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, ) -> tuple[ParentReference, ...]: - matching = [ - p - for p in self._parents.get(subworkflow_id, []) - if version is None or p.pinned_version == version - ] - return tuple(matching) + matching = sorted( + ( + p + for p in self._parents.get(subworkflow_id, []) + if version is None or p.pinned_version == version + ), + key=lambda r: ( + r.parent_type, + r.parent_id, + r.node_id, + r.pinned_version, + ), + ) + return tuple(matching[offset : offset + limit]) def add_parent(self, subworkflow_id: str, parent: ParentReference) -> None: """Test helper to inject a parent reference.""" diff --git a/tests/unit/memory/test_service.py b/tests/unit/memory/test_service.py index 9499f89e4c..809de68e53 100644 --- a/tests/unit/memory/test_service.py +++ b/tests/unit/memory/test_service.py @@ -11,7 +11,6 @@ import pytest -from synthorg.core.persistence_errors import QueryError from synthorg.core.types import NotBlankStr from synthorg.memory.embedding.fine_tune_models import ( CheckpointRecord, @@ -485,7 +484,9 @@ async def test_rollback_returns_success_when_artifacts_consistent( class TestMemoryServiceReReadFailure: - """``deploy`` detects missing-after-write and raises ``QueryError``.""" + """``deploy`` maps a vanished-after-activation row to the contracted + ``CheckpointNotFoundError`` (a concurrent delete is the only + realistic cause) rather than a generic ``QueryError``.""" async def test_deploy_raises_when_activation_row_vanishes(self) -> None: class 
_VanishingRepo(_FakeCheckpointRepo): @@ -514,7 +515,7 @@ async def set_active(self, checkpoint_id: str) -> None: run_repo=_FakeRunRepo(), settings_service=None, ) - with pytest.raises(QueryError): + with pytest.raises(CheckpointNotFoundError): await service.deploy_checkpoint(NotBlankStr("a")) diff --git a/tests/unit/persistence/_shared/test_pagination.py b/tests/unit/persistence/_shared/test_pagination.py new file mode 100644 index 0000000000..1d67b17e31 --- /dev/null +++ b/tests/unit/persistence/_shared/test_pagination.py @@ -0,0 +1,75 @@ +"""Unit tests for the pagination drain helpers. + +``collect_all`` / ``collect_all_mapping`` reassemble the complete +result of a now-paginated repo method for the callers that genuinely +need the full set (boot-time rehydration, drift detection, +referential-integrity checks) while every underlying query stays +bounded. +""" + +import pytest + +from synthorg.core.persistence_errors import QueryError +from synthorg.persistence._shared import collect_all, collect_all_mapping + +pytestmark = pytest.mark.unit + + +class TestCollectAll: + async def test_drains_every_page_in_order(self) -> None: + rows = tuple(range(250)) + calls: list[tuple[int, int]] = [] + + async def fetch(limit: int, offset: int) -> tuple[int, ...]: + calls.append((limit, offset)) + return rows[offset : offset + limit] + + result = await collect_all(fetch, page_size=100) + + assert result == rows + # 100 + 100 + 50 -> a short final page terminates the sweep. + assert calls == [(100, 0), (100, 100), (100, 200)] + + async def test_exact_multiple_stops_on_empty_page(self) -> None: + rows = tuple(range(200)) + calls: list[tuple[int, int]] = [] + + async def fetch(limit: int, offset: int) -> tuple[int, ...]: + calls.append((limit, offset)) + return rows[offset : offset + limit] + + # 200 rows / page 100 -> two full pages then an empty page. 
+ assert await collect_all(fetch, page_size=100) == rows + # The terminating empty fetch at offset 200 must happen, else + # an exact-multiple source never stops. + assert calls == [(100, 0), (100, 100), (100, 200)] + + async def test_empty_source_returns_empty_tuple(self) -> None: + async def fetch(limit: int, offset: int) -> tuple[int, ...]: + return () + + assert await collect_all(fetch, page_size=10) == () + + +class TestCollectAllMapping: + async def test_merges_disjoint_pages(self) -> None: + full = {f"e{i:03d}": i for i in range(120)} + ordered = sorted(full.items()) + + async def fetch(limit: int, offset: int) -> dict[str, int]: + return dict(ordered[offset : offset + limit]) + + assert await collect_all_mapping(fetch, page_size=50) == full + + async def test_rejects_non_positive_page_size(self) -> None: + async def fetch(limit: int, offset: int) -> dict[str, int]: + return {} + + with pytest.raises(QueryError): + await collect_all_mapping(fetch, page_size=0) + + async def test_empty_source_returns_empty_dict(self) -> None: + async def fetch(limit: int, offset: int) -> dict[str, int]: + return {} + + assert await collect_all_mapping(fetch, page_size=10) == {} diff --git a/tests/unit/persistence/test_protocol.py b/tests/unit/persistence/test_protocol.py index 58f49ad46b..5576ed4ded 100644 --- a/tests/unit/persistence/test_protocol.py +++ b/tests/unit/persistence/test_protocol.py @@ -171,6 +171,13 @@ async def query( async def purge_before(self, threshold: Any) -> int: return 0 + async def get_by_id( + self, + channel: str, + message_id: str, + ) -> Message | None: + return None + async def delete(self, message_id: str) -> bool: return False diff --git a/tests/unit/scripts/test_check_frozen_model_extra_forbid.py b/tests/unit/scripts/test_check_frozen_model_extra_forbid.py new file mode 100644 index 0000000000..b920cd08d0 --- /dev/null +++ b/tests/unit/scripts/test_check_frozen_model_extra_forbid.py @@ -0,0 +1,122 @@ +"""Self-tests for the project-wide 
``frozen-extra-forbid`` gate. + +Pins the gate contract: every frozen ``ConfigDict`` model needs +``extra="forbid"`` unless it declares a ``@computed_field`` (automatic +section-8 carve-out) or carries a reasoned per-line opt-out. +""" + +import importlib.util +from pathlib import Path +from typing import cast + +import pytest + +pytestmark = pytest.mark.unit + +_GATE_PATH = ( + Path(__file__).resolve().parents[3] + / "scripts" + / "check_frozen_model_extra_forbid.py" +) + + +def _load_gate() -> object: + spec = importlib.util.spec_from_file_location( + "_frozen_extra_forbid_gate", + _GATE_PATH, + ) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def _walk(tmp_path: Path, source: str) -> list[tuple[Path, int, str]]: + gate = _load_gate() + target = tmp_path / "mod.py" + target.write_text(source, encoding="utf-8") + result = gate._walk(target) # type: ignore[attr-defined] + return cast("list[tuple[Path, int, str]]", result) + + +def test_frozen_with_forbid_passes(tmp_path: Path) -> None: + src = ( + "from pydantic import BaseModel, ConfigDict\n\n\n" + "class Ok(BaseModel):\n" + ' model_config = ConfigDict(frozen=True, extra="forbid")\n' + ) + assert _walk(tmp_path, src) == [] + + +def test_frozen_without_forbid_is_violation(tmp_path: Path) -> None: + src = ( + "from pydantic import BaseModel, ConfigDict\n\n\n" + "class Bad(BaseModel):\n" + " model_config = ConfigDict(frozen=True)\n" + ) + violations = _walk(tmp_path, src) + assert len(violations) == 1 + assert violations[0][2] == "Bad" + + +def test_computed_field_is_auto_exempt(tmp_path: Path) -> None: + src = ( + "from pydantic import BaseModel, ConfigDict, computed_field\n\n\n" + "class Derived(BaseModel):\n" + " model_config = ConfigDict(frozen=True)\n\n" + " @computed_field\n" + " @property\n" + " def x(self) -> int:\n" + " return 1\n" + ) + assert _walk(tmp_path, src) == [] + + +def 
test_optout_with_reason_passes(tmp_path: Path) -> None: + src = ( + "from pydantic import BaseModel, ConfigDict\n\n\n" + "class Allowed(BaseModel): " + "# lint-allow: frozen-extra-forbid -- provider keys vary\n" + ' model_config = ConfigDict(frozen=True, extra="allow")\n' + ) + assert _walk(tmp_path, src) == [] + + +def test_bare_optout_is_violation(tmp_path: Path) -> None: + src = ( + "from pydantic import BaseModel, ConfigDict\n\n\n" + "class BareOptOut(BaseModel): # lint-allow: frozen-extra-forbid\n" + ' model_config = ConfigDict(frozen=True, extra="allow")\n' + ) + violations = _walk(tmp_path, src) + assert len(violations) == 1 + assert violations[0][2] == "BareOptOut" + + +def test_non_frozen_model_is_ignored(tmp_path: Path) -> None: + src = ( + "from pydantic import BaseModel, ConfigDict\n\n\n" + "class Mutable(BaseModel):\n" + " model_config = ConfigDict(frozen=False)\n" + ) + assert _walk(tmp_path, src) == [] + + +def test_last_write_wins(tmp_path: Path) -> None: + """A class cannot strict-config early then override it later.""" + src = ( + "from pydantic import BaseModel, ConfigDict\n\n\n" + "class Sneaky(BaseModel):\n" + ' model_config = ConfigDict(frozen=True, extra="forbid")\n' + " model_config = ConfigDict(frozen=True)\n" + ) + violations = _walk(tmp_path, src) + assert len(violations) == 1 + assert violations[0][2] == "Sneaky" + + +def test_real_codebase_is_compliant() -> None: + """The gate must be green against the actual tree (no regressions).""" + gate = _load_gate() + assert gate.main() == 0 # type: ignore[attr-defined] diff --git a/tests/unit/telemetry/test_collector.py b/tests/unit/telemetry/test_collector.py index 9dd54f8a2a..8fdaa22270 100644 --- a/tests/unit/telemetry/test_collector.py +++ b/tests/unit/telemetry/test_collector.py @@ -836,3 +836,72 @@ def test_looks_like_ci_uses_os_environ_when_none( monkeypatch.delenv("CI", raising=False) assert _looks_like_ci(None) is False + + +class TestPeerReadExponentialBackoff: + 
"""``_read_peer_deployment_id`` waits out a slow peer write with + exponential backoff (5 / 10 / 20 ms), not a flat 5 ms.""" + + def test_backoff_doubles_per_attempt_and_returns_late_write( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + import time as _time_mod + + from synthorg.telemetry import collector as collector_mod + + id_path = tmp_path / "deployment_id" + id_path.write_text("", encoding="utf-8") + valid_uuid = "11111111-2222-4333-8444-555555555555" + sleeps: list[float] = [] + + def _fake_sleep(seconds: float) -> None: + sleeps.append(round(seconds, 6)) + # The peer finishes its write during the second backoff + # window, so the third read attempt succeeds. + if len(sleeps) == 2: + id_path.write_text(valid_uuid, encoding="utf-8") + + # ``collector`` does ``import time`` then ``time.sleep(...)``; + # patching the stdlib module's ``sleep`` affects that same + # reference without poking a not-explicitly-exported attribute. + monkeypatch.setattr(_time_mod, "sleep", _fake_sleep) + + result = collector_mod._read_peer_deployment_id(str(id_path)) + + assert result == valid_uuid + # 2 sleeps for the 2 empty reads; exponential 5 ms -> 10 ms + # (base * 2**attempt), never a flat 5/5. + assert sleeps == [0.005, 0.01] + + def test_exhausted_peer_read_returns_none_after_full_backoff( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """A peer that never finishes its write exhausts all attempts. + + The file stays empty for every attempt, so the helper backs + off once per attempt and finally returns ``None`` (the caller + then unlinks + repairs via the atomic-create branch). 
+ """ + import time as _time_mod + + from synthorg.telemetry import collector as collector_mod + + id_path = tmp_path / "deployment_id" + id_path.write_text("", encoding="utf-8") + sleeps: list[float] = [] + monkeypatch.setattr( + _time_mod, + "sleep", + lambda seconds: sleeps.append(round(seconds, 6)), + ) + + result = collector_mod._read_peer_deployment_id(str(id_path)) + + assert result is None + # One backoff per attempt, doubling: 5 / 10 / 20 ms. + assert sleeps == [0.005, 0.01, 0.02] + assert len(sleeps) == collector_mod._PEER_READ_RETRY_ATTEMPTS diff --git a/web/src/__tests__/stores/subworkflows.test.ts b/web/src/__tests__/stores/subworkflows.test.ts index d53a9f2da6..9286fe5794 100644 --- a/web/src/__tests__/stores/subworkflows.test.ts +++ b/web/src/__tests__/stores/subworkflows.test.ts @@ -4,12 +4,14 @@ import { useSubworkflowsStore } from '@/stores/subworkflows' import { useToastStore } from '@/stores/toast' import { apiError, - apiSuccess, emptyPage, paginatedFor, voidSuccess, } from '@/mocks/handlers' -import type { listSubworkflows } from '@/api/endpoints/subworkflows' +import type { + listSubworkflows, + searchSubworkflows, +} from '@/api/endpoints/subworkflows' import type { SubworkflowSummary } from '@/api/types/workflows' import { server } from '@/test-setup' @@ -167,7 +169,11 @@ describe('fetchSubworkflows', () => { http.get('/api/v1/subworkflows/search', ({ request }) => { searchCalls += 1 searchQuery = new URL(request.url).searchParams.get('q') - return HttpResponse.json(apiSuccess([])) + return HttpResponse.json( + paginatedFor( + emptyPage(), + ), + ) }), http.get('/api/v1/subworkflows', () => { listCalls += 1 diff --git a/web/src/api/endpoints/subworkflows.ts b/web/src/api/endpoints/subworkflows.ts index 55c8fa7de8..2a1713962f 100644 --- a/web/src/api/endpoints/subworkflows.ts +++ b/web/src/api/endpoints/subworkflows.ts @@ -19,12 +19,13 @@ export async function listSubworkflows( export async function searchSubworkflows( query: string, -): 
Promise { - const response = await apiClient.get>( + params?: PaginationParams, +): Promise> { + const response = await apiClient.get>( '/subworkflows/search', - { params: { q: query } }, + { params: { q: query, ...params } }, ) - return unwrap(response) + return unwrapPaginated(response) } export async function listVersions( diff --git a/web/src/api/types/error-codes.gen.ts b/web/src/api/types/error-codes.gen.ts index 926909c418..f55ff4ff75 100644 --- a/web/src/api/types/error-codes.gen.ts +++ b/web/src/api/types/error-codes.gen.ts @@ -20,6 +20,9 @@ export const ErrorCode = { ARTIFACT_TOO_LARGE: 2002, TOOL_PARAMETER_ERROR: 2003, PROVIDER_TIER_COVERAGE_INSUFFICIENT: 2004, + IMMUTABLE_FIELD_MISMATCH: 2005, + CHECKPOINT_ROLLBACK_UNAVAILABLE: 2006, + CHECKPOINT_ROLLBACK_CORRUPT: 2007, RESOURCE_NOT_FOUND: 3000, RECORD_NOT_FOUND: 3001, ROUTE_NOT_FOUND: 3002, @@ -33,6 +36,10 @@ export const ErrorCode = { CONNECTION_NOT_FOUND: 3010, MODEL_NOT_FOUND: 3011, ESCALATION_NOT_FOUND: 3012, + WORKFLOW_DEFINITION_NOT_FOUND: 3013, + AB_TEST_NOT_FOUND: 3014, + BACKUP_NOT_FOUND: 3015, + MEMORY_ENTRY_NOT_FOUND: 3016, RESOURCE_CONFLICT: 4000, DUPLICATE_RECORD: 4001, VERSION_CONFLICT: 4002, @@ -42,6 +49,11 @@ export const ErrorCode = { ESCALATION_ALREADY_DECIDED: 4006, MIXED_CURRENCY_AGGREGATION: 4007, WORKFLOW_EXECUTION_ALREADY_TERMINAL: 4008, + BACKUP_IN_PROGRESS: 4009, + CHECKPOINT_OPERATION_CONFLICT: 4010, + FINE_TUNE_RUN_ACTIVE: 4011, + TRAINING_PLAN_NOT_MODIFIABLE: 4012, + BACKUP_UNRESTARTABLE: 4013, RATE_LIMITED: 5000, PER_OPERATION_RATE_LIMITED: 5001, CONCURRENCY_LIMIT_EXCEEDED: 5002, @@ -71,6 +83,11 @@ export const ErrorCode = { TOOL_EXECUTION_ERROR: 8008, FEATURE_NOT_IMPLEMENTED: 8009, ARTIFACT_NO_STORAGE_BACKEND: 8010, + AGENT_IDENTITY_ROLLBACK_FAILED: 8011, + BACKUP_RESTORE_FAILED: 8012, + BACKUP_MANIFEST_ERROR: 8013, + SETTINGS_ENCRYPTION_ERROR: 8014, + SINK_CONFIG_VALIDATION_ERROR: 8015, } as const; export type ErrorCode = (typeof ErrorCode)[keyof typeof ErrorCode]; diff 
--git a/web/src/api/types/openapi.gen.ts b/web/src/api/types/openapi.gen.ts index e8efaf9a33..557e1f0a3e 100644 --- a/web/src/api/types/openapi.gen.ts +++ b/web/src/api/types/openapi.gen.ts @@ -5382,14 +5382,6 @@ export type components = { /** @description Whether the request succeeded (derived from ``error``). */ readonly success: boolean; }; - /** ApiResponse[tuple[SubworkflowSummary, ...]] */ - readonly "ApiResponse_tuple_SubworkflowSummary_..._": { - readonly data: readonly components["schemas"]["SubworkflowSummary"][] | null; - readonly error: string | null; - readonly error_detail: components["schemas"]["ErrorDetail"] | null; - /** @description Whether the request succeeded (derived from ``error``). */ - readonly success: boolean; - }; /** ApiResponse[tuple[TeamResponse, ...]] */ readonly "ApiResponse_tuple_TeamResponse_..._": { readonly data: readonly components["schemas"]["TeamResponse"][] | null; @@ -7300,7 +7292,7 @@ export type components = { * 8xxx = internal. * @enum {integer} */ - readonly ErrorCode: 1000 | 1001 | 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 2000 | 2001 | 2002 | 2003 | 2004 | 3000 | 3001 | 3002 | 3003 | 3004 | 3005 | 3006 | 3007 | 3008 | 3009 | 3010 | 3011 | 3012 | 4000 | 4001 | 4002 | 4003 | 4004 | 4005 | 4006 | 4007 | 4008 | 5000 | 5001 | 5002 | 6000 | 6001 | 6002 | 6003 | 6004 | 7000 | 7001 | 7002 | 7003 | 7004 | 7005 | 7006 | 7007 | 7008 | 7009 | 8000 | 8001 | 8002 | 8003 | 8004 | 8005 | 8006 | 8007 | 8008 | 8009 | 8010; + readonly ErrorCode: 1000 | 1001 | 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 3000 | 3001 | 3002 | 3003 | 3004 | 3005 | 3006 | 3007 | 3008 | 3009 | 3010 | 3011 | 3012 | 3013 | 3014 | 3015 | 3016 | 4000 | 4001 | 4002 | 4003 | 4004 | 4005 | 4006 | 4007 | 4008 | 4009 | 4010 | 4011 | 4012 | 4013 | 5000 | 5001 | 5002 | 6000 | 6001 | 6002 | 6003 | 6004 | 7000 | 7001 | 7002 | 7003 | 7004 | 7005 | 7006 | 7007 | 7008 | 7009 | 8000 | 8001 | 
8002 | 8003 | 8004 | 8005 | 8006 | 8007 | 8008 | 8009 | 8010 | 8011 | 8012 | 8013 | 8014 | 8015; /** ErrorDetail */ readonly ErrorDetail: { readonly detail: string; @@ -12581,7 +12573,9 @@ export interface operations { readonly query?: never; readonly header?: never; readonly path: { + /** @description Resource identifier */ readonly agent_id: string; + /** @description Resource identifier */ readonly memory_id: string; }; readonly cookie?: never; @@ -12723,6 +12717,7 @@ export interface operations { readonly query?: never; readonly header?: never; readonly path: { + /** @description Resource identifier */ readonly checkpoint_id: string; }; readonly cookie?: never; @@ -12752,6 +12747,7 @@ export interface operations { readonly query?: never; readonly header?: never; readonly path: { + /** @description Resource identifier */ readonly checkpoint_id: string; }; readonly cookie?: never; @@ -12782,6 +12778,7 @@ export interface operations { readonly query?: never; readonly header?: never; readonly path: { + /** @description Resource identifier */ readonly checkpoint_id: string; }; readonly cookie?: never; @@ -12843,6 +12840,7 @@ export interface operations { readonly query?: never; readonly header?: never; readonly path: { + /** @description Resource identifier */ readonly run_id: string; }; readonly cookie?: never; @@ -20440,6 +20438,10 @@ export interface operations { readonly ApiV1SubworkflowsSearchSearchSubworkflows: { readonly parameters: { readonly query: { + /** @description Opaque pagination cursor returned by the previous page */ + readonly cursor?: string | null; + /** @description Page size (default 50, max 200) */ + readonly limit?: number; /** @description Search substring */ readonly q: string; }; @@ -20455,7 +20457,7 @@ export interface operations { readonly [name: string]: unknown; }; content: { - readonly "application/json": components["schemas"]["ApiResponse_tuple_SubworkflowSummary_..._"]; + readonly "application/json": 
components["schemas"]["PaginatedResponse_SubworkflowSummary_"]; }; }; readonly 400: components["responses"]["BadRequest"]; @@ -21467,7 +21469,7 @@ export interface operations { readonly [name: string]: unknown; }; content: { - readonly "application/json": unknown; + readonly "application/json": string; }; }; readonly 400: components["responses"]["BadRequest"]; diff --git a/web/src/mocks/handlers/subworkflows.ts b/web/src/mocks/handlers/subworkflows.ts index 1d6f36c4de..d2fcd08cfb 100644 --- a/web/src/mocks/handlers/subworkflows.ts +++ b/web/src/mocks/handlers/subworkflows.ts @@ -34,7 +34,9 @@ export const subworkflowsHandlers = [ ), ), http.get('/api/v1/subworkflows/search', () => - HttpResponse.json(successFor([])), + HttpResponse.json( + paginatedFor(emptyPage()), + ), ), http.get('/api/v1/subworkflows/:id/versions', () => HttpResponse.json(paginatedFor(emptyPage())), diff --git a/web/src/stores/subworkflows.ts b/web/src/stores/subworkflows.ts index 3ca5d063ae..29cd2691c1 100644 --- a/web/src/stores/subworkflows.ts +++ b/web/src/stores/subworkflows.ts @@ -8,6 +8,7 @@ import { createLogger } from '@/lib/logger' import { useToastStore } from '@/stores/toast' import { getErrorMessage } from '@/utils/errors' import { sanitizeForLog } from '@/utils/logging' +import type { PaginatedResult } from '@/api/client' import type { SubworkflowSummary } from '@/api/types/workflows' const log = createLogger('subworkflows') @@ -63,30 +64,24 @@ export const useSubworkflowsStore = create((set, get) => ({ })) try { const query = get().searchQuery.trim() - if (query) { - // Search endpoint is non-paginated: a search returns matches - // across the whole registry, and the user expects to see all - // matches, not a single page. 
- const results = await searchSubworkflows(query) - if (isStaleRequest(token)) return - set(() => ({ - subworkflows: results, - listLoading: false, - subworkflowsTruncated: false, - })) - return - } - // Drain cursored pages eagerly so the page can render a - // numeric pager via useListPagination instead of a "Load More" - // button. MAX_PAGES bounds the worst case. + // Both the unfiltered list and the search endpoint are + // cursor-paginated; drain cursored pages eagerly so the page can + // render a numeric pager via useListPagination instead of a + // "Load More" button. MAX_PAGES bounds the worst case. The user + // expects to see every match, so a search drains the same way. const collected: SubworkflowSummary[] = [] let cursor: string | null = null let truncated = false for (let pageIndex = 0; pageIndex < MAX_PAGES; pageIndex += 1) { - const page = await listSubworkflows({ - cursor: cursor ?? undefined, - limit: PAGE_SIZE, - }) + const page: PaginatedResult = query + ? await searchSubworkflows(query, { + cursor: cursor ?? undefined, + limit: PAGE_SIZE, + }) + : await listSubworkflows({ + cursor: cursor ?? undefined, + limit: PAGE_SIZE, + }) if (isStaleRequest(token)) return collected.push(...page.data) if (!page.hasMore || !page.nextCursor) break