From 71bb26c175da874c7c5801c93311078ff3d8a7a3 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Wed, 18 Mar 2026 23:46:27 +0100 Subject: [PATCH 1/6] fix: activate structured logging pipeline -- wire 8-sink system, suppress spam, integrate Uvicorn Wire bootstrap_logging() into create_app() so the comprehensive 8-sink structured logging system (console + synthorg.log + audit.log + errors.log + agent_activity.log + cost_usage.log + debug.log + access.log) actually activates at startup. Previously all logs fell through to Uvicorn's default stdout handler despite the full pipeline being implemented. - Wire bootstrap_logging() at top of create_app() with SYNTHORG_LOG_DIR env var support for Docker volume paths (#559) - Disable Uvicorn access log (RequestLoggingMiddleware already provides richer structured access logging) and add access.log sink (#560) - Replace per-timeout channel debug spam (41% of all log output) with periodic batch summary every 60 idle polls (#561) - Add SYNTHORG_LOG_LEVEL env var to override console sink level (#562) - Add Observability and Logging section to operations design spec (#563) Closes #559, closes #560, closes #561, closes #562, closes #563 Co-Authored-By: Claude Opus 4.6 (1M context) --- docker/.env.example | 6 + docker/backend/Dockerfile | 6 +- docker/compose.yml | 1 + docs/design/operations.md | 110 ++++++++++++++++++ src/synthorg/api/app.py | 40 +++++++ src/synthorg/api/server.py | 2 + src/synthorg/communication/bus_memory.py | 23 ++-- src/synthorg/observability/config.py | 7 ++ .../observability/events/communication.py | 1 + src/synthorg/observability/setup.py | 47 +++++++- src/synthorg/observability/sinks.py | 1 + tests/unit/api/test_app.py | 6 + tests/unit/observability/test_config.py | 4 +- tests/unit/observability/test_setup.py | 4 +- 14 files changed, 242 insertions(+), 16 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index ead26fa080..c67c741a18 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -26,6 +26,12 @@ SYNTHORG_PERSISTENCE_BACKEND=sqlite # Memory backend (currently: mem0) SYNTHORG_MEMORY_BACKEND=mem0 +# Log file directory (inside container: /data/logs, persisted via volume) +SYNTHORG_LOG_DIR=/data/logs + +# Console log level override (valid: debug, info, warning, error, critical) +# SYNTHORG_LOG_LEVEL=info + # --- Container Networking ---------------------------------------------------- # Host port for the backend API BACKEND_PORT=8000 diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 0a16f8b011..42b3933828 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -120,6 +120,10 @@ HEALTHCHECK --interval=10s --timeout=5s --retries=3 --start-period=30s \ # on Python 3.14+ (harmless import-time check, not actual v1 usage). # Tracked: Litestar v3 will drop the v1 compat layer entirely (#551). ENV PYTHONWARNINGS="ignore::UserWarning:pydantic.v1" +# Disable Uvicorn's default access log -- RequestLoggingMiddleware provides +# richer structured access logging via structlog. log_config=None is set +# in server.py; this env var covers the --factory CMD path. +ENV UVICORN_ACCESS_LOG=0 ENTRYPOINT [] -CMD ["/app/.venv/bin/uvicorn", "synthorg.api.app:create_app", "--factory"] +CMD ["/app/.venv/bin/uvicorn", "synthorg.api.app:create_app", "--factory", "--no-access-log"] diff --git a/docker/compose.yml b/docker/compose.yml index 8c41bb350a..7d5ff7885d 100644 --- a/docker/compose.yml +++ b/docker/compose.yml @@ -19,6 +19,7 @@ services: SYNTHORG_PORT: "${SYNTHORG_PORT:-8000}" SYNTHORG_DB_PATH: "/data/synthorg.db" SYNTHORG_MEMORY_DIR: "/data/memory" + SYNTHORG_LOG_DIR: "${SYNTHORG_LOG_DIR:-/data/logs}" # Disable Mem0 telemetry by default. Override in .env to enable. MEM0_TELEMETRY: "${MEM0_TELEMETRY:-false}" # Bridge SYNTHORG_* to uvicorn's native env vars (exec form CMD, no shell) diff --git a/docs/design/operations.md b/docs/design/operations.md index 6c7d403e5f..3eddf39acb 100644 --- a/docs/design/operations.md +++ b/docs/design/operations.md @@ -1133,3 +1133,113 @@ Backup settings live in the `backup` namespace with runtime editability via `Bac | `GET` | `/api/v1/admin/backups/{id}` | Get backup details | | `DELETE` | `/api/v1/admin/backups/{id}` | Delete a specific backup | | `POST` | `/api/v1/admin/backups/restore` | Restore from backup (requires `confirm=true`) | + +## Observability and Logging + +Structured logging pipeline built on **structlog** + stdlib, with automatic sensitive field +redaction, async-safe correlation tracking, and per-domain log routing. + +### Sink Layout + +Eight default sinks, activated at startup via `bootstrap_logging()`: + +| Sink | Type | Level | Format | Routes | Description | +|------|------|-------|--------|--------|-------------| +| Console | stderr | INFO | Colored text | All loggers | Human-readable development output | +| `synthorg.log` | File | INFO | JSON | All loggers | Main application log (catch-all) | +| `audit.log` | File | INFO | JSON | `synthorg.security.*` | Security events only | +| `errors.log` | File | ERROR | JSON | All loggers | Errors and above only | +| `agent_activity.log` | File | DEBUG | JSON | `synthorg.engine.*`, `synthorg.core.*` | Agent execution and task lifecycle | +| `cost_usage.log` | File | INFO | JSON | `synthorg.budget.*`, `synthorg.providers.*` | Cost records and provider calls | +| `debug.log` | File | DEBUG | JSON | All loggers | Full debug trace (catch-all) | +| `access.log` | File | DEBUG | JSON | `synthorg.api.*` | HTTP request/response access log | + +Logger name routing is implemented via `_LoggerNameFilter` on file handlers. Sinks without +explicit routing are catch-all (accept all loggers at their configured level). + +### Log Directory + +- **Docker**: `/data/logs/` (under the `synthorg-data` volume, persisted across restarts) +- **Local dev**: `logs/` relative to working directory (default) +- **Override**: `SYNTHORG_LOG_DIR` env var + +### Rotation + +File sinks use `RotatingFileHandler` by default (10 MB max, 5 backup files). Alternative: +`WatchedFileHandler` for external logrotate (`rotation.strategy: external` in config). + +### Sensitive Field Redaction + +The `sanitize_sensitive_fields` processor automatically redacts values for keys matching: +`password`, `secret`, `token`, `api_key`, `api_secret`, `authorization`, `credential`, +`private_key`, `bearer`, `session`. Redaction applies at all nesting depths in structured +log events. Redacted values are replaced with `"[REDACTED]"`. + +### Correlation Tracking + +Three correlation IDs propagated via `contextvars` (async-safe): + +- **`request_id`**: Bound per HTTP request by `RequestLoggingMiddleware`. Links all log + events during a single API call. +- **`task_id`**: Bound per task execution. Links agent activity to a specific task. +- **`agent_id`**: Bound per agent execution context. + +All three are automatically injected into every log event by `merge_contextvars` in the +structlog processor chain. + +### Per-Logger Levels + +Default levels per domain module (overridable via `LogConfig.logger_levels`): + +| Logger | Default Level | +|--------|---------------| +| `synthorg.engine` | DEBUG | +| `synthorg.memory` | DEBUG | +| `synthorg.core` | INFO | +| `synthorg.communication` | INFO | +| `synthorg.providers` | INFO | +| `synthorg.budget` | INFO | +| `synthorg.security` | INFO | +| `synthorg.tools` | INFO | +| `synthorg.api` | INFO | +| `synthorg.config` | INFO | +| `synthorg.templates` | INFO | + +### Event Taxonomy + +54 domain-specific event constant modules under `observability/events/` (one per subsystem: +api, budget, tool, git, engine, communication, etc.). Every log call uses a typed constant +(e.g., `API_REQUEST_STARTED`, `BUDGET_RECORD_ADDED`) for consistent, grep-friendly event +names. Format: `".."` (e.g., `"api.request.started"`). + +### Uvicorn Integration + +Uvicorn's default access logger is **disabled** (`access_log=False`, `log_config=None`). +HTTP access logging is handled by `RequestLoggingMiddleware`, which provides richer structured +fields (method, path, status_code, duration_ms, request_id) through structlog. Uvicorn's own +startup/error messages propagate through stdlib's root handler (which structlog wraps via +`ProcessorFormatter`). + +### Docker Logging + +Two layers of log management: + +1. **App-level** (structlog): 8 file sinks with `RotatingFileHandler` (10 MB x 5) writing + JSON to `/data/logs/`. Console sink writes colored text to stderr. +2. **Container-level** (Docker): `json-file` driver with 10 MB x 3 rotation on + stdout/stderr. Captures console sink output and any uncaught stderr. + +The layers are complementary -- app files provide structured, routed logs; Docker captures +the console stream for `docker logs` access. + +### Runtime Settings + +Two observability settings are runtime-editable via `SettingsService`: + +- `root_log_level` (enum: debug/info/warning/error/critical) -- changes the root logger level +- `enable_correlation` (boolean) -- toggles correlation ID injection + +Console sink level can also be overridden via `SYNTHORG_LOG_LEVEL` env var. + +Full sink CRUD via SettingsService (add/remove/reconfigure sinks at runtime) is planned as a +future enhancement. diff --git a/src/synthorg/api/app.py b/src/synthorg/api/app.py index 8ea399b41e..653134317c 100644 --- a/src/synthorg/api/app.py +++ b/src/synthorg/api/app.py @@ -48,6 +48,7 @@ MeetingOrchestrator, # noqa: TC001 ) from synthorg.communication.meeting.scheduler import MeetingScheduler # noqa: TC001 +from synthorg.config import bootstrap_logging from synthorg.config.schema import RootConfig from synthorg.core.approval import ApprovalItem # noqa: TC001 from synthorg.engine.coordination.service import MultiAgentCoordinator # noqa: TC001 @@ -328,6 +329,38 @@ async def on_shutdown() -> None: # to code calling get_api_config(), not to the middleware itself. +def _bootstrap_app_logging(effective_config: RootConfig) -> None: + """Activate the structured logging pipeline. + + Applies the ``SYNTHORG_LOG_DIR`` env var override (for Docker + volume paths) before calling :func:`bootstrap_logging`. + """ + log_dir = os.environ.get("SYNTHORG_LOG_DIR", "").strip() + if log_dir and effective_config.logging is not None: + patched = effective_config.model_copy( + update={ + "logging": effective_config.logging.model_copy( + update={"log_dir": log_dir}, + ), + }, + ) + bootstrap_logging(patched) + elif log_dir: + from synthorg.observability.config import ( # noqa: PLC0415 + DEFAULT_SINKS, + LogConfig, + ) + + bootstrap_logging( + RootConfig( + company_name=effective_config.company_name, + logging=LogConfig(sinks=DEFAULT_SINKS, log_dir=log_dir), + ), + ) + else: + bootstrap_logging(effective_config) + + def create_app( # noqa: PLR0913 *, config: RootConfig | None = None, @@ -371,6 +404,13 @@ def create_app( # noqa: PLR0913 Configured Litestar application. """ effective_config = config or RootConfig(company_name="default") + + # Activate the structured logging pipeline (7+1 sinks) before any + # other setup so that auto-wiring, persistence, and bus logs all + # flow through the configured sinks. Respects SYNTHORG_LOG_DIR + # env var for Docker log directory override. + _bootstrap_app_logging(effective_config) + api_config = effective_config.api # Resolve runtime paths for backup service wiring. diff --git a/src/synthorg/api/server.py b/src/synthorg/api/server.py index a30b887949..9c93d25780 100644 --- a/src/synthorg/api/server.py +++ b/src/synthorg/api/server.py @@ -55,4 +55,6 @@ def run_server(config: RootConfig) -> None: reload=server.reload, ws_ping_interval=ws_ping, ws_ping_timeout=ws_timeout, + access_log=False, + log_config=None, ) diff --git a/src/synthorg/communication/bus_memory.py b/src/synthorg/communication/bus_memory.py index ce3fbc15ca..4d134b4044 100644 --- a/src/synthorg/communication/bus_memory.py +++ b/src/synthorg/communication/bus_memory.py @@ -35,12 +35,12 @@ COMM_CHANNEL_ALREADY_EXISTS, COMM_CHANNEL_CREATED, COMM_CHANNEL_NOT_FOUND, + COMM_CHANNELS_IDLE_SUMMARY, COMM_DIRECT_SENT, COMM_HISTORY_QUERIED, COMM_MESSAGE_DELIVERED, COMM_MESSAGE_PUBLISHED, COMM_RECEIVE_SHUTDOWN, - COMM_RECEIVE_TIMEOUT, COMM_RECEIVE_UNSUBSCRIBED, COMM_SEND_DIRECT_INVALID, COMM_SUBSCRIPTION_CREATED, @@ -102,6 +102,8 @@ def __init__(self, *, config: MessageBusConfig) -> None: self._waiters: dict[tuple[str, str], int] = {} self._running = False self._shutdown_event = asyncio.Event() + self._idle_poll_count: int = 0 + self._idle_summary_interval: int = 60 @property def is_running(self) -> bool: @@ -479,19 +481,20 @@ async def receive( else: self._waiters[key] = current - 1 if result is None: - await self._log_receive_null(channel_name, subscriber_id, timeout) + await self._log_receive_null(channel_name, subscriber_id) return result async def _log_receive_null( self, channel_name: str, subscriber_id: str, - timeout_seconds: float | None, ) -> None: """Log the cause when ``receive()`` returns ``None``. Acquires the lock to safely inspect bus state (queue map and shutdown flag) so the inferred reason is not racy. + For normal idle timeouts, increments a counter and emits + a periodic summary instead of per-timeout spam. """ async with self._lock: is_shutdown = self._shutdown_event.is_set() @@ -509,12 +512,14 @@ async def _log_receive_null( subscriber=subscriber_id, ) else: - logger.debug( - COMM_RECEIVE_TIMEOUT, - channel=channel_name, - subscriber=subscriber_id, - timeout=timeout_seconds, - ) + self._idle_poll_count += 1 + if self._idle_poll_count >= self._idle_summary_interval: + logger.debug( + COMM_CHANNELS_IDLE_SUMMARY, + idle_polls=self._idle_poll_count, + subscriber_count=len(self._queues), + ) + self._idle_poll_count = 0 async def _await_with_shutdown( self, diff --git a/src/synthorg/observability/config.py b/src/synthorg/observability/config.py index 24d43bc6a0..ec3b99592e 100644 --- a/src/synthorg/observability/config.py +++ b/src/synthorg/observability/config.py @@ -238,4 +238,11 @@ def _validate_log_dir_safe(self) -> Self: rotation=RotationConfig(), json_format=True, ), + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.DEBUG, + file_path="access.log", + rotation=RotationConfig(), + json_format=True, + ), ) diff --git a/src/synthorg/observability/events/communication.py b/src/synthorg/observability/events/communication.py index 5e0b5d3d33..e3d780a7b5 100644 --- a/src/synthorg/observability/events/communication.py +++ b/src/synthorg/observability/events/communication.py @@ -51,6 +51,7 @@ COMM_RECEIVE_TIMEOUT: Final[str] = "communication.receive.timeout" COMM_RECEIVE_SHUTDOWN: Final[str] = "communication.receive.shutdown" COMM_RECEIVE_UNSUBSCRIBED: Final[str] = "communication.receive.unsubscribed" +COMM_CHANNELS_IDLE_SUMMARY: Final[str] = "communication.channels.idle_summary" # Validation COMM_MESSENGER_INVALID_AGENT: Final[str] = "communication.messenger.invalid_agent" diff --git a/src/synthorg/observability/setup.py b/src/synthorg/observability/setup.py index b5449dc00d..15333165ef 100644 --- a/src/synthorg/observability/setup.py +++ b/src/synthorg/observability/setup.py @@ -5,6 +5,7 @@ """ import logging +import os import sys from pathlib import Path from typing import Any @@ -12,7 +13,7 @@ import structlog from synthorg.observability.config import DEFAULT_SINKS, LogConfig -from synthorg.observability.enums import LogLevel +from synthorg.observability.enums import LogLevel, SinkType from synthorg.observability.processors import sanitize_sensitive_fields from synthorg.observability.sinks import build_handler @@ -156,13 +157,53 @@ def _apply_logger_levels(config: LogConfig) -> None: logging.getLogger(name).setLevel(level.value) +def _apply_console_level_override(config: LogConfig) -> LogConfig: + """Override the console sink level from ``SYNTHORG_LOG_LEVEL``. + + When the env var is set, finds the CONSOLE sink in ``config.sinks`` + and replaces its level. Invalid values fall back to INFO with a + stderr warning. + + Args: + config: Current logging configuration. + + Returns: + Possibly updated config with the console sink level overridden. + """ + raw = os.environ.get("SYNTHORG_LOG_LEVEL", "").strip().lower() + if not raw: + return config + + try: + level = LogLevel(raw.upper()) + except ValueError: + valid = ", ".join(lvl.value.lower() for lvl in LogLevel) + print( # noqa: T201 + f"WARNING: Invalid SYNTHORG_LOG_LEVEL={raw!r}. " + f"Valid values: {valid}. Falling back to INFO.", + file=sys.stderr, + ) + level = LogLevel.INFO + + new_sinks = tuple( + sink.model_copy(update={"level": level}) + if sink.sink_type == SinkType.CONSOLE + else sink + for sink in config.sinks + ) + return config.model_copy(update={"sinks": new_sinks}) + + def configure_logging(config: LogConfig | None = None) -> None: """Configure the structured logging system. Sets up structlog processor chains, stdlib handlers, and per-logger - levels. This function is **idempotent** — calling it multiple times + levels. This function is **idempotent** -- calling it multiple times replaces the previous configuration without duplicating handlers. + Respects the ``SYNTHORG_LOG_LEVEL`` env var to override the console + sink level (useful for Docker deployments). + Args: config: Logging configuration. When ``None``, uses sensible defaults with all standard sinks. @@ -170,6 +211,8 @@ def configure_logging(config: LogConfig | None = None) -> None: if config is None: config = LogConfig(sinks=DEFAULT_SINKS) + config = _apply_console_level_override(config) + # 1. Reset structlog to a clean state structlog.reset_defaults() diff --git a/src/synthorg/observability/sinks.py b/src/synthorg/observability/sinks.py index 49c2b5a91a..5ac8f4959d 100644 --- a/src/synthorg/observability/sinks.py +++ b/src/synthorg/observability/sinks.py @@ -30,6 +30,7 @@ "audit.log": ("synthorg.security.",), "cost_usage.log": ("synthorg.budget.", "synthorg.providers."), "agent_activity.log": ("synthorg.engine.", "synthorg.core."), + "access.log": ("synthorg.api.",), } ) diff --git a/tests/unit/api/test_app.py b/tests/unit/api/test_app.py index 6aaba6cbef..e607ec5a6a 100644 --- a/tests/unit/api/test_app.py +++ b/tests/unit/api/test_app.py @@ -415,6 +415,12 @@ def test_no_persistence_warns( ) -> None: """Warning is logged when no persistence is available.""" monkeypatch.delenv("SYNTHORG_DB_PATH", raising=False) + # Prevent bootstrap_logging from resetting structlog's + # capture_logs context during create_app(). + monkeypatch.setattr( + "synthorg.api.app._bootstrap_app_logging", + lambda _config: None, + ) import structlog with structlog.testing.capture_logs() as logs: diff --git a/tests/unit/observability/test_config.py b/tests/unit/observability/test_config.py index f1b166c3cc..9dc0109347 100644 --- a/tests/unit/observability/test_config.py +++ b/tests/unit/observability/test_config.py @@ -274,7 +274,7 @@ class TestDefaultSinks: """Tests for the DEFAULT_SINKS constant.""" def test_count(self) -> None: - assert len(DEFAULT_SINKS) == 7 + assert len(DEFAULT_SINKS) == 8 def test_first_is_console(self) -> None: assert DEFAULT_SINKS[0].sink_type == SinkType.CONSOLE @@ -292,4 +292,4 @@ def test_no_duplicate_file_paths(self) -> None: def test_valid_as_log_config(self) -> None: cfg = LogConfig(sinks=DEFAULT_SINKS) - assert len(cfg.sinks) == 7 + assert len(cfg.sinks) == 8 diff --git a/tests/unit/observability/test_setup.py b/tests/unit/observability/test_setup.py index a5211939f4..e429290631 100644 --- a/tests/unit/observability/test_setup.py +++ b/tests/unit/observability/test_setup.py @@ -51,7 +51,7 @@ class TestConfigureLogging: def test_default_config_creates_handlers(self) -> None: configure_logging() root = logging.getLogger() - assert len(root.handlers) == 7 + assert len(root.handlers) == 8 def test_custom_config_creates_handlers(self) -> None: configure_logging(_console_only_config()) @@ -104,7 +104,7 @@ def test_config_overrides_default_levels(self) -> None: def test_none_config_uses_defaults(self) -> None: configure_logging(None) root = logging.getLogger() - assert len(root.handlers) == 7 + assert len(root.handlers) == 8 @pytest.mark.unit From 4fe7c78fc6e9836e42cb5a1e8b55f1ac949eb591 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Thu, 19 Mar 2026 08:03:15 +0100 Subject: [PATCH 2/6] fix: address 14 review agent findings -- tests, safety, correctness Pre-reviewed by 10 agents, 14 findings addressed: - Remove dead COMM_RECEIVE_TIMEOUT constant (no longer used) - Replace count-based idle summary with time-based interval (robust regardless of concurrent subscriber count) - Change access.log sink level from DEBUG to INFO (match intent) - Fix module docstring: "seven-sink" -> "eight-sink" - Fix design spec event module count: 54 -> 50 - Warn on stderr when SYNTHORG_LOG_LEVEL set but no CONSOLE sink - Add flush=True to all stderr warning prints - Critical sink failure (audit.log, access.log) is now a hard error - Wrap _bootstrap_app_logging in try/except with diagnostic stderr - Remove redundant UVICORN_ACCESS_LOG env var from Dockerfile - Move deferred import to top-level in app.py - Add tests for _apply_console_level_override (5 cases) - Add tests for _bootstrap_app_logging (3 branches) - Add tests for idle summary behavior (3 cases) Co-Authored-By: Claude Opus 4.6 (1M context) --- docker/backend/Dockerfile | 7 +- docs/design/operations.md | 4 +- src/synthorg/api/app.py | 19 ++-- src/synthorg/communication/bus_memory.py | 13 ++- src/synthorg/observability/config.py | 6 +- .../observability/events/communication.py | 1 - src/synthorg/observability/setup.py | 26 +++++- tests/unit/api/test_app.py | 64 +++++++++++++- tests/unit/communication/test_bus_memory.py | 44 ++++++++++ tests/unit/observability/test_setup.py | 86 ++++++++++++++++++- 10 files changed, 248 insertions(+), 22 deletions(-) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 42b3933828..1fb147f7e2 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -120,10 +120,9 @@ HEALTHCHECK --interval=10s --timeout=5s --retries=3 --start-period=30s \ # on Python 3.14+ (harmless import-time check, not actual v1 usage). # Tracked: Litestar v3 will drop the v1 compat layer entirely (#551). ENV PYTHONWARNINGS="ignore::UserWarning:pydantic.v1" -# Disable Uvicorn's default access log -- RequestLoggingMiddleware provides -# richer structured access logging via structlog. log_config=None is set -# in server.py; this env var covers the --factory CMD path. -ENV UVICORN_ACCESS_LOG=0 ENTRYPOINT [] +# --no-access-log: Uvicorn's default access logger is disabled because +# RequestLoggingMiddleware provides richer structured access logging via +# structlog (method, path, status_code, duration_ms, request_id). CMD ["/app/.venv/bin/uvicorn", "synthorg.api.app:create_app", "--factory", "--no-access-log"] diff --git a/docs/design/operations.md b/docs/design/operations.md index 3eddf39acb..b6876fdf1f 100644 --- a/docs/design/operations.md +++ b/docs/design/operations.md @@ -1152,7 +1152,7 @@ Eight default sinks, activated at startup via `bootstrap_logging()`: | `agent_activity.log` | File | DEBUG | JSON | `synthorg.engine.*`, `synthorg.core.*` | Agent execution and task lifecycle | | `cost_usage.log` | File | INFO | JSON | `synthorg.budget.*`, `synthorg.providers.*` | Cost records and provider calls | | `debug.log` | File | DEBUG | JSON | All loggers | Full debug trace (catch-all) | -| `access.log` | File | DEBUG | JSON | `synthorg.api.*` | HTTP request/response access log | +| `access.log` | File | INFO | JSON | `synthorg.api.*` | HTTP request/response access log | Logger name routing is implemented via `_LoggerNameFilter` on file handlers. Sinks without explicit routing are catch-all (accept all loggers at their configured level). @@ -1207,7 +1207,7 @@ Default levels per domain module (overridable via `LogConfig.logger_levels`): ### Event Taxonomy -54 domain-specific event constant modules under `observability/events/` (one per subsystem: +50 domain-specific event constant modules under `observability/events/` (one per subsystem: api, budget, tool, git, engine, communication, etc.). Every log call uses a typed constant (e.g., `API_REQUEST_STARTED`, `BUDGET_RECORD_ADDED`) for consistent, grep-friendly event names. Format: `".."` (e.g., `"api.request.started"`). diff --git a/src/synthorg/api/app.py b/src/synthorg/api/app.py index 653134317c..c82a4883b0 100644 --- a/src/synthorg/api/app.py +++ b/src/synthorg/api/app.py @@ -8,6 +8,7 @@ import asyncio import contextlib import os +import sys import time from datetime import UTC, datetime from pathlib import Path @@ -56,6 +57,7 @@ from synthorg.hr.performance.tracker import PerformanceTracker # noqa: TC001 from synthorg.hr.registry import AgentRegistryService # noqa: TC001 from synthorg.observability import get_logger +from synthorg.observability.config import DEFAULT_SINKS, LogConfig from synthorg.observability.events.api import ( API_APP_SHUTDOWN, API_APP_STARTUP, @@ -346,11 +348,6 @@ def _bootstrap_app_logging(effective_config: RootConfig) -> None: ) bootstrap_logging(patched) elif log_dir: - from synthorg.observability.config import ( # noqa: PLC0415 - DEFAULT_SINKS, - LogConfig, - ) - bootstrap_logging( RootConfig( company_name=effective_config.company_name, @@ -409,7 +406,17 @@ def create_app( # noqa: PLR0913 # other setup so that auto-wiring, persistence, and bus logs all # flow through the configured sinks. Respects SYNTHORG_LOG_DIR # env var for Docker log directory override. - _bootstrap_app_logging(effective_config) + try: + _bootstrap_app_logging(effective_config) + except Exception as exc: + print( # noqa: T201 + f"CRITICAL: Failed to initialise logging pipeline: {exc}. " + "Check SYNTHORG_LOG_DIR, SYNTHORG_LOG_LEVEL, and the " + "'logging' section of your config file.", + file=sys.stderr, + flush=True, + ) + raise api_config = effective_config.api diff --git a/src/synthorg/communication/bus_memory.py b/src/synthorg/communication/bus_memory.py index 4d134b4044..b714d492c1 100644 --- a/src/synthorg/communication/bus_memory.py +++ b/src/synthorg/communication/bus_memory.py @@ -6,9 +6,10 @@ import asyncio import contextlib +import time from collections import deque from datetime import UTC, datetime -from typing import NoReturn +from typing import Final, NoReturn from synthorg.communication.channel import Channel from synthorg.communication.config import MessageBusConfig # noqa: TC001 @@ -53,6 +54,9 @@ _DM_SEPARATOR = ":" """Separator used in deterministic direct-channel names.""" +_IDLE_SUMMARY_INTERVAL_SECONDS: Final[float] = 60.0 +"""Minimum seconds between idle-channel summary log emissions.""" + def _raise_channel_not_found(channel_name: str) -> NoReturn: """Log and raise :class:`ChannelNotFoundError`.""" @@ -103,7 +107,7 @@ def __init__(self, *, config: MessageBusConfig) -> None: self._running = False self._shutdown_event = asyncio.Event() self._idle_poll_count: int = 0 - self._idle_summary_interval: int = 60 + self._last_idle_summary: float = time.monotonic() @property def is_running(self) -> bool: @@ -513,13 +517,16 @@ async def _log_receive_null( ) else: self._idle_poll_count += 1 - if self._idle_poll_count >= self._idle_summary_interval: + now = time.monotonic() + if now - self._last_idle_summary >= _IDLE_SUMMARY_INTERVAL_SECONDS: logger.debug( COMM_CHANNELS_IDLE_SUMMARY, idle_polls=self._idle_poll_count, subscriber_count=len(self._queues), + interval_seconds=round(now - self._last_idle_summary, 1), ) self._idle_poll_count = 0 + self._last_idle_summary = now async def _await_with_shutdown( self, diff --git a/src/synthorg/observability/config.py b/src/synthorg/observability/config.py index ec3b99592e..80170a51ed 100644 --- a/src/synthorg/observability/config.py +++ b/src/synthorg/observability/config.py @@ -5,8 +5,8 @@ .. note:: - ``DEFAULT_SINKS`` provides the standard seven-sink layout described - in the design spec (console + six file sinks). + ``DEFAULT_SINKS`` provides the standard eight-sink layout described + in the design spec (console + seven file sinks). """ from collections import Counter @@ -240,7 +240,7 @@ def _validate_log_dir_safe(self) -> Self: ), SinkConfig( sink_type=SinkType.FILE, - level=LogLevel.DEBUG, + level=LogLevel.INFO, file_path="access.log", rotation=RotationConfig(), json_format=True, diff --git a/src/synthorg/observability/events/communication.py b/src/synthorg/observability/events/communication.py index e3d780a7b5..f65916df11 100644 --- a/src/synthorg/observability/events/communication.py +++ b/src/synthorg/observability/events/communication.py @@ -48,7 +48,6 @@ COMM_HANDLER_INVALID: Final[str] = "communication.handler.invalid" # Receive -COMM_RECEIVE_TIMEOUT: Final[str] = "communication.receive.timeout" COMM_RECEIVE_SHUTDOWN: Final[str] = "communication.receive.shutdown" COMM_RECEIVE_UNSUBSCRIBED: Final[str] = "communication.receive.unsubscribed" COMM_CHANNELS_IDLE_SUMMARY: Final[str] = "communication.channels.idle_summary" diff --git a/src/synthorg/observability/setup.py b/src/synthorg/observability/setup.py index 15333165ef..3822b613ca 100644 --- a/src/synthorg/observability/setup.py +++ b/src/synthorg/observability/setup.py @@ -117,13 +117,21 @@ def _attach_handlers( """Build and attach a handler for each configured sink. Failures on individual sinks are logged to stderr and skipped so - that the remaining sinks can still be initialised. + that the remaining sinks can still be initialised. Critical sinks + (``audit.log``, ``access.log``) cause a hard failure if they cannot + be created -- silently dropping security audit or access records is + not acceptable. Args: config: The logging configuration. root_logger: The stdlib root logger. shared_processors: Processor chain for the foreign pre-chain. + + Raises: + RuntimeError: If a critical sink (audit or access) fails to + initialise. """ + _critical_sinks = frozenset({"audit.log", "access.log"}) log_dir = Path(config.log_dir) for sink in config.sinks: try: @@ -138,7 +146,15 @@ def _attach_handlers( f"WARNING: Failed to initialise log sink " f"{sink!r}. This sink will be skipped.", file=sys.stderr, + flush=True, ) + if sink.file_path in _critical_sinks: + msg = ( + f"Critical log sink '{sink.file_path}' could not be " + "initialised. Refusing to start with missing " + "audit/access logs." + ) + raise RuntimeError(msg) from None def _apply_logger_levels(config: LogConfig) -> None: @@ -182,6 +198,7 @@ def _apply_console_level_override(config: LogConfig) -> LogConfig: f"WARNING: Invalid SYNTHORG_LOG_LEVEL={raw!r}. " f"Valid values: {valid}. Falling back to INFO.", file=sys.stderr, + flush=True, ) level = LogLevel.INFO @@ -191,6 +208,13 @@ def _apply_console_level_override(config: LogConfig) -> LogConfig: else sink for sink in config.sinks ) + if new_sinks == config.sinks: + print( # noqa: T201 + f"WARNING: SYNTHORG_LOG_LEVEL={raw!r} set but no CONSOLE " + "sink found in config -- env var has no effect.", + file=sys.stderr, + flush=True, + ) return config.model_copy(update={"sinks": new_sinks}) diff --git a/tests/unit/api/test_app.py b/tests/unit/api/test_app.py index e607ec5a6a..22c5d2082f 100644 --- a/tests/unit/api/test_app.py +++ b/tests/unit/api/test_app.py @@ -6,12 +6,14 @@ from litestar import Litestar from litestar.testing import TestClient -from synthorg.api.app import create_app +from synthorg.api.app import _bootstrap_app_logging, create_app from synthorg.api.middleware import _SECURITY_HEADERS from synthorg.api.state import AppState from synthorg.budget.tracker import CostTracker from synthorg.communication.bus_memory import InMemoryMessageBus +from synthorg.config.schema import RootConfig from synthorg.engine.task_engine import TaskEngine +from synthorg.observability.config import DEFAULT_SINKS, LogConfig @pytest.mark.unit @@ -909,3 +911,63 @@ def failing_init(self: Any, **kwargs: Any) -> None: task_engine=None, provider_registry=None, ) + + +@pytest.mark.unit +class TestBootstrapAppLogging: + """Tests for _bootstrap_app_logging env var branches.""" + + def test_no_log_dir_calls_bootstrap( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Without SYNTHORG_LOG_DIR, bootstrap_logging is called unchanged.""" + monkeypatch.delenv("SYNTHORG_LOG_DIR", raising=False) + calls: list[object] = [] + monkeypatch.setattr( + "synthorg.api.app.bootstrap_logging", + calls.append, + ) + config = RootConfig(company_name="test-co") + _bootstrap_app_logging(config) + assert len(calls) == 1 + assert calls[0] is config + + def test_log_dir_with_existing_logging_config( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """SYNTHORG_LOG_DIR overrides log_dir in existing logging config.""" + monkeypatch.setenv("SYNTHORG_LOG_DIR", "/custom/logs") + calls: list[RootConfig] = [] + monkeypatch.setattr( + "synthorg.api.app.bootstrap_logging", + calls.append, + ) + config = RootConfig( + company_name="test-co", + logging=LogConfig(sinks=DEFAULT_SINKS, log_dir="original"), + ) + _bootstrap_app_logging(config) + assert len(calls) == 1 + assert calls[0].logging is not None + assert calls[0].logging.log_dir == "/custom/logs" + + def test_log_dir_without_logging_config( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """SYNTHORG_LOG_DIR creates a LogConfig when none exists.""" + monkeypatch.setenv("SYNTHORG_LOG_DIR", "/data/logs") + calls: list[RootConfig] = [] + monkeypatch.setattr( + "synthorg.api.app.bootstrap_logging", + calls.append, + ) + config = RootConfig(company_name="test-co") + assert config.logging is None + _bootstrap_app_logging(config) + assert len(calls) == 1 + assert calls[0].logging is not None + assert calls[0].logging.log_dir == "/data/logs" + assert len(calls[0].logging.sinks) == len(DEFAULT_SINKS) diff --git a/tests/unit/communication/test_bus_memory.py b/tests/unit/communication/test_bus_memory.py index 1797d50582..5c5894478c 100644 --- a/tests/unit/communication/test_bus_memory.py +++ b/tests/unit/communication/test_bus_memory.py @@ -733,3 +733,47 @@ async def test_history_limit_negative_returns_empty(self) -> None: ) history = await bus.get_channel_history("#general", limit=-5) assert history == () + + +@pytest.mark.unit +class TestIdleSummary: + """Tests for the periodic idle channel summary log.""" + + async def test_idle_polls_increment_without_logging(self) -> None: + """Idle polls below the time threshold do not emit a summary.""" + bus = InMemoryMessageBus(config=_make_config()) + await bus.start() + await bus.subscribe("#general", "agent-a") + # Short timeouts to accumulate idle polls quickly. + for _ in range(5): + result = await bus.receive("#general", "agent-a", timeout=0.01) + assert result is None + assert bus._idle_poll_count == 5 + + async def test_summary_emits_after_time_interval( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Summary fires when time interval elapses.""" + import time + + bus = InMemoryMessageBus(config=_make_config()) + await bus.start() + await bus.subscribe("#general", "agent-a") + # Fast-forward the last summary timestamp to trigger summary. + bus._last_idle_summary = time.monotonic() - 61.0 + await bus.receive("#general", "agent-a", timeout=0.01) + # Counter should have been reset after summary. + assert bus._idle_poll_count == 0 + + async def test_message_delivery_still_works(self) -> None: + """Message delivery is not affected by idle summary changes.""" + bus = InMemoryMessageBus(config=_make_config()) + await bus.start() + await bus.subscribe("#general", "agent-a") + await bus.publish( + _make_message(channel="#general", content="hello"), + ) + envelope = await bus.receive("#general", "agent-a", timeout=0.5) + assert envelope is not None + assert envelope.message.content == "hello" diff --git a/tests/unit/observability/test_setup.py b/tests/unit/observability/test_setup.py index e429290631..5b10e626e0 100644 --- a/tests/unit/observability/test_setup.py +++ b/tests/unit/observability/test_setup.py @@ -13,7 +13,11 @@ from synthorg.observability.config import LogConfig, SinkConfig from synthorg.observability.correlation import bind_correlation_id from synthorg.observability.enums import LogLevel, SinkType -from synthorg.observability.setup import _DEFAULT_LOGGER_LEVELS, configure_logging +from synthorg.observability.setup import ( + _DEFAULT_LOGGER_LEVELS, + _apply_console_level_override, + configure_logging, +) pytestmark = pytest.mark.timeout(30) @@ -191,3 +195,83 @@ def test_correlation_id_in_log_output(self, tmp_path: Path) -> None: assert content record = json.loads(content) assert record["request_id"] == "test-req-123" + + +@pytest.mark.unit +class TestApplyConsoleLevelOverride: + """Tests for _apply_console_level_override.""" + + def test_no_env_var_returns_unchanged( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.delenv("SYNTHORG_LOG_LEVEL", raising=False) + config = _console_only_config() + result = _apply_console_level_override(config) + assert result is config + + def test_valid_level_overrides_console_sink( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("SYNTHORG_LOG_LEVEL", "warning") + config = _console_only_config() + result = _apply_console_level_override(config) + assert result.sinks[0].level == LogLevel.WARNING + + def test_invalid_level_falls_back_to_info( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + monkeypatch.setenv("SYNTHORG_LOG_LEVEL", "bogus") + config = _console_only_config() + result = _apply_console_level_override(config) + assert result.sinks[0].level == LogLevel.INFO + captured = capsys.readouterr() + assert "Invalid SYNTHORG_LOG_LEVEL" in captured.err + + def test_file_sinks_unaffected( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setenv("SYNTHORG_LOG_LEVEL", "error") + config = LogConfig( + sinks=( + SinkConfig( + sink_type=SinkType.CONSOLE, + level=LogLevel.INFO, + json_format=False, + ), + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.DEBUG, + file_path="test.log", + json_format=True, + ), + ), + ) + result = _apply_console_level_override(config) + assert result.sinks[0].level == LogLevel.ERROR + assert result.sinks[1].level == LogLevel.DEBUG + + def test_no_console_sink_warns( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + monkeypatch.setenv("SYNTHORG_LOG_LEVEL", "debug") + config = LogConfig( + sinks=( + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.INFO, + file_path="only-file.log", + json_format=True, + ), + ), + ) + result = _apply_console_level_override(config) + assert result.sinks[0].level == LogLevel.INFO + captured = capsys.readouterr() + assert "no CONSOLE sink found" in captured.err From f8ab85106590262fa7386d3534e29669d252c6b4 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Thu, 19 Mar 2026 08:40:37 +0100 Subject: [PATCH 3/6] fix: address 24 review findings from local agents, CodeRabbit, and Gemini Security: - Validate SYNTHORG_LOG_DIR for path traversal before model_copy (model_copy bypasses Pydantic validators) - Bind exception in _attach_handlers except clause for proper chaining Bugs: - Fix false "no CONSOLE sink" warning when SYNTHORG_LOG_LEVEL matches current level (use explicit flag instead of equality check) - Reset idle poll state on bus restart to prevent immediate summary - Chain original exception in critical sink failure (from exc, not None) - Add flush=True to _clear_root_handlers stderr print - Collapse _bootstrap_app_logging from 3 branches to 2 (removes bare RootConfig construction that discarded non-logging config) Docs: - Fix redacted token in operations.md ([REDACTED] -> **REDACTED**) - Add missing synthorg.cli to Per-Logger Levels table - Update CLAUDE.md api/ and observability/ package descriptions - Add Raises section to configure_logging docstring - Fix "7+1 sinks" comment to "8 sinks" Infra: - Add SYNTHORG_LOG_DIR to CLI compose template + golden files - Add SYNTHORG_LOG_LEVEL to docker/compose.yml environment block Tests (11 new): - Critical sink failure: 4 tests (non-critical skip, audit raises, access raises, cause chain preserved) - Bootstrap failure: 1 test (create_app prints CRITICAL and re-raises) - Path traversal: 1 test (SYNTHORG_LOG_DIR with '..' raises ValueError) - Whitespace LOG_DIR: 1 test (treated as unset) - access.log routing: 1 test in TestSinkRoutingTable - Server params: 1 test (uvicorn access_log=False, log_config=None) - Log level integration: 1 test (SYNTHORG_LOG_LEVEL end-to-end) - Idle summary: mock time.monotonic() for determinism, reference _IDLE_SUMMARY_INTERVAL_SECONDS constant, add restart reset test Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 4 +- cli/internal/compose/compose.yml.tmpl | 1 + cli/testdata/compose_custom_ports.yml | 1 + cli/testdata/compose_default.yml | 1 + cli/testdata/compose_digest_pins.yml | 1 + docker/compose.yml | 1 + docs/design/operations.md | 3 +- src/synthorg/api/app.py | 50 ++++--- src/synthorg/communication/bus_memory.py | 2 + src/synthorg/observability/setup.py | 33 +++-- tests/unit/api/test_app.py | 50 +++++++ tests/unit/api/test_server.py | 33 +++++ tests/unit/communication/test_bus_memory.py | 51 ++++++- tests/unit/observability/test_setup.py | 133 ++++++++++++++++++ tests/unit/observability/test_sink_routing.py | 4 + 15 files changed, 327 insertions(+), 41 deletions(-) create mode 100644 tests/unit/api/test_server.py diff --git a/CLAUDE.md b/CLAUDE.md index 6178274345..92a75688fd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -114,7 +114,7 @@ curl http://localhost:3000/api/v1/health # backend (via web proxy) ```text src/synthorg/ - api/ # Litestar REST + WebSocket API (controllers, guards, channels, JWT + API key + WS ticket auth, approval gate integration, coordination endpoint, collaboration endpoint, settings endpoint, provider management endpoint (CRUD + test + presets), backup endpoint, RFC 9457 structured errors (ErrorCategory, ErrorCode, ErrorDetail, ProblemDetail, CATEGORY_TITLES, category_title, category_type_uri, content negotiation)), AppState hot-reload slots (provider_registry, model_router with swap methods, provider_management), settings dispatcher lifecycle, service auto-wiring (auto_wire.py: Phase 1 at construction -- message bus/cost tracker/provider registry/task engine; Phase 2 in on_startup after persistence connects -- settings service + config resolver + provider management), lifecycle helpers (lifecycle.py: _safe_startup, _safe_shutdown, _cleanup_on_failure, _init_persistence, _try_stop) + api/ # Litestar REST + WebSocket API (controllers, guards, channels, JWT + API key + WS ticket auth, approval gate integration, coordination endpoint, collaboration endpoint, settings endpoint, provider management endpoint (CRUD + test + presets), backup endpoint, RFC 9457 structured errors (ErrorCategory, ErrorCode, ErrorDetail, ProblemDetail, CATEGORY_TITLES, category_title, category_type_uri, content negotiation)), AppState hot-reload slots (provider_registry, model_router with swap methods, provider_management), settings dispatcher lifecycle, logging bootstrap (_bootstrap_app_logging, SYNTHORG_LOG_DIR env var override, called before all other setup in create_app), service auto-wiring (auto_wire.py: Phase 1 at construction -- message bus/cost tracker/provider registry/task engine; Phase 2 in on_startup after persistence connects -- settings service + config resolver + provider management), lifecycle helpers (lifecycle.py: _safe_startup, _safe_shutdown, _cleanup_on_failure, _init_persistence, _try_stop) auth/ # Authentication subpackage (controller, service, middleware, JWT + API key + WS ticket store, models, config, secret resolution) backup/ # Backup and restore -- scheduled/manual/lifecycle backups of persistence DB, agent memory, and company config. BackupService orchestrator, BackupScheduler (periodic asyncio task), RetentionManager (count + age pruning), tar.gz compression, SHA-256 checksums, manifest tracking, validated restore with atomic rollback and safety backup handlers/ # ComponentHandler protocol + concrete handlers: PersistenceComponentHandler (SQLite VACUUM INTO), MemoryComponentHandler (copytree), ConfigComponentHandler (copy2) @@ -128,7 +128,7 @@ src/synthorg/ hr/ # HR engine: hiring, firing, onboarding, offboarding, agent registry, performance tracking (task metrics, collaboration scoring, LLM calibration sampling, collaboration overrides, trend detection), promotion/demotion (criteria evaluation, approval strategies, model mapping) memory/ # Persistent agent memory (pluggable MemoryBackend protocol), backends/ (Mem0 adapter: backends/mem0/), retrieval pipeline (ranking, RRF fusion, injection, context formatting, non-inferable filtering), shared org memory (org/), consolidation/archival (consolidation/, dual-mode density-aware archival: DensityClassifier, AbstractiveSummarizer, ExtractivePreserver, DualModeConsolidationStrategy) persistence/ # Operational data persistence — pluggable PersistenceBackend protocol, SQLite initial, SettingsRepository (namespaced settings CRUD) (see Memory & Persistence design page) - observability/ # Structured logging, correlation tracking, log sinks + observability/ # Structured logging (8-sink pipeline: console + 7 file sinks with logger-name routing), correlation tracking (request_id/task_id/agent_id via contextvars), sensitive field redaction, SYNTHORG_LOG_LEVEL env var override, critical sink enforcement (audit.log/access.log), log sinks providers/ # LLM provider abstraction (LiteLLM adapter), auth types (AuthType enum: api_key/oauth/custom_header/none), presets (ProviderPreset, PROVIDER_PRESETS for Ollama/LM Studio/OpenRouter/vLLM), runtime CRUD (management/ -- ProviderManagementService, asyncio.Lock-serialized create/update/delete/test, hot-reload of ProviderRegistry + ModelRouter via AppState swap) settings/ # Runtime-editable settings persistence (DB > env > YAML > code defaults), typed definitions (9 namespaces, including JSON type for structural data), Fernet encryption for sensitive values, config bridge (JSON serialization for Pydantic models/collections), ConfigResolver (typed scalar + structural data accessors for controllers — get_agents, get_departments, get_provider_configs with validation fallbacks to YAML), validation, registry, change notifications via message bus, SettingsSubscriber protocol (subscriber.py), SettingsChangeDispatcher (dispatcher.py, polls #settings channel, routes to subscribers, restart_required filtering) definitions/ # Per-namespace setting definitions (api, company, providers, memory, budget, security, coordination, observability, backup) diff --git a/cli/internal/compose/compose.yml.tmpl b/cli/internal/compose/compose.yml.tmpl index 9706fff6d6..6c6743f090 100644 --- a/cli/internal/compose/compose.yml.tmpl +++ b/cli/internal/compose/compose.yml.tmpl @@ -15,6 +15,7 @@ services: SYNTHORG_MEMORY_DIR: "/data/memory" SYNTHORG_PERSISTENCE_BACKEND: {{yamlStr .PersistenceBackend}} SYNTHORG_MEMORY_BACKEND: {{yamlStr .MemoryBackend}} + SYNTHORG_LOG_DIR: "/data/logs" MEM0_TELEMETRY: "false" SYNTHORG_LOG_LEVEL: {{yamlStr .LogLevel}} {{- if .JWTSecret}} diff --git a/cli/testdata/compose_custom_ports.yml b/cli/testdata/compose_custom_ports.yml index e3b9d7542a..44b237d91e 100644 --- a/cli/testdata/compose_custom_ports.yml +++ b/cli/testdata/compose_custom_ports.yml @@ -15,6 +15,7 @@ services: SYNTHORG_MEMORY_DIR: "/data/memory" SYNTHORG_PERSISTENCE_BACKEND: "sqlite" SYNTHORG_MEMORY_BACKEND: "mem0" + SYNTHORG_LOG_DIR: "/data/logs" MEM0_TELEMETRY: "false" SYNTHORG_LOG_LEVEL: "debug" SYNTHORG_JWT_SECRET: "test-secret-value" diff --git a/cli/testdata/compose_default.yml b/cli/testdata/compose_default.yml index 5cc7a2f389..46e7a713d6 100644 --- a/cli/testdata/compose_default.yml +++ b/cli/testdata/compose_default.yml @@ -15,6 +15,7 @@ services: SYNTHORG_MEMORY_DIR: "/data/memory" SYNTHORG_PERSISTENCE_BACKEND: "sqlite" SYNTHORG_MEMORY_BACKEND: "mem0" + SYNTHORG_LOG_DIR: "/data/logs" MEM0_TELEMETRY: "false" SYNTHORG_LOG_LEVEL: "info" user: "65532:65532" diff --git a/cli/testdata/compose_digest_pins.yml b/cli/testdata/compose_digest_pins.yml index 9def8cde0d..9503c45814 100644 --- a/cli/testdata/compose_digest_pins.yml +++ b/cli/testdata/compose_digest_pins.yml @@ -15,6 +15,7 @@ services: SYNTHORG_MEMORY_DIR: "/data/memory" SYNTHORG_PERSISTENCE_BACKEND: "sqlite" SYNTHORG_MEMORY_BACKEND: "mem0" + SYNTHORG_LOG_DIR: "/data/logs" MEM0_TELEMETRY: "false" SYNTHORG_LOG_LEVEL: "info" user: "65532:65532" diff --git a/docker/compose.yml b/docker/compose.yml index 7d5ff7885d..e20811d4a1 100644 --- a/docker/compose.yml +++ b/docker/compose.yml @@ -20,6 +20,7 @@ services: SYNTHORG_DB_PATH: "/data/synthorg.db" SYNTHORG_MEMORY_DIR: "/data/memory" SYNTHORG_LOG_DIR: "${SYNTHORG_LOG_DIR:-/data/logs}" + SYNTHORG_LOG_LEVEL: "${SYNTHORG_LOG_LEVEL:-info}" # Disable Mem0 telemetry by default. Override in .env to enable. MEM0_TELEMETRY: "${MEM0_TELEMETRY:-false}" # Bridge SYNTHORG_* to uvicorn's native env vars (exec form CMD, no shell) diff --git a/docs/design/operations.md b/docs/design/operations.md index b6876fdf1f..cd50efa4e0 100644 --- a/docs/design/operations.md +++ b/docs/design/operations.md @@ -1173,7 +1173,7 @@ File sinks use `RotatingFileHandler` by default (10 MB max, 5 backup files). Alt The `sanitize_sensitive_fields` processor automatically redacts values for keys matching: `password`, `secret`, `token`, `api_key`, `api_secret`, `authorization`, `credential`, `private_key`, `bearer`, `session`. Redaction applies at all nesting depths in structured -log events. Redacted values are replaced with `"[REDACTED]"`. +log events. Redacted values are replaced with `"**REDACTED**"`. ### Correlation Tracking @@ -1202,6 +1202,7 @@ Default levels per domain module (overridable via `LogConfig.logger_levels`): | `synthorg.security` | INFO | | `synthorg.tools` | INFO | | `synthorg.api` | INFO | +| `synthorg.cli` | INFO | | `synthorg.config` | INFO | | `synthorg.templates` | INFO | diff --git a/src/synthorg/api/app.py b/src/synthorg/api/app.py index c82a4883b0..ed8355344d 100644 --- a/src/synthorg/api/app.py +++ b/src/synthorg/api/app.py @@ -11,7 +11,7 @@ import sys import time from datetime import UTC, datetime -from pathlib import Path +from pathlib import Path, PurePath from typing import TYPE_CHECKING, Any from litestar import Litestar, Router @@ -336,26 +336,38 @@ def _bootstrap_app_logging(effective_config: RootConfig) -> None: Applies the ``SYNTHORG_LOG_DIR`` env var override (for Docker volume paths) before calling :func:`bootstrap_logging`. + + When the env var is set with an existing logging config, patches + ``log_dir``. When set without a logging config, creates a + default config with ``DEFAULT_SINKS``. Otherwise, delegates + directly to ``bootstrap_logging``. + + Raises: + ValueError: If ``SYNTHORG_LOG_DIR`` contains ``..`` path + traversal components. """ log_dir = os.environ.get("SYNTHORG_LOG_DIR", "").strip() - if log_dir and effective_config.logging is not None: - patched = effective_config.model_copy( - update={ - "logging": effective_config.logging.model_copy( - update={"log_dir": log_dir}, - ), - }, - ) - bootstrap_logging(patched) - elif log_dir: - bootstrap_logging( - RootConfig( - company_name=effective_config.company_name, - logging=LogConfig(sinks=DEFAULT_SINKS, log_dir=log_dir), - ), - ) - else: + if not log_dir: bootstrap_logging(effective_config) + return + + # Validate before model_copy -- Pydantic validators do not run + # on model_copy(update=...), so we must check manually. + if ".." in PurePath(log_dir).parts: + msg = f"SYNTHORG_LOG_DIR contains '..' path traversal component: {log_dir!r}" + raise ValueError(msg) + + base_log_cfg = effective_config.logging or LogConfig( + sinks=DEFAULT_SINKS, + ) + patched = effective_config.model_copy( + update={ + "logging": base_log_cfg.model_copy( + update={"log_dir": log_dir}, + ), + }, + ) + bootstrap_logging(patched) def create_app( # noqa: PLR0913 @@ -402,7 +414,7 @@ def create_app( # noqa: PLR0913 """ effective_config = config or RootConfig(company_name="default") - # Activate the structured logging pipeline (7+1 sinks) before any + # Activate the structured logging pipeline (8 sinks) before any # other setup so that auto-wiring, persistence, and bus logs all # flow through the configured sinks. Respects SYNTHORG_LOG_DIR # env var for Docker log directory override. diff --git a/src/synthorg/communication/bus_memory.py b/src/synthorg/communication/bus_memory.py index b714d492c1..1a938d4cb5 100644 --- a/src/synthorg/communication/bus_memory.py +++ b/src/synthorg/communication/bus_memory.py @@ -127,6 +127,8 @@ async def start(self) -> None: raise MessageBusAlreadyRunningError(msg) self._running = True self._shutdown_event.clear() + self._idle_poll_count = 0 + self._last_idle_summary = time.monotonic() maxlen = self._config.retention.max_messages_per_channel for name in self._config.channels: ch = Channel(name=name, type=ChannelType.TOPIC) diff --git a/src/synthorg/observability/setup.py b/src/synthorg/observability/setup.py index 3822b613ca..6318115620 100644 --- a/src/synthorg/observability/setup.py +++ b/src/synthorg/observability/setup.py @@ -12,7 +12,7 @@ import structlog -from synthorg.observability.config import DEFAULT_SINKS, LogConfig +from synthorg.observability.config import DEFAULT_SINKS, LogConfig, SinkConfig from synthorg.observability.enums import LogLevel, SinkType from synthorg.observability.processors import sanitize_sensitive_fields from synthorg.observability.sinks import build_handler @@ -85,6 +85,7 @@ def _clear_root_handlers(root_logger: logging.Logger) -> None: print( # noqa: T201 f"WARNING: Failed to close log handler {handler!r}", file=sys.stderr, + flush=True, ) @@ -141,10 +142,10 @@ def _attach_handlers( foreign_pre_chain=shared_processors, ) root_logger.addHandler(handler) - except OSError, RuntimeError, ValueError: + except (OSError, RuntimeError, ValueError) as exc: print( # noqa: T201 f"WARNING: Failed to initialise log sink " - f"{sink!r}. This sink will be skipped.", + f"{sink!r}: {exc}. This sink will be skipped.", file=sys.stderr, flush=True, ) @@ -154,7 +155,7 @@ def _attach_handlers( "initialised. Refusing to start with missing " "audit/access logs." ) - raise RuntimeError(msg) from None + raise RuntimeError(msg) from exc def _apply_logger_levels(config: LogConfig) -> None: @@ -202,20 +203,22 @@ def _apply_console_level_override(config: LogConfig) -> LogConfig: ) level = LogLevel.INFO - new_sinks = tuple( - sink.model_copy(update={"level": level}) - if sink.sink_type == SinkType.CONSOLE - else sink - for sink in config.sinks - ) - if new_sinks == config.sinks: + found_console = False + new_sinks: list[SinkConfig] = [] + for sink in config.sinks: + if sink.sink_type == SinkType.CONSOLE: + found_console = True + new_sinks.append(sink.model_copy(update={"level": level})) + else: + new_sinks.append(sink) + if not found_console: print( # noqa: T201 f"WARNING: SYNTHORG_LOG_LEVEL={raw!r} set but no CONSOLE " "sink found in config -- env var has no effect.", file=sys.stderr, flush=True, ) - return config.model_copy(update={"sinks": new_sinks}) + return config.model_copy(update={"sinks": tuple(new_sinks)}) def configure_logging(config: LogConfig | None = None) -> None: @@ -231,6 +234,12 @@ def configure_logging(config: LogConfig | None = None) -> None: Args: config: Logging configuration. When ``None``, uses sensible defaults with all standard sinks. + + Raises: + RuntimeError: If a critical sink (``audit.log`` or + ``access.log``) fails to initialise. The logging system + may be in a partially configured state (structlog reset, + old handlers cleared, some new handlers attached). """ if config is None: config = LogConfig(sinks=DEFAULT_SINKS) diff --git a/tests/unit/api/test_app.py b/tests/unit/api/test_app.py index 22c5d2082f..b29a4a7ea8 100644 --- a/tests/unit/api/test_app.py +++ b/tests/unit/api/test_app.py @@ -971,3 +971,53 @@ def test_log_dir_without_logging_config( assert calls[0].logging is not None assert calls[0].logging.log_dir == "/data/logs" assert len(calls[0].logging.sinks) == len(DEFAULT_SINKS) + + def test_whitespace_only_log_dir_treated_as_unset( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Whitespace-only SYNTHORG_LOG_DIR behaves like unset.""" + monkeypatch.setenv("SYNTHORG_LOG_DIR", " ") + calls: list[object] = [] + monkeypatch.setattr( + "synthorg.api.app.bootstrap_logging", + calls.append, + ) + config = RootConfig(company_name="test-co") + _bootstrap_app_logging(config) + assert len(calls) == 1 + assert calls[0] is config + + def test_path_traversal_in_log_dir_raises( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """SYNTHORG_LOG_DIR with '..' raises ValueError.""" + monkeypatch.setenv("SYNTHORG_LOG_DIR", "../../etc") + monkeypatch.setattr( + "synthorg.api.app.bootstrap_logging", + lambda _: None, + ) + config = RootConfig(company_name="test-co") + with pytest.raises(ValueError, match="path traversal"): + _bootstrap_app_logging(config) + + def test_bootstrap_failure_prints_critical_and_reraises( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + """create_app re-raises and prints CRITICAL on logging failure.""" + monkeypatch.setattr( + "synthorg.api.app._bootstrap_app_logging", + _raise_runtime_error, + ) + with pytest.raises(RuntimeError, match="boom"): + create_app() + captured = capsys.readouterr() + assert "CRITICAL" in captured.err + + +def _raise_runtime_error(_config: object) -> None: + msg = "boom" + raise RuntimeError(msg) diff --git a/tests/unit/api/test_server.py b/tests/unit/api/test_server.py new file mode 100644 index 0000000000..b2581db515 --- /dev/null +++ b/tests/unit/api/test_server.py @@ -0,0 +1,33 @@ +"""Tests for the Uvicorn server runner.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from synthorg.config.schema import RootConfig + + +@pytest.mark.unit +class TestRunServerUvicornParams: + """Verify that run_server passes correct params to uvicorn.run.""" + + def test_access_log_disabled_and_log_config_none( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Uvicorn access log is disabled; log_config is None.""" + # Prevent create_app from actually bootstrapping logging. + monkeypatch.setattr( + "synthorg.api.app._bootstrap_app_logging", + lambda _config: None, + ) + mock_run = MagicMock() + with patch("synthorg.api.server.uvicorn.run", mock_run): + from synthorg.api.server import run_server + + run_server(RootConfig(company_name="test-co")) + + mock_run.assert_called_once() + kwargs = mock_run.call_args.kwargs + assert kwargs["access_log"] is False + assert kwargs["log_config"] is None diff --git a/tests/unit/communication/test_bus_memory.py b/tests/unit/communication/test_bus_memory.py index 5c5894478c..6f16c64847 100644 --- a/tests/unit/communication/test_bus_memory.py +++ b/tests/unit/communication/test_bus_memory.py @@ -739,14 +739,20 @@ async def test_history_limit_negative_returns_empty(self) -> None: class TestIdleSummary: """Tests for the periodic idle channel summary log.""" - async def test_idle_polls_increment_without_logging(self) -> None: + async def test_idle_polls_increment_without_logging( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: """Idle polls below the time threshold do not emit a summary.""" + from synthorg.communication import bus_memory as _bm + + clock = 1000.0 + monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) bus = InMemoryMessageBus(config=_make_config()) await bus.start() await bus.subscribe("#general", "agent-a") - # Short timeouts to accumulate idle polls quickly. for _ in range(5): - result = await bus.receive("#general", "agent-a", timeout=0.01) + result = await bus.receive("#general", "agent-a", timeout=0.0) assert result is None assert bus._idle_poll_count == 5 @@ -755,14 +761,24 @@ async def test_summary_emits_after_time_interval( monkeypatch: pytest.MonkeyPatch, ) -> None: """Summary fires when time interval elapses.""" - import time + from synthorg.communication import bus_memory as _bm + from synthorg.communication.bus_memory import ( + _IDLE_SUMMARY_INTERVAL_SECONDS, + ) + clock = 1000.0 + monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) bus = InMemoryMessageBus(config=_make_config()) await bus.start() await bus.subscribe("#general", "agent-a") - # Fast-forward the last summary timestamp to trigger summary. - bus._last_idle_summary = time.monotonic() - 61.0 - await bus.receive("#general", "agent-a", timeout=0.01) + # First idle poll -- counter increments. + await bus.receive("#general", "agent-a", timeout=0.0) + assert bus._idle_poll_count == 1 + + # Advance past the summary interval. + clock = 1000.0 + _IDLE_SUMMARY_INTERVAL_SECONDS + 1.0 + monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + await bus.receive("#general", "agent-a", timeout=0.0) # Counter should have been reset after summary. assert bus._idle_poll_count == 0 @@ -777,3 +793,24 @@ async def test_message_delivery_still_works(self) -> None: envelope = await bus.receive("#general", "agent-a", timeout=0.5) assert envelope is not None assert envelope.message.content == "hello" + + async def test_idle_state_reset_on_restart( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Idle counters reset when the bus is restarted.""" + from synthorg.communication import bus_memory as _bm + + clock = 1000.0 + monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + bus = InMemoryMessageBus(config=_make_config()) + await bus.start() + await bus.subscribe("#general", "agent-a") + await bus.receive("#general", "agent-a", timeout=0.0) + assert bus._idle_poll_count == 1 + + await bus.stop() + clock = 2000.0 + monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + await bus.start() + assert bus._idle_poll_count == 0 diff --git a/tests/unit/observability/test_setup.py b/tests/unit/observability/test_setup.py index 5b10e626e0..ad18c525f2 100644 --- a/tests/unit/observability/test_setup.py +++ b/tests/unit/observability/test_setup.py @@ -16,6 +16,7 @@ from synthorg.observability.setup import ( _DEFAULT_LOGGER_LEVELS, _apply_console_level_override, + _attach_handlers, configure_logging, ) @@ -275,3 +276,135 @@ def test_no_console_sink_warns( assert result.sinks[0].level == LogLevel.INFO captured = capsys.readouterr() assert "no CONSOLE sink found" in captured.err + + +@pytest.mark.unit +class TestCriticalSinkFailure: + """Tests for critical sink failure enforcement in _attach_handlers.""" + + def test_non_critical_sink_failure_skipped( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Non-critical sink failure is tolerated.""" + import synthorg.observability.setup as _setup + + config = LogConfig( + sinks=( + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.DEBUG, + file_path="debug.log", + json_format=True, + ), + ), + ) + + def _boom(**_kwargs: object) -> None: + msg = "disk full" + raise OSError(msg) + + monkeypatch.setattr(_setup, "build_handler", _boom) + root = logging.getLogger() + initial_count = len(root.handlers) + # Should not raise -- non-critical sink failures are skipped. + _attach_handlers(config, root, []) + assert len(root.handlers) == initial_count + + def test_critical_audit_sink_failure_raises( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """audit.log failure raises RuntimeError.""" + import synthorg.observability.setup as _setup + + config = LogConfig( + sinks=( + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.INFO, + file_path="audit.log", + json_format=True, + ), + ), + ) + + def _boom(**_kwargs: object) -> None: + msg = "permission denied" + raise OSError(msg) + + monkeypatch.setattr(_setup, "build_handler", _boom) + root = logging.getLogger() + with pytest.raises(RuntimeError, match=r"audit\.log"): + _attach_handlers(config, root, []) + + def test_critical_access_sink_failure_raises( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """access.log failure raises RuntimeError.""" + import synthorg.observability.setup as _setup + + config = LogConfig( + sinks=( + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.INFO, + file_path="access.log", + json_format=True, + ), + ), + ) + + def _boom(**_kwargs: object) -> None: + msg = "permission denied" + raise OSError(msg) + + monkeypatch.setattr(_setup, "build_handler", _boom) + root = logging.getLogger() + with pytest.raises(RuntimeError, match=r"access\.log"): + _attach_handlers(config, root, []) + + def test_critical_sink_failure_chains_original_cause( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """RuntimeError chains the original OS error.""" + import synthorg.observability.setup as _setup + + config = LogConfig( + sinks=( + SinkConfig( + sink_type=SinkType.FILE, + level=LogLevel.INFO, + file_path="audit.log", + json_format=True, + ), + ), + ) + + def _boom(**_kwargs: object) -> None: + msg = "permission denied" + raise OSError(msg) + + monkeypatch.setattr(_setup, "build_handler", _boom) + root = logging.getLogger() + with pytest.raises(RuntimeError) as exc_info: + _attach_handlers(config, root, []) + assert isinstance(exc_info.value.__cause__, OSError) + + +@pytest.mark.unit +class TestConfigureLoggingIntegration: + """Integration tests for configure_logging with env var overrides.""" + + def test_synthorg_log_level_applied_end_to_end( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """SYNTHORG_LOG_LEVEL env var takes effect through configure_logging.""" + monkeypatch.setenv("SYNTHORG_LOG_LEVEL", "warning") + configure_logging(_console_only_config()) + root = logging.getLogger() + # The console handler level should reflect the override. + assert any(h.level == logging.WARNING for h in root.handlers) diff --git a/tests/unit/observability/test_sink_routing.py b/tests/unit/observability/test_sink_routing.py index f5aba12386..ed781814f9 100644 --- a/tests/unit/observability/test_sink_routing.py +++ b/tests/unit/observability/test_sink_routing.py @@ -84,6 +84,10 @@ def test_agent_activity_routes_engine_and_core(self) -> None: assert "synthorg.engine." in prefixes assert "synthorg.core." in prefixes + def test_access_routes_api(self) -> None: + assert "access.log" in _SINK_ROUTING + assert "synthorg.api." in _SINK_ROUTING["access.log"] + def test_catchall_sinks_not_in_routing(self) -> None: for name in ("synthorg.log", "errors.log", "debug.log"): assert name not in _SINK_ROUTING From 37b6d972cbfb247e187730488fff73d5c195ec16 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Thu, 19 Mar 2026 08:45:15 +0100 Subject: [PATCH 4/6] fix: use stdlib time import for monkeypatch in idle summary tests Mypy flagged _bm.time as not explicitly exported from the bus_memory module. Switch to importing the time module directly. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/unit/communication/test_bus_memory.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/unit/communication/test_bus_memory.py b/tests/unit/communication/test_bus_memory.py index 6f16c64847..81022a6b63 100644 --- a/tests/unit/communication/test_bus_memory.py +++ b/tests/unit/communication/test_bus_memory.py @@ -744,10 +744,10 @@ async def test_idle_polls_increment_without_logging( monkeypatch: pytest.MonkeyPatch, ) -> None: """Idle polls below the time threshold do not emit a summary.""" - from synthorg.communication import bus_memory as _bm + import time as _time clock = 1000.0 - monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + monkeypatch.setattr(_time, "monotonic", lambda: clock) bus = InMemoryMessageBus(config=_make_config()) await bus.start() await bus.subscribe("#general", "agent-a") @@ -761,13 +761,14 @@ async def test_summary_emits_after_time_interval( monkeypatch: pytest.MonkeyPatch, ) -> None: """Summary fires when time interval elapses.""" - from synthorg.communication import bus_memory as _bm + import time as _time + from synthorg.communication.bus_memory import ( _IDLE_SUMMARY_INTERVAL_SECONDS, ) clock = 1000.0 - monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + monkeypatch.setattr(_time, "monotonic", lambda: clock) bus = InMemoryMessageBus(config=_make_config()) await bus.start() await bus.subscribe("#general", "agent-a") @@ -777,7 +778,7 @@ async def test_summary_emits_after_time_interval( # Advance past the summary interval. clock = 1000.0 + _IDLE_SUMMARY_INTERVAL_SECONDS + 1.0 - monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + monkeypatch.setattr(_time, "monotonic", lambda: clock) await bus.receive("#general", "agent-a", timeout=0.0) # Counter should have been reset after summary. assert bus._idle_poll_count == 0 @@ -799,10 +800,10 @@ async def test_idle_state_reset_on_restart( monkeypatch: pytest.MonkeyPatch, ) -> None: """Idle counters reset when the bus is restarted.""" - from synthorg.communication import bus_memory as _bm + import time as _time clock = 1000.0 - monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + monkeypatch.setattr(_time, "monotonic", lambda: clock) bus = InMemoryMessageBus(config=_make_config()) await bus.start() await bus.subscribe("#general", "agent-a") @@ -811,6 +812,6 @@ async def test_idle_state_reset_on_restart( await bus.stop() clock = 2000.0 - monkeypatch.setattr(_bm.time, "monotonic", lambda: clock) + monkeypatch.setattr(_time, "monotonic", lambda: clock) await bus.start() assert bus._idle_poll_count == 0 From 449df2a912ab18c5bfbffc7ea18aee1fe88d30d9 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:04:48 +0100 Subject: [PATCH 5/6] fix: address 3 CodeRabbit findings -- return patched config, fix misleading message, isolate test - _bootstrap_app_logging now returns the (possibly patched) RootConfig so create_app's effective_config reflects the SYNTHORG_LOG_DIR override - Restructure _attach_handlers except block: critical sinks print "Refusing to start" (not "will be skipped") before raising - Stub create_app in test_server.py to avoid running the full app factory - Update _bootstrap_app_logging tests to assert on return values Co-Authored-By: Claude Opus 4.6 (1M context) --- src/synthorg/api/app.py | 18 ++++++++++++++--- src/synthorg/observability/setup.py | 19 ++++++++++++------ tests/unit/api/test_app.py | 19 +++++++++++++----- tests/unit/api/test_server.py | 31 +++++++++++++++++------------ 4 files changed, 60 insertions(+), 27 deletions(-) diff --git a/src/synthorg/api/app.py b/src/synthorg/api/app.py index ed8355344d..44229c1aa9 100644 --- a/src/synthorg/api/app.py +++ b/src/synthorg/api/app.py @@ -331,7 +331,7 @@ async def on_shutdown() -> None: # to code calling get_api_config(), not to the middleware itself. -def _bootstrap_app_logging(effective_config: RootConfig) -> None: +def _bootstrap_app_logging(effective_config: RootConfig) -> RootConfig: """Activate the structured logging pipeline. Applies the ``SYNTHORG_LOG_DIR`` env var override (for Docker @@ -342,6 +342,17 @@ def _bootstrap_app_logging(effective_config: RootConfig) -> None: default config with ``DEFAULT_SINKS``. Otherwise, delegates directly to ``bootstrap_logging``. + Args: + effective_config: Root config (possibly without a logging + section). + + Returns: + The config actually used for logging -- either the original + ``effective_config`` or a patched copy with the + ``SYNTHORG_LOG_DIR`` override applied. Callers should use + the returned value so that ``AppState.config.logging`` + reflects the active logging configuration. + Raises: ValueError: If ``SYNTHORG_LOG_DIR`` contains ``..`` path traversal components. @@ -349,7 +360,7 @@ def _bootstrap_app_logging(effective_config: RootConfig) -> None: log_dir = os.environ.get("SYNTHORG_LOG_DIR", "").strip() if not log_dir: bootstrap_logging(effective_config) - return + return effective_config # Validate before model_copy -- Pydantic validators do not run # on model_copy(update=...), so we must check manually. @@ -368,6 +379,7 @@ def _bootstrap_app_logging(effective_config: RootConfig) -> None: }, ) bootstrap_logging(patched) + return patched def create_app( # noqa: PLR0913 @@ -419,7 +431,7 @@ def create_app( # noqa: PLR0913 # flow through the configured sinks. Respects SYNTHORG_LOG_DIR # env var for Docker log directory override. try: - _bootstrap_app_logging(effective_config) + effective_config = _bootstrap_app_logging(effective_config) except Exception as exc: print( # noqa: T201 f"CRITICAL: Failed to initialise logging pipeline: {exc}. " diff --git a/src/synthorg/observability/setup.py b/src/synthorg/observability/setup.py index 6318115620..8a4a45fa42 100644 --- a/src/synthorg/observability/setup.py +++ b/src/synthorg/observability/setup.py @@ -143,19 +143,26 @@ def _attach_handlers( ) root_logger.addHandler(handler) except (OSError, RuntimeError, ValueError) as exc: - print( # noqa: T201 - f"WARNING: Failed to initialise log sink " - f"{sink!r}: {exc}. This sink will be skipped.", - file=sys.stderr, - flush=True, - ) if sink.file_path in _critical_sinks: + print( # noqa: T201 + f"CRITICAL: Log sink '{sink.file_path}' could not " + f"be initialised: {exc}. Refusing to start with " + "missing audit/access logs.", + file=sys.stderr, + flush=True, + ) msg = ( f"Critical log sink '{sink.file_path}' could not be " "initialised. Refusing to start with missing " "audit/access logs." ) raise RuntimeError(msg) from exc + print( # noqa: T201 + f"WARNING: Failed to initialise log sink " + f"{sink!r}: {exc}. This sink will be skipped.", + file=sys.stderr, + flush=True, + ) def _apply_logger_levels(config: LogConfig) -> None: diff --git a/tests/unit/api/test_app.py b/tests/unit/api/test_app.py index b29a4a7ea8..f2a83f9b64 100644 --- a/tests/unit/api/test_app.py +++ b/tests/unit/api/test_app.py @@ -421,7 +421,7 @@ def test_no_persistence_warns( # capture_logs context during create_app(). monkeypatch.setattr( "synthorg.api.app._bootstrap_app_logging", - lambda _config: None, + lambda config: config, ) import structlog @@ -929,9 +929,10 @@ def test_no_log_dir_calls_bootstrap( calls.append, ) config = RootConfig(company_name="test-co") - _bootstrap_app_logging(config) + result = _bootstrap_app_logging(config) assert len(calls) == 1 assert calls[0] is config + assert result is config def test_log_dir_with_existing_logging_config( self, @@ -948,10 +949,14 @@ def test_log_dir_with_existing_logging_config( company_name="test-co", logging=LogConfig(sinks=DEFAULT_SINKS, log_dir="original"), ) - _bootstrap_app_logging(config) + result = _bootstrap_app_logging(config) assert len(calls) == 1 assert calls[0].logging is not None assert calls[0].logging.log_dir == "/custom/logs" + # Return value is the patched config, not the original. + assert result is not config + assert result.logging is not None + assert result.logging.log_dir == "/custom/logs" def test_log_dir_without_logging_config( self, @@ -966,11 +971,14 @@ def test_log_dir_without_logging_config( ) config = RootConfig(company_name="test-co") assert config.logging is None - _bootstrap_app_logging(config) + result = _bootstrap_app_logging(config) assert len(calls) == 1 assert calls[0].logging is not None assert calls[0].logging.log_dir == "/data/logs" assert len(calls[0].logging.sinks) == len(DEFAULT_SINKS) + # Return value carries the new logging config. + assert result.logging is not None + assert result.logging.log_dir == "/data/logs" def test_whitespace_only_log_dir_treated_as_unset( self, @@ -984,9 +992,10 @@ def test_whitespace_only_log_dir_treated_as_unset( calls.append, ) config = RootConfig(company_name="test-co") - _bootstrap_app_logging(config) + result = _bootstrap_app_logging(config) assert len(calls) == 1 assert calls[0] is config + assert result is config def test_path_traversal_in_log_dir_raises( self, diff --git a/tests/unit/api/test_server.py b/tests/unit/api/test_server.py index b2581db515..ec6cd95ff1 100644 --- a/tests/unit/api/test_server.py +++ b/tests/unit/api/test_server.py @@ -11,23 +11,28 @@ class TestRunServerUvicornParams: """Verify that run_server passes correct params to uvicorn.run.""" - def test_access_log_disabled_and_log_config_none( - self, - monkeypatch: pytest.MonkeyPatch, - ) -> None: + def test_access_log_disabled_and_log_config_none(self) -> None: """Uvicorn access log is disabled; log_config is None.""" - # Prevent create_app from actually bootstrapping logging. - monkeypatch.setattr( - "synthorg.api.app._bootstrap_app_logging", - lambda _config: None, - ) + dummy_app = MagicMock() mock_run = MagicMock() - with patch("synthorg.api.server.uvicorn.run", mock_run): + with ( + patch( + "synthorg.api.server.create_app", + return_value=dummy_app, + ), + patch("synthorg.api.server.uvicorn.run", mock_run), + ): from synthorg.api.server import run_server run_server(RootConfig(company_name="test-co")) mock_run.assert_called_once() - kwargs = mock_run.call_args.kwargs - assert kwargs["access_log"] is False - assert kwargs["log_config"] is None + call_kwargs = mock_run.call_args + assert call_kwargs.kwargs.get("access_log") is False or ( + len(call_kwargs.args) > 0 + ) + # access_log and log_config are keyword args to uvicorn.run. + assert call_kwargs.kwargs["access_log"] is False + assert call_kwargs.kwargs["log_config"] is None + # The dummy app was passed as the first positional arg. + assert call_kwargs.args[0] is dummy_app From 3749bc1b6ceda06337b8cd534f70c43504085b25 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:26:44 +0100 Subject: [PATCH 6/6] fix: add AppState config integration test, clean up test_server - Add test_patched_config_preserved_on_app_state: verifies that SYNTHORG_LOG_DIR override flows through create_app into AppState - Add pytestmark with timeout(30) to test_server.py - Remove redundant pre-assertion in test_server.py - Drop redundant @pytest.mark.unit from class (pytestmark covers it) Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/unit/api/test_app.py | 16 ++++++++++++++++ tests/unit/api/test_server.py | 6 ++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/unit/api/test_app.py b/tests/unit/api/test_app.py index f2a83f9b64..068f91b002 100644 --- a/tests/unit/api/test_app.py +++ b/tests/unit/api/test_app.py @@ -1026,6 +1026,22 @@ def test_bootstrap_failure_prints_critical_and_reraises( captured = capsys.readouterr() assert "CRITICAL" in captured.err + def test_patched_config_preserved_on_app_state( + self, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """create_app stores the SYNTHORG_LOG_DIR-patched config on AppState.""" + monkeypatch.setenv("SYNTHORG_LOG_DIR", "/custom/volume/logs") + # Prevent bootstrap_logging from actually reconfiguring structlog. + monkeypatch.setattr( + "synthorg.api.app.bootstrap_logging", + lambda _config: None, + ) + app = create_app() + app_state = app.state["app_state"] + assert app_state.config.logging is not None + assert app_state.config.logging.log_dir == "/custom/volume/logs" + def _raise_runtime_error(_config: object) -> None: msg = "boom" diff --git a/tests/unit/api/test_server.py b/tests/unit/api/test_server.py index ec6cd95ff1..9c971d39a7 100644 --- a/tests/unit/api/test_server.py +++ b/tests/unit/api/test_server.py @@ -6,8 +6,9 @@ from synthorg.config.schema import RootConfig +pytestmark = [pytest.mark.unit, pytest.mark.timeout(30)] + -@pytest.mark.unit class TestRunServerUvicornParams: """Verify that run_server passes correct params to uvicorn.run.""" @@ -28,9 +29,6 @@ def test_access_log_disabled_and_log_config_none(self) -> None: mock_run.assert_called_once() call_kwargs = mock_run.call_args - assert call_kwargs.kwargs.get("access_log") is False or ( - len(call_kwargs.args) > 0 - ) # access_log and log_config are keyword args to uvicorn.run. assert call_kwargs.kwargs["access_log"] is False assert call_kwargs.kwargs["log_config"] is None