Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ curl http://localhost:3000/api/v1/health # backend (via web proxy)

```text
src/synthorg/
api/ # Litestar REST + WebSocket API (controllers, guards, channels, JWT + API key + WS ticket auth, approval gate integration, coordination endpoint, collaboration endpoint, settings endpoint, provider management endpoint (CRUD + test + presets), backup endpoint, setup endpoint (first-run wizard: status check, template listing, company/agent creation, completion gate), RFC 9457 structured errors (ErrorCategory, ErrorCode, ErrorDetail, ProblemDetail, CATEGORY_TITLES, category_title, category_type_uri, content negotiation)), AppState hot-reload slots (provider_registry, model_router with swap methods, provider_management), settings dispatcher lifecycle, logging bootstrap (_bootstrap_app_logging, SYNTHORG_LOG_DIR env var override, called before all other setup in create_app), service auto-wiring (auto_wire.py: Phase 1 at construction -- message bus/cost tracker/provider registry/task engine; Phase 2 in on_startup after persistence connects -- settings service + config resolver + provider management), lifecycle helpers (lifecycle.py: _safe_startup, _safe_shutdown, _cleanup_on_failure, _init_persistence, _try_stop)
api/ # Litestar REST + WebSocket API (controllers, guards, channels, JWT + API key + WS ticket auth, approval gate integration, coordination endpoint, collaboration endpoint, settings endpoint, provider management endpoint (CRUD + test + presets), backup endpoint, setup endpoint (first-run wizard: status check (CEO-role admin detection, min_password_length from settings), template listing, company/agent creation, completion gate (company + agent + provider verification)), RFC 9457 structured errors (ErrorCategory, ErrorCode, ErrorDetail, ProblemDetail, CATEGORY_TITLES, category_title, category_type_uri, content negotiation)), AppState hot-reload slots (provider_registry, model_router with swap methods, provider_management), settings dispatcher lifecycle, logging bootstrap (_bootstrap_app_logging, SYNTHORG_LOG_DIR env var override, called before all other setup in create_app), service auto-wiring (auto_wire.py: Phase 1 at construction -- message bus/cost tracker/provider registry/task engine; Phase 2 in on_startup after persistence connects -- settings service + config resolver + provider management), lifecycle helpers (lifecycle.py: _safe_startup, _safe_shutdown, _cleanup_on_failure, _init_persistence, _try_stop)
auth/ # Authentication subpackage (controller, service, middleware, JWT + API key + WS ticket store, models, config, secret resolution)
backup/ # Backup and restore -- scheduled/manual/lifecycle backups of persistence DB, agent memory, and company config. BackupService orchestrator, BackupScheduler (periodic asyncio task), RetentionManager (count + age pruning), tar.gz compression, SHA-256 checksums, manifest tracking, validated restore with atomic rollback and safety backup
handlers/ # ComponentHandler protocol + concrete handlers: PersistenceComponentHandler (SQLite VACUUM INTO), MemoryComponentHandler (copytree), ConfigComponentHandler (copy2)
Expand All @@ -127,7 +127,7 @@ src/synthorg/
engine/ # Agent orchestration, execution loops, parallel execution, task decomposition, routing, task assignment, centralized single-writer task state engine (TaskEngine), task lifecycle, recovery, shutdown, workspace isolation, coordination (multi-agent pipeline: TopologyDispatcher protocol, 4 dispatchers — SAS/centralized/decentralized/context-dependent, wave execution, workspace lifecycle integration, CoordinationSectionConfig company config bridge, build_coordinator factory), coordination error classification, prompt policy validation, checkpoint recovery (checkpoint/, per-turn persistence, heartbeat detection, CheckpointRecoveryStrategy), approval gate (escalation detection, context parking/resume, EscalationInfo/ResumePayload models), stagnation detection (stagnation/, StagnationDetector protocol, ToolRepetitionDetector, dual-signal analysis, corrective prompt injection), agent runtime state (AgentRuntimeState, lightweight per-agent execution status for dashboard queries and recovery), context budget management (context_budget.py, ContextBudgetIndicator, fill estimation, token estimation protocol in token_estimation.py), conversation compaction (compaction/, CompactionCallback type alias, CompactionConfig, CompressionMetadata, oldest-turns summarizer), execution loop auto-selection (loop_selector.py, AutoLoopConfig, AutoLoopRule, select_loop_type, build_execution_loop -- complexity-based loop routing with budget-aware downgrade, optional hybrid fallback, and configurable default_loop_type), hybrid execution loop (hybrid_loop.py, HybridLoop -- plan + mini-ReAct steps with per-step turn limits, progress-summary checkpoints, LLM-decided replanning; hybrid_models.py, HybridLoopConfig), shared plan helpers (plan_helpers.py, update_step_status, extract_task_summary, assess_step_success)
hr/ # HR engine: hiring, firing, onboarding, offboarding, agent registry, performance tracking (task metrics, collaboration scoring, LLM calibration sampling, collaboration overrides, trend detection), promotion/demotion (criteria evaluation, approval strategies, model mapping)
memory/ # Persistent agent memory (pluggable MemoryBackend protocol), backends/ (Mem0 adapter: backends/mem0/), retrieval pipeline (ranking, RRF fusion, injection, context formatting, non-inferable filtering), shared org memory (org/), consolidation/archival (consolidation/, dual-mode density-aware archival: DensityClassifier, AbstractiveSummarizer, ExtractivePreserver, DualModeConsolidationStrategy)
persistence/ # Operational data persistence -- pluggable PersistenceBackend protocol, SQLite initial, SettingsRepository (namespaced settings CRUD) (see Memory & Persistence design page)
persistence/ # Operational data persistence -- pluggable PersistenceBackend protocol, SQLite initial, SettingsRepository (namespaced settings CRUD), UserRepository (user CRUD + role-based counting) (see Memory & Persistence design page)
observability/ # Structured logging (8-sink pipeline: console + 7 file sinks with logger-name routing), correlation tracking (request_id/task_id/agent_id via contextvars), sensitive field redaction, SYNTHORG_LOG_LEVEL env var override, critical sink enforcement (audit.log/access.log), log sinks
providers/ # LLM provider abstraction (LiteLLM adapter), auth types (AuthType enum: api_key/oauth/custom_header/none), presets (ProviderPreset, PROVIDER_PRESETS for Ollama/LM Studio/OpenRouter/vLLM), runtime CRUD (management/ -- ProviderManagementService, asyncio.Lock-serialized create/update/delete/test, hot-reload of ProviderRegistry + ModelRouter via AppState swap)
settings/ # Runtime-editable settings persistence (DB > env > YAML > code defaults), typed definitions (9 namespaces, including JSON type for structural data), Fernet encryption for sensitive values, config bridge (JSON serialization for Pydantic models/collections), ConfigResolver (typed scalar + structural data accessors for controllers — get_agents, get_departments, get_provider_configs with validation fallbacks to YAML), validation, registry, change notifications via message bus, SettingsSubscriber protocol (subscriber.py), SettingsChangeDispatcher (dispatcher.py, polls #settings channel, routes to subscribers, restart_required filtering)
Expand Down
2 changes: 1 addition & 1 deletion docs/design/operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ future CLI tool are thin clients that call the API -- they contain no business l
| `/api/v1/analytics` | Performance metrics, dashboards |
| `/api/v1/settings` | Runtime-editable configuration (9 namespaces), schema discovery |
| `GET /api/v1/providers`, `POST /api/v1/providers`, `PUT /api/v1/providers/{name}`, `DELETE /api/v1/providers/{name}`, `POST /api/v1/providers/{name}/test`, `GET /api/v1/providers/presets`, `POST /api/v1/providers/from-preset` | Provider CRUD, connection testing, presets, 4 auth types (api_key, oauth, custom_header, none) |
| `/api/v1/setup` | First-run setup wizard: status check (public), template listing, company/agent creation, completion gate |
| `/api/v1/setup` | First-run setup wizard: status check (public), template listing, company/agent creation, completion gate (requires company + agent + provider) |
| `/api/v1/admin/backups` | Manual backup, list, detail, delete |
| `/api/v1/ws` | WebSocket for real-time updates (ticket auth via `?ticket=`) |
| `POST /api/v1/auth/ws-ticket` | Exchange JWT for one-time WebSocket connection ticket |
Expand Down
2 changes: 1 addition & 1 deletion docs/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ After the containers are running, open the web dashboard at [http://localhost:30
3. **Create your company** -- name your synthetic organization and optionally start from a template.
4. **Hire your first agent** -- choose a role, model, and personality for the first AI agent.

After completing the wizard, the dashboard appears and the setup wizard is not shown again.
All four steps must be completed -- the backend validates that a company, at least one agent, and at least one provider exist before allowing setup to finish. After completing the wizard, the dashboard appears and the setup wizard is not shown again.

To re-run the wizard later, use `synthorg setup` (resets the flag and opens the browser) or delete the `api.setup_complete` setting via the settings API.

Expand Down
88 changes: 80 additions & 8 deletions src/synthorg/api/controllers/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from litestar.status_codes import HTTP_201_CREATED
from pydantic import BaseModel, ConfigDict, Field, model_validator

from synthorg.api.auth.config import AuthConfig
from synthorg.api.dto import ApiResponse
from synthorg.api.errors import ApiValidationError, ConflictError, NotFoundError
from synthorg.api.guards import require_read_access, require_write_access
from synthorg.api.guards import HumanRole, require_read_access, require_write_access
from synthorg.api.state import AppState # noqa: TC001
from synthorg.core.enums import SeniorityLevel
from synthorg.core.types import NotBlankStr # noqa: TC001
Expand All @@ -24,21 +25,30 @@
SETUP_COMPANY_CREATED,
SETUP_COMPLETED,
SETUP_MODEL_NOT_FOUND,
SETUP_NO_AGENTS,
SETUP_NO_COMPANY,
SETUP_NO_PROVIDERS,
SETUP_PROVIDER_NOT_FOUND,
SETUP_STATUS_CHECKED,
SETUP_STATUS_SETTINGS_DEFAULT_USED,
SETUP_STATUS_SETTINGS_UNAVAILABLE,
SETUP_TEMPLATE_INVALID,
SETUP_TEMPLATE_NOT_FOUND,
SETUP_TEMPLATES_LISTED,
)
from synthorg.persistence.errors import QueryError
from synthorg.settings.errors import SettingNotFoundError

if TYPE_CHECKING:
from synthorg.settings.service import SettingsService

logger = get_logger(__name__)

# Derive from AuthConfig default to prevent silent divergence.
_DEFAULT_MIN_PASSWORD_LENGTH: int = AuthConfig.model_fields[
"min_password_length"
].default

# Serializes read-modify-write on the agents settings blob.
_AGENT_LOCK = asyncio.Lock()

Expand All @@ -50,16 +60,18 @@ class SetupStatusResponse(BaseModel):
"""First-run setup status.

Attributes:
needs_admin: True if no admin user exists yet.
needs_admin: True if no user with the CEO role exists yet.
needs_setup: True if setup has not been completed.
has_providers: True if at least one provider is configured.
min_password_length: Backend-configured minimum password length.
"""

model_config = ConfigDict(frozen=True)

needs_admin: bool
needs_setup: bool
has_providers: bool
min_password_length: int = Field(ge=8)


class TemplateInfoResponse(BaseModel):
Expand Down Expand Up @@ -215,8 +227,15 @@ async def get_status(
app_state: AppState = state.app_state
persistence = app_state.persistence

user_count = await persistence.users.count()
needs_admin = user_count == 0
try:
admin_count = await persistence.users.count_by_role(HumanRole.CEO)
except QueryError:
logger.warning(
SETUP_STATUS_SETTINGS_UNAVAILABLE,
exc_info=True,
)
admin_count = 0
needs_admin = admin_count == 0

settings_svc = app_state.settings_service
try:
Expand All @@ -235,6 +254,34 @@ async def get_status(
app_state.has_provider_registry and len(app_state.provider_registry) > 0
)

min_password_length = _DEFAULT_MIN_PASSWORD_LENGTH
raw_pw_value: str | None = None
try:
pw_entry = await settings_svc.get_entry("api", "min_password_length")
raw_pw_value = pw_entry.value
parsed = int(raw_pw_value)
min_password_length = max(parsed, _DEFAULT_MIN_PASSWORD_LENGTH)
except MemoryError, RecursionError:
raise
except SettingNotFoundError:
logger.debug(
SETUP_STATUS_SETTINGS_DEFAULT_USED,
setting="min_password_length",
)
except ValueError:
logger.warning(
SETUP_STATUS_SETTINGS_UNAVAILABLE,
setting="min_password_length",
reason="non_integer_value",
raw=raw_pw_value,
)
except Exception:
logger.warning(
SETUP_STATUS_SETTINGS_UNAVAILABLE,
setting="min_password_length",
exc_info=True,
)

logger.debug(
SETUP_STATUS_CHECKED,
needs_admin=needs_admin,
Expand All @@ -247,6 +294,7 @@ async def get_status(
needs_admin=needs_admin,
needs_setup=needs_setup,
has_providers=has_providers,
min_password_length=min_password_length,
),
)

Expand Down Expand Up @@ -421,8 +469,8 @@ async def complete_setup(
) -> ApiResponse[SetupCompleteResponse]:
"""Mark first-run setup as complete.

Validates that at least one provider is configured before
allowing completion.
Validates that a company, at least one agent, and at least one
provider are configured before allowing completion.

Args:
state: Application state.
Expand All @@ -431,16 +479,40 @@ async def complete_setup(
Success envelope.

Raises:
ApiValidationError: If no providers are configured.
ConflictError: If setup has already been completed.
ApiValidationError: If company, agents, or providers are missing.
"""
app_state: AppState = state.app_state
settings_svc = app_state.settings_service
await _check_setup_not_complete(settings_svc)

# Verify company has been created.
has_company = False
try:
entry = await settings_svc.get_entry("company", "company_name")
has_company = bool(entry.value and entry.value.strip())
except MemoryError, RecursionError:
raise
except SettingNotFoundError:
pass
if not has_company:
msg = "A company must be created before completing setup"
logger.warning(SETUP_NO_COMPANY)
raise ApiValidationError(msg)

# Verify at least one agent has been created.
existing_agents = await _get_existing_agents(settings_svc)
if not existing_agents:
msg = "At least one agent must be created before completing setup"
logger.warning(SETUP_NO_AGENTS)
raise ApiValidationError(msg)
Comment on lines +489 to +508
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Validation logic is correct; minor observability note.

The prerequisite checks follow the correct order per PR objectives: company → agents → providers.

One observation: if SettingNotFoundError is caught (lines 488-489), we log at DEBUG; has_company then remains False, which triggers the WARNING log at line 492. This results in two log entries for the same event (SETUP_NO_COMPANY) at different levels. While not incorrect (DEBUG for tracing, WARNING for alerting), it could create slight noise.

💡 Optional: Consolidate to single warning with reason
         has_company = False
+        reason: str | None = None
         try:
             entry = await settings_svc.get_entry("company", "company_name")
             has_company = bool(entry.value and entry.value.strip())
+            if not has_company:
+                reason = "empty_or_blank"
         except MemoryError, RecursionError:
             raise
         except SettingNotFoundError:
-            logger.debug(SETUP_NO_COMPANY, reason="setting_not_found")
+            reason = "setting_not_found"
         if not has_company:
             msg = "A company must be created before completing setup"
-            logger.warning(SETUP_NO_COMPANY)
+            logger.warning(SETUP_NO_COMPANY, reason=reason)
             raise ApiValidationError(msg)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/synthorg/api/controllers/setup.py` around lines 481 - 500, When
SettingNotFoundError is raised by settings_svc.get_entry, avoid emitting both
logger.debug and the later logger.warning for the same SETUP_NO_COMPANY event;
update the exception handler around settings_svc.get_entry (the try that sets
has_company) to set has_company = False and call
logger.warning(SETUP_NO_COMPANY, reason="setting_not_found") (or remove the
later warning) so only a single warning-level log is emitted before raising
ApiValidationError; reference settings_svc.get_entry, SettingNotFoundError,
has_company, SETUP_NO_COMPANY, logger.debug, logger.warning, and
ApiValidationError when making the change.


# Verify at least one provider is configured.
if not app_state.has_provider_registry or len(app_state.provider_registry) == 0:
msg = "At least one provider must be configured before completing setup"
logger.warning(SETUP_NO_PROVIDERS)
raise ApiValidationError(msg)

settings_svc = app_state.settings_service
await settings_svc.set("api", "setup_complete", "true")

logger.info(SETUP_COMPLETED)
Expand Down
4 changes: 4 additions & 0 deletions src/synthorg/observability/events/persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@
PERSISTENCE_USER_LIST_FAILED: Final[str] = "persistence.user.list_failed"
PERSISTENCE_USER_COUNTED: Final[str] = "persistence.user.counted"
PERSISTENCE_USER_COUNT_FAILED: Final[str] = "persistence.user.count_failed"
PERSISTENCE_USER_COUNTED_BY_ROLE: Final[str] = "persistence.user.counted_by_role"
PERSISTENCE_USER_COUNT_BY_ROLE_FAILED: Final[str] = (
"persistence.user.count_by_role_failed"
)
PERSISTENCE_USER_DELETED: Final[str] = "persistence.user.deleted"
PERSISTENCE_USER_DELETE_FAILED: Final[str] = "persistence.user.delete_failed"

Expand Down
9 changes: 9 additions & 0 deletions src/synthorg/observability/events/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
# Status check fallback (settings service unavailable)
SETUP_STATUS_SETTINGS_UNAVAILABLE: Final[str] = "setup.status.settings_unavailable"

# Status check used a default value for a setting (entry absent or not configured)
SETUP_STATUS_SETTINGS_DEFAULT_USED: Final[str] = "setup.status.settings_default_used"

# Provider not found during agent creation
SETUP_PROVIDER_NOT_FOUND: Final[str] = "setup.agent.provider_not_found"

Expand All @@ -40,6 +43,12 @@
# No providers configured when attempting to complete setup
SETUP_NO_PROVIDERS: Final[str] = "setup.flow.no_providers"

# No company created when attempting to complete setup
SETUP_NO_COMPANY: Final[str] = "setup.flow.no_company"

# No agents created when attempting to complete setup
SETUP_NO_AGENTS: Final[str] = "setup.flow.no_agents"

# Template not found during company creation
SETUP_TEMPLATE_NOT_FOUND: Final[str] = "setup.company.template_not_found"

Expand Down
15 changes: 15 additions & 0 deletions src/synthorg/persistence/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from pydantic import AwareDatetime # noqa: TC002

from synthorg.api.auth.models import ApiKey, User # noqa: TC001
from synthorg.api.guards import HumanRole # noqa: TC001
from synthorg.budget.cost_record import CostRecord # noqa: TC001
from synthorg.communication.message import Message # noqa: TC001
from synthorg.core.enums import ApprovalRiskLevel, TaskStatus # noqa: TC001
Expand Down Expand Up @@ -396,6 +397,20 @@ async def count(self) -> int:
"""
...

async def count_by_role(self, role: HumanRole) -> int:
    """Return how many users hold the given role.

    Args:
        role: Role used as the filter.

    Returns:
        Count of users whose role matches ``role``.

    Raises:
        PersistenceError: If the operation fails.
    """
    ...

async def delete(self, user_id: NotBlankStr) -> bool:
"""Delete a user by ID.

Expand Down
36 changes: 36 additions & 0 deletions src/synthorg/persistence/sqlite/user_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
PERSISTENCE_API_KEY_LISTED,
PERSISTENCE_API_KEY_SAVE_FAILED,
PERSISTENCE_API_KEY_SAVED,
PERSISTENCE_USER_COUNT_BY_ROLE_FAILED,
PERSISTENCE_USER_COUNT_FAILED,
PERSISTENCE_USER_COUNTED,
PERSISTENCE_USER_COUNTED_BY_ROLE,
PERSISTENCE_USER_DELETE_FAILED,
PERSISTENCE_USER_DELETED,
PERSISTENCE_USER_FETCH_FAILED,
Expand Down Expand Up @@ -262,6 +264,40 @@ async def count(self) -> int:
logger.debug(PERSISTENCE_USER_COUNTED, count=result)
return result

async def count_by_role(self, role: HumanRole) -> int:
    """Count the users that hold *role*.

    Args:
        role: Role whose ``value`` is matched against the ``role`` column.

    Returns:
        The ``COUNT(*)`` result as an int, or 0 when no row is returned.

    Raises:
        QueryError: If the database query fails.
    """
    role_value = role.value
    try:
        cursor = await self._db.execute(
            "SELECT COUNT(*) FROM users WHERE role = ?",
            (role_value,),
        )
        first_row = await cursor.fetchone()
    except (sqlite3.Error, aiosqlite.Error) as exc:
        msg = "Failed to count users by role"
        logger.exception(
            PERSISTENCE_USER_COUNT_BY_ROLE_FAILED,
            role=role_value,
            error=str(exc),
        )
        raise QueryError(msg) from exc

    # A missing row is treated as "no users" rather than an error.
    total = 0 if not first_row else int(first_row[0])
    logger.debug(
        PERSISTENCE_USER_COUNTED_BY_ROLE,
        role=role_value,
        count=total,
    )
    return total

async def delete(self, user_id: NotBlankStr) -> bool:
"""Delete a user by primary key.

Expand Down
Loading
Loading