Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/release-please-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
".": {
"release-type": "python",
"package-name": "ai-company",
"include-component-in-tag": false,
"bump-minor-pre-major": true,
"bump-patch-for-minor-pre-major": true,
"changelog-sections": [
Expand Down
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ curl http://localhost:3000/api/v1/health # backend (via web proxy)
```text
src/ai_company/
api/ # Litestar REST + WebSocket API (controllers, guards, channels)
budget/ # Cost tracking, budget enforcement (pre-flight/in-flight checks, auto-downgrade), billing periods, cost tiers, quota/subscription tracking, CFO cost optimization (anomaly detection, efficiency analysis, downgrade recommendations, approval decisions), spending reports
budget/ # Cost tracking, budget enforcement (pre-flight/in-flight checks, auto-downgrade), billing periods, cost tiers, quota/subscription tracking, CFO cost optimization (anomaly detection, efficiency analysis, downgrade recommendations, approval decisions), spending reports, budget errors (BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError)
cli/ # CLI interface (future — thin API wrapper if needed)
communication/ # Message bus, dispatcher, messenger, channels, delegation, loop prevention, conflict resolution, meeting protocol
config/ # YAML company config loading and validation
core/ # Shared domain models and base classes
core/ # Shared domain models, base classes, and resilience config (RetryConfig, RateLimiterConfig)
engine/ # Agent orchestration, execution loops, parallel execution, task decomposition, routing, task assignment, task lifecycle, recovery, shutdown, workspace isolation, coordination error classification, and prompt policy validation
hr/ # HR engine: hiring, firing, onboarding, offboarding, agent registry, performance tracking (task metrics, collaboration scoring, trend detection), promotion/demotion (criteria evaluation, approval strategies, model mapping)
memory/ # Persistent agent memory (Mem0 initial, custom stack future — ADR-001), retrieval pipeline (ranking, injection, context formatting, non-inferable filtering), shared org memory (org/), consolidation/archival (consolidation/)
Expand Down
8 changes: 4 additions & 4 deletions DESIGN_SPEC.md
Original file line number Diff line number Diff line change
Expand Up @@ -2790,7 +2790,8 @@ ai-company/
│ │ ├── artifact.py # Produced work items
│ │ ├── role.py # Role model
│ │ ├── role_catalog.py # Role catalog
│ │ └── personality.py # Personality compatibility scoring
│ │ ├── personality.py # Personality compatibility scoring
│ │ └── resilience_config.py # RetryConfig, RateLimiterConfig (shared by config.schema + providers.resilience)
│ ├── engine/ # Agent orchestration, execution loops, parallel execution, task decomposition, routing, task assignment, task lifecycle, recovery, shutdown, workspace isolation, coordination error classification, and prompt policy validation
│ │ ├── errors.py # Engine error hierarchy
│ │ ├── prompt.py # System prompt builder
Expand Down Expand Up @@ -3028,7 +3029,7 @@ ai-company/
│ │ │ ├── role.py # ROLE_* constants
│ │ │ ├── routing.py # ROUTING_* constants
│ │ │ ├── sandbox.py # SANDBOX_* constants
│ │ │ ├── security.py # SECURITY_* constants
│ │ │ ├── security.py # SECURITY_* constants
│ │ │ ├── task.py # TASK_* constants
│ │ │ ├── task_assignment.py # TASK_ASSIGNMENT_* constants
│ │ │ ├── task_routing.py # TASK_ROUTING_* constants
Expand All @@ -3039,7 +3040,6 @@ ai-company/
│ │ │ ├── code_runner.py # CODE_RUNNER_* constants
│ │ │ ├── docker.py # DOCKER_* constants
│ │ │ ├── mcp.py # MCP_* constants
│ │ │ ├── security.py # Security event constants
│ │ │ ├── trust.py # Trust event constants
│ │ │ └── promotion.py # Promotion event constants
│ │ ├── processors.py # Log processors
Expand All @@ -3064,7 +3064,6 @@ ai-company/
│ │ │ ├── router.py # Router orchestrator
│ │ │ └── strategies.py # Routing strategies
│ │ └── resilience/ # Resilience patterns
│ │ ├── config.py # RetryConfig, RateLimiterConfig
│ │ ├── errors.py # RetryExhaustedError
│ │ ├── rate_limiter.py # Token bucket rate limiter
│ │ └── retry.py # RetryHandler with backoff
Expand Down Expand Up @@ -3175,6 +3174,7 @@ ai-company/
│ │ ├── enums.py # Budget-related enums
│ │ ├── billing.py # Billing period computation utilities
│ │ ├── enforcer.py # BudgetEnforcer service (pre-flight, in-flight, auto-downgrade)
│ │ ├── errors.py # BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError
│ │ ├── optimizer.py # CostOptimizer service — anomaly detection, efficiency analysis, downgrade recommendations, approval decisions (M5)
│ │ ├── optimizer_models.py # CostOptimizer domain models — anomaly, efficiency, downgrade, approval, config (M5)
│ │ ├── quota.py # Quota/subscription models, degradation config, quota snapshots
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies = [
"aiosqlite==0.22.1",
"jinja2==3.1.6",
"jsonschema==4.26.0",
"litellm==1.82.0",
"litellm==1.82.1",
"litestar[standard,structlog,pydantic,brotli,prometheus]==2.21.1",
"mcp==1.26.0",
"pydantic==2.12.5",
Expand Down
8 changes: 8 additions & 0 deletions src/ai_company/budget/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
)
from ai_company.budget.enforcer import BudgetEnforcer
from ai_company.budget.enums import BudgetAlertLevel
from ai_company.budget.errors import (
BudgetExhaustedError,
DailyLimitExceededError,
QuotaExhaustedError,
)
Comment on lines +41 to +45
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Removing the old budget-error aliases is another patch-level API break.

These new exports are fine as the canonical budget namespace, but, per the PR summary, this change also removes the previous `ai_company.engine` / `ai_company.engine.errors` re-exports. Existing consumers importing these exceptions from the old paths will start failing with `ImportError` as soon as this ships in a `fix:` (patch-level) release. Keep deprecated aliases for one release, or ship this as an explicit breaking change.

Also applies to: 108-145

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/ai_company/budget/__init__.py` around lines 41 - 45, Restore the
deprecated aliases so existing imports keep working: re-add the old re-exports
that previously lived under ai_company.engine and ai_company.engine.errors
(i.e., ensure BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError
are still importable from the old paths) and mark them as deprecated (emit a
DeprecationWarning when imported) so consumers get one release to migrate; apply
the same re-export+warning pattern for the other affected symbols referenced in
lines 108-145.

from ai_company.budget.hierarchy import (
BudgetHierarchy,
DepartmentBudget,
Expand Down Expand Up @@ -100,6 +105,7 @@
"BudgetAlertLevel",
"BudgetConfig",
"BudgetEnforcer",
"BudgetExhaustedError",
"BudgetHierarchy",
"CategoryBreakdown",
"CoordinationEfficiency",
Expand All @@ -113,6 +119,7 @@
"CostTierDefinition",
"CostTiersConfig",
"CostTracker",
"DailyLimitExceededError",
"DegradationAction",
"DegradationConfig",
"DepartmentBudget",
Expand All @@ -135,6 +142,7 @@
"ProviderCostModel",
"ProviderDistribution",
"QuotaCheckResult",
"QuotaExhaustedError",
"QuotaLimit",
"QuotaSnapshot",
"QuotaTracker",
Expand Down
6 changes: 3 additions & 3 deletions src/ai_company/budget/enforcer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@

from ai_company.budget.billing import billing_period_start, daily_period_start
from ai_company.budget.enums import BudgetAlertLevel
from ai_company.budget.quota import QuotaCheckResult
from ai_company.constants import BUDGET_ROUNDING_PRECISION
from ai_company.engine.errors import (
from ai_company.budget.errors import (
BudgetExhaustedError,
DailyLimitExceededError,
QuotaExhaustedError,
)
from ai_company.budget.quota import QuotaCheckResult
from ai_company.constants import BUDGET_ROUNDING_PRECISION
from ai_company.observability import get_logger
from ai_company.observability.events.budget import (
BUDGET_ALERT_THRESHOLD_CROSSED,
Expand Down
32 changes: 32 additions & 0 deletions src/ai_company/budget/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Budget-layer error hierarchy.

Defines budget-specific exceptions in a leaf module with no intra-project
imports, preventing circular dependency chains when these exceptions are
needed by both the budget enforcer and the engine layer.
"""


class BudgetExhaustedError(Exception):
    """Signal that no budget remains for further execution.

    This exception plays two roles:

    * **Raised** by :meth:`BudgetEnforcer.check_can_execute` when a
      pre-flight budget check fails (for example a monthly hard stop,
      a daily limit, or an exceeded provider quota).
    * **Caught** by the engine layer (``AgentEngine.run``), which turns
      it into an ``AgentRunResult`` carrying
      ``TerminationReason.BUDGET_EXHAUSTED``.
    """


class DailyLimitExceededError(BudgetExhaustedError):
    """An agent has exceeded its per-agent daily spending limit."""


class QuotaExhaustedError(BudgetExhaustedError):
    """Signal that a provider's quota has been used up.

    Degradation routing (FALLBACK/QUEUE) is not yet implemented, so
    this error is raised regardless of the degradation strategy.
    """
89 changes: 1 addition & 88 deletions src/ai_company/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
WorkflowHandoff,
)
from ai_company.core.enums import AutonomyLevel, CompanyType, SeniorityLevel
from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig
from ai_company.core.role import CustomRole # noqa: TC001
from ai_company.core.types import NotBlankStr # noqa: TC001
from ai_company.hr.promotion.config import PromotionConfig
Expand All @@ -35,94 +36,6 @@
logger = get_logger(__name__)


# ── Resilience config models ─────────────────────────────────────
# Defined here (not in providers.resilience) to avoid circular imports
# between config ↔ providers. Re-exported by providers.resilience.config.


class RetryConfig(BaseModel):
    """Settings that govern automatic retries of transient provider errors.

    Attributes:
        max_retries: How many retry attempts are allowed at most
            (0 turns retries off).
        base_delay: Seconds to wait before the first retry.
        max_delay: Ceiling, in seconds, on any computed delay.
        exponential_base: Growth multiplier for exponential backoff.
        jitter: Whether random jitter is added to each delay.
    """

    model_config = ConfigDict(frozen=True, allow_inf_nan=False)

    max_retries: int = Field(
        description="Maximum number of retry attempts (0 disables retries)",
        default=3,
        ge=0,
        le=10,
    )
    base_delay: float = Field(
        description="Initial delay in seconds before the first retry",
        default=1.0,
        gt=0.0,
    )
    max_delay: float = Field(
        description="Upper bound on computed delay in seconds",
        default=60.0,
        gt=0.0,
    )
    exponential_base: float = Field(
        description="Multiplier for exponential backoff",
        default=2.0,
        gt=1.0,
    )
    jitter: bool = Field(
        description="Whether to add random jitter to delay",
        default=True,
    )

    @model_validator(mode="after")
    def _validate_delay_ordering(self) -> Self:
        """Reject configurations whose base_delay is above max_delay."""
        lower, upper = self.base_delay, self.max_delay
        # Guard clause: the common, valid case exits immediately.
        if not lower > upper:
            return self

        msg = f"base_delay ({lower}) must be <= max_delay ({upper})"
        # Emit the validation-failure event before raising.
        logger.warning(
            CONFIG_VALIDATION_FAILED,
            model="RetryConfig",
            field="base_delay/max_delay",
            base_delay=lower,
            max_delay=upper,
            reason=msg,
        )
        raise ValueError(msg)


class RateLimiterConfig(BaseModel):
    """Settings for client-side rate limiting.

    Attributes:
        max_requests_per_minute: Cap on requests issued per minute;
            0 means unlimited.
        max_concurrent: Cap on simultaneously in-flight requests;
            0 means unlimited.
    """

    model_config = ConfigDict(frozen=True, allow_inf_nan=False)

    max_requests_per_minute: int = Field(
        description="Maximum requests per minute (0 = unlimited)",
        default=0,
        ge=0,
    )
    max_concurrent: int = Field(
        description="Maximum concurrent in-flight requests (0 = unlimited)",
        default=0,
        ge=0,
    )


class ProviderModelConfig(BaseModel):
"""Configuration for a single LLM model within a provider.

Expand Down
97 changes: 97 additions & 0 deletions src/ai_company/core/resilience_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Resilience configuration models (retry + rate limiting).

Defined in ``core/`` to avoid circular imports between ``config.schema``
and ``providers.resilience``. Both modules import from here.
"""

from typing import Self

from pydantic import BaseModel, ConfigDict, Field, model_validator

from ai_company.observability import get_logger
from ai_company.observability.events.config import CONFIG_VALIDATION_FAILED

logger = get_logger(__name__)


class RetryConfig(BaseModel):
    """Settings that govern automatic retries of transient provider errors.

    Attributes:
        max_retries: How many retry attempts are allowed at most
            (0 turns retries off).
        base_delay: Seconds to wait before the first retry.
        max_delay: Ceiling, in seconds, on any computed delay.
        exponential_base: Growth multiplier for exponential backoff.
        jitter: Whether random jitter is added to each delay.
    """

    model_config = ConfigDict(frozen=True, allow_inf_nan=False)

    max_retries: int = Field(
        description="Maximum number of retry attempts (0 disables retries)",
        default=3,
        ge=0,
        le=10,
    )
    base_delay: float = Field(
        description="Initial delay in seconds before the first retry",
        default=1.0,
        gt=0.0,
    )
    max_delay: float = Field(
        description="Upper bound on computed delay in seconds",
        default=60.0,
        gt=0.0,
    )
    exponential_base: float = Field(
        description="Multiplier for exponential backoff",
        default=2.0,
        gt=1.0,
    )
    jitter: bool = Field(
        description="Whether to add random jitter to delay",
        default=True,
    )

    @model_validator(mode="after")
    def _validate_delay_ordering(self) -> Self:
        """Reject configurations whose base_delay is above max_delay."""
        lower, upper = self.base_delay, self.max_delay
        # Guard clause: the common, valid case exits immediately.
        if not lower > upper:
            return self

        msg = f"base_delay ({lower}) must be <= max_delay ({upper})"
        # Emit the validation-failure event before raising.
        logger.warning(
            CONFIG_VALIDATION_FAILED,
            model="RetryConfig",
            field="base_delay/max_delay",
            base_delay=lower,
            max_delay=upper,
            reason=msg,
        )
        raise ValueError(msg)


class RateLimiterConfig(BaseModel):
    """Settings for client-side rate limiting.

    Attributes:
        max_requests_per_minute: Cap on requests issued per minute;
            0 means unlimited.
        max_concurrent: Cap on simultaneously in-flight requests;
            0 means unlimited.
    """

    model_config = ConfigDict(frozen=True, allow_inf_nan=False)

    max_requests_per_minute: int = Field(
        description="Maximum requests per minute (0 = unlimited)",
        default=0,
        ge=0,
    )
    max_concurrent: int = Field(
        description="Maximum concurrent in-flight requests (0 = unlimited)",
        default=0,
        ge=0,
    )
6 changes: 0 additions & 6 deletions src/ai_company/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@
TaskStructureClassifier,
)
from ai_company.engine.errors import (
BudgetExhaustedError,
DailyLimitExceededError,
DecompositionCycleError,
DecompositionDepthError,
DecompositionError,
Expand All @@ -67,7 +65,6 @@
NoEligibleAgentError,
ParallelExecutionError,
PromptBuildError,
QuotaExhaustedError,
ResourceConflictError,
TaskAssignmentError,
TaskRoutingError,
Expand Down Expand Up @@ -171,12 +168,10 @@
"AuctionAssignmentStrategy",
"AutoTopologyConfig",
"BudgetChecker",
"BudgetExhaustedError",
"ClassificationResult",
"CleanupCallback",
"CooperativeTimeoutStrategy",
"CostOptimizedAssignmentStrategy",
"DailyLimitExceededError",
"DecompositionContext",
"DecompositionCycleError",
"DecompositionDepthError",
Expand Down Expand Up @@ -221,7 +216,6 @@
"ProgressCallback",
"PromptBuildError",
"PromptTokenEstimator",
"QuotaExhaustedError",
"ReactLoop",
"RecoveryResult",
"RecoveryStrategy",
Expand Down
Loading
Loading