Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ web/ # Vue 3 + PrimeVue + Tailwind CSS dashboard
- **Every module** with business logic MUST have: `from ai_company.observability import get_logger` then `logger = get_logger(__name__)`
- **Never** use `import logging` / `logging.getLogger()` / `print()` in application code
- **Variable name**: always `logger` (not `_logger`, not `log`)
- **Event names**: always use constants from the domain-specific module under `ai_company.observability.events` (e.g. `PROVIDER_CALL_START` from `events.provider`, `BUDGET_RECORD_ADDED` from `events.budget`, `CFO_ANOMALY_DETECTED` from `events.cfo`, `CONFLICT_DETECTED` from `events.conflict`, `MEETING_STARTED` from `events.meeting`, `CLASSIFICATION_START` from `events.classification`, `CONSOLIDATION_START` from `events.consolidation`, `ORG_MEMORY_QUERY_START` from `events.org_memory`, `API_REQUEST_STARTED` from `events.api`, `API_ROUTE_NOT_FOUND` from `events.api`, `CODE_RUNNER_EXECUTE_START` from `events.code_runner`, `DOCKER_EXECUTE_START` from `events.docker`, `MCP_INVOKE_START` from `events.mcp`, `SECURITY_EVALUATE_START` from `events.security`, `HR_HIRING_REQUEST_CREATED` from `events.hr`, `PERF_METRIC_RECORDED` from `events.performance`, `TRUST_EVALUATE_START` from `events.trust`, `PROMOTION_EVALUATE_START` from `events.promotion`, `PROMPT_BUILD_START` from `events.prompt`, `MEMORY_RETRIEVAL_START` from `events.memory`, `MEMORY_BACKEND_CONNECTED` from `events.memory`, `MEMORY_ENTRY_STORED` from `events.memory`, `MEMORY_BACKEND_SYSTEM_ERROR` from `events.memory`, `AUTONOMY_ACTION_AUTO_APPROVED` from `events.autonomy`, `TIMEOUT_POLICY_EVALUATED` from `events.timeout`, `PERSISTENCE_AUDIT_ENTRY_SAVED` from `events.persistence`, `TASK_ENGINE_STARTED` from `events.task_engine`, `COORDINATION_STARTED` from `events.coordination`, `COMMUNICATION_DISPATCH_START` from `events.communication`, `COMPANY_STARTED` from `events.company`, `CONFIG_LOADED` from `events.config`, `CORRELATION_ID_CREATED` from `events.correlation`, `DECOMPOSITION_STARTED` from `events.decomposition`, `DELEGATION_STARTED` from `events.delegation`, `EXECUTION_LOOP_START` from `events.execution`, `CHECKPOINT_SAVED` from `events.checkpoint`, `PERSISTENCE_CHECKPOINT_SAVED` from `events.persistence`, `GIT_OPERATION_START` from `events.git`, `PARALLEL_GROUP_START` from `events.parallel`, 
`PERSONALITY_LOADED` from `events.personality`, `QUOTA_CHECKED` from `events.quota`, `ROLE_ASSIGNED` from `events.role`, `ROUTING_STARTED` from `events.routing`, `SANDBOX_EXECUTE_START` from `events.sandbox`, `TASK_CREATED` from `events.task`, `TASK_ASSIGNMENT_STARTED` from `events.task_assignment`, `TASK_ROUTING_STARTED` from `events.task_routing`, `TEMPLATE_LOADED` from `events.template`, `TOOL_INVOKE_START` from `events.tool`, `WORKSPACE_CREATED` from `events.workspace`, `APPROVAL_GATE_ESCALATION_DETECTED` from `events.approval_gate`, `APPROVAL_GATE_ESCALATION_FAILED` from `events.approval_gate`, `APPROVAL_GATE_INITIALIZED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFIED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFY_FAILED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARKED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARK_FAILED` from `events.approval_gate`, `APPROVAL_GATE_PARK_TASKLESS` from `events.approval_gate`, `APPROVAL_GATE_RESUME_STARTED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_RESUMED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_DELETE_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_TRIGGERED` from `events.approval_gate`, `APPROVAL_GATE_NO_PARKED_CONTEXT` from `events.approval_gate`, `APPROVAL_GATE_LOOP_WIRING_WARNING` from `events.approval_gate`). Import directly: `from ai_company.observability.events.<domain> import EVENT_CONSTANT`
- **Event names**: always use constants from the domain-specific module under `ai_company.observability.events` (e.g. `PROVIDER_CALL_START` from `events.provider`, `BUDGET_RECORD_ADDED` from `events.budget`, `CFO_ANOMALY_DETECTED` from `events.cfo`, `CONFLICT_DETECTED` from `events.conflict`, `MEETING_STARTED` from `events.meeting`, `CLASSIFICATION_START` from `events.classification`, `CONSOLIDATION_START` from `events.consolidation`, `ORG_MEMORY_QUERY_START` from `events.org_memory`, `API_REQUEST_STARTED` from `events.api`, `API_ROUTE_NOT_FOUND` from `events.api`, `CODE_RUNNER_EXECUTE_START` from `events.code_runner`, `DOCKER_EXECUTE_START` from `events.docker`, `MCP_INVOKE_START` from `events.mcp`, `SECURITY_EVALUATE_START` from `events.security`, `HR_HIRING_REQUEST_CREATED` from `events.hr`, `PERF_METRIC_RECORDED` from `events.performance`, `TRUST_EVALUATE_START` from `events.trust`, `PROMOTION_EVALUATE_START` from `events.promotion`, `PROMPT_BUILD_START` from `events.prompt`, `MEMORY_RETRIEVAL_START` from `events.memory`, `MEMORY_BACKEND_CONNECTED` from `events.memory`, `MEMORY_ENTRY_STORED` from `events.memory`, `MEMORY_BACKEND_SYSTEM_ERROR` from `events.memory`, `AUTONOMY_ACTION_AUTO_APPROVED` from `events.autonomy`, `TIMEOUT_POLICY_EVALUATED` from `events.timeout`, `PERSISTENCE_AUDIT_ENTRY_SAVED` from `events.persistence`, `TASK_ENGINE_STARTED` from `events.task_engine`, `COORDINATION_STARTED` from `events.coordination`, `COMMUNICATION_DISPATCH_START` from `events.communication`, `COMPANY_STARTED` from `events.company`, `CONFIG_LOADED` from `events.config`, `CORRELATION_ID_CREATED` from `events.correlation`, `DECOMPOSITION_STARTED` from `events.decomposition`, `DELEGATION_STARTED` from `events.delegation`, `EXECUTION_LOOP_START` from `events.execution`, `CHECKPOINT_SAVED` from `events.checkpoint`, `PERSISTENCE_CHECKPOINT_SAVED` from `events.persistence`, `GIT_OPERATION_START` from `events.git`, `PARALLEL_GROUP_START` from `events.parallel`, 
`PERSONALITY_LOADED` from `events.personality`, `QUOTA_CHECKED` from `events.quota`, `ROLE_ASSIGNED` from `events.role`, `ROUTING_STARTED` from `events.routing`, `SANDBOX_EXECUTE_START` from `events.sandbox`, `TASK_CREATED` from `events.task`, `TASK_ASSIGNMENT_STARTED` from `events.task_assignment`, `TASK_ROUTING_STARTED` from `events.task_routing`, `TEMPLATE_LOADED` from `events.template`, `TOOL_INVOKE_START` from `events.tool`, `TOOL_OUTPUT_WITHHELD` from `events.tool`, `WORKSPACE_CREATED` from `events.workspace`, `APPROVAL_GATE_ESCALATION_DETECTED` from `events.approval_gate`, `APPROVAL_GATE_ESCALATION_FAILED` from `events.approval_gate`, `APPROVAL_GATE_INITIALIZED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFIED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFY_FAILED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARKED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARK_FAILED` from `events.approval_gate`, `APPROVAL_GATE_PARK_TASKLESS` from `events.approval_gate`, `APPROVAL_GATE_RESUME_STARTED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_RESUMED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_DELETE_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_TRIGGERED` from `events.approval_gate`, `APPROVAL_GATE_NO_PARKED_CONTEXT` from `events.approval_gate`, `APPROVAL_GATE_LOOP_WIRING_WARNING` from `events.approval_gate`). Import directly: `from ai_company.observability.events.<domain> import EVENT_CONSTANT`
- **Structured kwargs**: always `logger.info(EVENT, key=value)` — never `logger.info("msg %s", val)`
- **All error paths** must log at WARNING or ERROR with context before raising
- **All state transitions** must log at INFO
Expand Down
25 changes: 17 additions & 8 deletions docs/design/operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -787,14 +787,23 @@ execution. Post-tool-call scanning detects sensitive data in outputs.
### Output Scan Response Policies

After the output scanner detects sensitive data, a pluggable `OutputScanResponsePolicy`
protocol decides how to handle the findings:

| Policy | Behavior | Default for |
|--------|----------|-------------|
| **Redact** (default) | Return scanner's redacted content as-is | `SEMI`, `SUPERVISED` autonomy |
| **Withhold** | Clear redacted content -- fail-closed, no partial data returned | `LOCKED` autonomy |
| **Log-only** | Discard findings (logs at WARNING), pass original output through | `FULL` autonomy |
| **Autonomy-tiered** | Delegate to a sub-policy based on effective autonomy level | Composite policy |
protocol decides how to handle the findings. The `OutputScanResult` returned by each policy
carries a `ScanOutcome` value in its `outcome` field so downstream consumers (primarily
`ToolInvoker`) can distinguish intentional policy decisions from scanner failures:

| Policy | Behavior | `ScanOutcome` | Default for |
|--------|----------|---------------|-------------|
| **Redact** (default) | Return scanner's redacted content as-is | `REDACTED` | `SEMI`, `SUPERVISED` autonomy |
| **Withhold** | Clear redacted content — content withheld by policy | `WITHHELD` | `LOCKED` autonomy |
| **Log-only** | Discard findings (logs at WARNING), pass original output through | `LOG_ONLY` | `FULL` autonomy |
| **Autonomy-tiered** | Delegate to a sub-policy based on effective autonomy level | *(set by delegate)* | Composite policy |

The `outcome` field of `OutputScanResult` holds a `ScanOutcome` value (`CLEAN`, `REDACTED`,
`WITHHELD`, `LOG_ONLY`) and defaults to `CLEAN`. The scanner sets the initial value
(`REDACTED` when findings are detected), and the policy may then transform it (e.g.
`WithholdPolicy` changes `REDACTED` → `WITHHELD`). The `ToolInvoker._scan_output` method
branches on `ScanOutcome.WITHHELD` first to return a dedicated error message ("content
withheld by security policy") with `output_withheld` metadata — distinct from the generic
fail-closed path used for scanner exceptions.

Policy selection is declarative via `SecurityConfig.output_scan_policy_type`
(`OutputScanPolicyType` enum). A factory function (`build_output_scan_policy`) resolves the
Expand Down
1 change: 1 addition & 0 deletions src/ai_company/observability/events/tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
TOOL_SECURITY_DENIED: Final[str] = "tool.security.denied"
TOOL_SECURITY_ESCALATED: Final[str] = "tool.security.escalated"
TOOL_OUTPUT_REDACTED: Final[str] = "tool.output.redacted"
TOOL_OUTPUT_WITHHELD: Final[str] = "tool.output.withheld"

# ── Subprocess utility events ───────────────────────────────────
TOOL_SUBPROCESS_TRANSPORT_CLOSE_FAILED: Final[str] = (
Expand Down
5 changes: 4 additions & 1 deletion src/ai_company/security/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
- ``SecurityVerdict`` / ``SecurityVerdictType`` — evaluation results.
- ``SecurityContext`` — tool invocation context for evaluation.
- ``AuditEntry`` / ``AuditLog`` — audit recording.
- ``OutputScanResult`` / ``OutputScanner`` — post-tool output scanning.
- ``OutputScanResult`` / ``ScanOutcome`` / ``OutputScanner``
— post-tool output scanning.
- ``OutputScanResponsePolicy`` — protocol for output scan policies.
- ``RedactPolicy`` / ``WithholdPolicy`` / ``LogOnlyPolicy``
/ ``AutonomyTieredPolicy`` — policy implementations.
Expand All @@ -32,6 +33,7 @@
from ai_company.security.models import (
AuditEntry,
OutputScanResult,
ScanOutcome,
SecurityContext,
SecurityVerdict,
SecurityVerdictType,
Expand Down Expand Up @@ -66,6 +68,7 @@
"RedactPolicy",
"RuleEngine",
"RuleEngineConfig",
"ScanOutcome",
"SecOpsService",
"SecurityConfig",
"SecurityContext",
Expand Down
49 changes: 49 additions & 0 deletions src/ai_company/security/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,30 @@
from ai_company.core.types import NotBlankStr # noqa: TC001


class ScanOutcome(StrEnum):
"""Outcome of an output scan policy decision.

Tracks what the scanner/policy *did* with the output so that
downstream consumers (e.g. ``ToolInvoker``) can distinguish
intentional withholding from scanner failure.

Attributes:
CLEAN: No sensitive data detected (default).
REDACTED: Sensitive data found, redacted content available.
WITHHELD: Content intentionally withheld by policy.
LOG_ONLY: Findings discarded by policy, original content passed
through. Always emitted with ``has_sensitive_data=False``
because the policy resets the result — the audit log
(written by ``SecOpsService`` before the policy runs) is
the source of truth for what was actually detected.
"""

CLEAN = "clean"
REDACTED = "redacted"
WITHHELD = "withheld"
LOG_ONLY = "log_only"


class SecurityVerdictType(StrEnum):
"""Security verdict constants.

Expand Down Expand Up @@ -156,13 +180,17 @@ class OutputScanResult(BaseModel):
has_sensitive_data: Whether sensitive data was detected.
findings: Descriptions of findings.
redacted_content: Content with sensitive data replaced, or None.
outcome: What the scanner/policy did with the output.
Allows downstream consumers to distinguish intentional
withholding from scanner failure.
"""

model_config = ConfigDict(frozen=True)

has_sensitive_data: bool = False
findings: tuple[NotBlankStr, ...] = ()
redacted_content: str | None = None
outcome: ScanOutcome = ScanOutcome.CLEAN

@model_validator(mode="after")
def _check_consistency(self) -> OutputScanResult:
Expand All @@ -174,4 +202,25 @@ def _check_consistency(self) -> OutputScanResult:
if self.redacted_content is not None:
msg = "redacted_content must be None when has_sensitive_data is False"
raise ValueError(msg)
if self.outcome in (ScanOutcome.REDACTED, ScanOutcome.WITHHELD):
msg = (
f"outcome={self.outcome.value!r} is invalid when "
"has_sensitive_data is False"
)
raise ValueError(msg)
elif self.outcome == ScanOutcome.CLEAN:
msg = "outcome='clean' is invalid when has_sensitive_data is True"
raise ValueError(msg)
elif not self.findings:
msg = "findings must not be empty when has_sensitive_data is True"
raise ValueError(msg)
if self.outcome == ScanOutcome.REDACTED and self.redacted_content is None:
msg = "redacted_content must not be None when outcome is 'redacted'"
raise ValueError(msg)
if self.outcome == ScanOutcome.WITHHELD and self.redacted_content is not None:
msg = "redacted_content must be None when outcome is 'withheld'"
raise ValueError(msg)
if self.outcome == ScanOutcome.LOG_ONLY and self.has_sensitive_data:
msg = "outcome='log_only' is invalid when has_sensitive_data is True"
raise ValueError(msg)
return self
Comment thread
greptile-apps[bot] marked this conversation as resolved.
22 changes: 13 additions & 9 deletions src/ai_company/security/output_scan_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ai_company.observability.events.security import (
SECURITY_OUTPUT_SCAN_POLICY_APPLIED,
)
from ai_company.security.models import OutputScanResult
from ai_company.security.models import OutputScanResult, ScanOutcome

if TYPE_CHECKING:
from collections.abc import Mapping
Expand Down Expand Up @@ -95,7 +95,9 @@ def apply(
class WithholdPolicy:
"""Clear redacted content when sensitive data is found.

Forces fail-closed in the invoker — no partial data is returned.
Sets ``ScanOutcome.WITHHELD`` so the invoker returns a dedicated
"withheld by policy" error — no partial data is returned. This
is distinct from the fail-closed path used for scanner errors.
The ``findings`` tuple is deliberately preserved so that audit
consumers can categorise what was detected without seeing the
actual content.
Expand Down Expand Up @@ -127,7 +129,9 @@ def apply(
)
if not scan_result.has_sensitive_data:
return scan_result
return scan_result.model_copy(update={"redacted_content": None})
return scan_result.model_copy(
update={"redacted_content": None, "outcome": ScanOutcome.WITHHELD},
)


class LogOnlyPolicy:
Expand Down Expand Up @@ -172,12 +176,12 @@ def apply(
agent_id=context.agent_id,
note="Sensitive data detected but passed through by log_only policy",
)
else:
logger.debug(
SECURITY_OUTPUT_SCAN_POLICY_APPLIED,
policy="log_only",
has_sensitive_data=False,
)
return OutputScanResult(outcome=ScanOutcome.LOG_ONLY)
logger.debug(
SECURITY_OUTPUT_SCAN_POLICY_APPLIED,
policy="log_only",
has_sensitive_data=False,
)
return OutputScanResult()


Expand Down
3 changes: 2 additions & 1 deletion src/ai_company/security/output_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
SECURITY_OUTPUT_SCAN_FINDING,
SECURITY_OUTPUT_SCAN_START,
)
from ai_company.security.models import OutputScanResult
from ai_company.security.models import OutputScanResult, ScanOutcome
from ai_company.security.rules.credential_detector import CREDENTIAL_PATTERNS
from ai_company.security.rules.data_leak_detector import PII_PATTERNS

Expand Down Expand Up @@ -67,4 +67,5 @@ def scan(self, output: str) -> OutputScanResult:
has_sensitive_data=True,
findings=tuple(sorted(set(findings))),
redacted_content=redacted,
outcome=ScanOutcome.REDACTED,
)
4 changes: 3 additions & 1 deletion src/ai_company/security/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,10 @@ async def scan_output(
SECURITY_INTERCEPTOR_ERROR,
tool_name=context.tool_name,
policy=policy_name,
fallback_outcome=result.outcome.value,
note="Output scan policy application failed "
"— returning raw scan result",
"— returning raw scan result "
"(may be less strict than intended policy)",
)

return result
Expand Down
Loading
Loading