diff --git a/.claude/hookify.block-push-main.local.md b/.claude/hookify.block-push-main.local.md new file mode 100644 index 00000000..880e5aea --- /dev/null +++ b/.claude/hookify.block-push-main.local.md @@ -0,0 +1,19 @@ +--- +name: block-push-main +enabled: true +event: bash +pattern: git push[^|]*\bmain\b +action: block +--- + +🚫 **Direct push to main blocked!** + +This project requires all changes to `main` to go through pull requests. + +**What to do instead:** + +1. Create a feature branch: `git checkout -b feature/your-feature` +2. Push your branch: `git push -u origin feature/your-feature` +3. Create a PR: `gh pr create --base main` + +See AGENTS.md Git Rules for more details. diff --git a/.claude/hookify.warn-lru-cache.local.md b/.claude/hookify.warn-lru-cache.local.md new file mode 100644 index 00000000..01d1e381 --- /dev/null +++ b/.claude/hookify.warn-lru-cache.local.md @@ -0,0 +1,28 @@ +--- +name: warn-lru-cache +enabled: true +event: file +conditions: + - field: file_path + operator: regex_match + pattern: \.py$ + - field: new_text + operator: regex_match + pattern: from functools import.*lru_cache|@lru_cache +--- + +⚠️ **functools.lru_cache detected!** + +This project uses **cashews** for caching instead of `functools.lru_cache`. + +**Replace with:** + +```python +from cashews import cache + + +@cache(ttl="1h") +async def your_function(): ... +``` + +See AGENTS.md "Required Library Substitutions" for details. 
diff --git a/.claude/hookify.warn-stdlib-logging.local.md b/.claude/hookify.warn-stdlib-logging.local.md new file mode 100644 index 00000000..9f9a2943 --- /dev/null +++ b/.claude/hookify.warn-stdlib-logging.local.md @@ -0,0 +1,27 @@ +--- +name: warn-stdlib-logging +enabled: true +event: file +conditions: + - field: file_path + operator: regex_match + pattern: \.py$ + - field: new_text + operator: regex_match + pattern: ^import logging$|^from logging import +--- + +⚠️ **stdlib logging detected!** + +This project uses **loguru** instead of the stdlib `logging` module. + +**Replace with:** + +```python +from app.core.logging import logger + +logger.info("Your message") +logger.bind(key=value).info("Structured logging") +``` + +See AGENTS.md "Required Library Substitutions" for details. diff --git a/.claude/hookify.warn-utcnow.local.md b/.claude/hookify.warn-utcnow.local.md new file mode 100644 index 00000000..5a098988 --- /dev/null +++ b/.claude/hookify.warn-utcnow.local.md @@ -0,0 +1,28 @@ +--- +name: warn-utcnow +enabled: true +event: file +conditions: + - field: file_path + operator: regex_match + pattern: \.py$ + - field: new_text + operator: regex_match + pattern: datetime\.utcnow\(\) +--- + +⚠️ **Deprecated datetime.utcnow() detected!** + +`datetime.utcnow()` is deprecated and returns a naive datetime. + +**Replace with:** + +```python +from datetime import UTC, datetime + +now = datetime.now(UTC) +``` + +This returns a timezone-aware datetime, which is the correct approach. + +See AGENTS.md "Required Library Substitutions" for details. 
diff --git a/.claude/instincts/api-architecture.md b/.claude/instincts/api-architecture.md new file mode 100644 index 00000000..7dfc51a5 --- /dev/null +++ b/.claude/instincts/api-architecture.md @@ -0,0 +1,46 @@ +# Instinct: Three-API Architecture + +**Confidence**: 100% **Source**: AGENTS.md, codebase structure **Category**: architecture + +## Pattern + +Ouroboros uses three distinct APIs with different purposes and authentication: + +### Agent API (`/api/v1/client/*`) + +- **Purpose**: Communication with CipherSwarmAgent (Go hashcat runners) +- **Auth**: Bearer token (`csa_<agent_id>_<token>`) +- **Contract**: IMMUTABLE - locked to `contracts/v1_api_swagger.json` +- **Breaking changes**: NEVER allowed + +### Web UI API (`/api/v1/web/*`) + +- **Purpose**: SvelteKit frontend interactions +- **Auth**: OAuth2 + refresh tokens (session cookies) +- **Responses**: Optimized for UI consumption + +### Control API (`/api/v1/control/*`) + +- **Purpose**: CLI tool (csadmin), automation, integrations +- **Auth**: API key bearer (`cst_<user_id>_<token>`) +- **Errors**: RFC9457 `application/problem+json` format +- **Pagination**: Offset-based for programmatic consumption + +## Service Layer Reuse + +All three APIs delegate to shared service functions: + +``` +Web UI API ──┐ +Control API ──┼──> app/core/services/* ──> SQLAlchemy ORM +Agent API ──┘ +``` + +## Trigger + +Activate when: + +- Creating new endpoints +- Modifying existing API behavior +- Discussing authentication +- Planning API changes diff --git a/.claude/instincts/commit-conventions.md b/.claude/instincts/commit-conventions.md new file mode 100644 index 00000000..669a0d4f --- /dev/null +++ b/.claude/instincts/commit-conventions.md @@ -0,0 +1,29 @@ +# Instinct: Commit Conventions + +**Confidence**: 95% **Source**: Git history analysis (100+ commits) **Category**: workflow + +## Pattern + +When creating commits in the Ouroboros project: + +1. Use conventional commit format: `<type>(<scope>): <description>` +2. 
Most common types: `chore`, `fix`, `feat`, `docs`, `test`, `ci` +3. Common scopes: `api`, `deps`, `docs`, `auth`, `security`, `state-machines` +4. Keep descriptions lowercase and concise +5. No period at end of subject line + +## Examples + +``` +fix(api): improve error handling and add get_valid_actions method +test(state-machines): add tests for get_valid_actions method +chore(deps): bump docker/login-action from 3.6.0 to 3.7.0 +docs: link CLAUDE.md to AGENTS.md for reference +feat(api): implement Agent API v2 with enhanced features +``` + +## Anti-patterns + +- `Fixed bug` (missing type/scope) +- `feat(api): Added new feature.` (capitalized, has period) +- Long multi-line commit messages for simple changes diff --git a/.claude/instincts/library-substitutions.md b/.claude/instincts/library-substitutions.md new file mode 100644 index 00000000..ad383f33 --- /dev/null +++ b/.claude/instincts/library-substitutions.md @@ -0,0 +1,65 @@ +# Instinct: Library Substitutions + +**Confidence**: 100% **Source**: AGENTS.md (documented requirement) **Category**: code-style + +## Pattern + +The Ouroboros project requires specific library substitutions. Always use the prescribed alternatives. 
+ +| Never Use | Always Use | +| --------------------- | ---------------------------- | +| `logging` (stdlib) | `loguru` | +| `functools.lru_cache` | `cashews` | +| `datetime.utcnow()` | `datetime.now(UTC)` | +| `Optional[T]` | `T \| None` | + +## Examples + +### Logging + +```python +# Wrong +import logging + +logger = logging.getLogger(__name__) +logger.info("Message") + +# Correct +from loguru import logger + +logger.info("Message") +logger.bind(task_id=task.id).info("Task started") +``` + +### Datetime + +```python +# Wrong +from datetime import datetime + +now = datetime.utcnow() + +# Correct +from datetime import datetime, UTC + +now = datetime.now(UTC) +``` + +### Type Hints + +```python +# Wrong +from typing import Optional +def get_user(id: int) -> Optional[User]: + +# Correct +def get_user(id: int) -> User | None: +``` + +## Trigger + +Activate when: + +- Writing new Python code +- Reviewing code for style issues +- Seeing `import logging`, `lru_cache`, `utcnow()`, or `Optional[` diff --git a/.claude/instincts/protected-files.md b/.claude/instincts/protected-files.md new file mode 100644 index 00000000..02be404c --- /dev/null +++ b/.claude/instincts/protected-files.md @@ -0,0 +1,48 @@ +# Instinct: Protected Files + +**Confidence**: 100% **Source**: AGENTS.md (documented requirement) **Category**: safety + +## Pattern + +Certain directories and files are protected and must NEVER be modified without explicit permission. 
+ +### Protected Directories + +| Directory | Contents | Why Protected | +| ------------ | -------------------- | ----------------------- | +| `contracts/` | API contract specs | Agent API compatibility | +| `alembic/` | Database migrations | Data integrity | +| `.cursor/` | Cursor configuration | IDE settings | +| `.github/` | GitHub workflows | CI/CD stability | + +### Protected Files + +- `contracts/v1_api_swagger.json` - Agent API v1 contract (IMMUTABLE) +- `contracts/current_api_openapi.json` - Current API snapshot + +### Agent API v1 Rules + +The Agent API at `/api/v1/client/*` is IMMUTABLE: + +- Contract MUST match `contracts/v1_api_swagger.json` exactly +- Breaking changes are NEVER allowed +- Locked to OpenAPI 3.0.1 spec +- All responses must validate against spec + +## Response When Asked to Modify + +``` +I notice you're asking me to modify [protected path]. +This is a protected file/directory in Ouroboros. + +Per project rules, I cannot modify this without explicit permission. +Should I proceed anyway, or would you like to discuss alternatives? 
+``` + +## Trigger + +Activate when: + +- Asked to modify files in contracts/, alembic/, .cursor/, .github/ +- Touching Agent API v1 endpoints +- Making changes that could break API compatibility diff --git a/.claude/instincts/rfc9457-errors.md b/.claude/instincts/rfc9457-errors.md new file mode 100644 index 00000000..ee1beb58 --- /dev/null +++ b/.claude/instincts/rfc9457-errors.md @@ -0,0 +1,58 @@ +# Instinct: RFC9457 Error Responses + +**Confidence**: 100% **Source**: Control API implementation, design specs **Category**: api-design + +## Pattern + +Control API errors must return `application/problem+json` format per RFC9457: + +```python +{ + "type": "https://example.com/problems/invalid-request", + "title": "Invalid Request", + "status": 400, + "detail": "The request parameters are invalid", + "instance": "/api/v1/control/campaigns/123", +} +``` + +### Extension Fields + +Add context-specific extension fields: + +```python +# For state transition errors +{ + "type": "...", + "title": "Invalid State Transition", + "status": 409, + "detail": "Cannot start campaign from COMPLETED state", + "instance": "/api/v1/control/campaigns/123/start", + "current_state": "COMPLETED", + "valid_actions": ["archive"], # What CAN be done +} + +# For validation errors +{ + "type": "...", + "title": "Validation Error", + "status": 422, + "detail": "Request validation failed", + "instance": "/api/v1/control/campaigns", + "errors": [ + {"field": "name", "message": "Field is required"}, + ], +} +``` + +## Middleware Implementation + +Use `app/core/control_rfc9457_middleware.py` for automatic exception translation. 
+ +## Trigger + +Activate when: + +- Implementing Control API endpoints +- Handling errors in Control API +- Adding new exception types for Control API diff --git a/.claude/instincts/service-pattern.md b/.claude/instincts/service-pattern.md new file mode 100644 index 00000000..2e16da8d --- /dev/null +++ b/.claude/instincts/service-pattern.md @@ -0,0 +1,75 @@ +# Instinct: Service Layer Pattern + +**Confidence**: 95% **Source**: Codebase analysis, AGENTS.md **Category**: architecture + +## Pattern + +Business logic lives in service functions, not endpoints. Endpoints are thin wrappers. + +### Service Function Naming + +```python +# Standard CRUD operations +create_campaign(db, data) -> Campaign +get_campaign(db, id) -> Campaign | None +list_campaigns(db, filters) -> list[Campaign] +update_campaign(db, id, data) -> Campaign +delete_campaign(db, id) -> None + +# Action operations +start_campaign(db, id) -> Campaign +pause_campaign(db, id) -> Campaign +``` + +### Exception Pattern + +Services raise domain exceptions; endpoints translate to HTTP: + +```python +# app/core/exceptions.py +class CampaignNotFoundError(Exception): + """Raised when a campaign is not found.""" + + pass + + +class InvalidResourceStateError(Exception): + """Raised when a state transition is invalid.""" + + def __init__(self, message: str, current_state: str, valid_actions: list[str]): + self.current_state = current_state + self.valid_actions = valid_actions + super().__init__(message) + + +# app/api/v1/endpoints/web/campaigns.py +try: + campaign = await start_campaign(db, campaign_id) +except CampaignNotFoundError: + raise HTTPException(status_code=404, detail="Campaign not found") +except InvalidResourceStateError as e: + raise HTTPException( + status_code=409, detail=f"Cannot start: valid actions are {e.valid_actions}" + ) +``` + +### Database Session Pattern + +```python +async def create_campaign(db: AsyncSession, data: CampaignCreate) -> Campaign: + campaign = Campaign(**data.model_dump()) + 
db.add(campaign) + await db.flush() # Get ID without committing + await db.refresh(campaign) + return campaign + # Commit happens in session context manager +``` + +## Trigger + +Activate when: + +- Creating new endpoints +- Adding business logic +- Handling errors in API layer +- Writing service functions diff --git a/.claude/instincts/state-machines.md b/.claude/instincts/state-machines.md new file mode 100644 index 00000000..547b8ad6 --- /dev/null +++ b/.claude/instincts/state-machines.md @@ -0,0 +1,71 @@ +# Instinct: State Machine Pattern + +**Confidence**: 95% **Source**: Control API implementation, app/core/state_machines.py **Category**: domain-logic + +## Pattern + +Campaign and Attack entities use state machines for lifecycle management. + +### State Machine Structure + +```python +class CampaignStateMachine: + TRANSITIONS: ClassVar[dict[CampaignState, set[CampaignState]]] = { + CampaignState.DRAFT: {CampaignState.ACTIVE, CampaignState.ARCHIVED}, + CampaignState.ACTIVE: {CampaignState.PAUSED, CampaignState.COMPLETED, ...}, + # ... + } + + ACTIONS: ClassVar[dict[str, dict[CampaignState, CampaignState]]] = { + "start": {CampaignState.DRAFT: CampaignState.ACTIVE}, + "pause": {CampaignState.ACTIVE: CampaignState.PAUSED}, + # ... 
+ } + + @classmethod + def can_transition(cls, from_state: CampaignState, to_state: CampaignState) -> bool: + """Check if transition is valid.""" + + @classmethod + def get_valid_actions(cls, from_state: CampaignState) -> list[str]: + """Get all valid actions from a given state.""" +``` + +### Usage in Services + +```python +async def start_campaign(db: AsyncSession, campaign_id: int) -> Campaign: + campaign = await get_campaign(db, campaign_id) + if not campaign: + raise CampaignNotFoundError() + + if not CampaignStateMachine.can_transition(campaign.state, CampaignState.ACTIVE): + valid_actions = CampaignStateMachine.get_valid_actions(campaign.state) + raise InvalidResourceStateError( + f"Cannot start campaign from {campaign.state}", + current_state=campaign.state.value, + valid_actions=valid_actions, + ) + + campaign.state = CampaignState.ACTIVE + await db.flush() + return campaign +``` + +### Error Messages Include Valid Actions + +When a state transition fails, always include what actions ARE valid: + +```python +raise HTTPException( + status_code=409, detail=f"Cannot perform action. Valid actions: {valid_actions}" +) +``` + +## Trigger + +Activate when: + +- Working with Campaign or Attack state changes +- Implementing lifecycle actions (start, pause, resume, etc.) +- Handling `InvalidResourceStateError` diff --git a/.claude/instincts/testing-tiers.md b/.claude/instincts/testing-tiers.md new file mode 100644 index 00000000..70748b23 --- /dev/null +++ b/.claude/instincts/testing-tiers.md @@ -0,0 +1,54 @@ +# Instinct: Testing Tiers + +**Confidence**: 100% **Source**: AGENTS.md, justfile **Category**: workflow + +## Pattern + +Use the smallest test tier that covers your changes. 
+ +### Tier Selection + +| Tier | Command | Use When | +| -------- | ---------------------------- | ---------------------------------------------- | +| Backend | `just test-backend` | Backend logic, services, models, API endpoints | +| Frontend | `pnpm test` (from frontend/) | UI components, client logic, stores | +| E2E | `just test-e2e` | Complete user workflows, integration | +| CI Check | `just ci-check` | PR-ready, touching multiple tiers | + +### Decision Tree + +``` +Did you change Python code? +├─ Yes → just test-backend +│ └─ Also changed frontend? → just ci-check +└─ No + └─ Did you change frontend code? + ├─ Yes → cd frontend && pnpm test + └─ No (docs only, config) → skip tests +``` + +### Skip Testing When + +- Verification-only tasks (reading code, answering questions) +- Documentation-only changes +- Configuration tweaks that don't affect behavior + +### Run `just ci-check` When + +- PR is ready for review +- Changes span backend AND frontend +- Unsure what's affected + +## Anti-patterns + +- Running `just ci-check` for every small backend change +- Skipping tests entirely for code changes +- Running E2E tests for unit-testable logic + +## Trigger + +Activate when: + +- Completing code changes +- Preparing commits +- Before creating PRs diff --git a/.claude/skills/ouroboros-patterns.md b/.claude/skills/ouroboros-patterns.md new file mode 100644 index 00000000..533f6ac2 --- /dev/null +++ b/.claude/skills/ouroboros-patterns.md @@ -0,0 +1,139 @@ +# Ouroboros Development Patterns + +## Overview + +This skill captures the coding patterns, conventions, and workflows specific to the Ouroboros project - a distributed password cracking management system built with FastAPI backend and SvelteKit frontend. 
+ +## Commit Conventions + +### Format + +``` +<type>(<scope>): <description> +``` + +### Commit Types (by frequency) + +| Type | Usage | Example | +| ------- | ------------------------- | ------------------------------------------------------------------- | +| `chore` | Maintenance, deps, config | `chore(deps): bump docker/login-action from 3.6.0 to 3.7.0` | +| `fix` | Bug fixes | `fix(api): improve error handling and add get_valid_actions method` | +| `feat` | New features | `feat(api): implement Agent API v2 with enhanced features` | +| `docs` | Documentation | `docs: link CLAUDE.md to AGENTS.md for reference` | +| `test` | Tests | `test(state-machines): add tests for get_valid_actions method` | +| `ci` | CI/CD changes | `ci: add pre-commit hooks workflow` | + +### Common Scopes + +| Scope | Area | +| ---------------- | --------------------------- | +| `api` | API endpoints (any version) | +| `deps` | Dependencies | +| `docs` | Documentation | +| `auth` | Authentication | +| `security` | Security fixes | +| `state-machines` | State machine logic | + +## Architecture Patterns + +### Three-API Architecture + +``` +/api/v1/client/* - Agent API (IMMUTABLE - locked to OpenAPI 3.0.1) +/api/v1/web/* - Web UI API (OAuth2 + refresh tokens) +/api/v1/control/* - Control API (API key bearer, RFC9457 errors) +``` + +### Service Layer Pattern + +All APIs delegate to shared service functions in `app/core/services/`: + +- `create_*`, `get_*`, `list_*`, `update_*`, `delete_*` naming +- Business logic lives in services, not endpoints +- Domain exceptions raised by services, translated to HTTP by endpoints + +### State Machine Pattern + +Campaign and Attack entities use state machines (`app/core/state_machines.py`): + +- State transitions validated before execution +- `get_valid_actions()` returns allowed actions from current state +- Invalid transitions raise `InvalidResourceStateError` + +## Error Handling + +### Control API (RFC9457) + +```python +{ + "type": 
"https://example.com/problems/invalid-request", + "title": "Invalid Request", + "status": 400, + "detail": "The request parameters are invalid", + "instance": "/api/v1/control/campaigns/123", + "valid_actions": ["start", "archive"], # Extension fields +} +``` + +### Service Layer Pattern + +```python +# Service raises domain exception +class CampaignNotFoundError(Exception): + pass + + +# Endpoint translates to HTTP +try: + campaign = await get_campaign_service(db, campaign_id) +except CampaignNotFoundError: + raise HTTPException(status_code=404, detail="Campaign not found") +``` + +## Required Substitutions + +| Never Use | Always Use | +| --------------------- | ---------------------------- | +| `logging` (stdlib) | `loguru` | +| `functools.lru_cache` | `cashews` | +| `datetime.utcnow()` | `datetime.now(datetime.UTC)` | +| `Optional[T]` | `T \| None` | + +## Pydantic v2 Pattern + +```python +from typing import Annotated +from pydantic import Field + + +class QueueStatus(BaseModel): + name: Annotated[str, Field(description="Queue name", min_length=1)] + pending_jobs: Annotated[int | None, Field(description="Pending jobs", ge=0)] = 0 +``` + +## Testing Tiers + +| Tier | Command | Use When | +| -------- | ------------------- | ------------------------------- | +| Backend | `just test-backend` | Backend logic, services, models | +| Frontend | `pnpm test` | UI components, client logic | +| E2E | `just test-e2e` | Complete user workflows | + +Run smallest tier that covers changes. Run `just ci-check` only when PR-ready. 
+ +## Protected Files + +Never modify without permission: + +- `contracts/` - API contract specs +- `alembic/` - Database migrations +- `.cursor/` - Cursor configuration +- `.github/` - GitHub workflows + +## Spec-Driven Development + +Authoritative specs in `.kiro/specs/`: + +- Phased implementation (phase-1 through phase-6) +- Each phase has: `design.md`, `requirements.md`, `tasks.md` +- Control API spec: `.kiro/specs/phase-2e-control-api-v1/` diff --git a/.coderabbit.yml b/.coderabbit.yml index aa23d2dc..002da797 100644 --- a/.coderabbit.yml +++ b/.coderabbit.yml @@ -61,35 +61,11 @@ reviews: poem: true enable_prompt_for_ai_agents: true path_filters: - # Python bytecode cache - - "**/__pycache__/**" - - "**/*.pyc" - - "**/*.pyo" - # Build artifacts - - "frontend/build/**" - - "frontend/test-results/**" - - "frontend/test-artifacts/**" - # Generated files - - "contracts/current_api_openapi.json" - - "alembic/versions/*.py" - - "coverage.xml" - # Logs and debug files - - "logs/**" - - "**/*.log" - - "firebase-debug.log" - # IDE and editor files - - "**/*.code-workspace" - - "**/.vscode/**" - - "**/.idea/**" - # Lock files (review dependency changes instead) - - "**/*lock.json" - - "**/*lock.yaml" - - "**/*lock.yml" - - "uv.lock" - - "bun.lock" - # OS files - - "**/.DS_Store" - - "**/Thumbs.db" + # Exclude auto-generated files that shouldn't be reviewed + - "!contracts/current_api_openapi.json" + - "!alembic/versions/*.py" + # Note: CodeRabbit respects .gitignore and has sensible defaults + # for lock files, build artifacts, and cache directories path_instructions: - path: app/api/v1/endpoints/agent/** instructions: > @@ -160,9 +136,6 @@ reviews: - "Draft" - "[Draft]" - "work in progress" - labels: - - "auto-reviewed" - - "security-review-needed" drafts: false base_branches: - "main" @@ -442,7 +415,7 @@ issue_enrichment: auto_enrich: enabled: true planning: - enabled: true + enabled: false auto_planning: enabled: false labels: [] diff --git 
a/.reports/dead-code-analysis.md b/.reports/dead-code-analysis.md new file mode 100644 index 00000000..fd695171 --- /dev/null +++ b/.reports/dead-code-analysis.md @@ -0,0 +1,166 @@ +# Dead Code Analysis Report + +**Generated**: 2026-02-09 **Branch**: control_api_completion + +## Summary + +| Category | Count | Severity | +| ----------------------- | ----- | -------- | +| Unused Frontend Files | 156 | CAUTION | +| Unused Frontend Exports | 43 | SAFE | +| Unused Frontend Types | 51 | SAFE | +| Unused Dev Dependencies | 21 | CAUTION | +| Duplicate Exports | 2 | SAFE | +| Python Unused Imports | 0 | - | +| Python Unused Variables | 0 | - | + +## Backend (Python) + +**Status**: Clean + +Ruff analysis (F401, F841, F811) found no unused imports or variables in `app/` or `tests/`. + +## Frontend (TypeScript/Svelte) + +### Unused Files (156 files) - CAUTION + +Many are Shadcn-UI components that may be used in the future. Categories: + +#### UI Component Libraries (NOT recommended to delete) + +These are standard Shadcn-UI components - keep for future use: + +| Directory | Files | Status | +| --------------------- | ----- | -------------------- | +| `ui/calendar/` | 17 | KEEP - May be needed | +| `ui/carousel/` | 7 | KEEP - May be needed | +| `ui/collapsible/` | 4 | KEEP - May be needed | +| `ui/command/` | 11 | KEEP - May be needed | +| `ui/context-menu/` | 13 | KEEP - May be needed | +| `ui/data-table/` | 4 | KEEP - May be needed | +| `ui/drawer/` | 10 | KEEP - May be needed | +| `ui/hover-card/` | 3 | KEEP - May be needed | +| `ui/input-otp/` | 5 | KEEP - May be needed | +| `ui/menubar/` | 12 | KEEP - May be needed | +| `ui/navigation-menu/` | ~10 | KEEP - May be needed | +| `ui/pagination/` | ~5 | KEEP - May be needed | +| `ui/popover/` | ~3 | KEEP - May be needed | +| `ui/resizable/` | ~4 | KEEP - May be needed | +| `ui/scroll-area/` | ~3 | KEEP - May be needed | +| `ui/select/` | ~10 | KEEP - May be needed | +| `ui/sheet/` | ~8 | KEEP - May be needed | +| `ui/slider/` 
| ~2 | KEEP - May be needed | +| `ui/sonner/` | ~2 | KEEP - May be needed | +| `ui/toggle-group/` | ~3 | KEEP - May be needed | + +#### Potentially Removable Files - SAFE + +| File | Reason | Recommendation | +| --------------------------------------------------------- | ----------------------- | -------------------- | +| `.eslintrc.local.js` | Local config not in use | SAFE to delete | +| `playwright.config.e2e.ts` | Duplicate config? | Verify before delete | +| `src/lib/components/campaigns/CampaignDeleteModal.svelte` | Unused component | Verify usage | +| `src/lib/components/campaigns/CampaignEditorModal.svelte` | Unused component | Verify usage | +| `src/lib/components/resources/RulelistDropdown.svelte` | Unused component | Verify usage | +| `src/lib/components/resources/WordlistDropdown.svelte` | Unused component | Verify usage | + +### Unused Exports (43 functions) - SAFE + +Store getter functions that may be used dynamically: + +``` +getAttacks, getWordlists, getRulelists, getResourcesLoading (attacks.svelte.ts) +getCampaigns, getCampaignsLoading, getCampaignsError, getTotalPages, getCurrentPage, getPageSize, getTotal (campaigns.svelte.ts) +getProjects, getProjectsLoading, getProjectsError, getProjectsPagination, getActiveProject, getAvailableProjects, getContextUser, getContextLoading, getContextError (projects.svelte.ts) +getResources, getResourcesLoading, getResourcesError, getResourcesPagination, getWordlists, getRulelists, getMasklists, getCharsets, getDynamicWordlists (resources.svelte.ts) +getUsers, getUsersLoading, getUsersError, getUsersPagination (users.svelte.ts) +``` + +**Recommendation**: Keep - these are likely accessed via Svelte reactive statements or templates. 
+ +### Unused Types (51 types) - SAFE + +These are type definitions that may be used for documentation or future features: + +- Schema types: `DictionaryAttackData`, `MaskAttackData`, `BruteForceAttackData` +- Response types: `PaginationRequest`, `PaginationResponse`, `SuccessResponse`, `ErrorResponse` +- Form types: `CampaignFormData`, `ProjectFormData`, `LoginSchema` +- Entity types: `AttackRead`, `CampaignRead`, `ProjectRead`, `UserProfile` + +**Recommendation**: Keep - type definitions don't affect bundle size and serve as documentation. + +### Unused Dev Dependencies (21) - CAUTION + +| Package | Used By | Recommendation | +| ----------------------------- | ----------------- | ------------------------------------ | +| `@lucide/svelte` | Icons | VERIFY - may be imported dynamically | +| `@tailwindcss/forms` | Tailwind plugin | KEEP - used in tailwind.config | +| `@tailwindcss/typography` | Tailwind plugin | KEEP - used in tailwind.config | +| `@types/d3-scale` | D3 types | VERIFY - check chart components | +| `@types/d3-shape` | D3 types | VERIFY - check chart components | +| `@zxcvbn-ts/core` | Password strength | VERIFY - check auth forms | +| `@zxcvbn-ts/language-common` | Password strength | VERIFY - check auth forms | +| `@zxcvbn-ts/language-en` | Password strength | VERIFY - check auth forms | +| `d3-scale` | Charts | VERIFY - check chart components | +| `d3-shape` | Charts | VERIFY - check chart components | +| `human-format` | Formatting | VERIFY - check utilities | +| `mode-watcher` | Theme switching | VERIFY - check theme logic | +| `prettier-plugin-svelte` | Dev tooling | KEEP - used by prettier | +| `prettier-plugin-tailwindcss` | Dev tooling | KEEP - used by prettier | +| `rollup-plugin-visualizer` | Build analysis | KEEP - dev tool | +| `runed` | Svelte utilities | VERIFY - check usage | +| `svelte-toolbelt` | Svelte utilities | VERIFY - check usage | +| `tailwind-variants` | Styling | VERIFY - check component styles | +| `tailwindcss` | CSS 
framework | KEEP - core dependency | +| `tw-animate-css` | Animations | VERIFY - check animations | +| `yeezy-dates` | Date utilities | VERIFY - check date formatting | + +### Duplicate Exports (2) - SAFE + +| Export | File | Recommendation | +| ---------------------------------------- | ----------------------------- | ------------------ | +| `ContextResponse\|contextResponseSchema` | `src/lib/schemas/auth.ts` | Clean up duplicate | +| `ProjectRead\|ProjectReadType` | `src/lib/schemas/projects.ts` | Clean up duplicate | + +## Recommendations + +### Immediate Actions (SAFE) + +1. **Delete `.eslintrc.local.js`** - Local config file not in use +2. **Clean up duplicate exports** - Minor refactoring in `auth.ts` and `projects.ts` + +### Requires Verification (CAUTION) + +1. **Campaign/Resource modals** - Verify if used in routes before deletion +2. **Dev dependencies** - Run `pnpm why <package>` to verify usage +3. **Playwright config** - Check if `playwright.config.e2e.ts` duplicates main config + +### Keep (UI Library) + +All Shadcn-UI component directories should be kept. They are standard UI primitives that: + +- Don't significantly impact bundle size (tree-shaking) +- May be needed for future features +- Are easy to add back but tedious to regenerate + +## Test Verification Required + +Before any deletions, run: + +```bash +# Backend tests +just test-backend + +# Frontend tests +cd frontend && pnpm test + +# E2E tests (if available) +just test-e2e +``` + +## Next Steps + +1. Run tests to establish baseline +2. Delete SAFE items one at a time +3. Re-run tests after each deletion +4. Commit after verified deletions diff --git a/AGENTS.md b/AGENTS.md index 549dda6a..d0ca2d82 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,76 +1,66 @@ # Ouroboros Agents Guide -This AGENTS.md file provides comprehensive guidance for AI agents working with the Ouroboros distributed password cracking management system. 
+Distributed password cracking management system built with FastAPI (backend) and SvelteKit (frontend). Coordinates Go-based agents (CipherSwarmAgent) running hashcat across multiple machines. + +> **Authoritative specs**: `.kiro/specs/` contains numbered implementation specs. --- -## Project Overview +## Critical Rules + +### Protected Files - NEVER Modify Without Permission + +| Directory | Contents | +| ------------ | ------------------------------------------------------------------ | +| `contracts/` | API contract specs (v1_api_swagger.json, current_api_openapi.json) | +| `alembic/` | Database migrations | +| `.cursor/` | Cursor configuration | +| `.github/` | GitHub workflows | + +### Agent API v1 - IMMUTABLE -Ouroboros is a distributed password cracking management system built with FastAPI and SvelteKit. It coordinates multiple agents running hashcat to efficiently distribute password cracking tasks across a network of machines. +- **Endpoint**: `/api/v1/client/*` +- **Contract**: Must match `contracts/v1_api_swagger.json` exactly +- **Breaking changes**: NEVER allowed - locked to OpenAPI 3.0.1 +- **Validation**: All responses must validate against spec -> [!NOTE] -> Treat .kiro/specs/ as the authoritative source for the project's requirements and architecture. The specs are numbered sequentially and serve as the implementation plan for the project. 
+### Required Library Substitutions -### Key Components +| Never Use | Always Use Instead | +| --------------------- | ---------------------------- | +| `logging` (stdlib) | `loguru` | +| `functools.lru_cache` | `cashews` | +| `datetime.utcnow()` | `datetime.now(UTC)` | +| `Optional[T]` | `T \| None` | -- **Backend**: FastAPI application with PostgreSQL, SQLAlchemy ORM, JWT authentication -- **Frontend**: SvelteKit SPA with Shadcn-Svelte components and Tailwind CSS -- **Agent System**: Distributed Go-based agents (CipherSwarmAgent) that execute hashcat tasks -- **Storage**: MinIO S3-compatible storage for attack resources (wordlists, rules, masks) -- **Caching**: Cashews library for Redis-compatible caching -- **Task Queue**: Celery for background task processing +### Git Rules + +- **NO direct pushes to `main`** - PRs only +- **Never commit on behalf of maintainer** - always open PRs +- **Use handle 'UncleSp1d3r'** - never real name in commits/docs +- **Conventional commits**: `<type>(<scope>): <description>` + +--- ## Project Structure ```text Ouroboros/ -├── app/ # FastAPI backend application -│ ├── api/v1/endpoints/ # API endpoints organized by interface +├── app/ # FastAPI backend +│ ├── api/v1/endpoints/ │ │ ├── agent/ # Agent API (/api/v1/client/*) │ │ ├── web/ # Web UI API (/api/v1/web/*) -│ │ ├── control/ # Control API (/api/v1/control/*) -│ │ └── *.py # Shared infrastructure APIs -│ ├── core/ # Core application logic -│ ├── db/ # Database configuration -│ ├── models/ # SQLAlchemy database models -│ ├── schemas/ # Pydantic request/response schemas -│ └── plugins/ # Plugin system -├── frontend/ # SvelteKit frontend application -│ ├── src/lib/components/ # Reusable Svelte components -│ ├── src/routes/ # SvelteKit routes -│ └── package.json # Frontend dependencies (separate from backend) -├── tests/ # Test suite -├── docs/ # Documentation -├── alembic/ # Database migrations -├── contracts/ # API contract reference files (PROTECTED) -│ ├── 
v1_api_swagger.json # 
Agent API v1 specification (PROTECTED) -│ ├── current_api_openapi.json # Current API OpenAPI specification (PROTECTED) -└── justfile # Development task runner - -CipherSwarmAgent/ # Go-based agent (separate project) -├── cmd/ # CLI entrypoint -├── lib/ # Core agent logic -└── main.go # Agent application entry point +│ │ └── control/ # Control API (/api/v1/control/*) +│ ├── core/ # Core logic, exceptions, deps +│ ├── models/ # SQLAlchemy models +│ └── schemas/ # Pydantic schemas +├── frontend/ # SvelteKit SPA +├── tests/ # pytest test suite +├── contracts/ # API contracts (PROTECTED) +└── justfile # Task runner ``` -## Critical API Compatibility Requirements - -### Agent API v1 (`/api/v1/client/*`) - -- **IMMUTABLE**: Must follow `contracts/v1_api_swagger.json` specification exactly -- **NO BREAKING CHANGES**: Locked to OpenAPI 3.0.1 specification -- **Legacy Compatibility**: Mirrors Ruby-on-Rails Ouroboros version -- **Testing**: All responses must validate against OpenAPI specification - -### Agent API v2 (`/api/v2/client/*`) - -- **NOT YET IMPLEMENTED**: Future FastAPI-native version -- **Breaking Changes Allowed**: With proper versioning and documentation -- **Cannot Interfere**: Must not affect v1 Agent API - -### Router File Organization - -Each API interface must be organized in separate directories: +### Router Organization | Endpoint Path | Router File | | ------------------------------ | ---------------------------------------- | @@ -82,789 +72,217 @@ Each API interface must be organized in separate directories: | `/api/v1/web/*` | `app/api/v1/endpoints/web/` | | `/api/v1/control/*` | `app/api/v1/endpoints/control/` | -## Coding Standards - -### Emoji Usage - -- Avoid using emojis and other non-ASCII characters in code, comments, or documentation, except when the code is handling non-plaintext characters (for example: em dash, en dash, or other non-ASCII symbols). 
- -### Python Development - -- **Formatting**: Use `ruff format` with 119 character line limit -- **Type Hints**: Always use type hints, prefer `| None` over `Optional[]` -- **Strings**: Use double quotes (`"`) for all strings -- **Imports**: Group as stdlib, third-party, local with 2 lines between top-level definitions -- **Logging**: Use `loguru` exclusively, never standard Python `logging` -- **Caching**: Use `cashews` exclusively, never `functools.lru_cache` or other mechanisms -- **Time Handling**: Use `datetime.now(datetime.UTC)` instead of deprecated `datetime.utcnow()` -- **Pydantic**: Always use v2 conventions with `Annotated` for field definitions - -#### Type Hints Best Practices - -```python -# [x] Good -from typing import Annotated -from pydantic import Field - -name: Annotated[str, Field(min_length=1, description="User's full name")] -age: Annotated[int, Field(ge=0, le=120)] - -# [FAIL] Avoid -name: str = Field(..., min_length=1, description="User's full name") -``` +--- -#### Error Handling Patterns +## Quick Reference -```python -# [x] Good - Early returns and guard clauses -async def process_resource(resource_id: int) -> Resource: - if not resource_id: - raise ValueError("Resource ID is required") +### Essential Commands - resource = await get_resource(resource_id) - if not resource: - raise ResourceNotFound(f"Resource {resource_id} not found") - - return await process_resource_data(resource) +```bash +just install # Setup dependencies +just dev # Backend dev server +just docker-dev-up-watch # Fullstack with hot reload +just docker-dev-down # Stop dev stack +just test-backend # Run backend tests +just check # Lint + type check +just ci-check # Full CI validation (before PR) ``` -### FastAPI Development - -- **All APIs must be versioned**: Use `/api/v1/...` prefix -- **Response Models**: Define Pydantic response models for all endpoints -- **Error Handling**: Use `HTTPException` for API errors, custom exceptions for business logic -- 
**Dependencies**: Use dependency injection for auth, database sessions, and user context -- **Documentation**: Include comprehensive docstrings with Args, Returns, and Raises sections - -#### Control API Error Handling - -- **RFC9457 Compliance**: All Control API endpoints must return errors in `application/problem+json` format -- **Required Fields**: `type`, `title`, `status`, `detail`, `instance`, and relevant extensions - -### Frontend Development (SvelteKit) - -- **Component Library**: Use Shadcn-Svelte and Flowbite as primary UI libraries -- **Styling**: Use Tailwind CSS with utility-first approach -- **Forms**: Use Superforms with Zod validation -- **State Management**: Use SvelteKit stores and `$app/state` (not deprecated `$app/stores`) -- **Package Management**: Run `pnpm`/`npm` commands from `frontend/` directory -- **Idiomatic Svelte**: Follow Svelte 5 conventions and best practices +### Testing Tiers - Use Smallest Tier That Covers Changes -### Database Development +| Tier | Command | Use When | +| -------- | ---------------------------- | ------------------------------- | +| Backend | `just test-backend` | Backend logic, services, models | +| Frontend | `pnpm test` (from frontend/) | UI components, client logic | +| E2E | `just test-e2e` | Complete user workflows | -- **ORM**: Use SQLAlchemy 2.0 with async patterns -- **Migrations**: Use Alembic for all schema changes -- **Models**: Define relationships clearly with proper foreign keys and join tables -- **Multi-tenancy**: Enforce project-level isolation for all data access +**Skip testing** for verification-only tasks (no code changes). **Run `just ci-check`** only when PR-ready or touching multiple tiers. 
-#### Service Layer Architecture Patterns - -All business logic should be implemented in service functions, not in API endpoints: - -```python -# [x] Service Function Structure -async def create_campaign_service( - db: AsyncSession, campaign_data: CampaignCreate, current_user: User -) -> Campaign: - """Create a new campaign with business validation.""" - # Validation - if await _campaign_name_exists(db, campaign_data.name, campaign_data.project_id): - raise CampaignExistsError("Campaign name already exists in project") - - # Business logic - campaign = Campaign(**campaign_data.model_dump()) - db.add(campaign) - await db.commit() - await db.refresh(campaign) - return campaign - - -# [x] Service Function Naming Conventions -# CRUD: create_*, get_*, list_*, update_*, delete_* -# Business: estimate_keyspace_*, reorder_attacks_*, start_campaign_* -``` - -#### Database Patterns +### Dependency Management -**Session Management:** +- **Python**: `uv add PACKAGE` / `uv add --dev PACKAGE` / `uv remove PACKAGE` +- **Frontend**: `pnpm` from `frontend/` directory +- **Never edit** `pyproject.toml` dependencies manually -```python -# [x] Session management with dependency injection -from app.core.deps import get_db +--- +## Coding Standards -@router.get("/campaigns") -async def list_campaigns(db: AsyncSession = Depends(get_db)): - return await campaign_service.list_campaigns_service(db) -``` +### Python -**Pagination Pattern:** +- **Format**: `ruff format`, 119 char line limit, double quotes +- **Type hints**: Always use, prefer `T | None` over `Optional[T]` +- **Pydantic**: v2 with `Annotated` field definitions +- **Imports**: stdlib, third-party, local (2 blank lines between top-level defs) ```python -# [x] Pagination pattern -async def list_campaigns_service( - db: AsyncSession, skip: int = 0, limit: int = 20 -) -> tuple[list[Campaign], int]: - query = select(Campaign).offset(skip).limit(limit) - result = await db.execute(query) - items = result.scalars().all() - - 
count_query = select(func.count(Campaign.id)) - total = await db.scalar(count_query) - - return list(items), total or 0 -``` - -### Go Development (CipherSwarmAgent) - -- **Version**: Go 1.22 or later -- **CLI Framework**: Use Cobra for command-line interface -- **API Contract**: Must match Agent API v1 specification exactly -- **Configuration**: Support environment variables, CLI flags, and YAML config files -- **Error Handling**: Implement exponential backoff for API requests - -## Authentication Strategies - -### Web UI Authentication - -- OAuth2 with Password flow and refresh tokens -- Session-based with secure HTTP-only cookies -- CSRF protection for forms -- Argon2 password hashing - -### Agent API Authentication - -- Bearer token authentication -- Token format: `csa__` -- One token per agent, bound to agent ID -- Automatic token invalidation on agent removal - -### Control API Authentication - -- API key-based authentication using bearer tokens -- Token format: `cst__` -- Multiple active keys per user supported -- Configurable permissions and scopes - -## Database Models and Relationships - -### Core Models - -- **Project**: Top-level organizational boundary (multi-tenancy) -- **Campaign**: Coordinated cracking attempts targeting a hash list -- **Attack**: Specific cracking configuration within a campaign -- **Task**: Discrete work unit assigned to a single agent -- **HashList**: Set of hashes targeted by campaigns -- **HashItem**: Individual hash with metadata (stored as JSONB) -- **Agent**: Registered client capable of executing tasks -- **CrackResult**: Successfully cracked hash record -- **User**: Authenticated entity with project-scoped permissions - -### Key Relationships - -- Project → Campaigns (one-to-many) -- User ↔ Projects (many-to-many) -- Campaign → Attacks (one-to-many) -- Attack → Tasks (one-to-many) -- Campaign → HashList (many-to-one) -- HashList ↔ HashItems (many-to-many) - -## Testing Requirements - -### Three-Tier Testing Architecture - 
-Ouroboros uses a strategic three-tier testing architecture: - -#### Tier 1: Backend (`just test-backend`) - -- **Technology**: pytest + testcontainers + polyfactory -- **Scope**: API endpoints, services, models with real PostgreSQL -- **Coverage**: Focused on `app/` directory -- **Speed**: Fast (seconds) -- **When to use**: Testing backend logic, services, database operations - -#### Tier 2: Frontend (`just test-frontend`) - -- **Technology**: Vitest + Playwright with mocked APIs -- **Scope**: UI components, user interactions, client-side logic -- **Speed**: Fast (seconds) -- **When to use**: Testing UI components and frontend logic in isolation - -#### Tier 3: Full E2E (`just test-e2e`) - -- **Technology**: Playwright against full Docker stack -- **Scope**: Complete user workflows across real backend -- **Data**: Uses `scripts/seed_e2e_data.py` for test data -- **Speed**: Slow (minutes) -- **When to use**: Validating complete user workflows end-to-end - -**Testing Strategy Guidelines:** - -- Run the **smallest tier** that exercises your changes -- Use `just ci-check` only when PR-ready or touching multiple tiers -- For verification-only tasks (no code changes), testing is not required - -### Backend Testing - -```bash -# Run all tests -just test-backend - -# Run with coverage -just coverage - -# Run linting and type checking -just check - -# Full CI check (REQUIRED before PR submission) -just ci-check -``` - -### Frontend Testing - -```bash -# From frontend/ directory -pnpm test - -# E2E tests (requires backend running) -pnpm test:e2e +# Pydantic v2 pattern +from typing import Annotated +from pydantic import Field -# Lint and type check -pnpm check +name: Annotated[str, Field(min_length=1, description="User's full name")] ``` -### Test Patterns - -- Use `pytest` for all Python tests -- Use test factories in `tests/factories/` -- Use helper functions from `tests/utils/test_helpers.py` -- For Control API tests, use `create_user_with_api_key_and_project_access()` -- 
Validate API responses against OpenAPI specifications - -## Development Workflow - -### Quickstart Commands - -```bash -# 1) Setup -just install - -# 2) Backend dev only (hot reload) -just dev +### FastAPI -# 3) Fullstack dev (Docker, hot reload, migrations, seed, logs) -just docker-dev-up-watch +- All APIs versioned: `/api/v1/...` +- Business logic in service functions, not endpoints +- Service naming: `create_*`, `get_*`, `list_*`, `update_*`, `delete_*` -# 4) Stop dev stack -just docker-dev-down +### Control API - RFC9457 Errors -# 5) Open docs/UI -open http://localhost:8000/docs # Swagger UI -open http://localhost:8000/redoc # ReDoc -open http://localhost:5173 # SvelteKit Frontend +All Control API errors must return `application/problem+json`: -# 6) Full CI checks (heavy - only before PR) -just ci-check +```python +{ + "type": "https://example.com/problems/invalid-request", + "title": "Invalid Request", + "status": 400, + "detail": "The request parameters are invalid", + "instance": "/api/v1/control/campaigns/123", +} ``` -### Common Just Commands - -**Setup & Maintenance:** - -- `just install` - Install Python/JS dependencies and pre-commit hooks -- `just update-deps` - Update uv and pnpm dependencies - -**Linting & Formatting:** +### Frontend (SvelteKit) -- `just check` - Run all code and commit checks -- `just format` - Auto-format code with ruff and prettier -- `just format-check` - Check formatting only -- `just lint` - Run all linting checks +- **UI**: Shadcn-Svelte + Flowbite + Tailwind CSS +- **Forms**: Superforms with Zod validation +- **State**: `$app/state` (not deprecated `$app/stores`) +- **Svelte 5** conventions +- Run commands from `frontend/` directory -**Development Servers:** +### Database -- `just dev` - Backend only (alias for `dev-backend`) -- `just dev-backend` - Run migrations + start FastAPI dev server -- `just dev-frontend` - Start SvelteKit dev server only -- `just dev-fullstack` - Start both in Docker with hot reload +- SQLAlchemy 2.0 
async patterns +- Alembic for migrations +- Multi-tenancy: enforce project-level isolation -**Docker Workflows:** +--- -- `just docker-dev-up-watch` - Start dev stack + follow logs -- `just docker-dev-down` - Stop dev stack -- `just docker-prod-up` / `just docker-prod-down` - Production compose +## Authentication -**Documentation:** +| Interface | Method | Token Format | +| ----------- | ----------------------- | ------------------------- | +| Web UI | OAuth2 + refresh tokens | Session cookies | +| Agent API | Bearer token | `csa_<agent_id>_<random>` | +| Control API | API key bearer | `cst_<user_id>_<random>` | -- `just docs` - Serve MkDocs locally (port 9090) -- `just docs-test` - Test documentation build +--- -**Database:** +## Core Models -- `just db-reset` - Drop, recreate, and migrate test database +- **Project**: Multi-tenancy boundary +- **Campaign**: Cracking attempts targeting a hash list +- **Attack**: Cracking configuration within campaign +- **Task**: Work unit assigned to single agent +- **HashList/HashItem**: Hashes to crack +- **Agent**: Registered hashcat executor -**Release Management:** +**Relationships**: Project -> Campaigns -> Attacks -> Tasks; Campaign -> HashList \<-> HashItems -- `just release` - Generate CHANGELOG.md with git-cliff -- `just release-preview` - Preview changelog without writing +--- -### Git Workflow +## Git Workflow -#### Branch Strategy +### Branches -- **Long-lived branches:** +- `main`: Primary development (PRs target here) +- `feature/<area>/<desc>`: New features +- `hotfix/<desc>`: Emergency fixes +- `v1-archive`: Read-only archive - - `main`: Primary development branch (v2 codebase) - - `v1-archive`: Archived v1 stable (maintenance-only, rarely updated) +### Commit Types -- **Short-lived branches:** +`feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore` - - `feature//`: New features off `main` - - `hotfix/`: Emergency fixes off `main` - - `release/`: Release preparation off `main` +### Scopes -#### Development Workflows +`auth`, `api`,
`cli`, `models`, `docs`, `deps` -**Standard Development:** +### Workflow ```bash git checkout main && git pull git checkout -b feature/api/new-feature -just dev # develop with hot reload +# develop... just test-backend # smallest tier covering changes git commit -m "feat(api): add project quotas" gh pr create --base main ``` -**Hotfixes:** - -```bash -git checkout main && git pull -git checkout -b hotfix/critical-security-fix -# fix the issue... -just test-backend -git commit -m "fix(auth): patch security vulnerability" -gh pr create --base main -``` - -**Releases:** - -```bash -git checkout main && git pull -git checkout -b release/v2.1.0 -# stabilization work... -just ci-check # full validation -git commit -m "chore(release): prepare v2.1.0" -gh pr create --base main -``` - -### Git Conventions - -Follow [Conventional Commits](https://www.conventionalcommits.org): - -```text -[optional scope]: - -[optional body] - -[optional footer(s)] -``` - -#### Commit Types - -- `feat`: New feature (MINOR version) -- `fix`: Bug fix (PATCH version) -- `docs`: Documentation changes -- `style`: Code style changes -- `refactor`: Code refactoring -- `perf`: Performance improvements -- `test`: Test additions/corrections -- `build`: Build system changes -- `ci`: CI configuration changes -- `chore`: Maintenance tasks - -#### Scopes - -- `(auth)`: Authentication and authorization -- `(api)`: API endpoints and routes -- `(cli)`: Command-line interface -- `(models)`: Data models and schemas -- `(docs)`: Documentation -- `(deps)`: Dependencies - -#### Golden Rules - -1. **NO direct pushes** to `main` - PRs only -2. **Agent API v1 compatibility** - maintain existing contracts -3. **Rebase before PR** - stay synced with `main` -4. **Test locally first** - run appropriate test tier before opening PR -5. **PR scope manageable** - under ~400 lines when feasible -6. 
**v1-archive is read-only** - only emergency security patches if absolutely needed - -### Dependency Management - -- **Python**: Use `uv` for all dependency management - - `uv add PACKAGE_NAME` to install packages - - `uv add --dev PACKAGE_NAME` for dev dependencies - - `uv remove PACKAGE_NAME` to uninstall -- **Frontend**: Use `pnpm` from `frontend/` directory -- **Never edit** `pyproject.toml` dependencies manually - -### Protected Files and Directories - -**NEVER modify these without explicit permission:** - -- `contracts/` (API contract reference files) -- `alembic/` (database migrations) -- `.cursor/` (cursor configuration) -- `.github/` (GitHub workflows) - -## Docker Development Environment - -### Environment Variables - -Key environment variables from `docker-compose.yml` and `docker-compose.dev.yml`: - -- `DATABASE_URL` - PostgreSQL connection string -- `REDIS_HOST/PORT` - Redis cache connection -- `CELERY_BROKER_URL/RESULT_BACKEND` - Task queue configuration -- `SECRET_KEY` - JWT signing secret -- `FIRST_SUPERUSER/PASSWORD` - Initial admin user -- `BACKEND_CORS_ORIGINS` - Frontend origins for CORS - -### Health Endpoints - -- `/api-info` - API metadata (name, version, docs links) -- `/health` - Simple health check for Docker - -### Docker Commands - -**Development Environment:** - -```bash -# Start fullstack with migrations, seeding, and log following -just docker-dev-up-watch - -# Stop and clean up -just docker-dev-down -``` - -**Production Environment:** - -```bash -just docker-prod-up # Start production stack -just docker-prod-down # Stop production stack -``` - -## Security Guidelines - -### General Security - -- **HTTPS Only**: Never serve over plain HTTP in production -- **No Hard-coded Secrets**: Use pydantic-settings and environment variables -- **Strong JWT Secrets**: Use rotating secrets with short token lifetimes -- **CSRF Protection**: Implement CSRF tokens for state-changing requests -- **Rate Limiting**: Apply per-user and per-IP rate 
limiting -- **Error Handling**: Never leak stack traces or internal errors to clients - -### Database Security - -- **Parameterized Queries**: Always use SQLAlchemy ORM, never raw SQL -- **Minimal Permissions**: Database user should have minimum required permissions -- **SSL Connections**: Require SSL for all database connections -- **Migration Review**: Review all Alembic migrations before production - -### API Security - -- **Input Validation**: Validate all input with Pydantic models -- **Output Sanitization**: Escape user-displayed data in templates -- **Access Control**: Use dependency injection for user context and auth -- **Security Headers**: Set standard security headers (HSTS, X-Frame-Options, etc.) - -## Performance Guidelines - -### Caching Strategy - -```python -# Use Cashews for all caching -from cashews import cache - - -@cache(ttl=60) # 60 second TTL -async def expensive_operation(): - return await perform_calculation() - - -# Cache with tags for invalidation -@cache(ttl=300, tags=["campaign", "stats"]) -async def get_campaign_stats(campaign_id: int): - return await calculate_stats(campaign_id) -``` - -### Database Optimization - -- Use async SQLAlchemy operations for I/O-bound tasks -- Implement proper indexing for frequently queried fields -- Use lazy loading for large datasets -- Optimize Pydantic models for serialization performance - -### Frontend Optimization - -- Use SvelteKit's built-in optimizations -- Implement proper component lazy loading -- Optimize bundle size with tree shaking -- Use Tailwind CSS purging for production builds - -## Programmatic Checks - -Before submitting any changes, run these validation commands: - -### Backend Validation - -```bash -# Full CI check (REQUIRED) -just ci-check - -# Individual checks -just check # Linting and type checking -just test # Run test suite -just test-cov # Run tests with coverage -``` - -### Frontend Validation - -```bash -# From frontend/ directory -pnpm check # Type checking and linting 
-pnpm test # Unit tests -pnpm build # Production build check -``` - -### Docker Validation - -```bash -# Build and test containers -docker compose build -docker compose up -d -docker compose exec app just ci-check -``` - -## Error Handling Patterns - -### Custom Exceptions - -Define custom exceptions in `app/core/exceptions.py`: - -```python -class CipherSwarmException(Exception): - """Base exception for Ouroboros""" - - pass - - -class ResourceNotFound(CipherSwarmException): - """Resource not found exception""" - - pass - +--- -class CampaignNotFoundError(Exception): - """Raised when a campaign is not found.""" +## Error Handling - pass -``` +### Services -### Service Layer Error Patterns +Services raise domain exceptions; endpoints translate to HTTP: ```python -# [x] Custom domain exceptions in services +# Service class CampaignNotFoundError(Exception): - """Raised when a campaign is not found.""" - pass -# [x] Exception translation in endpoints +# Endpoint try: campaign = await get_campaign_service(db, campaign_id) except CampaignNotFoundError: raise HTTPException(status_code=404, detail="Campaign not found") ``` -### API Error Responses - -```python -# FastAPI error handling -from fastapi import HTTPException - -# Standard HTTP exception -raise HTTPException(status_code=404, detail="Agent not found") - -# Control API RFC9457 compliance -return JSONResponse( - status_code=400, - content={ - "type": "https://example.com/problems/invalid-request", - "title": "Invalid Request", - "status": 400, - "detail": "The request parameters are invalid", - "instance": "/api/v1/control/campaigns/123", - }, - headers={"Content-Type": "application/problem+json"}, -) -``` - -## Resource Management - -### MinIO Storage Structure - -```text -Buckets: -├── wordlists/ # Dictionary attack word lists -├── rules/ # Hashcat rule files -├── masks/ # Mask pattern files -├── charsets/ # Custom charset definitions -└── temp/ # Temporary storage for uploads -``` - -### File Upload Handling - 
-- Direct uploads to MinIO buckets -- Progress tracking for large files -- MD5 checksum verification -- Virus scanning for uploads -- File type verification - -## Monitoring and Logging - -### Logging Standards +### Logging ```python from loguru import logger -# Structured logging with context logger.bind(task_id=task.id, agent_id=agent.id).info("Task started") - -# Error logging with exception details -try: - result = await process_task() -except Exception as e: - logger.bind(task_id=task.id).error(f"Task failed: {e}") - raise ``` -### Performance Monitoring - -- Container metrics collection -- Application performance tracking -- Resource usage monitoring -- Alert configuration for critical issues - -## Debugging and Development Tools - -### Backend Debugging - -- **VS Code**: Use provided launch configurations for debugging the backend -- **Command Line**: Use `pytest --pdb` to drop into debugger on test failures -- **Logs**: Check Docker logs with `docker compose logs -f backend` - -### Frontend Debugging - -- **Browser DevTools**: Use browser's developer tools for debugging the frontend -- **Svelte DevTools**: Install Svelte DevTools browser extension -- **Network Tab**: Monitor API requests and responses -- **Console**: Check for JavaScript errors and warnings - -## SDK and Client Development - -### Rust Client Development - -When developing Rust clients for the Ouroboros API: - -- **Code Generation**: Use OpenAPI Generator for Rust client code from current API schema -- **Linting**: Enforce `cargo clippy -- -D warnings` for strict checking -- **Testing**: Recommend `criterion` for benchmarks, `insta` for snapshot testing -- **Organization**: Keep generated SDK code in separate packages/repositories - -### SDK Best Practices - -- Generate from `contracts/current_api_openapi.json` specification -- Maintain separate versioning for SDK releases -- Include comprehensive examples and documentation -- Test against live API endpoints in CI/CD +--- -## User 
Preferences and Project Conventions +## Environment -### Maintainer Preferences +### Key Variables -- **Code Review**: Prefer coderabbit.ai over GitHub Copilot auto-reviews -- **Milestones**: Named as version numbers (e.g., `v2.0`) with descriptive summaries -- **Identity**: Always use handle 'UncleSp1d3r', never real name in commits or documentation -- **Commits**: Never commit on behalf of maintainer; always open PRs for review +`DATABASE_URL`, `REDIS_HOST`, `SECRET_KEY`, `FIRST_SUPERUSER`, `BACKEND_CORS_ORIGINS` -### Branch and PR Conventions +### URLs -- PRs must target `main` branch -- Keep PR scope manageable (under ~400 lines when feasible) -- Include descriptive PR titles following conventional commit format -- Link related issues in PR description -- Ensure CI checks pass before requesting review +- Swagger: http://localhost:8000/docs +- ReDoc: http://localhost:8000/redoc +- Frontend: http://localhost:5173 -## First Tasks Checklist for New AI Agents +### Health Endpoints -When starting work on Ouroboros: +- `/api-info` - API metadata +- `/health` - Docker health check -1. **Setup**: Run `just install` to install dependencies -2. **Start Development**: Run `just docker-dev-up-watch` for fullstack environment -3. **Verify URLs**: - - (Swagger UI) - - (ReDoc) - - (Frontend) -4. **Read Documentation**: - - This AGENTS.md file (comprehensive agent rules) - - `.cursor/rules/` (project-specific patterns) - - Project README.md (overview and features) -5. **Choose Test Strategy**: Select smallest tier covering your changes -6. **API Compliance**: - - If touching `/api/v1/client/*`, validate against `contracts/v1_api_swagger.json` - - If touching Control API, ensure RFC9457 `application/problem+json` responses -7. 
**Validate Changes**: Run appropriate test suite before marking complete +--- -### Onboarding Verification +## Security Essentials -Before starting work, verify: +- HTTPS only in production +- No hardcoded secrets (use env vars) +- SQLAlchemy ORM only (no raw SQL) +- Pydantic validation for all input +- Never leak stack traces to clients -- [x] Development environment runs successfully -- [x] All documentation links are accessible -- [x] Test commands work correctly -- [x] Understanding of API compatibility requirements -- [x] Familiarity with protected files and directories -- [x] Knowledge of required libraries (loguru, cashews, datetime.UTC) +--- -## AI Agent Guidelines +## Storage (MinIO) -When working with this codebase: +Buckets: `wordlists/`, `rules/`, `masks/`, `charsets/`, `temp/` -1. **Follow Existing Patterns**: Match the established code organization and style -2. **Respect API Contracts**: Never break Agent API v1 compatibility -3. **Use Proper Tools**: Use the specified libraries (loguru, cashews, etc.) -4. **Validate Changes**: Always run appropriate test tier before completing tasks -5. **Security First**: Follow security guidelines for all code changes -6. **Test Thoroughly**: Write and run appropriate tests for all changes -7. **Document Changes**: Update relevant documentation when making changes -8. 
**No Direct Commits**: Always open PRs; never push directly to main or commit on behalf of maintainer +--- -### Common Pitfalls to Avoid +## Common Pitfalls -- Using standard Python `logging` instead of `loguru` -- Using `functools.lru_cache` instead of `cashews` +- Using stdlib `logging` instead of `loguru` +- Using `lru_cache` instead of `cashews` - Using `datetime.utcnow()` instead of `datetime.now(datetime.UTC)` - Modifying protected files without permission - Breaking Agent API v1 compatibility -- Skipping the appropriate test validation step -- Hard-coding secrets or configuration values -- Using deprecated Svelte patterns in frontend code -- Running `just ci-check` for verification-only tasks (no code changes) -- Pushing directly to `main` branch - -This AGENTS.md file serves as the definitive guide for AI agents working with Ouroboros. All code changes must comply with these standards and pass the programmatic checks before submission. +- Pushing directly to `main` +- Running `just ci-check` for verification-only tasks +- Using deprecated `$app/stores` in Svelte diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..43c994c2 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +@AGENTS.md diff --git a/alembic/versions/20260210_151904_f65a83aa8920_add_paused_state_to_attackstate_enum.py b/alembic/versions/20260210_151904_f65a83aa8920_add_paused_state_to_attackstate_enum.py new file mode 100644 index 00000000..47bf59dd --- /dev/null +++ b/alembic/versions/20260210_151904_f65a83aa8920_add_paused_state_to_attackstate_enum.py @@ -0,0 +1,60 @@ +"""add_paused_state_to_attackstate_enum + +Revision ID: f65a83aa8920 +Revises: 1587d62b626a +Create Date: 2026-02-10 15:19:04.295213+00:00 + +""" + +from collections.abc import Sequence + +from alembic import op + + +# revision identifiers, used by Alembic. 
def upgrade() -> None:
    """Extend the ``attackstate`` enum with a ``PAUSED`` member.

    The existing PostgreSQL enum type is extended in place with
    ``ALTER TYPE ... ADD VALUE``. ``IF NOT EXISTS`` keeps the statement safe to
    re-run, and ``AFTER 'RUNNING'`` places the new label directly after
    ``RUNNING`` in the enum's ordering. The label added is the upper-case enum
    name (``PAUSED``), not the lower-case value (``paused``).

    NOTE(review): PostgreSQL releases before 12 reject ``ADD VALUE`` inside a
    transaction block. If such servers must be supported, this statement would
    need to run inside Alembic's ``autocommit_block()`` -- confirm against the
    deployment target.
    """
    add_paused_label = "ALTER TYPE attackstate ADD VALUE IF NOT EXISTS 'PAUSED' AFTER 'RUNNING'"
    op.execute(add_paused_label)
def _build_rfc9457_response(request: Request, exc: HTTPException) -> JSONResponse:
    """Build an RFC 9457 Problem Details response from an HTTPException.

    Args:
        request: Incoming request; its URL path becomes the problem ``instance``.
        exc: The exception being rendered. ``exc.detail`` may be a plain value
            (stringified into ``detail``) or a dict whose keys are merged into
            the problem document as extension members.

    Returns:
        A ``JSONResponse`` carrying the exception's status code, any headers
        attached to the exception, and ``Content-Type: application/problem+json``.
    """
    title = _STATUS_TITLES.get(exc.status_code, "HTTP Error")

    # Members derived by this handler; "type" stays "about:blank" because no
    # problem-type registry is defined here.
    problem: dict[str, Any] = {
        "type": "about:blank",
        "title": title,
        "status": exc.status_code,
        "instance": str(request.url.path),
    }

    if isinstance(exc.detail, dict):
        detail_dict: dict[str, Any] = exc.detail
        # Extension members are merged in, but a caller-supplied dict must not
        # override the members computed above.
        reserved = ("type", "title", "status", "instance")
        problem.update({key: value for key, value in detail_dict.items() if key not in reserved})
        # A dict without its own "detail" still gets one, falling back to the title.
        problem.setdefault("detail", title)
    else:
        problem["detail"] = str(exc.detail) if exc.detail else title

    # Forward headers attached to the exception (e.g. WWW-Authenticate on a 401,
    # Retry-After on a 503) instead of silently dropping them. Any content type
    # they carry is discarded so the problem+json media type always wins.
    headers = {
        name: value
        for name, value in (exc.headers or {}).items()
        if name.lower() != "content-type"
    }
    headers["Content-Type"] = "application/problem+json"

    return JSONResponse(
        status_code=exc.status_code,
        content=problem,
        headers=headers,
    )
# NOTE(review): the "type" values in the OpenAPI examples below are
# illustrative; confirm they match the `type` emitted for the corresponding
# control exceptions.
@router.delete(
    "/{resource_id}/cancel",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Cancel pending resource upload",
    description=(
        "Cancel a pending resource upload that has not yet been completed. "
        "This will delete the resource record from the database and remove any "
        "associated storage objects. Only pending (non-uploaded) resources can be cancelled. "
        "For already-uploaded resources, use the standard DELETE endpoint instead."
    ),
    responses={
        204: {"description": "Resource cancelled successfully"},
        400: {
            "description": "Invalid resource state - resource is already uploaded",
            "content": {
                "application/problem+json": {
                    "example": {
                        "type": "invalid-resource-state",
                        "title": "Invalid Resource State",
                        "status": 400,
                        "detail": "Cannot cancel resource that is already uploaded. Use DELETE to remove uploaded resources.",
                        "instance": "/api/v1/control/resources/123e4567-e89b-12d3-a456-426614174000/cancel",
                    }
                }
            },
        },
        403: {
            "description": "User does not have access to the resource's project",
            "content": {
                "application/problem+json": {
                    "example": {
                        "type": "project-access-denied",
                        "title": "Project Access Denied",
                        "status": 403,
                        "detail": "User does not have access to project 123",
                        "instance": "/api/v1/control/resources/123e4567-e89b-12d3-a456-426614174000/cancel",
                    }
                }
            },
        },
        404: {
            "description": "Resource not found",
            "content": {
                "application/problem+json": {
                    "example": {
                        "type": "resource-not-found",
                        "title": "Resource Not Found",
                        "status": 404,
                        "detail": "Resource 123e4567-e89b-12d3-a456-426614174000 not found",
                        "instance": "/api/v1/control/resources/123e4567-e89b-12d3-a456-426614174000/cancel",
                    }
                }
            },
        },
    },
)
async def cancel_pending_resource_upload(
    resource_id: UUID,
    db: Annotated[AsyncSession, Depends(get_db)],
    current_user: Annotated[User, Depends(get_current_control_user)],
) -> Response:
    """
    Cancel a pending (not yet uploaded) resource.

    Thin HTTP wrapper: all validation and cleanup is delegated to
    ``cancel_pending_resource``. This handler does not catch anything, so the
    exceptions listed below propagate out of it unchanged.

    Args:
        resource_id: UUID of the resource to cancel.
        db: Database session (injected).
        current_user: Authenticated Control API user (injected).

    Returns:
        Response: Empty 204 No Content response on success.

    Raises:
        ResourceNotFoundError: If the resource doesn't exist.
        ProjectAccessDeniedError: If the user lacks access to the resource's project.
        InvalidResourceStateError: If the resource is already uploaded.
        InternalServerError: If storage/database cleanup of the resource fails.
    """
    # The service commits on success; nothing further to persist here.
    await cancel_pending_resource(resource_id, db, current_user)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
isinstance(e.from_state, AttackState): + valid_actions = AttackStateMachine.get_valid_actions(e.from_state) + detail = f"Cannot abort attack from state '{e.from_state.value}'. Valid actions: {valid_actions}" + else: + detail = f"Cannot abort attack from state '{e.from_state.value}'." + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=detail, + ) from e return result diff --git a/app/api/v1/endpoints/web/campaigns.py b/app/api/v1/endpoints/web/campaigns.py index 99b020f3..7ffc288b 100644 --- a/app/api/v1/endpoints/web/campaigns.py +++ b/app/api/v1/endpoints/web/campaigns.py @@ -49,6 +49,7 @@ stop_campaign_service, update_campaign_service, ) +from app.core.state_machines import CampaignStateMachine, InvalidStateTransitionError from app.db.session import get_db from app.models.project import Project from app.models.user import User @@ -236,6 +237,18 @@ async def start_campaign( raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e except PermissionError as e: raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=str(e)) from e + except InvalidStateTransitionError as e: + from app.models.campaign import CampaignState + + if isinstance(e.from_state, CampaignState): + valid_actions = CampaignStateMachine.get_valid_actions(e.from_state) + detail = f"Cannot start campaign from state '{e.from_state.value}'. Valid actions: {valid_actions}" + else: + detail = f"Cannot start campaign from state '{e.from_state.value}'." 
+ raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=detail, + ) from e except ValueError as e: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=str(e) @@ -259,6 +272,18 @@ async def stop_campaign( return await stop_campaign_service(campaign_id, db) except CampaignNotFoundError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e + except InvalidStateTransitionError as e: + from app.models.campaign import CampaignState + + if isinstance(e.from_state, CampaignState): + valid_actions = CampaignStateMachine.get_valid_actions(e.from_state) + detail = f"Cannot stop campaign from state '{e.from_state.value}'. Valid actions: {valid_actions}" + else: + detail = f"Cannot stop campaign from state '{e.from_state.value}'." + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=detail, + ) from e @router.get( @@ -402,6 +427,18 @@ async def archive_campaign( return Response(status_code=status.HTTP_204_NO_CONTENT) except CampaignNotFoundError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) from e + except InvalidStateTransitionError as e: + from app.models.campaign import CampaignState + + if isinstance(e.from_state, CampaignState): + valid_actions = CampaignStateMachine.get_valid_actions(e.from_state) + detail = f"Cannot archive campaign from state '{e.from_state.value}'. Valid actions: {valid_actions}" + else: + detail = f"Cannot archive campaign from state '{e.from_state.value}'." + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=detail, + ) from e @router.post( diff --git a/app/core/config.py b/app/core/config.py index 64e9cefc..c7b90e9c 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -193,6 +193,18 @@ class Settings(BaseSettings): description="Timeout in seconds for background verification of resource uploads. If the file is not uploaded within this time, the resource is deleted. 
class InvalidStateTransitionProblem(ConflictProblem):
    """Invalid state transition error rendered as RFC 9457 Problem Details.

    Carries the current state, the attempted state and (optionally) the valid
    transitions so clients can handle the failure programmatically.

    Uses the ``ConflictProblem`` base: the request conflicts with the current
    state of the resource.
    """

    title = "Invalid State Transition"

    # Instance attributes (declared here for type checkers); all are set in
    # __init__ and are exposed as problem extension fields.
    current_state: str
    attempted_state: str
    entity_type: str
    action: str | None
    valid_transitions: list[str] | None

    @staticmethod
    def _state_name(state: object) -> str:
        """Return a plain string for *state*, unwrapping enum members to their value."""
        return str(getattr(state, "value", state))

    def __init__(
        self,
        from_state: str,
        to_state: str,
        action: str | None = None,
        entity_type: str = "entity",
        valid_transitions: list[str] | None = None,
    ) -> None:
        """Initialize the InvalidStateTransitionProblem.

        State arguments are expected to be strings. Enum members (anything
        exposing ``.value``) are also tolerated and reduced to their value, so
        that callers forwarding states straight from a state-machine error do
        not leak ``ClassName.MEMBER`` text into the detail message or the
        extension fields.

        Args:
            from_state: The current state value.
            to_state: The attempted target state value.
            action: Optional user action that triggered the transition attempt.
            entity_type: Type of entity (e.g., "campaign", "attack").
            valid_transitions: Optional list of valid target states from the
                current state. ``None`` is preserved as ``None``.
        """
        from_state = self._state_name(from_state)
        to_state = self._state_name(to_state)
        if valid_transitions is not None:
            valid_transitions = [self._state_name(s) for s in valid_transitions]

        if action:
            detail = (
                f"Cannot perform action '{action}' on {entity_type}: "
                f"transition from '{from_state}' to '{to_state}' is not allowed."
            )
        else:
            detail = f"Invalid {entity_type} state transition from '{from_state}' to '{to_state}'."

        # Only mention transitions when there is at least one to list.
        if valid_transitions:
            detail += f" Valid transitions from '{from_state}': {valid_transitions}."

        super().__init__(detail=detail)

        # Extension fields read by the Control API error middleware.
        self.current_state = from_state
        self.attempted_state = to_state
        self.entity_type = entity_type
        self.action = action
        self.valid_transitions = valid_transitions
""" from collections.abc import Awaitable, Callable +from typing import Any -from fastapi import Request, Response +from fastapi import HTTPException, Request, Response from fastapi.responses import JSONResponse +from loguru import logger from starlette.middleware.base import BaseHTTPMiddleware from app.core.control_exceptions import ( @@ -22,6 +26,8 @@ InvalidAttackConfigError, InvalidHashFormatError, InvalidResourceFormatError, + InvalidResourceStateError, + InvalidStateTransitionProblem, ProjectAccessDeniedError, ProjectNotFoundError, ResourceNotFoundError, @@ -30,6 +36,17 @@ UserNotFoundError, ) +# Status code to title mapping for RFC9457 Problem Details +_STATUS_TITLES = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 409: "Conflict", + 422: "Unprocessable Entity", + 500: "Internal Server Error", +} + class ControlRFC9457Middleware(BaseHTTPMiddleware): """Middleware that applies RFC9457 error handling only to Control API routes.""" @@ -45,6 +62,26 @@ async def dispatch( try: return await call_next(request) + except InvalidStateTransitionProblem as exc: + # Convert InvalidStateTransitionProblem to RFC9457 format with extension fields + content: dict = { + "type": exc.type, + "title": exc.title, + "status": exc.status_code, + "detail": exc.detail, + "instance": str(request.url.path), + # Extension fields - always present on InvalidStateTransitionProblem + "current_state": exc.current_state, + "attempted_state": exc.attempted_state, + "action": exc.action, + "entity_type": exc.entity_type, + "valid_transitions": exc.valid_transitions, + } + return JSONResponse( + status_code=exc.status_code, + content=content, + headers={"Content-Type": "application/problem+json"}, + ) except ( CampaignNotFoundError, AttackNotFoundError, @@ -59,6 +96,7 @@ async def dispatch( InvalidAttackConfigError, InvalidHashFormatError, InvalidResourceFormatError, + InvalidResourceStateError, InsufficientPermissionsError, InternalServerError, 
ProjectAccessDeniedError, @@ -75,6 +113,42 @@ async def dispatch( }, headers={"Content-Type": "application/problem+json"}, ) + except HTTPException as exc: + # Convert HTTPException to RFC9457 format for Control API routes + title = _STATUS_TITLES.get(exc.status_code, "HTTP Error") + + # Build base problem response + problem: dict = { + "type": "about:blank", + "title": title, + "status": exc.status_code, + "instance": str(request.url.path), + } + + # Support problem extensions when detail is a dictionary + if isinstance(exc.detail, dict): + detail_dict: dict[str, Any] = exc.detail + # Merge extension fields, but preserve required RFC9457 fields + extensions = { + key: value + for key, value in detail_dict.items() + if key not in ("type", "title", "status", "instance") + } + problem.update(extensions) + # Always set detail: use 'detail' from dict if present, otherwise fallback to title + problem["detail"] = detail_dict.get("detail", title) + else: + problem["detail"] = str(exc.detail) if exc.detail else title + + return JSONResponse( + status_code=exc.status_code, + content=problem, + headers={"Content-Type": "application/problem+json"}, + ) + except Exception: # Let other exceptions bubble up to be handled by existing handlers + logger.debug( + "Unhandled exception in Control API middleware, propagating to default handlers" + ) raise diff --git a/app/core/services/attack_service.py b/app/core/services/attack_service.py index 0aa409bd..6d60bfa7 100644 --- a/app/core/services/attack_service.py +++ b/app/core/services/attack_service.py @@ -9,6 +9,7 @@ from app.core.exceptions import InvalidAgentTokenError from app.core.services.attack_complexity_service import AttackEstimationService +from app.core.state_machines import AttackStateMachine from app.models.agent import Agent from app.models.attack import Attack, AttackMode, AttackState from app.models.attack_resource_file import AttackResourceFile, AttackResourceType @@ -1082,9 +1083,10 @@ async def 
_delete_ephemeral_resource( == AttackResourceType.EPHEMERAL_RULE_LIST ): await db.delete(rule_resource) - except (ValueError, TypeError, AttributeError) as exc: - logger.warning( - f"Failed to delete ephemeral rule resource for attack {attack_id}: {exc}" + except ValueError: + # Invalid UUID string format - not a resource reference, skip cleanup + logger.debug( + f"left_rule is not a valid UUID for attack {attack_id}, skipping ephemeral cleanup" ) # Store campaign_id for SSE trigger @@ -1100,6 +1102,8 @@ async def _delete_ephemeral_resource( return {"id": attack_id, "deleted": True} # If the attack has started, mark as abandoned and stop tasks + # Validate state transition using state machine (raises InvalidStateTransitionError if invalid) + AttackStateMachine.validate_action(attack.state, "abort") attack.state = AttackState.ABANDONED # Stop all tasks for this attack if hasattr(attack, "tasks") and attack.tasks: diff --git a/app/core/services/campaign_service.py b/app/core/services/campaign_service.py index a8583031..439e0b80 100644 --- a/app/core/services/campaign_service.py +++ b/app/core/services/campaign_service.py @@ -7,6 +7,7 @@ from sqlalchemy.orm import selectinload from app.core.services.attack_complexity_service import calculate_attack_complexity +from app.core.state_machines import CampaignStateMachine from app.models.agent import Agent, AgentState from app.models.attack import Attack, AttackState from app.models.campaign import Campaign, CampaignState @@ -68,10 +69,6 @@ async def list_campaigns_service( Returns: tuple[list[CampaignRead], int]: A tuple containing the list of campaigns and the total number of campaigns - - Raises: - CampaignNotFoundError: if campaign does not exist - HTTPException: if campaign is archived """ stmt = select(Campaign).where(Campaign.state != CampaignState.ARCHIVED) stmt = stmt.where( @@ -451,7 +448,7 @@ async def start_campaign_service(campaign_id: int, db: AsyncSession) -> Campaign CampaignRead: The updated campaign 
Raises: CampaignNotFoundError: if campaign does not exist - HTTPException: if campaign is archived + InvalidStateTransitionError: if transition is not valid """ result = await db.execute(select(Campaign).where(Campaign.id == campaign_id)) campaign = result.scalar_one_or_none() @@ -460,10 +457,10 @@ async def start_campaign_service(campaign_id: int, db: AsyncSession) -> Campaign if campaign.state == CampaignState.ACTIVE: logger.info(f"Campaign {campaign_id} is already active.") return CampaignRead.model_validate(campaign, from_attributes=True) - if campaign.state == CampaignState.ARCHIVED: - raise HTTPException( - status_code=400, detail="Cannot start an archived campaign." - ) + + # Validate state transition using state machine (raises InvalidStateTransitionError if invalid) + CampaignStateMachine.validate_action(campaign.state, "start") + campaign.state = CampaignState.ACTIVE await db.commit() await db.refresh(campaign) @@ -487,7 +484,7 @@ async def stop_campaign_service(campaign_id: int, db: AsyncSession) -> CampaignR CampaignRead: The updated campaign Raises: CampaignNotFoundError: if campaign does not exist - HTTPException: if campaign is archived + InvalidStateTransitionError: if transition is not valid """ result = await db.execute(select(Campaign).where(Campaign.id == campaign_id)) campaign = result.scalar_one_or_none() @@ -496,8 +493,10 @@ async def stop_campaign_service(campaign_id: int, db: AsyncSession) -> CampaignR if campaign.state == CampaignState.DRAFT: logger.info(f"Campaign {campaign_id} is already stopped (draft state).") return CampaignRead.model_validate(campaign, from_attributes=True) - if campaign.state == CampaignState.ARCHIVED: - raise HTTPException(status_code=400, detail="Cannot stop an archived campaign.") + + # Validate state transition using state machine (raises InvalidStateTransitionError if invalid) + CampaignStateMachine.validate_action(campaign.state, "stop") + campaign.state = CampaignState.DRAFT await db.commit() await 
async def cleanup_stale_resource(
    resource: AttackResourceFile, db: AsyncSession
) -> bool:
    """
    Remove one stale pending resource from object storage and the database.

    The storage object (keyed by the resource id in ``settings.MINIO_BUCKET``)
    is stat'ed and removed if present; a missing object is not an error. The
    database row is then marked for deletion on ``db`` — this function does
    not commit. On a storage/database/network failure the session is rolled
    back and ``False`` is returned instead of raising.

    Args:
        resource: The AttackResourceFile to clean up
        db: Database session the resource belongs to

    Returns:
        True when the resource was cleaned up, False when a handled error occurred
    """
    from datetime import UTC, datetime

    resource_id = str(resource.id)
    age_hours = (datetime.now(UTC) - resource.created_at).total_seconds() / 3600

    def _log(**extra):
        # Shared structured-logging context for every message emitted below.
        return logger.bind(
            resource_id=resource_id,
            age_hours=round(age_hours, 2),
            project_id=resource.project_id,
            **extra,
        )

    try:
        storage_service = get_storage_service()
        bucket = settings.MINIO_BUCKET
        client = storage_service.client

        try:
            # stat_object raises S3Error when the key is absent.
            await asyncio.to_thread(client.stat_object, bucket, resource_id)
            await asyncio.to_thread(client.remove_object, bucket, resource_id)
            _log(storage_deleted=True).info("Deleted stale resource from MinIO storage")
        except S3Error as e:
            message = str(e)
            object_missing = "NoSuchKey" in message or "not found" in message.lower()
            if not object_missing:
                # Genuine storage failure: let the outer handler deal with it.
                raise
            _log(storage_deleted=False).debug(
                "Stale resource not found in MinIO (already deleted or never uploaded)"
            )

        # Storage is clean (or never held the object); drop the DB record.
        await db.delete(resource)
        _log(outcome="deleted").info("Cleaned up stale pending resource")
        return True

    except asyncio.CancelledError:
        # Never swallow cancellation; the surrounding task must be able to stop.
        raise
    except (S3Error, SQLAlchemyError, OSError, ConnectionError) as e:
        # Only expected infrastructure errors are handled; bugs still surface.
        _log(error=str(e), error_type=type(e).__name__).error(
            "Failed to clean up stale pending resource"
        )
        await db.rollback()
        return False
async def cancel_pending_resource(
    resource_id: UUID, db: AsyncSession, current_user: User
) -> None:
    """
    Cancel a pending resource upload.

    Loads the resource under a row lock, checks that the caller may act on it,
    rejects already-uploaded resources, then removes the storage object and
    database row via ``cleanup_stale_resource`` and commits.

    Args:
        resource_id: The UUID of the resource to cancel
        db: Database session
        current_user: The authenticated user making the request

    Raises:
        ResourceNotFoundError: If resource doesn't exist
        ProjectAccessDeniedError: If user doesn't have access to the resource's
            project, or the resource has no project and the user is not a superuser
        InvalidResourceStateError: If resource is already uploaded
        InternalServerError: If storage/database cleanup fails
    """
    from sqlalchemy import select

    from app.core.control_exceptions import (
        InternalServerError,
        InvalidResourceStateError,
        ProjectAccessDeniedError,
        ResourceNotFoundError,
    )

    # Row lock (SELECT ... FOR UPDATE) so the resource's state cannot change
    # between the checks below and the deletion.
    result = await db.execute(
        select(AttackResourceFile)
        .where(AttackResourceFile.id == resource_id)
        .with_for_update()
    )
    resource = result.scalar_one_or_none()
    if not resource:
        raise ResourceNotFoundError(detail=f"Resource {resource_id} not found")

    # Snapshot everything needed for the final log line now. After commit the
    # resource row is gone and ORM instances may be expired; touching their
    # attributes then can trigger implicit IO on an async session.
    project_id = resource.project_id
    user_id = current_user.id

    # Superusers may cancel anything. Everyone else needs membership of the
    # resource's project, and can never touch unrestricted (project-less) ones.
    if not current_user.is_superuser:
        if project_id is None:
            raise ProjectAccessDeniedError(
                detail="Only superusers can cancel unrestricted resources"
            )
        accessible_projects = {
            assoc.project_id for assoc in (current_user.project_associations or [])
        }
        if project_id not in accessible_projects:
            raise ProjectAccessDeniedError(
                detail=f"User does not have access to project {project_id}"
            )

    if resource.is_uploaded:
        raise InvalidResourceStateError(
            detail="Cannot cancel resource that is already uploaded. Use DELETE to remove uploaded resources."
        )

    # cleanup_stale_resource rolls the session back itself when it returns False.
    if not await cleanup_stale_resource(resource, db):
        raise InternalServerError(detail="Failed to clean up resource")

    await db.commit()

    # Log the manual cancellation (use user_id instead of email for PII compliance)
    logger.bind(
        resource_id=str(resource_id),
        user_id=user_id,
        project_id=project_id,
        action="manual_cancel",
    ).info("Resource upload cancelled by user")
+ +State Transition Diagram for Campaign: + +```mermaid +stateDiagram-v2 + [*] --> DRAFT + DRAFT --> ACTIVE: start + ACTIVE --> PAUSED: pause + PAUSED --> ACTIVE: resume + ACTIVE --> COMPLETED: system_completes + ACTIVE --> DRAFT: stop + DRAFT --> ARCHIVED: archive + ACTIVE --> ARCHIVED: archive + PAUSED --> ARCHIVED: archive + COMPLETED --> ARCHIVED: archive + ARCHIVED --> DRAFT: unarchive + ERROR --> DRAFT: reset +``` + +State Transition Diagram for Attack: + +```mermaid +stateDiagram-v2 + [*] --> PENDING + PENDING --> RUNNING: start + RUNNING --> PAUSED: pause + PAUSED --> RUNNING: resume + RUNNING --> COMPLETED: system_completes + RUNNING --> FAILED: system_fails + RUNNING --> ABANDONED: abort + PAUSED --> ABANDONED: abort + FAILED --> PENDING: retry + PENDING --> ABANDONED: abandon + ABANDONED --> PENDING: reactivate + COMPLETED --> [*] +``` + +Usage Example (Service Layer Integration): + +```python +from app.core.state_machines import ( + CampaignStateMachine, + InvalidStateTransitionError, +) +from app.core.control_exceptions import ( + InvalidStateTransitionProblem, +) + + +async def start_campaign_service( + campaign_id: int, + db: AsyncSession, +) -> CampaignRead: + campaign = await get_campaign( + campaign_id, + db, + ) + + try: + CampaignStateMachine.validate_transition( + campaign.state, + CampaignState.ACTIVE, + action="start", + ) + except InvalidStateTransitionError as e: + # For Control API, convert to RFC9457 + raise InvalidStateTransitionProblem( + from_state=e.from_state, + to_state=e.to_state, + action=e.action, + entity_type="campaign", + ) + + campaign.state = CampaignState.ACTIVE + await ( + db.commit() + ) + return campaign +``` + +See also: Tech Plan spec for architectural context on state management. 
class InvalidStateTransitionError(Exception):
    """Raised when a requested state transition is not permitted.

    Attributes:
        from_state: State the entity was in when the transition was attempted.
        to_state: State the transition was trying to reach.
        action: User action that triggered the attempt, or None.
        message: Human-readable description (also the exception's ``str()``).
    """

    def __init__(
        self,
        from_state: CampaignState | AttackState,
        to_state: CampaignState | AttackState,
        action: str | None = None,
    ) -> None:
        self.from_state = from_state
        self.to_state = to_state
        self.action = action

        source = from_state.value
        target = to_state.value
        # The wording depends on whether a named user action was involved.
        self.message = (
            f"Cannot perform action '{action}': transition from "
            f"'{source}' to '{target}' is not allowed"
            if action
            else f"Invalid state transition from '{source}' to '{target}'"
        )

        super().__init__(self.message)
class CampaignStateMachine:
    """State machine for Campaign state transitions.

    Validates campaign state changes, covering both user-initiated actions
    (start, stop, pause, resume, archive, unarchive, reset) and the
    system-driven ACTIVE -> COMPLETED transition.

    Valid Transitions:
        - DRAFT -> ACTIVE (start), ARCHIVED (archive)
        - ACTIVE -> PAUSED (pause), DRAFT (stop), ARCHIVED (archive), COMPLETED (system)
        - PAUSED -> ACTIVE (resume), ARCHIVED (archive)
        - COMPLETED -> ARCHIVED (archive)
        - ARCHIVED -> DRAFT (unarchive)
        - ERROR -> DRAFT (reset)
    """

    # Each state mapped to the states it may move to.
    TRANSITIONS: ClassVar[dict[CampaignState, list[CampaignState]]] = {
        CampaignState.DRAFT: [CampaignState.ACTIVE, CampaignState.ARCHIVED],
        CampaignState.ACTIVE: [
            CampaignState.PAUSED,
            CampaignState.DRAFT,
            CampaignState.ARCHIVED,
            CampaignState.COMPLETED,
        ],
        CampaignState.PAUSED: [CampaignState.ACTIVE, CampaignState.ARCHIVED],
        CampaignState.COMPLETED: [CampaignState.ARCHIVED],
        CampaignState.ARCHIVED: [CampaignState.DRAFT],
        CampaignState.ERROR: [CampaignState.DRAFT],
    }

    # Each user action mapped to {state it is allowed from: resulting state}.
    ACTIONS: ClassVar[dict[str, dict[CampaignState, CampaignState]]] = {
        "start": {CampaignState.DRAFT: CampaignState.ACTIVE},
        "stop": {CampaignState.ACTIVE: CampaignState.DRAFT},
        "pause": {CampaignState.ACTIVE: CampaignState.PAUSED},
        "resume": {CampaignState.PAUSED: CampaignState.ACTIVE},
        "archive": {
            CampaignState.DRAFT: CampaignState.ARCHIVED,
            CampaignState.ACTIVE: CampaignState.ARCHIVED,
            CampaignState.PAUSED: CampaignState.ARCHIVED,
            CampaignState.COMPLETED: CampaignState.ARCHIVED,
        },
        "unarchive": {CampaignState.ARCHIVED: CampaignState.DRAFT},
        "reset": {CampaignState.ERROR: CampaignState.DRAFT},
    }

    @classmethod
    def can_transition(cls, from_state: CampaignState, to_state: CampaignState) -> bool:
        """Check if a state transition is valid.

        Args:
            from_state: The current state.
            to_state: The target state.

        Returns:
            True if the transition is valid, False otherwise (including when
            ``from_state`` has no entry in ``TRANSITIONS``).
        """
        return to_state in cls.TRANSITIONS.get(from_state, ())

    @classmethod
    def validate_transition(
        cls,
        from_state: CampaignState,
        to_state: CampaignState,
        action: str | None = None,
    ) -> None:
        """Validate a state transition and raise an error if invalid.

        Args:
            from_state: The current state.
            to_state: The target state.
            action: Optional user action that triggered the transition; only
                used to enrich the error.

        Raises:
            InvalidStateTransitionError: If the transition is not valid.
        """
        if not cls.can_transition(from_state, to_state):
            raise InvalidStateTransitionError(from_state, to_state, action)

    @classmethod
    def validate_action(
        cls, current_state: CampaignState, action: str
    ) -> CampaignState:
        """Validate a user action against the current state.

        Args:
            current_state: The current campaign state.
            action: The user action to validate (start, stop, pause, resume,
                archive, unarchive, reset).

        Returns:
            The target state for the action.

        Raises:
            InvalidStateTransitionError: If the action is unknown, or is not
                allowed from ``current_state``.
        """
        action_map = cls.ACTIONS.get(action)
        if action_map is None:
            # Unknown action: there is no target state, so the error reports
            # the current state on both sides.
            raise InvalidStateTransitionError(
                current_state,
                current_state,
                action=action,
            )

        target_state = action_map.get(current_state)
        if target_state is None:
            # Known action, wrong state: report the action's first declared
            # target as a representative "attempted" state.
            valid_targets = list(action_map.values())
            target_for_error = valid_targets[0] if valid_targets else current_state
            raise InvalidStateTransitionError(current_state, target_for_error, action)

        return target_state

    @classmethod
    def get_valid_transitions(cls, from_state: CampaignState) -> list[CampaignState]:
        """Get all valid target states from a given state.

        Args:
            from_state: The current state.

        Returns:
            A new list of valid target states (empty if the state has no
            entry). A copy is returned so callers cannot mutate the
            class-level ``TRANSITIONS`` table through the result.
        """
        return list(cls.TRANSITIONS.get(from_state, ()))

    @classmethod
    def get_valid_actions(cls, from_state: CampaignState) -> list[str]:
        """Get all valid actions from a given state.

        Args:
            from_state: The current state.

        Returns:
            List of valid action names, in ``ACTIONS`` declaration order.
        """
        return [
            action
            for action, state_map in cls.ACTIONS.items()
            if from_state in state_map
        ]
AttackState.ABANDONED, + AttackState.PAUSED: AttackState.ABANDONED, + }, + "reactivate": {AttackState.ABANDONED: AttackState.PENDING}, + } + + @classmethod + def can_transition(cls, from_state: AttackState, to_state: AttackState) -> bool: + """Check if a state transition is valid. + + Args: + from_state: The current state. + to_state: The target state. + + Returns: + True if the transition is valid, False otherwise. + """ + valid_targets = cls.TRANSITIONS.get(from_state, []) + return to_state in valid_targets + + @classmethod + def validate_transition( + cls, + from_state: AttackState, + to_state: AttackState, + action: str | None = None, + ) -> None: + """Validate a state transition and raise an error if invalid. + + Args: + from_state: The current state. + to_state: The target state. + action: Optional user action that triggered the transition. + + Raises: + InvalidStateTransitionError: If the transition is not valid. + """ + if not cls.can_transition(from_state, to_state): + raise InvalidStateTransitionError(from_state, to_state, action) + + @classmethod + def validate_action(cls, current_state: AttackState, action: str) -> AttackState: + """Validate a user action against the current state. + + Args: + current_state: The current attack state. + action: The user action to validate (start, pause, resume, retry, abandon, abort, reactivate). + + Returns: + The target state for the action. + + Raises: + InvalidStateTransitionError: If the action is not valid for the current state. 
+ """ + action_map = cls.ACTIONS.get(action) + if action_map is None: + raise InvalidStateTransitionError( + current_state, + current_state, + action=action, + ) + + target_state = action_map.get(current_state) + if target_state is None: + # Find any valid target for this action to provide in error + valid_targets = list(action_map.values()) + target_for_error = valid_targets[0] if valid_targets else current_state + raise InvalidStateTransitionError(current_state, target_for_error, action) + + return target_state + + @classmethod + def get_valid_transitions(cls, from_state: AttackState) -> list[AttackState]: + """Get all valid target states from a given state. + + Args: + from_state: The current state. + + Returns: + List of valid target states. + """ + return cls.TRANSITIONS.get(from_state, []) + + @classmethod + def get_valid_actions(cls, from_state: AttackState) -> list[str]: + """Get all valid actions from a given state. + + Args: + from_state: The current state. + + Returns: + List of valid action names. + """ + return [ + action + for action, state_map in cls.ACTIONS.items() + if from_state in state_map + ] + + @classmethod + def is_terminal_state(cls, state: AttackState) -> bool: + """Check if a state is terminal (no outgoing transitions). + + Args: + state: The state to check. + + Returns: + True if the state is terminal, False otherwise. + + Raises: + ValueError: If the state is not a known AttackState. 
+ """ + if state not in cls.TRANSITIONS: + raise ValueError(f"Unknown attack state: {state}") + return len(cls.TRANSITIONS[state]) == 0 diff --git a/app/core/tasks/resource_tasks.py b/app/core/tasks/resource_tasks.py index 1972c126..9739009d 100644 --- a/app/core/tasks/resource_tasks.py +++ b/app/core/tasks/resource_tasks.py @@ -1,4 +1,5 @@ import asyncio +from datetime import UTC, datetime, timedelta from minio.error import S3Error from sqlalchemy import delete @@ -18,8 +19,13 @@ async def verify_upload_and_cleanup( """ Background task to verify if the file was uploaded to MinIO. If not, delete the resource from DB. TODO: Upgrade to Celery when Redis is available. + + Note: This function is intentionally conservative - it only cleans up if it can confirm + the file exists in MinIO. On any error, it returns without cleanup to avoid deleting + resources that might still be uploading. """ await asyncio.sleep(timeout_seconds) + # Re-check DB in case resource was verified async with db as session: result = await session.execute( @@ -27,19 +33,26 @@ async def verify_upload_and_cleanup( ) resource_obj = result.scalar_one_or_none() if not resource_obj: - return # Already deleted or verified + logger.debug( + f"Resource {resource_id} already deleted or verified. Skipping cleanup." + ) + return if getattr(resource_obj, "is_uploaded", False): logger.info( f"Resource {resource_id} already marked as uploaded. Skipping cleanup." ) return + storage_service = get_storage_service() bucket = settings.MINIO_BUCKET try: exists = await storage_service.bucket_exists(bucket) if not exists: - logger.error(f"Bucket {bucket} does not exist") - return # Bucket gone, nothing to do + logger.error( + f"Bucket {bucket} does not exist. Cannot verify upload for {resource_id}." 
+ ) + return + # Try to get object try: obj = storage_service.client.stat_object(bucket, str(resource_id)) @@ -47,16 +60,161 @@ async def verify_upload_and_cleanup( logger.info( f"File {resource_id} exists in MinIO, appears to be uploaded successfully. Skipping cleanup." ) - return # File exists, do nothing + return except (S3Error, OSError) as e: - logger.error(f"Error checking file existence in MinIO: {e}") - return # File not found, nothing to do - # Delete resource from DB + # On any storage error, return without cleanup - the file might still be uploading + logger.bind( + resource_id=resource_id, + error=str(e), + error_type=type(e).__name__, + ).debug("Error checking file existence in MinIO, skipping cleanup") + return + + # Delete resource from DB (only reached if stat_object returned None, which shouldn't happen) async with db as session: await session.execute( delete(AttackResourceFile).where(AttackResourceFile.id == resource_id) ) await session.commit() logger.info(f"Resource {resource_id} not found in MinIO, deleted from DB") - except (SQLAlchemyError, OSError) as e: - logger.error(f"Background upload verification failed: {e}") + + except (SQLAlchemyError, OSError, S3Error) as e: + logger.bind( + resource_id=resource_id, + error=str(e), + error_type=type(e).__name__, + ).error("Background upload verification failed") + + +async def cleanup_stale_pending_resources(db: AsyncSession) -> dict[str, int]: + """ + Clean up stale pending resources that haven't been uploaded within the configured age threshold. + + Queries for resources where is_uploaded=False and created_at < (now - age threshold). + Uses skip_locked to prevent concurrent cleanup conflicts without blocking. + Commits after each resource to minimize lock duration during I/O operations. 
+ + Args: + db: Database session from sessionmanager.session() + + Returns: + Dictionary with cleanup summary: {deleted: int, errors: int} + """ + from app.core.services.resource_service import cleanup_stale_resource + + age_hours = settings.RESOURCE_CLEANUP_AGE_HOURS + cutoff_time = datetime.now(UTC) - timedelta(hours=age_hours) + + deleted = 0 + errors = 0 + + try: + # Query for stale pending resource IDs only (no FOR UPDATE lock on query) + # Each resource will be processed individually with its own lock + result = await db.execute( + select(AttackResourceFile.id) + .where(AttackResourceFile.is_uploaded == False) # noqa: E712 + .where(AttackResourceFile.created_at < cutoff_time) + ) + stale_resource_ids = [row[0] for row in result.fetchall()] + + if not stale_resource_ids: + logger.debug("No stale pending resources found for cleanup") + return {"deleted": 0, "errors": 0} + + logger.bind( + stale_count=len(stale_resource_ids), + age_threshold_hours=age_hours, + ).info("Found stale pending resources for cleanup") + + # Process each stale resource individually to minimize lock duration + for resource_id in stale_resource_ids: + try: + # Lock and fetch the resource for this specific deletion + result = await db.execute( + select(AttackResourceFile) + .where(AttackResourceFile.id == resource_id) + .with_for_update(skip_locked=True) + ) + resource = result.scalar_one_or_none() + + if resource is None: + # Resource was already deleted or locked by another worker + continue + + # Skip if resource was uploaded while we were processing + if resource.is_uploaded: + continue + + success = await cleanup_stale_resource(resource, db) + if success: + deleted += 1 + # Commit after each successful deletion to release lock quickly + await db.commit() + else: + errors += 1 + await db.rollback() + + except (S3Error, SQLAlchemyError, OSError, ConnectionError) as e: + # Catch specific storage/database/network errors, not programming bugs + logger.bind( + resource_id=str(resource_id), 
+ error=str(e), + error_type=type(e).__name__, + ).error("Exception during stale resource cleanup") + errors += 1 + await db.rollback() + # Continue processing other resources + + except Exception as e: + logger.bind(error=str(e)).error("Failed to query stale pending resources") + await db.rollback() + raise + + return {"deleted": deleted, "errors": errors} + + +async def run_periodic_cleanup() -> None: + """ + Background task that runs periodically to clean up stale pending resources. + + Runs in an infinite loop with configurable sleep interval (default 1 hour). + Opens fresh database sessions via sessionmanager.session(). + Logs cleanup summary after each run. + Handles exceptions to prevent task crash. + """ + from app.db.session import sessionmanager + + interval_hours = settings.RESOURCE_CLEANUP_INTERVAL_HOURS + interval_seconds = interval_hours * 3600 + + logger.bind( + interval_hours=interval_hours, + age_threshold_hours=settings.RESOURCE_CLEANUP_AGE_HOURS, + ).info("Starting periodic resource cleanup task") + + while True: + try: + # Wait before first run to allow system to stabilize + await asyncio.sleep(interval_seconds) + + logger.debug("Running periodic resource cleanup") + + async with sessionmanager.session() as db: + summary = await cleanup_stale_pending_resources(db) + + logger.bind( + deleted=summary["deleted"], + errors=summary["errors"], + ).info("Periodic resource cleanup completed") + + except asyncio.CancelledError: + logger.info("Periodic resource cleanup task cancelled") + raise # Re-raise to allow proper task cancellation + except (S3Error, SQLAlchemyError, OSError, ConnectionError) as e: + # Catch specific storage/database/network errors, not programming bugs + logger.bind( + error=str(e), + error_type=type(e).__name__, + ).error("Periodic resource cleanup failed") + # Continue running, don't crash the task diff --git a/app/main.py b/app/main.py index 61824699..4729315c 100644 --- a/app/main.py +++ b/app/main.py @@ -1,9 +1,10 @@ 
"""Ouroboros FastAPI Application.""" +import asyncio import logging import time from collections.abc import AsyncGenerator, Awaitable, Callable -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager, suppress from cashews import cache from cashews.contrib.fastapi import ( @@ -55,12 +56,20 @@ def emit(self, record: logging.LogRecord) -> None: @asynccontextmanager async def lifespan(_app: FastAPI) -> AsyncGenerator[None]: """FastAPI lifespan events.""" + from app.core.tasks.resource_tasks import run_periodic_cleanup + # Initialize database session manager with consolidated settings sessionmanager.init(settings) + # Start periodic cleanup task + cleanup_task = asyncio.create_task(run_periodic_cleanup()) + yield - # Cleanup on shutdown + # Shutdown cleanup + cleanup_task.cancel() + with suppress(asyncio.CancelledError): + await cleanup_task await sessionmanager.close() @@ -276,9 +285,5 @@ async def invalid_agent_token_handler( return JSONResponse(status_code=401, content={"detail": str(exc)}) -# Register v1 error handler for all /api/v1/client/* and /api/v1/agent/* endpoints (contract compliance) -# The handler passes any non-Agent API endpoints to the default handler -app.add_exception_handler(HTTPException, v1_http_exception_handler) - # Setup custom OpenAPI documentation setup_openapi_customization(app) diff --git a/app/models/attack.py b/app/models/attack.py index 991a66b4..169e3907 100644 --- a/app/models/attack.py +++ b/app/models/attack.py @@ -21,6 +21,7 @@ class AttackMode(str, Enum): class AttackState(str, Enum): PENDING = "pending" RUNNING = "running" + PAUSED = "paused" COMPLETED = "completed" FAILED = "failed" ABANDONED = "abandoned" diff --git a/docs/epics/control-api/specs/Core_Flows__Control_API_User_Journeys.md b/docs/epics/control-api/specs/Core_Flows__Control_API_User_Journeys.md new file mode 100644 index 00000000..5a76fa5f --- /dev/null +++ b/docs/epics/control-api/specs/Core_Flows__Control_API_User_Journeys.md @@ 
-0,0 +1,516 @@ +# Core Flows: Control API User Journeys + +## Overview (validated) + +This document defines the core user flows for the Control API as a programmatic experience for terminal users, scripts, and third-party integrations. The flows describe user intent, operation sequence, and feedback expectations; the exact endpoint design is defined later in the Tech Plan. + +## Flow 1: Campaign Lifecycle Management + +**Description**: Create, configure, launch, monitor, and control password cracking campaigns programmatically. Note: users invoke actions (start/stop/pause/resume/archive/unarchive) while the system reports execution states (e.g., queued/active, running, completed) that may change automatically as work progresses. + +**Entry Point**: User has hash data to crack and wants to set up an automated campaign. + +### Flow Steps + +```mermaid +sequenceDiagram + participant User as CLI/Script + participant API as Control API + participant Storage as MinIO Storage + participant System as Ouroboros Core + + Note over User,System: Phase 1: Preparation + User->>API: Create hash list + API->>System: Validate and store hashes + System-->>API: Hash list created + API-->>User: Success (hash list id) + + User->>API: List resources (e.g., wordlists) + API-->>User: Success (resource list) + + Note over User,System: Phase 2: Campaign Creation + User->>API: Create campaign (reference hash list) + API->>System: Create campaign in draft state + System-->>API: Campaign created + API-->>User: Success (campaign id, initial state) + + Note over User,System: Phase 3: Attack Configuration + User->>API: Create attack (attach to campaign) + API->>System: Validate attack config + System-->>API: Attack created + API-->>User: Success (attack id, estimate summary) + + User->>API: Add multiple attacks to campaign + API->>System: Bulk create attacks + System-->>API: Attacks created + API-->>User: Success (bulk add summary) + + Note over User,System: Phase 4: Launch & Monitor + 
User->>API: Start campaign + API->>System: Validate and start campaign + System-->>API: Campaign started + API-->>User: Success (campaign started) + + loop Monitoring + User->>API: Get campaign status + API->>System: Get current progress + System-->>API: Progress data + API-->>User: Success (progress snapshot) + end + + Note over User,System: Phase 5: Control + User->>API: Stop/pause campaign + API->>System: Stop campaign gracefully + System-->>API: Campaign stopped + API-->>User: Success (campaign state updated) +``` + +**Step-by-Step Breakdown**: + +1. **Create hash list**: user uploads hash data and receives a hash list identifier + + - Response includes hash count, detected hash types, validation status + - Errors indicate format issues or duplicate hashes + +2. **Discover resources**: user lists available wordlists, rules, and masks in the project + + - Rich list response includes file sizes, line counts, last modified dates + - Filtering by type, project, and search terms + +3. **Create Campaign**: User creates campaign referencing hash list ID + + - Campaign starts in "draft" state, not yet executable + - Response includes campaign ID and initial metadata + +4. **Configure attacks** (two approaches): + + - **Separate**: create attacks and attach them to the campaign + - **Inline**: include attacks as part of a single campaign creation step + - Each attack validated for resource availability and configuration correctness + - Response includes estimated keyspace and time-to-completion + +5. **Validate before launch** (optional): user runs a pre-flight validation/check + + - Pre-flight check identifies missing resources, invalid configs + - Returns actionable error messages without modifying state + +6. **Start Campaign**: User initiates campaign execution + + - State transitions from "draft" to "running" + - Tasks automatically distributed to available agents + - Response confirms start time and initial task count + +7. 
**Monitor progress**: user polls status operations (single campaign and bulk dashboard views) + + - Individual campaign status: detailed progress, active tasks, errors + - Bulk status: monitor multiple campaigns for dashboard views + - Rich response includes progress percentage, ETA, crack count + +8. **Control Execution**: User can pause, resume, or stop campaigns + + - State transitions validated (can't start already-running campaign) + - Graceful shutdown ensures task completion before stopping + - Response confirms new state and timestamp + +**Exit Points**: + +- Campaign completes successfully -> User retrieves results (hash list is canonical, plus campaign-centric summaries/exports) +- Campaign paused -> User can resume or modify configuration +- Campaign fails -> User reviews errors and relaunches + +--- + +## Flow 2: Resource Management + +**Description**: Upload, organize, and manage attack resources (wordlists, rules, masks) for use in campaigns. + +**Entry Point**: User has custom wordlists or rules to use in attacks. + +### Flow Steps + +```mermaid +sequenceDiagram + participant User as CLI/Script + participant API as Control API + participant Storage as MinIO Storage + + Note over User,Storage: Phase 1: Initiate Upload + User->>API: Initiate upload (create pending resource + presigned URL) + API->>Storage: Generate presigned URL + Storage-->>API: Presigned URL + pending resource reference + API-->>User: Success (upload URL + pending resource reference) + + Note over User,Storage: Phase 2: Direct Upload + User->>Storage: PUT to presigned URL<br/>(file content) + Storage-->>User: Upload success + + Note over User,Storage: Phase 3: Confirm Upload + User->>API: Confirm upload (finalize resource) + API->>Storage: Verify file exists + Storage-->>API: File metadata + API->>API: Finalize pending resource record + API-->>User: Success (resource finalized) + + Note over User,Storage: Phase 4: Manage Resources + User->>API: Get resource details + API-->>User: Success (resource summary) + + User->>API: Update resource metadata + API-->>User: Success (resource updated) + + User->>API: Preview resource content/lines + API->>Storage: Fetch content range + Storage-->>API: Content + API-->>User: Success (content preview) +``` + +**Step-by-Step Breakdown**: + +1. **Initiate upload**: user provides resource metadata and receives a presigned upload URL + + - System creates a pending resource record immediately + - Response provides a stable reference used for confirmation/finalization + +2. **Upload to Storage**: User uploads file directly to MinIO + + - Uses presigned URL from step 1 + - No API server involvement (better performance for large files) + - Standard HTTP PUT with file content + +3. **Confirm upload**: user finalizes the pending resource + + - System verifies file exists in storage + - System validates and analyzes the file (e.g., line count) + - Response confirms the resource is finalized and ready for use + +4. **List Resources**: User discovers available resources + + - Rich list includes file sizes, line counts, creation dates + - Filtering by type, project, search terms + - Pagination for large resource libraries + +5. **View Resource Details**: User inspects specific resource + + - Full metadata including usage statistics + - Preview of content (first N lines) + - Associated campaigns/attacks using this resource + +6. 
**Update Resource**: User modifies resource metadata or content + + - Rename, change description, update tags + - Inline content editing for small files + - Versioning for content changes + +7. **Delete Resource**: User removes unused resource + + - Validation prevents deletion if resource is in use + - Soft delete with recovery period + - Cascade options for associated data + +**Exit Points**: + +- Resource ready for use in attacks +- Resource deleted or archived +- Upload failed -> User retries with new presigned URL + +--- + +## Flow 3: Real-Time Monitoring & Status Polling + +**Description**: Monitor campaign progress, agent status, and system health through efficient polling patterns. + +**Entry Point**: User has running campaigns and wants to track progress. + +### Flow Steps + +```mermaid +sequenceDiagram + participant User as CLI/TUI + participant API as Control API + participant System as Ouroboros Core + + Note over User,System: Individual Campaign Monitoring + loop Every 5-10 seconds + User->>API: Get campaign status (drill-down) + API->>System: Get campaign state + System-->>API: Current progress + API-->>User: Success (status snapshot) + end + + Note over User,System: Bulk Dashboard Monitoring + loop Every 10-15 seconds + User->>API: List campaigns for dashboard (filter + page) + API->>System: Get multiple campaign states + System-->>API: Aggregated data + API-->>User: Success (dashboard items incl 1-level attack rollup) + end + + Note over User,System: Agent Fleet Monitoring + User->>API: Get agent fleet summary + API->>System: Get agent statistics + System-->>API: Fleet status + API-->>User: Success (fleet summary) + + Note over User,System: System Health Check + User->>API: Get system health status + API->>System: Check all components + System-->>API: Health data + API-->>User: Success (health summary) +``` + +**Step-by-Step Breakdown**: + +1. 
**Individual Campaign Status**: User polls single campaign + + - Detailed progress: percentage complete, ETA, current attack + - Active task count and agent assignments + - Recent crack count and error count + - Optimized for focused monitoring + +2. **Bulk campaign status**: user monitors multiple campaigns in a single dashboard query + + - Filterable and pageable + - Rich-but-shallow campaign rollups + - Includes a 1-level attack rollup summary (e.g., current attack + next attack) + +3. **Campaign Metrics**: User retrieves performance data + + - Hash rate over time + - Crack rate and success percentage + - Resource utilization per attack + - Historical trend data + +4. **Agent Fleet Summary**: User monitors agent availability + + - Total agents, active/idle breakdown + - Hardware capabilities summary + - Error rates and performance metrics + - Filtering by project assignment + +5. **System Health**: User checks overall system status + + - Component health (database, storage, queue) + - Resource usage (CPU, memory, disk) + - Active connections and throughput + - Alert conditions and warnings + +**Polling guidance (examples)**: + +- Individual status: 5-10 second intervals +- Bulk status: 10-15 second intervals +- System health: 30-60 second intervals +- Clients should back off on errors (e.g., exponential backoff) + +**Exit Points**: + +- Campaign completes -> Stop polling, retrieve results +- User stops monitoring -> Continue background execution +- System issues detected -> User investigates via drill-down operations + +--- + +## Flow 4: Template reuse (export, validate, import) + +**Description**: Export campaign configurations as templates and import them into other environments. + +**Entry Point**: User wants to reuse campaign configuration across projects or environments. + +### Flow Steps + +1. 
**Export Campaign Template**: User exports existing campaign + + - Includes all attacks, configurations, resource references + - Template format: JSON with schema version + - Resource references use GUIDs for portability + +2. **Validate Template**: User validates template before import + + - Dry-run mode checks resource availability + - Identifies missing resources and incompatibilities + - Returns detailed validation report + +3. **Import template**: user imports the template into the target environment + + - Partial import is allowed: create the campaign but skip attacks with missing resources + - Response clearly reports imported vs skipped items and why + +4. **Resolve Missing Resources**: User uploads missing resources + + - Identifies required resources from validation report + - Uploads via resource management flow + - Re-imports template after resources available + +5. **Customize Imported Campaign**: User modifies imported configuration + + - Updates campaign name, description + - Adjusts attack parameters + - Assigns to different project or hash list + +**Exit Points**: + +- Template successfully imported -> Campaign ready to launch +- Partial import -> User resolves missing resources +- Import failed -> User reviews validation errors + +--- + +## Flow 5: Batch Operations + +**Description**: Perform bulk operations on multiple campaigns for efficient management. Batch operations are intentionally **single-project scoped** (one project per request). + +**Entry Point**: User needs to control multiple campaigns simultaneously within a project. + +### Flow Steps + +1. **Bulk start campaigns**: user starts multiple campaigns + + - Default behavior is best-effort (per-item outcomes) + - User may request atomic behavior when needed + - Response includes success/failure for each campaign + +2. 
**Bulk Stop Campaigns**: User stops multiple campaigns + + - Same atomic vs best-effort choice + - Graceful shutdown for all campaigns + - Response includes final states + +3. **Bulk Status Check**: User retrieves status for multiple campaigns + + - Efficient single API call vs multiple individual calls + - Filtering and pagination support + - Rich data for dashboard rendering + +4. **Bulk Delete/Archive**: User removes multiple campaigns + + - Validation prevents deletion of running campaigns + - Cascade options for associated data + - Response includes deletion results + +**Batch Operation Modes**: + +- **Atomic**: All operations succeed or all fail (transaction-like) +- **Best-Effort**: Process each independently, return partial results +- **Configurable**: user chooses between best-effort and atomic behavior + +**Exit Points**: + +- All operations successful -> User continues workflow +- Partial success -> User reviews failures and retries +- All operations failed -> User investigates root cause + +--- + +## Flow 6: Error Recovery & Validation + +**Description**: Discover and resolve issues before and after operations. + +**Entry Point**: User wants to ensure operation will succeed or understand why it failed. + +### Flow Steps + +1. **Pre-Flight Validation**: User validates before attempting operation + + - Campaign start validation: checks resources, agents, hash list + - Attack creation validation: checks resource availability, config correctness + - Returns detailed validation report without modifying state + +2. **Operation Attempt**: User attempts operation + + - Operation fails with RFC9457 error response + - Error includes problem type, detail, and suggested fixes + - Instance URI points to specific resource + +3. **Error Analysis**: User examines error details + + - Problem type indicates category (missing resource, invalid config, etc.) + - Detail provides human-readable explanation + - Extensions include actionable suggestions + +4. 
**Issue Resolution**: User fixes identified problems + + - Uploads missing resources + - Corrects configuration errors + - Assigns required permissions + +5. **Retry Operation**: User retries after fixes + + - Validation endpoint confirms fixes + - Operation succeeds + - User continues workflow + +**Error response expectations (RFC9457)**: + +- Errors are returned as Problem Details (machine-readable type plus human-readable title/detail). +- Responses may include extensions to support automation (e.g., a list of missing resources and suggested next actions). + +**Exit Points**: + +- Validation passes -> User proceeds with operation +- Validation fails -> User resolves issues +- Operation succeeds after fixes -> User continues workflow + +--- + +## Flow 7: Administration (users, projects, API keys) + +**Description**: Headless administration for operators and system administrators. + +**Entry Point**: Admin needs to manage access and operational setup without using the Web UI. + +**Flow Steps**: + +1. Admin lists and manages projects (create/update/archive/delete where permitted). +2. Admin lists and manages users (create/update/deactivate) and their project access. +3. Admin manages API keys (create/rotate/revoke) and can audit key usage at a high level. + +**Exit Points**: + +- Admin changes applied -> projects/users/keys ready for campaign workflows + +--- + +## Cross-Flow Patterns + +### Authentication + +- All flows require API key authentication +- Token format: `cst_<user_id>_<random_string>` +- Passed via `Authorization: Bearer <token>` header +- Invalid/expired tokens return 401 Unauthorized + +### Project scoping + +- **Default scoping:** clients scope most operations via a `project_id` query parameter (or omit it to operate over “all accessible projects” for list-style endpoints). +- **Deliberate hybrid:** batch campaign operations are scoped by project in the URL hierarchy (single project per batch request) to reduce cross-project mistakes. 
+- Operations are constrained to projects the caller can access. +- Admin workflows may support cross-project views where permitted. + +### Pagination + +- Offset-based pagination for list operations. +- Responses include returned items, total count, and paging metadata. +- Designed for sequential access and TUI rendering. + +### Rate limiting (guidance) + +- Rate limiting may be enforced to protect system stability. +- Any rate limiting and caching behavior should be compatible with scripts and provide clear feedback. + +### Data Richness + +- List operations return rich data (not deeply nested) +- Includes computed fields: progress, ETA, crack count +- Nested relationships limited to 1 level deep +- Drill-down operations provide full depth when needed + +--- + +## Success Metrics + +These flows are successful when: + +1. **Workflow Completeness**: Users can complete entire campaign lifecycle without Web UI +2. **Automation Efficiency**: Bulk operations significantly faster than manual Web UI clicks +3. **Integration Reliability**: Third-party tools can build stable integrations +4. **Error Clarity**: Users can diagnose and fix issues from error responses alone +5. **Performance**: Polling patterns don't overload API or database diff --git a/docs/epics/control-api/specs/Epic_Brief__Control_API_Completion.md b/docs/epics/control-api/specs/Epic_Brief__Control_API_Completion.md new file mode 100644 index 00000000..64ce38b2 --- /dev/null +++ b/docs/epics/control-api/specs/Epic_Brief__Control_API_Completion.md @@ -0,0 +1,52 @@ +# Epic Brief: Control API Completion + +## Summary + +Complete the Control API to provide a comprehensive programmatic interface for Ouroboros that supports terminal-based workflows, automation scripts, and third-party integrations. Primary users include red team operators running automated cracking campaigns, system administrators managing deployments, and CLI/TUI power users who prefer not to rely on a browser. 
The Control API must support both operational workflows (campaigns/resources/monitoring/results) and headless administration (users/projects/API keys). It should be stable and machine-friendly (consistent pagination, structured responses, and RFC9457 problem details), while remaining flexible enough to support both official tooling and external clients. + +## Context & Problem + +### Who's Affected + +**Primary Users:** + +- **Red Team Operators**: Security professionals running automated password cracking campaigns as part of penetration testing and security assessments +- **System Administrators**: Infrastructure managers who need to configure, monitor, and maintain Ouroboros deployments programmatically +- **CLI/TUI Power Users**: Terminal-focused users who prefer command-line workflows over browser-based interfaces + +**Secondary Stakeholders:** + +- **Third-Party Tool Developers**: Engineers building integrations with Ouroboros for security toolchains and CI/CD pipelines +- **Automation Script Authors**: Users creating custom workflows and batch operations for repetitive tasks + +### Current pain (validated) + +- **Browser dependency** blocks terminal-first workflows and automation. +- **Incomplete end-to-end headless coverage**: core actions (create campaigns/resources, run campaigns, monitor, retrieve results) are not consistently available through a single programmatic surface. +- **Missing programmatic admin** makes headless operations difficult (user/project/key management). +- **Integration friction**: third-party tools need a clear, stable, machine-readable API contract. 
+ +### Where in the Product + +The Control API sits alongside the existing Web UI API and Agent API as one of three primary interfaces to Ouroboros: + +- **Agent API** (`/api/v1/client/*`): For distributed hashcat agents executing tasks +- **Web UI API** (`/api/v1/web/*`): For the SvelteKit browser-based dashboard +- **Control API**: For CLI/TUI clients, automation scripts, and third-party integrations (this Epic) + +The Control API reuses the same underlying business capabilities as the Web UI, but is optimized for machine workflows (stable error format, pagination, and clear project scoping). + +### Success Criteria + +This Epic is successful when: + +1. **Headless workflow completeness**: users can manage campaigns end-to-end (including results retrieval/exports) without touching the Web UI. +2. **Headless administration**: admins can manage users/projects/API keys without the Web UI. +3. **Reliable integration**: third-party tools and scripts can integrate through stable, machine-readable behaviors. +4. **Automation efficiency**: bulk operations and scripted workflows are materially faster than manual UI workflows. + +### Out of Scope + +- Building the CLI/TUI client itself (the Control API is the foundation) +- Web UI-only helper behaviors and presentation-specific endpoints +- Changes to the Agent API contract diff --git a/docs/epics/control-api/specs/Tech_Plan__Control_API_Completion.md b/docs/epics/control-api/specs/Tech_Plan__Control_API_Completion.md new file mode 100644 index 00000000..e8ac4aa4 --- /dev/null +++ b/docs/epics/control-api/specs/Tech_Plan__Control_API_Completion.md @@ -0,0 +1,128 @@ +# Tech Plan: Control API Completion + +## Architectural Approach + +**Goal:** Complete `/api/v1/control/*` for headless administration + automation-first operations (campaigns/resources/monitoring/results), reusing the existing service layer while enforcing machine-friendly behavior (offset pagination, strict state transitions, RFC9457). 
+ +### API shape & scoping + +- **Default project scoping:** **query-parameter scoping** (matches existing Control API patterns in `file:app/api/v1/endpoints/control/campaigns.py`). + - List endpoints: `project_id` optional; when omitted, results are limited to projects visible via `current_user.project_associations`. + - Mutating endpoints: `project_id` is required (or implied by the referenced resource), and must pass `user_can_access_project_by_id(...)` from `file:app/core/authz.py`. +- **Batch operations scoping (deliberate hybrid):** batch endpoints are **path-scoped by project** to reduce accidental cross-project actions and to simplify caching / audit logs. Example shape: `/api/v1/control/projects/{project_id}/campaigns/batch-*`. +- **Admin surfaces** (users/projects/keys) remain non-project-scoped. + +### AuthN/AuthZ + +- AuthN: `get_current_control_user` (`file:app/core/deps.py`) using `Authorization: Bearer cst_<user_id>_<random_string>`. +- AuthZ: Casbin enforcement via `user_can(...)` + project membership checks via `user_can_access_project_by_id(...)`. + +### RFC9457 error handling (non-negotiable) + +- **Normalization approach (validated):** Control routes may call into existing services that raise `HTTPException` (common across the codebase). For the Control API, those exceptions must be normalized into RFC9457. +- **Implementation strategy:** extend `file:app/core/control_rfc9457_middleware.py` to also intercept `fastapi.HTTPException` on `/api/v1/control/*` paths and emit an RFC9457 Problem response (using `about:blank` or a stable internal problem type set). This keeps changes localized and avoids a wide service-layer refactor. +- Endpoints still translate known domain failures into `file:app/core/control_exceptions.py` Problem types. +- Problem responses should support **extensions** (e.g., missing resources list) in addition to `type/title/status/detail/instance`. 
+ +### Strict lifecycle state machines + +- Implement dedicated state machine classes (Campaign + Attack) and enforce them in service-layer operations (start/stop/pause/resume/archive/unarchive). +- Persisted states use existing enums (e.g., `CampaignState` in `file:app/models/campaign.py`). +- “Running” vs “Queued” should be treated as **derived runtime status** (computed from tasks/attacks) rather than requiring a new DB enum immediately. + +```mermaid +stateDiagram + [*] --> draft + draft --> active: start + active --> paused: pause + paused --> active: resume + active --> completed: system-completes + active --> draft: stop + draft --> archived: archive + active --> archived: archive + paused --> archived: archive + archived --> draft: unarchive (if allowed) +``` + +### Resource uploads (pending → confirm) + +- Use the existing “create pending + presign” approach in `file:app/core/services/resource_service.py`: + - Initiate returns a stable pending resource reference + presigned PUT URL. + - Confirm finalizes: verifies object exists, computes stats/checksum (via `StorageService.get_file_stats` in `file:app/core/services/storage_service.py`), sets `is_uploaded=True`. +- Cleanup (deterministic): + - Manual cancel endpoint is supported. + - Implement a **periodic sweep** (idempotent) that: + 1. finds stale pending resources (e.g., `is_uploaded=false` older than a cutoff), + 2. checks MinIO object presence, + 3. deletes DB record and object when appropriate. + - The sweep must open fresh DB sessions via `file:app/db/session.py` (not reuse request sessions) and be safe under concurrency (multiple workers) by using row-level locking or other idempotent guards. + - The existing timeout-based background check in `file:app/core/tasks/resource_tasks.py` can remain as a fast-path, but the sweep is the reliability backstop. 
+ +### Monitoring & dashboard endpoints + +- “Bulk dashboard” endpoints return **rich-but-shallow** campaign rollups plus **1-level attack rollup** (current + next). +- Bulk status is cached for **5–10 seconds TTL**, **shared by project** (cache key includes `project_id` + normalized filters). + +### Batch operations + +- Batch operations are **single-project scoped**. +- Default mode is **best-effort** (per-item outcomes); optional `mode=atomic`. +- Response is an **array of per-item results** with `success` and a machine-readable error payload when failed. + +### Templates + +- Templates are **JSON** with explicit `schema_version` (reuse `CampaignTemplate` from `file:app/schemas/shared.py`). +- Import endpoint supports `validate_only=true`. +- Import supports **partial import**: campaign created, but attacks referencing missing resources are skipped with a structured report. + +### Results retrieval & exports + +- Results are canonical at the HashList/HashItem layer (`file:app/models/hash_list.py`, `file:app/models/hash_item.py`) plus campaign convenience views. +- Campaign results support: + - JSON (API-native), + - CSV (common tooling), + - Hashcat potfile format (integration-friendly). + +--- + +## Data Model + +### Reuse existing entities (no new tables preferred) + +- Campaign (`file:app/models/campaign.py`) +- Attack (`file:app/models/attack.py`) +- Task (`file:app/models/task.py`) +- Agent (`file:app/models/agent.py`) +- HashList/HashItem (`file:app/models/hash_list.py`, `file:app/models/hash_item.py`) +- Resources (`file:app/models/attack_resource_file.py`) +- CrackResult (`file:app/models/crack_result.py`) + +### Planned additions / changes (decision-sensitive) + +- **Control API keys:** keep the current **single key per user** model for this Epic (`User.api_key` in `file:app/models/user.py`). Multi-key support is explicitly deferred. 
+- **Pending resource cleanup:** avoid schema changes by using `AttackResourceFile.created_at` + `is_uploaded=False` to identify stale pending records for deterministic periodic sweep cleanup. + +--- + +## Component Architecture + +### API layer (Control routers) + +- Keep routers thin: auth, scoping, pagination normalization, error translation. +- Primary Control domains: + - Admin: users, projects, API keys + - Operational: campaigns, attacks, agents, tasks, resources, hash-lists + - “Convenience” workflows: templates, results, batch operations, monitoring + +### Service layer strategy (codebase fit) + +- Prefer reuse of existing services in `file:app/core/services/`: + - `campaign_service`, `attack_service`, `agent_service`, `task_service`, `hash_list_service`, `resource_service`, `storage_service` +- Add minimal “Control adapters” where needed to: + - convert page-based results to offset-based responses, + - translate `HTTPException` / generic exceptions into RFC9457 Problem types, + - enforce state machine transitions consistently. + +### Background/maintenance components + +- Resource pending cleanup should not reuse request-scoped DB sessions; background operations must open fresh sessions via the global session manager (`file:app/db/session.py`). diff --git a/docs/epics/control-api/tickets/Agent_&_Task_Monitoring.md b/docs/epics/control-api/tickets/Agent_&_Task_Monitoring.md new file mode 100644 index 00000000..d01fd481 --- /dev/null +++ b/docs/epics/control-api/tickets/Agent_&_Task_Monitoring.md @@ -0,0 +1,116 @@ +# Agent & Task Monitoring + +## Overview + +Implement Control API endpoints for agent fleet monitoring and task tracking. Enables users to monitor agent availability, performance, and task execution status. + +## Context + +Users need visibility into agent fleet health and task execution for troubleshooting and capacity planning. The Control API provides read-only monitoring; agent registration is handled by the Agent API. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 3: Real-Time Monitoring, Steps 4-5) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Data Model) + +## Scope + +**In Scope:** + +- Agent fleet summary endpoint +- Agent list endpoint with filtering +- Agent detail endpoint (capabilities, performance, errors) +- Task list endpoint with filtering +- Task detail endpoint with logs +- Integration with existing `file:app/core/services/agent_service.py` +- Integration with existing `file:app/core/services/task_service.py` + +**Out of Scope:** + +- Agent registration (Agent API responsibility) +- Agent configuration updates (separate concern) +- Task creation (automatic via campaign execution) + +## Implementation Guidance + +**Endpoints:** + +- `GET /api/v1/control/agents/summary` - Agent fleet summary +- `GET /api/v1/control/agents` - List agents +- `GET /api/v1/control/agents/{id}` - Get agent details +- `GET /api/v1/control/tasks` - List tasks +- `GET /api/v1/control/tasks/{id}` - Get task details + +**Key Files:** + +- Create `file:app/api/v1/endpoints/control/agents.py` - New router +- Create `file:app/api/v1/endpoints/control/tasks.py` - New router +- `file:app/core/services/agent_service.py` - Existing service layer +- `file:app/core/services/task_service.py` - Existing service layer + +**Fleet Summary Response:** + +```python +{ + "total_agents": 15, + "active_agents": 12, + "idle_agents": 3, + "offline_agents": 0, + "total_capacity": {"cpu_cores": 192, "gpu_count": 24, "memory_gb": 768}, + "current_utilization": {"active_tasks": 45, "utilization_percent": 75.0}, +} +``` + +**Task List Response:** + +```python +{ + "items": [ + { + "id": 789, + "campaign_id": 123, + "attack_id": 456, + "agent_id": 12, + "state": "running", + "progress_percent": 34.5, + "started_at": "2024-01-15T10:00:00Z", + "estimated_completion": "2024-01-15T11:30:00Z", 
+ } + ], + "total": 156, + "limit": 20, + "offset": 0, +} +``` + +## Acceptance Criteria + +- [ ] Users can view agent fleet summary (total, active/idle, capabilities) +- [ ] Users can list agents with filtering (project, status, capabilities) +- [ ] Users can view agent details (hardware, performance, error logs) +- [ ] Users can list tasks with filtering (campaign, agent, status) +- [ ] Users can view task details with execution logs +- [ ] All operations respect project scoping +- [ ] List endpoints use offset-based pagination +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test agent fleet summary calculation +- Test agent listing with filtering +- Test agent detail retrieval +- Test task listing with filtering +- Test task detail retrieval +- Test project scoping + +**Test Command:** `just test-backend` + +## Dependencies + +None - can work in parallel with other monitoring tickets. + +## Related Tickets + +- Complements `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T11` (Campaign Status & Metrics) diff --git a/docs/epics/control-api/tickets/Attack_CRUD_&_Validation.md b/docs/epics/control-api/tickets/Attack_CRUD_&_Validation.md new file mode 100644 index 00000000..1f155a5b --- /dev/null +++ b/docs/epics/control-api/tickets/Attack_CRUD_&_Validation.md @@ -0,0 +1,116 @@ +# Attack CRUD & Validation + +## Overview + +Implement Control API endpoints for attack CRUD operations and validation. Attacks define specific cracking configurations within campaigns (dictionary, mask, hybrid, etc.). + +## Context + +Attacks are the building blocks of campaigns. Users need to create attacks with resource references, validate configurations, estimate keyspace, and manage attack lifecycle. This ticket focuses on CRUD and validation; lifecycle actions are handled separately. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 1: Campaign Lifecycle, Phase 3) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Data Model) + +## Scope + +**In Scope:** + +- Attack create endpoint with resource validation +- Attack get endpoint (by ID) +- Attack list endpoint with filtering +- Attack update endpoint +- Attack delete endpoint with validation +- Attack validation endpoint (synchronous) +- Attack keyspace estimation endpoint +- Integration with existing `file:app/core/services/attack_service.py` + +**Out of Scope:** + +- Attack lifecycle actions (start, stop, pause - separate ticket) +- Attack reordering (separate ticket) +- Task management (separate concern) + +## Implementation Guidance + +**Endpoints:** + +- `POST /api/v1/control/attacks` - Create attack +- `GET /api/v1/control/attacks` - List attacks +- `GET /api/v1/control/attacks/{id}` - Get attack details +- `PATCH /api/v1/control/attacks/{id}` - Update attack +- `DELETE /api/v1/control/attacks/{id}` - Delete attack +- `POST /api/v1/control/attacks/validate` - Validate attack config +- `POST /api/v1/control/attacks/estimate` - Estimate keyspace + +**Key Files:** + +- Create `file:app/api/v1/endpoints/control/attacks.py` - New router +- `file:app/core/services/attack_service.py` - Existing service layer +- `file:app/core/services/attack_complexity_service.py` - Keyspace estimation +- `file:app/models/attack.py` - Attack model + +**Create Request:** + +```python +{ + "campaign_id": 123, + "name": "Dictionary Attack", + "attack_mode": 0, # Straight dictionary + "wordlist_id": 456, + "rule_list_id": 789, # Optional + "priority": 1, +} +``` + +**Validation Response:** + +```python +{ + "valid": true, + "warnings": [], + "estimated_keyspace": 14344391, + "estimated_time_seconds": 3600, + "resource_availability": { + "wordlist_456": "available", + "rule_list_789": 
"available", + }, +} +``` + +## Acceptance Criteria + +- [ ] Users can create attacks with resource references +- [ ] Attack creation validates resource availability +- [ ] Users can validate attack configurations before creation (synchronous) +- [ ] Users can estimate attack keyspace and time-to-completion +- [ ] Users can list attacks with filtering (campaign, type, status) +- [ ] Users can view attack details (config, resources, progress) +- [ ] Users can update attack configurations (when not running) +- [ ] Users can delete attacks (validation prevents deletion if running) +- [ ] All operations respect project scoping +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test attack creation with valid and invalid resource references +- Test attack validation (various configurations) +- Test keyspace estimation (mock complexity service) +- Test list endpoint with filtering +- Test update and delete operations +- Test delete validation (prevent deletion of running attacks) + +**Test Command:** `just test-backend` + +## Dependencies + +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T1` (RFC9457 Middleware) for error handling +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T5` (Resource File CRUD) for resource references + +## Related Tickets + +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T10` (Attack Lifecycle & Reordering) +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T13` (Template Import/Export) diff --git a/docs/epics/control-api/tickets/Attack_Lifecycle_&_Reordering.md b/docs/epics/control-api/tickets/Attack_Lifecycle_&_Reordering.md new file mode 100644 index 00000000..041643e6 --- /dev/null +++ b/docs/epics/control-api/tickets/Attack_Lifecycle_&_Reordering.md @@ -0,0 +1,107 @@ +# Attack Lifecycle & Reordering + +## Overview + +Implement Control API endpoints for attack lifecycle operations (start, stop, pause) and attack reordering within campaigns. 
Attack order determines execution priority. + +## Context + +Attacks within a campaign execute in priority order. Users need to control individual attack execution and reorder attacks to optimize campaign strategy. All state transitions must be validated. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 1: Campaign Lifecycle, Phase 3) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - State Machines) + +## Scope + +**In Scope:** + +- Attack start/stop/pause endpoints +- Attack reordering within campaigns +- Attack performance metrics endpoint +- Integration with state machine from T2 +- Integration with existing `file:app/core/services/attack_service.py` + +**Out of Scope:** + +- Attack CRUD (handled in T9) +- Task management (separate concern) +- Campaign-level lifecycle (handled in T8) + +## Implementation Guidance + +**Endpoints:** + +- `POST /api/v1/control/attacks/{id}/start` - Start attack +- `POST /api/v1/control/attacks/{id}/stop` - Stop attack +- `POST /api/v1/control/attacks/{id}/pause` - Pause attack +- `POST /api/v1/control/campaigns/{campaign_id}/attacks/reorder` - Reorder attacks +- `GET /api/v1/control/attacks/{id}/metrics` - Get performance metrics + +**Key Files:** + +- `file:app/api/v1/endpoints/control/attacks.py` - Add lifecycle endpoints +- `file:app/core/services/attack_service.py` - Existing lifecycle services +- `file:app/core/state_machines.py` - State machine validation (from T2) +- `file:app/models/attack.py` - Attack model + +**Reorder Request:** + +```python +{ + "attack_order": [ + {"attack_id": 101, "priority": 1}, + {"attack_id": 102, "priority": 2}, + {"attack_id": 103, "priority": 3}, + ] +} +``` + +**Metrics Response:** + +```python +{ + "attack_id": 101, + "hash_rate": 1500000, # hashes/second + "progress_percent": 45.2, + "estimated_completion": "2024-01-15T14:30:00Z", + "cracks_found": 123, + 
"tasks_completed": 5, + "tasks_active": 2, +} +``` + +## Acceptance Criteria + +- [ ] Users can start individual attacks +- [ ] Users can stop individual attacks +- [ ] Users can pause individual attacks +- [ ] All state transitions are validated by `AttackStateMachine` +- [ ] Invalid transitions return RFC9457 errors +- [ ] Users can reorder attacks within a campaign +- [ ] Reordering updates attack priority correctly +- [ ] Users can view attack performance metrics +- [ ] Metrics include hash rate, progress, ETA, crack count +- [ ] All operations respect project scoping + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test attack lifecycle actions with valid state transitions +- Test invalid state transitions (expect errors) +- Test attack reordering (various scenarios) +- Test performance metrics retrieval +- Test project scoping + +**Test Command:** `just test-backend` + +## Dependencies + +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T2` (State Machine Classes) - Required for validation +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T9` (Attack CRUD) - Required for attack data + +## Related Tickets + +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T14` (Results & Batch Operations) diff --git a/docs/epics/control-api/tickets/Campaign_CRUD_&_Validation.md b/docs/epics/control-api/tickets/Campaign_CRUD_&_Validation.md new file mode 100644 index 00000000..25b71175 --- /dev/null +++ b/docs/epics/control-api/tickets/Campaign_CRUD_&_Validation.md @@ -0,0 +1,117 @@ +# Campaign CRUD & Validation + +## Overview + +Implement Control API endpoints for campaign CRUD operations and pre-flight validation. Campaigns coordinate password cracking attempts against hash lists. + +## Context + +Campaigns are the primary workflow entity in the Control API. Users need to create campaigns, configure them, validate before launch, and manage their lifecycle. This ticket focuses on CRUD and validation; lifecycle actions are handled separately. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 1: Campaign Lifecycle, Steps 2-5) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Data Model) + +## Scope + +**In Scope:** + +- Campaign create endpoint (with optional inline attacks) +- Campaign get endpoint (by ID) +- Campaign update endpoint (metadata) +- Campaign delete endpoint with validation +- Pre-flight validation endpoint +- Integration with existing `file:app/core/services/campaign_service.py` +- Extend existing `file:app/api/v1/endpoints/control/campaigns.py` (currently only has list) + +**Out of Scope:** + +- Campaign lifecycle actions (start, stop, pause - separate ticket) +- Campaign monitoring (separate ticket) +- Batch operations (separate ticket) + +## Implementation Guidance + +**Endpoints:** + +- `POST /api/v1/control/campaigns` - Create campaign +- `GET /api/v1/control/campaigns/{id}` - Get campaign details +- `PATCH /api/v1/control/campaigns/{id}` - Update campaign +- `DELETE /api/v1/control/campaigns/{id}` - Delete campaign +- `POST /api/v1/control/campaigns/{id}/validate` - Pre-flight validation + +**Key Files:** + +- `file:app/api/v1/endpoints/control/campaigns.py` - Extend existing router +- `file:app/core/services/campaign_service.py` - Existing service layer +- `file:app/models/campaign.py` - Campaign model +- `file:app/schemas/campaign.py` - Request/response schemas + +**Create Request (with inline attacks):** + +```python +{ + "name": "Corporate Password Audit", + "hash_list_id": 123, + "project_id": 1, + "attacks": [ # Optional inline attacks + {"name": "Dictionary Attack", "attack_mode": 0, "wordlist_id": 456} + ], +} +``` + +**Validation Response:** + +```python +{ + "valid": false, + "errors": [ + { + "type": "missing_resource", + "detail": "Wordlist 456 not found", + "resource_id": 456, + } + ], + "warnings": [ + {"type": "no_agents", "detail": "No active 
agents available for project 1"} + ], +} +``` + +## Acceptance Criteria + +- [ ] Users can create campaigns referencing hash lists +- [ ] Campaign creation supports inline attack definitions (optional) +- [ ] Campaign creation supports separate attack creation workflow +- [ ] Users can view campaign details (including nested attacks) +- [ ] Users can update campaign metadata (name, description) +- [ ] Users can delete campaigns in draft state +- [ ] Delete validation prevents deletion of running campaigns +- [ ] Pre-flight validation checks hash list, resources, agents availability +- [ ] Validation returns actionable error messages +- [ ] All operations respect project scoping (query parameter) +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test campaign creation with and without inline attacks +- Test campaign detail retrieval +- Test campaign updates +- Test delete validation (prevent deletion of running campaigns) +- Test pre-flight validation (various error scenarios) +- Test project scoping + +**Test Command:** `just test-backend` + +## Dependencies + +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T1` (RFC9457 Middleware) for error handling +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T4` (Hash List CRUD) for hash list references + +## Related Tickets + +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T8` (Campaign Lifecycle Actions) +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T11` (Campaign Status & Metrics) diff --git a/docs/epics/control-api/tickets/Campaign_Lifecycle_Actions.md b/docs/epics/control-api/tickets/Campaign_Lifecycle_Actions.md new file mode 100644 index 00000000..40d27dd8 --- /dev/null +++ b/docs/epics/control-api/tickets/Campaign_Lifecycle_Actions.md @@ -0,0 +1,118 @@ +# Campaign Lifecycle Actions + +## Overview + +Implement Control API endpoints for campaign lifecycle operations: start, stop, pause, resume, archive, and unarchive. 
These operations control campaign execution and are validated by state machines. + +## Context + +Campaign lifecycle management is core to the Control API workflow. Users need to start campaigns, pause them for resource management, stop them when complete, and archive them for cleanup. All state transitions must be validated to ensure data integrity. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 1: Campaign Lifecycle, Steps 6-8) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - State Machines) + +## Scope + +**In Scope:** + +- Campaign start endpoint with state validation +- Campaign stop endpoint (restartable, returns to draft) +- Campaign pause endpoint +- Campaign resume endpoint +- Campaign archive/unarchive endpoints +- Integration with state machine from T2 +- Integration with existing `file:app/core/services/campaign_service.py` + +**Out of Scope:** + +- Campaign CRUD (handled in T7) +- Campaign monitoring (handled in T11) +- Batch operations (handled in T14) + +## Implementation Guidance + +```mermaid +stateDiagram-v2 + [*] --> draft: create + draft --> active: start + active --> paused: pause + paused --> active: resume + active --> draft: stop (restartable) + active --> completed: system completes + draft --> archived: archive + active --> archived: archive + paused --> archived: archive + archived --> draft: unarchive +``` + +**Endpoints:** + +- `POST /api/v1/control/campaigns/{id}/start` - Start campaign +- `POST /api/v1/control/campaigns/{id}/stop` - Stop campaign (restartable) +- `POST /api/v1/control/campaigns/{id}/pause` - Pause campaign +- `POST /api/v1/control/campaigns/{id}/resume` - Resume campaign +- `POST /api/v1/control/campaigns/{id}/archive` - Archive campaign +- `POST /api/v1/control/campaigns/{id}/unarchive` - Unarchive campaign + +**Key Files:** + +- `file:app/api/v1/endpoints/control/campaigns.py` - Add 
lifecycle endpoints +- `file:app/core/services/campaign_service.py` - Existing lifecycle services +- `file:app/core/state_machines.py` - State machine validation (from T2) +- `file:app/models/campaign.py` - Campaign model + +**Lifecycle Action Pattern:** + +```python +@router.post("/{campaign_id}/start") +async def start_campaign( + campaign_id: int, + db: AsyncSession = Depends(get_db), + current_user: User = Depends(get_current_control_user), +): + campaign = await get_campaign_service(db, campaign_id) + + # Validate state transition + CampaignStateMachine.validate_transition(campaign.state, CampaignState.ACTIVE) + + # Perform lifecycle action + updated_campaign = await start_campaign_service(db, campaign_id) + return updated_campaign +``` + +## Acceptance Criteria + +- [ ] Users can start campaigns (draft → active) +- [ ] Users can stop campaigns (active → draft, restartable) +- [ ] Users can pause campaigns (active → paused) +- [ ] Users can resume campaigns (paused → active) +- [ ] Users can archive campaigns (any state → archived) +- [ ] Users can unarchive campaigns (archived → draft) +- [ ] All state transitions are validated by `CampaignStateMachine` +- [ ] Invalid transitions return RFC9457 errors with clear messages +- [ ] Lifecycle actions respect project scoping +- [ ] Start action validates resources and agents availability +- [ ] Stop action performs graceful shutdown (completes running tasks) + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test all lifecycle actions with valid state transitions +- Test invalid state transitions (expect errors) +- Test start validation (missing resources, no agents) +- Test graceful stop (task completion) +- Test project scoping + +**Test Command:** `just test-backend` + +## Dependencies + +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T2` (State Machine Classes) - Required for validation +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T7` (Campaign CRUD) - Required for campaign data + +## Related Tickets 
+ +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T14` (Results & Batch Operations) diff --git a/docs/epics/control-api/tickets/Campaign_Status_&_Metrics.md b/docs/epics/control-api/tickets/Campaign_Status_&_Metrics.md new file mode 100644 index 00000000..a9861434 --- /dev/null +++ b/docs/epics/control-api/tickets/Campaign_Status_&_Metrics.md @@ -0,0 +1,118 @@ +# Campaign Status & Metrics + +## Overview + +Implement Control API endpoints for campaign status monitoring and metrics. Supports both individual campaign drill-down and bulk dashboard views optimized for TUI rendering. + +## Context + +Users need to monitor campaign progress through efficient polling. Individual status provides detailed drill-down, while bulk status enables dashboard views. Caching reduces database load for high-frequency polling. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 3: Real-Time Monitoring, Steps 1-3) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Monitoring & Dashboard) + +## Scope + +**In Scope:** + +- Individual campaign status endpoint (detailed progress) +- Bulk campaign status endpoint (dashboard view) +- Campaign metrics endpoint (hash rate, crack rate, trends) +- Caching implementation (5-10s TTL, project-scoped) +- Rich-but-shallow response format (campaign + 1-level attack rollup) +- Integration with existing `file:app/core/services/campaign_service.py` + +**Out of Scope:** + +- Real-time streaming (using polling only) +- Historical analytics (current state only) +- Agent-level monitoring (separate ticket) + +## Implementation Guidance + +**Endpoints:** + +- `GET /api/v1/control/campaigns/{id}/status` - Individual campaign status +- `GET /api/v1/control/campaigns/status` - Bulk campaign status (dashboard) +- `GET /api/v1/control/campaigns/{id}/metrics` - Campaign metrics + +**Key Files:** + +- 
`file:app/api/v1/endpoints/control/campaigns.py` - Add monitoring endpoints +- `file:app/core/services/campaign_service.py` - Existing service layer +- `file:app/core/services/dashboard_service.py` - Dashboard aggregation +- Use `cashews` for caching (already imported in system.py) + +**Bulk Status Response (Dashboard):** + +```python +{ + "items": [ + { + "id": 123, + "name": "Corporate Audit", + "state": "running", + "progress_percent": 45.2, + "eta_seconds": 3600, + "crack_count": 1234, + "active_tasks": 5, + "current_attack": { + "id": 456, + "name": "Dictionary Attack", + "state": "running", + "progress_percent": 67.8, + }, + "next_attack": {"id": 457, "name": "Mask Attack", "state": "pending"}, + } + ], + "total": 42, + "limit": 20, + "offset": 0, +} +``` + +**Caching Pattern:** + +```python +from cashews import cache + +@cache(ttl="10s", key="campaign_status:{project_id}:{filters}") +async def get_bulk_campaign_status(...): + # Expensive query + return results +``` + +## Acceptance Criteria + +- [ ] Users can poll individual campaign status (progress, ETA, crack count) +- [ ] Individual status includes detailed attack breakdown +- [ ] Users can poll bulk campaign status with filtering and pagination +- [ ] Bulk status includes campaign rollup + current/next attack summary +- [ ] Bulk status is cached with 5-10s TTL, shared by project +- [ ] Cache key includes project_id and normalized filters +- [ ] Responses are optimized for TUI rendering (rich but not deeply nested) +- [ ] Campaign metrics include hash rate, crack rate, success percentage +- [ ] All operations respect project scoping +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test individual campaign status retrieval +- Test bulk campaign status with filtering +- Test caching behavior (verify cache hits/misses) +- Test metrics calculation +- Test project scoping + +**Test Command:** `just test-backend` + +## Dependencies + +- 
`ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T7` (Campaign CRUD) for campaign data + +## Related Tickets + +- Complements `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T12` (Agent & Task Monitoring) diff --git a/docs/epics/control-api/tickets/Hash_List_CRUD.md b/docs/epics/control-api/tickets/Hash_List_CRUD.md new file mode 100644 index 00000000..54b2be15 --- /dev/null +++ b/docs/epics/control-api/tickets/Hash_List_CRUD.md @@ -0,0 +1,105 @@ +# Hash List CRUD + +## Overview + +Implement Control API endpoints for hash list management (create, read, update, delete). Hash lists are the foundation of campaigns, containing the hashes to be cracked. + +## Context + +Hash lists are central to the Control API workflow. Users need to create hash lists before creating campaigns. The Control API must provide full CRUD operations with validation and project scoping. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 1: Campaign Lifecycle, Step 1) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Data Model) + +## Scope + +**In Scope:** + +- Hash list create endpoint with validation +- Hash list get endpoint (by ID) +- Hash list list endpoint with filtering and pagination +- Hash list update endpoint (metadata only) +- Hash list delete endpoint with validation +- Project scoping and access control +- Integration with existing `file:app/core/services/hash_list_service.py` + +**Out of Scope:** + +- Hash cracking logic (Agent API responsibility) +- Campaign association (handled in campaign tickets) +- Hash type detection (use existing hash guess service) + +## Implementation Guidance + +**Endpoints:** + +- `POST /api/v1/control/hash-lists` - Create hash list +- `GET /api/v1/control/hash-lists` - List hash lists +- `GET /api/v1/control/hash-lists/{id}` - Get hash list details +- `PATCH /api/v1/control/hash-lists/{id}` - Update hash list metadata +- `DELETE 
/api/v1/control/hash-lists/{id}` - Delete hash list + +**Key Files:** + +- Create `file:app/api/v1/endpoints/control/hash_lists.py` - New router +- `file:app/core/services/hash_list_service.py` - Existing service layer +- `file:app/models/hash_list.py` - Hash list model +- `file:app/schemas/hash_list.py` - Request/response schemas + +**Response Format:** + +```python +# List response includes rich metadata +{ + "items": [ + { + "id": 123, + "name": "Corporate Hashes", + "hash_count": 5000, + "detected_types": ["NTLM", "bcrypt"], + "validation_status": "valid", + "project_id": 1, + "created_at": "2024-01-15T10:30:00Z", + } + ], + "total": 42, + "limit": 20, + "offset": 0, +} +``` + +## Acceptance Criteria + +- [ ] Users can create hash lists with hash data and metadata +- [ ] Hash validation detects invalid formats and returns clear errors +- [ ] Users can list hash lists with filtering (project, search, hash type) +- [ ] List endpoint uses offset-based pagination +- [ ] Users can view hash list details (count, types, validation status) +- [ ] Users can update hash list metadata (name, description) +- [ ] Users can delete unused hash lists +- [ ] Delete validation prevents deletion if hash list is used by campaigns +- [ ] All operations respect project scoping (query parameter) +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test hash list creation with valid and invalid data +- Test hash validation (various hash formats) +- Test list endpoint with filtering and pagination +- Test update and delete operations +- Test project scoping (users can only access their projects) +- Test delete validation (prevent deletion of in-use hash lists) + +**Test Command:** `just test-backend` + +## Dependencies + +None - can work in parallel with other resource layer tickets. 
+ +## Related Tickets + +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T7` (Campaign CRUD & Validation) diff --git a/docs/epics/control-api/tickets/Presigned_Upload_Workflow.md b/docs/epics/control-api/tickets/Presigned_Upload_Workflow.md new file mode 100644 index 00000000..40c376b2 --- /dev/null +++ b/docs/epics/control-api/tickets/Presigned_Upload_Workflow.md @@ -0,0 +1,111 @@ +# Presigned Upload Workflow + +## Overview + +Implement the 2-step presigned upload workflow for resource files: initiate (create pending resource + presigned URL) and confirm (verify and finalize resource). This enables efficient direct-to-storage uploads without proxying through the API server. + +## Context + +Large resource files (wordlists, rules) should be uploaded directly to MinIO storage for performance. The Control API provides presigned URLs for direct upload, then confirms the upload to finalize the resource record. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 2: Resource Management, Steps 1-3) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Resource Uploads) + +## Scope + +**In Scope:** + +- Upload initiate endpoint (create pending resource + presigned URL) +- Upload confirm endpoint (verify and finalize resource) +- Integration with existing `file:app/core/services/resource_service.py` +- Integration with `file:app/core/services/storage_service.py` +- Pending resource visibility in list operations + +**Out of Scope:** + +- Direct file upload to API (using presigned URLs only) +- Resource content editing +- Multi-part upload support + +## Implementation Guidance + +```mermaid +sequenceDiagram + participant Client + participant API as Control API + participant DB as Database + participant Storage as MinIO Storage + + Client->>API: POST /resources/initiate-upload + API->>DB: Create pending resource (is_uploaded=false) + API->>Storage: 
Generate presigned PUT URL + Storage-->>API: Presigned URL (expires in 1 hour) + API-->>Client: {resource_id, upload_url} + + Client->>Storage: PUT to presigned URL (file content) + Storage-->>Client: 200 OK + + Client->>API: POST /resources/{id}/confirm-upload + API->>Storage: Verify object exists + Storage-->>API: Object metadata + API->>Storage: Get file stats (size, line count, checksum) + API->>DB: Update resource (is_uploaded=true, stats) + API-->>Client: {resource details} +``` + +**Endpoints:** + +- `POST /api/v1/control/resources/initiate-upload` - Initiate upload +- `POST /api/v1/control/resources/{id}/confirm-upload` - Confirm upload + +**Key Files:** + +- `file:app/api/v1/endpoints/control/resources.py` - Add endpoints +- `file:app/core/services/resource_service.py` - Existing presign logic +- `file:app/core/services/storage_service.py` - Presigned URL generation +- `file:app/models/attack_resource_file.py` - Resource model + +**Initiate Response:** + +```python +{ + "resource_id": 789, + "upload_url": "https://minio.example.com/bucket/file?signature=...", + "expires_at": "2024-01-15T11:30:00Z", +} +``` + +## Acceptance Criteria + +- [ ] Users can initiate upload and receive presigned URL + pending resource ID +- [ ] Pending resource is created immediately with `is_uploaded=false` +- [ ] Presigned URL expires after 1 hour +- [ ] Users can upload directly to MinIO using presigned URL +- [ ] Users can confirm upload to finalize resource +- [ ] Confirm endpoint verifies file exists in storage +- [ ] File stats (size, line count, checksum) are computed on confirm +- [ ] Confirmed resource has `is_uploaded=true` +- [ ] Pending resources are visible in list operations (marked as pending) +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test initiate endpoint (mock MinIO presigned URL generation) +- Test confirm endpoint (mock MinIO object verification) +- Test file stats computation +- Test error cases (file not found, 
invalid resource ID) +- Test pending resource visibility + +**Test Command:** `just test-backend` + +## Dependencies + +- Soft dependency on `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T3` (Resource Cleanup Job) for cleanup integration + +## Related Tickets + +- Complements `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T5` (Resource File CRUD) diff --git a/docs/epics/control-api/tickets/RFC9457_Middleware_Extension.md b/docs/epics/control-api/tickets/RFC9457_Middleware_Extension.md new file mode 100644 index 00000000..a1d9ad64 --- /dev/null +++ b/docs/epics/control-api/tickets/RFC9457_Middleware_Extension.md @@ -0,0 +1,113 @@ +# RFC9457 Middleware Extension + +## Overview + +Extend the existing RFC9457 middleware to normalize all `HTTPException` instances into RFC9457 Problem responses for Control API endpoints. This ensures consistent, machine-readable error responses across the entire Control API surface. + +## Context + +The Control API currently uses `file:app/core/control_rfc9457_middleware.py` to handle custom Problem exceptions. However, many service layer functions raise standard `fastapi.HTTPException`, which bypasses RFC9457 formatting. This creates inconsistent error responses that break the Control API contract. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - RFC9457 Error Handling) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 6: Error Recovery) + +## Scope + +**In Scope:** + +- Extend `ControlRFC9457Middleware` to intercept `fastapi.HTTPException` on `/api/v1/control/*` paths +- Convert HTTPException to RFC9457 Problem responses with appropriate problem types +- Support problem extensions for actionable error details (e.g., missing resources list) +- Preserve existing behavior for custom Problem exceptions +- Add comprehensive tests for middleware behavior + +**Out of Scope:** + +- Service layer refactoring (keeping existing exception patterns) +- Changes to Web UI or Agent API error handling +- Custom exception classes (use existing ones from `file:app/core/control_exceptions.py`) + +## Implementation Guidance + +```mermaid +sequenceDiagram + participant Client + participant Middleware as ControlRFC9457Middleware + participant Endpoint as Control Endpoint + participant Service as Service Layer + + Client->>Middleware: Request to /api/v1/control/* + Middleware->>Endpoint: Forward request + Endpoint->>Service: Call service function + + alt Service raises HTTPException + Service-->>Endpoint: HTTPException(status=404) + Endpoint-->>Middleware: HTTPException bubbles up + Middleware->>Middleware: Convert to RFC9457 Problem + Middleware-->>Client: Problem JSON (type, title, status, detail, instance) + else Service raises custom Problem + Service-->>Endpoint: UserNotFoundError + Endpoint-->>Middleware: Problem exception + Middleware->>Middleware: Already RFC9457 format + Middleware-->>Client: Problem JSON (existing behavior) + end +``` + +**Key Files:** + +- `file:app/core/control_rfc9457_middleware.py` - Extend this middleware +- `file:app/core/control_exceptions.py` - Reference existing Problem types +- 
`file:app/core/services/campaign_service.py` - Example service raising HTTPException + +**Middleware Extension Pattern** (illustrative; Starlette's `ExceptionMiddleware` sits inside user middleware, so an endpoint-raised `HTTPException` normally reaches the middleware as an already-rendered response; confirm the `except` branch is reachable, or do the conversion in an exception handler): + +```python +# In ControlRFC9457Middleware.__call__ +try: + response = await call_next(request) +except HTTPException as exc: + # Convert to RFC9457 Problem + problem = { + "type": "about:blank", # or map to specific problem type + "title": HTTPStatus(exc.status_code).phrase, # RFC 9457: with about:blank, title is the HTTP status phrase + "status": exc.status_code, + "detail": exc.detail, + "instance": str(request.url.path), + } + return JSONResponse( + status_code=exc.status_code, + content=problem, + headers={"Content-Type": "application/problem+json"}, + ) +``` + +## Acceptance Criteria + +- [ ] All `HTTPException` raised on `/api/v1/control/*` paths return RFC9457 format +- [ ] Problem responses include required fields: `type`, `title`, `status`, `detail`, `instance` +- [ ] Problem extensions are supported (e.g., `missing_resources` array) +- [ ] Existing custom Problem exceptions continue to work unchanged +- [ ] Middleware does not affect `/api/v1/web/*` or `/api/v1/client/*` paths +- [ ] Tests cover HTTPException conversion for common status codes (400, 401, 403, 404, 500) +- [ ] Tests verify existing Problem exception behavior is preserved + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Unit tests for middleware conversion logic +- Integration tests with actual Control API endpoints +- Verify RFC9457 format compliance for all error responses +- Test problem extensions (e.g., validation errors with field details) + +**Test Command:** `just test-backend` + +## Dependencies + +None - this is foundation work that can start immediately. 
+ +## Related Tickets + +- Enables all other Control API tickets by ensuring consistent error handling diff --git a/docs/epics/control-api/tickets/Resource_Cleanup_Job.md b/docs/epics/control-api/tickets/Resource_Cleanup_Job.md new file mode 100644 index 00000000..a0449285 --- /dev/null +++ b/docs/epics/control-api/tickets/Resource_Cleanup_Job.md @@ -0,0 +1,127 @@ +# Resource Cleanup Job + +## Overview + +Implement a deterministic periodic sweep job to clean up stale pending resources that were never confirmed after upload initiation. This ensures storage hygiene and prevents abandoned uploads from accumulating. + +## Context + +The presigned upload workflow creates pending resource records immediately. If users never confirm the upload (network failure, client crash, etc.), these pending records remain indefinitely. A periodic cleanup job is needed to remove stale pending resources. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Resource Cleanup) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 2: Resource Management) + +## Scope + +**In Scope:** + +- Create periodic sweep job for stale pending resources (24hr+ old) +- Implement idempotent cleanup logic with row-level locking +- Add manual cancel endpoint for immediate cleanup +- Use fresh DB sessions (not request-scoped) +- Add monitoring/logging for cleanup operations +- Integration with existing `file:app/core/tasks/resource_tasks.py` + +**Out of Scope:** + +- Real-time cleanup (periodic sweep is sufficient) +- Cleanup of confirmed resources (only pending) +- Storage quota enforcement + +## Implementation Guidance + +```mermaid +sequenceDiagram + participant Job as Cleanup Job + participant DB as Database + participant Storage as MinIO Storage + + loop Every 1 hour + Job->>DB: Find pending resources older than 24h + DB-->>Job: List of stale resources + + loop For each stale resource 
+ Job->>DB: Lock resource row (FOR UPDATE) + Job->>Storage: Check if object exists + + alt Object exists + Storage-->>Job: Object found + Job->>Storage: Delete object + Job->>DB: Delete resource record + else Object not found + Storage-->>Job: Object not found + Job->>DB: Delete resource record + end + + Job->>DB: Commit transaction + end + + Job->>Job: Log cleanup summary + end +``` + +**Key Files:** + +- `file:app/core/tasks/resource_tasks.py` - Extend with cleanup job +- `file:app/core/services/resource_service.py` - Add cancel endpoint logic +- `file:app/db/session.py` - Use for fresh DB sessions +- `file:app/models/attack_resource_file.py` - Resource model + +**Cleanup Job Pattern:** + +```python +async def cleanup_stale_pending_resources(): + """Periodic job to clean up pending resources older than 24 hours.""" + async with get_async_session() as db: # Fresh session + cutoff = datetime.now(UTC) - timedelta(hours=24) + + # Find stale pending resources with row lock + stmt = ( + select(AttackResourceFile) + .where( + AttackResourceFile.is_uploaded == False, + AttackResourceFile.created_at < cutoff + ) + .with_for_update() + ) + result = await db.execute(stmt) + stale_resources = result.scalars().all() + + for resource in stale_resources: + # Delete from storage if exists + # Delete from database + # Log cleanup +``` + +## Acceptance Criteria + +- [ ] Periodic job runs every hour to clean up stale pending resources +- [ ] Job finds resources with `is_uploaded=False` older than 24 hours +- [ ] Cleanup is idempotent (safe to run multiple times) +- [ ] Row-level locking prevents concurrent cleanup conflicts +- [ ] Job uses fresh DB sessions via `file:app/db/session.py` +- [ ] Manual cancel endpoint allows immediate cleanup of specific resources +- [ ] Cleanup operations are logged with structured data (resource ID, age, outcome) +- [ ] Job handles errors gracefully (continues on individual failures) + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Unit 
tests for cleanup logic +- Test idempotency (running cleanup multiple times) +- Test concurrency safety (multiple workers) +- Test manual cancel endpoint +- Mock MinIO storage interactions + +**Test Command:** `just test-backend` + +## Dependencies + +None - this is foundation work that can start immediately. + +## Related Tickets + +- Soft dependency for `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T6` (Presigned Upload Workflow) diff --git a/docs/epics/control-api/tickets/Resource_File_CRUD.md b/docs/epics/control-api/tickets/Resource_File_CRUD.md new file mode 100644 index 00000000..1c28cc33 --- /dev/null +++ b/docs/epics/control-api/tickets/Resource_File_CRUD.md @@ -0,0 +1,106 @@ +# Resource File CRUD + +## Overview + +Implement Control API endpoints for resource file management (list, get, update, delete). Resources include wordlists, rules, and masks used in attacks. + +## Context + +Resource files are essential for attack configuration. Users need to discover available resources, view details, update metadata, and delete unused resources. This ticket focuses on CRUD operations; upload workflow is handled separately. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 2: Resource Management, Steps 4-7) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Data Model) + +## Scope + +**In Scope:** + +- Resource list endpoint with filtering (type, project, search) +- Resource get endpoint with metadata and usage stats +- Resource update endpoint (metadata only) +- Resource delete endpoint with validation +- Resource content preview endpoint (first N lines) +- Integration with existing `file:app/core/services/resource_service.py` + +**Out of Scope:** + +- Resource upload (handled in presigned upload ticket) +- Resource content editing (metadata only) +- Resource versioning + +## Implementation Guidance + +**Endpoints:** + +- `GET /api/v1/control/resources` - List resources +- `GET /api/v1/control/resources/{id}` - Get resource details +- `GET /api/v1/control/resources/{id}/preview` - Preview content +- `PATCH /api/v1/control/resources/{id}` - Update metadata +- `DELETE /api/v1/control/resources/{id}` - Delete resource + +**Key Files:** + +- Create `file:app/api/v1/endpoints/control/resources.py` - New router +- `file:app/core/services/resource_service.py` - Existing service layer +- `file:app/core/services/storage_service.py` - For content preview +- `file:app/models/attack_resource_file.py` - Resource model + +**Response Format:** + +```python +# List response includes rich metadata +{ + "items": [ + { + "id": 456, + "name": "rockyou.txt", + "type": "wordlist", + "file_size_bytes": 139921507, + "line_count": 14344391, + "project_id": 1, + "is_uploaded": true, + "usage_count": 5, # Number of attacks using this resource + "created_at": "2024-01-10T08:00:00Z", + } + ], + "total": 128, + "limit": 20, + "offset": 0, +} +``` + +## Acceptance Criteria + +- [ ] Users can list resources with filtering (type, project, search) +- [ ] List endpoint uses offset-based 
pagination +- [ ] Users can view resource details including usage statistics +- [ ] Users can preview resource content (first 100 lines) +- [ ] Users can update resource metadata (name, description, tags) +- [ ] Users can delete unused resources +- [ ] Delete validation prevents deletion if resource is used by attacks +- [ ] All operations respect project scoping (query parameter) +- [ ] Pending resources are visible but marked as `is_uploaded=false` +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test resource listing with filtering and pagination +- Test resource detail retrieval +- Test content preview (mock MinIO storage) +- Test metadata updates +- Test delete validation (prevent deletion of in-use resources) +- Test project scoping + +**Test Command:** `just test-backend` + +## Dependencies + +None - can work in parallel with other resource layer tickets. + +## Related Tickets + +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T9` (Attack CRUD & Validation) diff --git a/docs/epics/control-api/tickets/Results_&_Batch_Operations.md b/docs/epics/control-api/tickets/Results_&_Batch_Operations.md new file mode 100644 index 00000000..8bac99cc --- /dev/null +++ b/docs/epics/control-api/tickets/Results_&_Batch_Operations.md @@ -0,0 +1,133 @@ +# Results & Batch Operations + +## Overview + +Implement Control API endpoints for campaign results retrieval/export and batch operations. Results are available in multiple formats (JSON, CSV, hashcat potfile). Batch operations enable efficient multi-campaign control. + +## Context + +Users need to retrieve cracked hashes after campaign completion and perform bulk operations on multiple campaigns. Results are canonical at the hash list level but campaign-centric views provide convenience. Batch operations are single-project scoped for safety. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 5: Batch Operations, Flow 1 Exit) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Results, Batch Operations) + +## Scope + +**In Scope:** + +- Campaign results retrieval endpoint (summary) +- Results export endpoint (JSON, CSV, hashcat potfile) +- Batch campaign operations (start, stop, status) - single-project scoped +- Batch operation modes (atomic vs best-effort) +- Per-item success/failure reporting + +**Out of Scope:** + +- Cross-project batch operations +- Real-time result streaming +- Result filtering/search (use hash list endpoints) + +## Implementation Guidance + +**Endpoints:** + +- `GET /api/v1/control/campaigns/{id}/results` - Get campaign results summary +- `GET /api/v1/control/campaigns/{id}/results/export` - Export results (format query param) +- `POST /api/v1/control/projects/{project_id}/campaigns/batch-start` - Batch start +- `POST /api/v1/control/projects/{project_id}/campaigns/batch-stop` - Batch stop +- `GET /api/v1/control/projects/{project_id}/campaigns/batch-status` - Batch status + +**Key Files:** + +- `file:app/api/v1/endpoints/control/campaigns.py` - Add results endpoints +- Create `file:app/api/v1/endpoints/control/batch.py` - New router for batch ops +- `file:app/core/services/campaign_service.py` - Existing service layer +- `file:app/models/crack_result.py` - Crack result model + +**Results Export Formats:** + +```python +# JSON +{ + "campaign_id": 123, + "total_hashes": 5000, + "cracked_hashes": 1234, + "crack_rate": 24.68, + "results": [ + {"hash": "5f4dcc3b5aa765d61d8327deb882cf99", "plaintext": "password"} + ] +} + +# CSV +hash,plaintext +5f4dcc3b5aa765d61d8327deb882cf99,password + +# Hashcat potfile +5f4dcc3b5aa765d61d8327deb882cf99:password +``` + +**Batch Operation Request:** + +```python +{ + "campaign_ids": [123, 124, 125], + "mode": "best_effort", # 
or "atomic" +} +``` + +**Batch Operation Response:** + +```python +{ + "results": [ + {"id": 123, "success": true}, + { + "id": 124, + "success": false, + "error": { + "type": "invalid_state", + "detail": "Campaign 124 is already running", + }, + }, + {"id": 125, "success": true}, + ], + "summary": {"total": 3, "succeeded": 2, "failed": 1}, +} +``` + +## Acceptance Criteria + +- [ ] Users can retrieve campaign results summary (total, cracked, crack rate) +- [ ] Results are canonical at hash list level (via hash_list_id reference) +- [ ] Users can export results in JSON, CSV, and hashcat potfile formats +- [ ] Export format is specified via query parameter +- [ ] Users can perform batch start on multiple campaigns (single project) +- [ ] Users can perform batch stop on multiple campaigns (single project) +- [ ] Users can perform batch status check on multiple campaigns +- [ ] Batch operations support atomic and best-effort modes (default: best-effort) +- [ ] Batch responses include per-item success/failure details +- [ ] Batch endpoints are path-scoped by project (`/projects/{id}/campaigns/batch-*`) +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test results retrieval and export (all formats) +- Test batch operations (start, stop, status) +- Test atomic vs best-effort modes +- Test per-item error reporting +- Test project scoping for batch operations + +**Test Command:** `just test-backend` + +## Dependencies + +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T8` (Campaign Lifecycle) for lifecycle operations +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T10` (Attack Lifecycle) for attack operations + +## Related Tickets + +- Complements `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T13` (Template Import/Export) diff --git a/docs/epics/control-api/tickets/State_Machine_Classes.md b/docs/epics/control-api/tickets/State_Machine_Classes.md new file mode 100644 index 00000000..bf284fba --- /dev/null +++ 
b/docs/epics/control-api/tickets/State_Machine_Classes.md @@ -0,0 +1,124 @@ +# State Machine Classes + +## Overview + +Implement dedicated state machine classes for Campaign and Attack lifecycle management. These classes enforce valid state transitions and provide clear error messages for invalid operations, ensuring data integrity and predictable behavior. + +## Context + +Campaigns and attacks have complex lifecycles with specific valid transitions. Currently, state transitions are validated ad-hoc in service functions, leading to inconsistent behavior and unclear error messages. Dedicated state machine classes centralize this logic and make it reusable. + +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - State Machines) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 1: Campaign Lifecycle) + +## Scope + +**In Scope:** + +- Create `CampaignStateMachine` class with transition validation +- Create `AttackStateMachine` class with transition validation +- Define valid state transitions for both entities +- Add state transition validation utilities +- Comprehensive tests for all state transitions + +**Out of Scope:** + +- Database schema changes (use existing `CampaignState` enum) +- Service layer integration (handled in lifecycle tickets) +- UI state representation + +## Implementation Guidance + +```mermaid +stateDiagram-v2 + [*] --> draft + draft --> active: start + active --> paused: pause + paused --> active: resume + active --> completed: system_completes + active --> draft: stop + draft --> archived: archive + active --> archived: archive + paused --> archived: archive + archived --> draft: unarchive + completed --> [*] +``` + +**Campaign State Machine:** + +- **States:** draft, active, paused, completed, archived +- **User Actions:** start, stop, pause, resume, archive, unarchive +- **System Transitions:** active → completed (when all
attacks finish) + +**Attack State Machine:** + +- **States:** pending, running, paused, completed, failed +- **User Actions:** start, stop, pause, resume +- **System Transitions:** pending → running (when assigned to agent), running → completed (when finished) + +**Key Files:** + +- Create `file:app/core/state_machines.py` - New file for state machine classes +- Reference `file:app/models/campaign.py` - Existing CampaignState enum +- Reference `file:app/models/attack.py` - Existing Attack model + +**State Machine Pattern:** + +```python +class CampaignStateMachine: + TRANSITIONS = { + CampaignState.DRAFT: [CampaignState.ACTIVE, CampaignState.ARCHIVED], + CampaignState.ACTIVE: [ + CampaignState.PAUSED, + CampaignState.DRAFT, + CampaignState.ARCHIVED, + ], + CampaignState.PAUSED: [CampaignState.ACTIVE, CampaignState.ARCHIVED], + # ... + } + + @classmethod + def can_transition(cls, from_state: CampaignState, to_state: CampaignState) -> bool: + return to_state in cls.TRANSITIONS.get(from_state, []) + + @classmethod + def validate_transition( + cls, from_state: CampaignState, to_state: CampaignState + ) -> None: + if not cls.can_transition(from_state, to_state): + raise InvalidStateTransitionError( + f"Cannot transition from {from_state} to {to_state}" + ) +``` + +## Acceptance Criteria + +- [ ] `CampaignStateMachine` enforces valid transitions (draft→active, active→paused, etc.) 
+- [ ] `AttackStateMachine` enforces valid transitions +- [ ] Invalid transitions raise `InvalidStateTransitionError` with clear messages +- [ ] State machines distinguish between user actions and system transitions +- [ ] State machines are reusable across service layer +- [ ] Comprehensive test coverage for all valid and invalid transitions +- [ ] Documentation includes state transition diagrams + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Unit tests for each state machine class +- Test all valid transitions +- Test all invalid transitions (expect errors) +- Test edge cases (e.g., transitioning from same state to same state) + +**Test Command:** `just test-backend` + +## Dependencies + +None - this is foundation work that can start immediately. + +## Related Tickets + +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T8` (Campaign Lifecycle Actions) +- Required by `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T10` (Attack Lifecycle & Reordering) diff --git a/docs/epics/control-api/tickets/Template_Import_Export.md b/docs/epics/control-api/tickets/Template_Import_Export.md new file mode 100644 index 00000000..7875f67d --- /dev/null +++ b/docs/epics/control-api/tickets/Template_Import_Export.md @@ -0,0 +1,119 @@ +# Template Import/Export + +## Overview + +Implement Control API endpoints for campaign template export and import. Templates enable reusable campaign configurations across projects and environments. + +## Context + +Users need to export successful campaign configurations as templates and import them into other environments. Templates are JSON-formatted with schema versioning for evolution. Partial import is supported when resources are missing. 
+ +**Spec References:** + +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/d3caa175-100a-4242-b8b4-0c8139a48034` (Core Flows - Flow 4: Template Reuse) +- `spec:84e8066f-28f2-4489-aeb6-0aeceb19dcde/874b33d9-e442-4af3-98d3-e08cb71a007c` (Tech Plan - Templates) + +## Scope + +**In Scope:** + +- Campaign template export endpoint (JSON with schema version) +- Template validation endpoint (`validate_only=true`) +- Template import endpoint with partial import support +- Template schema versioning +- Integration with existing `file:app/core/services/template_service.py` + +**Out of Scope:** + +- Template versioning and migration (single schema version for now) +- Template marketplace or sharing +- Attack-only templates (campaign-level only) + +## Implementation Guidance + +**Endpoints:** + +- `GET /api/v1/control/campaigns/{id}/export` - Export campaign as template +- `POST /api/v1/control/templates/validate` - Validate template +- `POST /api/v1/control/templates/import` - Import template + +**Key Files:** + +- Create `file:app/api/v1/endpoints/control/templates.py` - New router +- `file:app/core/services/template_service.py` - Existing service layer +- `file:app/schemas/shared.py` - CampaignTemplate schema + +**Template Format:** + +```python +{ + "schema_version": "1.0", + "campaign": { + "name": "Corporate Password Audit", + "description": "Standard audit template", + "attacks": [ + { + "name": "Dictionary Attack", + "attack_mode": 0, + "wordlist_name": "rockyou.txt", # Name, not ID + "rule_list_name": "best64.rule", + } + ], + }, +} +``` + +**Import Response (Partial Import):** + +```python +{ + "campaign_id": 789, + "imported": { + "campaign": true, + "attacks": [{"name": "Dictionary Attack", "imported": true, "attack_id": 101}], + }, + "skipped": { + "attacks": [ + { + "name": "Mask Attack", + "reason": "missing_resource", + "missing_resources": ["custom_masks.hcmask"], + } + ] + }, +} +``` + +## Acceptance Criteria + +- [ ] Users can export campaign as JSON template 
with schema version +- [ ] Export includes all attacks and resource references (by name, not ID) +- [ ] Users can validate template before import (`validate_only=true`) +- [ ] Validation identifies missing resources and incompatibilities +- [ ] Users can import template with partial import support +- [ ] Import creates campaign and skips attacks with missing resources +- [ ] Import response clearly reports imported vs skipped items with reasons +- [ ] Templates are portable across environments (use resource names, not IDs) +- [ ] All operations respect project scoping +- [ ] All errors follow RFC9457 format + +## Testing Strategy + +**Backend Tests (Tier 1):** + +- Test template export (various campaign configurations) +- Test template validation (valid and invalid templates) +- Test template import (full and partial import scenarios) +- Test missing resource handling +- Test schema version validation + +**Test Command:** `just test-backend` + +## Dependencies + +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T7` (Campaign CRUD) for campaign data +- `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T9` (Attack CRUD) for attack data + +## Related Tickets + +- Complements `ticket:84e8066f-28f2-4489-aeb6-0aeceb19dcde/T14` (Results & Batch Operations) diff --git a/frontend/.eslintrc.local.js b/frontend/.eslintrc.local.js deleted file mode 100644 index 85058507..00000000 --- a/frontend/.eslintrc.local.js +++ /dev/null @@ -1,10 +0,0 @@ -module.exports = { - rules: { - // Disable the no-unassigned-vars rule for Svelte component props - 'eslint/no-unassigned-vars': 'off', - - // Keep other linting rules active - '@typescript-eslint/no-explicit-any': 'error', - 'eslint-plugin-unicorn/prefer-string-replace-all': 'error', - }, -}; diff --git a/frontend/vitest-setup-client.ts b/frontend/vitest-setup-client.ts index fff2dbcf..2ad0f82f 100644 --- a/frontend/vitest-setup-client.ts +++ b/frontend/vitest-setup-client.ts @@ -60,6 +60,7 @@ Object.defineProperty(window, 'localStorage', { 
// TypeScript global augmentation for SvelteKit payload declare global { + // oxlint-disable-next-line no-var -- TypeScript requires `var` for global augmentation var __SVELTEKIT_PAYLOAD__: { data: Record; status: number; diff --git a/pyproject.toml b/pyproject.toml index ed5a3512..6f234ee1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "pydantic-settings>=2.12.0", "python-jose[cryptography]>=3.5.0", "passlib[bcrypt]>=1.7.4", - "python-multipart>=0.0.21", + "python-multipart>=0.0.22", "celery>=5.6.1", "redis>=7.1.0", "httpx[http2,brotli,zstd]>=0.28.1", diff --git a/tests/integration/control/test_control_error_handling.py b/tests/integration/control/test_control_error_handling.py index 0397b1c6..3f41caeb 100644 --- a/tests/integration/control/test_control_error_handling.py +++ b/tests/integration/control/test_control_error_handling.py @@ -3,14 +3,18 @@ from typing import Never import pytest -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException from fastapi.testclient import TestClient +from app.api.v1.endpoints.agent.v1_http_exception_handler import ( + v1_http_exception_handler, +) from app.core.control_exceptions import ( CampaignNotFoundError, InsufficientPermissionsError, InternalServerError, InvalidAttackConfigError, + InvalidStateTransitionProblem, ProjectAccessDeniedError, ) from app.core.control_rfc9457_middleware import ControlRFC9457Middleware @@ -18,12 +22,15 @@ @pytest.fixture def test_app() -> FastAPI: - """Create a test FastAPI app with Control RFC9457 middleware.""" + """Create a test FastAPI app with Control RFC9457 middleware and exception handler.""" app = FastAPI() # Add Control API RFC9457 middleware app.add_middleware(ControlRFC9457Middleware) + # Register HTTPException handler for Control API paths + app.add_exception_handler(HTTPException, v1_http_exception_handler) + # Add test routes that raise custom exceptions (using Control API paths) 
@app.get("/api/v1/control/test/campaign-not-found", response_model=None) async def test_campaign_not_found() -> Never: @@ -45,6 +52,64 @@ async def test_project_access_denied() -> Never: async def test_internal_server_error() -> Never: raise InternalServerError(detail="An internal server error occurred") + @app.get("/api/v1/control/test/invalid-state-transition", response_model=None) + async def test_invalid_state_transition() -> Never: + raise InvalidStateTransitionProblem( + from_state="archived", + to_state="active", + action="start", + entity_type="campaign", + valid_transitions=["completed", "draft"], + ) + + # HTTPException test routes + @app.get("/api/v1/control/test/http-400", response_model=None) + async def test_http_400() -> Never: + raise HTTPException(status_code=400, detail="Invalid request") + + @app.get("/api/v1/control/test/http-401", response_model=None) + async def test_http_401() -> Never: + raise HTTPException(status_code=401, detail="Authentication required") + + @app.get("/api/v1/control/test/http-403", response_model=None) + async def test_http_403() -> Never: + raise HTTPException(status_code=403, detail="Access denied") + + @app.get("/api/v1/control/test/http-404", response_model=None) + async def test_http_404() -> Never: + raise HTTPException(status_code=404, detail="Resource not found") + + @app.get("/api/v1/control/test/http-409", response_model=None) + async def test_http_409() -> Never: + raise HTTPException(status_code=409, detail="Resource conflict") + + @app.get("/api/v1/control/test/http-422", response_model=None) + async def test_http_422() -> Never: + raise HTTPException(status_code=422, detail="Validation failed") + + @app.get("/api/v1/control/test/http-500", response_model=None) + async def test_http_500() -> Never: + raise HTTPException(status_code=500, detail="Server error") + + @app.get("/api/v1/control/test/http-dict-detail", response_model=None) + async def test_http_dict_detail() -> Never: + raise HTTPException( + 
status_code=422, detail={"field": "name", "error": "required"} + ) + + @app.get("/api/v1/control/test/http-unknown-status", response_model=None) + async def test_http_unknown_status() -> Never: + raise HTTPException(status_code=418, detail="I'm a teapot") + + # Non-Control API routes for path scoping tests + @app.get("/api/v1/web/test/http-error", response_model=None) + async def test_web_http_error() -> Never: + raise HTTPException(status_code=404, detail="Not found") + + @app.get("/api/v1/client/test/http-error", response_model=None) + async def test_client_http_error() -> Never: + raise HTTPException(status_code=404, detail="Not found") + return app @@ -146,6 +211,28 @@ def test_internal_server_error_format(client: TestClient) -> None: assert data["detail"] == "An internal server error occurred" +def test_invalid_state_transition_error_format(client: TestClient) -> None: + """Test that InvalidStateTransitionProblem returns RFC9457 format with extension fields.""" + response = client.get("/api/v1/control/test/invalid-state-transition") + + assert response.status_code == 409 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + # Required RFC9457 fields + assert data["title"] == "Invalid State Transition" + assert data["status"] == 409 + assert "archived" in data["detail"] + assert "active" in data["detail"] + + # Extension fields specific to state transition errors + assert data["current_state"] == "archived" + assert data["attempted_state"] == "active" + assert data["action"] == "start" + assert data["entity_type"] == "campaign" + assert data["valid_transitions"] == ["completed", "draft"] + + def test_error_type_format(client: TestClient) -> None: """Test that error type follows kebab-case convention.""" response = client.get("/api/v1/control/test/campaign-not-found") @@ -157,3 +244,185 @@ def test_error_type_format(client: TestClient) -> None: assert "-" in error_type assert error_type.islower() assert " " not in 
error_type + + +# HTTPException conversion tests + + +def test_http_exception_400_format(client: TestClient) -> None: + """Test that HTTPException with 400 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-400") + + assert response.status_code == 400 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Bad Request" + assert data["status"] == 400 + assert data["detail"] == "Invalid request" + assert data["instance"] == "/api/v1/control/test/http-400" + + +def test_http_exception_401_format(client: TestClient) -> None: + """Test that HTTPException with 401 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-401") + + assert response.status_code == 401 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Unauthorized" + assert data["status"] == 401 + assert data["detail"] == "Authentication required" + + +def test_http_exception_403_format(client: TestClient) -> None: + """Test that HTTPException with 403 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-403") + + assert response.status_code == 403 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Forbidden" + assert data["status"] == 403 + assert data["detail"] == "Access denied" + + +def test_http_exception_404_format(client: TestClient) -> None: + """Test that HTTPException with 404 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-404") + + assert response.status_code == 404 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Not 
Found" + assert data["status"] == 404 + assert data["detail"] == "Resource not found" + assert data["instance"] == "/api/v1/control/test/http-404" + + +def test_http_exception_409_format(client: TestClient) -> None: + """Test that HTTPException with 409 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-409") + + assert response.status_code == 409 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Conflict" + assert data["status"] == 409 + assert data["detail"] == "Resource conflict" + + +def test_http_exception_422_format(client: TestClient) -> None: + """Test that HTTPException with 422 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-422") + + assert response.status_code == 422 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Unprocessable Entity" + assert data["status"] == 422 + assert data["detail"] == "Validation failed" + + +def test_http_exception_500_format(client: TestClient) -> None: + """Test that HTTPException with 500 status returns RFC9457 format.""" + response = client.get("/api/v1/control/test/http-500") + + assert response.status_code == 500 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "Internal Server Error" + assert data["status"] == 500 + assert data["detail"] == "Server error" + + +def test_http_exception_dict_detail_format(client: TestClient) -> None: + """Test that HTTPException with dictionary detail includes extension fields.""" + response = client.get("/api/v1/control/test/http-dict-detail") + + assert response.status_code == 422 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() 
+ # Required RFC9457 fields + assert data["type"] == "about:blank" + assert data["title"] == "Unprocessable Entity" + assert data["status"] == 422 + assert data["instance"] == "/api/v1/control/test/http-dict-detail" + + # Extension fields from the dictionary detail + assert data["field"] == "name" + assert data["error"] == "required" + + +def test_http_exception_unknown_status_format(client: TestClient) -> None: + """Test that HTTPException with unknown status code uses default title.""" + response = client.get("/api/v1/control/test/http-unknown-status") + + assert response.status_code == 418 + assert response.headers["content-type"] == "application/problem+json" + + data = response.json() + assert data["type"] == "about:blank" + assert data["title"] == "HTTP Error" # Default title for unknown status + assert data["status"] == 418 + assert data["detail"] == "I'm a teapot" + + +def test_http_exception_has_required_fields(client: TestClient) -> None: + """Test that HTTPException responses contain all required RFC9457 fields.""" + response = client.get("/api/v1/control/test/http-404") + + data = response.json() + + # Required fields according to RFC9457 + required_fields = ["type", "title", "status", "detail", "instance"] + + for field in required_fields: + assert field in data, f"Required field '{field}' missing from error response" + + # Verify field types + assert isinstance(data["type"], str) + assert isinstance(data["title"], str) + assert isinstance(data["status"], int) + assert isinstance(data["detail"], str) + assert isinstance(data["instance"], str) + + +def test_middleware_only_affects_control_api_web_path(client: TestClient) -> None: + """Test that middleware does NOT convert HTTPException on Web API paths.""" + response = client.get("/api/v1/web/test/http-error") + + assert response.status_code == 404 + # Should NOT be RFC9457 format - FastAPI default JSON response + assert response.headers["content-type"] == "application/json" + + data = response.json() + 
# FastAPI default format uses "detail" key directly + assert data == {"detail": "Not found"} + + +def test_middleware_only_affects_control_api_client_path(client: TestClient) -> None: + """Test that middleware does NOT convert HTTPException on Client API paths.""" + response = client.get("/api/v1/client/test/http-error") + + assert response.status_code == 404 + # Should NOT be RFC9457 format - uses agent/client error envelope format + assert response.headers["content-type"] == "application/json" + + data = response.json() + # Agent/Client API format uses "error" key (legacy compatibility) + assert data == {"error": "Not found"} diff --git a/tests/integration/web/test_web_campaigns.py b/tests/integration/web/test_web_campaigns.py index 243d68e0..e4a1a1c9 100644 --- a/tests/integration/web/test_web_campaigns.py +++ b/tests/integration/web/test_web_campaigns.py @@ -125,11 +125,11 @@ async def test_start_stop_campaign_archived( resp = await async_client.post(f"/api/v1/web/campaigns/{campaign.id}/start") assert resp.status_code == HTTPStatus.BAD_REQUEST data = resp.json() - assert "Cannot start an archived campaign." in data["detail"] + assert "Cannot start campaign from state 'archived'" in data["detail"] resp = await async_client.post(f"/api/v1/web/campaigns/{campaign.id}/stop") assert resp.status_code == HTTPStatus.BAD_REQUEST data = resp.json() - assert "Cannot stop an archived campaign." 
in data["detail"] + assert "Cannot stop campaign from state 'archived'" in data["detail"] @pytest.mark.asyncio diff --git a/tests/unit/test_attack_service.py b/tests/unit/test_attack_service.py index 67cdec26..b7730d40 100644 --- a/tests/unit/test_attack_service.py +++ b/tests/unit/test_attack_service.py @@ -527,3 +527,133 @@ def test_safe_int() -> None: assert _safe_int("invalid", 10) == 10 assert _safe_int(None, 5) == 5 assert _safe_int(3.14, 0) == 0 # Non-int, non-string + + +# --- State machine integration tests --- + + +@pytest.mark.asyncio +@patch("app.core.services.attack_service._broadcast_campaign_update") +async def test_delete_attack_service_aborts_running_attack( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that delete_attack_service marks RUNNING attack as ABANDONED.""" + from app.models.attack import AttackState + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + AttackFactory.__async_session__ = db_session + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + campaign = await CampaignFactory.create_async( + project_id=project.id, hash_list_id=hash_list.id + ) + attack = await AttackFactory.create_async( + campaign_id=campaign.id, + state=AttackState.RUNNING, + ) + + # Delete running attack (should abort instead of delete) + result = await delete_attack_service(attack.id, db_session) + + assert result["deleted"] is True + assert result["id"] == attack.id + + # Verify attack is marked as ABANDONED (not actually deleted) + attack_result = await db_session.execute( + select(Attack).where(Attack.id == 
attack.id) + ) + updated_attack = attack_result.scalar_one() + assert updated_attack.state == AttackState.ABANDONED + + +@pytest.mark.asyncio +@patch("app.core.services.attack_service._broadcast_campaign_update") +async def test_delete_attack_service_aborts_paused_attack( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that delete_attack_service marks PAUSED attack as ABANDONED.""" + from app.models.attack import AttackState + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + AttackFactory.__async_session__ = db_session + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + campaign = await CampaignFactory.create_async( + project_id=project.id, hash_list_id=hash_list.id + ) + attack = await AttackFactory.create_async( + campaign_id=campaign.id, + state=AttackState.PAUSED, + ) + + # Delete paused attack (should abort instead of delete) + result = await delete_attack_service(attack.id, db_session) + + assert result["deleted"] is True + assert result["id"] == attack.id + + # Verify attack is marked as ABANDONED (not actually deleted) + attack_result = await db_session.execute( + select(Attack).where(Attack.id == attack.id) + ) + updated_attack = attack_result.scalar_one() + assert updated_attack.state == AttackState.ABANDONED + + +@pytest.mark.asyncio +@patch("app.core.services.attack_service._broadcast_campaign_update") +async def test_delete_attack_service_rejects_completed_attack_abort( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that delete_attack_service rejects aborting a COMPLETED 
attack.""" + from app.core.state_machines import InvalidStateTransitionError + from app.models.attack import AttackState + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + AttackFactory.__async_session__ = db_session + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + campaign = await CampaignFactory.create_async( + project_id=project.id, hash_list_id=hash_list.id + ) + attack = await AttackFactory.create_async( + campaign_id=campaign.id, + state=AttackState.COMPLETED, + ) + + # Attempt to delete a completed attack - should raise InvalidStateTransitionError + with pytest.raises(InvalidStateTransitionError) as exc_info: + await delete_attack_service(attack.id, db_session) + + assert exc_info.value.from_state == AttackState.COMPLETED + assert exc_info.value.action == "abort" diff --git a/tests/unit/test_campaign_service.py b/tests/unit/test_campaign_service.py index f1827ee3..086f201a 100644 --- a/tests/unit/test_campaign_service.py +++ b/tests/unit/test_campaign_service.py @@ -693,3 +693,150 @@ async def test_relaunch_campaign_service_success(db_session: AsyncSession) -> No assert result.campaign.id == campaign.id assert isinstance(result.attacks, list) + + +# --- State machine integration tests --- + + +@pytest.mark.asyncio +@patch("app.core.services.campaign_service._broadcast_campaign_update") +async def test_start_campaign_service_rejects_archived_state( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that start_campaign_service rejects transition from ARCHIVED state.""" + from app.core.state_machines import 
InvalidStateTransitionError + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + + campaign = await CampaignFactory.create_async( + project_id=project.id, + hash_list_id=hash_list.id, + state=CampaignState.ARCHIVED, + ) + + # Attempt to start an archived campaign - should raise InvalidStateTransitionError + with pytest.raises(InvalidStateTransitionError) as exc_info: + await start_campaign_service(campaign.id, db_session) + + assert exc_info.value.from_state == CampaignState.ARCHIVED + assert exc_info.value.action == "start" + + +@pytest.mark.asyncio +@patch("app.core.services.campaign_service._broadcast_campaign_update") +async def test_stop_campaign_service_rejects_archived_state( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that stop_campaign_service rejects transition from ARCHIVED state.""" + from app.core.state_machines import InvalidStateTransitionError + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + + campaign = await CampaignFactory.create_async( + project_id=project.id, + 
hash_list_id=hash_list.id, + state=CampaignState.ARCHIVED, + ) + + # Attempt to stop an archived campaign - should raise InvalidStateTransitionError + with pytest.raises(InvalidStateTransitionError) as exc_info: + await stop_campaign_service(campaign.id, db_session) + + assert exc_info.value.from_state == CampaignState.ARCHIVED + assert exc_info.value.action == "stop" + + +@pytest.mark.asyncio +@patch("app.core.services.campaign_service._broadcast_campaign_update") +async def test_stop_campaign_service_rejects_completed_state( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that stop_campaign_service rejects transition from COMPLETED state.""" + from app.core.state_machines import InvalidStateTransitionError + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + + campaign = await CampaignFactory.create_async( + project_id=project.id, + hash_list_id=hash_list.id, + state=CampaignState.COMPLETED, + ) + + # Attempt to stop a completed campaign - should raise InvalidStateTransitionError + with pytest.raises(InvalidStateTransitionError) as exc_info: + await stop_campaign_service(campaign.id, db_session) + + assert exc_info.value.from_state == CampaignState.COMPLETED + assert exc_info.value.action == "stop" + + +@pytest.mark.asyncio +@patch("app.core.services.campaign_service._broadcast_campaign_update") +async def test_archive_campaign_service_rejects_error_state( + mock_broadcast: AsyncMock, + db_session: AsyncSession, +) -> None: + """Test that archive_campaign_service 
rejects transition from ERROR state.""" + from app.core.state_machines import InvalidStateTransitionError + from tests.factories.hash_list_factory import HashListFactory + from tests.utils.hash_type_utils import get_or_create_hash_type + + # Set factory sessions + CampaignFactory.__async_session__ = db_session + ProjectFactory.__async_session__ = db_session + HashListFactory.__async_session__ = db_session + + project = await ProjectFactory.create_async() + hash_type = await get_or_create_hash_type(db_session, 0, "MD5") + hash_list = await HashListFactory.create_async( + project_id=project.id, hash_type_id=hash_type.id + ) + + campaign = await CampaignFactory.create_async( + project_id=project.id, + hash_list_id=hash_list.id, + state=CampaignState.ERROR, + ) + + # Attempt to archive an ERROR campaign - should raise InvalidStateTransitionError + with pytest.raises(InvalidStateTransitionError) as exc_info: + await archive_campaign_service(campaign.id, db_session) + + assert exc_info.value.from_state == CampaignState.ERROR + assert exc_info.value.action == "archive" diff --git a/tests/unit/test_state_machines.py b/tests/unit/test_state_machines.py new file mode 100644 index 00000000..3f36b845 --- /dev/null +++ b/tests/unit/test_state_machines.py @@ -0,0 +1,883 @@ +"""Comprehensive tests for state machine classes.""" + +import pytest + +from app.core.control_exceptions import InvalidStateTransitionProblem +from app.core.state_machines import ( + AttackStateMachine, + CampaignStateMachine, + InvalidStateTransitionError, +) +from app.models.attack import AttackState +from app.models.campaign import CampaignState + + +class TestInvalidStateTransitionError: + """Tests for InvalidStateTransitionError exception.""" + + def test_error_with_action(self) -> None: + """Test error message when action is provided.""" + error = InvalidStateTransitionError( + CampaignState.DRAFT, CampaignState.PAUSED, action="pause" + ) + assert error.from_state == CampaignState.DRAFT + assert 
error.to_state == CampaignState.PAUSED + assert error.action == "pause" + assert "pause" in str(error) + assert "draft" in str(error) + assert "paused" in str(error) + + def test_error_without_action(self) -> None: + """Test error message when no action is provided.""" + error = InvalidStateTransitionError(CampaignState.DRAFT, CampaignState.PAUSED) + assert error.from_state == CampaignState.DRAFT + assert error.to_state == CampaignState.PAUSED + assert error.action is None + assert "draft" in str(error) + assert "paused" in str(error) + + +class TestCampaignStateMachine: + """Tests for CampaignStateMachine.""" + + # Valid transitions tests + def test_valid_transition_draft_to_active(self) -> None: + """Test valid transition from DRAFT to ACTIVE.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.DRAFT, CampaignState.ACTIVE + ) + is True + ) + # Should not raise + CampaignStateMachine.validate_transition( + CampaignState.DRAFT, CampaignState.ACTIVE, action="start" + ) + + def test_valid_transition_draft_to_archived(self) -> None: + """Test valid transition from DRAFT to ARCHIVED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.DRAFT, CampaignState.ARCHIVED + ) + is True + ) + CampaignStateMachine.validate_transition( + CampaignState.DRAFT, CampaignState.ARCHIVED, action="archive" + ) + + def test_valid_transition_active_to_paused(self) -> None: + """Test valid transition from ACTIVE to PAUSED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ACTIVE, CampaignState.PAUSED + ) + is True + ) + CampaignStateMachine.validate_transition( + CampaignState.ACTIVE, CampaignState.PAUSED, action="pause" + ) + + def test_valid_transition_active_to_draft(self) -> None: + """Test valid transition from ACTIVE to DRAFT (stop).""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ACTIVE, CampaignState.DRAFT + ) + is True + ) + CampaignStateMachine.validate_transition( + CampaignState.ACTIVE, 
CampaignState.DRAFT, action="stop" + ) + + def test_valid_transition_active_to_archived(self) -> None: + """Test valid transition from ACTIVE to ARCHIVED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ACTIVE, CampaignState.ARCHIVED + ) + is True + ) + + def test_valid_transition_active_to_completed(self) -> None: + """Test valid transition from ACTIVE to COMPLETED (system).""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ACTIVE, CampaignState.COMPLETED + ) + is True + ) + + def test_valid_transition_paused_to_active(self) -> None: + """Test valid transition from PAUSED to ACTIVE (resume).""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.PAUSED, CampaignState.ACTIVE + ) + is True + ) + CampaignStateMachine.validate_transition( + CampaignState.PAUSED, CampaignState.ACTIVE, action="resume" + ) + + def test_valid_transition_paused_to_archived(self) -> None: + """Test valid transition from PAUSED to ARCHIVED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.PAUSED, CampaignState.ARCHIVED + ) + is True + ) + + def test_valid_transition_completed_to_archived(self) -> None: + """Test valid transition from COMPLETED to ARCHIVED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.COMPLETED, CampaignState.ARCHIVED + ) + is True + ) + + def test_valid_transition_archived_to_draft(self) -> None: + """Test valid transition from ARCHIVED to DRAFT (unarchive).""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ARCHIVED, CampaignState.DRAFT + ) + is True + ) + CampaignStateMachine.validate_transition( + CampaignState.ARCHIVED, CampaignState.DRAFT, action="unarchive" + ) + + def test_valid_transition_error_to_draft(self) -> None: + """Test valid transition from ERROR to DRAFT (reset).""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ERROR, CampaignState.DRAFT + ) + is True + ) + CampaignStateMachine.validate_transition( + CampaignState.ERROR, 
CampaignState.DRAFT, action="reset" + ) + + # Invalid transitions tests + def test_invalid_transition_draft_to_paused(self) -> None: + """Test invalid transition from DRAFT to PAUSED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.DRAFT, CampaignState.PAUSED + ) + is False + ) + with pytest.raises(InvalidStateTransitionError) as exc_info: + CampaignStateMachine.validate_transition( + CampaignState.DRAFT, CampaignState.PAUSED, action="pause" + ) + assert "draft" in str(exc_info.value).lower() + assert "paused" in str(exc_info.value).lower() + + def test_invalid_transition_draft_to_completed(self) -> None: + """Test invalid transition from DRAFT to COMPLETED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.DRAFT, CampaignState.COMPLETED + ) + is False + ) + + def test_invalid_transition_completed_to_active(self) -> None: + """Test invalid transition from COMPLETED to ACTIVE.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.COMPLETED, CampaignState.ACTIVE + ) + is False + ) + with pytest.raises(InvalidStateTransitionError): + CampaignStateMachine.validate_transition( + CampaignState.COMPLETED, CampaignState.ACTIVE + ) + + def test_invalid_transition_archived_to_active(self) -> None: + """Test invalid transition from ARCHIVED to ACTIVE.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.ARCHIVED, CampaignState.ACTIVE + ) + is False + ) + + def test_invalid_transition_paused_to_completed(self) -> None: + """Test invalid transition from PAUSED to COMPLETED.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.PAUSED, CampaignState.COMPLETED + ) + is False + ) + + def test_invalid_transition_same_state(self) -> None: + """Test invalid transition from same state to same state.""" + assert ( + CampaignStateMachine.can_transition( + CampaignState.DRAFT, CampaignState.DRAFT + ) + is False + ) + with pytest.raises(InvalidStateTransitionError): + 
CampaignStateMachine.validate_transition( + CampaignState.DRAFT, CampaignState.DRAFT + ) + + # Action validation tests + def test_validate_action_start(self) -> None: + """Test start action from DRAFT.""" + target = CampaignStateMachine.validate_action(CampaignState.DRAFT, "start") + assert target == CampaignState.ACTIVE + + def test_validate_action_stop(self) -> None: + """Test stop action from ACTIVE.""" + target = CampaignStateMachine.validate_action(CampaignState.ACTIVE, "stop") + assert target == CampaignState.DRAFT + + def test_validate_action_pause(self) -> None: + """Test pause action from ACTIVE.""" + target = CampaignStateMachine.validate_action(CampaignState.ACTIVE, "pause") + assert target == CampaignState.PAUSED + + def test_validate_action_resume(self) -> None: + """Test resume action from PAUSED.""" + target = CampaignStateMachine.validate_action(CampaignState.PAUSED, "resume") + assert target == CampaignState.ACTIVE + + def test_validate_action_archive_from_draft(self) -> None: + """Test archive action from DRAFT.""" + target = CampaignStateMachine.validate_action(CampaignState.DRAFT, "archive") + assert target == CampaignState.ARCHIVED + + def test_validate_action_archive_from_active(self) -> None: + """Test archive action from ACTIVE.""" + target = CampaignStateMachine.validate_action(CampaignState.ACTIVE, "archive") + assert target == CampaignState.ARCHIVED + + def test_validate_action_archive_from_paused(self) -> None: + """Test archive action from PAUSED.""" + target = CampaignStateMachine.validate_action(CampaignState.PAUSED, "archive") + assert target == CampaignState.ARCHIVED + + def test_validate_action_archive_from_completed(self) -> None: + """Test archive action from COMPLETED.""" + target = CampaignStateMachine.validate_action( + CampaignState.COMPLETED, "archive" + ) + assert target == CampaignState.ARCHIVED + + def test_validate_action_unarchive(self) -> None: + """Test unarchive action from ARCHIVED.""" + target = 
CampaignStateMachine.validate_action( + CampaignState.ARCHIVED, "unarchive" + ) + assert target == CampaignState.DRAFT + + def test_validate_action_reset(self) -> None: + """Test reset action from ERROR.""" + target = CampaignStateMachine.validate_action(CampaignState.ERROR, "reset") + assert target == CampaignState.DRAFT + + def test_validate_action_invalid_from_state(self) -> None: + """Test action from invalid state raises error.""" + with pytest.raises(InvalidStateTransitionError) as exc_info: + CampaignStateMachine.validate_action(CampaignState.COMPLETED, "start") + assert exc_info.value.action == "start" + + def test_validate_action_unknown_action(self) -> None: + """Test unknown action raises error.""" + with pytest.raises(InvalidStateTransitionError) as exc_info: + CampaignStateMachine.validate_action(CampaignState.DRAFT, "unknown_action") + assert exc_info.value.action == "unknown_action" + + # Get valid transitions tests + def test_get_valid_transitions_draft(self) -> None: + """Test getting valid transitions from DRAFT.""" + valid = CampaignStateMachine.get_valid_transitions(CampaignState.DRAFT) + assert CampaignState.ACTIVE in valid + assert CampaignState.ARCHIVED in valid + assert len(valid) == 2 + + def test_get_valid_transitions_active(self) -> None: + """Test getting valid transitions from ACTIVE.""" + valid = CampaignStateMachine.get_valid_transitions(CampaignState.ACTIVE) + assert CampaignState.PAUSED in valid + assert CampaignState.DRAFT in valid + assert CampaignState.ARCHIVED in valid + assert CampaignState.COMPLETED in valid + assert len(valid) == 4 + + def test_get_valid_transitions_completed(self) -> None: + """Test getting valid transitions from COMPLETED.""" + valid = CampaignStateMachine.get_valid_transitions(CampaignState.COMPLETED) + assert CampaignState.ARCHIVED in valid + assert len(valid) == 1 + + # Get valid actions tests + def test_get_valid_actions_draft(self) -> None: + """Test getting valid actions from DRAFT.""" + valid = 
CampaignStateMachine.get_valid_actions(CampaignState.DRAFT) + assert "start" in valid + assert "archive" in valid + assert len(valid) == 2 + + def test_get_valid_actions_active(self) -> None: + """Test getting valid actions from ACTIVE.""" + valid = CampaignStateMachine.get_valid_actions(CampaignState.ACTIVE) + assert "stop" in valid + assert "pause" in valid + assert "archive" in valid + assert len(valid) == 3 + + def test_get_valid_actions_paused(self) -> None: + """Test getting valid actions from PAUSED.""" + valid = CampaignStateMachine.get_valid_actions(CampaignState.PAUSED) + assert "resume" in valid + assert "archive" in valid + assert len(valid) == 2 + + def test_get_valid_actions_completed(self) -> None: + """Test getting valid actions from COMPLETED.""" + valid = CampaignStateMachine.get_valid_actions(CampaignState.COMPLETED) + assert "archive" in valid + assert len(valid) == 1 + + def test_get_valid_actions_archived(self) -> None: + """Test getting valid actions from ARCHIVED.""" + valid = CampaignStateMachine.get_valid_actions(CampaignState.ARCHIVED) + assert "unarchive" in valid + assert len(valid) == 1 + + def test_get_valid_actions_error(self) -> None: + """Test getting valid actions from ERROR.""" + valid = CampaignStateMachine.get_valid_actions(CampaignState.ERROR) + assert "reset" in valid + assert len(valid) == 1 + + +class TestAttackStateMachine: + """Tests for AttackStateMachine.""" + + # Valid transitions tests + def test_valid_transition_pending_to_running(self) -> None: + """Test valid transition from PENDING to RUNNING.""" + assert ( + AttackStateMachine.can_transition(AttackState.PENDING, AttackState.RUNNING) + is True + ) + AttackStateMachine.validate_transition( + AttackState.PENDING, AttackState.RUNNING, action="start" + ) + + def test_valid_transition_pending_to_abandoned(self) -> None: + """Test valid transition from PENDING to ABANDONED.""" + assert ( + AttackStateMachine.can_transition( + AttackState.PENDING, AttackState.ABANDONED + 
) + is True + ) + AttackStateMachine.validate_transition( + AttackState.PENDING, AttackState.ABANDONED, action="abandon" + ) + + def test_valid_transition_running_to_paused(self) -> None: + """Test valid transition from RUNNING to PAUSED.""" + assert ( + AttackStateMachine.can_transition(AttackState.RUNNING, AttackState.PAUSED) + is True + ) + AttackStateMachine.validate_transition( + AttackState.RUNNING, AttackState.PAUSED, action="pause" + ) + + def test_valid_transition_paused_to_running(self) -> None: + """Test valid transition from PAUSED to RUNNING (resume).""" + assert ( + AttackStateMachine.can_transition(AttackState.PAUSED, AttackState.RUNNING) + is True + ) + AttackStateMachine.validate_transition( + AttackState.PAUSED, AttackState.RUNNING, action="resume" + ) + + def test_valid_transition_running_to_completed(self) -> None: + """Test valid transition from RUNNING to COMPLETED.""" + assert ( + AttackStateMachine.can_transition( + AttackState.RUNNING, AttackState.COMPLETED + ) + is True + ) + + def test_valid_transition_running_to_failed(self) -> None: + """Test valid transition from RUNNING to FAILED.""" + assert ( + AttackStateMachine.can_transition(AttackState.RUNNING, AttackState.FAILED) + is True + ) + + def test_valid_transition_failed_to_pending(self) -> None: + """Test valid transition from FAILED to PENDING (retry).""" + assert ( + AttackStateMachine.can_transition(AttackState.FAILED, AttackState.PENDING) + is True + ) + AttackStateMachine.validate_transition( + AttackState.FAILED, AttackState.PENDING, action="retry" + ) + + def test_valid_transition_abandoned_to_pending(self) -> None: + """Test valid transition from ABANDONED to PENDING (reactivate).""" + assert ( + AttackStateMachine.can_transition( + AttackState.ABANDONED, AttackState.PENDING + ) + is True + ) + AttackStateMachine.validate_transition( + AttackState.ABANDONED, AttackState.PENDING, action="reactivate" + ) + + def test_valid_transition_running_to_abandoned(self) -> None: + """Test 
valid transition from RUNNING to ABANDONED (abort).""" + assert ( + AttackStateMachine.can_transition( + AttackState.RUNNING, AttackState.ABANDONED + ) + is True + ) + AttackStateMachine.validate_transition( + AttackState.RUNNING, AttackState.ABANDONED, action="abort" + ) + + def test_valid_transition_paused_to_abandoned(self) -> None: + """Test valid transition from PAUSED to ABANDONED (abort).""" + assert ( + AttackStateMachine.can_transition(AttackState.PAUSED, AttackState.ABANDONED) + is True + ) + AttackStateMachine.validate_transition( + AttackState.PAUSED, AttackState.ABANDONED, action="abort" + ) + + # Invalid transitions tests + def test_invalid_transition_pending_to_completed(self) -> None: + """Test invalid transition from PENDING to COMPLETED.""" + assert ( + AttackStateMachine.can_transition( + AttackState.PENDING, AttackState.COMPLETED + ) + is False + ) + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_transition( + AttackState.PENDING, AttackState.COMPLETED + ) + + def test_invalid_transition_completed_to_running(self) -> None: + """Test invalid transition from COMPLETED to RUNNING.""" + assert ( + AttackStateMachine.can_transition( + AttackState.COMPLETED, AttackState.RUNNING + ) + is False + ) + + def test_invalid_transition_completed_to_pending(self) -> None: + """Test invalid transition from COMPLETED to PENDING.""" + assert ( + AttackStateMachine.can_transition( + AttackState.COMPLETED, AttackState.PENDING + ) + is False + ) + + def test_invalid_transition_failed_to_running(self) -> None: + """Test invalid transition from FAILED to RUNNING.""" + assert ( + AttackStateMachine.can_transition(AttackState.FAILED, AttackState.RUNNING) + is False + ) + + def test_invalid_transition_same_state(self) -> None: + """Test invalid transition from same state to same state.""" + assert ( + AttackStateMachine.can_transition(AttackState.PENDING, AttackState.PENDING) + is False + ) + + def test_invalid_pause_from_pending(self) -> 
None: + """Test that pause is invalid from PENDING state.""" + assert ( + AttackStateMachine.can_transition(AttackState.PENDING, AttackState.PAUSED) + is False + ) + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.PENDING, "pause") + + def test_invalid_pause_from_completed(self) -> None: + """Test that pause is invalid from COMPLETED (terminal) state.""" + assert ( + AttackStateMachine.can_transition(AttackState.COMPLETED, AttackState.PAUSED) + is False + ) + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.COMPLETED, "pause") + + def test_invalid_pause_from_failed(self) -> None: + """Test that pause is invalid from FAILED state.""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.FAILED, "pause") + + def test_invalid_resume_from_pending(self) -> None: + """Test that resume is invalid from PENDING state.""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.PENDING, "resume") + + def test_invalid_resume_from_running(self) -> None: + """Test that resume is invalid from RUNNING state (already running).""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.RUNNING, "resume") + + def test_invalid_resume_from_completed(self) -> None: + """Test that resume is invalid from COMPLETED (terminal) state.""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.COMPLETED, "resume") + + # Terminal state tests + def test_completed_is_terminal(self) -> None: + """Test that COMPLETED is a terminal state.""" + assert AttackStateMachine.is_terminal_state(AttackState.COMPLETED) is True + + def test_pending_is_not_terminal(self) -> None: + """Test that PENDING is not a terminal state.""" + assert AttackStateMachine.is_terminal_state(AttackState.PENDING) is False + + def 
test_running_is_not_terminal(self) -> None: + """Test that RUNNING is not a terminal state.""" + assert AttackStateMachine.is_terminal_state(AttackState.RUNNING) is False + + def test_failed_is_not_terminal(self) -> None: + """Test that FAILED is not a terminal state.""" + assert AttackStateMachine.is_terminal_state(AttackState.FAILED) is False + + def test_abandoned_is_not_terminal(self) -> None: + """Test that ABANDONED is not a terminal state.""" + assert AttackStateMachine.is_terminal_state(AttackState.ABANDONED) is False + + def test_paused_is_not_terminal(self) -> None: + """Test that PAUSED is not a terminal state.""" + assert AttackStateMachine.is_terminal_state(AttackState.PAUSED) is False + + # Action validation tests + def test_validate_action_start(self) -> None: + """Test start action from PENDING.""" + target = AttackStateMachine.validate_action(AttackState.PENDING, "start") + assert target == AttackState.RUNNING + + def test_validate_action_pause(self) -> None: + """Test pause action from RUNNING.""" + target = AttackStateMachine.validate_action(AttackState.RUNNING, "pause") + assert target == AttackState.PAUSED + + def test_validate_action_resume(self) -> None: + """Test resume action from PAUSED.""" + target = AttackStateMachine.validate_action(AttackState.PAUSED, "resume") + assert target == AttackState.RUNNING + + def test_validate_action_retry(self) -> None: + """Test retry action from FAILED.""" + target = AttackStateMachine.validate_action(AttackState.FAILED, "retry") + assert target == AttackState.PENDING + + def test_validate_action_abandon(self) -> None: + """Test abandon action from PENDING.""" + target = AttackStateMachine.validate_action(AttackState.PENDING, "abandon") + assert target == AttackState.ABANDONED + + def test_validate_action_reactivate(self) -> None: + """Test reactivate action from ABANDONED.""" + target = AttackStateMachine.validate_action(AttackState.ABANDONED, "reactivate") + assert target == AttackState.PENDING + + def 
test_validate_action_abort_from_running(self) -> None: + """Test abort action from RUNNING.""" + target = AttackStateMachine.validate_action(AttackState.RUNNING, "abort") + assert target == AttackState.ABANDONED + + def test_validate_action_abort_from_paused(self) -> None: + """Test abort action from PAUSED.""" + target = AttackStateMachine.validate_action(AttackState.PAUSED, "abort") + assert target == AttackState.ABANDONED + + def test_validate_action_abort_from_pending_invalid(self) -> None: + """Test that abort is invalid from PENDING (use abandon instead).""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.PENDING, "abort") + + def test_validate_action_abort_from_completed_invalid(self) -> None: + """Test that abort is invalid from COMPLETED (terminal state).""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.COMPLETED, "abort") + + def test_validate_action_invalid_from_state(self) -> None: + """Test action from invalid state raises error.""" + with pytest.raises(InvalidStateTransitionError) as exc_info: + AttackStateMachine.validate_action(AttackState.RUNNING, "start") + assert exc_info.value.action == "start" + + def test_validate_action_unknown_action(self) -> None: + """Test unknown action raises error.""" + with pytest.raises(InvalidStateTransitionError) as exc_info: + AttackStateMachine.validate_action(AttackState.PENDING, "unknown_action") + assert exc_info.value.action == "unknown_action" + + def test_validate_action_from_terminal_state(self) -> None: + """Test action from terminal state raises error.""" + with pytest.raises(InvalidStateTransitionError): + AttackStateMachine.validate_action(AttackState.COMPLETED, "start") + + # Get valid transitions tests + def test_get_valid_transitions_pending(self) -> None: + """Test getting valid transitions from PENDING.""" + valid = AttackStateMachine.get_valid_transitions(AttackState.PENDING) + assert 
AttackState.RUNNING in valid + assert AttackState.ABANDONED in valid + assert len(valid) == 2 + + def test_get_valid_transitions_running(self) -> None: + """Test getting valid transitions from RUNNING.""" + valid = AttackStateMachine.get_valid_transitions(AttackState.RUNNING) + assert AttackState.PAUSED in valid + assert AttackState.COMPLETED in valid + assert AttackState.FAILED in valid + assert AttackState.ABANDONED in valid + assert len(valid) == 4 + + def test_get_valid_transitions_paused(self) -> None: + """Test getting valid transitions from PAUSED.""" + valid = AttackStateMachine.get_valid_transitions(AttackState.PAUSED) + assert AttackState.RUNNING in valid + assert AttackState.ABANDONED in valid + assert len(valid) == 2 + + def test_get_valid_transitions_completed(self) -> None: + """Test getting valid transitions from COMPLETED (terminal).""" + valid = AttackStateMachine.get_valid_transitions(AttackState.COMPLETED) + assert len(valid) == 0 + + # Get valid actions tests + def test_get_valid_actions_pending(self) -> None: + """Test getting valid actions from PENDING.""" + valid = AttackStateMachine.get_valid_actions(AttackState.PENDING) + assert "start" in valid + assert "abandon" in valid + assert len(valid) == 2 + + def test_get_valid_actions_running(self) -> None: + """Test getting valid actions from RUNNING.""" + valid = AttackStateMachine.get_valid_actions(AttackState.RUNNING) + assert "pause" in valid + assert "abort" in valid + assert len(valid) == 2 + + def test_get_valid_actions_paused(self) -> None: + """Test getting valid actions from PAUSED.""" + valid = AttackStateMachine.get_valid_actions(AttackState.PAUSED) + assert "resume" in valid + assert "abort" in valid + assert len(valid) == 2 + + def test_get_valid_actions_completed(self) -> None: + """Test getting valid actions from COMPLETED (terminal).""" + valid = AttackStateMachine.get_valid_actions(AttackState.COMPLETED) + assert len(valid) == 0 + + def test_get_valid_actions_failed(self) -> 
None: + """Test getting valid actions from FAILED.""" + valid = AttackStateMachine.get_valid_actions(AttackState.FAILED) + assert "retry" in valid + assert len(valid) == 1 + + def test_get_valid_actions_abandoned(self) -> None: + """Test getting valid actions from ABANDONED.""" + valid = AttackStateMachine.get_valid_actions(AttackState.ABANDONED) + assert "reactivate" in valid + assert len(valid) == 1 + + +class TestInvalidStateTransitionProblem: + """Tests for InvalidStateTransitionProblem RFC9457 exception.""" + + def test_problem_with_action(self) -> None: + """Test problem creation with action.""" + problem = InvalidStateTransitionProblem( + from_state="draft", + to_state="paused", + action="pause", + entity_type="campaign", + ) + assert problem.title == "Invalid State Transition" + assert "pause" in problem.detail + assert "draft" in problem.detail + assert "paused" in problem.detail + assert "campaign" in problem.detail + assert problem.current_state == "draft" + assert problem.attempted_state == "paused" + assert problem.action == "pause" + assert problem.entity_type == "campaign" + + def test_problem_without_action(self) -> None: + """Test problem creation without action.""" + problem = InvalidStateTransitionProblem( + from_state="pending", + to_state="completed", + entity_type="attack", + ) + assert problem.title == "Invalid State Transition" + assert "pending" in problem.detail + assert "completed" in problem.detail + assert "attack" in problem.detail + assert problem.current_state == "pending" + assert problem.attempted_state == "completed" + assert problem.entity_type == "attack" + + def test_problem_with_valid_transitions(self) -> None: + """Test problem creation with valid transitions list.""" + problem = InvalidStateTransitionProblem( + from_state="draft", + to_state="paused", + action="pause", + entity_type="campaign", + valid_transitions=["active", "archived"], + ) + assert "active" in problem.detail + assert "archived" in problem.detail + assert 
problem.valid_transitions == ["active", "archived"] + + def test_problem_default_entity_type(self) -> None: + """Test problem with default entity type.""" + problem = InvalidStateTransitionProblem( + from_state="draft", + to_state="paused", + ) + assert problem.entity_type == "entity" + + +class TestStateMachineIntegration: + """Integration tests for state machine usage patterns.""" + + def test_campaign_full_lifecycle(self) -> None: + """Test a campaign going through its full lifecycle.""" + state = CampaignState.DRAFT + + # Start campaign + assert CampaignStateMachine.can_transition(state, CampaignState.ACTIVE) + state = CampaignState.ACTIVE + + # Pause campaign + assert CampaignStateMachine.can_transition(state, CampaignState.PAUSED) + state = CampaignState.PAUSED + + # Resume campaign + assert CampaignStateMachine.can_transition(state, CampaignState.ACTIVE) + state = CampaignState.ACTIVE + + # Complete campaign (system transition) + assert CampaignStateMachine.can_transition(state, CampaignState.COMPLETED) + state = CampaignState.COMPLETED + + # Archive campaign + assert CampaignStateMachine.can_transition(state, CampaignState.ARCHIVED) + + # Unarchive campaign (from archived state) + assert CampaignStateMachine.can_transition( + CampaignState.ARCHIVED, CampaignState.DRAFT + ) + + def test_attack_full_lifecycle(self) -> None: + """Test an attack going through its full lifecycle.""" + state = AttackState.PENDING + + # Start attack + assert AttackStateMachine.can_transition(state, AttackState.RUNNING) + state = AttackState.RUNNING + + # Attack completes + assert AttackStateMachine.can_transition(state, AttackState.COMPLETED) + state = AttackState.COMPLETED + + # Cannot transition from terminal state + assert AttackStateMachine.is_terminal_state(state) + assert len(AttackStateMachine.get_valid_transitions(state)) == 0 + + def test_attack_pause_resume_lifecycle(self) -> None: + """Test an attack being paused and resumed.""" + state = AttackState.PENDING + + # Start 
attack + assert AttackStateMachine.can_transition(state, AttackState.RUNNING) + state = AttackState.RUNNING + + # Pause attack + assert AttackStateMachine.can_transition(state, AttackState.PAUSED) + state = AttackState.PAUSED + + # Cannot pause again from paused + assert not AttackStateMachine.can_transition(state, AttackState.PAUSED) + + # Resume attack + assert AttackStateMachine.can_transition(state, AttackState.RUNNING) + state = AttackState.RUNNING + + # Attack completes + assert AttackStateMachine.can_transition(state, AttackState.COMPLETED) + # Final state transition validated, lifecycle complete + + def test_attack_failure_retry_cycle(self) -> None: + """Test an attack failing and being retried.""" + # Start with running attack (skip pending for brevity) + state = AttackState.RUNNING + + # Attack fails + assert AttackStateMachine.can_transition(state, AttackState.FAILED) + state = AttackState.FAILED + + # Retry attack + assert AttackStateMachine.can_transition(state, AttackState.PENDING) + state = AttackState.PENDING + + # Start again + assert AttackStateMachine.can_transition(state, AttackState.RUNNING) + + def test_error_conversion_to_rfc9457(self) -> None: + """Test converting InvalidStateTransitionError to RFC9457 problem.""" + try: + CampaignStateMachine.validate_transition( + CampaignState.DRAFT, CampaignState.PAUSED, action="pause" + ) + except InvalidStateTransitionError as e: + problem = InvalidStateTransitionProblem( + from_state=e.from_state.value, + to_state=e.to_state.value, + action=e.action, + entity_type="campaign", + valid_transitions=[ + s.value + for s in CampaignStateMachine.get_valid_transitions(e.from_state) + ], + ) + assert problem.current_state == "draft" + assert problem.attempted_state == "paused" + assert problem.action == "pause" + assert "active" in problem.valid_transitions + assert "archived" in problem.valid_transitions diff --git a/uv.lock b/uv.lock index 1418f8bb..045eb55f 100644 --- a/uv.lock +++ b/uv.lock @@ -2007,7 
+2007,7 @@ requires-dist = [ { name = "pyjwt", specifier = ">=2.10.1" }, { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "python-jose", extras = ["cryptography"], specifier = ">=3.5.0" }, - { name = "python-multipart", specifier = ">=0.0.21" }, + { name = "python-multipart", specifier = ">=0.0.22" }, { name = "redis", specifier = ">=7.1.0" }, { name = "sqlalchemy", specifier = ">=2.0.45" }, { name = "starlette", specifier = ">=0.50.0" },