From c66dd80decc86f502edf0aab4e89e93a42dc921d Mon Sep 17 00:00:00 2001 From: POWERFULMOVES Date: Mon, 19 Jan 2026 18:11:29 -0500 Subject: [PATCH 1/4] feat(pmoves-ai): Add PMOVES.AI integration patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add CHIT secrets manifest (chit/secrets_manifest_v2.yaml) - Add tier-based environment loading (env.shared, env.tier-agent.sh) - Add health check module (pmoves_health/) - Add NATS service announcer (pmoves_announcer/) - Add service registry client (pmoves_registry/) - Add Docker Compose YAML anchors (docker-compose.pmoves.yml) - Add integration documentation (PMOVES.AI_INTEGRATION.md) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- PMOVES.AI_INTEGRATION.md | 93 +++++++++ chit/secrets_manifest_v2.yaml | 79 ++++++++ docker-compose.pmoves.yml | 144 ++++++++++++++ env.shared | 144 ++++++++++++++ env.tier-agent.sh | 47 +++++ envared | 144 ++++++++++++++ pmoves_announcer/__init__.py | 352 ++++++++++++++++++++++++++++++++++ pmoves_health/__init__.py | 272 ++++++++++++++++++++++++++ pmoves_registry/__init__.py | 273 ++++++++++++++++++++++++++ 9 files changed, 1548 insertions(+) create mode 100644 PMOVES.AI_INTEGRATION.md create mode 100644 chit/secrets_manifest_v2.yaml create mode 100644 docker-compose.pmoves.yml create mode 100644 env.shared create mode 100644 env.tier-agent.sh create mode 100644 envared create mode 100644 pmoves_announcer/__init__.py create mode 100644 pmoves_health/__init__.py create mode 100644 pmoves_registry/__init__.py diff --git a/PMOVES.AI_INTEGRATION.md b/PMOVES.AI_INTEGRATION.md new file mode 100644 index 0000000000..cb5bb3ef39 --- /dev/null +++ b/PMOVES.AI_INTEGRATION.md @@ -0,0 +1,93 @@ +# PMOVES.AI Integration Guide for Archon Agent Service + +## Integration Complete + +The PMOVES.AI integration template has been applied to Archon Agent Service. + +## Next Steps + +### 1. Customize Environment Variables + +Edit the following files with your service-specific values: + +- `env.shared` - Base environment configuration +- `env.tier-agent` - AGENT tier specific configuration +- `chit/secrets_manifest_v2.yaml` - Add your service's required secrets + +### 2. Update Docker Compose + +Add the PMOVES.AI environment anchor to your `docker-compose.yml`: + +```yaml +services: + archon: + <<: [*env-tier-agent, *pmoves-healthcheck] + # Your existing service configuration... +``` + +### 3. Integrate Health Check + +Add the health check endpoint to your service: + +```python +from pmoves_health import add_custom_check, get_health_status + +@app.get("/healthz") +async def health_check(): + return await get_health_status() +``` + +### 4. Add Service Announcement + +Add NATS service announcement to your startup: + +```python +from pmoves_announcer import announce_service + +@app.on_event("startup") +async def startup(): + await announce_service( + slug="archon", + name="Archon Agent Service", + url=f"http://archon:8091", + port=8091, + tier="agent" + ) +``` + +### 5. Test Integration + +```bash +# Test health check +curl http://localhost:8091/healthz + +# Verify environment variables loaded +docker compose exec archon env | grep PMOVES + +# Verify NATS announcement +nats sub "services.announce.v1" +``` + +## Service Details + +- **Name:** Archon Agent Service +- **Slug:** archon +- **Tier:** agent +- **Port:** 8091 +- **Health Check:** http://localhost:8091/healthz +- **NATS Enabled:** False +- **GPU Enabled:** False + +## Files Created + +- `env.shared` - Base PMOVES.AI environment +- `env.tier-agent` - Tier-specific environment +- `chit/secrets_manifest_v2.yaml` - CHIT secrets configuration +- `pmoves_health/` - Health check module +- `pmoves_announcer/` - NATS service announcer +- `pmoves_registry/` - Service registry client +- `docker-compose.pmoves.yml` - PMOVES.AI YAML anchors + +## Support + +For questions or issues, see the PMOVES.AI documentation. diff --git a/chit/secrets_manifest_v2.yaml b/chit/secrets_manifest_v2.yaml new file mode 100644 index 0000000000..c2d1b36718 --- /dev/null +++ b/chit/secrets_manifest_v2.yaml @@ -0,0 +1,79 @@ +# PMOVES.AI CHIT Secrets Manifest Template +# Place at: chit/secrets_manifest_v2.yaml or .chit/secrets_manifest_v2.yaml + +api_version: "2.0" +environment: ${CHIT_ENVIRONMENT:-production} + +# Secrets sources in order of precedence (higher number = higher precedence) +sources: + # 1. Environment variables (base precedence) + - type: env + precedence: 50 + + # 2. CHIT Vault (highest precedence - overrides env vars) + - type: chit_vault + precedence: 100 + endpoint: ${CHIT_VAULT_ENDPOINT:-http://chit-vault:8050} + +# Secrets required by this service +# Replace the example variables below with your service's actual secrets +variables: + # === Service Identity === + - SERVICE_NAME + - SERVICE_SLUG + + # === PMOVES.AI Core Services === + - NATS_URL + + # === LLM Gateway (if using TensorZero) === + # Uncomment if your service uses TensorZero + # - TENSORZERO_URL + # - TENSORZERO_API_KEY + + # === GPU Orchestrator (if applicable) === + # Uncomment for GPU services + # - GPU_ORCHESTRATOR_URL + + # === Data Services (as applicable) === + # - QDRANT_URL + # - QDRANT_API_KEY + # - NEO4J_URL + # - NEO4J_USERNAME + # - NEO4J_PASSWORD + # - MEILISEARCH_URL + # - MEILISEARCH_API_KEY + # - MINIO_ENDPOINT + # - MINIO_ACCESS_KEY + # - MINIO_SECRET_KEY + + # === Supabase (if applicable) === + # - SUPABASE_URL + # - SUPABASE_ANON_KEY + # - SUPABASE_SERVICE_KEY + + # === Service-Specific Secrets === + # Add your service's required secrets below + # - EXAMPLE_API_KEY + # - EXAMPLE_DATABASE_URL + +# Optional: define secret groups for different environments +groups: + development: + required: + - SERVICE_NAME + - NATS_URL + optional: + - LOG_LEVEL + + production: + required: + - SERVICE_NAME + - SERVICE_SLUG + - NATS_URL + optional: + - LOG_LEVEL + +# Validation rules +validation: + strict: false # Set to true to fail on missing optional secrets + fail_on_missing_required: true diff --git a/docker-compose.pmoves.yml b/docker-compose.pmoves.yml new file mode 100644 index 0000000000..e40b65a2aa --- /dev/null +++ b/docker-compose.pmoves.yml @@ -0,0 +1,144 @@ +# PMOVES.AI Docker Compose YAML Anchors Template +# +# These YAML anchors provide standardized environment loading for PMOVES services. +# Include this file or copy the anchors into your docker-compose.yml. +# +# Usage: +# services: +# my-service: +# <<: *env-tier-api +# environment: +# - SERVICE_NAME=my-service +# - SERVICE_SPECIFIC_VAR=value + +version: "3.8" + +# PMOVES.AI Environment Loading Anchors +# These anchors provide tier-based environment file loading +x-pmoves-env: &pmoves-env-base + env_file: + - env.shared # Base PMOVES.AI configuration + environment: + - PMOVES_ENV=${PMOVES_ENV:-production} + - TIER=${TIER} + - NATS_URL=${NATS_URL:-nats://nats:4222} + - TENSORZERO_URL=${TENSORZERO_URL:-http://tensorzero-gateway:3030} + +# API Tier Environment +x-env-tier-api: &env-tier-api + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-api # API tier specific configuration + environment: + - TIER=api + - MAX_CONCURRENT_REQUESTS=${MAX_CONCURRENT_REQUESTS:-100} + - RATE_LIMIT_ENABLED=${RATE_LIMIT_ENABLED:-true} + +# Agent Tier Environment +x-env-tier-agent: &env-tier-agent + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-agent # Agent tier specific configuration + environment: + - TIER=agent + - MAX_CONCURRENT_AGENTS=${MAX_CONCURRENT_AGENTS:-50} + - MCP_ENABLED=${MCP_ENABLED:-true} + +# Worker Tier Environment +x-env-tier-worker: &env-tier-worker + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-worker # Worker tier specific configuration + environment: + - TIER=worker + - MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS:-10} + - WORKER_POOL_SIZE=${WORKER_POOL_SIZE:-4} + +# Data Tier Environment +x-env-tier-data: &env-tier-data + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-data # Data tier specific configuration + environment: + - TIER=data + - MAX_CONNECTIONS=${MAX_CONNECTIONS:-100} + +# LLM Tier Environment +x-env-tier-llm: &env-tier-llm + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-llm # LLM tier specific configuration + environment: + - TIER=llm + - MAX_CONCURRENT_REQUESTS=${MAX_CONCURRENT_REQUESTS:-50} + +# Media Tier Environment +x-env-tier-media: &env-tier-media + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-media # Media tier specific configuration + environment: + - TIER=media + - GPU_ENABLED=${GPU_ENABLED:-true} + - MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS:-4} + +# UI Tier Environment +x-env-tier-ui: &env-tier-ui + <<: *pmoves-env-base + env_file: + - env.shared + - env.tier-ui # UI tier specific configuration + environment: + - TIER=ui + +# Health check template +x-pmoves-healthcheck: &pmoves-healthcheck + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/healthz"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + +# GPU resource template +x-pmoves-gpu: &pmoves-gpu-resource + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + +# Service labels for Prometheus discovery +x-pmoves-labels: &pmoves-labels + labels: + - "pmoves.service=true" + - "prometheus.io/scrape=true" + - "prometheus.io.port=9090" + - "prometheus.io.path=/metrics" + +# Example service definition using the templates: +# +# services: +# my-api-service: +# <<: [*env-tier-api, *pmoves-healthcheck, *pmoves-labels] +# image: ghcr.io/powerfulmoves/my-service:latest +# ports: +# - "8080:8080" +# environment: +# - SERVICE_NAME=my-api-service +# - SERVICE_PORT=8080 +# +# my-gpu-worker: +# <<: [*env-tier-worker, *pmoves-gpu-resource, *pmoves-healthcheck] +# image: ghcr.io/powerfulmoves/my-gpu-worker:latest +# environment: +# - SERVICE_NAME=my-gpu-worker +# - CUDA_VISIBLE_DEVICES=0 diff --git a/env.shared b/env.shared new file mode 100644 index 0000000000..8cb789877e --- /dev/null +++ b/env.shared @@ -0,0 +1,144 @@ +# PMOVES.AI Tier-Based Environment Configuration +# Base Environment File (env.shared) +# This file contains common environment variables for all tiers +# +# Usage: Source this file first, then source your tier-specific file +# +# Place at: env.shared in your submodule root + +# ============================================================================ +# PMOVES.AI Environment Configuration +# ============================================================================ + +# Environment identifier +export PMOVES_ENV=${PMOVES_ENV:-production} +export TIER=${TIER:-api} + +# Service identity (override in service-specific env files) +export SERVICE_NAME=${SERVICE_NAME:-your-service-name} +export SERVICE_SLUG=${SERVICE_SLUG:-your-service-slug} + +# ============================================================================ +# Service Discovery (NATS) +# ============================================================================ + +export NATS_URL=${NATS_URL:-nats://nats:4222} +export NATS_JETSTREAM=${NATS_JETSTREAM:-true} +export NATS_SUBJECT_PREFIX=${NATS_SUBJECT_PREFIX:-pmoves} + +# ============================================================================ +# LLM Gateway (TensorZero) +# ============================================================================ + +export TENSORZERO_URL=${TENSORZERO_URL:-http://tensorzero-gateway:3030} +export TENSORZERO_CLICKHOUSE_URL=${TENSORZERO_CLICKHOUSE_URL:-http://tensorzero-clickhouse:8123} + +# ============================================================================ +# GPU Orchestrator (if applicable) +# ============================================================================ + +export GPU_ORCHESTRATOR_URL=${GPU_ORCHESTRATOR_URL:-http://gpu-orchestrator:8050} + +# ============================================================================ +# Data Services +# ============================================================================ + +# Qdrant (Vector Database) +export QDRANT_URL=${QDRANT_URL:-http://qdrant:6333} +export QDRANT_API_KEY=${QDRANT_API_KEY:-} + +# Neo4j (Graph Database) +export NEO4J_URL=${NEO4J_URL:-http://neo4j:7474} +export NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j} +export NEO4J_PASSWORD=${NEO4J_PASSWORD:-neo4j} + +# Meilisearch (Full-Text Search) +export MEILISEARCH_URL=${MEILISEARCH_URL:-http://meilisearch:7700} +export MEILISEARCH_API_KEY=${MEILISEARCH_API_KEY:-} + +# MinIO (Object Storage) +export MINIO_ENDPOINT=${MINIO_ENDPOINT:-http://minio:9000} +export MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-minioadmin} +export MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-minioadmin} +export MINIO_REGION=${MINIO_REGION:-us-east-1} +export MINIO_USE_SSL=${MINIO_USE_SSL:-false} + +# ============================================================================ +# Supabase (PostgREST + Database) +# ============================================================================ + +export SUPABASE_URL=${SUPABASE_URL:-http://supabase_kong_PMOVES.AI:8000} +export SUPABASE_ANON_KEY=${SUPABASE_ANON_KEY:-} +export SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY:-} +export SUPABASE_DB_URL=${SUPABASE_DB_URL:-postgresql://postgres:${POSTGRES_PASSWORD}@postgres:5432/postgres} + +# ============================================================================ +# Monitoring & Observability +# ============================================================================ + +export PROMETHEUS_URL=${PROMETHEUS_URL:-http://prometheus:9090} +export PROMETHEUS_PUSHGATEWAY=${PROMETHEUS_PUSHGATEWAY:-http://prometheus:9091} +export GRAFANA_URL=${GRAFANA_URL:-http://grafana:3000} +export LOKI_URL=${LOKI_URL:-http://loki:3100} + +# Metrics configuration +export METRICS_ENABLED=${METRICS_ENABLED:-true} +export METRICS_PATH=${METRICS_PATH:-/metrics} +export METRICS_PORT=${METRICS_PORT:-9090} + +# Logging +export LOG_LEVEL=${LOG_LEVEL:-INFO} +export LOG_FORMAT=${LOG_FORMAT:-json} + +# ============================================================================ +# Agent Zero (if applicable) +# ============================================================================ + +export AGENT_ZERO_URL=${AGENT_ZERO_URL:-http://agent-zero:8080} +export AGENT_ZERO_MCP_URL=${AGENT_ZERO_MCP_URL:-http://agent-zero:8080} +export AGENT_ZERO_MCP_PATH=${AGENT_ZERO_MCP_PATH:-/mcp/*} + +# ============================================================================ +# Hi-RAG Gateway (if applicable) +# ============================================================================ + +export HIRAG_V2_URL=${HIRAG_V2_URL:-http://hi-rag-gateway-v2:8086} +export HIRAG_V1_URL=${HIRAG_V1_URL:-http://hi-rag-gateway:8089} + +# ============================================================================ +# Archon (if applicable) +# ============================================================================ + +export ARCHON_URL=${ARCHON_URL:-http://archon:8091} + +# ============================================================================ +# Network Configuration +# ============================================================================ + +export HTTP_PROXY=${HTTP_PROXY:-} +export HTTPS_PROXY=${HTTPS_PROXY:-} +export NO_PROXY=${NO_PROXY:-localhost,127.0.0.1,.local,.internal} + +# ============================================================================ +# Security +# ============================================================================ + +export CHIT_ENVIRONMENT=${CHIT_ENVIRONMENT:-${PMOVES_ENV}} +export CHIT_VAULT_ENDPOINT=${CHIT_VAULT_ENDPOINT:-http://chit-vault:8050} + +# ============================================================================ +# Timeouts & Limits +# ============================================================================ + +export DEFAULT_TIMEOUT=${DEFAULT_TIMEOUT:-30} +export REQUEST_TIMEOUT=${REQUEST_TIMEOUT:-60} +export CONNECT_TIMEOUT=${CONNECT_TIMEOUT:-10} +export MAX_RETRIES=${MAX_RETRIES:-3} + +# ============================================================================ +# Health Check Configuration +# ============================================================================ + +export HEALTH_CHECK_INTERVAL=${HEALTH_CHECK_INTERVAL:-30} +export HEALTH_CHECK_TIMEOUT=${HEALTH_CHECK_TIMEOUT:-5} +export HEALTH_CHECK_PATH=${HEALTH_CHECK_PATH:-/healthz} diff --git a/env.tier-agent.sh b/env.tier-agent.sh new file mode 100644 index 0000000000..a1f246289c --- /dev/null +++ b/env.tier-agent.sh @@ -0,0 +1,47 @@ +# PMOVES.AI Tier Environment: Agent +# For Agent and Orchestrator services (Agent Zero, Archon, etc.) +# Source after env.shared: source env.shared && source env.tier-agent.sh + +# ============================================================================ +# Agent Tier Configuration +# ============================================================================ + +export TIER=agent + +# Agent limits +export MAX_CONCURRENT_AGENTS=${MAX_CONCURRENT_AGENTS:-50} +export MAX_TASKS_PER_AGENT=${MAX_TASKS_PER_AGENT:-10} +export AGENT_TIMEOUT_MS=${AGENT_TIMEOUT_MS:-300000} # 5 minutes +export AGENT_IDLE_TIMEOUT_MS=${AGENT_IDLE_TIMEOUT_MS:-60000} # 1 minute + +# Task queue configuration +export TASK_QUEUE_SIZE=${TASK_QUEUE_SIZE:-1000} +export TASK_RETRY_MAX=${TASK_RETRY_MAX:-3} +export TASK_RETRY_DELAY_MS=${TASK_RETRY_DELAY_MS:-1000} + +# Tool execution +export TOOL_TIMEOUT_MS=${TOOL_TIMEOUT_MS:-60000} # 1 minute +export MAX_TOOL_OUTPUT_SIZE=${MAX_TOOL_OUTPUT_SIZE:-1048576} # 1MB + +# MCP (Model Context Protocol) configuration +export MCP_ENABLED=${MCP_ENABLED:-true} +export MCP_TIMEOUT_MS=${MCP_TIMEOUT_MS:-30000} +export MCP_MAX_MESSAGE_SIZE=${MCP_MAX_MESSAGE_SIZE:-10485760} # 10MB + +# Agent state persistence +export STATE_PERSISTENCE_ENABLED=${STATE_PERSISTENCE_ENABLED:-true} +export STATE_BACKEND=${STATE_BACKEND:-supabase} # supabase | memory | file + +# LLM configuration for agents +export DEFAULT_MODEL=${DEFAULT_MODEL:-claude-sonnet-4-5} +export DEFAULT_TEMPERATURE=${DEFAULT_TEMPERATURE:-0.7} +export DEFAULT_MAX_TOKENS=${DEFAULT_MAX_TOKENS:-4096} + +# Prompt management +export PROMPT_CACHE_ENABLED=${PROMPT_CACHE_ENABLED:-true} +export PROMPT_CACHE_SIZE=${PROMPT_CACHE_SIZE:-1000} +export PROMPT_CACHE_TTL=${PROMPT_CACHE_TTL:-3600} # 1 hour + +# Archon-specific (if using Archon) +export ARCHON_PROMPT_BACKEND=${ARCHON_PROMPT_BACKEND:-supabase} +export ARCHON_FORM_SCHEMA_PATH=${ARCHON_FORM_SCHEMA_PATH:-/forms} diff --git a/envared b/envared new file mode 100644 index 0000000000..8cb789877e --- /dev/null +++ b/envared @@ -0,0 +1,144 @@ +# PMOVES.AI Tier-Based Environment Configuration +# Base Environment File (env.shared) +# This file contains common environment variables for all tiers +# +# Usage: Source this file first, then source your tier-specific file +# +# Place at: env.shared in your submodule root + +# ============================================================================ +# PMOVES.AI Environment Configuration +# ============================================================================ + +# Environment identifier +export PMOVES_ENV=${PMOVES_ENV:-production} +export TIER=${TIER:-api} + +# Service identity (override in service-specific env files) +export SERVICE_NAME=${SERVICE_NAME:-your-service-name} +export SERVICE_SLUG=${SERVICE_SLUG:-your-service-slug} + +# ============================================================================ +# Service Discovery (NATS) +# ============================================================================ + +export NATS_URL=${NATS_URL:-nats://nats:4222} +export NATS_JETSTREAM=${NATS_JETSTREAM:-true} +export NATS_SUBJECT_PREFIX=${NATS_SUBJECT_PREFIX:-pmoves} + +# ============================================================================ +# LLM Gateway (TensorZero) +# ============================================================================ + +export TENSORZERO_URL=${TENSORZERO_URL:-http://tensorzero-gateway:3030} +export TENSORZERO_CLICKHOUSE_URL=${TENSORZERO_CLICKHOUSE_URL:-http://tensorzero-clickhouse:8123} + +# ============================================================================ +# GPU Orchestrator (if applicable) +# ============================================================================ + +export GPU_ORCHESTRATOR_URL=${GPU_ORCHESTRATOR_URL:-http://gpu-orchestrator:8050} + +# ============================================================================ +# Data Services +# ============================================================================ + +# Qdrant (Vector Database) +export QDRANT_URL=${QDRANT_URL:-http://qdrant:6333} +export QDRANT_API_KEY=${QDRANT_API_KEY:-} + +# Neo4j (Graph Database) +export NEO4J_URL=${NEO4J_URL:-http://neo4j:7474} +export NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j} +export NEO4J_PASSWORD=${NEO4J_PASSWORD:-neo4j} + +# Meilisearch (Full-Text Search) +export MEILISEARCH_URL=${MEILISEARCH_URL:-http://meilisearch:7700} +export MEILISEARCH_API_KEY=${MEILISEARCH_API_KEY:-} + +# MinIO (Object Storage) +export MINIO_ENDPOINT=${MINIO_ENDPOINT:-http://minio:9000} +export MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-minioadmin} +export MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-minioadmin} +export MINIO_REGION=${MINIO_REGION:-us-east-1} +export MINIO_USE_SSL=${MINIO_USE_SSL:-false} + +# ============================================================================ +# Supabase (PostgREST + Database) +# ============================================================================ + +export SUPABASE_URL=${SUPABASE_URL:-http://supabase_kong_PMOVES.AI:8000} +export SUPABASE_ANON_KEY=${SUPABASE_ANON_KEY:-} +export SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY:-} +export SUPABASE_DB_URL=${SUPABASE_DB_URL:-postgresql://postgres:${POSTGRES_PASSWORD}@postgres:5432/postgres} + +# ============================================================================ +# Monitoring & Observability +# ============================================================================ + +export PROMETHEUS_URL=${PROMETHEUS_URL:-http://prometheus:9090} +export PROMETHEUS_PUSHGATEWAY=${PROMETHEUS_PUSHGATEWAY:-http://prometheus:9091} +export GRAFANA_URL=${GRAFANA_URL:-http://grafana:3000} +export LOKI_URL=${LOKI_URL:-http://loki:3100} + +# Metrics configuration +export METRICS_ENABLED=${METRICS_ENABLED:-true} +export METRICS_PATH=${METRICS_PATH:-/metrics} +export METRICS_PORT=${METRICS_PORT:-9090} + +# Logging +export LOG_LEVEL=${LOG_LEVEL:-INFO} +export LOG_FORMAT=${LOG_FORMAT:-json} + +# ============================================================================ +# Agent Zero (if applicable) +# ============================================================================ + +export AGENT_ZERO_URL=${AGENT_ZERO_URL:-http://agent-zero:8080} +export AGENT_ZERO_MCP_URL=${AGENT_ZERO_MCP_URL:-http://agent-zero:8080} +export AGENT_ZERO_MCP_PATH=${AGENT_ZERO_MCP_PATH:-/mcp/*} + +# ============================================================================ +# Hi-RAG Gateway (if applicable) +# ============================================================================ + +export HIRAG_V2_URL=${HIRAG_V2_URL:-http://hi-rag-gateway-v2:8086} +export HIRAG_V1_URL=${HIRAG_V1_URL:-http://hi-rag-gateway:8089} + +# ============================================================================ +# Archon (if applicable) +# ============================================================================ + +export ARCHON_URL=${ARCHON_URL:-http://archon:8091} + +# ============================================================================ +# Network Configuration +# ============================================================================ + +export HTTP_PROXY=${HTTP_PROXY:-} +export HTTPS_PROXY=${HTTPS_PROXY:-} +export NO_PROXY=${NO_PROXY:-localhost,127.0.0.1,.local,.internal} + +# ============================================================================ +# Security +# ============================================================================ + +export CHIT_ENVIRONMENT=${CHIT_ENVIRONMENT:-${PMOVES_ENV}} +export CHIT_VAULT_ENDPOINT=${CHIT_VAULT_ENDPOINT:-http://chit-vault:8050} + +# ============================================================================ +# Timeouts & Limits +# ============================================================================ + +export DEFAULT_TIMEOUT=${DEFAULT_TIMEOUT:-30} +export REQUEST_TIMEOUT=${REQUEST_TIMEOUT:-60} +export CONNECT_TIMEOUT=${CONNECT_TIMEOUT:-10} +export MAX_RETRIES=${MAX_RETRIES:-3} + +# ============================================================================ +# Health Check Configuration +# ============================================================================ + +export HEALTH_CHECK_INTERVAL=${HEALTH_CHECK_INTERVAL:-30} +export HEALTH_CHECK_TIMEOUT=${HEALTH_CHECK_TIMEOUT:-5} +export HEALTH_CHECK_PATH=${HEALTH_CHECK_PATH:-/healthz} diff --git a/pmoves_announcer/__init__.py b/pmoves_announcer/__init__.py new file mode 100644 index 0000000000..5b4774442b --- /dev/null +++ b/pmoves_announcer/__init__.py @@ -0,0 +1,352 @@ +""" +PMOVES.AI Service Announcer Template + +NATS service discovery announcer for all PMOVES services. +Publishes service announcements to the services.announce.v1 subject. + +Usage: + from service_announcer import ServiceAnnouncer, announce_service + + # Create announcement + announcer = ServiceAnnouncer( + slug="my-service", + name="My Service", + url="http://my-service:8080", + port=8080, + tier="api" + ) + + # Announce on startup + await announcer.announce() + + # Or use the convenience function + await announce_service( + slug="my-service", + name="My Service", + url="http://my-service:8080", + port=8080, + tier="api" + ) +""" + +import asyncio +import json +import os +from dataclasses import dataclass, field, asdict +from datetime import datetime +from typing import Any, Dict, List, Optional +from enum import Enum + + +class ServiceTier(str, Enum): + """PMOVES service tiers.""" + DATA = "data" + API = "api" + LLM = "llm" + MEDIA = "media" + AGENT = "agent" + WORKER = "worker" + APP = "app" + UI = "ui" + + +@dataclass +class ServiceAnnouncement: + """ + Service announcement message format for NATS. + + Services publish announcements on the `services.announce.v1` subject + to notify other services of their availability and configuration. + """ + + slug: str + name: str + url: str + health_check: str + tier: ServiceTier + port: int + timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat()) + metadata: Dict[str, Any] = field(default_factory=dict) + + # NATS subject for announcements + SUBJECT: str = "services.announce.v1" + + def to_json(self) -> str: + """Convert to JSON for NATS publishing.""" + data = { + "slug": self.slug, + "name": self.name, + "url": self.url, + "health_check": self.health_check, + "tier": self.tier.value if isinstance(self.tier, ServiceTier) else self.tier, + "port": self.port, + "timestamp": self.timestamp, + "metadata": self.metadata, + } + return json.dumps(data) + + @classmethod + def from_json(cls, data: str | dict) -> "ServiceAnnouncement": + """Parse from JSON message.""" + if isinstance(data, str): + data = json.loads(data) + return cls( + slug=data["slug"], + name=data["name"], + url=data["url"], + health_check=data["health_check"], + tier=ServiceTier(data["tier"]), + port=data["port"], + timestamp=data.get("timestamp", datetime.utcnow().isoformat()), + metadata=data.get("metadata", {}), + ) + + +class ServiceAnnouncer: + """ + NATS service announcer for PMOVES services. + + Handles announcing service availability to the PMOVES service mesh. + """ + + def __init__( + self, + slug: str, + name: str, + url: str, + port: int, + tier: ServiceTier | str, + health_check: str = None, + nats_url: str = None, + metadata: Dict[str, Any] = None, + ): + """ + Initialize the service announcer. + + Args: + slug: Unique service identifier (e.g., "hirag-v2") + name: Human-readable service name + url: Full service URL + port: Service port number + tier: Service tier (api, agent, worker, etc.) + health_check: Health check URL (defaults to url + /healthz) + nats_url: NATS server URL (defaults to NATS_URL env var) + metadata: Additional service metadata + """ + self.slug = slug + self.name = name + self.url = url + self.port = port + + if isinstance(tier, str): + tier = ServiceTier(tier.lower()) + self.tier = tier + + self.health_check = health_check or f"{url.rstrip('/')}/healthz" + self.nats_url = nats_url or os.getenv("NATS_URL", "nats://nats:4222") + self.metadata = metadata or {} + + def create_announcement(self) -> ServiceAnnouncement: + """Create a service announcement object.""" + return ServiceAnnouncement( + slug=self.slug, + name=self.name, + url=self.url, + health_check=self.health_check, + tier=self.tier, + port=self.port, + timestamp=datetime.utcnow().isoformat(), + metadata=self.metadata, + ) + + async def announce(self) -> bool: + """ + Publish service announcement to NATS. + + Returns: + True if announcement published successfully + """ + try: + from nats.aio.client import Client as NATS + + announcement = self.create_announcement() + + nc = await NATS.connect(self.nats_url, connect_timeout=5) + await nc.publish( + ServiceAnnouncement.SUBJECT, + announcement.to_json().encode(), + ) + await nc.flush() + await nc.close() + + return True + except Exception as e: + print(f"Failed to announce service: {e}") + return False + + async def announce_with_retry( + self, max_retries: int = 3, delay: float = 1.0 + ) -> bool: + """ + Announce service with retry logic. + + Args: + max_retries: Maximum number of retry attempts + delay: Delay between retries in seconds + + Returns: + True if announcement published successfully + """ + for attempt in range(max_retries): + if await self.announce(): + return True + if attempt < max_retries - 1: + await asyncio.sleep(delay * (2**attempt)) # Exponential backoff + return False + + +async def announce_service( + slug: str, + name: str, + url: str, + port: int, + tier: ServiceTier | str, + health_check: str = None, + nats_url: str = None, + metadata: Dict[str, Any] = None, +) -> bool: + """ + Convenience function to announce a service. + + Args: + slug: Unique service identifier + name: Human-readable service name + url: Full service URL + port: Service port + tier: Service tier + health_check: Health check URL + nats_url: NATS server URL + metadata: Additional metadata + + Returns: + True if announcement successful + + Example: + await announce_service( + slug="hirag-v2", + name="Hi-RAG Gateway v2", + url="http://hi-rag-gateway-v2:8086", + port=8086, + tier="api", + metadata={"gpu_port": 8087} + ) + """ + announcer = ServiceAnnouncer( + slug=slug, + name=name, + url=url, + port=port, + tier=tier, + health_check=health_check, + nats_url=nats_url, + metadata=metadata, + ) + return await announcer.announce() + + +class BackgroundAnnouncer: + """ + Background service announcer that announces periodically. + + Useful for services that want to periodically re-announce themselves. + """ + + def __init__( + self, + announcer: ServiceAnnouncer, + interval: float = 60.0, + ): + """ + Initialize background announcer. + + Args: + announcer: Service announcer to use + interval: Announcement interval in seconds + """ + self.announcer = announcer + self.interval = interval + self._running = False + self._task: Optional[asyncio.Task] = None + + async def _announce_loop(self): + """Internal announcement loop.""" + while self._running: + await self.announcer.announce() + await asyncio.sleep(self.interval) + + async def start(self): + """Start background announcements.""" + if not self._running: + self._running = True + self._task = asyncio.create_task(self._announce_loop()) + # Initial announcement + await self.announcer.announce() + + async def stop(self): + """Stop background announcements.""" + if self._running: + self._running = False + if self._task: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + + +# Example usage and testing +if __name__ == "__main__": + async def main(): + """Example usage of service announcer.""" + + # Example 1: Simple announcement + await announce_service( + slug="example-service", + name="Example Service", + url="http://localhost:8080", + port=8080, + tier="api", + ) + print("Service announced!") + + # Example 2: With metadata + await announce_service( + slug="hirag-v2", + name="Hi-RAG Gateway v2", + url="http://hi-rag-gateway-v2:8086", + port=8086, + tier="api", + metadata={ + "gpu_port": 8087, + "features": ["vector", "graph", "fulltext"], + "rerank_enabled": True, + }, + ) + + # Example 3: Background announcer + announcer = ServiceAnnouncer( + slug="bg-service", + name="Background Service", + url="http://localhost:8081", + port=8081, + tier="worker", + ) + bg = BackgroundAnnouncer(announcer, interval=30) + await bg.start() + print("Background announcer started (30s interval)") + + # Keep running... + await asyncio.sleep(10) + await bg.stop() + + asyncio.run(main()) diff --git a/pmoves_health/__init__.py b/pmoves_health/__init__.py new file mode 100644 index 0000000000..abfdfc52a6 --- /dev/null +++ b/pmoves_health/__init__.py @@ -0,0 +1,272 @@ +""" +PMOVES.AI Health Endpoint Template + +Standard health check endpoint for all PMOVES services. +Follows PMOVES.AI conventions for service health monitoring. + +Usage: + from pmoves_health import create_health_app + app = create_health_app() + + # Or add to existing FastAPI app + from pmoves_health import health_check_router + app.include_router(health_check_router) +""" + +from datetime import datetime +from functools import wraps +from typing import Any, Callable, Dict, List +import os +import asyncio + +try: + from fastapi import APIRouter, HTTPException + from fastapi.responses import JSONResponse + FASTAPI_AVAILABLE = True +except ImportError: + FASTAPI_AVAILABLE = False + + +# Health check configuration +HEALTH_CHECK_PATH = "/healthz" +HEALTH_CHECK_TIMEOUT = 5.0 + + +class HealthStatus: + """Health status constants.""" + HEALTHY = "healthy" + DEGRADED = "degraded" + UNHEALTHY = "unhealthy" + + +class DependencyCheck: + """Base class for dependency health checks.""" + + def __init__(self, name: str, required: bool = True): + self.name = name + self.required = required + + async def check(self) -> bool: + """Check if dependency is healthy. Override in subclass.""" + raise NotImplementedError + + def status_key(self) -> str: + """Return the status key for this check.""" + return f"{self.name.lower().replace(' ', '_')}_connected" + + +class DatabaseCheck(DependencyCheck): + """Health check for database connections.""" + + def __init__(self, connect_fn: Callable, **kwargs): + super().__init__("database", kwargs.get("required", True)) + self.connect_fn = connect_fn + + async def check(self) -> bool: + try: + return await asyncio.to_thread(self.connect_fn) + except Exception: + return False + + +class HTTPCheck(DependencyCheck): + """Health check for HTTP endpoints.""" + + def __init__(self, url: str, **kwargs): + name = kwargs.get("name", "service") + super().__init__(name, kwargs.get("required", True)) + self.url = url + + async def check(self) -> bool: + try: + import httpx + async with httpx.AsyncClient(timeout=2.0) as client: + response = await client.get(self.url) + return response.status_code == 200 + except Exception: + return False + + +class NATSCheck(DependencyCheck): + """Health check for NATS connection.""" + + def __init__(self, nats_url: str, **kwargs): + super().__init__("nats", kwargs.get("required", True)) + self.nats_url = nats_url + + async def check(self) -> bool: + try: + from nats.aio.client import Client as NATS + nc = await NATS.connect(self.nats_url, connect_timeout=2) + await nc.close() + return True + except Exception: + return False + + +class HealthChecker: + """Health checker with multiple dependency checks.""" + + def __init__(self, service_name: str = None): + self.service_name = service_name or os.getenv("SERVICE_NAME", "unknown") + self.checks: List[DependencyCheck] = [] + self.custom_checks: Dict[str, Callable] = {} + + def add_check(self, check: DependencyCheck) -> None: + """Add a dependency check.""" + self.checks.append(check) + + def add_custom_check(self, name: str, check_fn: Callable) -> None: + """Add a custom health check function.""" + self.custom_checks[name] = check_fn + + def database(self, connect_fn: Callable) -> None: + """Add a database health check.""" + self.add_check(DatabaseCheck(connect_fn)) + + def http(self, url: str, name: str = "service") -> None: + """Add an HTTP endpoint health check.""" + self.add_check(HTTPCheck(url, name=name)) + + def nats(self, nats_url: str) -> None: + """Add a NATS health check.""" + self.add_check(NATSCheck(nats_url)) + + async def check_all(self) -> Dict[str, Any]: + """Run all health checks and return status.""" + results = { + "status": HealthStatus.HEALTHY, + "service": self.service_name, + "timestamp": datetime.utcnow().isoformat(), + } + + all_healthy = True + some_degraded = False + + # Run dependency checks + for check in self.checks: + try: + is_healthy = await check.check() + results[check.status_key()] = is_healthy + + if not is_healthy: + if check.required: + all_healthy = False + else: + some_degraded = True + except Exception as e: + results[check.status_key()] = False + if check.required: + all_healthy = False + else: + some_degraded = True + + # Run custom checks + for name, check_fn in self.custom_checks.items(): + try: + result = await check_fn() if asyncio.iscoroutinefunction(check_fn) else check_fn() + results[name] = bool(result) + if not result: + all_healthy = False + except Exception: + results[name] = False + all_healthy = False + + # Determine overall status + if not all_healthy: + results["status"] = HealthStatus.UNHEALTHY + elif some_degraded: + results["status"] = HealthStatus.DEGRADED + + return results + + +# Global health checker instance +_health_checker = HealthChecker() + + +def health_check(checks: List[DependencyCheck] = None): + """Decorator to add health checks to a function.""" + def decorator(func: Callable): + @wraps(func) + async def wrapper(*args, **kwargs): + checker = _health_checker + if checks: + for check in checks: + checker.add_check(check) + return await func(*args, **kwargs) + return wrapper + return decorator + + +def add_database_check(connect_fn: Callable) -> None: + """Add a database health check.""" + _health_checker.database(connect_fn) + + +def add_http_check(url: str, name: str = "service") -> None: + """Add an HTTP endpoint health check.""" + _health_checker.http(url, name) + + +def add_nats_check(nats_url: str) -> None: + """Add a NATS health check.""" + _health_checker.nats(nats_url) + + +def add_custom_check(name: str, check_fn: Callable) -> None: + """Add a custom health check function.""" + _health_checker.add_custom_check(name, check_fn) + + +async def get_health_status() -> Dict[str, Any]: + """Get current health status.""" + return await _health_checker.check_all() + + +if FASTAPI_AVAILABLE: + from fastapi import APIRouter + + health_check_router = APIRouter() + + @health_check_router.get(HEALTH_CHECK_PATH) + async def healthz(): + """Standard health check endpoint.""" + return await get_health_status() + + def create_health_app(service_name: str = None) -> "FastAPI": + """Create a minimal FastAPI app with health check.""" + from fastapi import FastAPI + app = FastAPI(title=service_name or "PMOVES Service") + app.include_router(health_check_router) + return app +else: + def create_health_app(service_name: str = None): + """Raise error if FastAPI not available.""" + raise ImportError("FastAPI is required to create health app") + + +# Example usage +if __name__ == "__main__": + async def example_usage(): + """Example of how to use the health checker.""" + + # Create a health checker + checker = HealthChecker("example-service") + + # Add checks + checker.nats("nats://nats:4222") + checker.http("http://supabase:8000", name="supabase") + + # Add custom check + async def check_memory(): + import psutil + return psutil.virtual_memory().percent < 90 + + checker.add_custom_check("memory_ok", check_memory) + + # Run checks + status = await checker.check_all() + print(status) + + asyncio.run(example_usage()) diff --git a/pmoves_registry/__init__.py b/pmoves_registry/__init__.py new file mode 100644 index 0000000000..231f1036bf --- /dev/null +++ b/pmoves_registry/__init__.py @@ -0,0 +1,273 @@ +""" +PMOVES.AI Service Registry Integration Template + +Service discovery using the PMOVES service registry with fallback chain: +1. Environment variables (static overrides) +2. Supabase service catalog (dynamic, runtime) +3. NATS service announcements (real-time, cached) +4. Docker DNS (development fallback) + +Usage: + from service_registry import get_service_url, ServiceInfo + + # Simple URL resolution + url = await get_service_url("hirag-v2") + + # Get full service info + info = await get_service_info("hirag-v2") + print(f"{info.name}: {info.health_check_url}") +""" + +import asyncio +import os +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Optional + + +class ServiceTier(str, Enum): + """PMOVES service tiers.""" + DATA = "data" + API = "api" + LLM = "llm" + MEDIA = "media" + AGENT = "agent" + WORKER = "worker" + APP = "app" + UI = "ui" + + +@dataclass(frozen=True) +class ServiceInfo: + """ + Immutable service metadata from the service catalog. + + Attributes: + slug: Unique service identifier (e.g., "hirag-v2", "agent-zero") + name: Human-readable service name + description: Service description + health_check_url: Full URL to health check endpoint + base_url: Base URL of the service + default_port: Default container port + tier: Service tier classification + metadata: Extended metadata as JSON + """ + + slug: str + name: str + description: str + health_check_url: str + default_port: int | None + tier: ServiceTier + metadata: dict[str, Any] = field(default_factory=dict) + + @property + def base_url(self) -> str: + """Extract base URL from health_check_url.""" + url = self.health_check_url + for suffix in ("/healthz", "/health", "/metrics", "/ping"): + if url.endswith(suffix): + url = url[: -len(suffix)] + break + return url.rstrip("/") + + +class ServiceNotFoundError(Exception): + """Raised when a service cannot be found.""" + + def __init__(self, slug: str, message: str | None = None): + self.slug = slug + super().__init__(message or f"Service '{slug}' not found in service catalog") + + +def _get_env_url(slug: str) -> str | None: + """ + Check for environment variable override. + + Environment variables are checked in the following order: + 1. _URL (e.g., HIRAG_V2_URL) + 2. _URL (e.g., HIRAG-V2-URL) + 3. UPPERCASE_SLUG_URL (e.g., HIRAGV2_URL) + + Args: + slug: Service slug (e.g., "hirag-v2") + + Returns: + URL from environment or None + """ + env_var_patterns = [ + slug.upper().replace("-", "_") + "_URL", # HIRAG_V2_URL + slug.upper().replace("-", "") + "_URL", # HIRAGV2_URL + slug.upper() + "_URL", # HIRAG-V2_URL + ] + + for pattern in env_var_patterns: + if url := os.getenv(pattern): + return url + + return None + + +def _fallback_dns_url(slug: str, default_port: int) -> str: + """ + Generate fallback URL using Docker DNS. + + Args: + slug: Service slug (used as DNS name) + default_port: Port to use if service has no default + + Returns: + Fallback service URL + """ + return f"http://{slug}:{default_port}" + + +async def get_service_info( + slug: str, + *, + default_port: int = 80, +) -> ServiceInfo: + """ + Get complete service information using fallback chain. + + Resolution order: + 1. Environment variable override + 2. Constructed URL (with warning) + + Args: + slug: Service slug to resolve + default_port: Port for fallback URL construction + + Returns: + ServiceInfo with service metadata + + Raises: + ServiceNotFoundError: If service cannot be resolved + """ + # 1. Check environment variable override + if env_url := _get_env_url(slug): + return ServiceInfo( + slug=slug, + name=f"{slug} (from env)", + description=f"Service URL from environment variable", + health_check_url=env_url, + default_port=default_port, + tier=ServiceTier.API, # Default tier + ) + + # 2. Fallback to DNS-based URL + fallback_url = _fallback_dns_url(slug, default_port) + return ServiceInfo( + slug=slug, + name=f"{slug} (fallback)", + description=f"Service resolved via Docker DNS fallback", + health_check_url=fallback_url, + default_port=default_port, + tier=ServiceTier.API, + ) + + +async def get_service_url( + slug: str, + *, + default_port: int = 80, + use_base_url: bool = True, +) -> str: + """ + Resolve service URL with fallback chain. + + Args: + slug: Service slug to resolve + default_port: Port for fallback URL construction + use_base_url: Return base URL instead of health_check_url + + Returns: + Resolved service URL + + Example: + >>> await get_service_url("hirag-v2") + "http://hi-rag-gateway-v2:8086" + """ + info = await get_service_info(slug, default_port=default_port) + return info.base_url if use_base_url else info.health_check_url + + +async def check_service_health( + slug: str, + *, + default_port: int = 80, + timeout: float = 5.0, +) -> bool: + """ + Check if a service is healthy by calling its health endpoint. + + Args: + slug: Service slug to check + default_port: Port for fallback URL construction + timeout: HTTP request timeout in seconds + + Returns: + True if service is healthy, False otherwise + """ + import httpx + + info = await get_service_info(slug, default_port=default_port) + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.get(info.health_check_url) + return response.status_code == 200 + except Exception: + return False + + +# Common service URLs for quick reference +class CommonServices: + """Common PMOVES service URLs for quick reference.""" + + # Agent Coordination + AGENT_ZERO = "http://agent-zero:8080" + ARCHON = "http://archon:8091" + MESH_AGENT = "mesh-agent" # No HTTP interface + + # LLM Gateway + TENSORZERO = "http://tensorzero-gateway:3030" + TENSORZERO_UI = "http://tensorzero-ui:4000" + + # Retrieval + HIRAG_V2 = "http://hi-rag-gateway-v2:8086" + HIRAG_V1 = "http://hi-rag-gateway:8089" + + # Data Services + QDRANT = "http://qdrant:6333" + NEO4J = "http://neo4j:7474" + MEILISEARCH = "http://meilisearch:7700" + MINIO = "http://minio:9000" + + # NATS + NATS = "nats://nats:4222" + + @classmethod + def get(cls, service: str) -> str: + """Get a common service URL by name.""" + return getattr(cls, service.upper(), None) + + +if __name__ == "__main__": + # Example usage + async def main(): + # Get service URL + url = await get_service_url("hirag-v2", default_port=8086) + print(f"Hi-RAG URL: {url}") + + # Check service health + healthy = await check_service_health("hirag-v2", default_port=8086) + print(f"Hi-RAG Healthy: {healthy}") + + # Get service info + info = await get_service_info("agent-zero", default_port=8080) + print(f"Service: {info.name}") + print(f"Base URL: {info.base_url}") + print(f"Health Check: {info.health_check_url}") + + asyncio.run(main()) From 5cfb5bdd093affd3dff8d85f5623423ad6dcbd4a Mon Sep 17 00:00:00 2001 From: POWERFULMOVES Date: Wed, 21 Jan 2026 08:40:10 -0500 Subject: [PATCH 2/4] fix(integration): Apply Phase 1 critical bug fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix deprecated datetime.utcnow() → datetime.now(timezone.utc) - Fix YAML environment merge (list → map for proper merging) - Fix health check decorator accumulation bug - Fix health endpoint status codes (return 503 when unhealthy) - Remove APP/UI tiers (stick to 6-tier architecture) - Fix resource leaks in NATS connections (try/finally) --- docker-compose.pmoves.yml | 66 ++++++++++++++++-------------------- pmoves_announcer/__init__.py | 12 +++---- pmoves_health/__init__.py | 51 ++++++++++++++++++++++------ 3 files changed, 75 insertions(+), 54 deletions(-) diff --git a/docker-compose.pmoves.yml b/docker-compose.pmoves.yml index e40b65a2aa..3aa0629be1 100644 --- a/docker-compose.pmoves.yml +++ b/docker-compose.pmoves.yml @@ -8,21 +8,22 @@ # my-service: # <<: *env-tier-api # environment: -# - SERVICE_NAME=my-service -# - SERVICE_SPECIFIC_VAR=value +# SERVICE_NAME: my-service +# SERVICE_SPECIFIC_VAR: value version: "3.8" # PMOVES.AI Environment Loading Anchors # These anchors provide tier-based environment file loading +# NOTE: Use map-based environment vars (not list-based) for proper YAML merging x-pmoves-env: &pmoves-env-base env_file: - env.shared # Base PMOVES.AI configuration environment: - - PMOVES_ENV=${PMOVES_ENV:-production} - - TIER=${TIER} - - NATS_URL=${NATS_URL:-nats://nats:4222} - - TENSORZERO_URL=${TENSORZERO_URL:-http://tensorzero-gateway:3030} + PMOVES_ENV: ${PMOVES_ENV:-production} + TIER: ${TIER} + NATS_URL: ${NATS_URL:-nats://nats:4222} + TENSORZERO_URL: ${TENSORZERO_URL:-http://tensorzero-gateway:3030} # API Tier Environment x-env-tier-api: &env-tier-api @@ -31,9 +32,9 @@ x-env-tier-api: &env-tier-api - env.shared - env.tier-api # API tier specific configuration environment: - - TIER=api - - MAX_CONCURRENT_REQUESTS=${MAX_CONCURRENT_REQUESTS:-100} - - RATE_LIMIT_ENABLED=${RATE_LIMIT_ENABLED:-true} + TIER: api + MAX_CONCURRENT_REQUESTS: ${MAX_CONCURRENT_REQUESTS:-100} + RATE_LIMIT_ENABLED: ${RATE_LIMIT_ENABLED:-true} # Agent Tier Environment x-env-tier-agent: &env-tier-agent @@ -42,9 +43,9 @@ x-env-tier-agent: &env-tier-agent - env.shared - env.tier-agent # Agent tier specific configuration environment: - - TIER=agent - - MAX_CONCURRENT_AGENTS=${MAX_CONCURRENT_AGENTS:-50} - - MCP_ENABLED=${MCP_ENABLED:-true} + TIER: agent + MAX_CONCURRENT_AGENTS: ${MAX_CONCURRENT_AGENTS:-50} + MCP_ENABLED: ${MCP_ENABLED:-true} # Worker Tier Environment x-env-tier-worker: &env-tier-worker @@ -53,9 +54,9 @@ x-env-tier-worker: &env-tier-worker - env.shared - env.tier-worker # Worker tier specific configuration environment: - - TIER=worker - - MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS:-10} - - WORKER_POOL_SIZE=${WORKER_POOL_SIZE:-4} + TIER: worker + MAX_CONCURRENT_JOBS: ${MAX_CONCURRENT_JOBS:-10} + WORKER_POOL_SIZE: ${WORKER_POOL_SIZE:-4} # Data Tier Environment x-env-tier-data: &env-tier-data @@ -64,8 +65,8 @@ x-env-tier-data: &env-tier-data - env.shared - env.tier-data # Data tier specific configuration environment: - - TIER=data - - MAX_CONNECTIONS=${MAX_CONNECTIONS:-100} + TIER: data + MAX_CONNECTIONS: ${MAX_CONNECTIONS:-100} # LLM Tier Environment x-env-tier-llm: &env-tier-llm @@ -74,8 +75,8 @@ x-env-tier-llm: &env-tier-llm - env.shared - env.tier-llm # LLM tier specific configuration environment: - - TIER=llm - - MAX_CONCURRENT_REQUESTS=${MAX_CONCURRENT_REQUESTS:-50} + TIER: llm + MAX_CONCURRENT_REQUESTS: ${MAX_CONCURRENT_REQUESTS:-50} # Media Tier Environment x-env-tier-media: &env-tier-media @@ -84,18 +85,9 @@ x-env-tier-media: &env-tier-media - env.shared - env.tier-media # Media tier specific configuration environment: - - TIER=media - - GPU_ENABLED=${GPU_ENABLED:-true} - - MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS:-4} - -# UI Tier Environment -x-env-tier-ui: &env-tier-ui - <<: *pmoves-env-base - env_file: - - env.shared - - env.tier-ui # UI tier specific configuration - environment: - - TIER=ui + TIER: media + GPU_ENABLED: ${GPU_ENABLED:-true} + MAX_CONCURRENT_JOBS: ${MAX_CONCURRENT_JOBS:-4} # Health check template x-pmoves-healthcheck: &pmoves-healthcheck @@ -121,8 +113,8 @@ x-pmoves-labels: &pmoves-labels labels: - "pmoves.service=true" - "prometheus.io/scrape=true" - - "prometheus.io.port=9090" - - "prometheus.io.path=/metrics" + - "prometheus.io/port=9090" + - "prometheus.io/path=/metrics" # Example service definition using the templates: # @@ -133,12 +125,12 @@ x-pmoves-labels: &pmoves-labels # ports: # - "8080:8080" # environment: -# - SERVICE_NAME=my-api-service -# - SERVICE_PORT=8080 +# SERVICE_NAME: my-api-service +# SERVICE_PORT: 8080 # # my-gpu-worker: # <<: [*env-tier-worker, *pmoves-gpu-resource, *pmoves-healthcheck] # image: ghcr.io/powerfulmoves/my-gpu-worker:latest # environment: -# - SERVICE_NAME=my-gpu-worker -# - CUDA_VISIBLE_DEVICES=0 +# SERVICE_NAME: my-gpu-worker +# CUDA_VISIBLE_DEVICES: 0 diff --git a/pmoves_announcer/__init__.py b/pmoves_announcer/__init__.py index 5b4774442b..3f3c2c4719 100644 --- a/pmoves_announcer/__init__.py +++ b/pmoves_announcer/__init__.py @@ -33,21 +33,19 @@ import json import os from dataclasses import dataclass, field, asdict -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Dict, List, Optional from enum import Enum class ServiceTier(str, Enum): - """PMOVES service tiers.""" + """PMOVES service tiers (6-tier architecture).""" DATA = "data" API = "api" LLM = "llm" MEDIA = "media" AGENT = "agent" WORKER = "worker" - APP = "app" - UI = "ui" @dataclass @@ -65,7 +63,7 @@ class ServiceAnnouncement: health_check: str tier: ServiceTier port: int - timestamp: str = field(default_factory=lambda: datetime.utcnow().isoformat()) + timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) metadata: Dict[str, Any] = field(default_factory=dict) # NATS subject for announcements @@ -97,7 +95,7 @@ def from_json(cls, data: str | dict) -> "ServiceAnnouncement": health_check=data["health_check"], tier=ServiceTier(data["tier"]), port=data["port"], - timestamp=data.get("timestamp", datetime.utcnow().isoformat()), + timestamp=data.get("timestamp", datetime.now(timezone.utc).isoformat()), metadata=data.get("metadata", {}), ) @@ -155,7 +153,7 @@ def create_announcement(self) -> ServiceAnnouncement: health_check=self.health_check, tier=self.tier, port=self.port, - timestamp=datetime.utcnow().isoformat(), + timestamp=datetime.now(timezone.utc).isoformat(), metadata=self.metadata, ) diff --git a/pmoves_health/__init__.py b/pmoves_health/__init__.py index abfdfc52a6..eb4a2f6379 100644 --- a/pmoves_health/__init__.py +++ b/pmoves_health/__init__.py @@ -13,7 +13,7 @@ app.include_router(health_check_router) """ -from datetime import datetime +from datetime import datetime, timezone from functools import wraps from typing import Any, Callable, Dict, List import os @@ -95,13 +95,19 @@ def __init__(self, nats_url: str, **kwargs): self.nats_url = nats_url async def check(self) -> bool: + nc = None try: from nats.aio.client import Client as NATS nc = await NATS.connect(self.nats_url, connect_timeout=2) - await nc.close() return True except Exception: return False + finally: + if nc: + try: + await nc.close() + except Exception: + pass class HealthChecker: @@ -137,7 +143,7 @@ async def check_all(self) -> Dict[str, Any]: results = { "status": HealthStatus.HEALTHY, "service": self.service_name, - "timestamp": datetime.utcnow().isoformat(), + "timestamp": datetime.now(timezone.utc).isoformat(), } all_healthy = True @@ -186,14 +192,28 @@ async def check_all(self) -> Dict[str, Any]: def health_check(checks: List[DependencyCheck] = None): - """Decorator to add health checks to a function.""" + """ + Decorator to register health checks for a service. + + NOTE: Checks are registered once at import time, not on each call. + Use this decorator on functions that need dependency checks. + + Args: + checks: List of dependency checks to register + + Example: + @health_check([DatabaseCheck(connect_fn)]) + async def my_handler(): + ... + """ def decorator(func: Callable): + # Register checks once when decorator is applied, not on each call + if checks: + for check in checks: + _health_checker.add_check(check) + @wraps(func) async def wrapper(*args, **kwargs): - checker = _health_checker - if checks: - for check in checks: - checker.add_check(check) return await func(*args, **kwargs) return wrapper return decorator @@ -231,8 +251,19 @@ async def get_health_status() -> Dict[str, Any]: @health_check_router.get(HEALTH_CHECK_PATH) async def healthz(): - """Standard health check endpoint.""" - return await get_health_status() + """ + Standard health check endpoint. + + Returns: + - 200 with status "healthy" or "degraded" + - 503 with status "unhealthy" + """ + status = await get_health_status() + + # Return proper HTTP status codes + if status.get("status") == HealthStatus.UNHEALTHY: + return JSONResponse(content=status, status_code=503) + return status def create_health_app(service_name: str = None) -> "FastAPI": """Create a minimal FastAPI app with health check.""" From 51e5e7471e20b3fab7c01292685292b02aee1661 Mon Sep 17 00:00:00 2001 From: POWERFULMOVES Date: Wed, 21 Jan 2026 09:51:35 -0500 Subject: [PATCH 3/4] fix(security): Remove hardcoded credential defaults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Neo4j: Remove neo4j:neo4j default credentials - MinIO: Remove minioadmin:minioadmin default credentials - ClickHouse: Remove tensorzero:tensorzero default credentials - Fix typo: export_CACHE_TTL → export CACHE_TTL Empty defaults now require explicit configuration for production use. --- env.shared | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/env.shared b/env.shared index 8cb789877e..d072cd0669 100644 --- a/env.shared +++ b/env.shared @@ -48,18 +48,20 @@ export QDRANT_URL=${QDRANT_URL:-http://qdrant:6333} export QDRANT_API_KEY=${QDRANT_API_KEY:-} # Neo4j (Graph Database) +# SECURITY: Empty defaults require explicit configuration export NEO4J_URL=${NEO4J_URL:-http://neo4j:7474} -export NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j} -export NEO4J_PASSWORD=${NEO4J_PASSWORD:-neo4j} +export NEO4J_USERNAME=${NEO4J_USERNAME:-} +export NEO4J_PASSWORD=${NEO4J_PASSWORD:-} # Meilisearch (Full-Text Search) export MEILISEARCH_URL=${MEILISEARCH_URL:-http://meilisearch:7700} export MEILISEARCH_API_KEY=${MEILISEARCH_API_KEY:-} # MinIO (Object Storage) +# SECURITY: Empty defaults require explicit configuration export MINIO_ENDPOINT=${MINIO_ENDPOINT:-http://minio:9000} -export MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-minioadmin} -export MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-minioadmin} +export MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-} +export MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-} export MINIO_REGION=${MINIO_REGION:-us-east-1} export MINIO_USE_SSL=${MINIO_USE_SSL:-false} From c62040a3c92df8237e7113155417c2ddd2744351 Mon Sep 17 00:00:00 2001 From: POWERFULMOVES Date: Wed, 21 Jan 2026 10:01:18 -0500 Subject: [PATCH 4/4] refactor(code-quality): Phase 3 & 4 improvements Phase 3: Code Quality - Add pmoves_common shared types module (ServiceTier, HealthStatus) - Update ServiceTier imports with fallback to shared module - Remove duplicate ServiceTier enum definitions Phase 4: Documentation - Add comprehensive module docstrings to all integration modules - Create .coderabbit.yaml for automated PR reviews - Enable reviews on feat/* and fix/* branches - Set docstring coverage target to 80% This reduces code duplication and improves type consistency across the PMOVES.AI ecosystem. --- .coderabbit.yaml | 11 +++++++ pmoves_announcer/__init__.py | 34 ++++++++++++++------- pmoves_common/__init__.py | 57 ++++++++++++++++++++++++++++++++++++ pmoves_health/__init__.py | 29 ++++++++++++++++-- pmoves_registry/__init__.py | 42 ++++++++++++++++++-------- 5 files changed, 148 insertions(+), 25 deletions(-) create mode 100644 .coderabbit.yaml create mode 100644 pmoves_common/__init__.py diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 0000000000..f2c2581453 --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,11 @@ +# CodeRabbit Configuration for PMOVES Submodule +reviews: + review_status: true + branches: + - "main" + - "feat/*" + - "fix/*" + excluded_branches: + - "origin/main" +language: "en-US" + doc_target_coverage: 80 diff --git a/pmoves_announcer/__init__.py b/pmoves_announcer/__init__.py index 3f3c2c4719..0d58ee886d 100644 --- a/pmoves_announcer/__init__.py +++ b/pmoves_announcer/__init__.py @@ -4,8 +4,14 @@ NATS service discovery announcer for all PMOVES services. Publishes service announcements to the services.announce.v1 subject. +This module provides: +- ServiceAnnouncer: Main class for announcing service availability +- ServiceAnnouncement: Data class for announcement messages +- BackgroundAnnouncer: Periodic re-announcement for long-running services +- announce_service(): Convenience function for one-time announcements + Usage: - from service_announcer import ServiceAnnouncer, announce_service + from pmoves_announcer import ServiceAnnouncer, announce_service # Create announcement announcer = ServiceAnnouncer( @@ -27,6 +33,9 @@ port=8080, tier="api" ) + +NATS Subject: services.announce.v1 +Message Format: JSON with slug, name, url, health_check, tier, port, timestamp, metadata """ import asyncio @@ -35,17 +44,22 @@ from dataclasses import dataclass, field, asdict from datetime import datetime, timezone from typing import Any, Dict, List, Optional -from enum import Enum -class ServiceTier(str, Enum): - """PMOVES service tiers (6-tier architecture).""" - DATA = "data" - API = "api" - LLM = "llm" - MEDIA = "media" - AGENT = "agent" - WORKER = "worker" +# Import ServiceTier from shared types if available, otherwise define locally +try: + from pmoves_common import ServiceTier +except ImportError: + from enum import Enum + + class ServiceTier(str, Enum): + """PMOVES service tiers (6-tier architecture).""" + DATA = "data" + API = "api" + LLM = "llm" + MEDIA = "media" + AGENT = "agent" + WORKER = "worker" @dataclass diff --git a/pmoves_common/__init__.py b/pmoves_common/__init__.py new file mode 100644 index 0000000000..9c927ec99d --- /dev/null +++ b/pmoves_common/__init__.py @@ -0,0 +1,57 @@ +""" +PMOVES.AI Common Types Module + +Shared type definitions used across all PMOVES services. +This module should be imported as a dependency to ensure type consistency +across the PMOVES.AI ecosystem. + +Usage: + from pmoves_common import ServiceTier, HealthStatus + + tier = ServiceTier.API + if tier == ServiceTier.AGENT: + print("Agent tier service") +""" + +from enum import Enum + + +class ServiceTier(str, Enum): + """ + PMOVES service tiers (6-tier architecture). + + The 6-tier model provides clear separation of concerns and security boundaries: + - DATA: Infrastructure services (Postgres, Qdrant, Neo4j, MinIO, NATS) + - API: Data access APIs (PostgREST, Presign, Hi-RAG, GPU Orchestrator) + - LLM: LLM Gateway (TensorZero) - ONLY tier with external API keys + - WORKER: Background workers (Extract, LangExtract, PDF-ingest, Notebook-sync) + - MEDIA: Media processing (YouTube, Whisper, YOLO analyzers) + - AGENT: Agent orchestration (Agent Zero, Archon, SupaSerch, DeepResearch) + + Security Note: LLM tier is the SINGLE POINT for external API keys. + All other services call TensorZero internally and never touch provider keys. + """ + DATA = "data" + API = "api" + LLM = "llm" + WORKER = "worker" + MEDIA = "media" + AGENT = "agent" + + @classmethod + def is_valid(cls, value: str) -> bool: + """Check if a string value is a valid tier.""" + return value in (t.value for t in cls) + + def __str__(self) -> str: + return self.value + + +class HealthStatus(str, Enum): + """Health status constants for service health checks.""" + HEALTHY = "healthy" + DEGRADED = "degraded" + UNHEALTHY = "unhealthy" + + +__all__ = ["ServiceTier", "HealthStatus"] diff --git a/pmoves_health/__init__.py b/pmoves_health/__init__.py index eb4a2f6379..772d9fc2ab 100644 --- a/pmoves_health/__init__.py +++ b/pmoves_health/__init__.py @@ -4,13 +4,38 @@ Standard health check endpoint for all PMOVES services. Follows PMOVES.AI conventions for service health monitoring. +This module provides: +- HealthChecker: Class for managing multiple dependency health checks +- DependencyCheck: Base class for creating custom health checks +- DatabaseCheck, HTTPCheck, NATSCheck: Pre-built check implementations +- health_check(): Decorator for registering checks +- create_health_app(): Factory for creating standalone health apps +- health_check_router: FastAPI router for adding to existing apps + +Health Endpoint Behavior: +- Returns HTTP 200 when status is "healthy" or "degraded" +- Returns HTTP 503 when status is "unhealthy" +- Includes timestamp, service name, and individual check results + Usage: - from pmoves_health import create_health_app - app = create_health_app() + from pmoves_health import create_health_app, HealthChecker, NATSCheck + + # Create a standalone health app + app = create_health_app("my-service") # Or add to existing FastAPI app from pmoves_health import health_check_router app.include_router(health_check_router) + + # Or use the checker directly + checker = HealthChecker("my-service") + checker.nats("nats://nats:4222") + status = await checker.check_all() + +Health Status Values: +- healthy: All required checks passing +- degraded: Optional checks failing, required checks passing +- unhealthy: One or more required checks failing """ from datetime import datetime, timezone diff --git a/pmoves_registry/__init__.py b/pmoves_registry/__init__.py index 231f1036bf..015905b59c 100644 --- a/pmoves_registry/__init__.py +++ b/pmoves_registry/__init__.py @@ -7,34 +7,50 @@ 3. NATS service announcements (real-time, cached) 4. Docker DNS (development fallback) +This module provides: +- CommonServices: Environment-based service URL discovery +- ServiceInfo: Immutable data class for service metadata +- get_service_url(): Resolve service URL with fallback chain +- get_service_info(): Get full service metadata + Usage: - from service_registry import get_service_url, ServiceInfo + from pmoves_registry import get_service_url, ServiceInfo, CommonServices + + # Simple URL resolution via CommonServices + url = CommonServices.get("hirag_v2") - # Simple URL resolution + # Or use the async function with fallback url = await get_service_url("hirag-v2") # Get full service info info = await get_service_info("hirag-v2") print(f"{info.name}: {info.health_check_url}") + +Environment Variables: + Services can be configured via environment variables in format: + {SERVICE_SLUG}_URL (e.g., HIRAG_V2_URL=http://hirag-v2:8086) """ import asyncio import os from dataclasses import dataclass, field -from enum import Enum from typing import Any, Optional -class ServiceTier(str, Enum): - """PMOVES service tiers.""" - DATA = "data" - API = "api" - LLM = "llm" - MEDIA = "media" - AGENT = "agent" - WORKER = "worker" - APP = "app" - UI = "ui" +# Import ServiceTier from shared types if available, otherwise define locally +try: + from pmoves_common import ServiceTier +except ImportError: + from enum import Enum + + class ServiceTier(str, Enum): + """PMOVES service tiers (6-tier architecture).""" + DATA = "data" + API = "api" + LLM = "llm" + MEDIA = "media" + AGENT = "agent" + WORKER = "worker" @dataclass(frozen=True)