diff --git a/.agents/AGENTS.md b/.agents/AGENTS.md
index 88b8d151e..478639829 100644
--- a/.agents/AGENTS.md
+++ b/.agents/AGENTS.md
@@ -225,6 +225,7 @@ See `subagent-index.toon` for complete listing of agents, subagents, workflows,
 | WordPress | `tools/wordpress/wp-dev.md`, `tools/wordpress/mainwp.md` |
 | SEO | `seo/dataforseo.md`, `seo/google-search-console.md` |
 | Video | `tools/video/video-prompt-design.md`, `tools/video/remotion.md`, `tools/video/higgsfield.md` |
+| Voice | `tools/voice/speech-to-speech.md` |
 | Parallel agents | `tools/ai-assistants/headless-dispatch.md`, `tools/ai-assistants/runners/` |
 | MCP dev | `tools/build-mcp/build-mcp.md` |
 | Agent design | `tools/build-agent/build-agent.md` |
diff --git a/.agents/scripts/speech-to-speech-helper.sh b/.agents/scripts/speech-to-speech-helper.sh
new file mode 100755
index 000000000..176f34c1c
--- /dev/null
+++ b/.agents/scripts/speech-to-speech-helper.sh
@@ -0,0 +1,526 @@
+#!/bin/bash
+# shellcheck disable=SC2034,SC2155
+
+# Speech-to-Speech Helper Script
+# Manages HuggingFace speech-to-speech pipeline
+# Supports local GPU (CUDA/MPS), Docker, and remote server deployment
+
+set -euo pipefail
+
+# Colors for output
+readonly GREEN='\033[0;32m'
+readonly BLUE='\033[0;34m'
+readonly YELLOW='\033[1;33m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m'
+
+# Defaults
+readonly S2S_REPO="https://github.com/huggingface/speech-to-speech.git"
+readonly S2S_DIR="${HOME}/.aidevops/.agent-workspace/work/speech-to-speech"
+readonly S2S_PID_FILE="${S2S_DIR}/.s2s.pid"
+readonly S2S_LOG_FILE="${S2S_DIR}/.s2s.log"
+readonly DEFAULT_RECV_PORT=12345
+readonly DEFAULT_SEND_PORT=12346
+
+print_info() {
+  local msg="$1"
+  echo -e "${BLUE}[INFO]${NC} $msg"
+  return 0
+}
+
+print_success() {
+  local msg="$1"
+  echo -e "${GREEN}[OK]${NC} $msg"
+  return 0
+}
+
+print_warning() {
+  local msg="$1"
+  echo -e "${YELLOW}[WARN]${NC} $msg"
+  return 0
+}
+
+print_error() {
+  local msg="$1"
+  echo -e "${RED}[ERROR]${NC} $msg" >&2
+  return 0
+}
+
+# ─── Dependency checks ──────────────────────────────────────────────────
+
+check_python() {
+  if ! command -v python3 &>/dev/null; then
+    print_error "python3 is required but not installed"
+    return 1
+  fi
+  local py_version
+  py_version=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')
+  local major="${py_version%%.*}"
+  local minor="${py_version##*.}"
+  if [[ "$major" -lt 3 ]] || { [[ "$major" -eq 3 ]] && [[ "$minor" -lt 10 ]]; }; then
+    print_error "Python 3.10+ required, found $py_version"
+    return 1
+  fi
+  print_info "Python $py_version"
+  return 0
+}
+
+check_uv() {
+  if ! command -v uv &>/dev/null; then
+    print_warning "uv not found. Install: curl -LsSf https://astral.sh/uv/install.sh | sh"
+    print_info "Falling back to pip"
+    return 1
+  fi
+  return 0
+}
+
+detect_platform() {
+  local platform
+  platform=$(uname -s)
+  case "$platform" in
+    Darwin)
+      if [[ "$(uname -m)" == "arm64" ]]; then
+        echo "mac-arm64"
+      else
+        echo "mac-x86"
+      fi
+      ;;
+    Linux)
+      if command -v nvidia-smi &>/dev/null; then
+        echo "linux-cuda"
+      else
+        echo "linux-cpu"
+      fi
+      ;;
+    *)
+      echo "unknown"
+      ;;
+  esac
+  return 0
+}
+
+detect_gpu() {
+  # Callers capture stdout (gpu=$(detect_gpu)), so status messages must go
+  # to stderr; only the device string is echoed to stdout
+  local platform
+  platform=$(detect_platform)
+  case "$platform" in
+    mac-arm64)
+      print_info "Apple Silicon detected (MPS acceleration)" >&2
+      echo "mps"
+      ;;
+    linux-cuda)
+      local gpu_info
+      gpu_info=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo "unknown")
+      print_info "NVIDIA GPU: $gpu_info" >&2
+      echo "cuda"
+      ;;
+    *)
+      print_warning "No GPU acceleration detected, using CPU" >&2
+      echo "cpu"
+      ;;
+  esac
+  return 0
+}
+
+# ─── Setup ──────────────────────────────────────────────────────────────
+
+cmd_setup() {
+  print_info "Setting up speech-to-speech pipeline..."
+
+  check_python || return 1
+
+  # Clone or update repo
+  if [[ -d "$S2S_DIR/.git" ]]; then
+    print_info "Updating existing installation..."
+    git -C "$S2S_DIR" pull --ff-only 2>/dev/null || {
+      print_warning "Could not fast-forward, repo may have local changes"
+    }
+  else
+    print_info "Cloning speech-to-speech..."
+    mkdir -p "$(dirname "$S2S_DIR")"
+    git clone "$S2S_REPO" "$S2S_DIR"
+  fi
+
+  # Install dependencies based on platform
+  local platform
+  platform=$(detect_platform)
+  local req_file="requirements.txt"
+  if [[ "$platform" == "mac-arm64" ]] || [[ "$platform" == "mac-x86" ]]; then
+    req_file="requirements_mac.txt"
+  fi
+
+  print_info "Installing dependencies from $req_file..."
+  if check_uv; then
+    uv pip install -r "${S2S_DIR}/${req_file}"
+  else
+    pip install -r "${S2S_DIR}/${req_file}"
+  fi
+
+  # Download NLTK data
+  print_info "Downloading NLTK data..."
+  python3 -c "import nltk; nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger_eng')" >/dev/null
+
+  print_success "Setup complete. Run: speech-to-speech-helper.sh start"
+  return 0
+}
+
+# ─── Start pipeline ─────────────────────────────────────────────────────
+
+cmd_start() {
+  local mode=""
+  local language="en"
+  local extra_args=()
+  local background=false
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --local-mac) mode="local-mac"; shift ;;
+      --cuda) mode="cuda"; shift ;;
+      --server) mode="server"; shift ;;
+      --docker) mode="docker"; shift ;;
+      --language) language="$2"; shift 2 ;;
+      --background) background=true; shift ;;
+      *) extra_args+=("$1"); shift ;;
+    esac
+  done
+
+  if [[ -z "$mode" ]]; then
+    local gpu
+    gpu=$(detect_gpu)
+    case "$gpu" in
+      mps) mode="local-mac" ;;
+      cuda) mode="cuda" ;;
+      *) mode="cuda" ;;
+    esac
+    print_info "Auto-detected mode: $mode"
+  fi
+
+  # Check if already running
+  if [[ -f "$S2S_PID_FILE" ]]; then
+    local pid
+    pid=$(cat "$S2S_PID_FILE")
+    if kill -0 "$pid" 2>/dev/null; then
+      print_warning "Pipeline already running (PID $pid). Use 'stop' first."
+      return 1
+    fi
+    rm -f "$S2S_PID_FILE"
+  fi
+
+  if [[ ! -d "$S2S_DIR/.git" ]]; then
+    print_error "Not installed. Run: speech-to-speech-helper.sh setup"
+    return 1
+  fi
+
+  local cmd_args=()
+
+  case "$mode" in
+    local-mac)
+      cmd_args=(
+        python3 s2s_pipeline.py
+        --local_mac_optimal_settings
+        --device mps
+        --language "$language"
+      )
+      if [[ "$language" == "auto" ]]; then
+        cmd_args+=(--stt_model_name large-v3)
+        cmd_args+=(--mlx_lm_model_name mlx-community/Meta-Llama-3.1-8B-Instruct-4bit)
+      fi
+      ;;
+    cuda)
+      cmd_args=(
+        python3 s2s_pipeline.py
+        --recv_host 0.0.0.0
+        --send_host 0.0.0.0
+        --lm_model_name microsoft/Phi-3-mini-4k-instruct
+        --stt_compile_mode reduce-overhead
+        --tts_compile_mode default
+        --language "$language"
+      )
+      ;;
+    server)
+      cmd_args=(
+        python3 s2s_pipeline.py
+        --recv_host 0.0.0.0
+        --send_host 0.0.0.0
+        --language "$language"
+      )
+      ;;
+    docker)
+      cmd_docker_start ${extra_args[@]+"${extra_args[@]}"}
+      return $?
+      ;;
+  esac
+
+  # Append any extra args (guarded expansion: an empty array trips `set -u` on bash < 4.4)
+  cmd_args+=(${extra_args[@]+"${extra_args[@]}"})
+
+  print_info "Starting pipeline (mode: $mode, language: $language)..."
+  print_info "Command: ${cmd_args[*]}"
+
+  if [[ "$background" == true ]]; then
+    (cd "$S2S_DIR" && "${cmd_args[@]}" > "$S2S_LOG_FILE" 2>&1 &
+      echo $! > "$S2S_PID_FILE")
+    local pid
+    pid=$(cat "$S2S_PID_FILE")
+    print_success "Pipeline started in background (PID $pid)"
+    print_info "Logs: tail -f $S2S_LOG_FILE"
+  else
+    (cd "$S2S_DIR" && exec "${cmd_args[@]}")
+  fi
+
+  return 0
+}
+
+cmd_docker_start() {
+  if ! command -v docker &>/dev/null; then
+    print_error "Docker is not installed"
+    return 1
+  fi
+
+  if [[ ! -f "${S2S_DIR}/docker-compose.yml" ]]; then
+    print_error "docker-compose.yml not found. Run setup first."
+    return 1
+  fi
+
+  print_info "Starting with Docker..."
+  (cd "$S2S_DIR" && docker compose up -d)
+  print_success "Docker containers started"
+  print_info "Ports: ${DEFAULT_RECV_PORT} (recv), ${DEFAULT_SEND_PORT} (send)"
+  return 0
+}
+
+# ─── Client ─────────────────────────────────────────────────────────────
+
+cmd_client() {
+  local host=""
+  local extra_args=()
+
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --host) host="$2"; shift 2 ;;
+      *) extra_args+=("$1"); shift ;;
+    esac
+  done
+
+  if [[ -z "$host" ]]; then
+    print_error "Server host required: --host <ip>"
+    return 1
+  fi
+
+  if [[ ! -f "${S2S_DIR}/listen_and_play.py" ]]; then
+    print_error "Not installed. Run: speech-to-speech-helper.sh setup"
+    return 1
+  fi
+
+  print_info "Connecting to server at $host..."
+  (cd "$S2S_DIR" && python3 listen_and_play.py --host "$host" ${extra_args[@]+"${extra_args[@]}"})
+  return 0
+}
+
+# ─── Stop ───────────────────────────────────────────────────────────────
+
+cmd_stop() {
+  # Stop background process
+  if [[ -f "$S2S_PID_FILE" ]]; then
+    local pid
+    pid=$(cat "$S2S_PID_FILE")
+    if kill -0 "$pid" 2>/dev/null; then
+      print_info "Stopping pipeline (PID $pid)..."
+      kill "$pid"
+      sleep 2
+      if kill -0 "$pid" 2>/dev/null; then
+        print_warning "Force killing..."
+        kill -9 "$pid" 2>/dev/null || true
+      fi
+      print_success "Pipeline stopped"
+    else
+      print_info "Process not running"
+    fi
+    rm -f "$S2S_PID_FILE"
+  else
+    print_info "No PID file found"
+  fi
+
+  # Stop Docker if running
+  if [[ -f "${S2S_DIR}/docker-compose.yml" ]]; then
+    if docker compose -f "${S2S_DIR}/docker-compose.yml" ps --quiet 2>/dev/null | grep -q .; then
+      print_info "Stopping Docker containers..."
+ (cd "$S2S_DIR" && docker compose down) + print_success "Docker containers stopped" + fi + fi + + return 0 +} + +# ─── Status ─────────────────────────────────────────────────────────── + +cmd_status() { + echo "=== Speech-to-Speech Status ===" + echo "" + + # Installation + if [[ -d "$S2S_DIR/.git" ]]; then + local commit + commit=$(git -C "$S2S_DIR" log -1 --format='%h %s' 2>/dev/null || echo "unknown") + print_success "Installed: $S2S_DIR" + print_info "Commit: $commit" + else + print_warning "Not installed. Run: speech-to-speech-helper.sh setup" + return 0 + fi + + # Platform + local platform + platform=$(detect_platform) + local gpu + gpu=$(detect_gpu) + print_info "Platform: $platform (accelerator: $gpu)" + + # Process + if [[ -f "$S2S_PID_FILE" ]]; then + local pid + pid=$(cat "$S2S_PID_FILE") + if kill -0 "$pid" 2>/dev/null; then + print_success "Running (PID $pid)" + else + print_warning "Stale PID file (process not running)" + rm -f "$S2S_PID_FILE" + fi + else + print_info "Not running" + fi + + # Docker + if command -v docker &>/dev/null && [[ -f "${S2S_DIR}/docker-compose.yml" ]]; then + local docker_status + docker_status=$(docker compose -f "${S2S_DIR}/docker-compose.yml" ps --format "table {{.Name}}\t{{.Status}}" 2>/dev/null || echo "not running") + if echo "$docker_status" | grep -qi "up"; then + print_success "Docker: running" + echo "$docker_status" + else + print_info "Docker: not running" + fi + fi + + echo "" + return 0 +} + +# ─── Config presets ─────────────────────────────────────────────────── + +cmd_config() { + local preset="${1:-}" + + case "$preset" in + low-latency) + echo "--stt faster-whisper --llm open_api --tts parler --stt_compile_mode reduce-overhead --tts_compile_mode default" + ;; + low-vram) + echo "--stt moonshine --llm open_api --tts pocket" + ;; + quality) + echo "--stt whisper --stt_model_name openai/whisper-large-v3 --llm transformers --lm_model_name microsoft/Phi-3-mini-4k-instruct --tts parler" + ;; + mac) + echo "--local_mac_optimal_settings --device mps --mlx_lm_model_name mlx-community/Meta-Llama-3.1-8B-Instruct-4bit" + ;; + multilingual) + echo "--stt_model_name large-v3 --language auto --tts melo" + ;; + *) + echo "Available presets:" + echo " low-latency - Fastest response (CUDA + OpenAI API)" + echo " low-vram - Minimal GPU memory (~4GB)" + echo " quality - Best quality (24GB+ VRAM)" + echo " mac - Optimal macOS Apple Silicon" + echo " multilingual - Auto language detection (6 languages)" + echo "" + echo "Usage: speech-to-speech-helper.sh start \$(speech-to-speech-helper.sh config low-latency)" + ;; + esac + return 0 +} + +# ─── Benchmark ──────────────────────────────────────────────────────── + +cmd_benchmark() { + if [[ ! -d "$S2S_DIR/.git" ]]; then + print_error "Not installed. Run: speech-to-speech-helper.sh setup" + return 1 + fi + + if [[ ! -f "${S2S_DIR}/benchmark_stt.py" ]]; then + print_error "Benchmark script not found in repo" + return 1 + fi + + print_info "Running STT benchmark..." 
+ (cd "$S2S_DIR" && python3 benchmark_stt.py "$@") + return 0 +} + +# ─── Help ───────────────────────────────────────────────────────────── + +cmd_help() { + echo "Speech-to-Speech Helper" + echo "Manages HuggingFace speech-to-speech pipeline" + echo "" + echo "Usage: $0 [options]" + echo "" + echo "Commands:" + echo " setup Install/update the pipeline" + echo " start [options] Start the pipeline" + echo " stop Stop running pipeline" + echo " status Show installation and runtime status" + echo " client --host Connect to remote server" + echo " config [preset] Show configuration presets" + echo " benchmark Run STT benchmark" + echo " help Show this help" + echo "" + echo "Start options:" + echo " --local-mac macOS Apple Silicon (auto-detected)" + echo " --cuda NVIDIA GPU with torch compile" + echo " --server Server mode (remote clients connect)" + echo " --docker Docker with NVIDIA GPU" + echo " --language Language: en, fr, es, zh, ja, ko, auto" + echo " --background Run in background" + echo "" + echo "Examples:" + echo " $0 setup" + echo " $0 start --local-mac" + echo " $0 start --cuda --language auto --background" + echo " $0 start --server" + echo " $0 client --host 192.168.1.100" + echo " $0 start \$($0 config low-latency)" + echo " $0 stop" + echo "" + echo "Install dir: $S2S_DIR" + return 0 +} + +# ─── Main ───────────────────────────────────────────────────────────── + +main() { + local command="${1:-help}" + if [[ $# -gt 0 ]]; then + shift + fi + + case "$command" in + setup) cmd_setup "$@" ;; + start) cmd_start "$@" ;; + stop) cmd_stop "$@" ;; + status) cmd_status "$@" ;; + client) cmd_client "$@" ;; + config) cmd_config "$@" ;; + benchmark) cmd_benchmark "$@" ;; + help|--help|-h) cmd_help ;; + *) + print_error "Unknown command: $command" + cmd_help + return 1 + ;; + esac +} + +main "$@" diff --git a/.agents/subagent-index.toon b/.agents/subagent-index.toon index 60fef3354..f2279175d 100644 --- a/.agents/subagent-index.toon +++ b/.agents/subagent-index.toon @@ -22,7 +22,7 @@ flash,gemini-2.5-flash,Fast cheap large context pro,gemini-2.5-pro,Capable large context --> - - diff --git a/.agents/tools/voice/speech-to-speech.md b/.agents/tools/voice/speech-to-speech.md new file mode 100644 index 000000000..2f0cb384c --- /dev/null +++ b/.agents/tools/voice/speech-to-speech.md @@ -0,0 +1,293 @@ +--- +description: "HuggingFace Speech-to-Speech - modular voice pipeline (VAD, STT, LLM, TTS) for local GPU and cloud GPU deployment" +mode: subagent +upstream_url: https://github.com/huggingface/speech-to-speech +tools: + read: true + write: false + edit: false + bash: true + glob: true + grep: true + webfetch: true + task: true +--- + +# Speech-to-Speech Pipeline + + + +## Quick Reference + +- **Source**: [huggingface/speech-to-speech](https://github.com/huggingface/speech-to-speech) (Apache-2.0) +- **Purpose**: Modular, open-source GPT-4o-style voice assistant pipeline +- **Pipeline**: VAD -> STT -> LLM -> TTS (each component swappable) +- **Helper**: `speech-to-speech-helper.sh [setup|start|stop|status|config|benchmark] [options]` +- **Install dir**: `~/.aidevops/.agent-workspace/work/speech-to-speech/` +- **Languages**: English, French, Spanish, Chinese, Japanese, Korean (auto-detect or fixed) + +**When to Use**: Read this when setting up voice interfaces, transcription pipelines, voice-driven DevOps, or phone-based AI assistants (pairs with Twilio). 
+
+## Architecture
+
+Four-stage cascaded pipeline connected via thread-safe queues:
+
+```text
+Microphone/Socket -> [VAD] -> [STT] -> [LLM] -> [TTS] -> Speaker/Socket
+                       |        |         |        |
+                    Silero   Whisper   Any HF   Parler
+                    VAD v5   variants  instruct Melo
+                             Parafor.  OpenAI   ChatTTS
+                             Faster-W  MLX-LM   Kokoro
+                             Parakeet           FacebookMMS
+                             Moonshine          Pocket
+```
+
+Each stage runs in its own thread. Audio streams via socket (server/client) or local audio device.
+
+## Component Options
+
+### VAD (Voice Activity Detection)
+
+| Implementation | Notes |
+|---------------|-------|
+| Silero VAD v5 | Default, production-grade |
+
+Key parameters: `--thresh` (trigger sensitivity), `--min_speech_ms`, `--min_silence_ms`
+
+### STT (Speech to Text)
+
+| Implementation | Flag | Best For |
+|---------------|------|----------|
+| Whisper (Transformers) | `--stt whisper` | CUDA, general purpose |
+| Faster Whisper | `--stt faster-whisper` | CUDA, lower latency |
+| Lightning Whisper MLX | `--stt whisper-mlx` | macOS Apple Silicon |
+| MLX Audio Whisper | `--stt mlx-audio-whisper` | macOS, newer models |
+| Paraformer (FunASR) | `--stt paraformer` | Chinese, low latency |
+| Parakeet TDT | `--stt parakeet-tdt` | CUDA, NVIDIA NeMo |
+| Moonshine | `--stt moonshine` | Lightweight |
+
+Model selection: `--stt_model_name <model>` (any Whisper checkpoint on HF Hub)
+
+### LLM (Language Model)
+
+| Implementation | Flag | Best For |
+|---------------|------|----------|
+| Transformers | `--llm transformers` | CUDA, any HF model |
+| MLX-LM | `--llm mlx-lm` | macOS Apple Silicon |
+| OpenAI API | `--llm open_api` | Cloud, lowest latency |
+
+Model selection: `--lm_model_name <model>` or `--mlx_lm_model_name <model>`
+
+### TTS (Text to Speech)
+
+| Implementation | Flag | Best For |
+|---------------|------|----------|
+| Parler-TTS | `--tts parler` | CUDA, streaming output |
+| MeloTTS | `--tts melo` | Multi-language (6 langs) |
+| ChatTTS | `--tts chatTTS` | Natural conversational |
+| Kokoro | `--tts kokoro` | macOS default, quality |
+| FacebookMMS | `--tts facebookMMS` | 1000+ languages |
+| Pocket TTS | `--tts pocket` | Lightweight |
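+
+Because each stage is selected independently, the flags compose freely. A sketch of a custom mix (the model name is illustrative; any checkpoint the chosen backend supports should work):
+
+```bash
+# Faster-Whisper STT + local Transformers LLM + MeloTTS, fixed to English
+# (run from the repo checkout)
+python s2s_pipeline.py \
+  --stt faster-whisper \
+  --llm transformers --lm_model_name microsoft/Phi-3-mini-4k-instruct \
+  --tts melo \
+  --language en
+```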
+
+## Deployment Modes
+
+### Local (macOS with Apple Silicon)
+
+Optimal for development and personal use. Uses MPS acceleration:
+
+```bash
+# One-liner with optimal Mac settings
+speech-to-speech-helper.sh start --local-mac
+
+# Equivalent to:
+python s2s_pipeline.py \
+  --local_mac_optimal_settings \
+  --device mps \
+  --stt parakeet-tdt \
+  --llm mlx-lm \
+  --tts kokoro \
+  --mlx_lm_model_name mlx-community/Meta-Llama-3.1-8B-Instruct-4bit
+```
+
+### Local (CUDA GPU)
+
+For workstations with NVIDIA GPU:
+
+```bash
+# Start with torch compile optimizations
+speech-to-speech-helper.sh start --cuda
+
+# Equivalent to:
+python s2s_pipeline.py \
+  --recv_host 0.0.0.0 --send_host 0.0.0.0 \
+  --lm_model_name microsoft/Phi-3-mini-4k-instruct \
+  --stt_compile_mode reduce-overhead \
+  --tts_compile_mode default
+```
+
+### Server/Client (Remote GPU)
+
+For cloud GPU instances (NVIDIA Cloud, Vast.ai, RunPod, Lambda):
+
+```bash
+# On GPU server
+speech-to-speech-helper.sh start --server
+
+# On local machine (audio I/O)
+speech-to-speech-helper.sh client --host <server-ip>
+
+# Or directly:
+python listen_and_play.py --host <server-ip>
+```
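+
+Sketched end to end, a remote session might look like this (assumptions: SSH access to the instance, `user@gpu-host` is a placeholder, and the recv/send ports 12345/12346 are reached via an SSH tunnel rather than open firewall ports):
+
+```bash
+# On the GPU instance: install, then run the server detached
+# (assumes the aidevops helper is on the instance; otherwise clone the repo manually)
+speech-to-speech-helper.sh setup
+speech-to-speech-helper.sh start --server --background
+
+# On the local machine: forward both audio ports, then attach the client
+ssh -N -L 12345:localhost:12345 -L 12346:localhost:12346 user@gpu-host &
+speech-to-speech-helper.sh client --host localhost
+```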
+
+### Docker (CUDA)
+
+```bash
+# Start with docker compose
+speech-to-speech-helper.sh start --docker
+
+# Uses: pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
+# Ports: 12345 (recv), 12346 (send)
+# GPU: nvidia device 0
+```
+
+## Setup
+
+```bash
+# Install via helper (clones repo, installs deps)
+speech-to-speech-helper.sh setup
+
+# Or manually:
+git clone https://github.com/huggingface/speech-to-speech.git
+cd speech-to-speech
+
+# CUDA/Linux
+uv pip install -r requirements.txt
+
+# macOS
+uv pip install -r requirements_mac.txt
+
+# For MeloTTS (optional)
+python -m unidic download
+```
+
+### Requirements
+
+- Python 3.10+
+- PyTorch 2.4+ (CUDA) or 2.10+ (macOS)
+- `uv` package manager (recommended)
+- CUDA 12.1+ (for GPU) or Apple Silicon (for MPS)
+- `sounddevice` for local audio I/O
+- ~4GB VRAM minimum (varies by model selection)
+
+## Multi-Language
+
+```bash
+# Auto-detect language per utterance
+speech-to-speech-helper.sh start --local-mac --language auto
+
+# Fixed language (e.g., Chinese)
+speech-to-speech-helper.sh start --local-mac --language zh
+```
+
+Requires a compatible STT model (e.g., `--stt_model_name large-v3`) and a multilingual TTS (MeloTTS or ChatTTS; Parler-TTS is currently English-only).
+
+## CLI Parameters
+
+All parameters follow a prefix convention: `--stt_*`, `--lm_*`, `--tts_*`, `--melo_*`, etc.
+
+Generation parameters use a `_gen_` infix: `--stt_gen_max_new_tokens 128`
+
+Full reference: `python s2s_pipeline.py -h` or see [arguments_classes/](https://github.com/huggingface/speech-to-speech/tree/main/arguments_classes)
+
+## Integration with aidevops
+
+### Voice-Driven DevOps
+
+Pair with the LLM stage to create voice-controlled DevOps:
+
+1. STT captures voice command
+2. LLM interprets as DevOps action (via system prompt)
+3. TTS confirms action and reports result
+
+### Transcription Pipeline
+
+Use the STT stage standalone for meeting notes or podcast transcription. Run the pipeline with `--llm open_api` and a system prompt that outputs transcription only, or use the STT components directly via Python.
+
+### Phone Integration (Twilio)
+
+Combine with `services/communications/twilio.md` for phone-based AI:
+
+1. Twilio receives call, streams audio via WebSocket
+2. S2S pipeline processes speech in real-time
+3. TTS response streamed back to caller
+
+### Video Narration
+
+Pair with `tools/video/remotion.md` for generated voiceover:
+
+1. Generate script with LLM
+2. TTS produces audio track
+3. Remotion composites with video
+
+## Cloud GPU Providers
+
+For server/client deployment when local GPU is insufficient:
+
+| Provider | GPU Options | Pricing Model | Notes |
+|----------|------------|---------------|-------|
+| NVIDIA Cloud | A100, H100 | Per-hour | Official NVIDIA, best for production |
+| Vast.ai | Consumer + datacenter | Auction/fixed | Cheapest, variable availability |
+| RunPod | A100, 4090, H100 | Per-hour | Good balance of price/reliability |
+| Lambda | A100, H100 | Per-hour | Research-focused |
+
+Typical setup: SSH into instance, clone repo, install deps, run server mode. Connect from local machine with `listen_and_play.py`.
+
+## Recommended Configurations
+
+### Low Latency (CUDA)
+
+```bash
+--stt faster-whisper --llm open_api --tts parler \
+--stt_compile_mode reduce-overhead --tts_compile_mode default
+```
+
+### Low VRAM (~4GB)
+
+```bash
+--stt moonshine --llm open_api --tts pocket
+```
+
+### Best Quality (CUDA, 24GB+)
+
+```bash
+--stt whisper --stt_model_name openai/whisper-large-v3 \
+--llm transformers --lm_model_name microsoft/Phi-3-mini-4k-instruct \
+--tts parler
+```
+
+### macOS Optimal
+
+```bash
+--local_mac_optimal_settings --device mps \
+--mlx_lm_model_name mlx-community/Meta-Llama-3.1-8B-Instruct-4bit
+```
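+
+These presets are also exposed by the helper's `config` command, so a launch can be a one-liner (a sketch; the unquoted `$(...)` is intentional so the preset string splits into separate flags):
+
+```bash
+# Launch the "quality" preset on a CUDA box, detached, then follow the logs
+speech-to-speech-helper.sh start --cuda $(speech-to-speech-helper.sh config quality) --background
+tail -f ~/.aidevops/.agent-workspace/work/speech-to-speech/.s2s.log
+```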
+
+## Troubleshooting
+
+| Issue | Solution |
+|-------|----------|
+| `Cannot use CUDA on macOS` | Use `--device mps` or `--local_mac_optimal_settings` |
+| MeloTTS import error | Run `python -m unidic download` |
+| High latency | Enable torch compile: `--stt_compile_mode reduce-overhead` |
+| Audio crackling | Increase `--min_silence_ms` or check sample rate |
+| OOM on GPU | Use smaller models or `--llm open_api` to offload LLM |
+
+## See Also
+
+- `services/communications/twilio.md` - Phone integration
+- `tools/video/remotion.md` - Video narration
+- `tools/video/heygen-skill/rules/voices.md` - AI voice cloning
diff --git a/README.md b/README.md
index 36bcfa9bd..7bb9d6459 100644
--- a/README.md
+++ b/README.md
@@ -664,7 +664,52 @@ Test suites are JSON files with prompts and validation rules (`expect_contains`,
 ### Voice Integration
 
-Speech-to-speech AI conversations:
+Open-source speech-to-speech pipeline based on [huggingface/speech-to-speech](https://github.com/huggingface/speech-to-speech) (Apache-2.0). Modular four-stage architecture with swappable components:
+
+```text
+Microphone → [VAD] → [STT] → [LLM] → [TTS] → Speaker
+             Silero   Whisper    Any HF     Parler/Melo/
+             VAD v5   variants   instruct   Kokoro/ChatTTS
+```
+
+**Quick start:**
+
+```bash
+# Install the pipeline
+speech-to-speech-helper.sh setup
+
+# Run locally on Mac (auto-configures MPS acceleration)
+speech-to-speech-helper.sh start --local-mac
+
+# Run on NVIDIA GPU with torch compile
+speech-to-speech-helper.sh start --cuda
+
+# Server mode (clients connect remotely)
+speech-to-speech-helper.sh start --server
+speech-to-speech-helper.sh client --host 192.168.1.100
+
+# Configuration presets
+speech-to-speech-helper.sh config low-latency   # Fastest (CUDA + OpenAI API)
+speech-to-speech-helper.sh config low-vram      # Minimal GPU (~4GB)
+speech-to-speech-helper.sh config quality       # Best quality (24GB+ VRAM)
+speech-to-speech-helper.sh config mac           # Apple Silicon optimal
+speech-to-speech-helper.sh config multilingual  # Auto language detection (6 langs)
+```
+
+**Recommended hardware for voice:**
+
+| Setup | CPU | RAM | GPU | Use Case |
+|-------|-----|-----|-----|----------|
+| Mac (local) | Apple M1+ | 16GB+ | MPS (unified) | Development, personal use |
+| Workstation (CUDA) | Any modern | 16GB+ | NVIDIA 8GB+ VRAM | Low-latency local voice |
+| Quality (CUDA) | Any modern | 32GB+ | NVIDIA 24GB+ VRAM | Full Whisper large-v3 + Parler |
+| Cloud GPU | - | - | A100/H100 | Production server, multi-user |
+
+**Cloud GPU providers** for server/client deployment: NVIDIA Cloud, Vast.ai, RunPod, Lambda.
+
+**Supported languages:** English, French, Spanish, Chinese, Japanese, Korean (auto-detect or fixed).
+
+**Additional voice methods:**
 
 | Method | Description |
 |--------|-------------|
 | **iPhone Shortcut** | iOS: dictate → HTTP → speak response |
 | **Pipecat STS** | Full voice pipeline: Soniox STT → AI → Cartesia TTS |
 
+**See:** [speech-to-speech.md](.agents/tools/voice/speech-to-speech.md) for full component options, CLI parameters, and integration patterns (Twilio phone, video narration, voice-driven DevOps).
+
 ### Scheduled Agent Tasks
 
 Cron-based agent dispatch for automated workflows:
 
@@ -685,6 +732,23 @@
 ## **Requirements**
 
+### **Recommended Hardware**
+
+aidevops itself is lightweight (shell scripts + markdown), but AI model workloads benefit from capable hardware:
+
+| Tier | Machine | CPU | RAM | GPU | Best For |
+|------|---------|-----|-----|-----|----------|
+| **Minimum** | Any modern laptop | 4+ cores | 8GB | None | Framework only, cloud AI APIs |
+| **Recommended** | Mac Studio / desktop | Apple M1+ or 8+ cores | 16GB+ | MPS (Apple) or NVIDIA 8GB+ | Local voice, browser automation, dev servers |
+| **Power User** | Workstation | 8+ cores | 32GB+ | NVIDIA 24GB+ VRAM | Full voice pipeline, local LLMs, parallel agents |
+| **Server** | Cloud GPU | Any | 16GB+ | A100 / H100 | Production voice, multi-user, batch processing |
+
+**Cloud GPU providers** for on-demand GPU access: [NVIDIA Cloud](https://www.nvidia.com/en-us/gpu-cloud/), [Vast.ai](https://vast.ai/), [RunPod](https://www.runpod.io/), [Lambda](https://lambdalabs.com/).
+
+**Note:** Most aidevops features (infrastructure management, SEO, code quality, Git workflows) require no GPU. GPU is only needed for local AI model inference (voice pipeline, vision models, local LLMs).
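+
+To see which tier a machine falls into, the helper's detection logic can be approximated by hand (a sketch of equivalent shell checks, not an official installer step):
+
+```bash
+# Apple Silicon -> MPS tier; NVIDIA driver present -> CUDA tier; otherwise CPU-only
+if [[ "$(uname -s)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
+  echo "Apple Silicon: MPS acceleration available"
+elif command -v nvidia-smi &>/dev/null; then
+  nvidia-smi --query-gpu=name,memory.total --format=csv,noheader
+else
+  echo "No GPU detected: framework features and cloud AI APIs only"
+fi
+```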
+
+### **Software Dependencies**
+
 ```bash
 # Install dependencies (auto-detected by setup.sh)
 brew install sshpass jq curl mkcert dnsmasq fd ripgrep  # macOS
@@ -829,6 +893,11 @@ See `.agents/tools/ocr/glm-ocr.md` for batch processing, PDF workflows, and Peek
 - **[Remotion](https://remotion.dev/)**: Programmatic video creation with React - create videos using code with 29 specialized rule files
 - **[Video Prompt Design](https://github.com/snubroot/Veo-3-Meta-Framework)**: Structured prompt engineering for AI video generation (Veo 3, 7-component framework, character consistency, audio design)
 
+### **Voice AI**
+
+- **[Speech-to-Speech](https://github.com/huggingface/speech-to-speech)**: Open-source modular voice pipeline (VAD → STT → LLM → TTS) with local GPU and cloud GPU deployment
+- **[Pipecat](https://github.com/pipecat-ai/pipecat)**: Real-time voice agent framework with Soniox STT, Cartesia TTS, and multi-LLM support
+
 ### **Performance & Monitoring**
 
 - **[PageSpeed Insights](https://pagespeed.web.dev/)**: Website performance auditing
diff --git a/TODO.md b/TODO.md
index 1fbceb266..d5f0c5053 100644
--- a/TODO.md
+++ b/TODO.md
@@ -339,8 +339,14 @@ Tasks with no open blockers - ready to work on. Use `/ready` to refresh this lis
 - Notes: Implemented complete system for importing skills from external GitHub repos. Created add-skill-helper.sh (~630 lines) for fetching, format detection (SKILL.md, AGENTS.md, .cursorrules, raw markdown), conversion, and registration. Created skill-update-helper.sh (~280 lines) for upstream update checking. Added skill-sources.json registry, /add-skill command, add-skill.md subagent. Updated setup.sh with create_skill_symlinks() for cross-tool compatibility. PR #135.
 - [x] t067 Optimise OpenCode MCP loading with on-demand activation #opencode #performance #mcp ~4h (ai:2h test:1h read:1h) logged:2026-01-21 blocked-by:t056 started:2026-01-21T06:15Z completed:2026-01-21 actual:30m
 - Notes: Implemented on-demand MCP loading pattern. Updated generate-opencode-agents.sh to sync MCP index on agent generation. Added MCP On-Demand Loading section to AGENTS.md. Pattern: MCPs disabled globally, enabled per-subagent via frontmatter, discoverable via mcp-index-helper.sh search.
-
-