28 changes: 15 additions & 13 deletions README.md
@@ -103,18 +103,20 @@ up -d
-p 6379:6379 \
redis:latest

# Start PostgreSQL
docker run -d --name postgres \
-e POSTGRES_USER=${POSTGRES_USER} \
-e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} \
-e POSTGRES_DB=${POSTGRES_DB} \
-p 5432:5432 \
--network frontend_net \
--volume postgres_data:/var/lib/postgresql/data \
postgres:16
```

2. **Run API Server**
2. **Start PostgreSQL**
```shell
docker run -d --name postgres \
-e POSTGRES_USER=${POSTGRES_USER} \
-e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} \
-e POSTGRES_DB=${POSTGRES_DB} \
-p 5432:5432 \
--network frontend_net \
--volume postgres_data:/var/lib/postgresql/data \
postgres:16
```


3. **Run API Server**
```shell
# Development Environment
fastapi dev nilai-api/src/nilai_api/__main__.py --port 8080
@@ -123,7 +125,7 @@ docker run -d --name postgres \
uv run fastapi run nilai-api/src/nilai_api/__main__.py --port 8080
```

3. **Run Model Instances**
4. **Run Model Instances**
```shell
# Example: Llama 3.2 1B Model
# Development Environment
26 changes: 26 additions & 0 deletions docker/audio.Dockerfile
@@ -0,0 +1,26 @@
FROM python:3.12-slim

WORKDIR /app

COPY --link . /app/
WORKDIR /app/nilai-audio/

# Install system dependencies
RUN apt-get update && \
    apt-get install build-essential curl ffmpeg -y && \
    apt-get clean && \
    apt-get autoremove && \
    rm -rf /var/lib/apt/lists/* && \
    pip install uv && \
    uv sync

ENV LD_LIBRARY_PATH=/app/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib/:${LD_LIBRARY_PATH}

# Create necessary directories
RUN mkdir -p uploads transcript_results_diarized summaries extracted_audio

# Expose port for FastAPI
EXPOSE 8000

# Command to run the application
CMD ["uv", "run", "uvicorn", "nilai_audio.main:app", "--host", "0.0.0.0", "--port", "8000"]
39 changes: 39 additions & 0 deletions docker/compose/docker-compose.audio-gpu.yml
@@ -0,0 +1,39 @@
# File: docker/compose/docker-compose.audio-gpu.yml
services:
  audio_gpu:
    image: nillion/nilai-audio:latest
    container_name: nilai-audio_gpu
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ipc: host
    ulimits:
      memlock: -1
      stack: 67108864
    env_file:
      - .env
    restart: unless-stopped
    volumes:
      - hugging_face_models:/root/.cache/huggingface
      - torch_models:/root/.cache/torch
    networks:
      - backend_net
    ports:
      - "8002:8000"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/docs"]
      interval: 30s
      retries: 3
      start_period: 90s
      timeout: 15s

volumes:
  hugging_face_models:
  torch_models:

networks:
  backend_net:
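Assuming a .env file is present next to the compose file (it is referenced via env_file but not included in this diff) and the NVIDIA container toolkit is installed, the service can be brought up and probed at the same /docs endpoint the healthcheck uses, which the ports mapping publishes on host port 8002:

```shell
# Start the GPU audio service
docker compose -f docker/compose/docker-compose.audio-gpu.yml up -d

# The healthcheck polls /docs inside the container; the same endpoint is
# reachable from the host through the 8002:8000 mapping
curl -f http://localhost:8002/docs
```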
48 changes: 48 additions & 0 deletions docker/compose/docker-compose.qwen-14b-gpu.yml
@@ -0,0 +1,48 @@
services:
  qwen_14b_gpu:
    image: nillion/nilai-vllm:latest
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ipc: host
    ulimits:
      memlock: -1
      stack: 67108864
    env_file:
      - .env
    restart: unless-stopped
    depends_on:
      etcd:
        condition: service_healthy
    command: >
      --model Qwen/Qwen2.5-14B-Instruct-1M
      --gpu-memory-utilization 0.39
      --max-model-len 50000
      --tensor-parallel-size 1
      --enable-auto-tool-choice
      --uvicorn-log-level warning
    environment:
      - SVC_HOST=qwen_14b_gpu
      - SVC_PORT=8000
      - ETCD_HOST=etcd
      - ETCD_PORT=2379
      - TOOL_SUPPORT=true
    volumes:
      - hugging_face_models:/root/.cache/huggingface
    networks:
      - backend_net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      retries: 3
      start_period: 60s
      timeout: 10s

volumes:
  hugging_face_models:

networks:
  backend_net:
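This service publishes no host port and depends on an etcd service that is not defined in this file, so it is presumably meant to be combined with the repository's base compose stack; a sketch under that assumption (the base compose file name is a guess):

```shell
# Bring up the Qwen 14B backend together with the base stack that provides
# etcd and backend_net (the base file name is an assumption)
docker compose -f docker-compose.yml -f docker/compose/docker-compose.qwen-14b-gpu.yml up -d

# Probe the vLLM health endpoint from inside the service container, since no
# port is published to the host
docker compose -f docker-compose.yml -f docker/compose/docker-compose.qwen-14b-gpu.yml exec qwen_14b_gpu curl -f http://localhost:8000/health
```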
1 change: 1 addition & 0 deletions nilai-api/src/nilai_api/config/mainnet.py
@@ -10,6 +10,7 @@
"cognitivecomputations/Dolphin3.0-Llama3.1-8B": 30,
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 5,
"hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4": 5,
"Qwen/Qwen2.5-14B-Instruct-1M": 5,
}

# It defines the number of requests allowed for each user for a given time frame.
1 change: 1 addition & 0 deletions nilai-api/src/nilai_api/config/testnet.py
@@ -10,6 +10,7 @@
"cognitivecomputations/Dolphin3.0-Llama3.1-8B": 5,
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": 5,
"hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4": 5,
"Qwen/Qwen2.5-14B-Instruct-1M": 5,
}

# It defines the number of requests allowed for each user for a given time frame.
Empty file added nilai-audio/README.md
Empty file.
29 changes: 29 additions & 0 deletions nilai-audio/pyproject.toml
@@ -0,0 +1,29 @@
[project]
name = "nilai-audio"
version = "0.1.0"
description = "WhisperX service with Nilai"
readme = "README.md"
authors = [
    { name = "blefo", email = "[email protected]" }
]
requires-python = ">=3.12"
dependencies = [
    "torch>=2.0.0",
    "torchaudio>=2.0.0",
    "whisperx==3.3.2",
    "fastapi>=0.110.0",
    "uvicorn[standard]>=0.29.0",
    "python-multipart",
    "httpx>=0.27.0",
    "nilai-common",
    "pyannote.audio==3.3.2",
    "nilai-models",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.uv.sources]
nilai-common = { workspace = true }
nilai-models = { workspace = true }
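Because nilai-common and nilai-models are declared as workspace sources, the package is presumably installed from inside the repository's uv workspace rather than as a standalone project; a minimal local setup might look like this (mirroring what the audio Dockerfile does):

```shell
# From the repository root, enter the workspace member and resolve its
# dependencies with uv (Python >= 3.12 is required by the project)
cd nilai-audio
uv sync
```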
33 changes: 33 additions & 0 deletions nilai-audio/run.sh
@@ -0,0 +1,33 @@
#!/bin/bash

set -euo pipefail

start_primary_process() {
    echo "Starting the primary process"
    echo "Args: [$*]"
    uv run uvicorn main:app --host 0.0.0.0 --port 8000 &
}

start_secondary_process() {
    echo "Starting the secondary process"
    uv run python3 -m nilai_models.daemon
}

main() {
    echo "Starting the main process with args: $*"
    if [[ " $* " =~ "--standalone" ]]; then
        echo "Starting the standalone server"
        # Remove --standalone from arguments
        args=("${@/--standalone/}")
        start_primary_process "${args[@]}"
    else
        start_primary_process "$@"
        start_secondary_process
    fi

    # Wait for any process to exit and exit with its status
    wait -n
    exit $?
}

main "$@"
2 changes: 2 additions & 0 deletions nilai-audio/src/nilai_audio/__init__.py
@@ -0,0 +1,2 @@
def hello() -> str:
return "Hello from nilai-audio!"