NillionNetwork · blefo · Nov 7, 2025 · Oct 7, 2025 · Oct 7, 2025 · Oct 7, 2025
diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
@@ -50,6 +50,7 @@ jobs:
           # Set dummy secrets for unit tests
           sed -i 's/HF_TOKEN=.*/HF_TOKEN=dummy_token/' .env
           sed -i 's/BRAVE_SEARCH_API=.*/BRAVE_SEARCH_API=dummy_api/' .env
+          sed -i 's/E2B_API_KEY=.*/E2B_API_KEY=dummy_token/' .env
 
       - name: pyright
         run: uv run pyright
@@ -73,7 +74,7 @@ jobs:
         with:
           aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
           aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
-          aws-region: "eu-west-1"
+          aws-region: "us-east-1"
       - name: Start EC2 runner
         id: start-ec2-runner
         uses: NillionNetwork/[email protected]
@@ -82,12 +83,12 @@ jobs:
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
           runners-per-machine: 3
           number-of-machines: 1
-          ec2-image-id: ami-0174a246556e8750b
-          ec2-instance-type: g4dn.xlarge
-          subnet-id: subnet-0ec4c353621eabae2
-          security-group-id: sg-03ee5c56e1f467aa0
-          key-name: production-github-runner-key
-          iam-role-name: github-runners-production-github-runner-ec2
+          ec2-image-id: ami-0e70d84403fc045d7
+          ec2-instance-type: g6.xlarge
+          subnet-id: subnet-0bb357f46d1bc355c
+          security-group-id: sg-022a5cdcf57e9618b
+          key-name: us-east-1-github-runner-key
+          iam-role-name: github-runners-us-east-1-github-runner-ec2
           aws-resource-tags: >
             [
               {"Key": "Name", "Value": "github-runner-${{ github.run_id }}-${{ github.run_number }}"},
@@ -96,7 +97,7 @@ jobs:
               {"Key": "Deployment", "Value": "github-runners"},
               {"Key": "Type", "Value": "GithubRunner"},
               {"Key": "User", "Value": "ec2-user"},
-              {"Key": "Environment", "Value": "production"}
+              {"Key": "Environment", "Value": "us-east-1"}
             ]
 
   build-images:
@@ -149,7 +150,7 @@ jobs:
          sed -i 's/NILDB_COLLECTION=.*/NILDB_COLLECTION=${{ secrets.NILDB_COLLECTION }}/' .env
 
       - name: Compose docker-compose.yml
-        run: python3 ./scripts/docker-composer.py --dev -f docker/compose/docker-compose.llama-1b-gpu.ci.yml -o development-compose.yml
+        run: python3 ./scripts/docker-composer.py --dev -f docker/compose/docker-compose.gpt-20b-gpu.ci.yml -o development-compose.yml
 
       - name: GPU stack versions (non-fatal)
         shell: bash
@@ -327,7 +328,7 @@ jobs:
         with:
           aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
           aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
-          aws-region: "eu-west-1"
+          aws-region: "us-east-1"
 
       - name: Stop EC2 runner
         uses: NillionNetwork/[email protected]

diff --git a/docker/compose/docker-compose.gpt-20b-gpu.ci.yml b/docker/compose/docker-compose.gpt-20b-gpu.ci.yml
@@ -0,0 +1,45 @@
+services:  # CI compose fragment: one GPU-backed service running the openai/gpt-oss-20b model
+  gpt_20b_gpu:
+    image: nillion/nilai-vllm:latest  # floating tag, not a pinned version
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1  # reserve exactly one NVIDIA GPU for this container
+              capabilities: [gpu]
+
+    ulimits:
+      memlock: -1  # -1 = no limit on locked memory
+      stack: 67108864  # 64 MiB (64 * 1024 * 1024 bytes)
+    env_file:
+      - .env
+    restart: unless-stopped
+    depends_on:
+      etcd:  # not defined in this file; presumably supplied by another fragment merged by scripts/docker-composer.py (confirm)
+        condition: service_healthy  # start only after etcd reports healthy
+    command: >  # folded into a single line of CLI flags (presumably for the vLLM server entrypoint; confirm)
+      --model openai/gpt-oss-20b
+      --gpu-memory-utilization 0.95
+      --max-model-len 10000
+      --max-num-batched-tokens 10000
+      --max-num-seqs 2
+      --tensor-parallel-size 1
+      --uvicorn-log-level warning
+      --async-scheduling
+    environment:
+      - SVC_HOST=gpt_20b_gpu  # same as this service's name
+      - SVC_PORT=8000  # same port the healthcheck below probes
+      - ETCD_HOST=etcd  # same name as the etcd service in depends_on
+      - ETCD_PORT=2379
+      - TOOL_SUPPORT=true
+    volumes:
+      - hugging_face_models:/root/.cache/huggingface  # named volume caching downloaded Hugging Face models
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      retries: 10
+      start_period: 900s  # 15-minute startup grace period (presumably to cover model download/load; confirm)
+      timeout: 15s
+volumes:
+  hugging_face_models:  # declared with default settings; mounted by gpt_20b_gpu above
diff --git a/docker/nilauth/config.yaml b/docker/nilauth/config.yaml
@@ -12,7 +12,7 @@ payments:
 
   subscriptions:
     renewal_threshold_seconds: 1000
-    length_seconds: 120
+    length_seconds: 900
     dollar_cost:
       nilai: 1
       nildb: 1

diff --git a/nilai-api/pyproject.toml b/nilai-api/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "uvicorn>=0.32.1",
     "httpx>=0.27.2",
     "nilrag>=0.1.11",
-    "openai>=1.59.9",
+    "openai>=1.99.2",
     "pg8000>=1.31.2",
     "prometheus_fastapi_instrumentator>=7.0.2",
     "asyncpg>=0.30.0",

diff --git a/nilai-api/src/nilai_api/config/__init__.py b/nilai-api/src/nilai_api/config/__init__.py
@@ -1,14 +1,14 @@
 # Import all configuration models
 import json
+import logging
+from pydantic import BaseModel
 from .environment import EnvironmentConfig
 from .database import DatabaseConfig, EtcdConfig, RedisConfig
 from .auth import AuthConfig, DocsConfig
 from .nildb import NilDBConfig
 from .web_search import WebSearchSettings
 from .rate_limiting import RateLimitingConfig
 from .utils import create_config_model, CONFIG_DATA
-from pydantic import BaseModel
-import logging
 
 
 class NilAIConfig(BaseModel):
@@ -38,19 +38,25 @@ class NilAIConfig(BaseModel):
 
     def prettify(self):
         """Print the config in a pretty format removing passwords and other sensitive information"""
-        config_dict = self.model_dump()
-        keywords = ["pass", "token", "key"]
-        for key, value in config_dict.items():
-            if isinstance(value, str):
-                for keyword in keywords:
-                    print(key, keyword, keyword in key)
-                    if keyword in key and value is not None:
-                        config_dict[key] = "***************"
-            if isinstance(value, dict):
-                for k, v in value.items():
-                    for keyword in keywords:
-                        if keyword in k and v is not None:
-                            value[k] = "***************"
+        config_dict = self.model_dump(mode="json")  # JSON mode yields JSON-compatible values, as json.dumps below requires
+
+        keywords = {"pass", "token", "key"}  # substrings that mark a field name as sensitive
+        for key, value in list(config_dict.items()):  # iterate over a snapshot; entries are overwritten in the loop
+            if (
+                isinstance(value, str)  # non-string values are never masked
+                and any(k in key for k in keywords)
+                and value is not None  # redundant: isinstance(value, str) already rules out None
+            ):
+                config_dict[key] = "***************"  # mask top-level string secrets
+            elif isinstance(value, dict):  # only one level of nesting is scanned; deeper dicts are left as-is
+                for k, v in list(value.items()):
+                    if (
+                        isinstance(v, str)
+                        and any(kw in k for kw in keywords)
+                        and v is not None  # redundant: isinstance(v, str) already rules out None
+                    ):
+                        value[k] = "***************"  # mask nested string secrets
+
         return json.dumps(config_dict, indent=4)
 
 

diff --git a/nilai-api/src/nilai_api/config/config.yaml b/nilai-api/src/nilai_api/config/config.yaml
@@ -13,6 +13,7 @@ auth:
 docs:
   token: null
 
+
 # Web Search Configuration
 web_search:
   api_key: null
@@ -30,8 +31,8 @@ rate_limiting:
   user_rate_limit_minute: 100
   user_rate_limit_hour: 1000
   user_rate_limit_day: 10000
-  web_search_rate_limit_minute: 1
-  web_search_rate_limit_hour: 3
+  web_search_rate_limit_minute: 6
+  web_search_rate_limit_hour: 18
   web_search_rate_limit_day: 72
   web_search_rate_limit: null # For-good rate limit
   model_concurrent_rate_limit: