Skip to content
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2b3de7c
feat: modularized chat completion endpoint
blefo Oct 7, 2025
6dcc8bc
feat: reorganize API models
blefo Oct 7, 2025
8ac1496
fix: add responses model
blefo Oct 7, 2025
835fa29
refactor: update import paths for API models
blefo Oct 7, 2025
8730af2
feat: add responses endpoint and integrate with existing API
blefo Oct 7, 2025
43849ea
refactor: update response handling and model structure in API
blefo Oct 7, 2025
5308a83
feat: implement web search for response handling
blefo Oct 7, 2025
1f9ddc7
chore: update OpenAI dependency version and enhance response model st…
blefo Oct 8, 2025
6662774
feat: implement tool call routing and execution in response handling
blefo Oct 9, 2025
3207733
refactor: enhance tool call routing and response handling
blefo Oct 10, 2025
7fe9059
refactor: update response handling and model types
blefo Oct 10, 2025
40c936f
feat: add comprehensive test suites for responses endpoints and updat…
blefo Oct 10, 2025
4224ca5
chore: update CI configuration and add new GPT-OSS-20b model
blefo Oct 13, 2025
d4fc0ae
feat: enhance tool execution workflow and add availability check
blefo Oct 14, 2025
abcebb0
chore: update AWS region in CI workflow from eu-west-1 to us-east-1 …
blefo Oct 15, 2025
d0c5385
test: refine tests
blefo Oct 16, 2025
99ee504
refactor: update configuration and enhance chat completion logic
blefo Oct 23, 2025
b8dfd5c
fix: cicd E2B API key name + increase timeout for web_search tests
blefo Oct 23, 2025
aa9603f
test: update web search instructions and increase timeout for responses
blefo Oct 23, 2025
29749a5
refactor: PR review #1 remove tools configuration and enhance web sea…
blefo Oct 28, 2025
f657767
fix: update content returned by tool_router
blefo Oct 28, 2025
6128333
fix: update token expiration logic to 30 minutes for invocation and d…
blefo Oct 28, 2025
6719ca7
fix: add optional expiration parameter to invocation token generation…
blefo Oct 29, 2025
c606707
fix: update OpenAI client base URL handling in e2e tests
blefo Oct 29, 2025
a8d3b89
fix: enhance OpenAI client creation and improve rate limiting test lo…
blefo Oct 29, 2025
778313a
refactor: streamline web search tests by removing retry logic and enh…
blefo Oct 29, 2025
6002f65
fix: adjust subscription length and update token expiration logic for…
blefo Nov 4, 2025
7f758ec
refactor: remove unused CONFIG import from rate limiting module
blefo Nov 4, 2025
61e499a
refactor: remove deprecated rate limiting tests
blefo Nov 4, 2025
baf6181
feat: implement web search rate limiting per second
blefo Nov 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
# Set dummy secrets for unit tests
sed -i 's/HF_TOKEN=.*/HF_TOKEN=dummy_token/' .env
sed -i 's/BRAVE_SEARCH_API=.*/BRAVE_SEARCH_API=dummy_api/' .env
sed -i 's/E2B_API_KEY=.*/E2B_API_KEY=dummy_token/' .env

- name: pyright
run: uv run pyright
Expand All @@ -73,7 +74,7 @@ jobs:
with:
aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
aws-region: "eu-west-1"
aws-region: "us-east-1"
- name: Start EC2 runner
id: start-ec2-runner
uses: NillionNetwork/[email protected]
Expand All @@ -82,12 +83,12 @@ jobs:
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
runners-per-machine: 3
number-of-machines: 1
ec2-image-id: ami-0174a246556e8750b
ec2-instance-type: g4dn.xlarge
subnet-id: subnet-0ec4c353621eabae2
security-group-id: sg-03ee5c56e1f467aa0
key-name: production-github-runner-key
iam-role-name: github-runners-production-github-runner-ec2
ec2-image-id: ami-0e70d84403fc045d7
ec2-instance-type: g6.xlarge
subnet-id: subnet-0bb357f46d1bc355c
security-group-id: sg-022a5cdcf57e9618b
key-name: us-east-1-github-runner-key
iam-role-name: github-runners-us-east-1-github-runner-ec2
aws-resource-tags: >
[
{"Key": "Name", "Value": "github-runner-${{ github.run_id }}-${{ github.run_number }}"},
Expand All @@ -96,7 +97,7 @@ jobs:
{"Key": "Deployment", "Value": "github-runners"},
{"Key": "Type", "Value": "GithubRunner"},
{"Key": "User", "Value": "ec2-user"},
{"Key": "Environment", "Value": "production"}
{"Key": "Environment", "Value": "us-east-1"}
]

build-images:
Expand Down Expand Up @@ -149,7 +150,7 @@ jobs:
sed -i 's/NILDB_COLLECTION=.*/NILDB_COLLECTION=${{ secrets.NILDB_COLLECTION }}/' .env

- name: Compose docker-compose.yml
run: python3 ./scripts/docker-composer.py --dev -f docker/compose/docker-compose.llama-1b-gpu.ci.yml -o development-compose.yml
run: python3 ./scripts/docker-composer.py --dev -f docker/compose/docker-compose.gpt-20b-gpu.ci.yml -o development-compose.yml

- name: GPU stack versions (non-fatal)
shell: bash
Expand Down Expand Up @@ -327,7 +328,7 @@ jobs:
with:
aws-access-key-id: ${{ secrets.GH_AWS_ACCESS_KEY }}
aws-secret-access-key: ${{ secrets.GH_AWS_SECRET_KEY }}
aws-region: "eu-west-1"
aws-region: "us-east-1"

- name: Stop EC2 runner
uses: NillionNetwork/[email protected]
Expand Down
45 changes: 45 additions & 0 deletions docker/compose/docker-compose.gpt-20b-gpu.ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# CI compose fragment: runs the openai/gpt-oss-20b model from the
# nilai-vllm image on a single NVIDIA GPU.
services:
  gpt_20b_gpu:
    image: nillion/nilai-vllm:latest
    deploy:
      resources:
        reservations:
          # Reserve exactly one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

    ulimits:
      # -1 removes the locked-memory limit; stack is 64 MiB (67108864 bytes).
      memlock: -1
      stack: 67108864
    env_file:
      - .env
    restart: unless-stopped
    # NOTE(review): `etcd` is not defined in this file; presumably it comes
    # from another fragment merged by scripts/docker-composer.py - confirm.
    depends_on:
      etcd:
        condition: service_healthy
    # Folded scalar: the lines below are joined with spaces into one
    # argument string.
    command: >
      --model openai/gpt-oss-20b
      --gpu-memory-utilization 0.95
      --max-model-len 10000
      --max-num-batched-tokens 10000
      --max-num-seqs 2
      --tensor-parallel-size 1
      --uvicorn-log-level warning
      --async-scheduling
    environment:
      # SVC_HOST matches this service's name; SVC_PORT matches the port
      # probed by the healthcheck below.
      - SVC_HOST=gpt_20b_gpu
      - SVC_PORT=8000
      - ETCD_HOST=etcd
      - ETCD_PORT=2379
      - TOOL_SUPPORT=true
    volumes:
      - hugging_face_models:/root/.cache/huggingface  # cache models
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      retries: 10
      # Failures during the first 900s (15 min) do not count towards retries.
      start_period: 900s
      timeout: 15s
# Named volume with default settings, mounted above as the Hugging Face cache.
volumes:
  hugging_face_models:
2 changes: 1 addition & 1 deletion docker/nilauth/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ payments:

subscriptions:
renewal_threshold_seconds: 1000
length_seconds: 120
length_seconds: 900
dollar_cost:
nilai: 1
nildb: 1
Expand Down
2 changes: 1 addition & 1 deletion nilai-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
"uvicorn>=0.32.1",
"httpx>=0.27.2",
"nilrag>=0.1.11",
"openai>=1.59.9",
"openai>=1.99.2",
"pg8000>=1.31.2",
"prometheus_fastapi_instrumentator>=7.0.2",
"asyncpg>=0.30.0",
Expand Down
36 changes: 21 additions & 15 deletions nilai-api/src/nilai_api/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# Import all configuration models
import json
import logging
from pydantic import BaseModel
from .environment import EnvironmentConfig
from .database import DatabaseConfig, EtcdConfig, RedisConfig
from .auth import AuthConfig, DocsConfig
from .nildb import NilDBConfig
from .web_search import WebSearchSettings
from .rate_limiting import RateLimitingConfig
from .utils import create_config_model, CONFIG_DATA
from pydantic import BaseModel
import logging


class NilAIConfig(BaseModel):
Expand Down Expand Up @@ -38,19 +38,25 @@ class NilAIConfig(BaseModel):

def prettify(self):
    """Return the configuration as indented JSON with secret-looking values masked.

    The model is dumped with ``model_dump(mode="json")``. Every string value
    stored under a key whose name contains ``"pass"``, ``"token"`` or
    ``"key"`` is replaced by a fixed run of asterisks. Masking applies at any
    nesting depth, including dictionaries held inside lists. Non-string
    values (``None``, numbers, booleans) are left untouched, so unset
    secrets still show up as ``null``.

    Returns:
        str: The redacted configuration serialized by ``json.dumps`` with
        ``indent=4``.
    """
    sensitive_markers = ("pass", "token", "key")
    mask = "***************"

    def _is_sensitive(name):
        # Substring match on the key name, e.g. "api_key" matches "key".
        return isinstance(name, str) and any(
            marker in name for marker in sensitive_markers
        )

    def _redact(node):
        # Build a redacted copy rather than mutating the dumped structure
        # while walking it.
        if isinstance(node, dict):
            return {
                name: mask
                if isinstance(val, str) and _is_sensitive(name)
                else _redact(val)
                for name, val in node.items()
            }
        if isinstance(node, list):
            return [_redact(item) for item in node]
        return node

    return json.dumps(_redact(self.model_dump(mode="json")), indent=4)


Expand Down
5 changes: 3 additions & 2 deletions nilai-api/src/nilai_api/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ auth:
docs:
token: null


# Web Search Configuration
web_search:
api_key: null
Expand All @@ -30,8 +31,8 @@ rate_limiting:
user_rate_limit_minute: 100
user_rate_limit_hour: 1000
user_rate_limit_day: 10000
web_search_rate_limit_minute: 1
web_search_rate_limit_hour: 3
web_search_rate_limit_minute: 6
web_search_rate_limit_hour: 18
web_search_rate_limit_day: 72
web_search_rate_limit: null # For-good rate limit
model_concurrent_rate_limit:
Expand Down
Loading
Loading