Skip to content

Commit

Permalink
Merge pull request #448 from julep-ai/f/litellm
Browse files Browse the repository at this point in the history
feat(agents-api): Add litellm proxy to docker compose
  • Loading branch information
creatorrr authored Aug 9, 2024
2 parents 6483fbe + 781c7ab commit 31a485e
Show file tree
Hide file tree
Showing 8 changed files with 761 additions and 1,352 deletions.
4 changes: 2 additions & 2 deletions agents-api/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ services:
container_name: text-embeddings-inference
environment:
- DTYPE=float16
- MODEL_ID=BAAI/bge-m3
- MODEL_ID=Alibaba-NLP/gte-large-en-v1.5

image: ghcr.io/huggingface/text-embeddings-inference:1.3
image: ghcr.io/huggingface/text-embeddings-inference:1.5
ports:
- "8082:80"
volumes:
Expand Down
1,917 changes: 573 additions & 1,344 deletions agents-api/poetry.lock

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions agents-api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,19 @@ pycozo = {extras = ["embedded"], version = "^0.7.6"}
uvicorn = "^0.23.2"
fire = "^0.5.0"
environs = "^10.3.0"
google-cloud-aiplatform = "^1.33.0"
pandas = "^2.1.0"
openai = "^1.12.0"
httpx = "^0.26.0"
async-lru = "^2.0.4"
sentry-sdk = {extras = ["fastapi"], version = "^1.38.0"}
temporalio = "^1.4.0"
pydantic = "^2.5.3"
arrow = "^1.3.0"
jinja2 = "^3.1.3"
jinja2schema = "^0.1.4"
jsonschema = "^4.21.1"
litellm = "^1.35.32"
litellm = "^1.43.3"
numpy = "^1.26.4"
transformers = "^4.40.1"
tiktoken = "^0.6.0"
xxhash = "^3.4.1"
tiktoken = "^0.7.0"
tenacity = "^8.3.0"
beartype = "^0.18.5"
pydantic-partial = "^0.5.5"
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ include:
- ./model-serving/docker-compose.yml
- ./gateway/docker-compose.yml
- ./agents-api/docker-compose.yml
- ./llm-proxy/docker-compose.yml

# TODO: Enable after testing
# - ./monitoring/docker-compose.yml
1 change: 1 addition & 0 deletions llm-proxy/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!.keys
1 change: 1 addition & 0 deletions llm-proxy/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.keys
54 changes: 54 additions & 0 deletions llm-proxy/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# LiteLLM proxy stack: the proxy container plus the Postgres and Redis
# services it is declared to depend on.
services:
  litellm:
    image: ghcr.io/berriai/litellm:main-stable
    volumes:
      - ./litellm-config.yaml:/app/config.yaml
      # Written with an explicit "./" so it is unmistakably a bind mount of the
      # local .keys path and not a named volume.
      - ./.keys:/app/.keys
    ports:
      - "4000:4000"
    env_file:
      - ../.env
    # Every argument stays a quoted string so YAML does not retype "4000", "8"
    # or "False" before they reach the container's command line.
    command:
      - "--config"
      - "/app/config.yaml"
      - "--port"
      - "4000"
      - "--num_workers"
      - "8"
      - "--telemetry"
      - "False"
    # Long-form depends_on: wait until Postgres passes its healthcheck instead
    # of merely waiting for the container to be created. Redis defines no
    # healthcheck, so it is only required to have started.
    depends_on:
      litellm-db:
        condition: service_healthy
      litellm-redis:
        condition: service_started

  litellm-db:
    # NOTE(review): the image tag is unpinned; consider pinning a major version
    # so the data volume is not opened by an incompatible Postgres release.
    image: postgres
    restart: always
    volumes:
      - litellm-db-data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    env_file:
      - ../.env
    healthcheck:
      # NOTE(review): database "litellm" and user "llmproxy" are hard-coded
      # here; presumably they must match the Postgres settings supplied via
      # ../.env - confirm.
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10
      # Failures during the start period do not count towards `retries`, so a
      # slow first-time database initialisation does not mark the service
      # unhealthy and block the proxy from starting.
      start_period: 10s

  litellm-redis:
    image: redis/redis-stack-server
    restart: always
    volumes:
      - litellm-redis-data:/data
    ports:
      - "6379:6379"
    env_file:
      - ../.env

# Named volumes with default settings, mounted by the litellm-db and
# litellm-redis services for their data directories.
volumes:
  litellm-db-data: {}
  litellm-redis-data: {}
127 changes: 127 additions & 0 deletions llm-proxy/litellm-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Environment variables declared for the proxy from within this config file.
environment_variables:
  # Quoted so the value stays the string "true" rather than a YAML boolean.
  # NOTE(review): presumably disables the proxy's generated API docs - confirm.
  NO_DOCS: 'true'

# Models exposed by the proxy. `model_name` is the public alias; everything
# under `litellm_params` describes the upstream deployment. Credentials are
# never inlined: each `os.environ/<NAME>` value points at an environment
# variable. Every entry carries a "paid" or "free" tag.
model_list:
  # ===== Paid chat models =====

  # --- Google Vertex AI (Gemini, and Claude served through Vertex) ---
  - model_name: gemini-1.5-pro
    litellm_params:
      model: vertex_ai_beta/gemini-1.5-pro
      vertex_credentials: os.environ/GOOGLE_APPLICATION_CREDENTIALS
      tags: [paid]

  # NOTE(review): the alias "claude-3.5-sonnet" is declared again in the
  # Anthropic section below; presumably both deployments are meant to be
  # served under one name - confirm this is intentional.
  - model_name: claude-3.5-sonnet
    litellm_params:
      model: vertex_ai/claude-3-5-sonnet@20240620
      vertex_credentials: os.environ/GOOGLE_APPLICATION_CREDENTIALS
      tags: [paid]

  # --- OpenAI ---
  - model_name: gpt-4-turbo
    litellm_params:
      model: openai/gpt-4-turbo
      api_key: os.environ/OPENAI_API_KEY
      tags: [paid]

  - model_name: gpt-4o
    litellm_params:
      model: openai/gpt-4o
      api_key: os.environ/OPENAI_API_KEY
      tags: [paid]

  # --- Anthropic (direct API) ---
  - model_name: claude-3.5-sonnet
    litellm_params:
      model: claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      tags: [paid]

  # --- Groq ---
  - model_name: llama-3.1-70b
    litellm_params:
      model: groq/llama-3.1-70b-versatile
      api_key: os.environ/GROQ_API_KEY
      tags: [paid]

  - model_name: llama-3.1-8b
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_key: os.environ/GROQ_API_KEY
      tags: [paid]

  # ===== Embedding models =====

  # --- Hosted, paid ---
  - model_name: text-embedding-3-large
    litellm_params:
      model: openai/text-embedding-3-large
      api_key: os.environ/OPENAI_API_KEY
      tags: [paid]

  - model_name: voyage-multilingual-2
    litellm_params:
      model: voyage/voyage-multilingual-2
      api_key: os.environ/VOYAGE_API_KEY
      tags: [paid]

  - model_name: voyage-large-2
    litellm_params:
      model: voyage/voyage-large-2
      api_key: os.environ/VOYAGE_API_KEY
      tags: [paid]

  # --- Free; reached through the base URL in EMBEDDING_SERVICE_BASE using the
  # `openai/` provider prefix, with no api_key configured ---
  - model_name: gte-large-en-v1.5
    litellm_params:
      model: openai/Alibaba-NLP/gte-large-en-v1.5
      api_base: os.environ/EMBEDDING_SERVICE_BASE
      tags: [free]

  - model_name: bge-m3
    litellm_params:
      model: openai/BAAI/bge-m3
      api_base: os.environ/EMBEDDING_SERVICE_BASE
      tags: [free]

  # ===== Free chat models =====

  - model_name: gpt-4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_key: os.environ/OPENAI_API_KEY
      tags: [free]


# Library-level LiteLLM settings. For the available names see
# https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
litellm_settings:
  # --- Retries, failure tolerance and timeouts ---
  retry: true
  num_retries: 3
  allowed_fails: 3
  cooldown_time: 30
  request_timeout: 180

  # --- Request parameter handling ---
  drop_params: true
  modify_params: true
  add_function_to_prompt: true

  # --- Diagnostics ---
  telemetry: false
  set_verbose: false

  # --- Response caching, backed by Redis ---
  cache: true
  cache_params:
    type: redis
    namespace: litellm_caching
    # Connection details are resolved from environment variables.
    host: os.environ/LITELLM_REDIS_HOST
    port: os.environ/LITELLM_REDIS_PORT
    password: os.environ/LITELLM_REDIS_PASSWORD

# Router configuration: which routing strategy to use and how many retries to
# allow.
router_settings:
  num_retries: 3
  routing_strategy: simple-shuffle

# Proxy-wide settings. The master key and database URL are read from
# environment variables rather than being stored in this file.
general_settings:
  enforce_user_param: true
  database_url: os.environ/LITELLM_DATABASE_URL
  master_key: os.environ/LITELLM_MASTER_KEY

0 comments on commit 31a485e

Please sign in to comment.