Commit e7ab98f

tei update, .env update, type fix, ellipsis fix

alt-glitch committed Jul 2, 2024
1 parent debe630
Showing 6 changed files with 22 additions and 20 deletions.
3 changes: 1 addition & 2 deletions .env.example
@@ -7,7 +7,6 @@ COZO_PORT=9070
 COZO_ROCKSDB_DIR=cozo.db
 DTYPE=float16
 EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
-DOCS_EMBEDDING_SERVICE_URL=http://docs-text-embeddings-inference/embed
 GATEWAY_PORT=80
 GPU_MEMORY_UTILIZATION=0.90

@@ -22,7 +21,7 @@ MODEL_API_KEY=myauthkey
 MODEL_API_KEY_HEADER_NAME=Authorization
 MODEL_API_URL=http://model-serving:8000
 MODEL_INFERENCE_URL=http://model-serving:8000/v1
-MODEL_ID=BAAI/llm-embedder
+MODEL_ID=BAAI/bge-m3

 # MODEL_NAME="OpenPipe/Hermes-2-Theta-Llama-3-8B-32k"
 MODEL_NAME="julep-ai/Hermes-2-Theta-Llama-3-8B"
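The embedding model changes from BAAI/llm-embedder to BAAI/bge-m3, still served by text-embeddings-inference (TEI) at the /embed route named in EMBEDDING_SERVICE_URL. As a minimal sketch, a request against that route might look like this (URL taken from the file above; adjust host and port for a local setup):

import requests

# POST a batch of inputs to the TEI /embed route configured above.
resp = requests.post(
    "http://text-embeddings-inference/embed",
    json={"inputs": ["What is the capital of France?"]},
    timeout=30,
)
resp.raise_for_status()
embeddings = resp.json()  # one vector per input string
print(len(embeddings), len(embeddings[0]))  # e.g. 1 1024 (bge-m3 produces 1024-dim vectors)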
2 changes: 1 addition & 1 deletion agents-api/agents_api/autogen/openapi_model.py
@@ -837,7 +837,7 @@ class ImageUrl(BaseModel):
     """
     URL or base64 data url (e.g. `data:image/jpeg;base64,<the base64 encoded image>`)
     """
-    detail: Detail | None = "auto"
+    detail: Detail | None = "auto"  # pytype: disable=annotation-type-mismatch
     """
     image detail to feed into the model can be low | high | auto
     """
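This is the "type fix" from the commit message: pytype flags the plain-string default "auto" against the Detail | None annotation, and the trailing comment suppresses exactly that check. A toy reproduction of the pattern, using a hypothetical Detail enum in place of the generated type:

from enum import Enum

from pydantic import BaseModel


# Hypothetical stand-in for the generated Detail type.
class Detail(str, Enum):
    low = "low"
    high = "high"
    auto = "auto"


class ImageUrl(BaseModel):
    # pytype sees a str default on a Detail | None field and reports
    # annotation-type-mismatch, even though pydantic accepts the value.
    detail: Detail | None = "auto"  # pytype: disable=annotation-type-mismatch


print(ImageUrl(detail="auto").detail)  # Detail.auto: string input is coerced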
20 changes: 10 additions & 10 deletions agents-api/agents_api/model_registry.py
@@ -3,20 +3,16 @@
 """

 import ast
 import datetime
 import json
 import os
 from typing import Dict
 from agents_api.clients.worker.types import ChatML
 from agents_api.common.exceptions.agents import (
     AgentModelNotValid,
     MissingAgentModelAPIKeyError,
 )
 import litellm
 from litellm.utils import get_valid_models
 import yaml
 from pydantic import BaseModel
-from typing import List, Dict, Literal, Optional
+from typing import Dict, Literal, Optional
 import xml.etree.ElementTree as ET

@@ -108,7 +104,7 @@
     "TinyLlama/TinyLlama_v1.1": 2048,
     "casperhansen/llama-3-8b-instruct-awq": 8192,
     "julep-ai/Hermes-2-Theta-Llama-3-8B": 8192,
-    "OpenPipe/Hermes-2-Theta-Llama-3-8B-32k": 32768
+    "OpenPipe/Hermes-2-Theta-Llama-3-8B-32k": 32768,
 }

 LOCAL_MODELS_WITH_TOOL_CALLS = {

@@ -121,6 +117,7 @@
 ALL_AVAILABLE_MODELS = litellm.model_list + list(LOCAL_MODELS.keys())
 VALID_MODELS = get_valid_models() + list(LOCAL_MODELS.keys())

+
 class FunctionCall(BaseModel):
     arguments: dict
     """
@@ -144,6 +141,7 @@ class FunctionSignature(BaseModel):
     function: FunctionDefinition
     type: Literal["function"]

+
 class PromptSchema(BaseModel):
     Role: str
     Objective: str

@@ -208,10 +206,12 @@ def validate_and_extract_tool_calls(assistant_content):
                 # Fallback to ast.literal_eval if json.loads fails
                 json_data = ast.literal_eval(json_text)
             except (SyntaxError, ValueError) as eval_err:
-                error_message = f"JSON parsing failed with both json.loads and ast.literal_eval:\n"\
-                    f"- JSON Decode Error: {json_err}\n"\
-                    f"- Fallback Syntax/Value Error: {eval_err}\n"\
-                    f"- Problematic JSON text: {json_text}"
+                error_message = (
+                    f"JSON parsing failed with both json.loads and ast.literal_eval:\n"
+                    f"- JSON Decode Error: {json_err}\n"
+                    f"- Fallback Syntax/Value Error: {eval_err}\n"
+                    f"- Problematic JSON text: {json_text}"
+                )
                 continue
         except Exception as e:
             error_message = f"Cannot strip text: {e}"
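The reformatted block above sits inside a two-stage parser: strict json.loads first, then ast.literal_eval as a fallback for Python-literal payloads, with a combined error message when both fail. A standalone sketch of the same pattern (the function name here is illustrative, not the module's API):

import ast
import json


def parse_tool_call_payload(json_text: str) -> dict | None:
    """Try strict JSON first, then fall back to Python-literal parsing."""
    try:
        return json.loads(json_text)
    except json.JSONDecodeError as json_err:
        try:
            # ast.literal_eval handles single-quoted, repr-style payloads.
            return ast.literal_eval(json_text)
        except (SyntaxError, ValueError) as eval_err:
            print(
                "JSON parsing failed with both json.loads and ast.literal_eval:\n"
                f"- JSON Decode Error: {json_err}\n"
                f"- Fallback Syntax/Value Error: {eval_err}"
            )
            return None


# Single quotes are invalid JSON but parse fine as a Python literal.
print(parse_tool_call_payload("{'name': 'get_weather', 'arguments': {'city': 'Paris'}}"))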
14 changes: 8 additions & 6 deletions agents-api/agents_api/routers/sessions/session.py
@@ -31,9 +31,8 @@
 from ...model_registry import (
     LOCAL_MODELS,
     LOCAL_MODELS_WITH_TOOL_CALLS,
-    get_extra_settings,
     load_context,
-    validate_and_extract_tool_calls
+    validate_and_extract_tool_calls,
 )
 from ...models.entry.add_entries import add_entries_query
 from ...models.entry.proc_mem_context import proc_mem_context_query
@@ -398,7 +397,6 @@ async def forward(
     if session_data is not None:
         settings.model = session_data.model

-
     return messages, settings, doc_ids

 @cache
@@ -436,9 +434,12 @@ async def generate(
         api_key=api_key,
     )
     if model in LOCAL_MODELS_WITH_TOOL_CALLS:
-        validation, tool_call, error_msg = validate_and_extract_tool_calls(res.choices[0].message.content)
-        if (validation):
-            res.choices[0].message.role = "function_call" if tool_call else "assistant"
+        validation, tool_call, error_msg = validate_and_extract_tool_calls(
+            res.choices[0].message.content
+        )
+        if validation:
+            res.choices[0].message.role = (
+                "function_call" if tool_call else "assistant"
+            )
             res.choices[0].finish_reason = "tool_calls"
             res.choices[0].message.tool_calls = tool_call
             res.choices[0].message.content = json.dumps(tool_call)
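validate_and_extract_tool_calls returns a three-tuple (validation flag, extracted tool calls, error message), and the handler rewrites the completion choice in place when validation succeeds. A minimal sketch of that contract on a response-like object (the stub parser and SimpleNamespace objects are illustrative, not the module's real types):

import json
from types import SimpleNamespace


def stub_validate_and_extract_tool_calls(content: str):
    """Illustrative stand-in returning (validation, tool_calls, error_msg)."""
    try:
        return True, [json.loads(content)], None
    except json.JSONDecodeError as err:
        return False, None, str(err)


message = SimpleNamespace(role="assistant", content='{"name": "get_weather"}', tool_calls=None)
choice = SimpleNamespace(message=message, finish_reason="stop")

validation, tool_call, error_msg = stub_validate_and_extract_tool_calls(choice.message.content)
if validation:
    # Same rewrite as the diff above: flag the choice as a tool call.
    choice.message.role = "function_call" if tool_call else "assistant"
    choice.finish_reason = "tool_calls"
    choice.message.tool_calls = tool_call
    choice.message.content = json.dumps(tool_call)

print(choice.message.role, choice.finish_reason)  # function_call tool_calls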
2 changes: 1 addition & 1 deletion agents-api/docker-compose.yml
@@ -61,7 +61,7 @@ services:
       - DTYPE=float16
       - MODEL_ID=BAAI/bge-m3

-    image: ghcr.io/huggingface/text-embeddings-inference:1.0
+    image: ghcr.io/huggingface/text-embeddings-inference:1.3
     ports:
       - "8082:80"
     volumes:
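With the TEI image bumped from 1.0 to 1.3, one quick sanity check is the service's /info route, which reports the loaded model and server version (assuming the 8082:80 port mapping above and that the /info response shape is unchanged in 1.3):

import requests

# Query the TEI container's /info route through the published host port.
info = requests.get("http://localhost:8082/info", timeout=10).json()
print(info.get("model_id"))  # expected: BAAI/bge-m3
print(info.get("version"))   # expected: a 1.3.x build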
1 change: 1 addition & 0 deletions model-serving/Dockerfile
@@ -8,4 +8,5 @@ ENV MAX_MODEL_LEN 8192
 ENV MAX_NUM_SEQS 1
 ENV GPU_MEMORY_UTILIZATION 0.95
 ENV DTYPE bfloat16
+ENV MODEL_API_KEY myauthkey
 ENTRYPOINT python3 -m vllm.entrypoints.openai.api_server --model $MODEL_NAME --tensor-parallel-size $TP_SIZE --enforce-eager --gpu-memory-utilization $GPU_MEMORY_UTILIZATION --max-model-len $MAX_MODEL_LEN --max-num-seqs $MAX_NUM_SEQS --dtype $DTYPE --trust-remote-code --api_key=$MODEL_API_KEY
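The new MODEL_API_KEY default means the vLLM OpenAI-compatible server starts with authentication enabled, so clients must present the key. A minimal sketch with the openai Python client, reusing MODEL_INFERENCE_URL and MODEL_API_KEY from .env.example (swap in a real secret rather than the myauthkey placeholder):

from openai import OpenAI

# Point the client at the authenticated vLLM server defined above.
client = OpenAI(
    base_url="http://model-serving:8000/v1",
    api_key="myauthkey",  # placeholder value from .env.example
)

chat = client.chat.completions.create(
    model="julep-ai/Hermes-2-Theta-Llama-3-8B",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(chat.choices[0].message.content)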
