Commit e7ab98f

tei update, .env update, type fix, ellipsis fix

alt-glitch committed Jul 2, 2024
1 parent debe630
Showing 6 changed files with 22 additions and 20 deletions.
3 changes: 1 addition & 2 deletions .env.example
@@ -7,7 +7,6 @@ COZO_PORT=9070
 COZO_ROCKSDB_DIR=cozo.db
 DTYPE=float16
 EMBEDDING_SERVICE_URL=http://text-embeddings-inference/embed
-DOCS_EMBEDDING_SERVICE_URL=http://docs-text-embeddings-inference/embed
 GATEWAY_PORT=80
 GPU_MEMORY_UTILIZATION=0.90

@@ -22,7 +21,7 @@ MODEL_API_KEY=myauthkey
 MODEL_API_KEY_HEADER_NAME=Authorization
 MODEL_API_URL=http://model-serving:8000
 MODEL_INFERENCE_URL=http://model-serving:8000/v1
-MODEL_ID=BAAI/llm-embedder
+MODEL_ID=BAAI/bge-m3

 # MODEL_NAME="OpenPipe/Hermes-2-Theta-Llama-3-8B-32k"
 MODEL_NAME="julep-ai/Hermes-2-Theta-Llama-3-8B"
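The embedding model changes from BAAI/llm-embedder to BAAI/bge-m3, still served by text-embeddings-inference (TEI) at the /embed route named in EMBEDDING_SERVICE_URL. As a minimal sketch, a request against that route might look like this (URL taken from the file above; adjust host and port for a local setup):

import requests

# POST a batch of inputs to the TEI /embed route configured above.
resp = requests.post(
    "http://text-embeddings-inference/embed",
    json={"inputs": ["What is the capital of France?"]},
    timeout=30,
)
resp.raise_for_status()
embeddings = resp.json()  # one vector per input string
print(len(embeddings), len(embeddings[0]))  # e.g. 1 1024 (bge-m3 produces 1024-dim vectors)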
2 changes: 1 addition & 1 deletion agents-api/agents_api/autogen/openapi_model.py
@@ -837,7 +837,7 @@ class ImageUrl(BaseModel):
     """
     URL or base64 data url (e.g. `data:image/jpeg;base64,<the base64 encoded image>`)
     """
-    detail: Detail | None = "auto"
+    detail: Detail | None = "auto"  # pytype: disable=annotation-type-mismatch
     """
     image detail to feed into the model can be low | high | auto
     """
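This is the "type fix" from the commit message: pytype flags the plain-string default "auto" against the Detail | None annotation, and the trailing comment suppresses exactly that check. A toy reproduction of the pattern, using a hypothetical Detail enum in place of the generated type:

from enum import Enum

from pydantic import BaseModel


# Hypothetical stand-in for the generated Detail type.
class Detail(str, Enum):
    low = "low"
    high = "high"
    auto = "auto"


class ImageUrl(BaseModel):
    # pytype sees a str default on a Detail | None field and reports
    # annotation-type-mismatch, even though pydantic accepts the value.
    detail: Detail | None = "auto"  # pytype: disable=annotation-type-mismatch


print(ImageUrl(detail="auto").detail)  # Detail.auto: string input is coerced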
20 changes: 10 additions & 10 deletions agents-api/agents_api/model_registry.py
@@ -3,20 +3,16 @@
 """

 import ast
 import datetime
 import json
 import os
 from typing import Dict
 from agents_api.clients.worker.types import ChatML
 from agents_api.common.exceptions.agents import (
     AgentModelNotValid,
     MissingAgentModelAPIKeyError,
 )
 import litellm
 from litellm.utils import get_valid_models
 import yaml
 from pydantic import BaseModel
-from typing import List, Dict, Literal, Optional
+from typing import Dict, Literal, Optional
 import xml.etree.ElementTree as ET

@@ -108,7 +104,7 @@
     "TinyLlama/TinyLlama_v1.1": 2048,
     "casperhansen/llama-3-8b-instruct-awq": 8192,
     "julep-ai/Hermes-2-Theta-Llama-3-8B": 8192,
-    "OpenPipe/Hermes-2-Theta-Llama-3-8B-32k": 32768
+    "OpenPipe/Hermes-2-Theta-Llama-3-8B-32k": 32768,
 }

 LOCAL_MODELS_WITH_TOOL_CALLS = {

@@ -121,6 +117,7 @@
 ALL_AVAILABLE_MODELS = litellm.model_list + list(LOCAL_MODELS.keys())
 VALID_MODELS = get_valid_models() + list(LOCAL_MODELS.keys())

+
 class FunctionCall(BaseModel):
     arguments: dict
     """
@@ -144,6 +141,7 @@ class FunctionSignature(BaseModel):
     function: FunctionDefinition
     type: Literal["function"]

+
 class PromptSchema(BaseModel):
     Role: str
     Objective: str

@@ -208,10 +206,12 @@ def validate_and_extract_tool_calls(assistant_content):
                 # Fallback to ast.literal_eval if json.loads fails
                 json_data = ast.literal_eval(json_text)
             except (SyntaxError, ValueError) as eval_err:
-                error_message = f"JSON parsing failed with both json.loads and ast.literal_eval:\n"\
-                    f"- JSON Decode Error: {json_err}\n"\
-                    f"- Fallback Syntax/Value Error: {eval_err}\n"\
-                    f"- Problematic JSON text: {json_text}"
+                error_message = (
+                    f"JSON parsing failed with both json.loads and ast.literal_eval:\n"
+                    f"- JSON Decode Error: {json_err}\n"
+                    f"- Fallback Syntax/Value Error: {eval_err}\n"
+                    f"- Problematic JSON text: {json_text}"
+                )
                 continue
         except Exception as e:
             error_message = f"Cannot strip text: {e}"
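The reformatted block above sits inside a two-stage parser: strict json.loads first, then ast.literal_eval as a fallback for Python-literal payloads, with a combined error message when both fail. A standalone sketch of the same pattern (the function name here is illustrative, not the module's API):

import ast
import json


def parse_tool_call_payload(json_text: str) -> dict | None:
    """Try strict JSON first, then fall back to Python-literal parsing."""
    try:
        return json.loads(json_text)
    except json.JSONDecodeError as json_err:
        try:
            # ast.literal_eval handles single-quoted, repr-style payloads.
            return ast.literal_eval(json_text)
        except (SyntaxError, ValueError) as eval_err:
            print(
                "JSON parsing failed with both json.loads and ast.literal_eval:\n"
                f"- JSON Decode Error: {json_err}\n"
                f"- Fallback Syntax/Value Error: {eval_err}"
            )
            return None


# Single quotes are invalid JSON but parse fine as a Python literal.
print(parse_tool_call_payload("{'name': 'get_weather', 'arguments': {'city': 'Paris'}}"))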
14 changes: 8 additions & 6 deletions agents-api/agents_api/routers/sessions/session.py
@@ -31,9 +31,8 @@
 from ...model_registry import (
     LOCAL_MODELS,
     LOCAL_MODELS_WITH_TOOL_CALLS,
-    get_extra_settings,
     load_context,
-    validate_and_extract_tool_calls
+    validate_and_extract_tool_calls,
 )
 from ...models.entry.add_entries import add_entries_query
 from ...models.entry.proc_mem_context import proc_mem_context_query
@@ -398,7 +397,6 @@ async def forward(
     if session_data is not None:
         settings.model = session_data.model

-
     return messages, settings, doc_ids

 @cache
@@ -436,9 +434,12 @@ async def generate(
         api_key=api_key,
     )
     if model in LOCAL_MODELS_WITH_TOOL_CALLS:
-        validation, tool_call, error_msg = validate_and_extract_tool_calls(res.choices[0].message.content)
-        if (validation):
-            res.choices[0].message.role = "function_call" if tool_call else "assistant"
+        validation, tool_call, error_msg = validate_and_extract_tool_calls(
+            res.choices[0].message.content
+        )
+        if validation:
+            res.choices[0].message.role = (
+                "function_call" if tool_call else "assistant"
+            )
             res.choices[0].finish_reason = "tool_calls"
             res.choices[0].message.tool_calls = tool_call
             res.choices[0].message.content = json.dumps(tool_call)
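validate_and_extract_tool_calls returns a three-tuple (validation flag, extracted tool calls, error message), and the handler rewrites the completion choice in place when validation succeeds. A minimal sketch of that contract on a response-like object (the stub parser and SimpleNamespace objects are illustrative, not the module's real types):

import json
from types import SimpleNamespace


def stub_validate_and_extract_tool_calls(content: str):
    """Illustrative stand-in returning (validation, tool_calls, error_msg)."""
    try:
        return True, [json.loads(content)], None
    except json.JSONDecodeError as err:
        return False, None, str(err)


message = SimpleNamespace(role="assistant", content='{"name": "get_weather"}', tool_calls=None)
choice = SimpleNamespace(message=message, finish_reason="stop")

validation, tool_call, error_msg = stub_validate_and_extract_tool_calls(choice.message.content)
if validation:
    # Same rewrite as the diff above: flag the choice as a tool call.
    choice.message.role = "function_call" if tool_call else "assistant"
    choice.finish_reason = "tool_calls"
    choice.message.tool_calls = tool_call
    choice.message.content = json.dumps(tool_call)

print(choice.message.role, choice.finish_reason)  # function_call tool_calls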
2 changes: 1 addition & 1 deletion agents-api/docker-compose.yml
@@ -61,7 +61,7 @@ services:
       - DTYPE=float16
       - MODEL_ID=BAAI/bge-m3

-    image: ghcr.io/huggingface/text-embeddings-inference:1.0
+    image: ghcr.io/huggingface/text-embeddings-inference:1.3
     ports:
       - "8082:80"
     volumes:
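With the TEI image bumped from 1.0 to 1.3, one quick sanity check is the service's /info route, which reports the loaded model and server version (assuming the 8082:80 port mapping above and that the /info response shape is unchanged in 1.3):

import requests

# Query the TEI container's /info route through the published host port.
info = requests.get("http://localhost:8082/info", timeout=10).json()
print(info.get("model_id"))  # expected: BAAI/bge-m3
print(info.get("version"))   # expected: a 1.3.x build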
1 change: 1 addition & 0 deletions model-serving/Dockerfile
@@ -8,4 +8,5 @@ ENV MAX_MODEL_LEN 8192
 ENV MAX_NUM_SEQS 1
 ENV GPU_MEMORY_UTILIZATION 0.95
 ENV DTYPE bfloat16
+ENV MODEL_API_KEY myauthkey
 ENTRYPOINT python3 -m vllm.entrypoints.openai.api_server --model $MODEL_NAME --tensor-parallel-size $TP_SIZE --enforce-eager --gpu-memory-utilization $GPU_MEMORY_UTILIZATION --max-model-len $MAX_MODEL_LEN --max-num-seqs $MAX_NUM_SEQS --dtype $DTYPE --trust-remote-code --api_key=$MODEL_API_KEY
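The new MODEL_API_KEY default means the vLLM OpenAI-compatible server starts with authentication enabled, so clients must present the key. A minimal sketch with the openai Python client, reusing MODEL_INFERENCE_URL and MODEL_API_KEY from .env.example (swap in a real secret rather than the myauthkey placeholder):

from openai import OpenAI

# Point the client at the authenticated vLLM server defined above.
client = OpenAI(
    base_url="http://model-serving:8000/v1",
    api_key="myauthkey",  # placeholder value from .env.example
)

chat = client.chat.completions.create(
    model="julep-ai/Hermes-2-Theta-Llama-3-8B",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(chat.choices[0].message.content)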
