Merge pull request #1527 from h2oai/guided_json
guided_json for vllm and openai use
pseudotensor authored Apr 3, 2024
2 parents 0254827 + 4832389 commit 84fcce1
Showing 26 changed files with 913 additions and 101 deletions.
14 changes: 14 additions & 0 deletions gradio_utils/grclient.py
@@ -637,6 +637,13 @@ def query_or_summarize_or_extract(
        metadata_in_context: list = [],
        image_file: Union[str, list] = None,
        image_control: str = None,

        response_format: str = 'text',
        guided_json: Union[str, dict] = '',
        guided_regex: str = '',
        guided_choice: str = '',
        guided_grammar: str = '',

        prompt_type: Union[int, str] = None,
        prompt_dict: Dict = None,
        jq_schema=".[]",
@@ -770,6 +777,13 @@ def query_or_summarize_or_extract(
        :param image_file: Initial image for UI (or actual image for CLI) Vision Q/A. Or list of images for some models
        :param image_control: Initial image for UI Image Control
        :param response_format: 'json_object' or 'text'
               # https://github.com/vllm-project/vllm/blob/a3c226e7eb19b976a937e745f3867eb05f809278/vllm/entrypoints/openai/protocol.py#L117-L135
        :param guided_json: If specified, the output will follow the JSON schema.
        :param guided_regex: If specified, the output will follow the regex pattern.
        :param guided_choice: If specified, the output will be exactly one of the choices.
        :param guided_grammar: If specified, the output will follow the context free grammar.
        :param prompt_type: type of prompt, usually matched to fine-tuned model or plain for foundational model
        :param prompt_dict: If prompt_type=custom, then expects (some) items returned by get_prompt(..., return_dict=True)
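For reference, a minimal sketch of calling this client with the new guided-decoding arguments (the server URL is a placeholder, and the instruction/text argument names are assumptions about the query API; only response_format and guided_json come from this diff):

from gradio_utils.grclient import GradioClient

client = GradioClient("http://localhost:7860")  # placeholder URL

# JSON schema the completion must conform to (guided_json accepts str or dict)
person_schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "age": {"type": "integer"},
    },
    "required": ["name", "age"],
}

answer = client.query_or_summarize_or_extract(
    instruction="Extract the person mentioned in the text as JSON.",  # assumed arg name
    text="Alice is 34 and lives in Prague.",  # assumed arg name
    response_format="json_object",  # 'text' or 'json_object'
    guided_json=person_schema,      # enforced by vLLM guided decoding downstream
)
print(answer)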
5 changes: 5 additions & 0 deletions openai_server/backend.py
@@ -143,6 +143,11 @@ def get_response(instruction, gen_kwargs, verbose=False, chunk_response=True, st
    # presence_penalty=(repetition_penalty - 1.0) * 2.0 + 0.0,  # so good default
    gen_kwargs['repetition_penalty'] = 0.5 * (gen_kwargs['presence_penalty'] - 0.0) + 1.0

    if gen_kwargs.get('response_format'):
        # pydantic has already ensured response_format is a dict with a valid 'type' key
        # transcribe to the h2oGPT convention of passing just the value, e.g. 'json_object'
        gen_kwargs['response_format'] = gen_kwargs.get('response_format')['type']

    kwargs.update(**gen_kwargs)

    # concurrent gradio client
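The transcription above is easy to misread in isolation; a self-contained sketch of its effect (plain Python, nothing h2oGPT-specific):

# OpenAI-style request: response_format arrives as a validated dict.
gen_kwargs = {'response_format': {'type': 'json_object'}}

# Same logic as the diff: collapse the dict to the bare value h2oGPT expects.
if gen_kwargs.get('response_format'):
    gen_kwargs['response_format'] = gen_kwargs.get('response_format')['type']

assert gen_kwargs['response_format'] == 'json_object'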
36 changes: 33 additions & 3 deletions openai_server/server.py
@@ -7,7 +7,7 @@
from threading import Thread
import time
from traceback import print_exception
-from typing import List, Dict
+from typing import List, Dict, Optional, Literal, Union
from pydantic import BaseModel, Field

import uvicorn
@@ -35,6 +35,12 @@ class Generation(BaseModel):
    min_p: float | None = 0.0


class ResponseFormat(BaseModel):
    # type must be "json_object" or "text"
    type: Literal["text", "json_object"]


# https://github.com/vllm-project/vllm/blob/a3c226e7eb19b976a937e745f3867eb05f809278/vllm/entrypoints/openai/protocol.py#L62
class H2oGPTParams(BaseModel):
    # keep in sync with evaluate()
    # handled by extra_body passed to OpenAI API
@@ -76,8 +82,8 @@ class H2oGPTParams(BaseModel):
    jq_schema: List | None = None
    extract_frames: int | None = 10
    llava_prompt: str | None = 'auto'
-    #visible_models
-    #h2ogpt_key,
+    # visible_models
+    # h2ogpt_key,
    add_search_to_context: bool | None = False

    chat_conversation: List | None = []
@@ -102,6 +108,30 @@ class H2oGPTParams(BaseModel):
    image_file: str | None = None
    image_control: str | None = None

    response_format: Optional[ResponseFormat] = Field(
        default=None,
        description=(
            "Similar to chat completion, this parameter specifies the format of "
            "output. Only {'type': 'json_object'} or {'type': 'text'} is "
            "supported."),
    )
    guided_json: Optional[Union[str, dict, BaseModel]] = Field(
        default=None,
        description="If specified, the output will follow the JSON schema.",
    )
    guided_regex: Optional[str] = Field(
        default=None,
        description="If specified, the output will follow the regex pattern.",
    )
    guided_choice: Optional[List[str]] = Field(
        default=None,
        description="If specified, the output will be exactly one of the choices.",
    )
    guided_grammar: Optional[str] = Field(
        default=None,
        description="If specified, the output will follow the context free grammar.",
    )


class Params(H2oGPTParams):
    # https://platform.openai.com/docs/api-reference/completions/create
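Since H2oGPTParams is noted above as "handled by extra_body passed to OpenAI API", a client request against this server might look like the following sketch (base URL, API key, and model name are placeholders; extra_body is a standard parameter of the official openai 1.x client):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:5000/v1", api_key="EMPTY")  # placeholders

response = client.chat.completions.create(
    model="h2ogpt",  # placeholder model name
    messages=[{"role": "user", "content": "Describe Alice, age 34, as JSON."}],
    extra_body={
        # fields defined by H2oGPTParams above
        "response_format": {"type": "json_object"},
        "guided_json": {
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
            "required": ["name", "age"],
        },
    },
)
print(response.choices[0].message.content)

The remaining fields behave analogously: guided_choice restricts output to exactly one of the listed strings, guided_regex to a regex pattern, and guided_grammar to a context-free grammar, mirroring the vLLM protocol linked above.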
29 changes: 13 additions & 16 deletions reqs_optional/requirements_optional_langchain.txt
@@ -8,35 +8,31 @@ torch==2.2.1; sys_platform == "darwin" and platform_machine == "arm64"
#langchain-core==0.1.10

# optional for chat with PDF
-langchain==0.0.354
-langchain_experimental==0.0.47
-langchain-community==0.0.8
-langsmith==0.0.77
-langchain-core==0.1.6
+langchain==0.1.14
+langchain_experimental==0.0.56
+langchain-community==0.0.31
+langsmith==0.1.38
+langchain-core==0.1.38
+langchain-text-splitters==0.0.1

pypdf>=3.17.1
# avoid textract, requires old six
#textract==1.6.5
pypdfium2>=4.24.0


-# for HF embeddings
-sentence_transformers>=2.2.2,<2.3.0

# optional: for OpenAI endpoint or embeddings (requires key)
-replicate==0.20.0
-anthropic==0.18.1
-langchain-anthropic==0.1.3
+replicate==0.25.1
+anthropic==0.21.3
+langchain-anthropic==0.1.4

langchain-google-genai==1.0.1
google-generativeai==0.4.1

-#langchain_mistralai==0.0.5
-#mistralai==0.1.6
-
-langchain_mistralai==0.0.2
-mistralai==0.0.8
+langchain_mistralai==0.1.0
+mistralai==0.1.8

groq==0.4.2
langchain-groq==0.0.1
@@ -87,7 +83,8 @@ jq>=1.4.1; platform_machine == "x86_64"
pip-licenses>=4.3.0

# weaviate vector db
-weaviate-client>=3.25.3
+# required for httpx for mistralai
+weaviate-client==3.26.2

# vllm==0.2.2

3 changes: 1 addition & 2 deletions src/audio_langchain.py
@@ -288,9 +288,8 @@ def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""
from typing import List, Union, Any, Tuple

-import requests
from langchain.docstore.document import Document
-from langchain.document_loaders import ImageCaptionLoader
+from langchain_community.document_loaders import ImageCaptionLoader

from utils import get_device, NullContext, clear_torch_cache, have_use_faster, makedirs, get_gradio_tmp

6 changes: 6 additions & 0 deletions src/cli.py
@@ -74,6 +74,12 @@ def run_cli(  # for local function:
        image_file=None,
        image_control=None,

        response_format=None,
        guided_json=None,
        guided_regex=None,
        guided_choice=None,
        guided_grammar=None,

        # for evaluate kwargs
        captions_model=None,
        caption_loader=None,
7 changes: 7 additions & 0 deletions src/client_test.py
@@ -173,6 +173,13 @@ def get_args(prompt, prompt_type=None, chat=False, stream_output=False,

        image_file=None,
        image_control=None,

        response_format=None,
        guided_json=None,
        guided_regex=None,
        guided_choice=None,
        guided_grammar=None,
    )
    diff = 0
    from evaluate_params import eval_func_param_names
4 changes: 4 additions & 0 deletions src/enums.py
@@ -559,3 +559,7 @@ def gr_to_lg(image_audio_loaders,
coqui_lock_name = 'coqui'

split_google = "::::::::::"

response_formats = ['json_object', 'text']

invalid_json_str = '{}'
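A sketch of how these two constants might be consumed (both helper functions here are hypothetical, not part of this commit; the import assumes src/ is on the path):

import json

from enums import invalid_json_str, response_formats


def check_response_format(value: str) -> str:
    # reject unsupported formats early, before generation starts
    if value not in response_formats:
        raise ValueError(f"response_format must be one of {response_formats}, got {value!r}")
    return value


def json_or_fallback(text: str) -> str:
    # return the model output if it parses as JSON, else the canonical fallback
    try:
        json.loads(text)
        return text
    except json.JSONDecodeError:
        return invalid_json_str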
6 changes: 6 additions & 0 deletions src/eval.py
@@ -93,6 +93,12 @@ def run_eval(  # for local function:
        image_file=None,
        image_control=None,

        response_format=None,
        guided_json=None,
        guided_regex=None,
        guided_choice=None,
        guided_grammar=None,

        # for evaluate kwargs:
        captions_model=None,
        caption_loader=None,
6 changes: 6 additions & 0 deletions src/evaluate_params.py
@@ -83,6 +83,12 @@

    'image_file',
    'image_control',

    'response_format',
    'guided_json',
    'guided_regex',
    'guided_choice',
    'guided_grammar',
]

# form evaluate defaults for submit_nochat_api
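Because these names are appended to eval_func_param_names, which forms the evaluate defaults for submit_nochat_api (per the comment above), they should also be accepted by the raw gradio endpoint; a sketch, with a placeholder URL and the payload convention from the h2oGPT client docs:

from gradio_client import Client

client = Client("http://localhost:7860")  # placeholder URL

kwargs = dict(
    instruction_nochat="Return a JSON object with keys name and age for Alice, 34.",
    response_format='json_object',
    guided_json={
        "type": "object",
        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
        "required": ["name", "age"],
    },
)
res = client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
print(res)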
