This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Azure OpenAI LLM implementation #188

Merged
merged 38 commits on Jan 15, 2024
Changes from 6 commits
38 commits
fb2137a
azure openai integration -- save work
aulorbe Nov 16, 2023
fc38a6a
Ignore empty chunks
MichaelAnckaert Nov 27, 2023
c4f98fa
Use AzureOpenAIEncoder
MichaelAnckaert Nov 27, 2023
15477b5
Implement AzureOpenAI
MichaelAnckaert Nov 27, 2023
9b68c2f
Updates
MichaelAnckaert Nov 27, 2023
ccde9e9
Work in progress
MichaelAnckaert Nov 27, 2023
bbfc8c4
Inherit AzureOpenAILLM from OpenAILLM
MichaelAnckaert Nov 27, 2023
ab4aefe
Merge branch 'main' into azure-openai
MichaelAnckaert Nov 30, 2023
b347120
Address comments from PR review
MichaelAnckaert Nov 30, 2023
27dd272
Update readme and add Azure OpenAI example config
MichaelAnckaert Nov 30, 2023
042fcad
Add AzureOpenAILLM class
aulorbe Nov 16, 2023
9377f12
Add better handling of env vars
aulorbe Dec 5, 2023
46e5958
Add better handling of env vars, 2
aulorbe Dec 5, 2023
2c80fbd
Add WIP tests
aulorbe Dec 5, 2023
fee93b8
Update class in docstring
aulorbe Dec 5, 2023
6bc541f
Add NotImplementedError() for available_models() method for Azure class
aulorbe Dec 5, 2023
1f3add7
Shoot, remove secret
aulorbe Dec 5, 2023
0780cc8
Shoot, remove secret
aulorbe Dec 5, 2023
1d2c263
Undo redundant lint changes
igiloh-pinecone Jan 2, 2024
164fabe
[llm] Move OpenAILLm to its own file
igiloh-pinecone Jan 2, 2024
946ac2c
Merge remote-tracking branch 'audrey/audrey-azure-chat' into azure-op…
igiloh-pinecone Jan 2, 2024
31d011e
[llm] Finalize AzureOpenAILLM init params
igiloh-pinecone Jan 2, 2024
c1cc5a3
[kb] Simplify AzureRecordEncoder
igiloh-pinecone Jan 3, 2024
40fbf97
[kb] Move AzureOpenAIEncoder to its own file
igiloh-pinecone Jan 8, 2024
5b7b410
[llm] AzureOpenAI - support function calling
igiloh-pinecone Jan 10, 2024
0f9784e
Merge remote-tracking branch 'upstream/main' into azure-openai
igiloh-pinecone Jan 11, 2024
98840f9
[LLM] Added handle_error() for OpenAILLM
igiloh-pinecone Jan 14, 2024
2b5cf78
[test] Refactor AzureOpenAI tests
igiloh-pinecone Jan 14, 2024
bcb6b02
[LLM] Further improve error handing
igiloh-pinecone Jan 15, 2024
56d6e5f
[chat] Explicit error in FunctionCallingQG
igiloh-pinecone Jan 15, 2024
02f01e2
[llm] Fix typo in error message
igiloh-pinecone Jan 15, 2024
9d804fe
make linters happy
igiloh-pinecone Jan 15, 2024
b3aad4d
Finalize azure.config
igiloh-pinecone Jan 15, 2024
bbe34a8
[kb] Finalize Azure RecordEncoder
igiloh-pinecone Jan 15, 2024
b7ca6e6
make linters happy
igiloh-pinecone Jan 15, 2024
92ca6c2
[test] Fix AzureOpenAI tests
igiloh-pinecone Jan 15, 2024
cda4fc1
[test] Fix OpenAI tests
igiloh-pinecone Jan 15, 2024
c8a9a31
[CI] Added Azure env var
igiloh-pinecone Jan 15, 2024
4 changes: 2 additions & 2 deletions config/config.yaml
@@ -46,7 +46,7 @@ chat_engine:
# Configuration of the LLM (Large Language Model)
# -------------------------------------------------------------------------------------------------------------
llm: &llm
type: OpenAILLM # Options: [OpenAILLM]
type: OpenAILLM # Options: [OpenAILLM, AzureOpenAILLM]
params:
model_name: gpt-3.5-turbo # The name of the model to use.
# You can add any additional parameters which are supported by the LLM's `ChatCompletion()` API. The values set
@@ -115,7 +115,7 @@ chat_engine:
# The record encoder is responsible for encoding document chunks to a vector representation
# --------------------------------------------------------------------------
record_encoder:
type: OpenAIRecordEncoder # Options: [OpenAIRecordEncoder]
type: OpenAIRecordEncoder # Options: [OpenAIRecordEncoder, AzureOpenAIRecordEncoder]
params:
model_name: # The name of the model to use for encoding
text-embedding-ada-002
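Taken together, the two config changes in this file enable an Azure-flavored setup. A minimal sketch of such a config (only the `type` values come from this diff; the surrounding structure is abbreviated from the existing config.yaml, and the nesting of `record_encoder` under the knowledge base is elided):

```yaml
chat_engine:
  llm: &llm
    type: AzureOpenAILLM                 # option added in this PR
    params:
      model_name: gpt-3.5-turbo          # on Azure, typically your deployment name
  # ... knowledge base nesting elided ...
  record_encoder:
    type: AzureOpenAIRecordEncoder       # option added in this PR
    params:
      model_name: text-embedding-ada-002
```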
2 changes: 1 addition & 1 deletion src/canopy/knowledge_base/record_encoder/__init__.py
@@ -1,3 +1,3 @@
from .base import RecordEncoder
from .dense import DenseRecordEncoder
from .openai import OpenAIRecordEncoder
from .openai import OpenAIRecordEncoder, AzureOpenAIRecordEncoder
48 changes: 47 additions & 1 deletion src/canopy/knowledge_base/record_encoder/openai.py
@@ -1,5 +1,5 @@
from typing import List
from pinecone_text.dense.openai_encoder import OpenAIEncoder
from pinecone_text.dense.openai_encoder import OpenAIEncoder, AzureOpenAIEncoder
from canopy.knowledge_base.models import KBDocChunk, KBEncodedDocChunk, KBQuery
from canopy.knowledge_base.record_encoder.dense import DenseRecordEncoder
from canopy.models.data_models import Query
@@ -49,3 +49,49 @@ async def _aencode_documents_batch(self,

async def _aencode_queries_batch(self, queries: List[Query]) -> List[KBQuery]:
raise NotImplementedError


class AzureOpenAIRecordEncoder(DenseRecordEncoder):
"""
AzureOpenAIRecordEncoder is a type of DenseRecordEncoder that uses the Azure OpenAI `embeddings` API.
The implementation uses the `AzureOpenAIEncoder` class from the `pinecone-text` library.
For more information, see: https://github.com/pinecone-io/pinecone-text

""" # noqa: E501

def __init__(self,
*,
model_name: str = "text-embedding-ada-002",
batch_size: int = 400,
**kwargs):
"""
Initialize the AzureOpenAIRecordEncoder

Args:
model_name: The name of the Azure OpenAI embeddings model to use for encoding. See https://platform.openai.com/docs/models/embeddings
batch_size: The number of documents or queries to encode at once.
Defaults to 400.
**kwargs: Additional arguments to pass to the underlying `pinecone-text` `AzureOpenAIEncoder`.
""" # noqa: E501
encoder = AzureOpenAIEncoder(model_name, **kwargs)
super().__init__(dense_encoder=encoder, batch_size=batch_size)

def encode_documents(self, documents: List[KBDocChunk]) -> List[KBEncodedDocChunk]:
"""
Encode a list of documents: takes a list of KBDocChunk and returns a list of KBEncodedDocChunk.

Args:
documents: A list of KBDocChunk to encode.

Returns:
encoded chunks: A list of KBEncodedDocChunk, with the `values` field populated by the generated embedding vectors.
""" # noqa: E501
return super().encode_documents(documents)

async def _aencode_documents_batch(self,
documents: List[KBDocChunk]
) -> List[KBEncodedDocChunk]:
raise NotImplementedError

async def _aencode_queries_batch(self, queries: List[Query]) -> List[KBQuery]:
raise NotImplementedError
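
A minimal usage sketch for the new encoder class. The environment variable names are assumptions for illustration (this diff does not define how pinecone-text's `AzureOpenAIEncoder` picks up credentials):

```python
import os

from canopy.knowledge_base.models import KBDocChunk
from canopy.knowledge_base.record_encoder import AzureOpenAIRecordEncoder

# Assumed environment setup -- the exact variable names your Azure
# deployment expects may differ.
os.environ.setdefault("AZURE_OPENAI_API_KEY", "<your-key>")
os.environ.setdefault("AZURE_OPENAI_ENDPOINT", "https://<resource>.openai.azure.com")

encoder = AzureOpenAIRecordEncoder(model_name="text-embedding-ada-002")
chunks = [KBDocChunk(id="doc1_0", document_id="doc1",
                     text="Pinecone is a managed vector database.")]
encoded = encoder.encode_documents(chunks)
print(len(encoded[0].values))  # dimensionality of the embedding vector
```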
133 changes: 133 additions & 0 deletions src/canopy/llm/openai.py
@@ -4,6 +4,7 @@
import jsonschema
import openai
import json
import os

from openai.types.chat import ChatCompletionToolParam
from tenacity import (
@@ -206,3 +207,135 @@ async def agenerate_queries(self,
model_params: Optional[dict] = None,
) -> List[Query]:
raise NotImplementedError()


class AzureOpenAILLM(OpenAILLM):
"""
Azure OpenAI LLM wrapper built on top of the OpenAI Python client.

Note: OpenAI requires a valid API key to use this class.
You can set the "AZURE_OPENAI_KEY" environment variable to your API key.
Or you can directly set it as follows:
igiloh-pinecone marked this conversation as resolved.
Show resolved Hide resolved
>>> import openai
>>> openai.api_key = "YOUR_API_KEY"
"""
def __init__(self,
model_name: str = "gpt-3.5-turbo",
*,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
**kwargs: Any,
):
"""
Initialize the Azure OpenAI LLM.

Args:
model_name: The name of the model to use (on Azure, typically the deployment name). See https://platform.openai.com/docs/models
api_key: Your Azure OpenAI API key. Defaults to None (uses the "OPENAI_API_KEY" environment variable).
base_url: The Azure OpenAI endpoint to use. Defaults to None (uses the "OPENAI_BASE_URL" environment variable).
**kwargs: Generation default parameters to use for each request. See https://platform.openai.com/docs/api-reference/chat/create
For example, you can set the temperature, top_p etc
These params can be overridden by passing a `model_params` argument to the `chat_completion` or `enforced_function_call` methods.
""" # noqa: E501
super().__init__(model_name)
self._client = openai.AzureOpenAI(
api_key=api_key or os.getenv("OPENAI_API_KEY"),
api_version="2023-10-01-preview",
azure_endpoint=base_url or os.getenv("OPENAI_BASE_URL")
)
self.default_model_params = kwargs

@retry(
reraise=True,
stop=stop_after_attempt(3),
retry=retry_if_exception_type(
(json.decoder.JSONDecodeError,
jsonschema.ValidationError)
),
)
def enforced_function_call(self,
messages: Messages,
function: Function,
*,
max_tokens: Optional[int] = None,
model_params: Optional[dict] = None) -> dict:
"""
This method forces the model to respond with a specific function call.

To read more about this feature, see: https://platform.openai.com/docs/guides/gpt/function-calling

Note: this function is wrapped in a retry decorator to handle transient errors.

Args:
messages: Messages (chat history) to send to the model.
function: Function to call. See canopy.llm.models.Function for more details.
max_tokens: Maximum number of tokens to generate. Defaults to None (generates until stop sequence or until hitting max context size).
model_params: Model parameters to use for this request. Defaults to None (uses the default model parameters).
Overrides the default model parameters if set on initialization.
For example, you can pass: {"temperature": 0.9, "top_p": 1.0} to override the default temperature and top_p.
see: https://platform.openai.com/docs/api-reference/chat/create

Returns:
dict: Function call arguments as a dictionary.

Usage:
>>> from canopy.llm import AzureOpenAILLM
>>> from canopy.llm.models import Function, FunctionParameters, FunctionArrayProperty
>>> from canopy.models.data_models import UserMessage
>>> llm = AzureOpenAILLM()
>>> messages = [UserMessage(content="I was wondering what is the capital of France?")]
>>> function = Function(
... name="query_knowledgebase",
... description="Query search engine for relevant information",
... parameters=FunctionParameters(
... required_properties=[
... FunctionArrayProperty(
... name="queries",
... items_type="string",
... description='List of queries to send to the search engine.',
... ),
... ]
... )
... )
>>> llm.enforced_function_call(messages, function)
{'queries': ['capital of France']}
""" # noqa: E501

model_params_dict: Dict[str, Any] = deepcopy(self.default_model_params)
model_params_dict.update(
model_params or {}
)

function_dict = cast(ChatCompletionToolParam, function.dict())

chat_completion = self._client.chat.completions.create(
model=self.model_name,
messages=[m.dict() for m in messages],
functions=[function_dict],
function_call={"name": function.name},
max_tokens=max_tokens,
**model_params_dict
)

result = chat_completion.choices[0].message.function_call.arguments
arguments = json.loads(result)

jsonschema.validate(instance=arguments, schema=function.parameters.dict())
return arguments

async def achat_completion(self,
messages: Messages, *, stream: bool = False,
max_generated_tokens: Optional[int] = None,
model_params: Optional[dict] = None,
) -> Union[ChatResponse,
Iterable[StreamingChatChunk]]:
raise NotImplementedError()

async def agenerate_queries(self,
messages: Messages,
*,
max_generated_tokens: Optional[int] = None,
model_params: Optional[dict] = None,
) -> List[Query]:
raise NotImplementedError()
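
A usage sketch for the new class based on the constructor above. The `chat_completion` call assumes the interface inherited from `OpenAILLM`, which this diff does not show:

```python
import os

from canopy.llm.openai import AzureOpenAILLM
from canopy.models.data_models import UserMessage

# api_key / base_url fall back to OPENAI_API_KEY / OPENAI_BASE_URL
# when omitted (see __init__ above).
llm = AzureOpenAILLM(
    model_name="gpt-3.5-turbo",  # on Azure, typically the deployment name
    api_key=os.environ["OPENAI_API_KEY"],
    base_url="https://<resource>.openai.azure.com",
)
response = llm.chat_completion(messages=[UserMessage(content="Hello!")])
print(response.choices[0].message.content)
```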
2 changes: 2 additions & 0 deletions src/canopy_cli/cli.py
@@ -401,6 +401,8 @@ def _chat(
click.echo(click.style(f"\n {speaker}:\n", fg=speaker_color))
if stream:
for chunk in openai_response:
if not chunk.choices:
continue
openai_response_id = chunk.id
intenal_model = chunk.model
text = chunk.choices[0].delta.content or ""
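The two added lines guard against stream chunks whose `choices` list is empty, which Azure OpenAI can emit (for example, leading chunks carrying only content-filter metadata). The same defensive pattern in isolation:

```python
from typing import Iterable


def stream_text(chunks: Iterable) -> None:
    """Print streamed completion text, skipping metadata-only chunks."""
    for chunk in chunks:
        if not chunk.choices:  # Azure may stream chunks with an empty choices list
            continue
        print(chunk.choices[0].delta.content or "", end="", flush=True)
```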
12 changes: 10 additions & 2 deletions src/canopy_server/app.py
@@ -44,7 +44,7 @@
ContextResponse,
)

from canopy.llm.openai import OpenAILLM
from canopy.llm.openai import OpenAILLM, AzureOpenAILLM
from canopy_cli.errors import ConfigError
from canopy import __version__

@@ -332,7 +332,15 @@ def _init_engines():
Tokenizer.initialize()
kb = KnowledgeBase(index_name=index_name)
context_engine = ContextEngine(knowledge_base=kb)
llm = OpenAILLM()
if llm == "OpenAILLM":
llm = OpenAILLM()
elif llm == "AzureOpenAILLM":
llm = AzureOpenAILLM(
api_key=os.getenv("OPENAI_API_KEY"),
base_url=os.getenv("OPENAI_BASE_URL"),
)
else:
raise ValueError("Config issue: LLM must be either OpenAILLM or AzureOpenAILLM")
chat_engine = ChatEngine(context_engine=context_engine, llm=llm)

kb.connect()
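
The `llm == "OpenAILLM"` comparison above presumably refers to a config value read earlier in the file, outside the collapsed diff context. A self-contained sketch of the same selection logic, using a hypothetical environment variable in place of that config value:

```python
import os

from canopy.llm.openai import OpenAILLM, AzureOpenAILLM

# "CANOPY_LLM" is hypothetical -- the PR reads the LLM type from its
# YAML config, not from this environment variable.
llm_type = os.getenv("CANOPY_LLM", "OpenAILLM")
if llm_type == "OpenAILLM":
    llm = OpenAILLM()
elif llm_type == "AzureOpenAILLM":
    llm = AzureOpenAILLM(
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL"),
    )
else:
    raise ValueError(f"Config issue: LLM must be either OpenAILLM or "
                     f"AzureOpenAILLM, got {llm_type!r}")
```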