add Google assistants #301

Merged Jan 31, 2024 (13 commits). Viewing changes from 8 commits.
8 changes: 8 additions & 0 deletions docs/references/faq.md
@@ -39,3 +39,11 @@
```bash
export MOSAICML_API_KEY="XXXXX"
```

### [Google](https://ai.google.dev/)

1. ADDME
Member Author commented:
@nenb Could you fill this out similar to what we did for the others?

pmeier marked this conversation as resolved.
2. Set the `GOOGLE_API_KEY` environment variable with your Google API key:
```bash
export GOOGLE_API_KEY="XXXXX"
```
8 changes: 5 additions & 3 deletions pyproject.toml
@@ -25,7 +25,7 @@ dependencies = [
"emoji",
"fastapi",
"httpx",
"httpx_sse",
"httpx-sse",
"importlib_metadata>=4.6; python_version<'3.10'",
"packaging",
"panel>=1.3.6,<1.4",
@@ -56,6 +56,7 @@ Repository = "https://github.com/Quansight/ragna"
# to update the array below, run scripts/update_optional_dependencies.py
all = [
    "chromadb>=0.4.13",
+    "ijson",
    "lancedb>=0.2",
    "pyarrow",
    "pymupdf>=1.23.6",
@@ -139,12 +140,13 @@ disallow_incomplete_defs = false

[[tool.mypy.overrides]]
module = [
+    "docx",
    "fitz",
+    "ijson",
    "lancedb",
    "param",
+    "pyarrow",
-    "docx",
    "pptx",
-    "pyarrow",
    "sentence_transformers",
]
ignore_missing_imports = true
19 changes: 16 additions & 3 deletions ragna/_compat.py
@@ -53,9 +53,22 @@ def _anext() -> Callable[[AsyncIterator[T]], Awaitable[T]]:
    if sys.version_info[:2] >= (3, 10):
        anext = builtins.anext
    else:
-        async def anext(ait: AsyncIterator[T]) -> T:
-            return await ait.__anext__()
+        sentinel = object()
Member Author commented:
This is a slight refactor as I needed the default return value in case of exhaustion.


+        def anext(
+            ait: AsyncIterator[T],
+            default: T = sentinel,  # type: ignore[assignment]
+        ) -> Awaitable[T]:
+            if default is sentinel:
+                return ait.__anext__()
Contributor commented:
I don't fully grok this. Do we not need to await this? And in what situation will this arise?

Member Author commented:
> I don't fully grok this. Do we not need to await this?

We don't, and we actually can't here. Note that `anext` is not an `async def` function; `anext` returns an awaitable, e.g.

```python
async_iterator = ...
awaitable = anext(async_iterator)
result = await awaitable
```

> And in what situation will this arise?

This is the default case, i.e. no default value is set. Let me refactor this function to make it clearer what is going on.


+            async def anext_with_default() -> T:
+                try:
+                    return await ait.__anext__()
+                except StopAsyncIteration:
+                    return default
+
+            return anext_with_default()

    return anext

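As an aside for readers of this thread, here is a minimal sketch of the backported `anext` in action; the usage is hypothetical, but the `(iterator, default)` signature comes from the diff above:

```python
import asyncio

from ragna._compat import anext  # the backport from the diff above


async def main() -> None:
    async def gen():
        yield b"chunk"

    ait = gen()
    print(await anext(ait, b""))  # b"chunk"
    print(await anext(ait, b""))  # b"" (the default): the iterator is exhausted


asyncio.run(main())
```

On Python 3.10 and later the name resolves to `builtins.anext` and behaves identically.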
3 changes: 3 additions & 0 deletions ragna/assistants/__init__.py
@@ -1,6 +1,8 @@
__all__ = [
    "Claude",
    "ClaudeInstant",
+    "GeminiPro",
+    "GeminiUltra",
    "Gpt35Turbo16k",
    "Gpt4",
    "Mpt7bInstruct",
@@ -10,6 +12,7 @@

from ._anthropic import Claude, ClaudeInstant
from ._demo import RagnaDemoAssistant
+from ._google import GeminiPro, GeminiUltra
from ._mosaicml import Mpt7bInstruct, Mpt30bInstruct
from ._openai import Gpt4, Gpt35Turbo16k

3 changes: 1 addition & 2 deletions ragna/assistants/_api.py
@@ -17,8 +17,7 @@ def requirements(cls) -> list[Requirement]:

    def __init__(self) -> None:
        self._client = httpx.AsyncClient(
-            headers={"User-Agent": f"{ragna.__version__}/{self}"},
-            timeout=60,
+            headers={"User-Agent": f"{ragna.__version__}/{self}"}, timeout=60
        )
        self._api_key = os.environ[self._API_KEY_ENV_VAR]

117 changes: 117 additions & 0 deletions ragna/assistants/_google.py
@@ -0,0 +1,117 @@
from typing import AsyncIterator

from ragna._compat import anext
from ragna.core import PackageRequirement, Requirement, Source

from ._api import ApiAssistant


class AsyncIteratorReader:
    def __init__(self, ait: AsyncIterator[bytes]) -> None:
        self._ait = ait

    async def read(self, n: int) -> bytes:
        if n == 0:
Contributor commented:
Nitpick: could you add some documentation here on why the `n` arg is required/used? I get that `ijson` expects a file-like object, but does that also imply the `n` arg, or is it something specific to `ijson`?

return b""
return await anext(self._ait, b"") # type: ignore[call-arg]
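To the reviewer's question about `n`: `ijson` treats the object it is handed as a file and repeatedly calls `read(n)` until an empty result signals end of stream, so the argument belongs to the file-like protocol rather than anything `ijson`-specific. A rough sketch of that consumption loop, for illustration only (not `ijson`'s actual internals):

```python
# Hypothetical sketch of how a streaming parser drives a file-like reader.
async def drain(reader) -> bytes:
    data = b""
    while True:
        chunk = await reader.read(65536)  # the parser chooses the chunk size n
        if not chunk:  # b"" signals end of stream
            break
        data += chunk  # a real parser would tokenize incrementally instead
    return data
```

Since `AsyncIteratorReader.read` ignores `n` beyond the `n == 0` check and simply returns the next chunk from the wrapped iterator, it may return fewer than `n` bytes per call, which file-like objects are allowed to do.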


class GoogleApiAssistant(ApiAssistant):
    _API_KEY_ENV_VAR = "GOOGLE_API_KEY"
    _MODEL: str
    _CONTEXT_SIZE: int

    @classmethod
    def requirements(cls) -> list[Requirement]:
        return [
            *super().requirements(),
            PackageRequirement("ijson"),
        ]

    @classmethod
    def display_name(cls) -> str:
        return f"Google/{cls._MODEL}"

    @property
    def max_input_size(self) -> int:
        return self._CONTEXT_SIZE

    def _instructize_prompt(self, prompt: str, sources: list[Source]) -> str:
        # https://ai.google.dev/docs/prompt_best_practices#add-contextual-information
        return "\n".join(
            [
                "Answer the prompt using only the pieces of context below.",
                "If you don't know the answer, just say so. Don't try to make up additional context.",
                f"Prompt: {prompt}",
                *[f"\n{source.content}" for source in sources],
            ]
        )
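As a worked example of the format above, a hypothetical prompt "What is Ragna?" with a single source reading "Ragna is a RAG orchestration framework." would be assembled into:

```python
# Hypothetical output of _instructize_prompt (prompt and source content invented):
instruction = (
    "Answer the prompt using only the pieces of context below.\n"
    "If you don't know the answer, just say so. Don't try to make up additional context.\n"
    "Prompt: What is Ragna?\n"
    "\nRagna is a RAG orchestration framework."
)
```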

    async def _call_api(
        self, prompt: str, sources: list[Source], *, max_new_tokens: int
    ) -> AsyncIterator[str]:
        import ijson

        async with self._client.stream(
            "POST",
            f"https://generativelanguage.googleapis.com/v1beta/models/{self._MODEL}:streamGenerateContent",
            params={"key": self._api_key},
            headers={"Content-Type": "application/json"},
            json={
                "contents": [
                    {"parts": [{"text": self._instructize_prompt(prompt, sources)}]}
                ],
                # https://ai.google.dev/docs/safety_setting_gemini
                "safetySettings": [
                    {"category": f"HARM_CATEGORY_{category}", "threshold": "BLOCK_NONE"}
                    for category in [
                        "HARASSMENT",
                        "HATE_SPEECH",
                        "SEXUALLY_EXPLICIT",
                        "DANGEROUS_CONTENT",
                    ]
                ],
                # https://ai.google.dev/tutorials/rest_quickstart#configuration
                "generationConfig": {
                    "temperature": 0.0,
Contributor commented:
If we are going to hard-code this, then I would suggest a higher value, as this is what most users will require.

Member Author commented:
It is the other way around: we want to hardcode 0.0 here, because that means determinism. If we learned one thing from trying to bring RAG to businesses, it is that they want to get exactly the same answer if they ask the same question twice. Of course we can't guarantee that, since we don't control the model, but we can do our best to at least avoid sampling during generation.

This is the same for all other assistants that we currently have:

```python
"temperature": 0.0,

"parameters": {"temperature": 0.0, "max_new_tokens": max_new_tokens},

"temperature": 0.0,
```

"maxOutputTokens": max_new_tokens,
},
},
) as response:
async for chunk in ijson.items(
AsyncIteratorReader(response.aiter_bytes(1024)),
"item.candidates.item.content.parts.item.text",
):
yield chunk
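For context on the `ijson` prefix: `streamGenerateContent` returns a JSON array of response objects, and `item.candidates.item.content.parts.item.text` walks each array item, each of its candidates, and each text part. A sketch of the assumed shape, with invented values:

```python
# Assumed shape of the streamed response array (all values made up):
response_body = [
    {"candidates": [{"content": {"parts": [{"text": "first chunk of the answer"}]}}]},
    {"candidates": [{"content": {"parts": [{"text": " ...and its continuation"}]}}]},
]
# For the prefix above, ijson would yield:
#   "first chunk of the answer", " ...and its continuation"
```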


class GeminiPro(GoogleApiAssistant):
    """[Google Gemini Pro](https://ai.google.dev/models/gemini)

    !!! info "Required environment variables"

        - `GOOGLE_API_KEY`

    !!! info "Required packages"

        - `ijson`
    """

    _MODEL = "gemini-pro"
    _CONTEXT_SIZE = 30_720


class GeminiUltra(GoogleApiAssistant):
    """[Google Gemini Ultra](https://ai.google.dev/models/gemini)

    !!! info "Required environment variables"

        - `GOOGLE_API_KEY`

    !!! info "Required packages"

        - `ijson`
    """

    _MODEL = "gemini-ultra"
    _CONTEXT_SIZE = 30_720
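A tiny smoke test of the new classes is possible without an API key, since `display_name` and `requirements` are classmethods; the expected output follows from the class attributes in the diff:

```python
from ragna.assistants import GeminiPro, GeminiUltra

print(GeminiPro.display_name())    # Google/gemini-pro
print(GeminiUltra.display_name())  # Google/gemini-ultra

# requirements() includes the ijson package requirement added above
print(GeminiPro.requirements())
```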
2 changes: 1 addition & 1 deletion ragna/deploy/_ui/api_wrapper.py
@@ -18,7 +18,7 @@ class ApiWrapper(param.Parameterized):
    auth_token = param.String(default=None)

    def __init__(self, api_url, **params):
-        self.client = httpx.AsyncClient(base_url=api_url)
+        self.client = httpx.AsyncClient(base_url=api_url, timeout=60)
Member Author commented:
Even when streaming, the Google assistants return really large chunks and thus easily go over the default timeout. The new timeout is in line with what we use for our builtin assistants as well:

```python
self._client = httpx.AsyncClient(
    headers={"User-Agent": f"{ragna.__version__}/{self}"},
    timeout=60,
)
```


        super().__init__(**params)

2 changes: 2 additions & 0 deletions requirements-docker.lock
@@ -109,6 +109,8 @@ idna==3.6
    # anyio
    # httpx
    # requests
+ijson==3.2.3
+    # via Ragna (pyproject.toml)
importlib-metadata==6.11.0
    # via opentelemetry-api
importlib-resources==6.1.1