diff --git a/docs/components/embedders/models/voyageai.mdx b/docs/components/embedders/models/voyageai.mdx
new file mode 100644
index 0000000000..37cf1306fb
--- /dev/null
+++ b/docs/components/embedders/models/voyageai.mdx
@@ -0,0 +1,37 @@
+---
+title: VoyageAI
+---
+
+To use VoyageAI embedding models, set the `VOYAGE_API_KEY` environment variable. You can obtain the Voyage API key from the [VoyageAI Dashboard](https://dash.voyageai.com/api-keys).
+
+### Usage
+
+```python
+import os
+from mem0 import Memory
+
+os.environ["VOYAGE_API_KEY"] = "your_api_key"
+os.environ["OPENAI_API_KEY"] = "your_api_key" # For LLM
+
+config = {
+    "embedder": {
+        "provider": "voyageai",
+        "config": {
+            "model": "voyage-3"
+        }
+    }
+}
+
+m = Memory.from_config(config)
+m.add("I'm visiting Paris", user_id="john")
+```
+
+### Config
+
+Here are the parameters available for configuring the VoyageAI embedder:
+
+| Parameter | Description | Default Value |
+| --- | --- | --- |
+| `model` | The name of the embedding model to use | `voyage-3` |
+| `embedding_dims` | Dimensions of the embedding model | `None` (which uses the [pre-defined default values](https://docs.voyageai.com/docs/embeddings) according to the selected model) |
+| `api_key` | The VoyageAI API key | `None` (falls back to the `VOYAGE_API_KEY` environment variable) |
diff --git a/docs/components/embedders/overview.mdx b/docs/components/embedders/overview.mdx
index bb356357a3..b62b979373 100644
--- a/docs/components/embedders/overview.mdx
+++ b/docs/components/embedders/overview.mdx
@@ -16,6 +16,7 @@ See the list of supported embedders below.
+
 ## Usage
diff --git a/mem0/embeddings/configs.py b/mem0/embeddings/configs.py
index b35ffba414..4413893b38 100644
--- a/mem0/embeddings/configs.py
+++ b/mem0/embeddings/configs.py
@@ -13,7 +13,16 @@ class EmbedderConfig(BaseModel):
     @field_validator("config")
     def validate_config(cls, v, values):
         provider = values.data.get("provider")
-        if provider in ["openai", "ollama", "huggingface", "azure_openai", "gemini", "vertexai", "together"]:
+        if provider in [
+            "openai",
+            "ollama",
+            "huggingface",
+            "azure_openai",
+            "gemini",
+            "vertexai",
+            "together",
+            "voyageai",
+        ]:
             return v
         else:
             raise ValueError(f"Unsupported embedding provider: {provider}")
diff --git a/mem0/embeddings/voyageai.py b/mem0/embeddings/voyageai.py
new file mode 100644
index 0000000000..eff31352f7
--- /dev/null
+++ b/mem0/embeddings/voyageai.py
@@ -0,0 +1,45 @@
+import os
+from typing import Optional
+
+from voyageai import Client
+
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+from mem0.embeddings.base import EmbeddingBase
+
+
+class VoyageAIEmbedding(EmbeddingBase):
+    """Embedder that requests embedding vectors from the VoyageAI client."""
+
+    def __init__(self, config: Optional[BaseEmbedderConfig] = None):
+        """
+        Set up the VoyageAI client.
+
+        Args:
+            config (Optional[BaseEmbedderConfig]): Embedder settings. `model`
+                falls back to "voyage-3" when unset; `api_key` falls back to
+                the `VOYAGE_API_KEY` environment variable when unset.
+        """
+        super().__init__(config)
+
+        self.config.model = self.config.model or "voyage-3"
+
+        api_key = self.config.api_key or os.getenv("VOYAGE_API_KEY")
+        self.client = Client(api_key=api_key)
+
+    def embed(self, text):
+        """
+        Get the embedding for the given text using VoyageAI.
+
+        Args:
+            text (str): The text to embed.
+
+        Returns:
+            list: The embedding vector.
+        """
+        # embedding_dims is forwarded unchanged; when it was never configured
+        # the request is sent with output_dimension=None.
+        return self.client.embed(
+            texts=[text],
+            model=self.config.model,
+            output_dimension=self.config.embedding_dims,
+        ).embeddings[0]
diff --git a/mem0/memory/utils.py b/mem0/memory/utils.py
index f889abe97c..310e26bcb0 100644
--- a/mem0/memory/utils.py
+++ b/mem0/memory/utils.py
@@ -1,5 +1,3 @@
-import json
-
 from mem0.configs.prompts import FACT_RETRIEVAL_PROMPT
 
 
@@ -18,13 +16,14 @@ def parse_messages(messages):
             response += f"assistant: {msg['content']}\n"
     return response
 
+
 def format_entities(entities):
     if not entities:
         return ""
-    
+
     formatted_lines = []
     for entity in entities:
         simplified = f"{entity['source']} -- {entity['relation'].upper()} -- {entity['destination']}"
         formatted_lines.append(simplified)
 
-    return "\n".join(formatted_lines)
\ No newline at end of file
+    return "\n".join(formatted_lines)
diff --git a/mem0/utils/factory.py b/mem0/utils/factory.py
index bdff8fe234..2176dab811 100644
--- a/mem0/utils/factory.py
+++ b/mem0/utils/factory.py
@@ -45,6 +45,7 @@ class EmbedderFactory:
         "gemini": "mem0.embeddings.gemini.GoogleGenAIEmbedding",
         "vertexai": "mem0.embeddings.vertexai.VertexAIEmbedding",
         "together": "mem0.embeddings.together.TogetherEmbedding",
+        "voyageai": "mem0.embeddings.voyageai.VoyageAIEmbedding",
     }
 
     @classmethod
diff --git a/tests/embeddings/test_voyageai_embeddings.py b/tests/embeddings/test_voyageai_embeddings.py
new file mode 100644
index 0000000000..75b98f4ab6
--- /dev/null
+++ b/tests/embeddings/test_voyageai_embeddings.py
@@ -0,0 +1,44 @@
+from unittest.mock import Mock, patch
+
+import pytest
+
+from mem0.configs.embeddings.base import BaseEmbedderConfig
+from mem0.embeddings.voyageai import VoyageAIEmbedding
+
+
+@pytest.fixture
+def mock_voyageai_client():
+    with patch("mem0.embeddings.voyageai.Client") as mock_voyageai:
+        mock_client = Mock()
+        mock_voyageai.return_value = mock_client
+        yield mock_client
+
+
+def test_embed_default_model(mock_voyageai_client):
+    config = BaseEmbedderConfig()
+    embedder = VoyageAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.embeddings = [[0.1, 0.2, 0.3]]
+    mock_voyageai_client.embed.return_value = mock_response
+
+    result = embedder.embed("Default embedder")
+
+    mock_voyageai_client.embed.assert_called_once_with(
+        texts=["Default embedder"], model="voyage-3", output_dimension=None
+    )
+    assert result == [0.1, 0.2, 0.3]
+
+
+def test_embed_custom_model(mock_voyageai_client):
+    config = BaseEmbedderConfig(model="voyage-3-large", embedding_dims=2048)
+    embedder = VoyageAIEmbedding(config)
+    mock_response = Mock()
+    mock_response.embeddings = [[0.4, 0.5, 0.6]]
+    mock_voyageai_client.embed.return_value = mock_response
+
+    result = embedder.embed("Custom embedder")
+
+    mock_voyageai_client.embed.assert_called_once_with(
+        texts=["Custom embedder"], model="voyage-3-large", output_dimension=2048
+    )
+    assert result == [0.4, 0.5, 0.6]