From 6e6cd925209d39d65bd82d282dfa4f8ad932d89b Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 12 Mar 2024 10:34:05 +0100 Subject: [PATCH 1/8] Add support for voyageai module --- test/collection/test_config.py | 35 +++++++++++++ .../classes/config_named_vectors.py | 50 +++++++++++++++++++ .../collections/classes/config_vectorizers.py | 50 +++++++++++++++++++ 3 files changed, 135 insertions(+) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index f789d39f8..37319ac63 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -270,6 +270,22 @@ def test_basic_config(): } }, ), + ( + Configure.Vectorizer.text2vec_voyageai( + vectorize_collection_name=False, + model="voyage-large-2", + truncate=False, + baseURL="https://voyage.made-up.com", + ), + { + "text2vec-voyageai": { + "vectorizeClassName": False, + "model": "voyage-large-2", + "baseURL": "https://voyage.made-up.com", + "truncate": False, + } + }, + ), ( Configure.Vectorizer.img2vec_neural( image_fields=["test"], @@ -1046,6 +1062,25 @@ def test_vector_config_flat_pq() -> None: } }, ), + ( + [ + Configure.NamedVectors.text2vec_voyageai( + name="test", source_properties=["prop"], truncate=True + ) + ], + { + "test": { + "vectorizer": { + "text2vec-voyageai": { + "properties": ["prop"], + "vectorizeClassName": True, + "truncate": True, + } + }, + "vectorIndexType": "hnsw", + } + }, + ), ( [ Configure.NamedVectors.img2vec_neural( diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index be765cbf1..7e954a6d5 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -28,6 +28,7 @@ _Text2VecOpenAIConfigCreate, _Text2VecPalmConfigCreate, _Text2VecTransformersConfigCreate, + _Text2VecVoyageConfigCreate, _VectorizerConfigCreate, AWSModel, AWSService, @@ -38,6 +39,7 @@ OpenAIModel, OpenAIType, Vectorizers, + VoyageModel, _map_multi2vec_fields, ) @@ -683,6 +685,54 @@ def text2vec_jinaai( vector_index_config=vector_index_config, ) + @staticmethod + def text2vec_voyageai( + name: str, + *, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + model: Optional[Union[VoyageModel, str]] = None, + baseURL: Optional[str] = None, + truncate: Optional[bool] = None, + ) -> _NamedVectorConfigCreate: + """Create a named vector using the `text2vec-jinaai` model. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-jinaai) + for detailed usage. + + Arguments: + `name` + The name of the named vector. + `source_properties` + Which properties should be included when vectorizing. By default all text properties are included. + `vector_index_config` + The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `model` + The model to use. Defaults to `None`, which uses the server-defined default. + See the + [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `baseURL` + The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + `truncate` + Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. + """ + return _NamedVectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecVoyageConfigCreate( + model=model, + vectorizeClassName=vectorize_collection_name, + baseURL=baseURL, + truncate=truncate, + ), + vector_index_config=vector_index_config, + ) + class _NamedVectorsUpdate: @staticmethod diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 5aad11a1f..58e917cbc 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -24,6 +24,7 @@ "text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002" ] JinaModel: TypeAlias = Literal["jina-embeddings-v2-base-en", "jina-embeddings-v2-small-en"] +VoyageModel: TypeAlias = Literal["voyage-large-2, voyage-code-2, voyage-2"] AWSModel: TypeAlias = Literal[ "amazon.titan-embed-text-v1", "cohere.embed-english-v3", @@ -62,6 +63,8 @@ class Vectorizers(str, Enum): Weaviate module backed by Transformers text-based embedding models. `TEXT2VEC_JINAAI` Weaviate module backed by Jina AI text-based embedding models. + `TEXT2VEC_VOYAGEAI` + Weaviate module backed by Voyage AI text-based embedding models. `IMG2VEC_NEURAL` Weaviate module backed by a ResNet-50 neural network for images. `MULTI2VEC_CLIP` @@ -82,6 +85,7 @@ class Vectorizers(str, Enum): TEXT2VEC_PALM = "text2vec-palm" TEXT2VEC_TRANSFORMERS = "text2vec-transformers" TEXT2VEC_JINAAI = "text2vec-jinaai" + TEXT2VEC_VOYAGEAI = "text2vec-voyageai" IMG2VEC_NEURAL = "img2vec-neural" MULTI2VEC_CLIP = "multi2vec-clip" MULTI2VEC_BIND = "multi2vec-bind" @@ -285,6 +289,20 @@ class _Text2VecJinaConfigCreate(_Text2VecJinaConfig, _VectorizerConfigCreate): pass +class _Text2VecVoyageConfig(_ConfigCreateModel): + vectorizer: Vectorizers = Field( + default=Vectorizers.TEXT2VEC_VOYAGEAI, frozen=True, exclude=True + ) + model: Optional[str] + baseURL: Optional[str] + truncate: Optional[bool] + vectorizeClassName: bool + + +class _Text2VecVoyageConfigCreate(_Text2VecVoyageConfig, _VectorizerConfigCreate): + pass + + class _Img2VecNeuralConfig(_ConfigCreateModel): vectorizer: Vectorizers = Field(default=Vectorizers.IMG2VEC_NEURAL, frozen=True, exclude=True) imageFields: List[str] @@ -788,3 +806,35 @@ def text2vec_jinaai( Whether to vectorize the collection name. Defaults to `True`. """ return _Text2VecJinaConfigCreate(model=model, vectorizeClassName=vectorize_collection_name) + + @staticmethod + def text2vec_voyageai( + model: Optional[Union[VoyageModel, str]] = None, + baseURL: Optional[str] = None, + truncate: Optional[bool] = None, + vectorize_collection_name: bool = True, + ) -> _VectorizerConfigCreate: + """Create a `_Text2VecVoyageConfigCreate` object for use when vectorizing using the `text2vec-voyageai` model. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai) + for detailed usage. + + Arguments: + `model` + The model to use. Defaults to `None`, which uses the server-defined default. + See the + [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `baseURL` + The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + `truncate` + Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. + + """ + return _Text2VecVoyageConfigCreate( + model=model, + baseURL=baseURL, + truncate=truncate, + vectorizeClassName=vectorize_collection_name, + ) From 55add062cb4b0030ff56740e7a190691aa13bc27 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 12 Mar 2024 10:54:40 +0100 Subject: [PATCH 2/8] Voyage fixes --- weaviate/collections/classes/config_named_vectors.py | 8 +++----- weaviate/collections/classes/config_vectorizers.py | 11 +++++------ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 7e954a6d5..8b6fa09a1 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -693,7 +693,7 @@ def text2vec_voyageai( vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, model: Optional[Union[VoyageModel, str]] = None, - baseURL: Optional[str] = None, + base_url: Optional[str] = None, truncate: Optional[bool] = None, ) -> _NamedVectorConfigCreate: """Create a named vector using the `text2vec-jinaai` model. @@ -714,9 +714,7 @@ def text2vec_voyageai( The model to use. Defaults to `None`, which uses the server-defined default. See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details. - `vectorize_collection_name` - Whether to vectorize the collection name. Defaults to `True`. - `baseURL` + `base_url` The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. `truncate` Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. @@ -727,7 +725,7 @@ def text2vec_voyageai( vectorizer=_Text2VecVoyageConfigCreate( model=model, vectorizeClassName=vectorize_collection_name, - baseURL=baseURL, + baseURL=base_url, truncate=truncate, ), vector_index_config=vector_index_config, diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 58e917cbc..6f4d84388 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -810,7 +810,7 @@ def text2vec_jinaai( @staticmethod def text2vec_voyageai( model: Optional[Union[VoyageModel, str]] = None, - baseURL: Optional[str] = None, + base_uRL: Optional[str] = None, truncate: Optional[bool] = None, vectorize_collection_name: bool = True, ) -> _VectorizerConfigCreate: @@ -824,17 +824,16 @@ def text2vec_voyageai( The model to use. Defaults to `None`, which uses the server-defined default. See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details. - `vectorize_collection_name` - Whether to vectorize the collection name. Defaults to `True`. - `baseURL` + `base_uRL` The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. `truncate` Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. - + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. """ return _Text2VecVoyageConfigCreate( model=model, - baseURL=baseURL, + baseURL=base_uRL, truncate=truncate, vectorizeClassName=vectorize_collection_name, ) From 2cc4b1662ef08dc41940966bf8d684325c07a16e Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 12 Mar 2024 12:46:29 +0100 Subject: [PATCH 3/8] More fixes --- test/collection/test_config.py | 2 +- weaviate/collections/classes/config_vectorizers.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 37319ac63..406f1a554 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -275,7 +275,7 @@ def test_basic_config(): vectorize_collection_name=False, model="voyage-large-2", truncate=False, - baseURL="https://voyage.made-up.com", + base_url="https://voyage.made-up.com", ), { "text2vec-voyageai": { diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 6f4d84388..bf28d3daf 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -810,7 +810,7 @@ def text2vec_jinaai( @staticmethod def text2vec_voyageai( model: Optional[Union[VoyageModel, str]] = None, - base_uRL: Optional[str] = None, + base_url: Optional[str] = None, truncate: Optional[bool] = None, vectorize_collection_name: bool = True, ) -> _VectorizerConfigCreate: @@ -824,7 +824,7 @@ def text2vec_voyageai( The model to use. Defaults to `None`, which uses the server-defined default. See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details. - `base_uRL` + `base_url` The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. `truncate` Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. @@ -833,7 +833,7 @@ def text2vec_voyageai( """ return _Text2VecVoyageConfigCreate( model=model, - baseURL=base_uRL, + baseURL=base_url, truncate=truncate, vectorizeClassName=vectorize_collection_name, ) From 47ddcdcdd2cc7013b0fad494208dddad848530fb Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 12 Mar 2024 12:49:35 +0100 Subject: [PATCH 4/8] Add new parameters for transformers --- test/collection/test_config.py | 2 ++ .../collections/classes/config_named_vectors.py | 12 ++++++++++++ .../collections/classes/config_vectorizers.py | 15 +++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 406f1a554..4776e1798 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -262,11 +262,13 @@ def test_basic_config(): Configure.Vectorizer.text2vec_transformers( pooling_strategy="cls", vectorize_collection_name=False, + inference_url="https://api.transformers.com", ), { "text2vec-transformers": { "vectorizeClassName": False, "poolingStrategy": "cls", + "inferenceUrl": "https://api.transformers.com", } }, ), diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 8b6fa09a1..46bbce5c3 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -619,6 +619,9 @@ def text2vec_transformers( vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, pooling_strategy: Literal["masked_mean", "cls"] = "masked_mean", + inference_url: Optional[str] = None, + passage_inference_url: Optional[str] = None, + query_inference_url: Optional[str] = None, ) -> _NamedVectorConfigCreate: """Create a named vector using the `text2vec_transformers` model. @@ -636,6 +639,12 @@ def text2vec_transformers( Whether to vectorize the collection name. Defaults to `True`. `pooling_strategy` The pooling strategy to use. Defaults to `masked_mean`. + `inference_url` + The inferenceUrl to use where API requests should go. You can use either this OR passage/query_inference_url. Defaults to `None`, which uses the server-defined default. + `passage_inference_url` + The inferenceUrl to use where passage API requests should go. You can use either this and query_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. + `query_inference_url` + The inferenceUrl to use where query API requests should go. You can use either this and passage_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. """ return _NamedVectorConfigCreate( name=name, @@ -643,6 +652,9 @@ def text2vec_transformers( vectorizer=_Text2VecTransformersConfigCreate( poolingStrategy=pooling_strategy, vectorizeClassName=vectorize_collection_name, + inferenceUrl=inference_url, + passageInferenceUrl=passage_inference_url, + queryInferenceUrl=query_inference_url, ), vector_index_config=vector_index_config, ) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index bf28d3daf..cbceaaf83 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -264,6 +264,9 @@ class _Text2VecTransformersConfig(_ConfigCreateModel): ) poolingStrategy: Literal["masked_mean", "cls"] vectorizeClassName: bool + inferenceUrl: Optional[str] + passageInferenceUrl: Optional[str] + queryInferenceUrl: Optional[str] class _Text2VecTransformersConfigCreate(_Text2VecTransformersConfig, _VectorizerConfigCreate): @@ -767,6 +770,9 @@ def text2vec_palm( def text2vec_transformers( pooling_strategy: Literal["masked_mean", "cls"] = "masked_mean", vectorize_collection_name: bool = True, + inference_url: Optional[str] = None, + passage_inference_url: Optional[str] = None, + query_inference_url: Optional[str] = None, ) -> _VectorizerConfigCreate: """Create a `_Text2VecTransformersConfigCreate` object for use when vectorizing using the `text2vec-transformers` model. @@ -778,6 +784,12 @@ def text2vec_transformers( The pooling strategy to use. Defaults to `masked_mean`. `vectorize_collection_name` Whether to vectorize the collection name. Defaults to `True`. + `inference_url` + The inferenceUrl to use where API requests should go. You can use either this OR passage/query_inference_url. Defaults to `None`, which uses the server-defined default. + `passage_inference_url` + The inferenceUrl to use where passage API requests should go. You can use either this and query_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. + `query_inference_url` + The inferenceUrl to use where query API requests should go. You can use either this and passage_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. Raises: `pydantic.ValidationError` if `pooling_strategy` is not a valid value from the `PoolingStrategy` type. @@ -785,6 +797,9 @@ def text2vec_transformers( return _Text2VecTransformersConfigCreate( poolingStrategy=pooling_strategy, vectorizeClassName=vectorize_collection_name, + inferenceUrl=inference_url, + passageInferenceUrl=passage_inference_url, + queryInferenceUrl=query_inference_url, ) @staticmethod From da9e36ff17bd1d4bbda7a8c20ffe7d2639469930 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Tue, 12 Mar 2024 13:36:46 +0100 Subject: [PATCH 5/8] Add interference url field for clip module --- weaviate/collections/classes/config_named_vectors.py | 12 ++++++++++-- weaviate/collections/classes/config_vectorizers.py | 11 ++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 46bbce5c3..1764148cc 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -321,10 +321,11 @@ def img2vec_neural( def multi2vec_clip( name: str, *, - image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, - text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + interference_url: Optional[str] = None, ) -> _NamedVectorConfigCreate: """Create a named vector using the `multi2vec_clip` model. @@ -340,6 +341,12 @@ def multi2vec_clip( The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default `vectorize_collection_name` Whether to vectorize the collection name. Defaults to `True`. + `image_fields` + The image fields to use in vectorization. + `text_fields` + The text fields to use in vectorization. + `inference_url` + The inference url to use where API requests should go. Defaults to `None`, which uses the server-defined default. """ return _NamedVectorConfigCreate( name=name, @@ -347,6 +354,7 @@ def multi2vec_clip( imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), vectorizeClassName=vectorize_collection_name, + inferenceUrl=interference_url, ), vector_index_config=vector_index_config, ) diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index cbceaaf83..c80e110c9 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -345,6 +345,7 @@ def _to_dict(self) -> Dict[str, Any]: class _Multi2VecClipConfig(_Multi2VecBase): vectorizer: Vectorizers = Field(default=Vectorizers.MULTI2VEC_CLIP, frozen=True, exclude=True) + inferenceUrl: Optional[str] class _Multi2VecClipConfigCreate(_Multi2VecClipConfig, _VectorizerConfigCreate): @@ -417,6 +418,7 @@ def img2vec_neural( def multi2vec_clip( image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + interference_url: Optional[str] = None, vectorize_collection_name: bool = True, ) -> _VectorizerConfigCreate: """Create a `_Multi2VecClipConfigCreate` object for use when vectorizing using the `multi2vec-clip` model. @@ -429,6 +431,8 @@ def multi2vec_clip( The image fields to use in vectorization. `text_fields` The text fields to use in vectorization. + `inference_url` + The inference url to use where API requests should go. Defaults to `None`, which uses the server-defined default. `vectorize_collection_name` Whether to vectorize the collection name. Defaults to `True`. @@ -439,6 +443,7 @@ def multi2vec_clip( imageFields=_map_multi2vec_fields(image_fields), textFields=_map_multi2vec_fields(text_fields), vectorizeClassName=vectorize_collection_name, + inferenceUrl=interference_url, ) @staticmethod @@ -785,11 +790,11 @@ def text2vec_transformers( `vectorize_collection_name` Whether to vectorize the collection name. Defaults to `True`. `inference_url` - The inferenceUrl to use where API requests should go. You can use either this OR passage/query_inference_url. Defaults to `None`, which uses the server-defined default. + The inference url to use where API requests should go. You can use either this OR passage/query_inference_url. Defaults to `None`, which uses the server-defined default. `passage_inference_url` - The inferenceUrl to use where passage API requests should go. You can use either this and query_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. + The inference url to use where passage API requests should go. You can use either this and query_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. `query_inference_url` - The inferenceUrl to use where query API requests should go. You can use either this and passage_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. + The inference url to use where query API requests should go. You can use either this and passage_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. Raises: `pydantic.ValidationError` if `pooling_strategy` is not a valid value from the `PoolingStrategy` type. From 326df9ee06c8bbb4fbe3e166ff9cf0938ba60f57 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Thu, 14 Mar 2024 06:41:11 +0100 Subject: [PATCH 6/8] Add support for mistral --- test/collection/test_config.py | 4 ++++ weaviate/collections/classes/config.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 4776e1798..00d52a9b9 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -553,6 +553,10 @@ def test_config_with_vectorizer_and_properties( Configure.Generative.anyscale(), {"generative-anyscale": {}}, ), + ( + Configure.Generative.mistral(temperature=0.5, max_tokens=100, model="model"), + {"generative-mistral": {"temperature": 0.5, "maxTokens": 100, "model": "model"}}, + ), ( Configure.Generative.openai( model="gpt-4", diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 7f02599b3..bd3b4a28e 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -164,6 +164,7 @@ class GenerativeSearches(str, Enum): PALM = "generative-palm" AWS = "generative-aws" ANYSCALE = "generative-anyscale" + MISTRAL = "generative-mistral" class Rerankers(str, Enum): @@ -367,6 +368,15 @@ class _GenerativeAnyscale(_GenerativeConfigCreate): model: Optional[str] +class _GenerativeMistral(_GenerativeConfigCreate): + generative: GenerativeSearches = Field( + default=GenerativeSearches.MISTRAL, frozen=True, exclude=True + ) + temperature: Optional[float] + model: Optional[str] + maxTokens: Optional[int] + + class _GenerativeOpenAIConfigBase(_GenerativeConfigCreate): generative: GenerativeSearches = Field( default=GenerativeSearches.OPENAI, frozen=True, exclude=True @@ -464,6 +474,14 @@ def anyscale( ) -> _GenerativeConfigCreate: return _GenerativeAnyscale(model=model, temperature=temperature) + @staticmethod + def mistral( + model: Optional[str] = None, + temperature: Optional[float] = None, + max_tokens: Optional[int] = None, + ) -> _GenerativeConfigCreate: + return _GenerativeMistral(model=model, temperature=temperature, maxTokens=max_tokens) + @staticmethod def openai( model: Optional[str] = None, From 8ffddb4a45e8ca53995d85c6b5581f1392f5ec0e Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Thu, 14 Mar 2024 08:27:41 +0100 Subject: [PATCH 7/8] Add multi2vec-palm --- test/collection/test_config.py | 46 +++++++++++++++- .../classes/config_named_vectors.py | 55 +++++++++++++++++++ .../collections/classes/config_vectorizers.py | 55 +++++++++++++++++++ 3 files changed, 154 insertions(+), 2 deletions(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 00d52a9b9..2b0cada35 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -311,6 +311,23 @@ def test_basic_config(): } }, ), + ( + Configure.Vectorizer.multi2vec_palm( + image_fields=["image"], + text_fields=["text"], + project_id="project", + location="us-central1", + ), + { + "multi2vec-palm": { + "imageFields": ["image"], + "textFields": ["text"], + "projectId": "project", + "location": "us-central1", + "vectorizeClassName": True, + } + }, + ), ( Configure.Vectorizer.multi2vec_clip( image_fields=[Multi2VecField(name="image")], @@ -722,7 +739,7 @@ def test_config_with_generative( def test_config_with_reranker( reranker_config: _RerankerConfigCreate, expected_mc: dict, -): +) -> None: config = _CollectionConfigCreate(name="test", reranker_config=reranker_config) assert config._to_dict() == { **DEFAULTS, @@ -732,7 +749,7 @@ def test_config_with_reranker( } -def test_config_with_properties(): +def test_config_with_properties() -> None: config = _CollectionConfigCreate( name="test", description="test", @@ -1126,6 +1143,31 @@ def test_vector_config_flat_pq() -> None: } }, ), + ( + [ + Configure.NamedVectors.multi2vec_palm( + name="test", + image_fields=["image"], + text_fields=["text"], + project_id="project", + location="us-central1", + ) + ], + { + "test": { + "vectorizer": { + "multi2vec-palm": { + "imageFields": ["image"], + "textFields": ["text"], + "projectId": "project", + "location": "us-central1", + "vectorizeClassName": True, + } + }, + "vectorIndexType": "hnsw", + } + }, + ), ( [ Configure.NamedVectors.multi2vec_bind( diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 1764148cc..60b140a2f 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -17,6 +17,7 @@ _Img2VecNeuralConfigCreate, _Multi2VecBindConfigCreate, _Multi2VecClipConfigCreate, + _Multi2VecPalmConfig, _Ref2VecCentroidConfigCreate, _Text2VecAWSConfigCreate, _Text2VecAzureOpenAIConfigCreate, @@ -359,6 +360,60 @@ def multi2vec_clip( vector_index_config=vector_index_config, ) + @staticmethod + def multi2vec_palm( + name: str, + *, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + location: str, + project_id: str, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + dimensions: Optional[int] = None, + model_id: Optional[str] = None, + ) -> _NamedVectorConfigCreate: + """Create a named vector using the `multi2vec_clip` model. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-gpt4all) + for detailed usage. + + Arguments: + `name` + The name of the named vector. + `source_properties` + Which properties should be included when vectorizing. By default all text properties are included. + `vector_index_config` + The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + `location` + Where the model runs. REQUIRED. + `project_id` + The project ID to use, REQUIRED. + `image_fields` + The image fields to use in vectorization. + `text_fields` + The text fields to use in vectorization. + `dimensions` + The number of dimensions to use. Defaults to `None`, which uses the server-defined default. + `model_id` + The model ID to use. Defaults to `None`, which uses the server-defined default. + """ + return _NamedVectorConfigCreate( + name=name, + vectorizer=_Multi2VecPalmConfig( + projectId=project_id, + location=location, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + dimensions=dimensions, + modelId=model_id, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=vector_index_config, + ) + @staticmethod def multi2vec_bind( name: str, diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index c80e110c9..679f533b2 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -89,6 +89,7 @@ class Vectorizers(str, Enum): IMG2VEC_NEURAL = "img2vec-neural" MULTI2VEC_CLIP = "multi2vec-clip" MULTI2VEC_BIND = "multi2vec-bind" + MULTI2VEC_PALM = "multi2vec-palm" REF2VEC_CENTROID = "ref2vec-centroid" @@ -352,6 +353,15 @@ class _Multi2VecClipConfigCreate(_Multi2VecClipConfig, _VectorizerConfigCreate): pass +class _Multi2VecPalmConfig(_Multi2VecBase, _VectorizerConfigCreate): + vectorizer: Vectorizers = Field(default=Vectorizers.MULTI2VEC_PALM, frozen=True, exclude=True) + projectId: str + location: Optional[str] + modelId: Optional[str] + dimensions: Optional[int] + vectorizeClassName: bool + + class _Multi2VecBindConfig(_Multi2VecBase): vectorizer: Vectorizers = Field(default=Vectorizers.MULTI2VEC_BIND, frozen=True, exclude=True) audioFields: Optional[List[Multi2VecField]] @@ -771,6 +781,51 @@ def text2vec_palm( vectorizeClassName=vectorize_collection_name, ) + @staticmethod + def multi2vec_palm( + *, + location: str, + project_id: str, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + dimensions: Optional[int] = None, + model_id: Optional[str] = None, + vectorize_collection_name: bool = True, + ) -> _VectorizerConfigCreate: + """Create a `_Multi2VecPalmConfig` object for use when vectorizing using the `text2vec-palm` model. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-palm) + for detailed usage. + + Arguments: + `location` + Where the model runs. REQUIRED. + `project_id` + The project ID to use, REQUIRED. + `image_fields` + The image fields to use in vectorization. + `text_fields` + The text fields to use in vectorization. + `dimensions` + The number of dimensions to use. Defaults to `None`, which uses the server-defined default. + `model_id` + The model ID to use. Defaults to `None`, which uses the server-defined default. + `vectorize_collection_name` + Whether to vectorize the collection name. Defaults to `True`. + + Raises: + `pydantic.ValidationError` if `api_endpoint` is not a valid URL. + """ + return _Multi2VecPalmConfig( + projectId=project_id, + location=location, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + dimensions=dimensions, + modelId=model_id, + vectorizeClassName=vectorize_collection_name, + ) + @staticmethod def text2vec_transformers( pooling_strategy: Literal["masked_mean", "cls"] = "masked_mean", From 9abe1155fa552d19f5e40631277386053cceea88 Mon Sep 17 00:00:00 2001 From: Dirk Kulawiak Date: Thu, 14 Mar 2024 12:51:24 +0100 Subject: [PATCH 8/8] Review fixes --- weaviate/collections/classes/config_named_vectors.py | 10 ++-------- weaviate/collections/classes/config_vectorizers.py | 3 +++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 60b140a2f..ceef721f7 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -336,8 +336,6 @@ def multi2vec_clip( Arguments: `name` The name of the named vector. - `source_properties` - Which properties should be included when vectorizing. By default all text properties are included. `vector_index_config` The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default `vectorize_collection_name` @@ -381,8 +379,6 @@ def multi2vec_palm( Arguments: `name` The name of the named vector. - `source_properties` - Which properties should be included when vectorizing. By default all text properties are included. `vector_index_config` The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default `vectorize_collection_name` @@ -436,8 +432,6 @@ def multi2vec_bind( Arguments: `name` The name of the named vector. - `source_properties` - Which properties should be included when vectorizing. By default all text properties are included. `vector_index_config` The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default `vectorize_collection_name` @@ -474,8 +468,8 @@ def ref2vec_centroid( Arguments: `name` The name of the named vector. - `source_properties` - Which properties should be included when vectorizing. By default all text properties are included. + `reference_properties` + The reference properties to use in vectorization, REQUIRED. `vector_index_config` The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default `vectorize_collection_name` diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 679f533b2..69f1c2095 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -69,6 +69,8 @@ class Vectorizers(str, Enum): Weaviate module backed by a ResNet-50 neural network for images. `MULTI2VEC_CLIP` Weaviate module backed by a Sentence-BERT CLIP model for images and text. + `MULTI2VEC_PALM` + Weaviate module backed by a palm model for images and text. `MULTI2VEC_BIND` Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video. `REF2VEC_CENTROID` @@ -884,6 +886,7 @@ def text2vec_jinaai( @staticmethod def text2vec_voyageai( + *, model: Optional[Union[VoyageModel, str]] = None, base_url: Optional[str] = None, truncate: Optional[bool] = None,