Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for multi2vec-jinaai; docstring changes #1422

Merged
merged 6 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions test/collection/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,32 @@ def test_basic_config():
}
},
),
(
Configure.Vectorizer.text2vec_jinaai(
model="jina-embeddings-v3",
vectorize_collection_name=False,
dimensions=512,
),
{
"text2vec-jinaai": {
"model": "jina-embeddings-v3",
"vectorizeClassName": False,
"dimensions": 512,
}
},
),
(
Configure.Vectorizer.multi2vec_jinaai(
model="jina-clip-v2",
vectorize_collection_name=False,
),
{
"multi2vec-jinaai": {
"model": "jina-clip-v2",
"vectorizeClassName": False,
}
},
),
(
Configure.Vectorizer.text2vec_voyageai(
vectorize_collection_name=False,
Expand Down Expand Up @@ -1249,6 +1275,20 @@ def test_vector_config_flat_pq() -> None:
}
},
),
(
[Configure.NamedVectors.multi2vec_jinaai(name="test", text_fields=["prop"])],
{
"test": {
"vectorizer": {
"multi2vec-jinaai": {
"vectorizeClassName": True,
"textFields": ["prop"],
}
},
"vectorIndexType": "hnsw",
}
},
),
(
[Configure.NamedVectors.text2vec_gpt4all(name="test", source_properties=["prop"])],
{
Expand Down
98 changes: 73 additions & 25 deletions weaviate/collections/classes/config_named_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,10 @@
AWSModel,
AWSService,
CohereModel,
CohereMultimodalModel,
CohereTruncation,
JinaModel,
JinaMultimodalModel,
Multi2VecField,
OpenAIModel,
OpenAIType,
Expand All @@ -51,6 +53,7 @@
_Text2VecDatabricksConfig,
_Text2VecVoyageConfig,
_Multi2VecCohereConfig,
_Multi2VecJinaConfig,
)
from ...warnings import _Warnings

Expand Down Expand Up @@ -161,7 +164,7 @@ def text2vec_cohere(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_cohere` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-cohere)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings)
for detailed usage.

Arguments:
Expand All @@ -183,7 +186,7 @@ def text2vec_cohere(
The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.

Raises:
`pydantic.ValidationError` if `truncate` is not a valid value from the `CohereModel` type.
`pydantic.ValidationError` if `model` is not a valid value from the `CohereModel` type or if `truncate` is not a valid value from the `CohereTruncation` type.
"""
return _NamedVectorConfigCreate(
name=name,
Expand All @@ -204,14 +207,14 @@ def multi2vec_cohere(
vector_index_config: Optional[_VectorIndexConfigCreate] = None,
vectorize_collection_name: bool = True,
base_url: Optional[AnyHttpUrl] = None,
model: Optional[Union[CohereModel, str]] = None,
model: Optional[Union[CohereMultimodalModel, str]] = None,
truncate: Optional[CohereTruncation] = None,
image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `multi2vec_cohere` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-cohere)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal)
for detailed usage.

Arguments:
Expand All @@ -225,8 +228,6 @@ def multi2vec_cohere(
The model to use. Defaults to `None`, which uses the server-defined default.
`truncate`
The truncation strategy to use. Defaults to `None`, which uses the server-defined default.
`vectorize_collection_name`
Whether to vectorize the collection name. Defaults to `True`.
`base_url`
The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
`image_fields`
Expand All @@ -235,7 +236,7 @@ def multi2vec_cohere(
The text fields to use in vectorization.

Raises:
`pydantic.ValidationError` if `truncate` is not a valid value from the `CohereModel` type.
`pydantic.ValidationError` if `model` is not a valid value from the `CohereMultimodalModel` type or if `truncate` is not a valid value from the `CohereTruncation` type.
"""
return _NamedVectorConfigCreate(
name=name,
Expand Down Expand Up @@ -294,7 +295,7 @@ def text2vec_databricks(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec-databricks` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-databricks)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/databricks/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -333,7 +334,7 @@ def text2vec_mistral(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec-mistral` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-mistral)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/mistral/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -370,7 +371,7 @@ def text2vec_ollama(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec-ollama` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-ollama)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/ollama/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -417,7 +418,7 @@ def text2vec_openai(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_openai` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-openai)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -473,7 +474,7 @@ def text2vec_aws(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_aws` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-aws)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -546,7 +547,7 @@ def multi2vec_clip(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `multi2vec_clip` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-gpt4all)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -726,7 +727,7 @@ def multi2vec_bind(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `multi2vec_bind` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-gpt4all)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/imagebind/embeddings-multimodal)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -797,7 +798,7 @@ def text2vec_azure_openai(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_azure_openai` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-gpt4all)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai-azure/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -832,7 +833,7 @@ def text2vec_gpt4all(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_gpt4all` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-gpt4all)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/gpt4all/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -871,7 +872,7 @@ def text2vec_huggingface(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_huggingface` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-huggingface)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -901,7 +902,7 @@ def text2vec_huggingface(
Raises:
`pydantic.ValidationError` if the arguments passed to the function are invalid.
It is important to note that some of these variables are mutually exclusive.
See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-huggingface) for more details.
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings#vectorizer-parameters) for more details.
"""
return _NamedVectorConfigCreate(
name=name,
Expand Down Expand Up @@ -936,7 +937,7 @@ def text2vec_palm(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_palm` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-palm)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -990,7 +991,7 @@ def text2vec_google(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_google` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-google)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -1043,7 +1044,7 @@ def text2vec_transformers(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec_transformers` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-transformers)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings)
for detailed usage.

Arguments:
Expand Down Expand Up @@ -1090,7 +1091,7 @@ def text2vec_jinaai(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec-jinaai` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-jinaai)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings)
for detailed usage.

Arguments:
Expand All @@ -1109,7 +1110,7 @@ def text2vec_jinaai(
`model`
The model to use. Defaults to `None`, which uses the server-defined default.
See the
[documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-jinaai#available-models) for more details.
[documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings#available-models) for more details.
"""
return _NamedVectorConfigCreate(
name=name,
Expand All @@ -1123,6 +1124,53 @@ def text2vec_jinaai(
vector_index_config=vector_index_config,
)

@staticmethod
def multi2vec_jinaai(
    name: str,
    *,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
    base_url: Optional[AnyHttpUrl] = None,
    model: Optional[Union[JinaMultimodalModel, str]] = None,
    image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
    text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None,
) -> _NamedVectorConfigCreate:
    """Create a named vector using the `multi2vec_jinaai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal)
    for detailed usage.

    Arguments:
        `name`
            The name of the named vector.
        `vector_index_config`
            The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
        `base_url`
            The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
        `image_fields`
            The image fields to use in vectorization.
        `text_fields`
            The text fields to use in vectorization.

    Raises:
        `pydantic.ValidationError` if `model` is not a valid value from the `JinaMultimodalModel` type.
    """
    # Build the module-specific vectorizer settings first; both field lists go
    # through `_map_multi2vec_fields` before being handed to the config.
    jina_vectorizer = _Multi2VecJinaConfig(
        baseURL=base_url,
        model=model,
        vectorizeClassName=vectorize_collection_name,
        imageFields=_map_multi2vec_fields(image_fields),
        textFields=_map_multi2vec_fields(text_fields),
    )
    # Wrap the vectorizer together with the (optional) index configuration
    # under the caller-supplied vector name.
    return _NamedVectorConfigCreate(
        name=name,
        vectorizer=jina_vectorizer,
        vector_index_config=vector_index_config,
    )

@staticmethod
def text2vec_voyageai(
name: str,
Expand All @@ -1136,7 +1184,7 @@ def text2vec_voyageai(
) -> _NamedVectorConfigCreate:
"""Create a named vector using the `text2vec-voyageai` model.

See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-jinaai)
See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings)
for detailed usage.

Arguments:
Expand All @@ -1151,7 +1199,7 @@ def text2vec_voyageai(
`model`
The model to use. Defaults to `None`, which uses the server-defined default.
See the
[documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details.
[documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings#available-models) for more details.
`base_url`
The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
`truncate`
Expand Down
Loading