Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for new modules #943

Merged
merged 8 commits into from
Mar 14, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions test/collection/test_config.py
Original file line number Diff line number Diff line change
@@ -270,6 +270,22 @@ def test_basic_config():
}
},
),
(
Configure.Vectorizer.text2vec_voyageai(
vectorize_collection_name=False,
model="voyage-large-2",
truncate=False,
baseURL="https://voyage.made-up.com",
),
{
"text2vec-voyageai": {
"vectorizeClassName": False,
"model": "voyage-large-2",
"baseURL": "https://voyage.made-up.com",
"truncate": False,
}
},
),
(
Configure.Vectorizer.img2vec_neural(
image_fields=["test"],
@@ -1046,6 +1062,25 @@ def test_vector_config_flat_pq() -> None:
}
},
),
(
[
Configure.NamedVectors.text2vec_voyageai(
name="test", source_properties=["prop"], truncate=True
)
],
{
"test": {
"vectorizer": {
"text2vec-voyageai": {
"properties": ["prop"],
"vectorizeClassName": True,
"truncate": True,
}
},
"vectorIndexType": "hnsw",
}
},
),
(
[
Configure.NamedVectors.img2vec_neural(
50 changes: 50 additions & 0 deletions weaviate/collections/classes/config_named_vectors.py
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@
_Text2VecOpenAIConfigCreate,
_Text2VecPalmConfigCreate,
_Text2VecTransformersConfigCreate,
_Text2VecVoyageConfigCreate,
_VectorizerConfigCreate,
AWSModel,
AWSService,
@@ -38,6 +39,7 @@
OpenAIModel,
OpenAIType,
Vectorizers,
VoyageModel,
_map_multi2vec_fields,
)

@@ -683,6 +685,54 @@ def text2vec_jinaai(
vector_index_config=vector_index_config,
)

@staticmethod
def text2vec_voyageai(
    name: str,
    *,
    source_properties: Optional[List[str]] = None,
    vector_index_config: Optional[_VectorIndexConfigCreate] = None,
    vectorize_collection_name: bool = True,
    model: Optional[Union[VoyageModel, str]] = None,
    baseURL: Optional[str] = None,
    truncate: Optional[bool] = None,
) -> _NamedVectorConfigCreate:
    """Create a named vector using the `text2vec-voyageai` model.

    See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai)
    for detailed usage.

    Arguments:
        `name`
            The name of the named vector.
        `source_properties`
            Which properties should be included when vectorizing. By default all text properties are included.
        `vector_index_config`
            The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
            See the
            [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models) for more details.
        `baseURL`
            The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default.
        `truncate`
            Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default.

    Returns:
        A `_NamedVectorConfigCreate` wrapping a `_Text2VecVoyageConfigCreate` vectorizer configuration.
    """
    return _NamedVectorConfigCreate(
        name=name,
        source_properties=source_properties,
        vectorizer=_Text2VecVoyageConfigCreate(
            model=model,
            vectorizeClassName=vectorize_collection_name,
            baseURL=baseURL,
            truncate=truncate,
        ),
        vector_index_config=vector_index_config,
    )


class _NamedVectorsUpdate:
@staticmethod
50 changes: 50 additions & 0 deletions weaviate/collections/classes/config_vectorizers.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@
"text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"
]
JinaModel: TypeAlias = Literal["jina-embeddings-v2-base-en", "jina-embeddings-v2-small-en"]
VoyageModel: TypeAlias = Literal["voyage-large-2, voyage-code-2, voyage-2"]
AWSModel: TypeAlias = Literal[
"amazon.titan-embed-text-v1",
"cohere.embed-english-v3",
@@ -62,6 +63,8 @@ class Vectorizers(str, Enum):
Weaviate module backed by Transformers text-based embedding models.
`TEXT2VEC_JINAAI`
Weaviate module backed by Jina AI text-based embedding models.
`TEXT2VEC_VOYAGEAI`
Weaviate module backed by Voyage AI text-based embedding models.
`IMG2VEC_NEURAL`
Weaviate module backed by a ResNet-50 neural network for images.
`MULTI2VEC_CLIP`
@@ -82,6 +85,7 @@ class Vectorizers(str, Enum):
TEXT2VEC_PALM = "text2vec-palm"
TEXT2VEC_TRANSFORMERS = "text2vec-transformers"
TEXT2VEC_JINAAI = "text2vec-jinaai"
TEXT2VEC_VOYAGEAI = "text2vec-voyageai"
IMG2VEC_NEURAL = "img2vec-neural"
MULTI2VEC_CLIP = "multi2vec-clip"
MULTI2VEC_BIND = "multi2vec-bind"
@@ -285,6 +289,20 @@ class _Text2VecJinaConfigCreate(_Text2VecJinaConfig, _VectorizerConfigCreate):
pass


class _Text2VecVoyageConfig(_ConfigCreateModel):
    # Settings carried for the `text2vec-voyageai` vectorizer module.

    # Module identifier, fixed to TEXT2VEC_VOYAGEAI. Declared frozen and with
    # `exclude=True` (presumably pydantic `Field` semantics: immutable and
    # left out of the dumped model — confirm against the `Field` import).
    vectorizer: Vectorizers = Field(
        exclude=True,
        frozen=True,
        default=Vectorizers.TEXT2VEC_VOYAGEAI,
    )

    # Optional module settings; no defaults are declared here, so the
    # factory functions pass every value explicitly (possibly `None`).
    model: Optional[str]
    baseURL: Optional[str]
    truncate: Optional[bool]

    # Populated from the factories' `vectorize_collection_name` argument.
    vectorizeClassName: bool


class _Text2VecVoyageConfigCreate(_Text2VecVoyageConfig, _VectorizerConfigCreate):
    # Joins the Voyage AI settings with the `_VectorizerConfigCreate` base;
    # declares no fields or methods of its own.
    ...


class _Img2VecNeuralConfig(_ConfigCreateModel):
vectorizer: Vectorizers = Field(default=Vectorizers.IMG2VEC_NEURAL, frozen=True, exclude=True)
imageFields: List[str]
@@ -788,3 +806,35 @@ def text2vec_jinaai(
Whether to vectorize the collection name. Defaults to `True`.
"""
return _Text2VecJinaConfigCreate(model=model, vectorizeClassName=vectorize_collection_name)

@staticmethod
def text2vec_voyageai(
    model: Optional[Union[VoyageModel, str]] = None,
    baseURL: Optional[str] = None,
    truncate: Optional[bool] = None,
    vectorize_collection_name: bool = True,
) -> _VectorizerConfigCreate:
    """Build the vectorizer configuration for the `text2vec-voyageai` module.

    The result is a `_Text2VecVoyageConfigCreate` object. See the
    [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai)
    for detailed usage.

    Arguments:
        `model`
            The model to use. Defaults to `None`, which uses the server-defined default.
            The available models are listed in the
            [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-voyageai#available-models).
        `baseURL`
            The base URL that API requests should be sent to. Defaults to `None`, which uses the server-defined default.
        `truncate`
            Whether input texts are truncated to fit within the context length. Defaults to `None`, which uses the server-defined default.
        `vectorize_collection_name`
            Whether to vectorize the collection name. Defaults to `True`.
    """
    voyage_config = _Text2VecVoyageConfigCreate(
        vectorizeClassName=vectorize_collection_name,
        model=model,
        truncate=truncate,
        baseURL=baseURL,
    )
    return voyage_config