Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Named vectors #878

Merged
merged 50 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
a240730
Refactor config
dirkkul Feb 15, 2024
aa3c43e
Support for named vectors
dirkkul Feb 15, 2024
f6eecb2
Add missing files
dirkkul Feb 15, 2024
c2c887a
Add support for batch
dirkkul Feb 15, 2024
f8b6855
Add support for remaining vectorizers
dirkkul Feb 16, 2024
ecaeef8
Add vector index configuration and basic export
dirkkul Feb 16, 2024
a7c0bdd
Add export of vector index config
dirkkul Feb 16, 2024
28ce9d0
Add vector index config to remaining vectorizers
dirkkul Feb 16, 2024
3b56d04
Add named vectors to batch and cleanup docstrings and add missing ref…
dirkkul Feb 16, 2024
ab04844
Remove accidentally added test
dirkkul Feb 16, 2024
ca2b36b
Add more docstrings and fix tests
dirkkul Feb 16, 2024
d6ab24c
Fix export and reimport of named vectors
dirkkul Feb 16, 2024
fc74291
fix old tests
dirkkul Feb 16, 2024
78c0671
Fix translation from source_properties to properties
dirkkul Feb 16, 2024
012151b
Fix another test
dirkkul Feb 16, 2024
d7f8277
Add aggregate handling
dirkkul Feb 19, 2024
b3a1023
Merge branch 'main' of https://github.com/weaviate/weaviate-python-cl…
tsmith023 Feb 20, 2024
5b2d616
Fix incorrect argument type in fetch_objects function
tsmith023 Feb 20, 2024
385d93e
fix mock broken in merge
tsmith023 Feb 20, 2024
873491d
tidy up tests
tsmith023 Feb 20, 2024
e43bbe6
use newer version with server-side fixes
tsmith023 Feb 20, 2024
00abe52
Merge branch 'main' of https://github.com/weaviate/weaviate-python-cl…
tsmith023 Feb 20, 2024
8114e75
fix 3.8 and 3.9 TypeAlias import
tsmith023 Feb 20, 2024
067ff98
Fix parsing of sourceProperties from schema in config
tsmith023 Feb 20, 2024
cad710a
correctly fix exporting and importing logic
tsmith023 Feb 20, 2024
cea8201
throw error when creating named vectors with old versions
tsmith023 Feb 20, 2024
cb1f844
add skip
tsmith023 Feb 20, 2024
1214fde
bump to latest stable/v1.24 build
tsmith023 Feb 20, 2024
579cd3b
uncomment assertion
tsmith023 Feb 20, 2024
420d4f8
update CI image
tsmith023 Feb 21, 2024
d7acf82
Merge branch 'main' of https://github.com/weaviate/weaviate-python-cl…
tsmith023 Feb 21, 2024
d02f632
fix old version
tsmith023 Feb 21, 2024
92e71a0
Add dimensions to openai vectorizer
dirkkul Feb 21, 2024
67fccde
Merge branch 'main' into named_vectors
dirkkul Feb 21, 2024
feadc7e
Merge pull request #904 from weaviate/openai_dimension
dirkkul Feb 21, 2024
4f97da0
support updating individual named vectors configs
tsmith023 Feb 21, 2024
018aab4
add sleeps to help avoid read-only flakes
tsmith023 Feb 21, 2024
f22bf86
increase sleep times
tsmith023 Feb 21, 2024
8a74cd9
throw error when updating quantizer type (not allowed)
tsmith023 Feb 21, 2024
0663dbf
fix config parsing for new Weaviate behaviour
tsmith023 Feb 21, 2024
faa00ec
fix to latest version
tsmith023 Feb 21, 2024
b163722
Fix assertion in test_batch_add
tsmith023 Feb 21, 2024
76fbcd4
bump ci ver to latest RC
tsmith023 Feb 22, 2024
9de532d
enforce all params to be KW in named vectors static methods
tsmith023 Feb 22, 2024
a26b525
change name in update, update docstrings, catch and reraise exceptions
tsmith023 Feb 22, 2024
67e05bb
allow old syntax of update vectorizer with new parameter, deprecate old
tsmith023 Feb 22, 2024
ace5801
refactor named vec configs to preserve `_to_dict` inheritance
tsmith023 Feb 23, 2024
dd2642c
comment out updating named vectors due to server instability
tsmith023 Feb 23, 2024
ff43777
allow vectorizer config in update but without namedvectors
tsmith023 Feb 23, 2024
6bb1891
only allow enabled in pqconfigupdate
tsmith023 Feb 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ on:

env:
OLD_WEAVIATE_VERSION: 1.23.10
NEW_WEAVIATE_VERSION: preview--f005cb8
NEW_WEAVIATE_VERSION: 1.24.0-rc.1

jobs:
lint-and-format:
Expand Down
23 changes: 18 additions & 5 deletions integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import Any, Optional, List, Generator, Protocol, Type, Dict, Tuple
from typing import Any, Optional, List, Generator, Protocol, Type, Dict, Tuple, Union

import pytest
from _pytest.fixtures import SubRequest
Expand All @@ -22,6 +22,8 @@
from weaviate.collections.classes.types import Properties
from weaviate.config import AdditionalConfig

from weaviate.collections.classes.config_named_vectors import _NamedVectorConfigCreate


class CollectionFactory(Protocol):
"""Typing for fixture."""
Expand All @@ -31,7 +33,9 @@ def __call__(
name: str = "",
properties: Optional[List[Property]] = None,
references: Optional[List[_ReferencePropertyBase]] = None,
vectorizer_config: Optional[_VectorizerConfigCreate] = None,
vectorizer_config: Optional[
Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]]
] = None,
inverted_index_config: Optional[_InvertedIndexConfigCreate] = None,
multi_tenancy_config: Optional[_MultiTenancyConfigCreate] = None,
generative_config: Optional[_GenerativeConfigCreate] = None,
Expand All @@ -57,7 +61,9 @@ def _factory(
name: str = "",
properties: Optional[List[Property]] = None,
references: Optional[List[_ReferencePropertyBase]] = None,
vectorizer_config: Optional[_VectorizerConfigCreate] = None,
vectorizer_config: Optional[
Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]]
] = None,
inverted_index_config: Optional[_InvertedIndexConfigCreate] = None,
multi_tenancy_config: Optional[_MultiTenancyConfigCreate] = None,
generative_config: Optional[_GenerativeConfigCreate] = None,
Expand Down Expand Up @@ -109,7 +115,11 @@ class OpenAICollection(Protocol):
"""Typing for fixture."""

def __call__(
self, name: str = "", vectorizer_config: Optional[_VectorizerConfigCreate] = None
self,
name: str = "",
vectorizer_config: Optional[
Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]]
] = None,
) -> Collection[Any, Any]:
"""Typing for fixture."""
...
Expand All @@ -120,7 +130,10 @@ def openai_collection(
collection_factory: CollectionFactory,
) -> Generator[OpenAICollection, None, None]:
def _factory(
name: str = "", vectorizer_config: Optional[_VectorizerConfigCreate] = None
name: str = "",
vectorizer_config: Optional[
Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]]
] = None,
) -> Collection[Any, Any]:
api_key = os.environ.get("OPENAI_APIKEY")
if api_key is None:
Expand Down
6 changes: 3 additions & 3 deletions integration/test_batch_v4.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid
from dataclasses import dataclass
from typing import Generator, List, Optional, Sequence, Protocol, Tuple, Callable
from typing import Generator, List, Optional, Protocol, Tuple, Callable

import pytest
from _pytest.fixtures import SubRequest
Expand All @@ -20,7 +20,7 @@
ReferenceToMulti,
)
from weaviate.collections.classes.tenants import Tenant
from weaviate.types import UUID
from weaviate.types import UUID, VECTORS

UUID1 = uuid.UUID("806827e0-2b31-43ca-9269-24fa95a221f9")
UUID2 = uuid.UUID("8ad0d33c-8db1-4437-87f3-72161ca2a51a")
Expand Down Expand Up @@ -146,7 +146,7 @@ def test_flushing(client_factory: ClientFactory) -> None:
def test_add_object(
client_factory: ClientFactory,
uid: Optional[UUID],
vector: Optional[Sequence],
vector: Optional[VECTORS],
) -> None:
client, name = client_factory()
with client.batch.fixed_size() as batch:
Expand Down
39 changes: 39 additions & 0 deletions integration/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,45 @@ def test_create_export_and_recreate(client: weaviate.WeaviateClient, request: Su
assert not client.collections.exists(name2)


def test_create_export_and_recreate_named_vectors(
    client: weaviate.WeaviateClient, request: SubRequest
) -> None:
    """Round-trip a collection with named vectors through export and re-creation.

    The test creates a collection that declares two named vectors, fetches its
    config, renames that config, creates a second collection from it and checks
    that the second collection reports the same ``vector_config`` as the first.
    It is skipped when the connected server version is lower than 1.24.0.
    """
    server_version = client._connection._weaviate_version
    if server_version.is_lower_than(1, 24, 0):
        pytest.skip("Named vectors are not supported in versions lower than 1.24.0")

    source_name = request.node.name
    copy_name = f"{source_name}2"
    both_names = [source_name, copy_name]

    # Start from a clean slate in case a previous run left collections behind.
    client.collections.delete(both_names)

    properties = [
        Property(
            name="name",
            data_type=DataType.TEXT,
            vectorize_property_name=True,
        ),
    ]
    named_vectors = [
        Configure.NamedVectors.text2vec_contextionary(
            "name",
            source_properties=["name"],
            vectorize_collection_name=False,
        ),
        Configure.NamedVectors.none(
            "custom",
            vector_index_config=Configure.VectorIndex.flat(),
        ),
    ]
    original = client.collections.create(
        name=source_name,
        properties=properties,
        vectorizer_config=named_vectors,
    )

    # Reuse the exported config under the second name to build the copy.
    exported = original.config.get()
    exported.name = copy_name
    recreated = client.collections.create_from_config(exported)

    reexported = recreated.config.get()
    assert reexported.vector_config == exported.vector_config

    client.collections.delete(both_names)


def test_collection_name_capitalization(
client: weaviate.WeaviateClient, request: SubRequest
) -> None:
Expand Down
6 changes: 4 additions & 2 deletions integration/test_collection_batch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import uuid
from dataclasses import dataclass
from typing import Generator, Optional, Sequence, Union, Any, Protocol
from typing import Generator, Optional, Union, Any, Protocol

import pytest

Expand All @@ -16,6 +16,8 @@
from weaviate.collections.classes.internal import _CrossReference, ReferenceToMulti
from weaviate.collections.classes.tenants import Tenant

from weaviate.types import VECTORS

UUID = Union[str, uuid.UUID]


Expand Down Expand Up @@ -83,7 +85,7 @@ def _factory(name: str = "", multi_tenancy: bool = False) -> Collection[Any, Any
)
@pytest.mark.parametrize("uuid", [None, UUID1, str(UUID2), UUID3.hex])
def test_add_object(
batch_collection: BatchCollection, uuid: Optional[UUID], vector: Optional[Sequence]
batch_collection: BatchCollection, uuid: Optional[UUID], vector: Optional[VECTORS]
) -> None:
collection = batch_collection()

Expand Down
13 changes: 9 additions & 4 deletions integration/test_collection_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def test_collection_config_update(collection_factory: CollectionFactory) -> None
stopwords_removals=["the"],
),
replication_config=Reconfigure.replication(factor=2),
vector_index_config=Reconfigure.VectorIndex.hnsw(
vectorizer_config=Reconfigure.VectorIndex.hnsw(
vector_cache_max_objects=2000000,
quantizer=Reconfigure.VectorIndex.Quantizer.pq(
bit_compression=True,
Expand Down Expand Up @@ -405,7 +405,7 @@ def test_collection_config_update(collection_factory: CollectionFactory) -> None
assert config.vector_index_type == VectorIndexType.HNSW

collection.config.update(
vector_index_config=Reconfigure.VectorIndex.hnsw(
vectorizer_config=Reconfigure.VectorIndex.hnsw(
quantizer=Reconfigure.VectorIndex.Quantizer.pq(enabled=False),
)
)
Expand Down Expand Up @@ -449,6 +449,7 @@ def test_hnsw_with_bq(collection_factory: CollectionFactory) -> None:

config = collection.config.get()
assert config.vector_index_type == VectorIndexType.HNSW
assert config.vector_index_config is not None
assert isinstance(config.vector_index_config.quantizer, _BQConfig)


Expand All @@ -462,25 +463,27 @@ def test_update_flat(collection_factory: CollectionFactory) -> None:

config = collection.config.get()
assert config.vector_index_type == VectorIndexType.FLAT
assert config.vector_index_config is not None
assert config.vector_index_config.vector_cache_max_objects == 5
assert isinstance(config.vector_index_config.quantizer, _BQConfig)
assert config.vector_index_config.quantizer.rescore_limit == 10

collection.config.update(
vector_index_config=Reconfigure.VectorIndex.flat(
vectorizer_config=Reconfigure.VectorIndex.flat(
vector_cache_max_objects=10,
quantizer=Reconfigure.VectorIndex.Quantizer.bq(rescore_limit=20),
),
)
config = collection.config.get()
assert config.vector_index_type == VectorIndexType.FLAT
assert config.vector_index_config is not None
assert config.vector_index_config.vector_cache_max_objects == 10
assert isinstance(config.vector_index_config.quantizer, _BQConfig)
assert config.vector_index_config.quantizer.rescore_limit == 20

# Cannot currently disable BQ after it has been enabled
# collection.config.update(
# vector_index_config=Reconfigure.VectorIndex.flat(
# vectorizer_config=Reconfigure.VectorIndex.flat(
# quantizer=Reconfigure.VectorIndex.Quantizer.bq(enabled=False),
# )
# )
Expand Down Expand Up @@ -567,6 +570,7 @@ def test_config_vector_index_flat_and_quantizer_bq(collection_factory: Collectio

conf = collection.config.get()
assert conf.vector_index_type == VectorIndexType.FLAT
assert conf.vector_index_config is not None
assert conf.vector_index_config.vector_cache_max_objects == 234
assert isinstance(conf.vector_index_config.quantizer, _BQConfig)
assert conf.vector_index_config.quantizer.rescore_limit == 456
Expand All @@ -583,6 +587,7 @@ def test_config_vector_index_hnsw_and_quantizer_pq(collection_factory: Collectio

conf = collection.config.get()
assert conf.vector_index_type == VectorIndexType.HNSW
assert conf.vector_index_config is not None
assert conf.vector_index_config.vector_cache_max_objects == 234
assert isinstance(conf.vector_index_config, _VectorIndexConfigHNSW)
assert conf.vector_index_config.ef_construction == 789
Expand Down
Loading
Loading