Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…n-client into implement-async-client
  • Loading branch information
tsmith023 committed Jul 3, 2024
2 parents 6ed818d + 6b46c0c commit da125a6
Show file tree
Hide file tree
Showing 36 changed files with 1,138 additions and 362 deletions.
3 changes: 2 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[flake8]
max-line-length = 100
exclude = .git, venv, .venv, .pytest_cache, dist, .idea, docs/conf.py, weaviate/collections/orm.py, weaviate/collections/classes/orm.py, weaviate/proto/**/*.py
ignore = D100, D104, D105, D107, E203, E266, E501, E731, W503
ignore = D100, D104, D105, D107, E203, E266, E501, E704, E731, W503
per-file-ignores =
weaviate/cluster/types.py:A005
weaviate/collections/classes/types.py:A005
Expand All @@ -14,4 +14,5 @@ per-file-ignores =
# D104: Missing docstring in public package
# D105: Missing docstring in magic method
# D107: Missing docstring in __init__
# E704: Multiple statements on one line (def)

12 changes: 6 additions & 6 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
cache: 'pip' # caching pip dependencies
- run: pip install -r requirements-devel.txt
- name: Run mypy
run: mypy --warn-unused-ignores --python-version ${{matrix.version}} ${{ matrix.folder }}
run: mypy --config-file ./pyproject.toml --warn-unused-ignores --python-version ${{matrix.version}} ${{ matrix.folder }}
- uses: jakebailey/pyright-action@v2
with:
version: 1.1.347
Expand Down Expand Up @@ -321,17 +321,17 @@ jobs:
$WEAVIATE_126
]
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Download build artifact to append to release
uses: actions/download-artifact@v4
with:
name: weaviate-python-client-wheel
- run: |
pip install weaviate_client-*.whl
pip install pytest pytest-asyncio pytest-benchmark pytest-profiling grpcio grpcio-tools pytest-xdist
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
pip install -r requirements-devel.txt # install test dependencies
- name: free space
run: sudo rm -rf /usr/local/lib/android
- run: rm -r weaviate
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
- id: black

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.6.0
hooks:
- id: no-commit-to-branch
- id: trailing-whitespace
Expand All @@ -19,7 +19,7 @@ repos:


- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
rev: 7.1.0
hooks:
- id: flake8
name: linting
Expand Down
30 changes: 30 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
Changelog
=========

Version 4.6.6
--------------

This patch version includes:

- Log batch errors
- Only the last 100k successfully added UUIDs are kept in memory to prevent OOM situations.
- Fix tenant creation with string input

In the v3 copy that is part of v4:

- Fixes a GraphQL query injection vulnerability caused by incorrect escaping of backslashes in the plain-text input builder methods. Many thanks to `@adamleko <https://github.com/adamleko>`_, `@bismuthsalamander <https://github.com/bismuthsalamander>`_, and `@tardigrade-9 <https://github.com/tardigrade-9>`_ for their help in fixing this issue.
- Fixes batch retry with tenants




Version 4.6.5
--------------

Expand Down Expand Up @@ -529,6 +546,19 @@ This beta version includes:
- No more builder methods or raw dictionaries
- Join the discussion and contribute your feedback `here <https://forum.weaviate.io/t/python-v4-client-feedback-megathread/892>`_

Version 3.26.5
--------------
This patch version includes

- Fixes GraphQL query injection vulnerability caused by incorrect escaping of backslashes in plain text input builder methods
- Many thanks to `@adamleko <https://github.com/adamleko>`_, `@bismuthsalamander <https://github.com/bismuthsalamander>`_, and `@tardigrade-9 <https://github.com/tardigrade-9>`_ for their help in fixing this issue

Version 3.26.4
--------------
This patch version includes

- Fixes batch retry with tenants

Version 3.26.2
--------------
This patch version includes
Expand Down
104 changes: 1 addition & 103 deletions integration/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pytest

import weaviate.classes as wvc
from integration.conftest import CollectionFactory, CollectionFactoryGet, _sanitize_collection_name
from integration.constants import WEAVIATE_LOGO_OLD_ENCODED, WEAVIATE_LOGO_NEW_ENCODED
from weaviate.collections.classes.batch import ErrorObject
Expand Down Expand Up @@ -51,8 +52,6 @@
)
from weaviate.types import UUID, UUIDS

import weaviate.classes as wvc

UUID1 = uuid.UUID("806827e0-2b31-43ca-9269-24fa95a221f9")
UUID2 = uuid.UUID("8ad0d33c-8db1-4437-87f3-72161ca2a51a")
UUID3 = uuid.UUID("83d99755-9deb-4b16-8431-d1dff4ab0a75")
Expand Down Expand Up @@ -863,107 +862,6 @@ def test_query_properties(collection_factory: CollectionFactory) -> None:
assert len(objects) == 0


def test_near_vector(collection_factory: CollectionFactory) -> None:
    """Query by the vector of a stored object, unrestricted and with distance/certainty limits.

    Four objects are inserted. The unrestricted query is expected to return all
    four; re-querying with the distance (respectively certainty) of the third
    hit as threshold is expected to return three objects.
    """
    vectorizer = Configure.Vectorizer.text2vec_contextionary(vectorize_collection_name=False)
    collection = collection_factory(
        properties=[Property(name="Name", data_type=DataType.TEXT)],
        vectorizer_config=vectorizer,
    )
    uuid_banana = collection.data.insert({"Name": "Banana"})
    for other_name in ("Fruit", "car", "Mountain"):
        collection.data.insert({"Name": other_name})

    banana = collection.query.fetch_object_by_id(uuid_banana, include_vector=True)
    query_vector = banana.vector["default"]

    all_hits = collection.query.near_vector(
        query_vector, return_metadata=MetadataQuery(distance=True, certainty=True)
    ).objects
    assert len(all_hits) == 4

    # The metadata of the third hit supplies the thresholds for the restricted queries.
    third_hit_metadata = all_hits[2].metadata

    hits_within_distance = collection.query.near_vector(
        query_vector, distance=third_hit_metadata.distance
    ).objects
    assert len(hits_within_distance) == 3

    hits_within_certainty = collection.query.near_vector(
        query_vector, certainty=third_hit_metadata.certainty
    ).objects
    assert len(hits_within_certainty) == 3


def test_near_vector_limit(collection_factory: CollectionFactory) -> None:
    """A near-vector query with ``limit=2`` over four stored objects returns two objects."""
    vectorizer = Configure.Vectorizer.text2vec_contextionary(vectorize_collection_name=False)
    collection = collection_factory(
        properties=[Property(name="Name", data_type=DataType.TEXT)],
        vectorizer_config=vectorizer,
    )
    uuid_banana = collection.data.insert({"Name": "Banana"})
    for other_name in ("Fruit", "car", "Mountain"):
        collection.data.insert({"Name": other_name})

    banana = collection.query.fetch_object_by_id(uuid_banana, include_vector=True)
    query_vector = banana.vector["default"]

    limited_hits = collection.query.near_vector(query_vector, limit=2).objects
    assert len(limited_hits) == 2


def test_near_vector_offset(collection_factory: CollectionFactory) -> None:
    """A near-vector query with ``offset=1`` skips the first hit.

    With four stored objects and the "Banana" vector as query, three objects
    are expected back and the first of them is expected to be "Fruit".
    """
    vectorizer = Configure.Vectorizer.text2vec_contextionary(vectorize_collection_name=False)
    collection = collection_factory(
        properties=[Property(name="Name", data_type=DataType.TEXT)],
        vectorizer_config=vectorizer,
    )
    uuid_banana = collection.data.insert({"Name": "Banana"})
    uuid_fruit = collection.data.insert({"Name": "Fruit"})
    for other_name in ("car", "Mountain"):
        collection.data.insert({"Name": other_name})

    banana = collection.query.fetch_object_by_id(uuid_banana, include_vector=True)
    query_vector = banana.vector["default"]

    hits_after_offset = collection.query.near_vector(query_vector, offset=1).objects
    assert len(hits_after_offset) == 3
    assert hits_after_offset[0].uuid == uuid_fruit


def test_near_vector_group_by_argument(collection_factory: CollectionFactory) -> None:
    """Group near-vector results by the ``name`` property and check each object's group.

    Two "Banana" objects, one "car" and one "Mountain" are stored; the grouped
    query is expected to return four objects labelled, in order, "Banana",
    "Banana", "car", "Mountain".
    """
    collection = collection_factory(
        properties=[
            Property(name="Name", data_type=DataType.TEXT),
            Property(name="Count", data_type=DataType.INT),
        ],
        vectorizer_config=Configure.Vectorizer.text2vec_contextionary(
            vectorize_collection_name=False
        ),
    )
    uuid_banana1 = collection.data.insert({"Name": "Banana", "Count": 51})
    for name, count in (("Banana", 72), ("car", 12), ("Mountain", 1)):
        collection.data.insert({"Name": name, "Count": count})

    banana1 = collection.query.fetch_object_by_id(uuid_banana1, include_vector=True)

    grouping = GroupBy(prop="name", number_of_groups=4, objects_per_group=10)
    requested_metadata = MetadataQuery(distance=True, certainty=True)
    ret = collection.query.near_vector(
        banana1.vector["default"],
        group_by=grouping,
        return_metadata=requested_metadata,
    )

    assert len(ret.objects) == 4
    expected_groups = ("Banana", "Banana", "car", "Mountain")
    for position, expected_group in enumerate(expected_groups):
        assert ret.objects[position].belongs_to_group == expected_group


def test_near_object(collection_factory: CollectionFactory) -> None:
collection = collection_factory(
properties=[Property(name="Name", data_type=DataType.TEXT)],
Expand Down
134 changes: 133 additions & 1 deletion integration/test_collection_config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Generator

import pytest
import pytest as pytest
from _pytest.fixtures import SubRequest

import weaviate
from integration.conftest import OpenAICollection, CollectionFactory
Expand Down Expand Up @@ -898,3 +899,134 @@ def test_dynamic_collection(collection_factory: CollectionFactory) -> None:
assert config.vector_index_config.flat.vector_cache_max_objects == 9876
assert isinstance(config.vector_index_config.flat.quantizer, _BQConfig)
assert config.vector_index_config.flat.quantizer.rescore_limit == 11


def test_config_unknown_module(request: SubRequest) -> None:
    """Round-trip a collection whose generative/reranker modules are unknown to the client.

    The collection is created from a raw dict naming the ``generative-dummy``
    and ``reranker-dummy`` modules. The test checks that the fetched config
    exposes those module names as plain strings, then recreates the collection
    from that config and checks that the result is identical.

    Args:
        request: pytest request object; the test's node name is used to derive
            the collection name.
    """
    with weaviate.connect_to_local() as client:
        collection_name = _sanitize_collection_name(request.node.name)
        # Start from a clean slate in case a previous run left the collection behind.
        client.collections.delete(name=collection_name)
        try:
            collection = client.collections.create_from_dict(
                {
                    "class": collection_name,
                    "vectorizer": "none",
                    "moduleConfig": {"generative-dummy": {}, "reranker-dummy": {}},
                    "properties": [
                        {"name": "prop", "dataType": ["text"]},
                    ],
                }
            )
            config = collection.config.get()
            assert config.generative_config is not None
            assert isinstance(config.generative_config.generative, str)
            assert config.generative_config.generative == "generative-dummy"

            assert config.reranker_config is not None
            assert isinstance(config.reranker_config.reranker, str)
            assert config.reranker_config.reranker == "reranker-dummy"

            # Remove the first collection so it can be recreated from its own config.
            client.collections.delete(name=collection_name)

            collection2 = client.collections.create_from_config(config)
            config2 = collection2.config.get()
            assert config == config2
            assert config2.generative_config is not None
            assert isinstance(config2.generative_config.generative, str)
            assert config2.generative_config.generative == "generative-dummy"

            assert config2.reranker_config is not None
            assert isinstance(config2.reranker_config.reranker, str)
            assert config2.reranker_config.reranker == "reranker-dummy"
        finally:
            # Always clean up, even when an assertion above fails, so the collection
            # is not leaked. The up-front delete at the start of this test suggests
            # that deleting a missing collection is harmless -- TODO confirm.
            client.collections.delete(name=collection_name)


def test_create_custom_module(collection_factory: CollectionFactory) -> None:
    """A custom generative config must match the dedicated ``anyscale`` helper.

    One collection is created with ``Configure.Generative.custom`` naming the
    ``generative-anyscale`` module, another with ``Configure.Generative.anyscale``;
    both use ``temperature=0.5`` and their generative configs must be equal.
    """
    collection = collection_factory(
        generative_config=Configure.Generative.custom(
            "generative-anyscale", module_config={"temperature": 0.5}
        )
    )
    config = collection.config.get()

    collection2 = collection_factory(
        generative_config=Configure.Generative.anyscale(temperature=0.5)
    )
    config2 = collection2.config.get()

    # Guard before dereferencing: without it, a missing config on both sides would
    # pass the equality check and then fail with an AttributeError below.
    assert config.generative_config is not None
    assert config.generative_config == config2.generative_config
    assert isinstance(config.generative_config.generative, str)
    assert config.generative_config.generative == "generative-anyscale"
    assert config.generative_config.model == {"temperature": 0.5}


def test_create_custom_reranker(collection_factory: CollectionFactory) -> None:
    """A custom reranker config must match the dedicated ``cohere`` helper.

    One collection is created with ``Configure.Reranker.custom`` naming the
    ``reranker-cohere`` module, another with ``Configure.Reranker.cohere``;
    both use the ``rerank-english-v2.0`` model and their reranker configs must
    be equal.
    """
    collection = collection_factory(
        reranker_config=Configure.Reranker.custom(
            "reranker-cohere", module_config={"model": "rerank-english-v2.0"}
        )
    )
    config = collection.config.get()

    collection2 = collection_factory(
        reranker_config=Configure.Reranker.cohere(model="rerank-english-v2.0")
    )
    config2 = collection2.config.get()

    # Guard before dereferencing: without it, a missing config on both sides would
    # pass the equality check and then fail with an AttributeError below.
    assert config.reranker_config is not None
    assert config.reranker_config == config2.reranker_config
    assert isinstance(config.reranker_config.reranker, str)
    assert config.reranker_config.reranker == "reranker-cohere"
    assert config.reranker_config.model == {"model": "rerank-english-v2.0"}


def test_create_custom_vectorizer(collection_factory: CollectionFactory) -> None:
    """A custom vectorizer config must match the ``text2vec_contextionary`` helper.

    One collection is created with ``Configure.Vectorizer.custom`` naming the
    ``text2vec-contextionary`` module, another with the dedicated helper; both
    disable vectorizing the collection name and their vectorizer configs must
    be equal.
    """
    collection = collection_factory(
        properties=[Property(name="text", data_type=DataType.TEXT)],
        vectorizer_config=Configure.Vectorizer.custom(
            "text2vec-contextionary", module_config={"vectorizeClassName": False}
        ),
    )
    config = collection.config.get()

    collection2 = collection_factory(
        properties=[Property(name="text", data_type=DataType.TEXT)],
        vectorizer_config=Configure.Vectorizer.text2vec_contextionary(
            vectorize_collection_name=False
        ),
    )
    config2 = collection2.config.get()

    # Guard before dereferencing: without it, a missing config on both sides would
    # pass the equality check and then fail with an AttributeError below.
    assert config.vectorizer_config is not None
    assert config.vectorizer_config == config2.vectorizer_config
    assert isinstance(config.vectorizer_config.vectorizer, str)
    assert config.vectorizer_config.vectorizer == "text2vec-contextionary"
    assert not config.vectorizer_config.vectorize_collection_name


def test_create_custom_vectorizer_named(collection_factory: CollectionFactory) -> None:
    """A custom named vector must match the ``text2vec_contextionary`` named-vector helper.

    The test is skipped on Weaviate versions lower than 1.24.0. Otherwise one
    collection is created with ``Configure.NamedVectors.custom`` and another with
    the dedicated helper; both define a single named vector called ``name`` and
    their vector configs must be equal.
    """
    # A throwaway collection is created only to read the server version.
    collection_dummy = collection_factory("dummy")
    if collection_dummy._connection._weaviate_version.is_lower_than(1, 24, 0):
        pytest.skip("Named index is not supported in Weaviate versions lower than 1.24.0")

    collection = collection_factory(
        properties=[Property(name="text", data_type=DataType.TEXT)],
        vectorizer_config=[
            Configure.NamedVectors.custom(
                "name",
                module_name="text2vec-contextionary",
                module_config={"vectorizeClassName": False},
            )
        ],
    )
    config = collection.config.get()

    collection2 = collection_factory(
        properties=[Property(name="text", data_type=DataType.TEXT)],
        vectorizer_config=[
            Configure.NamedVectors.text2vec_contextionary("name", vectorize_collection_name=False)
        ],
    )
    config2 = collection2.config.get()

    # Guard before len()/indexing: without it, a missing config on both sides would
    # pass the equality check and then fail with a TypeError below.
    assert config.vector_config is not None
    assert config.vector_config == config2.vector_config
    assert len(config.vector_config) == 1
    assert config.vector_config["name"].vectorizer.vectorizer == "text2vec-contextionary"
    assert config.vector_config["name"].vectorizer.model == {"vectorizeClassName": False}
Loading

0 comments on commit da125a6

Please sign in to comment.