diff --git a/profiling/test_import_and_query.py b/profiling/test_import_and_query.py index 624e15c88..87d84d1c6 100644 --- a/profiling/test_import_and_query.py +++ b/profiling/test_import_and_query.py @@ -1,14 +1,12 @@ import time -from typing import List import uuid +from typing import List import h5py # type: ignore +from _pytest.fixtures import SubRequest import weaviate import weaviate.classes as wvc - -from _pytest.fixtures import SubRequest - from weaviate.collections.collection import Collection from .conftest import get_file_path @@ -68,25 +66,6 @@ def load_records_v4(collection: Collection, vectors: List[List[float]]) -> None: print(f"V4: Finished writing {len(vectors)} records in {time.time()-start}s") -def load_records_v3(client: weaviate.Client, vectors: List[List[float]], name: str) -> None: - start = time.time() - - client.batch.configure(batch_size=1000, num_workers=2) - - with client.batch as batch: - for i, vector in enumerate(vectors): - data_object = {"i": i} - - batch.add_data_object( - data_object=data_object, - vector=vector, - class_name=name, - uuid=uuid.UUID(int=i), - ) - - print(f"V3: Finished writing {len(vectors)} records in {time.time()-start}s") - - def query_v4( collection: Collection, vectors: List[List[float]], neighbours: List[List[int]], ef: int ) -> None: @@ -107,40 +86,6 @@ def query_v4( ) -def query_v3( - collection: Collection, - client: weaviate.Client, - vectors: List[List[float]], - neighbours: List[List[int]], - ef: int, -) -> None: - collection.config.update(vector_index_config=wvc.config.Reconfigure.VectorIndex.hnsw(ef=ef)) - start = time.time() - recall = 0.0 - - for i, vec in enumerate(vectors): - res = ( - client.query.get(collection.name, ["i _additional{id}"]) - .with_near_vector( - { - "vector": vec, - } - ) - .with_limit(LIMIT) - .do() - ) - res_ids = [ - uuid.UUID(res["_additional"]["id"]).int for res in res["data"]["Get"][collection.name] - ] - ideal_neighbors = set(neighbours[i][:LIMIT]) - - recall += len(ideal_neighbors.intersection(res_ids)) / LIMIT - - print( - f"V3: Querying {len(vectors)} records with ef {ef} in {time.time()-start}s with recall {recall/len(vectors)}" - ) - - def run_v4(file: str, name: str, efc: int, m: int) -> None: sift_file = get_file_path(file) @@ -156,32 +101,6 @@ def run_v4(file: str, name: str, efc: int, m: int) -> None: query_v4(collection, vectors_test, ideal_neighbors, ef) -def run_v3(file: str, name: str, efc: int, m: int) -> None: - sift_file = get_file_path(file) - - f = h5py.File(sift_file) - vectors_import = f["train"] - vectors_test = f["test"] - ideal_neighbors = f["neighbors"] - - client = weaviate.Client(url="http://localhost:8080") - - # use v4 client to create schema to avoid duplicate code - clientv4 = weaviate.connect_to_local() - collection = create_schema(clientv4, name, efc, m, 1, "l2-squared") - load_records_v3(client, vectors_import, name) - for ef in EF_VALUES: - query_v3(collection, client, vectors_test, ideal_neighbors, ef) - - -def test_sift_v3(request: SubRequest) -> None: - run_v3(file="sift-128-euclidean.hdf5", name=request.node.name, efc=128, m=32) - - -def test_dbpedia_v3(request: SubRequest) -> None: - run_v3(file="dbpedia-openai-1000k-angular.hdf5", name=request.node.name, efc=384, m=20) - - def test_sift_v4(request: SubRequest) -> None: run_v4(file="sift-128-euclidean.hdf5", name=request.node.name, efc=128, m=32) diff --git a/requirements-devel.txt b/requirements-devel.txt index 6c50db6f6..127cfab68 100644 --- a/requirements-devel.txt +++ b/requirements-devel.txt @@ -1,4 +1,3 @@ -requests==2.32.3 httpx==0.25.2 validators==0.34.0 authlib==1.3.1 diff --git a/setup.cfg b/setup.cfg index 2f00324fe..330504f21 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,24 +20,14 @@ packages = weaviate weaviate.connect weaviate.collections - weaviate.schema - weaviate.schema.properties - weaviate.batch weaviate.backup - weaviate.classification - weaviate.contextionary - weaviate.data - weaviate.data.references - weaviate.data.replication weaviate.gql - weaviate.cluster weaviate.proto weaviate.proto.v1 platforms = any include_package_data = True install_requires = - requests>=2.30.0,<3.0.0 httpx>=0.25.0,<=0.27.0 validators==0.34.0 authlib>=1.2.1,<1.3.2