Skip to content

Commit

Permalink
Remove dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
dirkkul committed Dec 4, 2024
1 parent 7fe57d3 commit 4766a39
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 94 deletions.
85 changes: 2 additions & 83 deletions profiling/test_import_and_query.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import time
from typing import List
import uuid
from typing import List

import h5py # type: ignore
from _pytest.fixtures import SubRequest

import weaviate
import weaviate.classes as wvc

from _pytest.fixtures import SubRequest

from weaviate.collections.collection import Collection
from .conftest import get_file_path

Expand Down Expand Up @@ -68,25 +66,6 @@ def load_records_v4(collection: Collection, vectors: List[List[float]]) -> None:
print(f"V4: Finished writing {len(vectors)} records in {time.time()-start}s")


def load_records_v3(client: weaviate.Client, vectors: List[List[float]], name: str) -> None:
    """Import ``vectors`` into the class ``name`` through the v3 batch API.

    Every vector is added together with the property ``{"i": <position>}`` and
    a UUID built from that same position. The number of vectors and the elapsed
    wall-clock time are printed once the batch context has been left.

    Args:
        client: v3 client whose ``batch`` object is configured and used.
        vectors: Vectors to add, in order; the position is used as the id.
        name: Class name passed to every ``add_data_object`` call.
    """
    start = time.time()

    client.batch.configure(batch_size=1000, num_workers=2)

    with client.batch as batch:
        for position, embedding in enumerate(vectors):
            # The UUID encodes the position as an integer so that it can be
            # turned back into a number later via ``uuid.UUID(...).int``.
            batch.add_data_object(
                data_object={"i": position},
                vector=embedding,
                class_name=name,
                uuid=uuid.UUID(int=position),
            )

    print(f"V3: Finished writing {len(vectors)} records in {time.time()-start}s")


def query_v4(
collection: Collection, vectors: List[List[float]], neighbours: List[List[int]], ef: int
) -> None:
Expand All @@ -107,40 +86,6 @@ def query_v4(
)


def query_v3(
    collection: Collection,
    client: weaviate.Client,
    vectors: List[List[float]],
    neighbours: List[List[int]],
    ef: int,
) -> None:
    """Run a near-vector query per test vector with the v3 client and print recall.

    The collection's HNSW ``ef`` is updated first. For each vector the returned
    object ids are converted back to integers and compared with the first
    ``LIMIT`` entries of the matching ``neighbours`` row; the per-query overlap
    (divided by ``LIMIT``) is summed and reported as an average together with
    the elapsed wall-clock time.

    Args:
        collection: Collection whose config is updated and whose name is queried.
        client: v3 client used to issue the GraphQL ``Get`` queries.
        vectors: Query vectors.
        neighbours: Expected neighbour ids, one row per query vector.
        ef: Value passed to the HNSW reconfiguration.
    """
    hnsw_config = wvc.config.Reconfigure.VectorIndex.hnsw(ef=ef)
    collection.config.update(vector_index_config=hnsw_config)
    start = time.time()
    recall = 0.0

    for idx, query_vector in enumerate(vectors):
        # Build the query step by step instead of as one chained expression.
        query = client.query.get(collection.name, ["i _additional{id}"])
        query = query.with_near_vector({"vector": query_vector})
        response = query.with_limit(LIMIT).do()

        hits = response["data"]["Get"][collection.name]
        found_ids = [uuid.UUID(hit["_additional"]["id"]).int for hit in hits]
        expected = set(neighbours[idx][:LIMIT])

        recall += len(expected.intersection(found_ids)) / LIMIT

    print(
        f"V3: Querying {len(vectors)} records with ef {ef} in {time.time()-start}s with recall {recall/len(vectors)}"
    )


def run_v4(file: str, name: str, efc: int, m: int) -> None:
sift_file = get_file_path(file)

Expand All @@ -156,32 +101,6 @@ def run_v4(file: str, name: str, efc: int, m: int) -> None:
query_v4(collection, vectors_test, ideal_neighbors, ef)


def run_v3(file: str, name: str, efc: int, m: int) -> None:
    """Import a dataset with the v3 client and query it once per ``EF_VALUES`` entry.

    The file is resolved via ``get_file_path`` and read with h5py; its
    ``"train"`` dataset is imported, and its ``"test"`` and ``"neighbors"``
    datasets are used for querying and for the recall comparison.

    Args:
        file: Dataset file name handed to ``get_file_path``.
        name: Name used for the schema and as the class name during import.
        efc: Forwarded to ``create_schema``.
        m: Forwarded to ``create_schema``.
    """
    sift_file = get_file_path(file)

    # Keep the HDF5 file open only for the duration of the run; the datasets
    # read from it are used by the import and query calls below, so everything
    # that touches them stays inside this block.
    with h5py.File(sift_file) as f:
        vectors_import = f["train"]
        vectors_test = f["test"]
        ideal_neighbors = f["neighbors"]

        client = weaviate.Client(url="http://localhost:8080")

        # use v4 client to create schema to avoid duplicate code
        clientv4 = weaviate.connect_to_local()
        try:
            collection = create_schema(clientv4, name, efc, m, 1, "l2-squared")
            load_records_v3(client, vectors_import, name)
            for ef in EF_VALUES:
                query_v3(collection, client, vectors_test, ideal_neighbors, ef)
        finally:
            # Close the v4 connection even when the import or a query raises.
            clientv4.close()


def test_sift_v3(request: SubRequest) -> None:
    """Run the v3 profile on ``sift-128-euclidean.hdf5``, named after the pytest node."""
    run_v3(
        file="sift-128-euclidean.hdf5",
        name=request.node.name,
        efc=128,
        m=32,
    )


def test_dbpedia_v3(request: SubRequest) -> None:
    """Run the v3 profile on ``dbpedia-openai-1000k-angular.hdf5``, named after the pytest node."""
    run_v3(
        file="dbpedia-openai-1000k-angular.hdf5",
        name=request.node.name,
        efc=384,
        m=20,
    )


def test_sift_v4(request: SubRequest) -> None:
    """Run the v4 profile on ``sift-128-euclidean.hdf5``, named after the pytest node."""
    run_v4(
        file="sift-128-euclidean.hdf5",
        name=request.node.name,
        efc=128,
        m=32,
    )

Expand Down
1 change: 0 additions & 1 deletion requirements-devel.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
requests==2.32.3
httpx==0.25.2
validators==0.34.0
authlib==1.3.1
Expand Down
10 changes: 0 additions & 10 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,14 @@ packages =
weaviate
weaviate.connect
weaviate.collections
weaviate.schema
weaviate.schema.properties
weaviate.batch
weaviate.backup
weaviate.classification
weaviate.contextionary
weaviate.data
weaviate.data.references
weaviate.data.replication
weaviate.gql
weaviate.cluster
weaviate.proto
weaviate.proto.v1

platforms = any
include_package_data = True
install_requires =
requests>=2.30.0,<3.0.0
httpx>=0.25.0,<=0.27.0
validators==0.34.0
authlib>=1.2.1,<1.3.2
Expand Down

0 comments on commit 4766a39

Please sign in to comment.