From fbee7ddb682d20ad72d6c9d9690bffd88c046e66 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 12:21:26 +0000 Subject: [PATCH 1/8] Add support for debug get object endpoint: - Introduce `debug` namespace at the client-level - Add `get_object` method to it that hits `/objects/{className}/{id}` --- integration/test_client_debug.py | 36 ++++++++ profiling/httpx.py | 34 +++++++ profiling/test_batch.py | 149 +++++++++++++++++++++++++++++++ weaviate/client.py | 10 +++ weaviate/client.pyi | 9 +- weaviate/debug/__init__.py | 7 ++ weaviate/debug/debug.py | 46 ++++++++++ weaviate/debug/sync.py | 7 ++ weaviate/debug/sync.pyi | 17 ++++ weaviate/debug/types.py | 17 ++++ 10 files changed, 329 insertions(+), 3 deletions(-) create mode 100644 integration/test_client_debug.py create mode 100644 profiling/httpx.py create mode 100644 profiling/test_batch.py create mode 100644 weaviate/debug/__init__.py create mode 100644 weaviate/debug/debug.py create mode 100644 weaviate/debug/sync.py create mode 100644 weaviate/debug/sync.pyi create mode 100644 weaviate/debug/types.py diff --git a/integration/test_client_debug.py b/integration/test_client_debug.py new file mode 100644 index 000000000..830b89196 --- /dev/null +++ b/integration/test_client_debug.py @@ -0,0 +1,36 @@ +from integration.conftest import ClientFactory, CollectionFactory + +from weaviate.classes.config import DataType, Property + + +def test_get_object_single_node( + client_factory: ClientFactory, collection_factory: CollectionFactory +) -> None: + client = client_factory() + collection = collection_factory(properties=[Property(name="name", data_type=DataType.TEXT)]) + + uuid = collection.data.insert({"name": "John Doe"}) + + debug_obj = client.debug.get_object(collection.name, uuid) + assert debug_obj is not None + assert str(debug_obj.uuid) == str(uuid) + + non_existant_uuid = "00000000-0000-0000-0000-000000000000" + debug_obj = client.debug.get_object(collection.name, non_existant_uuid) + assert debug_obj
is None + + +def test_get_object_multi_node( + client_factory: ClientFactory, collection_factory: CollectionFactory +) -> None: + client = client_factory(ports=(8087, 50058)) + collection = collection_factory( + ports=(8087, 50058), properties=[Property(name="name", data_type=DataType.TEXT)] + ) + + uuid = collection.data.insert({"name": "John Doe"}) + + for nodename in ["node1", "node2", "node3"]: + debug_obj = client.debug.get_object(collection.name, uuid, nodename=nodename) + assert debug_obj is not None + assert str(debug_obj.uuid) == str(uuid) diff --git a/profiling/httpx.py b/profiling/httpx.py new file mode 100644 index 000000000..5715a7bd1 --- /dev/null +++ b/profiling/httpx.py @@ -0,0 +1,34 @@ +import gc +import httpx +import objgraph +from collections import deque + + +def sync() -> None: + with httpx.Client() as client: + client.get("https://www.google.com") + + +# async def async_() -> None: +# async with httpx.AsyncClient() as client: +# await client.get("https://www.google.com") +# gc.collect() +# print("Uncollectible Garbage: ", len(gc.garbage)) + +if __name__ == "__main__": + gc.set_debug(gc.DEBUG_SAVEALL) + sync() + gc.collect() + print("Uncollectible Garbage: ", len(gc.garbage)) + for idx, obj in enumerate(gc.garbage): + print(f"Uncollectable: {obj.__class__}\n{obj}") + if obj.__class__ in [dict, list, tuple, deque, set]: + continue + objgraph.show_chain( + objgraph.find_backref_chain(obj, objgraph.is_proper_module), + filename=f"objgraphs-bw/refs_{idx}.png", + ) + objgraph.show_refs( + objgraph.find_ref_chain(obj, objgraph.is_proper_module), + filename=f"objgraphs-fw/refs_{idx}.png", + ) diff --git a/profiling/test_batch.py b/profiling/test_batch.py new file mode 100644 index 000000000..f977b72ae --- /dev/null +++ b/profiling/test_batch.py @@ -0,0 +1,149 @@ +# run: +# - profiling: pytest -m profiling profiling/test_batch.py --profile-svg +# - benchmark: pytest profiling/test_profiling.py --benchmark-only --benchmark-disable-gc +import gc +import 
linecache +import tracemalloc + +import objgraph +import weaviate + +from pympler import asizeof +from numpy import random +from tqdm import tqdm +from weaviate.classes.config import DataType, Property +from weaviate.collections import Collection + +HOW_MANY = 200000 + + +def random_str() -> str: + return "".join([chr(random.randint(97, 123)) for _ in range(10)]) + + +def make_collection(client: weaviate.WeaviateClient, name: str) -> Collection: + client.collections.delete(name) + return client.collections.create( + name=name, + properties=[ + Property(name="a", data_type=DataType.TEXT), + Property(name="b", data_type=DataType.TEXT), + Property(name="c", data_type=DataType.TEXT), + Property(name="d", data_type=DataType.TEXT), + Property(name="e", data_type=DataType.TEXT), + ], + ) + + +def ingest_fakes(collection_src: Collection) -> None: + with collection_src.batch.dynamic() as batch: + for i in range(HOW_MANY): + if i % 10000 == 0: + print(f"Ingested {i} objects") + print(f"There are {len(gc.garbage)} objects that cannot be collected") + # for obj in gc.garbage: + # print(f"Uncollectable: {obj}") + batch.add_object( + properties={ + "a": random_str(), + "b": random_str(), + "c": random_str(), + "d": random_str(), + "e": random_str(), + }, + ) + + +def migrate_data_matt(collection_src: Collection, collection_tgt: Collection): + with collection_tgt.batch.dynamic() as batch: + i = 1 + for q in tqdm(collection_src.iterator(include_vector=False)): + if i % 10000 == 0: + print(f"Migrated {i} objects") + print(f"There are {len(gc.garbage)} objects that cannot be collected") + # for obj in gc.garbage: + # print(f"Uncollectable: {obj}") + if i > HOW_MANY: + break + + source_uuid = str(q.uuid) + + # Check if object exists in target collection + try: + obj = collection_tgt.query.fetch_object_by_id(uuid=source_uuid) + if obj is not None: + continue + except Exception as e: + print(f"Error fetching object by ID: {e}") + continue + + # Insert the new object + try: + 
batch.add_object(properties=q.properties, uuid=source_uuid) + except Exception as e: + print(f"Error adding object to batch: {e}") + continue + + i += 1 + return i + + +def display_top(snapshot, key_type="lineno", limit=10): + snapshot = snapshot.filter_traces( + ( + tracemalloc.Filter(False, ""), + tracemalloc.Filter(False, ""), + ) + ) + top_stats = snapshot.statistics(key_type) + + print("Top %s lines" % limit) + for index, stat in enumerate(top_stats[:limit], 1): + frame = stat.traceback[0] + print("#%s: %s:%s: %.1f KiB" % (index, frame.filename, frame.lineno, stat.size / 1024)) + line = linecache.getline(frame.filename, frame.lineno).strip() + if line: + print(" %s" % line) + + other = top_stats[limit:] + if other: + size = sum(stat.size for stat in other) + print("%s other: %.1f KiB" % (len(other), size / 1024)) + total = sum(stat.size for stat in top_stats) + print("Total allocated size: %.1f KiB" % (total / 1024)) + + +def main() -> None: + tracemalloc.start(10) + objgraph.show_growth() + gc.set_debug(gc.DEBUG_SAVEALL) + with weaviate.connect_to_local() as _: + # src = make_collection(client, 'src') + # tgt = make_collection(client, 'tgt') + # ingest_fakes(src) + # print(migrate_data_matt(src, tgt)) + pass + gc.collect() + print( + f"There are {len(gc.garbage)} objects that cannot be collected. 
Their total memory footprint is {asizeof.asizeof(gc.garbage)} bytes" + ) + # for idx, obj in enumerate(gc.garbage): + # print(f"Uncollectable: {obj.__class__}\n{obj}") + # if obj.__class__ in [dict, list, tuple, deque, set]: + # continue + # objgraph.show_chain( + # objgraph.find_backref_chain(obj, objgraph.is_proper_module), + # filename=f"objgraphs-bw/refs_{idx}.png" + # ) + # objgraph.show_refs( + # objgraph.find_ref_chain(obj, objgraph.is_proper_module), + # filename=f"objgraphs-fw/refs_{idx}.png" + # ) + objgraph.show_most_common_types() + objgraph.show_growth() + snapshot = tracemalloc.take_snapshot() + display_top(snapshot) + + +if __name__ == "__main__": + main() diff --git a/weaviate/client.py b/weaviate/client.py index c5f92fc46..16fe042bc 100644 --- a/weaviate/client.py +++ b/weaviate/client.py @@ -21,6 +21,7 @@ from .connect.base import ( ConnectionParams, ) +from .debug import _Debug, _DebugAsync from .embedded import EmbeddedOptions from .rbac import _RolesAsync, _Roles from .types import NUMBER @@ -87,7 +88,12 @@ def __init__( Use it to retrieve collection objects using `client.collections.get("MyCollection")` or to create new collections using `client.collections.create("MyCollection", ...)`. """ + self.debug = _Debug(self._connection) + """This namespace contains functionality used to debug Weaviate clusters. As such, it is deemed experimental and is subject to change. + + We can make no guarantees about the stability of this namespace nor the potential for future breaking changes. 
Use at your own risk.""" self.roles = _Roles(self._connection) + """This namespace contains all functionality to manage Weaviate's RBAC functionality.""" def __enter__(self) -> "WeaviateClient": self.connect() # pyright: ignore # gets patched by syncify.convert to be sync @@ -146,6 +152,10 @@ def __init__( Use it to retrieve collection objects using `client.collections.get("MyCollection")` or to create new collections using `await client.collections.create("MyCollection", ...)`. """ + self.debug = _DebugAsync(self._connection) + """This namespace contains functionality used to debug Weaviate clusters. As such, it is deemed experimental and is subject to change. + + We can make no guarantees about the stability of this namespace nor the potential for future breaking changes. Use at your own risk.""" self.roles = _RolesAsync(self._connection) """This namespace contains all functionality to manage Weaviate's RBAC functionality.""" diff --git a/weaviate/client.pyi b/weaviate/client.pyi index f95268c58..dbec6ae2f 100644 --- a/weaviate/client.pyi +++ b/weaviate/client.pyi @@ -12,6 +12,7 @@ from weaviate.collections.collections.sync import _Collections from .collections.batch.client import _BatchClientWrapper from .collections.cluster import _Cluster, _ClusterAsync from .connect import ConnectionV4 +from .debug import _Debug, _DebugAsync from .rbac import _Roles, _RolesAsync from .types import NUMBER @@ -25,9 +26,10 @@ from weaviate.client_base import _WeaviateClientInit class WeaviateAsyncClient(_WeaviateClientInit): _connection: ConnectionV4 - collections: _CollectionsAsync backup: _BackupAsync + collections: _CollectionsAsync cluster: _ClusterAsync + debug: _DebugAsync roles: _RolesAsync async def close(self) -> None: ... async def connect(self) -> None: ... 
@@ -42,10 +44,11 @@ class WeaviateAsyncClient(_WeaviateClientInit): class WeaviateClient(_WeaviateClientInit): _connection: ConnectionV4 - collections: _Collections - batch: _BatchClientWrapper backup: _Backup + batch: _BatchClientWrapper + collections: _Collections cluster: _Cluster + debug: _Debug roles: _Roles def close(self) -> None: ... def connect(self) -> None: ... diff --git a/weaviate/debug/__init__.py b/weaviate/debug/__init__.py new file mode 100644 index 000000000..cd35503a0 --- /dev/null +++ b/weaviate/debug/__init__.py @@ -0,0 +1,7 @@ +from .debug import _DebugAsync +from .sync import _Debug + +__all__ = [ + "_Debug", + "_DebugAsync", +] diff --git a/weaviate/debug/debug.py b/weaviate/debug/debug.py new file mode 100644 index 000000000..45a262072 --- /dev/null +++ b/weaviate/debug/debug.py @@ -0,0 +1,46 @@ +from typing import Dict, Optional + +from weaviate.classes.config import ConsistencyLevel +from weaviate.connect import ConnectionV4 +from weaviate.connect.v4 import _ExpectedStatusCodes +from weaviate.debug.types import DebugObject +from weaviate.types import UUID + + +class _DebugBase: + def __init__( + self, + connection: ConnectionV4, + ) -> None: + self._connection = connection + + +class _DebugAsync(_DebugBase): + async def get_object( + self, + collection: str, + uuid: UUID, + *, + consistency_level: Optional[ConsistencyLevel] = None, + nodename: Optional[str] = None, + tenant: Optional[str] = None, + ) -> Optional[DebugObject]: + path = f"/objects/{collection}/{str(uuid)}" + + params: Dict[str, str] = {} + if consistency_level is not None: + params["consistency"] = consistency_level.value + if nodename is not None: + params["nodename"] = nodename + if tenant is not None: + params["tenant"] = tenant + + res = await self._connection.get( + path=path, + params=params, + error_msg="Object was not retrieved", + status_codes=_ExpectedStatusCodes(ok_in=[200, 404], error="get object"), + ) + if res.status_code == 404: + return None + return 
DebugObject(**res.json()) diff --git a/weaviate/debug/sync.py b/weaviate/debug/sync.py new file mode 100644 index 000000000..736186fd3 --- /dev/null +++ b/weaviate/debug/sync.py @@ -0,0 +1,7 @@ +from weaviate import syncify +from weaviate.debug.debug import _DebugAsync + + +@syncify.convert +class _Debug(_DebugAsync): + pass diff --git a/weaviate/debug/sync.pyi b/weaviate/debug/sync.pyi new file mode 100644 index 000000000..73f58d726 --- /dev/null +++ b/weaviate/debug/sync.pyi @@ -0,0 +1,17 @@ +from typing import Optional + +from weaviate.classes.config import ConsistencyLevel +from weaviate.debug.debug import _DebugBase +from weaviate.debug.types import DebugObject +from weaviate.types import UUID + +class _Debug(_DebugBase): + def get_object( + self, + collection: str, + uuid: UUID, + *, + consistency_level: Optional[ConsistencyLevel] = None, + nodename: Optional[str] = None, + tenant: Optional[str] = None, + ) -> Optional[DebugObject]: ... diff --git a/weaviate/debug/types.py b/weaviate/debug/types.py new file mode 100644 index 000000000..33a604584 --- /dev/null +++ b/weaviate/debug/types.py @@ -0,0 +1,17 @@ +from datetime import datetime +from typing import Any, Dict, Optional + +from pydantic import BaseModel, Field + +from weaviate.types import uuid_package + + +class DebugObject(BaseModel): + collection: str = Field(..., alias="class") + creation_time: datetime = Field(..., alias="creationTimeUnix") + last_update_time: datetime = Field(..., alias="lastUpdateTimeUnix") + properties: Dict[str, Any] = Field(...) 
+ tenant: Optional[str] = Field(None) + uuid: uuid_package.UUID = Field(..., alias="id") + vector: Optional[list[float]] = Field(None) + vectors: Optional[Dict[str, list[float]]] = Field(None) From 016ee08b0a4bb44d5a5d0364a53143de3d4e846e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 12:23:41 +0000 Subject: [PATCH 2/8] Remove mistakenly committed files --- profiling/httpx.py | 34 --------- profiling/test_batch.py | 149 ---------------------------------------- 2 files changed, 183 deletions(-) delete mode 100644 profiling/httpx.py delete mode 100644 profiling/test_batch.py diff --git a/profiling/httpx.py b/profiling/httpx.py deleted file mode 100644 index 5715a7bd1..000000000 --- a/profiling/httpx.py +++ /dev/null @@ -1,34 +0,0 @@ -import gc -import httpx -import objgraph -from collections import deque - - -def sync() -> None: - with httpx.Client() as client: - client.get("https://www.google.com") - - -# async def async_() -> None: -# async with httpx.AsyncClient() as client: -# await client.get("https://www.google.com") -# gc.collect() -# print("Uncollectible Garbage: ", len(gc.garbage)) - -if __name__ == "__main__": - gc.set_debug(gc.DEBUG_SAVEALL) - sync() - gc.collect() - print("Uncollectible Garbage: ", len(gc.garbage)) - for idx, obj in enumerate(gc.garbage): - print(f"Uncollectable: {obj.__class__}\n{obj}") - if obj.__class__ in [dict, list, tuple, deque, set]: - continue - objgraph.show_chain( - objgraph.find_backref_chain(obj, objgraph.is_proper_module), - filename=f"objgraphs-bw/refs_{idx}.png", - ) - objgraph.show_refs( - objgraph.find_ref_chain(obj, objgraph.is_proper_module), - filename=f"objgraphs-fw/refs_{idx}.png", - ) diff --git a/profiling/test_batch.py b/profiling/test_batch.py deleted file mode 100644 index f977b72ae..000000000 --- a/profiling/test_batch.py +++ /dev/null @@ -1,149 +0,0 @@ -# run: -# - profiling: pytest -m profiling profiling/test_batch.py --profile-svg -# - benchmark: pytest profiling/test_profiling.py
--benchmark-only --benchmark-disable-gc -import gc -import linecache -import tracemalloc - -import objgraph -import weaviate - -from pympler import asizeof -from numpy import random -from tqdm import tqdm -from weaviate.classes.config import DataType, Property -from weaviate.collections import Collection - -HOW_MANY = 200000 - - -def random_str() -> str: - return "".join([chr(random.randint(97, 123)) for _ in range(10)]) - - -def make_collection(client: weaviate.WeaviateClient, name: str) -> Collection: - client.collections.delete(name) - return client.collections.create( - name=name, - properties=[ - Property(name="a", data_type=DataType.TEXT), - Property(name="b", data_type=DataType.TEXT), - Property(name="c", data_type=DataType.TEXT), - Property(name="d", data_type=DataType.TEXT), - Property(name="e", data_type=DataType.TEXT), - ], - ) - - -def ingest_fakes(collection_src: Collection) -> None: - with collection_src.batch.dynamic() as batch: - for i in range(HOW_MANY): - if i % 10000 == 0: - print(f"Ingested {i} objects") - print(f"There are {len(gc.garbage)} objects that cannot be collected") - # for obj in gc.garbage: - # print(f"Uncollectable: {obj}") - batch.add_object( - properties={ - "a": random_str(), - "b": random_str(), - "c": random_str(), - "d": random_str(), - "e": random_str(), - }, - ) - - -def migrate_data_matt(collection_src: Collection, collection_tgt: Collection): - with collection_tgt.batch.dynamic() as batch: - i = 1 - for q in tqdm(collection_src.iterator(include_vector=False)): - if i % 10000 == 0: - print(f"Migrated {i} objects") - print(f"There are {len(gc.garbage)} objects that cannot be collected") - # for obj in gc.garbage: - # print(f"Uncollectable: {obj}") - if i > HOW_MANY: - break - - source_uuid = str(q.uuid) - - # Check if object exists in target collection - try: - obj = collection_tgt.query.fetch_object_by_id(uuid=source_uuid) - if obj is not None: - continue - except Exception as e: - print(f"Error fetching object by ID: {e}") 
- continue - - # Insert the new object - try: - batch.add_object(properties=q.properties, uuid=source_uuid) - except Exception as e: - print(f"Error adding object to batch: {e}") - continue - - i += 1 - return i - - -def display_top(snapshot, key_type="lineno", limit=10): - snapshot = snapshot.filter_traces( - ( - tracemalloc.Filter(False, ""), - tracemalloc.Filter(False, ""), - ) - ) - top_stats = snapshot.statistics(key_type) - - print("Top %s lines" % limit) - for index, stat in enumerate(top_stats[:limit], 1): - frame = stat.traceback[0] - print("#%s: %s:%s: %.1f KiB" % (index, frame.filename, frame.lineno, stat.size / 1024)) - line = linecache.getline(frame.filename, frame.lineno).strip() - if line: - print(" %s" % line) - - other = top_stats[limit:] - if other: - size = sum(stat.size for stat in other) - print("%s other: %.1f KiB" % (len(other), size / 1024)) - total = sum(stat.size for stat in top_stats) - print("Total allocated size: %.1f KiB" % (total / 1024)) - - -def main() -> None: - tracemalloc.start(10) - objgraph.show_growth() - gc.set_debug(gc.DEBUG_SAVEALL) - with weaviate.connect_to_local() as _: - # src = make_collection(client, 'src') - # tgt = make_collection(client, 'tgt') - # ingest_fakes(src) - # print(migrate_data_matt(src, tgt)) - pass - gc.collect() - print( - f"There are {len(gc.garbage)} objects that cannot be collected. 
Their total memory footprint is {asizeof.asizeof(gc.garbage)} bytes" - ) - # for idx, obj in enumerate(gc.garbage): - # print(f"Uncollectable: {obj.__class__}\n{obj}") - # if obj.__class__ in [dict, list, tuple, deque, set]: - # continue - # objgraph.show_chain( - # objgraph.find_backref_chain(obj, objgraph.is_proper_module), - # filename=f"objgraphs-bw/refs_{idx}.png" - # ) - # objgraph.show_refs( - # objgraph.find_ref_chain(obj, objgraph.is_proper_module), - # filename=f"objgraphs-fw/refs_{idx}.png" - # ) - objgraph.show_most_common_types() - objgraph.show_growth() - snapshot = tracemalloc.take_snapshot() - display_top(snapshot) - - -if __name__ == "__main__": - main() From 00e6bab73f09ce15d4cda76e96344d44358c7a8a Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 12:25:38 +0000 Subject: [PATCH 3/8] Ensure DebugObject is correctly exported from the lib --- integration/test_client_debug.py | 2 ++ weaviate/classes/debug.py | 5 +++++ 2 files changed, 7 insertions(+) create mode 100644 weaviate/classes/debug.py diff --git a/integration/test_client_debug.py b/integration/test_client_debug.py index 830b89196..bf9facfb2 100644 --- a/integration/test_client_debug.py +++ b/integration/test_client_debug.py @@ -1,6 +1,7 @@ from integration.conftest import ClientFactory, CollectionFactory from weaviate.classes.config import DataType, Property +from weaviate.classes.debug import DebugObject def test_get_object_single_node( @@ -13,6 +14,7 @@ def test_get_object_single_node( debug_obj = client.debug.get_object(collection.name, uuid) assert debug_obj is not None + assert isinstance(debug_obj, DebugObject) assert str(debug_obj.uuid) == str(uuid) non_existant_uuid = "00000000-0000-0000-0000-000000000000" diff --git a/weaviate/classes/debug.py b/weaviate/classes/debug.py new file mode 100644 index 000000000..f478e09b5 --- /dev/null +++ b/weaviate/classes/debug.py @@ -0,0 +1,5 @@ +from weaviate.debug.types import DebugObject + +__all__ = [ + "DebugObject", +] From 
b66220e967cfb29e084caa7bc6113f25342a748e Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 12:33:39 +0000 Subject: [PATCH 4/8] Respond to review comments --- integration/test_client_debug.py | 4 ++-- weaviate/classes/debug.py | 4 ++-- weaviate/debug/debug.py | 13 +++++++++---- weaviate/debug/sync.pyi | 4 ++-- weaviate/debug/types.py | 2 +- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/integration/test_client_debug.py b/integration/test_client_debug.py index bf9facfb2..150905a46 100644 --- a/integration/test_client_debug.py +++ b/integration/test_client_debug.py @@ -1,7 +1,7 @@ from integration.conftest import ClientFactory, CollectionFactory from weaviate.classes.config import DataType, Property -from weaviate.classes.debug import DebugObject +from weaviate.classes.debug import DebugRESTObject def test_get_object_single_node( @@ -14,7 +14,7 @@ def test_get_object_single_node( debug_obj = client.debug.get_object(collection.name, uuid) assert debug_obj is not None - assert isinstance(debug_obj, DebugObject) + assert isinstance(debug_obj, DebugRESTObject) assert str(debug_obj.uuid) == str(uuid) non_existant_uuid = "00000000-0000-0000-0000-000000000000" diff --git a/weaviate/classes/debug.py b/weaviate/classes/debug.py index f478e09b5..0f299d8d4 100644 --- a/weaviate/classes/debug.py +++ b/weaviate/classes/debug.py @@ -1,5 +1,5 @@ -from weaviate.debug.types import DebugObject +from weaviate.debug.types import DebugRESTObject __all__ = [ - "DebugObject", + "DebugRESTObject", ] diff --git a/weaviate/debug/debug.py b/weaviate/debug/debug.py index 45a262072..3fbb753b1 100644 --- a/weaviate/debug/debug.py +++ b/weaviate/debug/debug.py @@ -3,7 +3,7 @@ from weaviate.classes.config import ConsistencyLevel from weaviate.connect import ConnectionV4 from weaviate.connect.v4 import _ExpectedStatusCodes -from weaviate.debug.types import DebugObject +from weaviate.debug.types import DebugRESTObject from weaviate.types import UUID @@ -16,7 +16,7 @@ 
def __init__( class _DebugAsync(_DebugBase): - async def get_object( + async def get_object_over_rest( self, collection: str, uuid: UUID, @@ -24,7 +24,12 @@ async def get_object( consistency_level: Optional[ConsistencyLevel] = None, nodename: Optional[str] = None, tenant: Optional[str] = None, - ) -> Optional[DebugObject]: + ) -> Optional[DebugRESTObject]: + """Use the REST API endpoint /objects/{className}/{id} to retrieve an object directly from the database without search. + + The key difference between `debug.get_object_over_rest` and `query.fetch_object_by_id` is the underlying protocol. + This method uses REST while that method uses gRPC. + """ path = f"/objects/{collection}/{str(uuid)}" params: Dict[str, str] = {} @@ -43,4 +48,4 @@ async def get_object( ) if res.status_code == 404: return None - return DebugObject(**res.json()) + return DebugRESTObject(**res.json()) diff --git a/weaviate/debug/sync.pyi b/weaviate/debug/sync.pyi index 73f58d726..8b4978fed 100644 --- a/weaviate/debug/sync.pyi +++ b/weaviate/debug/sync.pyi @@ -2,7 +2,7 @@ from typing import Optional from weaviate.classes.config import ConsistencyLevel from weaviate.debug.debug import _DebugBase -from weaviate.debug.types import DebugObject +from weaviate.debug.types import DebugRESTObject from weaviate.types import UUID class _Debug(_DebugBase): @@ -14,4 +14,4 @@ class _Debug(_DebugBase): consistency_level: Optional[ConsistencyLevel] = None, nodename: Optional[str] = None, tenant: Optional[str] = None, - ) -> Optional[DebugObject]: ... + ) -> Optional[DebugRESTObject]: ... 
diff --git a/weaviate/debug/types.py b/weaviate/debug/types.py index 33a604584..88b11e08c 100644 --- a/weaviate/debug/types.py +++ b/weaviate/debug/types.py @@ -6,7 +6,7 @@ from weaviate.types import uuid_package -class DebugObject(BaseModel): +class DebugRESTObject(BaseModel): collection: str = Field(..., alias="class") creation_time: datetime = Field(..., alias="creationTimeUnix") last_update_time: datetime = Field(..., alias="lastUpdateTimeUnix") From 16677ad78f081f02d3baedd8ee7084c4fd6f2215 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 12:35:06 +0000 Subject: [PATCH 5/8] Ignore A005 in flake8 for debug/types.py file --- .flake8 | 1 + 1 file changed, 1 insertion(+) diff --git a/.flake8 b/.flake8 index 877c7cb55..e0a91e187 100644 --- a/.flake8 +++ b/.flake8 @@ -7,6 +7,7 @@ per-file-ignores = weaviate/collections/classes/types.py:A005 weaviate/collections/collections/__init__.py:A005 weaviate/collections/__init__.py:A005 + weaviate/debug/types.py:A005 weaviate/types.py:A005 weaviate/warnings.py:A005 From 269d8cbbb57e53ff62a94dc39f99932ed4390295 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 12:39:31 +0000 Subject: [PATCH 6/8] Fix `node_name` param name --- weaviate/debug/debug.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/weaviate/debug/debug.py b/weaviate/debug/debug.py index 3fbb753b1..513c68ff3 100644 --- a/weaviate/debug/debug.py +++ b/weaviate/debug/debug.py @@ -22,7 +22,7 @@ async def get_object_over_rest( uuid: UUID, *, consistency_level: Optional[ConsistencyLevel] = None, - nodename: Optional[str] = None, + node_name: Optional[str] = None, tenant: Optional[str] = None, ) -> Optional[DebugRESTObject]: """Use the REST API endpoint /objects/{className}/{id} to retrieve an object directly from the database without search. 
@@ -35,8 +35,8 @@ async def get_object_over_rest( params: Dict[str, str] = {} if consistency_level is not None: params["consistency"] = consistency_level.value - if nodename is not None: - params["nodename"] = nodename + if node_name is not None: + params["node_name"] = node_name if tenant is not None: params["tenant"] = tenant From 8e2e1e81f5f583490e1da58ea8eede4fd1424a04 Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 13:00:35 +0000 Subject: [PATCH 7/8] Fix misaligned stubs --- integration/test_client_debug.py | 6 +++--- weaviate/debug/sync.pyi | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/integration/test_client_debug.py b/integration/test_client_debug.py index 150905a46..1e3a2d060 100644 --- a/integration/test_client_debug.py +++ b/integration/test_client_debug.py @@ -12,13 +12,13 @@ def test_get_object_single_node( uuid = collection.data.insert({"name": "John Doe"}) - debug_obj = client.debug.get_object(collection.name, uuid) + debug_obj = client.debug.get_object_over_rest(collection.name, uuid) assert debug_obj is not None assert isinstance(debug_obj, DebugRESTObject) assert str(debug_obj.uuid) == str(uuid) non_existant_uuid = "00000000-0000-0000-0000-000000000000" - debug_obj = client.debug.get_object(collection.name, non_existant_uuid) + debug_obj = client.debug.get_object_over_rest(collection.name, non_existant_uuid) assert debug_obj is None @@ -33,6 +33,6 @@ def test_get_object_multi_node( uuid = collection.data.insert({"name": "John Doe"}) for nodename in ["node1", "node2", "node3"]: - debug_obj = client.debug.get_object(collection.name, uuid, nodename=nodename) + debug_obj = client.debug.get_object_over_rest(collection.name, uuid, nodename=nodename) assert debug_obj is not None assert str(debug_obj.uuid) == str(uuid) diff --git a/weaviate/debug/sync.pyi b/weaviate/debug/sync.pyi index 8b4978fed..3a98c3624 100644 --- a/weaviate/debug/sync.pyi +++ b/weaviate/debug/sync.pyi @@ -6,7 +6,7 @@ from weaviate.debug.types 
import DebugRESTObject from weaviate.types import UUID class _Debug(_DebugBase): - def get_object( + def get_object_over_rest( self, collection: str, uuid: UUID, From cefdfc9748702b657d03f61cf072d67bb417f78d Mon Sep 17 00:00:00 2001 From: Tommy Smith Date: Fri, 10 Jan 2025 13:14:33 +0000 Subject: [PATCH 8/8] Fix `node_name` var in stubs --- integration/test_client_debug.py | 4 ++-- weaviate/debug/sync.pyi | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/integration/test_client_debug.py b/integration/test_client_debug.py index 1e3a2d060..7726a1e5a 100644 --- a/integration/test_client_debug.py +++ b/integration/test_client_debug.py @@ -32,7 +32,7 @@ def test_get_object_multi_node( uuid = collection.data.insert({"name": "John Doe"}) - for nodename in ["node1", "node2", "node3"]: - debug_obj = client.debug.get_object_over_rest(collection.name, uuid, nodename=nodename) + for node_name in ["node1", "node2", "node3"]: + debug_obj = client.debug.get_object_over_rest(collection.name, uuid, node_name=node_name) assert debug_obj is not None assert str(debug_obj.uuid) == str(uuid) diff --git a/weaviate/debug/sync.pyi b/weaviate/debug/sync.pyi index 3a98c3624..810a60769 100644 --- a/weaviate/debug/sync.pyi +++ b/weaviate/debug/sync.pyi @@ -12,6 +12,6 @@ class _Debug(_DebugBase): uuid: UUID, *, consistency_level: Optional[ConsistencyLevel] = None, - nodename: Optional[str] = None, + node_name: Optional[str] = None, tenant: Optional[str] = None, ) -> Optional[DebugRESTObject]: ...