# Patch "address review comments" (commit eca80c624026ad1a7638e4dbea4e59f21b57aa2a, hmacr, 2023-07-25).

# --- src/marqo/tensor_search/constants.py ---
NUM_BYTES_IN_KB = 1024
SUPPORTED_SIZES_FOR_STATS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']


# --- src/marqo/tensor_search/tensor_search.py ---
def get_stats(config: Config, index_name: str):
    """Return the document count and human-readable store size of an index.

    Args:
        config: passed to ``HttpRequests`` for both requests.
        index_name: name of the index to inspect.

    Returns:
        A dict with ``"numberOfDocuments"`` (the ``count`` field of the
        ``{index_name}/_count`` response) and ``"size"`` (the store size in
        bytes formatted by ``utils.convert_bytes_to_human_readable_format``).

    Raises:
        errors.IndexNotFoundError: if the ``{index_name}/_stats`` response has
            no size entry for ``index_name``.
    """
    doc_count = HttpRequests(config).post(path=F"{index_name}/_count")["count"]
    index_stats = HttpRequests(config).get(path=F"{index_name}/_stats")["indices"]
    try:
        size_in_bytes = index_stats[index_name]["total"]["store"]["size_in_bytes"]
    except (KeyError, TypeError, AttributeError) as e:
        # A missing key in the parsed response raises KeyError (TypeError if a
        # level is None), not AttributeError; AttributeError is still caught so
        # anything the previous guard handled keeps mapping to the same error.
        raise errors.IndexNotFoundError(
            message="Tried to get a non-existent index: {}".format(index_name)
        ) from e

    formatted_size = utils.convert_bytes_to_human_readable_format(size_in_bytes)
    return {
        "numberOfDocuments": doc_count,
        "size": formatted_size
    }
# --- src/marqo/tensor_search/utils.py ---
def convert_bytes_to_human_readable_format(size_in_bytes: int) -> str:
    """Format a byte count as ``"<value with 2 decimals> <unit>"``.

    The value is divided by ``constants.NUM_BYTES_IN_KB`` until it drops below
    that base or the largest entry of ``constants.SUPPORTED_SIZES_FOR_STATS``
    is reached, e.g. ``16121`` -> ``"15.74 KB"`` and ``0`` -> ``"0.00 B"``.

    Args:
        size_in_bytes: non-negative size in bytes.

    Returns:
        The formatted size string.

    Raises:
        ValueError: if ``size_in_bytes`` is negative.
    """
    if size_in_bytes < 0:
        raise ValueError(f"size_in_bytes must be non-negative, got {size_in_bytes}")

    base = constants.NUM_BYTES_IN_KB
    units = constants.SUPPORTED_SIZES_FOR_STATS

    # Repeated division instead of log(): works for 0 (log(0) is a domain
    # error) and never produces a negative or out-of-range unit index.
    scaled_size = float(size_in_bytes)
    unit_index = 0
    # Stop at the last supported unit so very large sizes are reported in that
    # unit rather than indexing past the end of the list.
    while scaled_size >= base and unit_index < len(units) - 1:
        scaled_size /= base
        unit_index += 1

    return f"{scaled_size:.2f} {units[unit_index]}"
# --- tests/tensor_search/test_utils.py (test method added after test_is_tensor_field_providing_one_empty) ---
def test_convert_bytes_to_human_readable_format(self):
    """Byte counts are rendered with two decimals and the matching unit."""
    # Checked in insertion order, one representative size per unit from B to PB.
    expected_by_size = {
        1000: "1000.00 B",
        16121: "15.74 KB",
        9874321: "9.42 MB",
        10000000000: "9.31 GB",
        712893712304234: "648.37 TB",
        6212893712323224: "5.52 PB",
    }
    for raw_size, rendered in expected_by_size.items():
        assert utils.convert_bytes_to_human_readable_format(raw_size) == rendered