Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
address review comments
Browse files Browse the repository at this point in the history
hmacr committed Jul 26, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 2c0bbee commit eca80c6
Showing 5 changed files with 44 additions and 5 deletions.
3 changes: 3 additions & 0 deletions src/marqo/tensor_search/constants.py
Original file line number Diff line number Diff line change
@@ -27,3 +27,6 @@
# NOTE(review): presumably extra characters treated as special when handling
# Lucene query strings, beyond Lucene's official set - confirm against callers.
NON_OFFICIAL_LUCENE_SPECIAL_CHARS = {
' '
}

# Base used when scaling byte counts: one unit step is 1024x the previous one.
NUM_BYTES_IN_KB = 1024
# Unit suffixes in ascending order; position i is the label for sizes scaled
# by NUM_BYTES_IN_KB ** i (index 0 -> bytes, 1 -> KB, ...).
SUPPORTED_SIZES_FOR_STATS = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']
12 changes: 9 additions & 3 deletions src/marqo/tensor_search/tensor_search.py
Original file line number Diff line number Diff line change
@@ -269,11 +269,17 @@ def _autofill_index_settings(index_settings: dict):

def get_stats(config: Config, index_name: str):
    """Return the document count and human-readable storage size of an index.

    Issues two requests through ``HttpRequests``: ``{index_name}/_count`` for
    the number of documents and ``{index_name}/_stats`` for the store size in
    bytes, which is then formatted with
    ``utils.convert_bytes_to_human_readable_format``.

    Args:
        config: connection configuration passed to ``HttpRequests``.
        index_name: name of the index to inspect.

    Returns:
        A dict with keys ``"numberOfDocuments"`` (the count reported by the
        backend) and ``"size"`` (formatted string such as ``"15.74 KB"``).

    Raises:
        errors.IndexNotFoundError: if the stats response has no size entry
            for ``index_name``.
    """
    doc_count = HttpRequests(config).post(path=F"{index_name}/_count")["count"]
    index_stats = HttpRequests(config).get(path=F"{index_name}/_stats")["indices"]
    try:
        size_in_bytes = index_stats[index_name]["total"]["store"]["size_in_bytes"]
    except (KeyError, TypeError) as e:
        # Subscripting a dict with a missing key raises KeyError (TypeError if
        # an intermediate value is not a mapping), never AttributeError, so
        # these are the exceptions that signal "index absent from response".
        raise errors.IndexNotFoundError(
            message="Tried to get a non-existent index: {}".format(index_name)
        ) from e

    formatted_size = utils.convert_bytes_to_human_readable_format(size_in_bytes)
    return {
        "numberOfDocuments": doc_count,
        "size": formatted_size
    }


7 changes: 7 additions & 0 deletions src/marqo/tensor_search/utils.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
import typing
import functools
import json
import math
from timeit import default_timer as timer
import torch
from marqo import errors
@@ -349,3 +350,9 @@ def is_tensor_field(field: str,
return field in tensor_fields
else:
return field not in non_tensor_fields


def convert_bytes_to_human_readable_format(size_in_bytes: int) -> str:
    """Format a byte count as a two-decimal string with a unit suffix.

    The value is scaled by powers of ``constants.NUM_BYTES_IN_KB`` and
    labelled with the matching entry of
    ``constants.SUPPORTED_SIZES_FOR_STATS``, e.g. ``16121`` -> ``"15.74 KB"``.

    Args:
        size_in_bytes: non-negative number of bytes.

    Returns:
        The formatted size, e.g. ``"0.00 B"``, ``"1000.00 B"``, ``"9.42 MB"``.
        Values beyond the largest supported unit are expressed in that
        largest unit rather than failing.

    Raises:
        ValueError: if ``size_in_bytes`` is negative.
    """
    if size_in_bytes < 0:
        raise ValueError(f"size_in_bytes must be non-negative, got {size_in_bytes}")

    base = constants.NUM_BYTES_IN_KB
    units = constants.SUPPORTED_SIZES_FOR_STATS
    largest_factor = len(units) - 1

    # Find the unit by comparing against exact integer thresholds instead of
    # flooring a ratio of float logarithms: this handles 0 (log is undefined),
    # cannot be off by one near exact powers of the base, and never produces
    # an index past the end of the unit list.
    size_factor = 0
    threshold = base
    while size_factor < largest_factor and size_in_bytes >= threshold:
        size_factor += 1
        threshold *= base

    processed_size = size_in_bytes / base ** size_factor
    return f"{processed_size:.2f} {units[size_factor]}"
8 changes: 6 additions & 2 deletions tests/tensor_search/test_get_stats.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,9 @@ def test_get_stats_empty(self):
except IndexNotFoundError as s:
pass
tensor_search.create_vector_index(config=self.config, index_name=self.index_name_1)
assert tensor_search.get_stats(config=self.config, index_name=self.index_name_1)["numberOfDocuments"] == 0
index_stats = tensor_search.get_stats(config=self.config, index_name=self.index_name_1)
assert index_stats["numberOfDocuments"] == 0
assert len(index_stats["size"]) != 0

def test_get_stats_non_empty(self):
try:
@@ -35,4 +37,6 @@ def test_get_stats_non_empty(self):
auto_refresh=True, device="cpu"
)
)
assert tensor_search.get_stats(config=self.config, index_name=self.index_name_1)["numberOfDocuments"] == 3
index_stats = tensor_search.get_stats(config=self.config, index_name=self.index_name_1)
assert index_stats["numberOfDocuments"] == 3
assert len(index_stats["size"]) != 0
19 changes: 19 additions & 0 deletions tests/tensor_search/test_utils.py
Original file line number Diff line number Diff line change
@@ -398,3 +398,22 @@ def test_is_tensor_field_providing_one_empty(self):
non_tensor_fields = []
with self.assertRaises(errors.InternalError):
utils.is_tensor_field('field1', tensor_fields=tensor_fields, non_tensor_fields=non_tensor_fields)

def test_convert_bytes_to_human_readable_format(self):
    """Check byte counts from the B range up to PB format as expected."""
    # (input size in bytes, expected formatted string), in ascending order.
    expected_by_size = (
        (1000, "1000.00 B"),
        (16121, "15.74 KB"),
        (9874321, "9.42 MB"),
        (10000000000, "9.31 GB"),
        (712893712304234, "648.37 TB"),
        (6212893712323224, "5.52 PB"),
    )
    for raw_size, expected_text in expected_by_size:
        assert utils.convert_bytes_to_human_readable_format(raw_size) == expected_text

0 comments on commit eca80c6

Please sign in to comment.