Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use estimated document count when possible for count() operations #5398

Merged
merged 2 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions fiftyone/core/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -7726,6 +7726,22 @@ def count(self, field_or_expr=None, expr=None, safe=False):
Returns:
the count
"""

# Optimization: use estimated document count when possible
if self._is_full_collection() and (
expr is None
and (
field_or_expr is None
or (
etau.is_str(field_or_expr)
and field_or_expr == "frames"
and self._has_frame_fields()
)
)
):
frames = field_or_expr == "frames"
return self._dataset._estimated_count(frames=frames)

make = lambda field_or_expr: foa.Count(
field_or_expr, expr=expr, safe=safe
)
Expand Down Expand Up @@ -10602,6 +10618,22 @@ def _has_frame_fields(self):
# Thin method wrapper: passes this collection and ``field_name`` straight
# through to a same-named ``_handle_id_fields()`` function and returns whatever
# it returns. Inside a method body the bare name does not resolve to the class
# attribute, so this is not a recursive call -- presumably it is a module-level
# helper defined elsewhere in this file (not visible in this diff; confirm).
def _handle_id_fields(self, field_name):
return _handle_id_fields(self, field_name)

def _is_full_collection(self):
    """Returns whether this collection qualifies as a "full" collection.

    ``count()`` consults this check before using the estimated document
    count shortcut instead of running a count aggregation.

    The result is ``True`` in exactly two situations:

    -   this object is a ``fod.Dataset`` whose media type is not
        ``fom.GROUP``
    -   this object is a ``fov.DatasetView`` into a dataset whose media type
        is ``fom.GROUP``, the view has exactly one stage, that stage is a
        ``fos.SelectGroupSlices``, and ``self._pipeline()`` is an empty list

    Returns:
        True/False
    """
    # Case 1: a non-grouped dataset itself
    if isinstance(self, fod.Dataset) and self.media_type != fom.GROUP:
        return True

    # Case 2: a grouped dataset's view, checked one condition at a time in
    # the same order as a short-circuiting ``and`` chain would check them
    # pylint:disable=no-member
    if not isinstance(self, fov.DatasetView):
        return False

    if self._dataset.media_type != fom.GROUP:
        return False

    stages = self._stages
    if len(stages) != 1:
        return False

    if not isinstance(stages[0], fos.SelectGroupSlices):
        return False

    # Only an empty aggregation pipeline counts as "full"
    return self._pipeline() == []

def _is_label_field(self, field_name, label_type_or_types):
try:
label_type = self._get_label_field_type(field_name)
Expand Down
9 changes: 9 additions & 0 deletions fiftyone/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,15 @@ def __deepcopy__(self, memo):
# Supports ``len(obj)`` by delegating to ``self.count()`` with no arguments.
# NOTE(review): if ``count()`` resolves to the collections.py implementation
# shown in this diff, a no-argument call on a full collection may return an
# estimated document count rather than an exact one -- confirm this is the
# intended behavior for ``len()``.
def __len__(self):
return self.count()

def _estimated_count(self, frames=False):
    """Returns an estimated document count for this dataset.

    The value comes from calling ``estimated_document_count()`` on the
    dataset's backing collection object, rather than from running a count
    aggregation.

    Args:
        frames (False): whether to count the frame collection
            (``self._frame_collection``) rather than the sample collection
            (``self._sample_collection``)

    Returns:
        the estimated count, or ``None`` if ``frames`` is truthy and
        ``self._frame_collection`` is ``None``
    """
    if not frames:
        return self._sample_collection.estimated_document_count()

    frame_coll = self._frame_collection
    if frame_coll is None:
        # No frame collection to query
        return None

    return frame_coll.estimated_document_count()

def __getitem__(self, id_filepath_slice):
if isinstance(id_filepath_slice, numbers.Integral):
raise ValueError(
Expand Down
Loading