From bf1d29dce868d8e7e7ba758c6c64483ea854a17e Mon Sep 17 00:00:00 2001 From: Benjamin Kane Date: Mon, 2 Dec 2024 17:19:36 -0500 Subject: [PATCH] Fix bounds for embedded lists in QP (#5202) * remove ne matching and extra sorting * use exists * add to embedded list bounds tests * skip nonfinites sort for lists * handle NaN formatting * debugging * nonfinites tweaks * selector fixes * always use distinct for embedded lists * fix selector --- .../core/src/components/Common/utils.tsx | 3 +- .../NumericFieldFilter/RangeSlider.tsx | 5 +- .../Filters/NumericFieldFilter/state.ts | 13 +-- fiftyone/server/lightning.py | 42 +++++---- fiftyone/server/view.py | 17 ++-- tests/unittests/lightning_tests.py | 88 +++++++++++++------ 6 files changed, 105 insertions(+), 63 deletions(-) diff --git a/app/packages/core/src/components/Common/utils.tsx b/app/packages/core/src/components/Common/utils.tsx index cf70f4ca91..b5cecb2050 100644 --- a/app/packages/core/src/components/Common/utils.tsx +++ b/app/packages/core/src/components/Common/utils.tsx @@ -68,13 +68,14 @@ export const getFormatter = (fieldType: string, timeZone: string, bounds) => { ); } - return numeral(v).format( + const str = numeral(v).format( [INT_FIELD, FRAME_NUMBER_FIELD, FRAME_SUPPORT_FIELD].includes(fieldType) ? "0a" : bounds[1] - bounds[0] < 0.1 ? "0.0000a" : "0.00a" ); + return str === "NaN" ? v.toString() : str; }, }; }; diff --git a/app/packages/core/src/components/Filters/NumericFieldFilter/RangeSlider.tsx b/app/packages/core/src/components/Filters/NumericFieldFilter/RangeSlider.tsx index b9990cfa28..e098bcd9e0 100644 --- a/app/packages/core/src/components/Filters/NumericFieldFilter/RangeSlider.tsx +++ b/app/packages/core/src/components/Filters/NumericFieldFilter/RangeSlider.tsx @@ -35,9 +35,12 @@ const RangeSlider = ({ const one = useRecoilValue(state.oneBound({ path, modal })); const timeZone = useRecoilValue(fos.timeZone); const hasBounds = useRecoilValue(state.hasBounds({ path, modal })); + const nonfinitesText = useRecoilValue(state.nonfinitesText({ path, modal })); if (!hasBounds) { - return ; + return ( + + ); } const showSlider = hasBounds && !(excluded && defaultRange); diff --git a/app/packages/core/src/components/Filters/NumericFieldFilter/state.ts b/app/packages/core/src/components/Filters/NumericFieldFilter/state.ts index 927b4af89e..6dbaf87e23 100644 --- a/app/packages/core/src/components/Filters/NumericFieldFilter/state.ts +++ b/app/packages/core/src/components/Filters/NumericFieldFilter/state.ts @@ -1,5 +1,5 @@ import type { Nonfinite } from "@fiftyone/state"; -import { boundsAtom, nonfiniteAtom, rangeAtom } from "@fiftyone/state"; +import { boundsAtom, nonfiniteData, rangeAtom } from "@fiftyone/state"; import { selectorFamily } from "recoil"; export const FLOAT_NONFINITES: Nonfinite[] = ["inf", "ninf", "nan"]; @@ -25,14 +25,17 @@ export const hasDefaultRange = selectorFamily({ }, }); -export const hasNonfinites = selectorFamily({ - key: "hasNonfinites", +export const nonfinitesText = selectorFamily({ + key: "nonfinitesText", get: (params: { path: string; modal: boolean }) => ({ get }) => { - return FLOAT_NONFINITES.every((key) => - get(nonfiniteAtom({ key, ...params })) + const data = get(nonfiniteData({ ...params, extended: false })); + const result = Object.entries(data).filter( + ([k, v]) => k !== "none" && Boolean(v) ); + + return result.length ? result.map(([key]) => key).join(", ") : null; }, }); diff --git a/fiftyone/server/lightning.py b/fiftyone/server/lightning.py index 701588864d..9eeed0acb4 100644 --- a/fiftyone/server/lightning.py +++ b/fiftyone/server/lightning.py @@ -9,6 +9,7 @@ from bson import ObjectId from dataclasses import asdict, dataclass from datetime import date, datetime +import math import typing as t import asyncio @@ -139,11 +140,11 @@ async def lightning_resolver( for item in sublist ] - filter = ( - {f"{dataset.group_field}.name": input.slice} - if dataset.group_field and input.slice - else None - ) + if dataset.group_field and input.slice: + filter = {f"{dataset.group_field}.name": input.slice} + dataset.group_slice = input.slice + else: + filter = {} result = await _do_async_pooled_queries(dataset, flattened, filter) results = [] @@ -316,13 +317,15 @@ async def _do_async_query( filter: t.Optional[t.Mapping[str, str]], ): if isinstance(query, DistinctQuery): - if query.has_list and not query.filters: + if query.has_list: return await _do_distinct_query(collection, query, filter) return await _do_distinct_pipeline(dataset, collection, query, filter) if filter: - query.insert(0, {"$match": filter}) + for k, v in filter.items(): + query.insert(0, {"$match": {k: v}}) + query.insert(0, {"$sort": {k: 1}}) return [i async for i in collection.aggregate(query)] @@ -420,29 +423,19 @@ def _first( ): pipeline = [{"$sort": {path: sort}}] - if floats: - pipeline.extend(_handle_nonfinites(path, sort)) - - if sort: - pipeline.append({"$match": {path: {"$ne": None}}}) - matched_arrays = _match_arrays(dataset, path, is_frame_field) if matched_arrays: pipeline += matched_arrays + elif floats: + pipeline.extend(_handle_nonfinites(path, sort)) - pipeline.append({"$limit": 1}) - + pipeline.extend([{"$match": {path: {"$exists": True}}}, {"$limit": 1}]) unwound = _unwind(dataset, path, is_frame_field) if unwound: pipeline += unwound if floats: pipeline.extend(_handle_nonfinites(path, sort)) - if sort: - pipeline.append({"$match": {path: {"$ne": None}}}) - - pipeline.append({"$sort": {path: sort}}) - return pipeline + [ { "$group": { @@ -513,8 +506,13 @@ def _match_arrays(dataset: fo.Dataset, path: str, is_frame_field: bool): def _parse_result(data): if data and data[0]: value = data[0] - if value.get("value", None) is not None: - return value["value"] + if "value" in value: + value = value["value"] + return ( + value + if not isinstance(value, float) or math.isfinite(value) + else None + ) return value.get("_id", None) diff --git a/fiftyone/server/view.py b/fiftyone/server/view.py index 9dbae98af9..414386f731 100644 --- a/fiftyone/server/view.py +++ b/fiftyone/server/view.py @@ -614,8 +614,9 @@ def _make_range_query(path: str, field: fof.Field, args): def _make_scalar_expression(f, args, field, list_field=None, is_label=False): expr = None if _is_support(field): - mn, mx = args["range"] - expr = (f[0] >= mn) & (f[1] <= mx) + if "range" in args: + mn, mx = args["range"] + expr = (f[0] >= mn) & (f[1] <= mx) elif isinstance(field, fof.ListField): if isinstance(list_field, str): return f.filter( @@ -640,12 +641,14 @@ def _make_scalar_expression(f, args, field, list_field=None, is_label=False): if not true and not false: expr = (f != True) & (f != False) elif _is_datetime(field): - mn, mx = args["range"] - p = fou.timestamp_to_datetime - expr = (f >= p(mn)) & (f <= p(mx)) + if "range" in args: + mn, mx = args["range"] + p = fou.timestamp_to_datetime + expr = (f >= p(mn)) & (f <= p(mx)) elif isinstance(field, (fof.FloatField, fof.IntField)): - mn, mx = args["range"] - expr = (f >= mn) & (f <= mx) + if "range" in args: + mn, mx = args["range"] + expr = (f >= mn) & (f <= mx) else: values = args["values"] if not values: diff --git a/tests/unittests/lightning_tests.py b/tests/unittests/lightning_tests.py index b631e8cf08..bd59703ee5 100644 --- a/tests/unittests/lightning_tests.py +++ b/tests/unittests/lightning_tests.py @@ -535,6 +535,7 @@ async def test_floats(self, dataset: fo.Dataset): """ result = await _execute(query, dataset, fo.FloatField, keys) + self.assertListEqual( result.data["lightning"], [ @@ -568,7 +569,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "classification.inf_list", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -587,7 +588,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "classification.nan_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -605,7 +606,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "classification.ninf_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -631,7 +632,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "detections.detections.inf", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -640,7 +641,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "detections.detections.inf_list", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -650,7 +651,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "detections.detections.nan", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -659,7 +660,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "detections.detections.nan_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -668,7 +669,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "detections.detections.ninf", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -677,7 +678,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "detections.detections.ninf_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -730,7 +731,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "frames.classification.inf_list", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -749,7 +750,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "frames.classification.nan_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -767,7 +768,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "frames.classification.ninf_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -793,7 +794,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "frames.detections.detections.inf", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -802,7 +803,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "frames.detections.detections.inf_list", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -812,7 +813,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "frames.detections.detections.nan", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -821,7 +822,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "frames.detections.detections.nan_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -830,7 +831,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "frames.detections.detections.ninf", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -839,7 +840,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "frames.detections.detections.ninf_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -928,7 +929,7 @@ async def test_floats(self, dataset: fo.Dataset): { "path": "inf_list", "inf": True, - "max": 1.0, + "max": None, "min": -1.0, "nan": False, "ninf": False, @@ -947,7 +948,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "nan_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": True, "ninf": False, "none": False, @@ -965,7 +966,7 @@ async def test_floats(self, dataset: fo.Dataset): "path": "ninf_list", "inf": False, "max": 1.0, - "min": -1.0, + "min": None, "nan": False, "ninf": True, "none": False, @@ -1059,7 +1060,11 @@ async def test_group_dataset(self, dataset: fo.Dataset): group = fo.Group() one = fo.Sample( classifications=fo.Classifications( - classifications=[fo.Classification(label="one")] + classifications=[ + fo.Classification(label="one"), + fo.Classification(confidence=1), + fo.Classification(confidence=-1), + ] ), filepath="one.png", group=group.element("one"), @@ -1068,7 +1073,11 @@ async def test_group_dataset(self, dataset: fo.Dataset): ) two = fo.Sample( classifications=fo.Classifications( - classifications=[fo.Classification(label="two")] + classifications=[ + fo.Classification(label="two"), + fo.Classification(confidence=2), + fo.Classification(confidence=-2), + ] ), filepath="two.png", group=group.element("two"), @@ -1080,6 +1089,11 @@ async def test_group_dataset(self, dataset: fo.Dataset): query = """ query Query($input: LightningInput!) { lightning(input: $input) { + ... on FloatLightningResult { + path + min + max + } ... on IntLightningResult { path min @@ -1097,8 +1111,13 @@ async def test_group_dataset(self, dataset: fo.Dataset): result = await _execute( query, dataset, - (fo.IntField, fo.StringField), - ["classifications.classifications.label", "numeric", "string"], + (fo.FloatField, fo.IntField, fo.StringField), + [ + "classifications.classifications.confidence", + "classifications.classifications.label", + "numeric", + "string", + ], frames=False, slice="one", ) @@ -1106,6 +1125,11 @@ async def test_group_dataset(self, dataset: fo.Dataset): self.assertListEqual( result.data["lightning"], [ + { + "path": "classifications.classifications.confidence", + "min": -1.0, + "max": 1.0, + }, { "path": "classifications.classifications.label", "values": ["one"], @@ -1119,8 +1143,13 @@ async def test_group_dataset(self, dataset: fo.Dataset): result = await _execute( query, dataset, - (fo.IntField, fo.StringField), - ["classifications.classifications.label", "numeric", "string"], + (fo.FloatField, fo.IntField, fo.StringField), + [ + "classifications.classifications.confidence", + "classifications.classifications.label", + "numeric", + "string", + ], frames=False, slice="two", ) @@ -1128,6 +1157,11 @@ async def test_group_dataset(self, dataset: fo.Dataset): self.assertListEqual( result.data["lightning"], [ + { + "path": "classifications.classifications.confidence", + "min": -2.0, + "max": 2.0, + }, { "path": "classifications.classifications.label", "values": ["two"],