Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Dense Mask IoU #5283

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/source/user_guide/evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -729,8 +729,9 @@ The only difference between each task type is in how the IoU between objects is
calculated:

- For object detections, IoUs are computed between each pair of bounding boxes
- For instance segmentations and polygons, IoUs are computed between the
polygonal shapes rather than their rectangular bounding boxes
- For instance segmentations, when ``use_masks=True``, IoUs are computed
between the dense pixel masks rather than their rectangular bounding boxes
- For polygons, IoUs are computed between the polygonal shapes
- For keypoint tasks,
`object keypoint similarity <https://cocodataset.org/#keypoints-eval>`_
is computed for each pair of objects, using the extent of the ground truth
Expand All @@ -744,8 +745,7 @@ stored in |Detections| format.

For instance segmentation tasks, the ground truth and predicted objects should
be stored in |Detections| format, and each |Detection| instance should have its
:attr:`mask <fiftyone.core.labels.Detection.mask>` attribute populated to
define the extent of the object within its bounding box.
mask populated to define the extent of the object within its bounding box.

.. note::

Expand Down
3 changes: 2 additions & 1 deletion fiftyone/utils/eval/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ class COCOEvaluationConfig(DetectionEvaluationConfig):
of the provided :class:`fiftyone.core.labels.Polyline` instances
rather than using their actual geometries
tolerance (None): a tolerance, in pixels, when generating approximate
polylines for instance masks. Typical values are 1-3 pixels
polylines for instance masks. Typical values are 1-3 pixels. By
default, IoUs are computed directly on the dense pixel masks
compute_mAP (False): whether to perform the necessary computations so
that mAP and PR curves can be generated
iou_threshs (None): a list of IoU thresholds to use when computing mAP
Expand Down
3 changes: 2 additions & 1 deletion fiftyone/utils/eval/openimages.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ class OpenImagesEvaluationConfig(DetectionEvaluationConfig):
of the provided :class:`fiftyone.core.labels.Polyline` instances
rather than using their actual geometries
tolerance (None): a tolerance, in pixels, when generating approximate
polylines for instance masks. Typical values are 1-3 pixels
polylines for instance masks. Typical values are 1-3 pixels. By
default, IoUs are computed directly on the dense pixel masks
max_preds (None): the maximum number of predicted objects to evaluate
when computing mAP and PR curves
error_level (1): the error level to use when manipulating instance
Expand Down
183 changes: 154 additions & 29 deletions fiftyone/utils/iou.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import eta.core.numutils as etan
import eta.core.utils as etau
import eta.core.image as etai

import fiftyone.core.labels as fol
import fiftyone.core.utils as fou
Expand Down Expand Up @@ -73,7 +74,8 @@ def compute_ious(
of the provided :class:`fiftyone.core.labels.Polyline` instances
rather than using their actual geometries
tolerance (None): a tolerance, in pixels, when generating approximate
polylines for instance masks. Typical values are 1-3 pixels
polylines for instance masks. Typical values are 1-3 pixels. By
default, IoUs are computed directly on the dense pixel masks
sparse (False): whether to return a sparse dict of non-zero IoUs rather
than a full matrix
error_level (1): the error level to use when manipulating instance
Expand Down Expand Up @@ -136,11 +138,6 @@ def compute_ious(
)

if use_masks:
# @todo when tolerance is None, consider using dense masks rather than
# polygonal approximations?
if tolerance is None:
tolerance = 2

return _compute_mask_ious(
preds,
gts,
Expand Down Expand Up @@ -528,6 +525,65 @@ def compute_bbox_iou(gt, pred, gt_crowd=False):
return min(etan.safe_divide(inter, union), 1)


def _dense_iou(gt, pred, gt_crowd=False):
    """Computes the IoU between the dense instance masks of the given ground
    truth and predicted detections.

    Both masks are rasterized onto a common full-frame pixel grid before being
    compared. The frame size is inferred independently from each detection's
    mask resolution and relative bounding box; when the two estimates
    disagree, the frame with the larger pixel area is used.

    Args:
        gt: a :class:`fiftyone.core.labels.Detection`
        pred: a :class:`fiftyone.core.labels.Detection`
        gt_crowd (False): whether the ground truth object is a crowd. If
            True, the overlap is normalized by the area of the predicted mask
            rather than by the union of the two masks

    Returns:
        the IoU, in ``[0, 1]``. Returns ``0.0`` when either detection has no
        usable mask or when the normalizing area is empty
    """
    if gt.mask is None or pred.mask is None:
        # A detection without a mask covers no pixels, so nothing can overlap
        return 0.0

    gt_mask = np.asarray(gt.mask)
    pred_mask = np.asarray(pred.mask)

    gt_frame = _infer_frame_size(gt_mask, gt.bounding_box)
    pred_frame = _infer_frame_size(pred_mask, pred.bounding_box)
    if gt_frame is None or pred_frame is None:
        return 0.0

    # Compare on the finer of the two inferred frames so that no mask detail
    # is discarded. Ties resolve to the prediction's frame
    if gt_frame[0] * gt_frame[1] > pred_frame[0] * pred_frame[1]:
        frame_w, frame_h = gt_frame
    else:
        frame_w, frame_h = pred_frame

    gt_full = _render_mask(gt_mask, gt.bounding_box, frame_w, frame_h)
    pred_full = _render_mask(pred_mask, pred.bounding_box, frame_w, frame_h)

    # Convert to Python ints so that an empty denominator is handled
    # explicitly rather than silently producing NaN via NumPy division
    inter = int(np.count_nonzero(gt_full & pred_full))
    if gt_crowd:
        union = int(np.count_nonzero(pred_full))
    else:
        union = int(np.count_nonzero(gt_full | pred_full))

    if union == 0:
        return 0.0

    return min(inter / union, 1.0)


def _infer_frame_size(mask, bounding_box):
    """Infers the ``(width, height)``, in pixels, of the frame that a mask
    lives in from the mask's resolution and its relative bounding box.

    Returns ``None`` if the mask or the bounding box has no extent.
    """
    mask_h, mask_w = mask.shape
    _, _, rel_w, rel_h = bounding_box
    if mask_h == 0 or mask_w == 0 or rel_w <= 0 or rel_h <= 0:
        return None

    frame_w = max(int(round(mask_w / rel_w)), 1)
    frame_h = max(int(round(mask_h / rel_h)), 1)
    return frame_w, frame_h


def _render_mask(mask, bounding_box, frame_w, frame_h):
    """Rasterizes an instance mask onto a boolean canvas of shape
    ``(frame_h, frame_w)`` at the location given by its relative bounding box.

    The mask is resampled to the box's pixel size with nearest-neighbor
    lookup, so the result stays strictly binary, and any portion of the box
    that falls outside of the frame is clipped.
    """
    canvas = np.zeros((frame_h, frame_w), dtype=bool)

    rel_x, rel_y, rel_w, rel_h = bounding_box
    x1 = int(round(rel_x * frame_w))
    y1 = int(round(rel_y * frame_h))
    x2 = int(round(x1 + rel_w * frame_w))
    y2 = int(round(y1 + rel_h * frame_h))

    box_w = x2 - x1
    box_h = y2 - y1
    mask_h, mask_w = mask.shape
    if box_w <= 0 or box_h <= 0 or mask_h == 0 or mask_w == 0:
        return canvas

    # Nearest-neighbor resampling: target pixel k reads source pixel
    # floor(k * src / dst). This is the identity when the sizes already match
    rows = np.arange(box_h) * mask_h // box_h
    cols = np.arange(box_w) * mask_w // box_w
    patch = mask[np.ix_(rows, cols)].astype(bool)

    # Clip the box to the canvas so out-of-frame boxes cannot raise
    cx1, cy1 = max(x1, 0), max(y1, 0)
    cx2, cy2 = min(x2, frame_w), min(y2, frame_h)
    if cx1 >= cx2 or cy1 >= cy2:
        return canvas

    canvas[cy1:cy2, cx1:cx2] = patch[cy1 - y1 : cy2 - y1, cx1 - x1 : cx2 - x1]
    return canvas


def _get_detection_box(det, dimension=None):
if dimension is None:
dimension = _get_bbox_dim(det)
Expand Down Expand Up @@ -559,6 +615,10 @@ def _get_poly_box(x):
return _get_detection_box(detection)


def _get_mask_box(x):
    """Returns the box for the given detection, as computed by
    ``_get_detection_box`` when called with its default ``dimension``.
    """
    box = _get_detection_box(x)
    return box


def _compute_bbox_ious(
preds,
gts,
Expand Down Expand Up @@ -624,6 +684,61 @@ def _compute_bbox_ious(
return ious


def _compute_dense_mask_ious(
    preds,
    gts,
    error_level,
    iscrowd=None,
    classwise=False,
    gt_crowds=None,
    sparse=False,
):
    """Computes dense mask IoUs between the given predicted and ground truth
    detections.

    An R-tree over the ground truth boxes is used so that the dense mask
    comparison is only run for pairs that the index reports as intersecting.

    Args:
        preds: a list of predicted detections
        gts: a list of ground truth detections. If this is the same list
            object as ``preds``, each unordered pair is evaluated once and
            the result is mirrored
        error_level: not used by this function; accepted so that the
            signature parallels ``_compute_polygon_ious``
        iscrowd (None): an optional function that is called on each ground
            truth to decide whether it is a crowd. When provided, it takes
            precedence over ``gt_crowds``
        classwise (False): whether to skip pairs whose ``label`` values differ
        gt_crowds (None): an optional list of per-ground truth crowd flags,
            used when ``iscrowd`` is not provided. By default, no ground
            truth is treated as a crowd
        sparse (False): whether to return a sparse dict of IoUs rather than a
            full matrix

    Returns:
        a ``len(preds) x len(gts)`` array of IoUs, or, if ``sparse`` is True,
        a dict mapping object IDs to lists of ``(other_id, iou)`` tuples
    """
    is_symmetric = preds is gts

    if sparse:
        ious = defaultdict(list)
    else:
        ious = np.zeros((len(preds), len(gts)))

    # Only fall back to "no crowds" when the caller supplied neither a crowd
    # function nor precomputed flags, so that caller-provided flags are kept
    if iscrowd is not None:
        gt_crowds = [iscrowd(gt) for gt in gts]
    elif gt_crowds is None:
        gt_crowds = [False] * len(gts)

    index_property = rti.Property()
    index_property.dimension = 2

    rtree_index = rti.Index(properties=index_property, interleaved=False)
    for j, gt in enumerate(gts):
        rtree_index.insert(j, _get_mask_box(gt))

    for i, pred in enumerate(preds):
        candidates = rtree_index.intersection(_get_mask_box(pred))
        for j in candidates:  # pylint: disable=not-an-iterable
            gt = gts[j]

            if classwise and pred.label != gt.label:
                continue

            # In symmetric mode, pair (i, j) with j > i is handled when the
            # roles are swapped
            if is_symmetric and j > i:
                continue

            iou = _dense_iou(gt, pred, gt_crowd=gt_crowds[j])

            # The diagonal pair must not be mirrored, otherwise an object's
            # match with itself would be recorded twice in sparse mode
            mirror = is_symmetric and i != j

            if sparse:
                ious[pred.id].append((gt.id, iou))
                if mirror:
                    ious[gt.id].append((pred.id, iou))
            else:
                ious[i, j] = iou
                if mirror:
                    ious[j, i] = iou

    return ious


def _compute_polygon_ious(
preds,
gts,
Expand Down Expand Up @@ -767,34 +882,44 @@ def _compute_mask_ious(
):
is_symmetric = preds is gts

with contextlib.ExitStack() as context:
# We're ignoring errors, so suppress shapely logging that occurs when
# invalid geometries are encountered
if error_level > 1:
context.enter_context(
fou.LoggingLevel(logging.CRITICAL, logger="shapely")
)

pred_polys = _masks_to_polylines(preds, tolerance, error_level)

if is_symmetric:
gt_polys = pred_polys
else:
gt_polys = _masks_to_polylines(gts, tolerance, error_level)

if iscrowd is not None:
gt_crowds = [iscrowd(gt) for gt in gts]
else:
gt_crowds = [False] * len(gts)

return _compute_polygon_ious(
pred_polys,
gt_polys,
error_level,
classwise=classwise,
gt_crowds=gt_crowds,
sparse=sparse,
)
if tolerance is None:
return _compute_dense_mask_ious(
preds,
gts,
error_level,
classwise=classwise,
gt_crowds=gt_crowds,
sparse=sparse,
)
else:
with contextlib.ExitStack() as context:
# We're ignoring errors, so suppress shapely logging that occurs when
# invalid geometries are encountered
if error_level > 1:
context.enter_context(
fou.LoggingLevel(logging.CRITICAL, logger="shapely")
)

pred_polys = _masks_to_polylines(preds, tolerance, error_level)

if is_symmetric:
gt_polys = pred_polys
else:
gt_polys = _masks_to_polylines(gts, tolerance, error_level)

return _compute_polygon_ious(
pred_polys,
gt_polys,
error_level,
classwise=classwise,
gt_crowds=gt_crowds,
sparse=sparse,
)


def _compute_segment_ious(preds, gts, sparse=False):
Expand Down
Loading