diff --git a/keras/api/_tf_keras/keras/layers/__init__.py b/keras/api/_tf_keras/keras/layers/__init__.py index 370ae1358c7d..6b4787936ea3 100644 --- a/keras/api/_tf_keras/keras/layers/__init__.py +++ b/keras/api/_tf_keras/keras/layers/__init__.py @@ -145,6 +145,9 @@ from keras.src.layers.preprocessing.image_preprocessing.center_crop import ( CenterCrop, ) +from keras.src.layers.preprocessing.image_preprocessing.max_num_bounding_box import ( + MaxNumBoundingBoxes, +) from keras.src.layers.preprocessing.image_preprocessing.random_brightness import ( RandomBrightness, ) diff --git a/keras/api/_tf_keras/keras/utils/__init__.py b/keras/api/_tf_keras/keras/utils/__init__.py index 32bd17d960f2..0df452559c55 100644 --- a/keras/api/_tf_keras/keras/utils/__init__.py +++ b/keras/api/_tf_keras/keras/utils/__init__.py @@ -4,6 +4,7 @@ since your modifications would be overwritten. """ +from keras.api.utils import bounding_boxes from keras.api.utils import legacy from keras.src.backend.common.global_state import clear_session from keras.src.backend.common.keras_tensor import is_keras_tensor diff --git a/keras/api/_tf_keras/keras/utils/bounding_boxes/__init__.py b/keras/api/_tf_keras/keras/utils/bounding_boxes/__init__.py new file mode 100644 index 000000000000..7b16301c8971 --- /dev/null +++ b/keras/api/_tf_keras/keras/utils/bounding_boxes/__init__.py @@ -0,0 +1,21 @@ +"""DO NOT EDIT. + +This file was autogenerated. Do not edit it by hand, +since your modifications would be overwritten. +""" + +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + affine_transform, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + clip_to_image_size, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + convert_format, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + crop, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + pad, +) diff --git a/keras/api/layers/__init__.py b/keras/api/layers/__init__.py index 2ae17dcfbd93..632ce68acf4f 100644 --- a/keras/api/layers/__init__.py +++ b/keras/api/layers/__init__.py @@ -145,6 +145,9 @@ from keras.src.layers.preprocessing.image_preprocessing.center_crop import ( CenterCrop, ) +from keras.src.layers.preprocessing.image_preprocessing.max_num_bounding_box import ( + MaxNumBoundingBoxes, +) from keras.src.layers.preprocessing.image_preprocessing.random_brightness import ( RandomBrightness, ) diff --git a/keras/api/utils/__init__.py b/keras/api/utils/__init__.py index 32bd17d960f2..0df452559c55 100644 --- a/keras/api/utils/__init__.py +++ b/keras/api/utils/__init__.py @@ -4,6 +4,7 @@ since your modifications would be overwritten. """ +from keras.api.utils import bounding_boxes from keras.api.utils import legacy from keras.src.backend.common.global_state import clear_session from keras.src.backend.common.keras_tensor import is_keras_tensor diff --git a/keras/api/utils/bounding_boxes/__init__.py b/keras/api/utils/bounding_boxes/__init__.py new file mode 100644 index 000000000000..7b16301c8971 --- /dev/null +++ b/keras/api/utils/bounding_boxes/__init__.py @@ -0,0 +1,21 @@ +"""DO NOT EDIT. + +This file was autogenerated. Do not edit it by hand, +since your modifications would be overwritten. 
+""" + +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + affine_transform, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + clip_to_image_size, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + convert_format, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + crop, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( + pad, +) diff --git a/keras/src/layers/__init__.py b/keras/src/layers/__init__.py index 7c425cdf8136..985161bc86b2 100644 --- a/keras/src/layers/__init__.py +++ b/keras/src/layers/__init__.py @@ -88,6 +88,9 @@ from keras.src.layers.preprocessing.image_preprocessing.center_crop import ( CenterCrop, ) +from keras.src.layers.preprocessing.image_preprocessing.max_num_bounding_box import ( + MaxNumBoundingBoxes, +) from keras.src.layers.preprocessing.image_preprocessing.random_brightness import ( RandomBrightness, ) diff --git a/keras/src/layers/preprocessing/image_preprocessing/auto_contrast.py b/keras/src/layers/preprocessing/image_preprocessing/auto_contrast.py index 83077d9d5dc9..4d98a88fb379 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/auto_contrast.py +++ b/keras/src/layers/preprocessing/image_preprocessing/auto_contrast.py @@ -88,7 +88,10 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): return bounding_boxes diff --git a/keras/src/layers/preprocessing/image_preprocessing/base_image_preprocessing_layer.py b/keras/src/layers/preprocessing/image_preprocessing/base_image_preprocessing_layer.py index c64f61ef15cc..88080621c9db 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/base_image_preprocessing_layer.py +++ b/keras/src/layers/preprocessing/image_preprocessing/base_image_preprocessing_layer.py @@ -64,7 +64,10 @@ def transform_labels(self, labels, transformation, training=True): raise NotImplementedError() def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): raise NotImplementedError() @@ -88,13 +91,19 @@ def transform_single_label(self, label, transformation, training=True): return self.backend.numpy.squeeze(outputs, axis=0) def transform_single_bounding_box( - self, bounding_box, transformation, training=True + self, + bounding_box, + transformation, + training=True, ): - bounding_boxes = self.backend.numpy.expand_dims(bounding_box, axis=0) + bounding_boxes = self._format_single_input_bounding_box(bounding_box) outputs = self.transform_bounding_boxes( - bounding_boxes, transformation=transformation, training=training + bounding_boxes, + transformation=transformation, + training=training, ) - return self.backend.numpy.squeeze(outputs, axis=0) + bounding_box = self._format_single_output_bounding_box(outputs) + return bounding_box def transform_single_segmentation_mask( self, segmentation_mask, transformation, training=True @@ -144,8 +153,11 @@ def call(self, data, training=True): "`bounding_box_format='xyxy'`." 
) bounding_boxes = densify_bounding_boxes( - data["bounding_boxes"], backend=self.backend + data["bounding_boxes"], + is_batched=is_batched, + backend=self.backend, ) + if is_batched: data["bounding_boxes"] = self.transform_bounding_boxes( bounding_boxes, @@ -203,6 +215,32 @@ def call(self, data, training=True): training=training, ) + def _format_single_input_bounding_box(self, bounding_box): + for key in bounding_box: + if key == "labels": + bounding_box[key] = self.backend.numpy.expand_dims( + bounding_box[key], axis=0 + ) + if key == "boxes": + bounding_box[key] = self.backend.numpy.expand_dims( + bounding_box[key], axis=0 + ) + + return bounding_box + + def _format_single_output_bounding_box(self, bounding_boxes): + for key in bounding_boxes: + if key == "labels": + bounding_boxes[key] = self.backend.numpy.squeeze( + bounding_boxes[key], axis=0 + ) + if key == "boxes": + bounding_boxes[key] = self.backend.numpy.squeeze( + bounding_boxes[key], axis=0 + ) + + return bounding_boxes + def get_config(self): config = super().get_config() if self.bounding_box_format is not None: diff --git a/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/bounding_box.py b/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/bounding_box.py new file mode 100644 index 000000000000..0f26900c5397 --- /dev/null +++ b/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/bounding_box.py @@ -0,0 +1,519 @@ +import math + +from keras.src.utils import backend_utils + + +class BoundingBox: + def __init__(self): + self.backend = backend_utils.DynamicBackend() + + def convert_format( + self, + boxes, + source: str, + target: str, + height=None, + width=None, + dtype="float32", + ): + """Converts `boxes` from one format to another. + + Supported formats are: + + - `"xyxy"`, also known as `corners` format. In this format the first + four axes represent `[left, top, right, bottom]` in that order. + - `"rel_xyxy"`. In this format, the axes are the same as `"xyxy"` but + the x coordinates are normalized using the image width, and the y + axes the image height. All values in `rel_xyxy` are in the range + `(0, 1)`. + - `"xywh"`. In this format the first four axes represent + `[left, top, width, height]`. + - `"rel_xywh". In this format the first four axes represent + [left, top, width, height], just like `"xywh"`. Unlike `"xywh"`, + the values are in the range (0, 1) instead of absolute pixel values. + - `"center_xyWH"`. In this format the first two coordinates represent + the x and y coordinates of the center of the bounding box, while the + last two represent the width and height of the bounding box. + - `"center_yxHW"`. In this format the first two coordinates represent + the y and x coordinates of the center of the bounding box, while the + last two represent the height and width of the bounding box. + - `"yxyx"`. In this format the first four axes represent + [top, left, bottom, right] in that order. + - `"rel_yxyx"`. In this format, the axes are the same as `"yxyx"` but + the x coordinates are normalized using the image width, and the y + axes the image height. All values in `rel_yxyx` are in the range + (0, 1). + Formats are case insensitive. It is recommended that you capitalize + width and height to maximize the visual difference between `"xyWH"` + and `"xyxy"`. + + Relative formats, abbreviated `rel`, make use of the shapes of the + `images` passed. In these formats, the coordinates, widths, and heights + are all specified as percentages of the host image. 
+ + Example: + + ```python + boxes = { + "boxes": [TODO], + "labels": [TODO], + } + boxes_in_xywh = keras.utils.bounding_boxes.convert_format( + boxes, + source='xyxy', + target='xyWH' + ) + ``` + + Args: + boxes: tensor representing bounding boxes in the format specified in + the `source` parameter. `boxes` can optionally have extra + dimensions stacked on the final axis to store metadata. boxes + should be a 3D tensor, with the shape + `[batch_size, num_boxes, 4]`. Alternatively, boxes can be a + dictionary with key 'boxes' containing a tensor matching the + aforementioned spec. + source:One of `"xyxy"`, `"yxyx"`, `"xywh"`, `"center_xywh"`, + `"center_yxhw"`, `"rel_xyxy"`, "rel_yxyx", "rel_xywh", + "rel_center_xywh". Used to specify the original format of the + `boxes` parameter. + target:One of `"xyxy"`, `"yxyx"`, `"xywh"`, `"center_xywh"`, + `"center_yxhw"`, `"rel_xyxy"`, "rel_yxyx", "rel_xywh", + "rel_center_xywh". Used to specify the destination format of + the `boxes` parameter. + images: (Optional) a batch of images aligned with `boxes` on the + first axis. Should be at least 3 dimensions, with the first 3 + dimensions representing: `[batch_size, height, width]`. Used in + some converters to compute relative pixel values of the bounding + box dimensions. Required when transforming from a rel format to + a non-rel format. + dtype: the data type to use when transforming the boxes, defaults to + `"float32"`. + """ + if isinstance(boxes, dict): + boxes["boxes"] = self.convert_format( + boxes["boxes"], + source=source, + target=target, + height=height, + width=width, + dtype=dtype, + ) + return boxes + + to_xyxy_converters = { + "xyxy": self._xyxy_to_xyxy, + "yxyx": self._yxyx_to_xyxy, + "xywh": self._xywh_to_xyxy, + "center_xywh": self._center_xywh_to_xyxy, + "center_yxhw": self._center_yxhw_to_xyxy, + "rel_xyxy": self._rel_xyxy_to_xyxy, + "rel_yxyx": self._rel_yxyx_to_xyxy, + "rel_xywh": self._rel_xywh_to_xyxy, + "rel_center_xywh": self._rel_center_xywh_to_xyxy, + } + from_xyxy_converters = { + "xyxy": self._xyxy_to_xyxy, + "yxyx": self._xyxy_to_yxyx, + "xywh": self._xyxy_to_xywh, + "center_xywh": self._xyxy_to_center_xywh, + "center_yxhw": self._xyxy_to_center_yxhw, + "rel_xyxy": self._xyxy_to_rel_xyxy, + "rel_yxyx": self._xyxy_to_rel_yxyx, + "rel_xywh": self._xyxy_to_rel_xywh, + "rel_center_xywh": self._xyxy_to_rel_center_xywh, + } + + ops = self.backend + boxes_shape = ops.shape(boxes) + if boxes_shape[-1] != 4: + raise ValueError( + "`boxes` must be a tensor with the last dimension of 4. " + f"Received: boxes.shape={boxes_shape}" + ) + source = source.lower() + target = target.lower() + if source not in to_xyxy_converters.keys(): + raise ValueError( + f"Available source: {list(to_xyxy_converters.keys())}. " + f"Received: source={source}" + ) + if target not in from_xyxy_converters.keys(): + raise ValueError( + f"Available target: {list(from_xyxy_converters.keys())}. 
" + f"Received: target={target}" + ) + boxes = ops.cast(boxes, dtype) + if source == target: + return boxes + if height is not None: + height = ops.cast(height, dtype) + if width is not None: + width = ops.cast(width, dtype) + + if source.startswith("rel_") and target.startswith("rel_"): + source = source.replace("rel_", "", 1) + target = target.replace("rel_", "", 1) + to_xyxy_converter = to_xyxy_converters[source] + from_xyxy_converter = from_xyxy_converters[target] + in_xyxy_boxes = to_xyxy_converter(boxes, height, width) + return from_xyxy_converter(in_xyxy_boxes, height, width) + + def clip_to_image_size( + self, bounding_boxes, height=None, width=None, format="xyxy" + ): + if format not in ("xyxy", "rel_xyxy"): + raise NotImplementedError + if format == "xyxy" and (height is None or width is None): + raise ValueError( + "`height` and `width` must be set if `format='xyxy'`." + ) + + ops = self.backend + boxes, labels = bounding_boxes["boxes"], bounding_boxes["labels"] + + if format == "xyxy": + x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1) + x1 = ops.numpy.clip(x1, 0, width) + y1 = ops.numpy.clip(y1, 0, height) + x2 = ops.numpy.clip(x2, 0, width) + y2 = ops.numpy.clip(y2, 0, height) + boxes = ops.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + areas = self._compute_area(boxes) + areas = ops.numpy.squeeze(areas, axis=-1) + labels = ops.numpy.where(areas > 0, labels, -1) + elif format == "rel_xyxy": + x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1) + x1 = ops.numpy.clip(x1, 0.0, 1.0) + y1 = ops.numpy.clip(y1, 0.0, 1.0) + x2 = ops.numpy.clip(x2, 0.0, 1.0) + y2 = ops.numpy.clip(y2, 0.0, 1.0) + boxes = ops.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + areas = self._compute_area(boxes) + areas = ops.numpy.squeeze(areas, axis=-1) + labels = ops.numpy.where(areas > 0, labels, -1) + + result = bounding_boxes.copy() + result["boxes"] = boxes + result["labels"] = labels + return result + + def affine( + self, + boxes, + angle, + translate_x, + translate_y, + scale, + shear_x, + shear_y, + height, + width, + center_x=None, + center_y=None, + ): + ops = self.backend + + boxes_shape = ops.shape(boxes) + batch_size = boxes_shape[0] + n_boxes = boxes_shape[1] + if center_x is None: + center_x = 0.5 + if center_y is None: + center_y = 0.5 + + matrix = self._compute_inverse_affine_matrix( + center_x, + center_y, + angle, + translate_x, + translate_y, + scale, + shear_x, + shear_y, + height, + width, + ) + transposed_matrix = ops.numpy.transpose(matrix[:, :2, :], [0, 2, 1]) + points = boxes # [B, N, 4] + points = ops.numpy.stack( + [ + points[..., 0], + points[..., 1], + points[..., 2], + points[..., 1], + points[..., 2], + points[..., 3], + points[..., 0], + points[..., 3], + ], + axis=-1, + ) + points = ops.numpy.reshape(points, [batch_size, n_boxes, 4, 2]) + points = ops.numpy.concatenate( + [ + points, + ops.numpy.ones([batch_size, n_boxes, 4, 1], points.dtype), + ], + axis=-1, + ) + transformed_points = ops.numpy.einsum( + "bnxy,byz->bnxz", points, transposed_matrix + ) + boxes_min = ops.numpy.amin(transformed_points, axis=2) + boxes_max = ops.numpy.amax(transformed_points, axis=2) + outputs = ops.numpy.concatenate([boxes_min, boxes_max], axis=-1) + return outputs + + def crop(self, boxes, top, left, height, width): + ops = self.backend + + x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1) + x1 = x1 - left + y1 = y1 - top + x2 = x2 - left + y2 = y2 - top + x1 = ops.numpy.clip(x1, 0, width) + y1 = ops.numpy.clip(y1, 0, height) + x2 = ops.numpy.clip(x2, 0, width) + y2 = 
ops.numpy.clip(y2, 0, height) + outputs = ops.numpy.concatenate([x1, y1, x2, y2], axis=-1) + return outputs + + def pad(self, boxes, top, left): + ops = self.backend + + x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1) + x1 = x1 + left + y1 = y1 + top + x2 = x2 + left + y2 = y2 + top + outputs = ops.numpy.concatenate([x1, y1, x2, y2], axis=-1) + return outputs + + # Converters + + def _xyxy_to_xyxy(self, boxes, height=None, width=None): + return boxes + + def _yxyx_to_xyxy(self, boxes, height=None, width=None): + y1, x1, y2, x2 = self.backend.numpy.split(boxes, 4, axis=-1) + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _xywh_to_xyxy(self, boxes, height=None, width=None): + x1, y1, w, h = self.backend.numpy.split(boxes, 4, axis=-1) + x2 = x1 + w + y2 = y1 + h + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _center_xywh_to_xyxy(self, boxes, height=None, width=None): + ops = self.backend + cx, cy, w, h = ops.numpy.split(boxes, 4, axis=-1) + half_w = w / 2.0 + half_h = h / 2.0 + x1 = cx - half_w + y1 = cy - half_h + x2 = cx + half_w + y2 = cy + half_h + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _center_yxhw_to_xyxy(self, boxes, height=None, width=None): + ops = self.backend + cy, cx, h, w = ops.numpy.split(boxes, 4, axis=-1) + half_w = w / 2.0 + half_h = h / 2.0 + x1 = cx - half_w + y1 = cy - half_h + x2 = cx + half_w + y2 = cy + half_h + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _rel_xyxy_to_xyxy(self, boxes, height=None, width=None): + ops = self.backend + rel_x1, rel_y1, rel_x2, rel_y2 = ops.numpy.split(boxes, 4, axis=-1) + x1 = rel_x1 * width + y1 = rel_y1 * height + x2 = rel_x2 * width + y2 = rel_y2 * height + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _rel_yxyx_to_xyxy(self, boxes, height=None, width=None): + ops = self.backend + rel_y1, rel_x1, rel_y2, rel_x2 = ops.numpy.split(boxes, 4, axis=-1) + x1 = rel_x1 * width + y1 = rel_y1 * height + x2 = rel_x2 * width + y2 = rel_y2 * height + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _rel_xywh_to_xyxy(self, boxes, height=None, width=None): + ops = self.backend + rel_x1, rel_y1, rel_w, rel_h = ops.numpy.split(boxes, 4, axis=-1) + x1 = rel_x1 * width + y1 = rel_y1 * height + x2 = (rel_x1 + rel_w) * width + y2 = (rel_y1 + rel_h) * height + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _rel_center_xywh_to_xyxy(self, boxes, height=None, width=None): + ops = self.backend + rel_cx, rel_cy, rel_w, rel_h = ops.numpy.split(boxes, 4, axis=-1) + half_rel_w = rel_w / 2.0 + half_rel_h = rel_h / 2.0 + x1 = (rel_cx - half_rel_w) * height + y1 = (rel_cy - half_rel_h) * width + x2 = (rel_cx + half_rel_w) * height + y2 = (rel_cy + half_rel_h) * width + return self.backend.numpy.concatenate([x1, y1, x2, y2], axis=-1) + + def _xyxy_to_yxyx(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + return self.backend.numpy.concatenate([y1, x1, y2, x2], axis=-1) + + def _xyxy_to_xywh(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + w = x2 - x1 + h = y2 - y1 + return self.backend.numpy.concatenate([x1, y1, w, h], axis=-1) + + def _xyxy_to_center_xywh(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + cx = x1 + ((x2 - x1) / 2.0) + cy = y1 + ((y2 - y1) / 2.0) + w = x2 - x1 + h = y2 - y1 + return 
self.backend.numpy.concatenate([cx, cy, w, h], axis=-1) + + def _xyxy_to_center_yxhw(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + cx = x1 + ((x2 - x1) / 2.0) + cy = y1 + ((y2 - y1) / 2.0) + w = x2 - x1 + h = y2 - y1 + return self.backend.numpy.concatenate([cy, cx, h, w], axis=-1) + + def _xyxy_to_rel_xyxy(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + rel_x1 = self.backend.numpy.divide(x1, width) + rel_y1 = self.backend.numpy.divide(y1, height) + rel_x2 = self.backend.numpy.divide(x2, width) + rel_y2 = self.backend.numpy.divide(y2, height) + return self.backend.numpy.concatenate( + [rel_x1, rel_y1, rel_x2, rel_y2], axis=-1 + ) + + def _xyxy_to_rel_yxyx(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + rel_x1 = self.backend.numpy.divide(x1, width) + rel_y1 = self.backend.numpy.divide(y1, height) + rel_x2 = self.backend.numpy.divide(x2, width) + rel_y2 = self.backend.numpy.divide(y2, height) + return self.backend.numpy.concatenate( + [rel_y1, rel_x1, rel_y2, rel_x2], axis=-1 + ) + + def _xyxy_to_rel_xywh(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + rel_x1 = x1 / width + rel_y1 = y1 / height + rel_w = (x2 - x1) / width + rel_h = (y2 - y1) / height + return self.backend.numpy.concatenate( + [rel_x1, rel_y1, rel_w, rel_h], axis=-1 + ) + + def _xyxy_to_rel_center_xywh(self, boxes, height=None, width=None): + x1, y1, x2, y2 = self.backend.numpy.split(boxes, 4, axis=-1) + rel_cx = (x1 + ((x2 - x1) / 2.0)) / width + rel_cy = (y1 + ((y2 - y1) / 2.0)) / height + rel_w = (x2 - x1) / width + rel_h = (y2 - y1) / height + return self.backend.numpy.concatenate( + [rel_cx, rel_cy, rel_w, rel_h], axis=-1 + ) + + # Clip + def _compute_area(self, boxes, format="xyxy"): + if format not in ("xyxy", "rel_xyxy"): + raise NotImplementedError + + ops = self.backend + x1, y1, x2, y2 = ops.numpy.split(boxes, 4, axis=-1) + widths = x2 - x1 + heights = y2 - y1 + return widths * heights + + # Affine + def _compute_inverse_affine_matrix( + self, + center_x, + center_y, + angle, + translate_x, + translate_y, + scale, + shear_x, + shear_y, + height, + width, + ): + # Ref: TF._geometry._get_inverse_affine_matrix + ops = self.backend + batch_size = ops.shape(angle)[0] + dtype = angle.dtype + width = ops.cast(width, dtype) + height = ops.cast(height, dtype) + + angle = -angle + shear_x = -shear_x + shear_y = -shear_y + + cx = center_x * width + cy = center_y * height + rot = ops.numpy.multiply(angle, 1.0 / 180.0 * math.pi) + tx = -translate_x * width + ty = -translate_y * height + sx = ops.numpy.multiply(shear_x, 1.0 / 180.0 * math.pi) + sy = ops.numpy.multiply(shear_y, 1.0 / 180.0 * math.pi) + + # Cached results + cos_sy = ops.numpy.cos(sy) + tan_sx = ops.numpy.tan(sx) + rot_minus_sy = rot - sy + cx_plus_tx = cx + tx + cy_plus_ty = cy + ty + + # Rotate Scale Shear (RSS) without scaling + a = ops.numpy.cos(rot_minus_sy) / cos_sy + b = -(a * tan_sx + ops.numpy.sin(rot)) + c = ops.numpy.sin(rot_minus_sy) / cos_sy + d = ops.numpy.cos(rot) - c * tan_sx + + # Inverted rotation matrix with scale and shear + # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 + a0 = d * scale + a1 = -b * scale + b0 = -c * scale + b1 = a * scale + a2 = cx - a0 * cx_plus_tx - a1 * cy_plus_ty + b2 = cy - b0 * cx_plus_tx - b1 * cy_plus_ty + + # Shape of matrix: [[batch_size], ...] 
-> [batch_size, 6] + matrix = ops.numpy.stack( + [ + a0, + a1, + a2, + b0, + b1, + b2, + ops.numpy.zeros([batch_size], dtype), + ops.numpy.zeros([batch_size], dtype), + ops.numpy.ones([batch_size], dtype), + ], + axis=-1, + ) + matrix = ops.numpy.reshape(matrix, [batch_size, 3, 3]) + return matrix diff --git a/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/converters.py b/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/converters.py index 02fd23813c5e..6e34c37ef2ec 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/converters.py +++ b/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/converters.py @@ -1,417 +1,104 @@ -"""Converter functions for working with bounding box formats.""" - -from keras.src import ops -from keras.src.utils import tf_utils - - -# Internal exception to propagate the fact images was not passed to a converter -# that needs it. -class RequiresImagesException(Exception): - pass - - -ALL_AXES = 4 - - -def _center_yxhw_to_xyxy(boxes, images=None, image_shape=None): - y, x, height, width = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [x - width / 2.0, y - height / 2.0, x + width / 2.0, y + height / 2.0], - axis=-1, - ) - - -def _center_xywh_to_xyxy(boxes, images=None, image_shape=None): - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [x - width / 2.0, y - height / 2.0, x + width / 2.0, y + height / 2.0], - axis=-1, - ) - - -def _xywh_to_xyxy(boxes, images=None, image_shape=None): - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([x, y, x + width, y + height], axis=-1) - - -def _xyxy_to_center_yxhw(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - (top + bottom) / 2.0, - (left + right) / 2.0, - bottom - top, - right - left, - ], - axis=-1, - ) - - -def _rel_xywh_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - image_width * x, - image_height * y, - image_width * (x + width), - image_height * (y + height), - ], - axis=-1, - ) - - -def _xyxy_no_op(boxes, images=None, image_shape=None): - return boxes - - -def _xyxy_to_xywh(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [left, top, right - left, bottom - top], - axis=-1, - ) - - -def _xyxy_to_rel_xywh(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - left, right = ( - left / image_width, - right / image_width, - ) - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [left, top, right - left, bottom - top], - axis=-1, - ) - - -def _xyxy_to_center_xywh(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - (left + right) / 2.0, - (top + bottom) / 2.0, - right - left, - bottom - top, - ], - axis=-1, - ) - - -def _rel_xyxy_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left * image_width, right * image_width - top, bottom = top * 
image_height, bottom * image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _xyxy_to_rel_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left / image_width, right / image_width - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _yxyx_to_xyxy(boxes, images=None, image_shape=None): - y1, x1, y2, x2 = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([x1, y1, x2, y2], axis=-1) - - -def _rel_yxyx_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - top, left, bottom, right = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left * image_width, right * image_width - top, bottom = top * image_height, bottom * image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _xyxy_to_yxyx(boxes, images=None, image_shape=None): - x1, y1, x2, y2 = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([y1, x1, y2, x2], axis=-1) - - -def _xyxy_to_rel_yxyx(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - left, right = left / image_width, right / image_width - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [top, left, bottom, right], - axis=-1, - ) - - -TO_XYXY_CONVERTERS = { - "xywh": _xywh_to_xyxy, - "center_xywh": _center_xywh_to_xyxy, - "center_yxhw": _center_yxhw_to_xyxy, - "rel_xywh": _rel_xywh_to_xyxy, - "xyxy": _xyxy_no_op, - "rel_xyxy": _rel_xyxy_to_xyxy, - "yxyx": _yxyx_to_xyxy, - "rel_yxyx": _rel_yxyx_to_xyxy, -} - -FROM_XYXY_CONVERTERS = { - "xywh": _xyxy_to_xywh, - "center_xywh": _xyxy_to_center_xywh, - "center_yxhw": _xyxy_to_center_yxhw, - "rel_xywh": _xyxy_to_rel_xywh, - "xyxy": _xyxy_no_op, - "rel_xyxy": _xyxy_to_rel_xyxy, - "yxyx": _xyxy_to_yxyx, - "rel_yxyx": _xyxy_to_rel_yxyx, -} +from keras.src.api_export import keras_export +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.bounding_box import ( # noqa: E501 + BoundingBox, +) +from keras.src.utils import backend_utils +@keras_export("keras.utils.bounding_boxes.convert_format") def convert_format( - boxes, source, target, images=None, image_shape=None, dtype="float32" + boxes, source, target, height=None, width=None, dtype="float32" ): - f"""Converts bounding_boxes from one format to another. - - Supported formats are: - - - `"xyxy"`, also known as `corners` format. In this format the first four - axes represent `[left, top, right, bottom]` in that order. - - `"rel_xyxy"`. In this format, the axes are the same as `"xyxy"` but the x - coordinates are normalized using the image width, and the y axes the - image height. All values in `rel_xyxy` are in the range `(0, 1)`. - - `"xywh"`. In this format the first four axes represent - `[left, top, width, height]`. - - `"rel_xywh". In this format the first four axes represent - [left, top, width, height], just like `"xywh"`. Unlike `"xywh"`, the - values are in the range (0, 1) instead of absolute pixel values. - - `"center_xyWH"`. 
In this format the first two coordinates represent the x - and y coordinates of the center of the bounding box, while the last two - represent the width and height of the bounding box. - - `"center_yxHW"`. In this format the first two coordinates represent the y - and x coordinates of the center of the bounding box, while the last two - represent the height and width of the bounding box. - - `"yxyx"`. In this format the first four axes represent - [top, left, bottom, right] in that order. - - `"rel_yxyx"`. In this format, the axes are the same as `"yxyx"` but the x - coordinates are normalized using the image width, and the y axes the - image height. All values in `rel_yxyx` are in the range (0, 1). - Formats are case insensitive. It is recommended that you capitalize width - and height to maximize the visual difference between `"xyWH"` and `"xyxy"`. - - Relative formats, abbreviated `rel`, make use of the shapes of the `images` - passed. In these formats, the coordinates, widths, and heights are all - specified as percentages of the host image. - - Example: - - ```python - boxes = { - "boxes": [TODO], - "labels": [TODO], - } - boxes_in_xywh = keras.utils.bounding_boxes.convert_format( - boxes, - source='xyxy', - target='xyWH' - ) - ``` - - Args: - boxes: tensor representing bounding boxes in the format specified in - the `source` parameter. `boxes` can optionally have extra - dimensions stacked on the final axis to store metadata. boxes - should be a 3D tensor, with the shape `[batch_size, num_boxes, 4]`. - Alternatively, boxes can be a dictionary with key 'boxes' containing - a tensor matching the aforementioned spec. - source:One of {" ".join([f'"{f}"' for f in TO_XYXY_CONVERTERS.keys()])}. - Used to specify the original format of the `boxes` parameter. - target:One of {" ".join([f'"{f}"' for f in TO_XYXY_CONVERTERS.keys()])}. - Used to specify the destination format of the `boxes` parameter. - images: (Optional) a batch of images aligned with `boxes` on the first - axis. Should be at least 3 dimensions, with the first 3 dimensions - representing: `[batch_size, height, width]`. Used in some - converters to compute relative pixel values of the bounding box - dimensions. Required when transforming from a rel format to a - non-rel format. - dtype: the data type to use when transforming the boxes, defaults to - `"float32"`. - """ - if isinstance(boxes, dict): - converted_boxes = boxes.copy() - converted_boxes["boxes"] = convert_format( - boxes["boxes"], - source=source, - target=target, - images=images, - image_shape=image_shape, - dtype=dtype, - ) - return converted_boxes - - if boxes.shape[-1] is not None and boxes.shape[-1] != 4: - raise ValueError( - "Expected `boxes` to be a Tensor with a final dimension of " - f"`4`. Instead, got `boxes.shape={boxes.shape}`." - ) - if images is not None and image_shape is not None: - raise ValueError( - "convert_format() expects either `images` or `image_shape`, but " - f"not both. Received images={images} image_shape={image_shape}" - ) - - _validate_image_shape(image_shape) - - source = source.lower() - target = target.lower() - if source not in TO_XYXY_CONVERTERS: - raise ValueError( - "`convert_format()` received an unsupported format for the " - "argument `source`. `source` should be one of " - f"{TO_XYXY_CONVERTERS.keys()}. Got source={source}" - ) - if target not in FROM_XYXY_CONVERTERS: - raise ValueError( - "`convert_format()` received an unsupported format for the " - "argument `target`. 
`target` should be one of " - f"{FROM_XYXY_CONVERTERS.keys()}. Got target={target}" - ) - - boxes = ops.cast(boxes, dtype) - if source == target: - return boxes - - # rel->rel conversions should not require images - if source.startswith("rel") and target.startswith("rel"): - source = source.replace("rel_", "", 1) - target = target.replace("rel_", "", 1) - - boxes, images, squeeze = _format_inputs(boxes, images) - to_xyxy_fn = TO_XYXY_CONVERTERS[source] - from_xyxy_fn = FROM_XYXY_CONVERTERS[target] - - try: - in_xyxy = to_xyxy_fn(boxes, images=images, image_shape=image_shape) - result = from_xyxy_fn(in_xyxy, images=images, image_shape=image_shape) - except RequiresImagesException: - raise ValueError( - "convert_format() must receive `images` or `image_shape` when " - "transforming between relative and absolute formats." - f"convert_format() received source=`{format}`, target=`{format}, " - f"but images={images} and image_shape={image_shape}." - ) - - return _format_outputs(result, squeeze) - - -def _format_inputs(boxes, images): - boxes_rank = len(boxes.shape) - if boxes_rank > 3: - raise ValueError( - "Expected len(boxes.shape)=2, or len(boxes.shape)=3, got " - f"len(boxes.shape)={boxes_rank}" - ) - boxes_includes_batch = boxes_rank == 3 - # Determine if images needs an expand_dims() call - if images is not None: - images_rank = len(images.shape) - if images_rank > 4: - raise ValueError( - "Expected len(images.shape)=2, or len(images.shape)=3, got " - f"len(images.shape)={images_rank}" - ) - images_include_batch = images_rank == 4 - if boxes_includes_batch != images_include_batch: - raise ValueError( - "convert_format() expects both boxes and images to be batched, " - "or both boxes and images to be unbatched. Received " - f"len(boxes.shape)={boxes_rank}, " - f"len(images.shape)={images_rank}. Expected either " - "len(boxes.shape)=2 AND len(images.shape)=3, or " - "len(boxes.shape)=3 AND len(images.shape)=4." - ) - if not images_include_batch: - images = ops.expand_dims(images, axis=0) - - if not boxes_includes_batch: - return ops.expand_dims(boxes, axis=0), images, True - return boxes, images, False - - -def _validate_image_shape(image_shape): - # Escape early if image_shape is None and skip validation. - if image_shape is None: - return - # tuple/list - if isinstance(image_shape, (tuple, list)): - if len(image_shape) != 3: - raise ValueError( - "image_shape should be of length 3, but got " - f"image_shape={image_shape}" - ) - return - - # tensor - if ops.is_tensor(image_shape): - if len(image_shape.shape) > 1: - raise ValueError( - "image_shape.shape should be (3), but got " - f"image_shape.shape={image_shape.shape}" - ) - if image_shape.shape[0] != 3: - raise ValueError( - "image_shape.shape should be (3), but got " - f"image_shape.shape={image_shape.shape}" - ) - return - - # Warn about failure cases - raise ValueError( - "Expected image_shape to be either a tuple, list, Tensor. 
" - f"Received image_shape={image_shape}" - ) - - -def _format_outputs(boxes, squeeze): - if squeeze: - return ops.squeeze(boxes, axis=0) + # Switch to tensorflow backend if we are in tf.data pipe + box_utils = BoundingBox() + if backend_utils.in_tf_graph(): + box_utils.backend.set_backend("tensorflow") + boxes = box_utils.convert_format( + boxes=boxes, + source=source, + target=target, + height=height, + width=width, + dtype=dtype, + ) + # Switch back to original backend + box_utils.backend.reset() return boxes -def _image_shape(images, image_shape, boxes): - if images is None and image_shape is None: - raise RequiresImagesException() - - if image_shape is None: - if not tf_utils.is_ragged_tensor(images): - image_shape = ops.shape(images) - height, width = image_shape[1], image_shape[2] - else: - height = ops.reshape(images.row_lengths(), (-1, 1)) - width = ops.reshape(ops.max(images.row_lengths(axis=2), 1), (-1, 1)) - height = ops.expand_dims(height, axis=-1) - width = ops.expand_dims(width, axis=-1) - else: - height, width = image_shape[0], image_shape[1] - return ops.cast(height, boxes.dtype), ops.cast(width, boxes.dtype) +@keras_export("keras.utils.bounding_boxes.clip_to_image_size") +def clip_to_image_size(bounding_boxes, height=None, width=None, format="xyxy"): + # Switch to tensorflow backend if we are in tf.data pipe + + box_utils = BoundingBox() + if backend_utils.in_tf_graph(): + box_utils.backend.set_backend("tensorflow") + bounding_boxes = box_utils.clip_to_image_size( + bounding_boxes, height=height, width=width, format=format + ) + # Switch back to original backend + box_utils.backend.reset() + return bounding_boxes + + +@keras_export("keras.utils.bounding_boxes.affine_transform") +def affine_transform( + boxes, + angle, + translate_x, + translate_y, + scale, + shear_x, + shear_y, + height, + width, + center_x=None, + center_y=None, + format="xyxy", +): + if format != "xyxy": + raise NotImplementedError + # Switch to tensorflow backend if we are in tf.data pipe + box_utils = BoundingBox() + if backend_utils.in_tf_graph(): + box_utils.backend.set_backend("tensorflow") + outputs = box_utils.affine( + boxes, + angle, + translate_x, + translate_y, + scale, + shear_x, + shear_y, + height, + width, + center_x=center_x, + center_y=center_y, + ) + box_utils.backend.reset() + return outputs + + +@keras_export("keras.utils.bounding_boxes.crop") +def crop(boxes, top, left, height, width, format="xyxy"): + if format != "xyxy": + raise NotImplementedError + box_utils = BoundingBox() + if backend_utils.in_tf_graph(): + box_utils.backend.set_backend("tensorflow") + outputs = box_utils.crop(boxes, top, left, height, width) + box_utils.backend.reset() + return outputs + + +@keras_export("keras.utils.bounding_boxes.pad") +def pad(boxes, top, left, format="xyxy"): + if format != "xyxy": + raise NotImplementedError + box_utils = BoundingBox() + if backend_utils.in_tf_graph(): + box_utils.backend.set_backend("tensorflow") + + outputs = box_utils.pad(boxes, top, left) + box_utils.backend.reset() + return outputs diff --git a/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py b/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py index 11772edf08ca..f73170189122 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +++ b/keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py @@ -2,8 +2,28 @@ from keras.src.utils import tf_utils +def _classes_shape(batched, classes_shape, 
max_boxes): + if max_boxes is None: + return None + if batched: + return [None, max_boxes] + classes_shape[2:] + return [max_boxes] + classes_shape[2:] + + +def _box_shape(batched, boxes_shape, max_boxes): + # ensure we dont drop the final axis in RaggedTensor mode + if max_boxes is None: + shape = list(boxes_shape) + shape[-1] = 4 + return shape + if batched: + return [None, max_boxes, 4] + return [max_boxes, 4] + + def densify_bounding_boxes( bounding_boxes, + is_batched=False, max_boxes=None, boxes_default_value=0, labels_default_value=-1, @@ -78,15 +98,19 @@ def densify_bounding_boxes( if tf_utils.is_ragged_tensor(boxes): bounding_boxes["boxes"] = bounding_boxes["boxes"].to_tensor( default_value=boxes_default_value, - shape="TODO", + shape=_classes_shape( + is_batched, bounding_boxes["boxes"].shape, max_boxes + ), ) bounding_boxes["labels"] = bounding_boxes["labels"].to_tensor( default_value=labels_default_value, - shape="TODO", + shape=_box_shape( + is_batched, bounding_boxes["labels"].shape, max_boxes + ), ) return bounding_boxes - bounding_boxes["boxes"] = backend.convert_to_tensor(boxes, dtype="int32") + bounding_boxes["boxes"] = backend.convert_to_tensor(boxes, dtype="float32") bounding_boxes["labels"] = backend.convert_to_tensor(labels) return bounding_boxes diff --git a/keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box.py b/keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box.py new file mode 100644 index 000000000000..dff68d0f3d01 --- /dev/null +++ b/keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box.py @@ -0,0 +1,89 @@ +from keras.src.api_export import keras_export +from keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer import ( # noqa: E501 + BaseImagePreprocessingLayer, +) + + +@keras_export("keras.layers.MaxNumBoundingBoxes") +class MaxNumBoundingBoxes(BaseImagePreprocessingLayer): + """Ensure the maximum number of bounding boxes. + + Args: + max_number: Desired output number of bounding boxes. + padding_value: The padding value of the `boxes` and `labels` in + `bounding_boxes`. Defaults to `-1`. + """ + + def __init__(self, max_number, fill_value=-1, **kwargs): + super().__init__(**kwargs) + self.max_number = int(max_number) + self.fill_value = int(fill_value) + + def transform_images(self, images, transformation=None, training=True): + return images + + def transform_labels(self, labels, transformation=None, training=True): + return labels + + def transform_bounding_boxes( + self, bounding_boxes, transformation, training=True + ): + ops = self.backend + boxes = bounding_boxes["boxes"] + labels = bounding_boxes["labels"] + boxes_shape = ops.shape(boxes) + batch_size = boxes_shape[0] + num_boxes = boxes_shape[1] + + # Get pad size + pad_size = ops.numpy.maximum( + ops.numpy.subtract(self.max_number, num_boxes), 0 + ) + boxes = boxes[:, : self.max_number, ...] 
+ boxes = ops.numpy.pad( + boxes, + [[0, 0], [0, pad_size], [0, 0]], + constant_values=self.fill_value, + ) + labels = labels[:, : self.max_number] + labels = ops.numpy.pad( + labels, [[0, 0], [0, pad_size]], constant_values=self.fill_value + ) + + # Ensure shape + boxes = ops.numpy.reshape(boxes, [batch_size, self.max_number, 4]) + labels = ops.numpy.reshape(labels, [batch_size, self.max_number]) + + bounding_boxes = bounding_boxes.copy() + bounding_boxes["boxes"] = boxes + bounding_boxes["labels"] = labels + return bounding_boxes + + def transform_segmentation_masks( + self, segmentation_masks, transformation=None, training=True + ): + return self.transform_images(segmentation_masks) + + def compute_output_shape(self, input_shape): + if isinstance(input_shape, dict) and "bounding_boxes" in input_shape: + input_keys = set(input_shape["bounding_boxes"].keys()) + extra_keys = input_keys - set(("boxes", "labels")) + if extra_keys: + raise KeyError( + "There are unsupported keys in `bounding_boxes`: " + f"{list(extra_keys)}. " + "Only `boxes` and `labels` are supported." + ) + + boxes_shape = list(input_shape["bounding_boxes"]["boxes"]) + boxes_shape[1] = self.max_number + labels_shape = list(input_shape["bounding_boxes"]["labels"]) + labels_shape[1] = self.max_number + input_shape["bounding_boxes"]["boxes"] = boxes_shape + input_shape["bounding_boxes"]["labels"] = labels_shape + return input_shape + + def get_config(self): + config = super().get_config() + config.update({"max_number": self.max_number}) + return config diff --git a/keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box_test.py b/keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box_test.py new file mode 100644 index 000000000000..efc8037aecea --- /dev/null +++ b/keras/src/layers/preprocessing/image_preprocessing/max_num_bounding_box_test.py @@ -0,0 +1,77 @@ +import numpy as np +from tensorflow import data as tf_data + +from keras.src import backend +from keras.src import layers +from keras.src import testing + + +class MaxNumBoundingBoxesTest(testing.TestCase): + def test_max_num_bounding_boxes_basics(self): + self.run_layer_test( + layers.MaxNumBoundingBoxes, + init_kwargs={ + "max_number": 40, + "fill_value": -1, + }, + input_shape=(12, 12, 3), + expected_output_shape=(12, 12, 3), + expected_num_trainable_weights=0, + expected_num_non_trainable_weights=0, + expected_num_seed_generators=0, + expected_num_losses=0, + supports_masking=False, + run_training_check=False, + ) + + def test_output_shapes(self): + if backend.config.image_data_format() == "channels_last": + image_shape = (10, 8, 3) + else: + image_shape = (3, 10, 8) + input_image = np.random.random(image_shape) + bounding_boxes = { + "boxes": np.array( + [ + [2, 1, 4, 3], + [6, 4, 8, 6], + ] + ), # Example boxes (normalized) + "labels": np.array([1, 2]), # Dummy labels + } + layer = layers.MaxNumBoundingBoxes( + max_number=40, bounding_box_format="xyxy" + ) + + input_data = {"images": input_image, "bounding_boxes": bounding_boxes} + output = layer(input_data) + self.assertAllEqual(output["bounding_boxes"]["boxes"].shape, (40, 4)) + self.assertAllEqual(output["bounding_boxes"]["labels"].shape, (40,)) + + def test_output_shapes_with_tf_data(self): + if backend.config.image_data_format() == "channels_last": + image_shape = (1, 10, 8, 3) + else: + image_shape = (1, 3, 10, 8) + input_image = np.random.random(image_shape) + bounding_boxes = { + "boxes": np.array( + [ + [ + [2, 1, 4, 3], + [6, 4, 8, 6], + ] + ] + ), # Example boxes 
(normalized) + "labels": np.array([[1, 2]]), # Dummy labels + } + layer = layers.MaxNumBoundingBoxes( + max_number=40, bounding_box_format="xyxy" + ) + input_data = {"images": input_image, "bounding_boxes": bounding_boxes} + ds = tf_data.Dataset.from_tensor_slices(input_data) + ds = ds.map(layer) + ds = ds.batch(1) + output = next(iter(ds)) + self.assertAllEqual(output["bounding_boxes"]["boxes"].shape, (1, 40, 4)) + self.assertAllEqual(output["bounding_boxes"]["labels"].shape, (1, 40)) diff --git a/keras/src/layers/preprocessing/image_preprocessing/random_brightness.py b/keras/src/layers/preprocessing/image_preprocessing/random_brightness.py index 49f8ae487864..74aa3106b424 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_brightness.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_brightness.py @@ -133,7 +133,10 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): return bounding_boxes diff --git a/keras/src/layers/preprocessing/image_preprocessing/random_contrast.py b/keras/src/layers/preprocessing/image_preprocessing/random_contrast.py index 5a3b85e73b6e..c9525cb651fa 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_contrast.py @@ -98,7 +98,10 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): return bounding_boxes diff --git a/keras/src/layers/preprocessing/image_preprocessing/random_crop.py b/keras/src/layers/preprocessing/image_preprocessing/random_crop.py index 087c32517cc6..62571e69a931 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_crop.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_crop.py @@ -3,6 +3,9 @@ from keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer import ( # noqa: E501 BaseImagePreprocessingLayer, ) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( # noqa: E501 + convert_format, +) from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.validation import ( # noqa: E501 densify_bounding_boxes, ) @@ -178,11 +181,14 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): """ bounding_boxes = { - "boxes": (batch, num_boxes, 4), # left-top-right-bottom + "boxes": (batch, num_boxes, 4), # left-top-right-bottom (xyxy) "labels": (batch, num_boxes, num_classes), } or @@ -197,7 +203,16 @@ def transform_bounding_boxes( bounding_boxes, backend=self.backend ) boxes = bounding_boxes["boxes"] - + # Convert to a standard xyxy as operations are done xyxy by default. 
+ boxes = convert_format( + boxes=boxes, + source=self.bounding_box_format, + target="xyxy", + height=self.height, + width=self.width, + ) + h_start = self.backend.cast(h_start, boxes.dtype) + w_start = self.backend.cast(w_start, boxes.dtype) if len(self.backend.shape(boxes)) == 3: boxes = self.backend.numpy.stack( [ @@ -218,6 +233,16 @@ def transform_bounding_boxes( ], axis=-1, ) + + # Convert to user defined bounding box format + boxes = convert_format( + boxes=boxes, + source="xyxy", + target=self.bounding_box_format, + height=self.height, + width=self.width, + ) + return { "boxes": boxes, "labels": bounding_boxes["labels"], diff --git a/keras/src/layers/preprocessing/image_preprocessing/random_flip.py b/keras/src/layers/preprocessing/image_preprocessing/random_flip.py index 758f76eae3b7..104b846c0546 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_flip.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_flip.py @@ -89,7 +89,10 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): raise NotImplementedError diff --git a/keras/src/layers/preprocessing/image_preprocessing/random_rotation.py b/keras/src/layers/preprocessing/image_preprocessing/random_rotation.py index b27cd4909e90..16ebff45e6c3 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_rotation.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_rotation.py @@ -4,6 +4,9 @@ from keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer import ( # noqa: E501 BaseImagePreprocessingLayer, ) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( # noqa: E501 + convert_format, +) from keras.src.random.seed_generator import SeedGenerator @@ -125,9 +128,19 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): boxes = bounding_boxes["boxes"] + boxes = convert_format( + boxes=boxes, + source=self.bounding_box_format, + target="xyxy", + height=self.height, + width=self.width, + ) shape = self.backend.shape(boxes) ones = self.backend.ones((shape[0], shape[1], 1, 1)) homogeneous_boxes = self.backend.concatenate([boxes, ones], axis=2) @@ -141,6 +154,13 @@ def transform_bounding_boxes( transformed_boxes = self.backend.reshape( transformed_boxes, (shape[0], shape[1], 4) ) + boxes = convert_format( + boxes=boxes, + source="xyxy", + target=self.bounding_box_format, + height=self.height, + width=self.width, + ) return {"boxes": transformed_boxes, "labels": bounding_boxes["labels"]} def transform_segmentation_masks( diff --git a/keras/src/layers/preprocessing/image_preprocessing/random_translation.py b/keras/src/layers/preprocessing/image_preprocessing/random_translation.py index 8933ec50c4e9..ae357b41f24a 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_translation.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_translation.py @@ -167,7 +167,10 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): raise NotImplementedError diff --git 
a/keras/src/layers/preprocessing/image_preprocessing/random_zoom.py b/keras/src/layers/preprocessing/image_preprocessing/random_zoom.py index 2c6c1ba52a1d..78473e4e2e98 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/random_zoom.py +++ b/keras/src/layers/preprocessing/image_preprocessing/random_zoom.py @@ -176,7 +176,10 @@ def transform_labels(self, labels, transformation, training=True): return labels def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): raise NotImplementedError diff --git a/keras/src/layers/preprocessing/image_preprocessing/resizing.py b/keras/src/layers/preprocessing/image_preprocessing/resizing.py index c21d079fa899..0653025cc530 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/resizing.py +++ b/keras/src/layers/preprocessing/image_preprocessing/resizing.py @@ -3,6 +3,12 @@ from keras.src.layers.preprocessing.image_preprocessing.base_image_preprocessing_layer import ( # noqa: E501 BaseImagePreprocessingLayer, ) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( # noqa: E501 + clip_to_image_size, +) +from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes.converters import ( # noqa: E501 + convert_format, +) from keras.src.ops.core import _saturate_cast @@ -85,6 +91,12 @@ def __init__( self.pad_to_aspect_ratio = pad_to_aspect_ratio self.fill_mode = fill_mode self.fill_value = fill_value + if self.data_format == "channels_first": + self.height_axis = -2 + self.width_axis = -1 + elif self.data_format == "channels_last": + self.height_axis = -3 + self.width_axis = -2 def transform_images(self, images, transformation=None, training=True): size = (self.height, self.width) @@ -112,10 +124,154 @@ def transform_segmentation_masks( def transform_labels(self, labels, transformation=None, training=True): return labels + def get_random_transformation(self, data, training=True, seed=None): + + if isinstance(data, dict): + input_shape = self.backend.shape(data["images"]) + else: + input_shape = self.backend.shape(data) + + input_height, input_width = ( + input_shape[self.height_axis], + input_shape[self.width_axis], + ) + + return input_height, input_width + def transform_bounding_boxes( - self, bounding_boxes, transformation, training=True + self, + bounding_boxes, + transformation, + training=True, ): - raise NotImplementedError + ops = self.backend + input_height, input_width = transformation + mask_negative_1s = ops.numpy.all(bounding_boxes["boxes"] == -1, axis=-1) + mask_zeros = ops.numpy.all(bounding_boxes["boxes"] == 0, axis=-1) + boxes_mask = ops.numpy.logical_or(mask_negative_1s, mask_zeros) + + bounding_boxes = convert_format( + bounding_boxes, + source=self.bounding_box_format, + target="xyxy", + height=input_height, + width=input_width, + ) + + bounding_boxes["boxes"] = self._transform_xyxy( + bounding_boxes["boxes"], + input_height=input_height, + input_width=input_width, + ) + + bounding_boxes = clip_to_image_size( + bounding_boxes=bounding_boxes, + height=self.height, + width=self.width, + format="xyxy", + ) + + bounding_boxes["boxes"] = ops.numpy.where( + ops.numpy.expand_dims(boxes_mask, axis=-1), + ops.convert_to_tensor( + [0.0, 0.0, 0.0, 0.0], dtype=bounding_boxes["boxes"].dtype + ), + bounding_boxes["boxes"], + ) + + bounding_boxes = convert_format( + bounding_boxes, + source="xyxy", + target=self.bounding_box_format, + height=self.height, + width=self.width, + ) + + return 
bounding_boxes + + def _transform_xyxy(self, boxes, input_height, input_width): + ops = self.backend + input_height = ops.cast(input_height, dtype=boxes.dtype) + input_width = ops.cast(input_width, dtype=boxes.dtype) + + if self.pad_to_aspect_ratio: + return self._transform_boxes_pad_to_aspect_ratio( + boxes, input_height, input_width + ) + elif self.crop_to_aspect_ratio: + return self._transform_boxes_crop_to_aspect_ratio( + boxes, input_height, input_width + ) + else: + return self._transform_boxes_stretch( + boxes, input_height, input_width + ) + + def _transform_boxes_pad_to_aspect_ratio( + self, boxes, input_height, input_width + ): + """Transforms bounding boxes for padding to aspect ratio.""" + ops = self.backend + height_ratio = ops.cast(self.height / input_height, dtype=boxes.dtype) + width_ratio = ops.cast(self.width / input_width, dtype=boxes.dtype) + min_aspect_ratio = ops.numpy.minimum(height_ratio, width_ratio) + y_offset = (self.height - input_height * min_aspect_ratio) // 2 + x_offset = (self.width - input_width * min_aspect_ratio) // 2 + return ops.numpy.stack( + [ + boxes[..., 0] * min_aspect_ratio + x_offset, + boxes[..., 1] * min_aspect_ratio + y_offset, + boxes[..., 2] * min_aspect_ratio + x_offset, + boxes[..., 3] * min_aspect_ratio + y_offset, + ], + axis=-1, + ) + + def _transform_boxes_crop_to_aspect_ratio( + self, boxes, input_height, input_width + ): + """Transforms bounding boxes for cropping to aspect ratio.""" + ops = self.backend + source_aspect_ratio = input_width / input_height + target_aspect_ratio = self.width / self.height + new_width = ops.numpy.where( + source_aspect_ratio > target_aspect_ratio, + self.height * source_aspect_ratio, + self.width, + ) + new_height = ops.numpy.where( + source_aspect_ratio > target_aspect_ratio, + self.height, + self.width / source_aspect_ratio, + ) + scale_x = new_width / input_width + scale_y = new_height / input_height + crop_left = (new_width - self.width) // 2 + crop_top = (new_height - self.height) // 2 + return ops.numpy.stack( + [ + boxes[..., 0] * scale_x - crop_left, + boxes[..., 1] * scale_y - crop_top, + boxes[..., 2] * scale_x - crop_left, + boxes[..., 3] * scale_y - crop_top, + ], + axis=-1, + ) + + def _transform_boxes_stretch(self, boxes, input_height, input_width): + """Transforms bounding boxes by simple stretching.""" + ops = self.backend + height_ratio = ops.cast(self.height / input_height, dtype=boxes.dtype) + width_ratio = ops.cast(self.width / input_width, dtype=boxes.dtype) + return ops.numpy.stack( + [ + boxes[..., 0] * width_ratio, + boxes[..., 1] * height_ratio, + boxes[..., 2] * width_ratio, + boxes[..., 3] * height_ratio, + ], + axis=-1, + ) def compute_output_shape(self, input_shape): input_shape = list(input_shape) diff --git a/keras/src/layers/preprocessing/image_preprocessing/resizing_test.py b/keras/src/layers/preprocessing/image_preprocessing/resizing_test.py index 4d0374238ddb..c2b81c6d2d9d 100644 --- a/keras/src/layers/preprocessing/image_preprocessing/resizing_test.py +++ b/keras/src/layers/preprocessing/image_preprocessing/resizing_test.py @@ -221,3 +221,106 @@ def test_data_stretch(self, size, data_format): size[0], size[1], data_format=data_format, crop_to_aspect_ratio=True )(img) self.assertEqual(output.shape, (1, *size, 4)) + + @parameterized.named_parameters( + ( + "with_pad_to_aspect_ratio", + True, + False, + [[6.0, 2.0, 10.0, 6.0], [14.0, 8.0, 18.0, 12.0]], + ), + ( + "with_crop_to_aspect_ratio", + False, + True, + [[5.0, 0.5, 10.0, 5.5], [15.0, 8.0, 20.0, 13.0]], + ), + ( + 
"boxes_stretch", + False, + False, + [[5.0, 2.0, 10.0, 6.0], [15.0, 8.0, 20.0, 12.0]], + ), + ) + def test_resize_bounding_boxes( + self, pad_to_aspect_ratio, crop_to_aspect_ratio, expected_boxes + ): + if backend.config.image_data_format() == "channels_last": + image_shape = (10, 8, 3) + else: + image_shape = (3, 10, 8) + input_image = np.random.random(image_shape) + bounding_boxes = { + "boxes": np.array( + [ + [2, 1, 4, 3], + [6, 4, 8, 6], + ] + ), # Example boxes (normalized) + "labels": np.array([[1, 2]]), # Dummy labels + } + input_data = {"images": input_image, "bounding_boxes": bounding_boxes} + resizing_layer = layers.Resizing( + height=20, + width=20, + pad_to_aspect_ratio=pad_to_aspect_ratio, + crop_to_aspect_ratio=crop_to_aspect_ratio, + bounding_box_format="xyxy", + ) + output = resizing_layer(input_data) + self.assertAllClose(output["bounding_boxes"]["boxes"], expected_boxes) + + @parameterized.named_parameters( + ( + "with_pad_to_aspect_ratio", + True, + False, + [[6.0, 2.0, 10.0, 6.0], [14.0, 8.0, 18.0, 12.0]], + ), + ( + "with_crop_to_aspect_ratio", + False, + True, + [[5.0, 0.5, 10.0, 5.5], [15.0, 8.0, 20.0, 13.0]], + ), + ( + "boxes_stretch", + False, + False, + [[5.0, 2.0, 10.0, 6.0], [15.0, 8.0, 20.0, 12.0]], + ), + ) + def test_resize_tf_data_bounding_boxes( + self, pad_to_aspect_ratio, crop_to_aspect_ratio, expected_boxes + ): + if backend.config.image_data_format() == "channels_last": + image_shape = (1, 10, 8, 3) + else: + image_shape = (1, 3, 10, 8) + input_image = np.random.random(image_shape) + bounding_boxes = { + "boxes": np.array( + [ + [ + [2, 1, 4, 3], + [6, 4, 8, 6], + ] + ] + ), # Example boxes (normalized) + "labels": np.array([[1, 2]]), # Dummy labels + } + + input_data = {"images": input_image, "bounding_boxes": bounding_boxes} + + ds = tf_data.Dataset.from_tensor_slices(input_data) + resizing_layer = layers.Resizing( + height=20, + width=20, + pad_to_aspect_ratio=pad_to_aspect_ratio, + crop_to_aspect_ratio=crop_to_aspect_ratio, + bounding_box_format="xyxy", + ) + ds = ds.map(resizing_layer) + output = next(iter(ds)) + expected_boxes = np.array(expected_boxes) + self.assertAllClose(output["bounding_boxes"]["boxes"], expected_boxes)