diff --git a/.gitignore b/.gitignore index 684bac1..d33c12d 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,10 @@ #Ignore pycache dirs object_detection/object_detection/Detectors/__pycache__/ object_detection/object_detection/__pycache__/ +**/__pycache__ + +# Ignore log folder +log/ #Ignore .vscode dir .vscode \ No newline at end of file diff --git a/instance_segmentation/config/params.yaml b/instance_segmentation/config/params.yaml new file mode 100644 index 0000000..8b2cd5c --- /dev/null +++ b/instance_segmentation/config/params.yaml @@ -0,0 +1,14 @@ +## !!! THIS IS A TEMPORARY ADDITION TO THE FILE, NOT YET TESTED !!! +instance_segmentation: + ros__parameters: + input_img_topic: color_camera/image_raw + output_bb_topic: instance_segmentation/img_bb + output_img_topic: instance_segmentation/img + publish_output_img: 1 + model_params: + segmentator_type: YOLOv8 + model_dir_path: /home/deepansh/git/percep_ws/src/ros-perception-pipeline/models + weight_file_name: yolov8s-seg.pt + confidence_threshold: 0.7 + show_fps: 1 + diff --git a/instance_segmentation/instance_segmentation/InstanceSegmentation.py b/instance_segmentation/instance_segmentation/InstanceSegmentation.py new file mode 100644 index 0000000..ac39b7a --- /dev/null +++ b/instance_segmentation/instance_segmentation/InstanceSegmentation.py @@ -0,0 +1,138 @@ +#! 
# ---------------------------------------------------------------------------
# File: instance_segmentation/instance_segmentation/InstanceSegmentation.py
# ---------------------------------------------------------------------------
#! /usr/bin/env python3

import os
import importlib

import rclpy
from rclpy.node import Node

from sensor_msgs.msg import Image
from vision_msgs.msg import Point2D
from masks_msgs.msg import Mask, MaskArray

from cv_bridge import CvBridge
import cv2
import numpy as np


class InstanceSegmentation(Node):
    """ROS 2 node that runs a pluggable segmentator on every incoming image.

    The segmentator implementation is chosen at start-up through the
    ``model_params.segmentator_type`` parameter and is imported from the
    ``Segmentators`` directory that sits next to this file.  For each image
    the node publishes the annotated frame (optional) and a ``MaskArray``
    holding one polygon + label per detected instance.
    """

    def __init__(self):
        super().__init__('instance_segmentation')

        # Names of all segmentator modules found in the Segmentators dir.
        self.available_segmentators = []
        self.discover_segmentators()

        self.declare_parameters(
            namespace='',
            parameters=[
                ('input_img_topic', ""),
                ('output_bb_topic', ""),
                # The original was missing the comma after this entry, which
                # turned the next tuple into a call on this one (TypeError).
                ('output_img_topic', ""),
                ('output_vision_topic', ""),
                # Read below, so it has to be declared as well.
                ('output_mask_topic', ""),
                # Present in config/params.yaml; 1 keeps the previous
                # behaviour of always publishing the annotated image.
                ('publish_output_img', 1),
                ('model_params.segmentator_type', ""),
                ('model_params.model_dir_path', ""),
                ('model_params.weight_file_name', ""),
                ('model_params.confidence_threshold', 0.7),
                ('model_params.show_fps', 1),
            ]
        )

        # node params
        self.input_img_topic = self.get_parameter('input_img_topic').value
        self.output_bb_topic = self.get_parameter('output_bb_topic').value
        self.output_img_topic = self.get_parameter('output_img_topic').value
        self.output_vision_topic = self.get_parameter('output_vision_topic').value
        self.output_mask_topic = self.get_parameter("output_mask_topic").value
        self.publish_output_img = bool(self.get_parameter('publish_output_img').value)

        # model params
        self.segmentator_type = self.get_parameter('model_params.segmentator_type').value
        self.model_dir_path = self.get_parameter('model_params.model_dir_path').value
        self.weight_file_name = self.get_parameter('model_params.weight_file_name').value
        self.confidence_threshold = self.get_parameter('model_params.confidence_threshold').value
        self.show_fps = self.get_parameter('model_params.show_fps').value

        logger = self.get_logger()
        logger.info(f"Model dir: {self.model_dir_path}")
        logger.info(f"Model: {self.weight_file_name}")
        logger.info(f"Segmentator: {self.segmentator_type}")

        # Fail early if the configured segmentator does not exist.
        if self.segmentator_type not in self.available_segmentators:
            raise ModuleNotFoundError(self.segmentator_type + " Segmentator specified in config was not found. " +
                                      "Check the Segmentators dir for available segmentators.")
        # Load exactly once (the original loaded the model twice).
        self.load_segmentator()

        # Create the bridge before the subscription so the callback can never
        # run without it.
        self.bridge = CvBridge()

        # config/params.yaml sets neither mask topic parameter, and their
        # declared default is an empty string; fall back to a fixed name so
        # the publisher is never created with an empty topic.
        mask_topic = (self.output_mask_topic
                      or self.output_vision_topic
                      or 'instance_segmentation/masks')

        self.img_pub = self.create_publisher(Image, self.output_img_topic, 10)
        self.bb_pub = None
        self.vision_msg_pub = self.create_publisher(MaskArray, mask_topic, 10)
        # The original bound a non-existent `segmentation_image` method here.
        self.img_sub = self.create_subscription(Image, self.input_img_topic, self.segmentation_cb, 10)

    def discover_segmentators(self):
        """Fill ``available_segmentators`` with the module names in ./Segmentators.

        Every ``*.py`` file counts as a segmentator except dunder modules such
        as ``__init__.py``.
        """
        segmentators_dir = os.path.join(os.path.dirname(__file__), "Segmentators")

        for entity in sorted(os.listdir(segmentators_dir)):
            # Filtering here (instead of list.remove afterwards) does not
            # raise when __init__.py happens to be absent.
            if entity.endswith('.py') and not entity.startswith('__'):
                self.available_segmentators.append(entity[:-3])

        self.get_logger().info(f"Available segmentators: {self.available_segmentators}")

    def load_segmentator(self):
        """Import, construct and initialise the configured segmentator.

        The segmentator class must have the same name as its module, accept
        ``conf_threshold`` / ``show_fps`` keyword arguments, and implement
        ``build_model(model_dir_path, weight_file_name)`` and
        ``load_classes(model_dir_path)`` as declared on ``SegmentatorBase``.
        """
        segmentator_mod = importlib.import_module(".Segmentators." + self.segmentator_type,
                                                  "instance_segmentation")
        segmentator_class = getattr(segmentator_mod, self.segmentator_type)

        # The original passed (model_dir_path, weight_file_name) positionally,
        # which landed in the threshold parameters of the constructor.
        self.segmentator = segmentator_class(conf_threshold=self.confidence_threshold,
                                             show_fps=self.show_fps)

        self.segmentator.build_model(self.model_dir_path, self.weight_file_name)
        self.segmentator.load_classes(self.model_dir_path)

        self.get_logger().info("Your segmentator : {} has been loaded !".format(self.segmentator_type))

    def segmentation_cb(self, img_msg):
        """Segment one image and publish the masks (and the annotated frame).

        :param img_msg: incoming ``sensor_msgs/Image``; converted to BGR8.
        """
        cv_image = self.bridge.imgmsg_to_cv2(img_msg, "bgr8")

        predictions, _ = self.segmentator.get_predictions(cv_image=cv_image)

        if predictions is None:
            self.get_logger().warn(
                "Image input from topic : {} is empty".format(self.input_img_topic))
            return

        # Messages must be instantiated; the original used the classes
        # themselves and called a non-existent `.append` on MaskArray.
        mask_arr = MaskArray()

        for prediction in predictions:
            # Accept both an (N, 2) array and a one-element list wrapping it.
            polygon = np.asarray(prediction["mask"], dtype=np.float64).reshape(-1, 2)
            if polygon.size == 0:
                continue  # nothing to draw or publish for an empty contour

            mask = Mask()
            # Mask.polygon is declared as vision_msgs/Point2D[].
            mask.polygon = [Point2D(x=float(x), y=float(y)) for x, y in polygon]
            # Mask.label is int64; the segmentator may hand back a 1-element
            # tensor/array, so normalise it to a plain int.
            mask.label = int(prediction["class_id"])
            mask_arr.mask_list.append(mask)

            # Draw the polygon on the output frame.
            cv2.polylines(cv_image, [polygon.astype(np.int32)], isClosed=True,
                          color=(0, 255, 0), thickness=2)

        if self.publish_output_img:
            output_frame = self.bridge.cv2_to_imgmsg(cv_image, "bgr8")
            # Keep stamp/frame_id so consumers can match it to the input.
            output_frame.header = img_msg.header
            self.img_pub.publish(output_frame)

        self.vision_msg_pub.publish(mask_arr)


def main():
    """Entry point: spin the node until interrupted, then clean up."""
    rclpy.init()
    iseg = InstanceSegmentation()
    try:
        rclpy.spin(iseg)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        # Top-level boundary: report instead of dying with a bare traceback.
        iseg.get_logger().error(str(e))
    finally:
        iseg.destroy_node()
        # The context may already be shut down by the signal handler.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# File: instance_segmentation/instance_segmentation/SegmentatorBase.py
# ---------------------------------------------------------------------------
from abc import ABC, abstractmethod
from typing import Optional
import numpy as np


class SegmentatorBase(ABC):
    """Interface every segmentator plugin has to implement."""

    def __init__(self) -> None:
        # Predictions of the most recent frame; one dict per instance with
        # the keys "class_id", "confidence" and "mask".
        self.predictions = []

    def create_predictions_list(self, class_ids, confidences, masks):
        """Rebuild ``self.predictions`` from three parallel sequences.

        The list is replaced on every call; the original appended to it, so
        results accumulated across calls.

        :raises ValueError: if the three sequences differ in length.
        """
        if not (len(class_ids) == len(confidences) == len(masks)):
            raise ValueError(
                "class_ids, confidences and masks must have the same length "
                "(got {}, {}, {})".format(len(class_ids), len(confidences), len(masks)))

        self.predictions = [
            {"class_id": class_id, "confidence": confidence, "mask": mask}
            for class_id, confidence, mask in zip(class_ids, confidences, masks)
        ]

    @abstractmethod
    def build_model(self, model_dir_path: str, weight_file_name: str) -> None:
        """Load the model weights found at model_dir_path/weight_file_name."""

    @abstractmethod
    def load_classes(self, model_dir_path: str) -> None:
        """Load the class names that belong to the model."""

    @abstractmethod
    def get_predictions(self, cv_image: np.ndarray) -> tuple[Optional[list[dict]], Optional[np.ndarray]]:
        """Return ``(predictions, image)``, or ``(None, None)`` for no input."""

    @abstractmethod
    def get_segmented_image(self, cv_image: np.ndarray) -> Optional[np.ndarray]:
        """Return a copy of the image with the segmentation drawn on it."""


# ---------------------------------------------------------------------------
# File: instance_segmentation/instance_segmentation/Segmentators/YOLOv8.py
# ---------------------------------------------------------------------------
import cv2
from ultralytics import YOLO
import os
import sys
import time
import numpy as np

from ..SegmentatorBase import SegmentatorBase


class YOLOv8(SegmentatorBase):
    """Instance segmentation backed by an Ultralytics YOLOv8-seg model."""

    def __init__(self, conf_threshold = 0.7,
                 score_threshold = 0.4, nms_threshold = 0.25,
                 show_fps = 1, is_cuda = 0):
        """Store the settings only; the model is loaded by ``build_model``.

        The original constructor called ``build_model()`` and
        ``load_classes()`` without their required path arguments, which
        always raised ``TypeError``.  ``score_threshold`` and
        ``nms_threshold`` are accepted but not used.
        """
        super().__init__()

        self.conf_threshold = conf_threshold
        self.show_fps = show_fps
        self.is_cuda = is_cuda

        # FPS bookkeeping. Always initialised because get_predictions()
        # increments the counters even when the overlay is disabled.
        self.frame_count = 0
        self.total_frames = 0
        self.fps = -1
        self.start = time.time_ns()
        self.frame = None

        self.model = None
        self.class_list = []
        self.predictions = []

    def build_model(self, model_dir_path, weight_file_name):
        """Load the weights at ``model_dir_path/weight_file_name``.

        :raises Exception: if the model cannot be loaded; the underlying
            error is chained as ``__cause__``.
        """
        # Built outside the try block so it is always bound for the message.
        model_path = os.path.join(model_dir_path, weight_file_name)

        try:
            self.model = YOLO(model_path)
        except Exception as err:
            raise Exception("Error loading given model from path: {}. Maybe the file doesn't exist?".format(model_path)) from err

    def load_classes(self, model_dir_path):
        """Read one class name per line from ``model_dir_path/classes.txt``.

        :return: the list of class names (also stored in ``class_list``).
        """
        with open(os.path.join(model_dir_path, "classes.txt"), "r") as f:
            self.class_list = [cname.strip() for cname in f]

        return self.class_list

    def get_predictions(self, cv_image):
        """Run the model on one BGR image.

        :return: ``(predictions, cv_image)`` where ``predictions`` is a list
            of dicts with keys ``class_id`` (int), ``confidence`` (float) and
            ``mask`` (polygon in pixel coordinates), or ``(None, None)`` when
            ``cv_image`` is ``None``.  When FPS display is enabled the FPS
            label is drawn onto ``cv_image`` in place.
        :raises RuntimeError: if ``build_model`` has not been called yet.
        """
        if cv_image is None:
            # TODO: show warning message (different color, maybe)
            return None, None

        if self.model is None:
            raise RuntimeError("build_model() must be called before get_predictions()")

        self.frame = cv_image
        self.frame_count += 1
        self.total_frames += 1

        # Perform segmentation on the image; one Results object per image.
        result = self.model.predict(self.frame, conf = self.conf_threshold)[0]

        class_ids = []
        confidences = []
        masks = []
        # `masks` is None when nothing was detected; iterating it directly
        # (as the original did) would raise TypeError.
        if result.boxes is not None and result.masks is not None:
            class_ids = [int(cls) for cls in result.boxes.cls.tolist()]
            confidences = [float(conf) for conf in result.boxes.conf.tolist()]
            # One polygon per instance, in pixel coordinates.
            masks = list(result.masks.xy)

        # The original collected these lists but never stored them, so the
        # returned predictions were always empty.
        self.predictions = []
        super().create_predictions_list(class_ids, confidences, masks)

        print("frame_count : ", self.frame_count)

        if self.show_fps:
            if self.frame_count >= 30:
                self.end = time.time_ns()
                # max(..., 1) guards against a zero elapsed time.
                self.fps = 1000000000 * self.frame_count / max(self.end - self.start, 1)
                self.frame_count = 0
                self.start = time.time_ns()

            if self.fps > 0:
                self.fps_label = "FPS: %.2f" % self.fps
                cv2.putText(cv_image, self.fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        return self.predictions, cv_image

    def get_segmented_image(self, cv_image):
        """Return the image with the model's segmentation rendered on it.

        The original saved the prediction to disk, opened a preview window
        and then read a hard-coded ``~/...`` path with ``cv2.imread``, which
        does not expand ``~``.  The annotated frame is now taken directly
        from the result object.

        :return: annotated image, or ``None`` when ``cv_image`` is ``None``.
        :raises RuntimeError: if ``build_model`` has not been called yet.
        """
        if cv_image is None:
            return None

        if self.model is None:
            raise RuntimeError("build_model() must be called before get_segmented_image()")

        # Uses the configured threshold, like get_predictions(); the original
        # hard-coded 0.2 here.
        results = self.model.predict(cv_image, conf = self.conf_threshold)
        return results[0].plot()
a/instance_segmentation/instance_segmentation/Segmentators/__init__.py b/instance_segmentation/instance_segmentation/Segmentators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/instance_segmentation/instance_segmentation/__init__.py b/instance_segmentation/instance_segmentation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/instance_segmentation/instance_segmentation/__pycache__/InstanceSegmentation.cpython-310.pyc b/instance_segmentation/instance_segmentation/__pycache__/InstanceSegmentation.cpython-310.pyc new file mode 100644 index 0000000..f577c3e Binary files /dev/null and b/instance_segmentation/instance_segmentation/__pycache__/InstanceSegmentation.cpython-310.pyc differ diff --git a/instance_segmentation/instance_segmentation/__pycache__/SegmentatorBase.cpython-310.pyc b/instance_segmentation/instance_segmentation/__pycache__/SegmentatorBase.cpython-310.pyc new file mode 100644 index 0000000..3ebfb14 Binary files /dev/null and b/instance_segmentation/instance_segmentation/__pycache__/SegmentatorBase.cpython-310.pyc differ diff --git a/instance_segmentation/instance_segmentation/__pycache__/__init__.cpython-310.pyc b/instance_segmentation/instance_segmentation/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..5e190b6 Binary files /dev/null and b/instance_segmentation/instance_segmentation/__pycache__/__init__.cpython-310.pyc differ diff --git a/instance_segmentation/launch/__pycache__/instance_segmentation.launch.cpython-310.pyc b/instance_segmentation/launch/__pycache__/instance_segmentation.launch.cpython-310.pyc new file mode 100644 index 0000000..ed7f0dc Binary files /dev/null and b/instance_segmentation/launch/__pycache__/instance_segmentation.launch.cpython-310.pyc differ diff --git a/instance_segmentation/launch/instance_segmentation.launch.py b/instance_segmentation/launch/instance_segmentation.launch.py new file mode 100644 index 0000000..14d37cf --- /dev/null +++ 
b/instance_segmentation/launch/instance_segmentation.launch.py @@ -0,0 +1,45 @@ +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +from ament_index_python.packages import get_package_share_directory + +from launch import LaunchDescription +from launch.actions import IncludeLaunchDescription, DeclareLaunchArgument +from launch.substitutions import LaunchConfiguration +from launch.launch_description_sources import PythonLaunchDescriptionSource +from launch_ros.actions import Node + + +def generate_launch_description(): + pkg_instance_segmentation = get_package_share_directory("instance_segmentation") + + params = os.path.join( + pkg_instance_segmentation, + 'config', + 'params.yaml' + ) + + node=Node( + package = 'instance_segmentation', + name = 'instance_segmentation', + executable = 'InstanceSegmentation', + parameters = [params], + output="screen" + ) + + + return LaunchDescription([node]) diff --git a/instance_segmentation/package.xml b/instance_segmentation/package.xml new file mode 100644 index 0000000..c13084b --- /dev/null +++ b/instance_segmentation/package.xml @@ -0,0 +1,19 @@ + + + + instance_segmentation + 0.0.0 + TODO: Package description + deepansh + TODO: License declaration + + vision_msgs + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + ament_python + + diff --git a/instance_segmentation/resource/instance_segmentation 
b/instance_segmentation/resource/instance_segmentation new file mode 100644 index 0000000..e69de29 diff --git a/instance_segmentation/runs/segment/predict/image0.jpg b/instance_segmentation/runs/segment/predict/image0.jpg new file mode 100644 index 0000000..38cdfef Binary files /dev/null and b/instance_segmentation/runs/segment/predict/image0.jpg differ diff --git a/instance_segmentation/setup.cfg b/instance_segmentation/setup.cfg new file mode 100644 index 0000000..a4402a5 --- /dev/null +++ b/instance_segmentation/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script_dir=$base/lib/instance_segmentation +[install] +install_scripts=$base/lib/instance_segmentation diff --git a/instance_segmentation/setup.py b/instance_segmentation/setup.py new file mode 100644 index 0000000..3eac59e --- /dev/null +++ b/instance_segmentation/setup.py @@ -0,0 +1,30 @@ +from setuptools import find_packages, setup +import os +from glob import glob + +package_name = 'instance_segmentation' + +setup( + name=package_name, + version='0.0.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + (os.path.join('share', package_name, 'config'), glob('config/*.yaml')), + (os.path.join('share', package_name, 'launch'), glob('launch/*.launch.py')), + ], + install_requires=['setuptools', 'vision_msgs'], + zip_safe=True, + maintainer='deepansh', + maintainer_email='gl.deepansh@gmail.com', + description='TODO: Package description', + license='TODO: License declaration', + tests_require=['pytest'], + entry_points={ + 'console_scripts': [ + 'InstanceSegmentation = instance_segmentation.InstanceSegmentation:main', + ], + }, +) diff --git a/instance_segmentation/test/test_copyright.py b/instance_segmentation/test/test_copyright.py new file mode 100644 index 0000000..97a3919 --- /dev/null +++ b/instance_segmentation/test/test_copyright.py @@ -0,0 +1,25 @@ +# Copyright 2015 Open 
Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/instance_segmentation/test/test_flake8.py b/instance_segmentation/test/test_flake8.py new file mode 100644 index 0000000..27ee107 --- /dev/null +++ b/instance_segmentation/test/test_flake8.py @@ -0,0 +1,25 @@ +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/instance_segmentation/test/test_pep257.py b/instance_segmentation/test/test_pep257.py new file mode 100644 index 0000000..b234a38 --- /dev/null +++ b/instance_segmentation/test/test_pep257.py @@ -0,0 +1,23 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings' diff --git a/masks_msgs/CMakeLists.txt b/masks_msgs/CMakeLists.txt new file mode 100644 index 0000000..8c7c6f9 --- /dev/null +++ b/masks_msgs/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.8) +project(masks_msgs) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +# find dependencies +find_package(ament_cmake REQUIRED) +find_package(vision_msgs REQUIRED) +find_package(rosidl_default_generators REQUIRED) + +# uncomment the following section in order to fill in +# further dependencies manually. 
+# find_package( REQUIRED) + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/Mask.msg" + "msg/MaskArray.msg" + DEPENDENCIES vision_msgs # Add the packages that the above messages depend on; here vision_msgs, for the Point2D[] field of Mask.msg +) + + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + # the following line skips the linter which checks for copyrights + # comment the line when a copyright and license is added to all source files + set(ament_cmake_copyright_FOUND TRUE) + # the following line skips cpplint (only works in a git repo) + # comment the line when this package is in a git repo and when + # a copyright and license is added to all source files + set(ament_cmake_cpplint_FOUND TRUE) + ament_lint_auto_find_test_dependencies() +endif() + +ament_package() diff --git a/masks_msgs/msg/Mask.msg b/masks_msgs/msg/Mask.msg new file mode 100644 index 0000000..0a64b66 --- /dev/null +++ b/masks_msgs/msg/Mask.msg @@ -0,0 +1,2 @@ +vision_msgs/Point2D[] polygon +int64 label \ No newline at end of file diff --git a/masks_msgs/msg/MaskArray.msg b/masks_msgs/msg/MaskArray.msg new file mode 100644 index 0000000..635b2a1 --- /dev/null +++ b/masks_msgs/msg/MaskArray.msg @@ -0,0 +1 @@ +masks_msgs/Mask[] mask_list \ No newline at end of file diff --git a/masks_msgs/msg/__init__.py b/masks_msgs/msg/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/masks_msgs/package.xml b/masks_msgs/package.xml new file mode 100644 index 0000000..5823fd8 --- /dev/null +++ b/masks_msgs/package.xml @@ -0,0 +1,23 @@ + + + + masks_msgs + 0.0.0 + TODO: Package description + deepansh + TODO: License declaration + + ament_cmake + + ament_lint_auto + ament_lint_common + + vision_msgs + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + + + ament_cmake + + diff --git a/models/classes.txt b/models/classes.txt new file mode 100644 index 0000000..f0c1017 --- /dev/null +++ b/models/classes.txt @@ -0,0 +1,3 @@ +hello +hi +test \ No newline at end of file 
diff --git a/models/yolov8s-seg.onnx b/models/yolov8s-seg.onnx new file mode 100644 index 0000000..73c958c Binary files /dev/null and b/models/yolov8s-seg.onnx differ diff --git a/models/yolov8s-seg.pt b/models/yolov8s-seg.pt new file mode 100644 index 0000000..6e924a3 Binary files /dev/null and b/models/yolov8s-seg.pt differ diff --git a/object_detection/object_detection/Detectors/YOLOv5.py b/object_detection/object_detection/Detectors/YOLOv5.py index 571f3d0..072c799 100644 --- a/object_detection/object_detection/Detectors/YOLOv5.py +++ b/object_detection/object_detection/Detectors/YOLOv5.py @@ -146,6 +146,6 @@ def get_predictions(self, cv_image): super().create_predictions_list(class_ids, confidences, boxes) - print("Detected ids: ", class_ids) + print("Detected ids: ", class_ids) return self.predictions \ No newline at end of file diff --git a/output.png b/output.png new file mode 100644 index 0000000..944640e Binary files /dev/null and b/output.png differ diff --git a/vision_msgs/CHANGELOG.rst b/vision_msgs/CHANGELOG.rst new file mode 100644 index 0000000..4413644 --- /dev/null +++ b/vision_msgs/CHANGELOG.rst @@ -0,0 +1,95 @@ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Changelog for package vision_msgs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +4.0.0 (2022-03-19) +------------------ +* Merge pull request `#67 `_ from ijnek/ijnek-point-2d + Add Point2d message, and use it in Pose2D +* Update msg/Point2D.msg + Co-authored-by: Adam Allevato +* add Point2D message +* replace deprecated pose2d with pose (`#64 `_) + * replace deprecated geometry_msgs/Pose2D with geometry_msgs/Pose + * replace Pose2D with Pose. 
+ * add Pose2D.msg + * undo some changes made + * Update msg/Pose2D.msg + Co-authored-by: Adam Allevato + Co-authored-by: Adam Allevato +* Clarify array messages in readme +* Trailing newline in bbox2Darray +* Add bbox array msgs to CMakeLists +* Added BoundingBox2DArray message +* Contributors: Adam Allevato, Fruchtzwerg94, Kenji Brameld + +3.0.1 (2021-07-20) +------------------ +* Patch for how C++14 is set for ROS2 (`#58 `_) +* Contributors: Dustin Franklin + +3.0.0 (2021-04-13) +------------------ +* Add license snippet in CONTRIBUTING.md +* Decouple source data from the detection/classification messages. (`#53 `_) + * Decouple source data from the detection/classification messages. + This commit drops dependency on sensor_msgs + * Improved documentation. +* Merge pull request `#52 `_ from mintar/clarify-class-object-id + Rename tracking_id -> id, id -> class_id +* Rename DetectionXD.tracking_id -> id +* Rename ObjectHypothesis.id -> class_id +* Merge pull request `#51 `_ from ros-perception/clarify-bbox-size + Clarify comment for size fields in bounding box messages +* Revert confusing comment about bbox orientation +* Merge pull request `#50 `_ from ros-perception/remove-is-tracking-field + Remove is_tracking field +* Remove other mentions to is_tracking field +* Clarify bbox size comment +* Remove tracking_id from Detection3D as well +* Remove is_tracking field + This field does not seem useful, and we are not aware of anyone using it at this time. `VisionInfo` is probably a better place for this information anyway, if it were needed. + See `#47 `_ for earlier discussions. +* Clarify: ObjectHypothesis[] ~= Classification (`#49 `_) + * Clarify: ObjectHypothesis[] ~= Classification + https://github.com/ros-perception/vision_msgs/issues/46 requested Array message types for ObjectHypothesis and/or ObjectHypothesisWithPose. As pointed out in the issue, these already exist in the form of the `ClassificationXD` and `DetectionXD` message types. 
+ * Clarify ObjectHypothesisWithPose[] ~= Detection +* Use composition in ObjectHypothesisWithPose (`#48 `_) +* Contributors: Adam Allevato, Martin Günther, Martin Pecka, root + +2.0.0 (2020-08-11) +------------------ +* Fix lint error for draconian header guard rule +* Rename create_aabb to use C++ extension + This fixes linting errors which assume that .h means that a file + is C (rather than C++). +* Add CONTRIBUTING.md +* Fix various linting issues +* Add gitignore + Sync ros2 with master +* Update test for ros2 +* add BoundingBox3DArray message (`#30 `_) + * add BoundingBoxArray message +* Make msg gen package deps more specific (`#24 `_) + Make message_generation and message_runtime use more specific depend tags +* Merge branch 'kinetic-devel' +* Removed "proposal" from readme (`#23 `_) +* add tracking ID to the Detection Message (`#19 `_) + * add tracking ID to the Detection + * modify comments + * Change UUID messages to strings + * Improve comment for tracking_id and fix whitespace +* Convert id to string (`#22 `_) +* Specify that id is explicitly for object class +* Fix dependency of unit test. (`#14 `_) +* 0.0.1 +* Pre-release commit - setting up versioning and changelog +* Rolled BoundingRect into BoundingBox2D + Added helper functions to make it easier to go from corner-size representation to + center-size representation, plus associated tests. +* Added license +* Small fixes in message comments (`#10 `_) +* Contributors: Adam Allevato, Leroy Rügemer, Martin Günther, Masaya Kataoka, Ronald Ensing, Shane Loretz, mistermult +* Switched to ROS2 for package definition files, create_aabb, etc. 
+* [ros2] use package format 3 (`#12 `_) +* Contributors: Adam Allevato, Martin Günther, Mikael Arguedas, procopiostein diff --git a/vision_msgs/CMakeLists.txt b/vision_msgs/CMakeLists.txt new file mode 100644 index 0000000..0536003 --- /dev/null +++ b/vision_msgs/CMakeLists.txt @@ -0,0 +1,66 @@ +cmake_minimum_required(VERSION 3.5) +project(vision_msgs) + +if(NOT WIN32) + if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14) + endif() + if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) + endif() +endif() + +find_package(ament_cmake REQUIRED) +find_package(std_msgs REQUIRED) +find_package(geometry_msgs REQUIRED) +find_package(rosidl_default_generators REQUIRED) + +set(msg_files + msg/BoundingBox2D.msg + msg/BoundingBox2DArray.msg + msg/BoundingBox3D.msg + msg/BoundingBox3DArray.msg + msg/Classification.msg + msg/Detection2DArray.msg + msg/Detection2D.msg + msg/Detection3DArray.msg + msg/Detection3D.msg + msg/LabelInfo.msg + msg/ObjectHypothesis.msg + msg/ObjectHypothesisWithPose.msg + msg/VisionClass.msg + msg/Point2D.msg + msg/Pose2D.msg + msg/VisionInfo.msg +) + +rosidl_generate_interfaces(${PROJECT_NAME} + ${msg_files} + DEPENDENCIES std_msgs geometry_msgs + ADD_LINTER_TESTS) + +install(DIRECTORY include/${PROJECT_NAME}/ + DESTINATION include/${PROJECT_NAME} +) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + + ament_lint_auto_find_test_dependencies() + + ament_add_gtest(vision_msgs_test test/main.cpp) + add_dependencies(vision_msgs_test ${PROJECT_NAME}) + ament_target_dependencies(vision_msgs_test + geometry_msgs + std_msgs + ) + # TODO(sloretz) rosidl_generate_interfaces() should make using generated messages in same project simpler + target_include_directories(vision_msgs_test PUBLIC + include + ${CMAKE_CURRENT_BINARY_DIR}/rosidl_generator_cpp + ) +endif() + +ament_export_dependencies(rosidl_default_runtime) +ament_export_include_directories(include) +ament_package() diff 
--git a/vision_msgs/include/vision_msgs/create_aabb.hpp b/vision_msgs/include/vision_msgs/create_aabb.hpp new file mode 100644 index 0000000..103dba6 --- /dev/null +++ b/vision_msgs/include/vision_msgs/create_aabb.hpp @@ -0,0 +1,71 @@ +// Copyright 2017 Open Source Robotics Foundation, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef VISION_MSGS__CREATE_AABB_HPP_ +#define VISION_MSGS__CREATE_AABB_HPP_ + +#include "vision_msgs/msg/bounding_box2_d.hpp" +#include "vision_msgs/msg/bounding_box3_d.hpp" + +namespace vision_msgs +{ +/** + * Create an axis-aligned bounding box (AABB) given the upper-left corner, + * width, and height. This allows easy conversion from the OpenCV rectangle + * representation. + */ +static inline msg::BoundingBox2D createAABB2D( + uint32_t left, + uint32_t top, + uint32_t width, + uint32_t height) +{ + msg::BoundingBox2D bbox; + + bbox.center.position.x = left + width / 2.0; + bbox.center.position.y = top + height / 2.0; + bbox.size_x = width; + bbox.size_y = height; + + return bbox; +} + +/** + * Create an axis-aligned bounding box (AABB) given the upper-left-front + * corner, width, height, and depth. This allows easy conversion from the + * OpenCV rectangle representation. 
+ */ +static inline msg::BoundingBox3D createAABB3D( + uint32_t min_x, + uint32_t min_y, + uint32_t min_z, + uint32_t size_x, + uint32_t size_y, + uint32_t size_z) +{ + msg::BoundingBox3D bbox; + + bbox.center.position.x = min_x + size_x / 2.0; + bbox.center.position.y = min_y + size_y / 2.0; + bbox.center.position.z = min_z + size_z / 2.0; + bbox.center.orientation.w = 1; + bbox.size.x = size_x; + bbox.size.y = size_y; + bbox.size.z = size_z; + + return bbox; +} +} // namespace vision_msgs + +#endif // VISION_MSGS__CREATE_AABB_HPP_ diff --git a/vision_msgs/msg/BoundingBox2D.msg b/vision_msgs/msg/BoundingBox2D.msg new file mode 100644 index 0000000..1f9152d --- /dev/null +++ b/vision_msgs/msg/BoundingBox2D.msg @@ -0,0 +1,13 @@ +# A 2D bounding box that can be rotated about its center. +# All dimensions are in pixels, but represented using floating-point +# values to allow sub-pixel precision. If an exact pixel crop is required +# for a rotated bounding box, it can be calculated using Bresenham's line +# algorithm. + +# The 2D position (in pixels) and orientation of the bounding box center. +vision_msgs/Pose2D center + +# The total size (in pixels) of the bounding box surrounding the object relative +# to the pose of its center. +float64 size_x +float64 size_y diff --git a/vision_msgs/msg/BoundingBox2DArray.msg b/vision_msgs/msg/BoundingBox2DArray.msg new file mode 100644 index 0000000..31bd42f --- /dev/null +++ b/vision_msgs/msg/BoundingBox2DArray.msg @@ -0,0 +1,2 @@ +std_msgs/Header header +vision_msgs/BoundingBox2D[] boxes diff --git a/vision_msgs/msg/BoundingBox3D.msg b/vision_msgs/msg/BoundingBox3D.msg new file mode 100644 index 0000000..cb8f49d --- /dev/null +++ b/vision_msgs/msg/BoundingBox3D.msg @@ -0,0 +1,10 @@ +# A 3D bounding box that can be positioned and rotated about its center (6 DOF) +# Dimensions of this box are in meters, and as such, it may be migrated to +# another package, such as geometry_msgs, in the future. 
+ +# The 3D position and orientation of the bounding box center +geometry_msgs/Pose center + +# The total size of the bounding box, in meters, surrounding the object's center +# pose. +geometry_msgs/Vector3 size diff --git a/vision_msgs/msg/BoundingBox3DArray.msg b/vision_msgs/msg/BoundingBox3DArray.msg new file mode 100644 index 0000000..c998e69 --- /dev/null +++ b/vision_msgs/msg/BoundingBox3DArray.msg @@ -0,0 +1,2 @@ +std_msgs/Header header +vision_msgs/BoundingBox3D[] boxes \ No newline at end of file diff --git a/vision_msgs/msg/Classification.msg b/vision_msgs/msg/Classification.msg new file mode 100644 index 0000000..ec1404c --- /dev/null +++ b/vision_msgs/msg/Classification.msg @@ -0,0 +1,17 @@ +# Defines a classification result. +# +# This result does not contain any position information. It is designed for +# classifiers, which simply provide class probabilities given an instance of +# source data (e.g., an image or a point cloud). + +std_msgs/Header header + +# A list of class probabilities. This list need not provide a probability for +# every possible class, just ones that are nonzero, or above some +# user-defined threshold. +ObjectHypothesis[] results + +# Source data that generated this classification are not a part of the message. +# If you need to access them, use an exact or approximate time synchronizer in +# your code, as this message's header should match the header of the source +# data. diff --git a/vision_msgs/msg/Detection2D.msg b/vision_msgs/msg/Detection2D.msg new file mode 100644 index 0000000..c2eb7f4 --- /dev/null +++ b/vision_msgs/msg/Detection2D.msg @@ -0,0 +1,23 @@ +# Defines a 2D detection result. +# +# This is similar to a 2D classification, but includes position information, +# allowing a classification result for a specific crop or image point to +# be located in the larger image. + +std_msgs/Header header + +# Class probabilities +ObjectHypothesisWithPose[] results + +# 2D bounding box surrounding the object.
+BoundingBox2D bbox + +# ID used for consistency across multiple detection messages. Detections +# of the same object in different detection messages should have the same id. +# This field may be empty. +string id + +# Source data that generated this detection are not a part of the message. +# If you need to access them, use an exact or approximate time synchronizer in +# your code, as this message's header should match the header of the source +# data. diff --git a/vision_msgs/msg/Detection2DArray.msg b/vision_msgs/msg/Detection2DArray.msg new file mode 100644 index 0000000..7d05be0 --- /dev/null +++ b/vision_msgs/msg/Detection2DArray.msg @@ -0,0 +1,7 @@ +# A list of 2D detections, for a multi-object 2D detector. + +std_msgs/Header header + +# A list of the detected proposals. A multi-proposal detector might generate +# this list with many candidate detections generated from a single input. +Detection2D[] detections diff --git a/vision_msgs/msg/Detection3D.msg b/vision_msgs/msg/Detection3D.msg new file mode 100644 index 0000000..bed3f72 --- /dev/null +++ b/vision_msgs/msg/Detection3D.msg @@ -0,0 +1,23 @@ +# Defines a 3D detection result. +# +# This extends a basic 3D classification by including the pose of the +# detected object. + +std_msgs/Header header + +# Class probabilities. Does not have to include hypotheses for all possible +# object ids; the scores for any ids not listed are assumed to be 0. +ObjectHypothesisWithPose[] results + +# 3D bounding box surrounding the object. +BoundingBox3D bbox + +# ID used for consistency across multiple detection messages. Detections +# of the same object in different detection messages should have the same id. +# This field may be empty. +string id + +# Source data that generated this detection are not a part of the message. +# If you need to access them, use an exact or approximate time synchronizer in +# your code, as this message's header should match the header of the source +# data.
diff --git a/vision_msgs/msg/Detection3DArray.msg b/vision_msgs/msg/Detection3DArray.msg new file mode 100644 index 0000000..b3d3011 --- /dev/null +++ b/vision_msgs/msg/Detection3DArray.msg @@ -0,0 +1,7 @@ +# A list of 3D detections, for a multi-object 3D detector. + +std_msgs/Header header + +# A list of the detected proposals. A multi-proposal detector might generate +# this list with many candidate detections generated from a single input. +Detection3D[] detections diff --git a/vision_msgs/msg/LabelInfo.msg b/vision_msgs/msg/LabelInfo.msg new file mode 100644 index 0000000..d73a3c5 --- /dev/null +++ b/vision_msgs/msg/LabelInfo.msg @@ -0,0 +1,25 @@ +# Provides meta-information about a visual pipeline. +# +# This message serves a similar purpose to sensor_msgs/CameraInfo, but instead +# of being tied to hardware, it represents information about a specific +# computer vision pipeline. This information stays constant (or relatively +# constant) over time, and so it is wasteful to send it with each individual +# result. By listening to these messages, subscribers will receive +# the context in which published vision messages are to be interpreted. +# Each vision pipeline should publish its LabelInfo messages to its own topic, +# in a manner similar to CameraInfo. +# This message is meant to allow converting data from vision pipelines that +# return id based classifications back to human readable string class names. + +# Used for sequencing +std_msgs/Header header + +# An array of uint16 keys and string values containing the association +# between class identifiers and their names. According to the number +# of classes and the datatype used to store their ids internally, the +# maximum class id allowed (65535 for uint16 and 255 for uint8) belongs to +# the "UNLABELED" class. +vision_msgs/VisionClass[] class_map + +# The value between 0-1 used as confidence threshold for the inference.
+float32 threshold diff --git a/vision_msgs/msg/ObjectHypothesis.msg b/vision_msgs/msg/ObjectHypothesis.msg new file mode 100644 index 0000000..e6716e8 --- /dev/null +++ b/vision_msgs/msg/ObjectHypothesis.msg @@ -0,0 +1,12 @@ +# An object hypothesis that contains no pose information. +# If you would like to define an array of ObjectHypothesis messages, +# please see the Classification message type. + +# The unique ID of the object class. To get additional information about +# this ID, such as its human-readable class name, listeners should perform a +# lookup in a metadata database. See vision_msgs/VisionInfo.msg for more detail. +string class_id + +# The probability or confidence value of the detected object. By convention, +# this value should lie in the range [0-1]. +float64 score diff --git a/vision_msgs/msg/ObjectHypothesisWithPose.msg b/vision_msgs/msg/ObjectHypothesisWithPose.msg new file mode 100644 index 0000000..bc6c6e8 --- /dev/null +++ b/vision_msgs/msg/ObjectHypothesisWithPose.msg @@ -0,0 +1,16 @@ +# An object hypothesis that contains pose information. +# If you would like to define an array of ObjectHypothesisWithPose messages, +# please see the Detection2D or Detection3D message types. + +# The object hypothesis (ID and score). +ObjectHypothesis hypothesis + +# The 6D pose of the object hypothesis. This pose should be +# defined as the pose of some fixed reference point on the object, such as +# the geometric center of the bounding box, the center of mass of the +# object or the origin of a reference mesh of the object. +# Note that this pose is not stamped; frame information can be defined by +# parent messages. +# Also note that different classes predicted for the same input data may have +# different predicted 6D poses. 
+geometry_msgs/PoseWithCovariance pose diff --git a/vision_msgs/msg/Point2D.msg b/vision_msgs/msg/Point2D.msg new file mode 100644 index 0000000..0145541 --- /dev/null +++ b/vision_msgs/msg/Point2D.msg @@ -0,0 +1,5 @@ +# Represents a 2D point in pixel coordinates. +# XY matches the sensor_msgs/Image convention: X is positive right and Y is positive down. + +float64 x +float64 y diff --git a/vision_msgs/msg/Pose2D.msg b/vision_msgs/msg/Pose2D.msg new file mode 100644 index 0000000..367dcec --- /dev/null +++ b/vision_msgs/msg/Pose2D.msg @@ -0,0 +1,4 @@ +# Represents a 2D pose (coordinates and a radian rotation). Rotation is positive counterclockwise. + +vision_msgs/Point2D position +float64 theta diff --git a/vision_msgs/msg/VisionClass.msg b/vision_msgs/msg/VisionClass.msg new file mode 100644 index 0000000..93f2181 --- /dev/null +++ b/vision_msgs/msg/VisionClass.msg @@ -0,0 +1,12 @@ +# A key value pair that maps an integer class_id to a string class label +# in computer vision systems. + +# The int value that identifies the class. +# Elements identified with 65535, the maximum uint16 value are assumed +# to belong to the "UNLABELED" class. For vision pipelines using less +# than 255 classes the "UNLABELED" is the maximum value in the uint8 +# range. +uint16 class_id + +# The name of the class represented by the class_id +string class_name diff --git a/vision_msgs/msg/VisionInfo.msg b/vision_msgs/msg/VisionInfo.msg new file mode 100644 index 0000000..8058070 --- /dev/null +++ b/vision_msgs/msg/VisionInfo.msg @@ -0,0 +1,40 @@ +# Provides meta-information about a visual pipeline. +# +# This message serves a similar purpose to sensor_msgs/CameraInfo, but instead +# of being tied to hardware, it represents information about a specific +# computer vision pipeline. This information stays constant (or relatively +# constant) over time, and so it is wasteful to send it with each individual +# result. 
By listening to these messages, subscribers will receive +# the context in which published vision messages are to be interpreted. +# Each vision pipeline should publish its VisionInfo messages to its own topic, +# in a manner similar to CameraInfo. + +# Used for sequencing +std_msgs/Header header + +# Name of the vision pipeline. This should be a value that is meaningful to an +# outside user. +string method + +# Location where the metadata database is stored. The recommended location is +# as an XML string on the ROS parameter server, but the exact implementation +# and information is left up to the user. +# The database should store information attached to class ids. Each +# class id should map to an atomic, visually recognizable element. This +# definition is intentionally vague to allow extreme flexibility. The +# elements could be classes in a pixel segmentation algorithm, object classes +# in a detector, different people's faces in a face detection algorithm, etc. +# Vision pipelines report results in terms of numeric IDs, which map into +# this database. +# The information stored in this database is, again, left up to the user. The +# database could be as simple as a map from ID to class name, or it could +# include information such as object meshes or colors to use for +# visualization. +string database_location + +# Metadata database version. This counter is incremented +# each time the pipeline begins using a new version of the database (useful +# in the case of online training or user modifications). +# The counter value can be monitored by listeners to ensure that the pipeline +# and the listener are using the same metadata. 
+int32 database_version \ No newline at end of file diff --git a/vision_msgs/package.xml b/vision_msgs/package.xml new file mode 100644 index 0000000..6f60ee8 --- /dev/null +++ b/vision_msgs/package.xml @@ -0,0 +1,37 @@ + + + vision_msgs + 4.1.1 + + Messages for interfacing with various computer vision pipelines, such as + object detectors. + + + Adam Allevato + + Apache License 2.0 + + Adam Allevato + Adam Allevato + Apache License 2.0 + + ament_cmake + rosidl_default_generators + + std_msgs + geometry_msgs + + std_msgs + geometry_msgs + rosidl_default_runtime + + ament_cmake_gtest + ament_lint_auto + ament_lint_common + + rosidl_interface_packages + + + ament_cmake + + diff --git a/vision_msgs/test/main.cpp b/vision_msgs/test/main.cpp new file mode 100644 index 0000000..e9cb7b2 --- /dev/null +++ b/vision_msgs/test/main.cpp @@ -0,0 +1,44 @@ +// Copyright 2017 Open Source Robotics Foundation, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <gtest/gtest.h> + +#include "vision_msgs/msg/bounding_box2_d.hpp" +#include "vision_msgs/msg/bounding_box3_d.hpp" +#include "vision_msgs/create_aabb.hpp" + +TEST(vision_msgs, CreateAABB2D) +{ + vision_msgs::msg::BoundingBox2D bbox = vision_msgs::createAABB2D(1, 2, 3, 4); + EXPECT_FLOAT_EQ(bbox.center.position.x, 2.5); // left + width / 2.0 = 1 + 3 / 2.0 + EXPECT_FLOAT_EQ(bbox.center.position.y, 4); // top + height / 2.0 = 2 + 4 / 2.0 + EXPECT_EQ(bbox.size_x, 3); + EXPECT_EQ(bbox.size_y, 4); + EXPECT_EQ(bbox.center.theta, 0); +} + +TEST(vision_msgs, CreateAABB3D) +{ + vision_msgs::msg::BoundingBox3D bbox = vision_msgs::createAABB3D(1, 2, 3, 4, 5, 6); + EXPECT_FLOAT_EQ(bbox.center.position.x, 3); // min_x + size_x / 2.0 = 1 + 4 / 2.0 + EXPECT_FLOAT_EQ(bbox.center.position.y, 4.5); // min_y + size_y / 2.0 = 2 + 5 / 2.0 + EXPECT_FLOAT_EQ(bbox.center.position.z, 6); // min_z + size_z / 2.0 = 3 + 6 / 2.0 + EXPECT_EQ(bbox.center.orientation.x, 0); + EXPECT_EQ(bbox.center.orientation.y, 0); + EXPECT_EQ(bbox.center.orientation.z, 0); + EXPECT_EQ(bbox.center.orientation.w, 1); + EXPECT_EQ(bbox.size.x, 4); + EXPECT_EQ(bbox.size.y, 5); + EXPECT_EQ(bbox.size.z, 6); +}