cvat-ai · zhiltsov-max · Nov 25, 2024 · Nov 10, 2023 · Nov 10, 2023 · Nov 10, 2023
diff --git a/changelog.d/20231110_175126_mzhiltso_update_dm_export-added.md b/changelog.d/20231110_175126_mzhiltso_update_dm_export-added.md
@@ -0,0 +1,4 @@
+### Added
+
+- Support for direct .json file import in Datumaro format
+  (<https://github.com/opencv/cvat/pull/7125>)
diff --git a/changelog.d/20231110_175127_mzhiltso_update_dm_export-fixed.md b/changelog.d/20231110_175127_mzhiltso_update_dm_export-fixed.md
@@ -0,0 +1,4 @@
+### Fixed
+
+- Export without images in Datumaro format now includes image info
+  (<https://github.com/opencv/cvat/pull/7125>)
@@ -20,7 +20,6 @@
 import numpy as np
 import rq
 from attr import attrib, attrs
-from datumaro.components.media import PointCloud
 from django.db.models import QuerySet
 from django.utils import timezone
 
@@ -1312,27 +1311,27 @@ def add_task(self, task, files):
         self._project_annotation.add_task(task, files, self)
 
 @attrs(frozen=True, auto_attribs=True)
-class ImageSource:
+class MediaSource:
     db_data: Data
     is_video: bool = attrib(kw_only=True)
 
-class ImageProvider:
-    def __init__(self, sources: Dict[int, ImageSource]) -> None:
+class MediaProvider:
+    def __init__(self, sources: Dict[int, MediaSource]) -> None:
         self._sources = sources
 
     def unload(self) -> None:
         pass
 
-class ImageProvider2D(ImageProvider):
-    def __init__(self, sources: Dict[int, ImageSource]) -> None:
+class MediaProvider2D(MediaProvider):
+    def __init__(self, sources: Dict[int, MediaSource]) -> None:
         super().__init__(sources)
         self._current_source_id = None
         self._frame_provider = None
 
     def unload(self) -> None:
         self._unload_source()
 
-    def get_image_for_frame(self, source_id: int, frame_index: int, **image_kwargs):
+    def get_media_for_frame(self, source_id: int, frame_index: int, **image_kwargs) -> dm.Image:
         source = self._sources[source_id]
 
         if source.is_video:
@@ -1355,7 +1354,7 @@ def image_loader(_):
                     out_type=FrameProvider.Type.BUFFER)[0].getvalue()
             return dm.ByteImage(data=image_loader, **image_kwargs)
 
-    def _load_source(self, source_id: int, source: ImageSource) -> None:
+    def _load_source(self, source_id: int, source: MediaSource) -> None:
         if self._current_source_id == source_id:
             return
 
@@ -1370,8 +1369,8 @@ def _unload_source(self) -> None:
 
         self._current_source_id = None
 
-class ImageProvider3D(ImageProvider):
-    def __init__(self, sources: Dict[int, ImageSource]) -> None:
+class MediaProvider3D(MediaProvider):
+    def __init__(self, sources: Dict[int, MediaSource]) -> None:
         super().__init__(sources)
         self._images_per_source = {
             source_id: {
@@ -1381,7 +1380,7 @@ def __init__(self, sources: Dict[int, ImageSource]) -> None:
             for source_id, source in sources.items()
         }
 
-    def get_image_for_frame(self, source_id: int, frame_id: int, **image_kwargs):
+    def get_media_for_frame(self, source_id: int, frame_id: int, **image_kwargs) -> dm.PointCloud:
         source = self._sources[source_id]
 
         point_cloud_path = osp.join(
@@ -1391,17 +1390,17 @@ def get_image_for_frame(self, source_id: int, frame_id: int, **image_kwargs):
         image = self._images_per_source[source_id][frame_id]
 
         related_images = [
-            path
+            dm.Image(path=path)
             for rf in image.related_files.all()
             for path in [osp.realpath(str(rf.path))]
             if osp.isfile(path)
         ]
 
-        return point_cloud_path, related_images
+        return dm.PointCloud(point_cloud_path, extra_images=related_images)
 
-IMAGE_PROVIDERS_BY_DIMENSION = {
-    DimensionType.DIM_3D: ImageProvider3D,
-    DimensionType.DIM_2D: ImageProvider2D,
+IMAGE_PROVIDERS_BY_DIMENSION: Dict[DimensionType, MediaProvider] = {
+    DimensionType.DIM_3D: MediaProvider3D,
+    DimensionType.DIM_2D: MediaProvider2D,
 }
 
 class CVATDataExtractorMixin:
@@ -1410,7 +1409,7 @@ def __init__(self, *,
     ):
         self.convert_annotations = convert_annotations or convert_cvat_anno_to_dm
 
-        self._image_provider: Optional[ImageProvider] = None
+        self._image_provider: Optional[MediaProvider] = None
 
     def __enter__(self):
         return self
@@ -1482,7 +1481,7 @@ def __init__(
         **kwargs
     ):
         dm.SourceExtractor.__init__(
-            self, media_type=dm.Image if dimension == DimensionType.DIM_2D else PointCloud
+            self, media_type=dm.Image if dimension == DimensionType.DIM_2D else dm.PointCloud
         )
         CVATDataExtractorMixin.__init__(self, **kwargs)
 
@@ -1491,7 +1490,6 @@ def __init__(
         self._user = self._load_user_info(instance_meta) if dimension == DimensionType.DIM_3D else {}
         self._dimension = dimension
         self._format_type = format_type
-        dm_items = []
 
         is_video = instance_meta['mode'] == 'interpolation'
         ext = ''
@@ -1500,44 +1498,56 @@ def __init__(
 
         if dimension == DimensionType.DIM_3D or include_images:
             self._image_provider = IMAGE_PROVIDERS_BY_DIMENSION[dimension](
-                {0: ImageSource(instance_data.db_data, is_video=is_video)}
+                {0: MediaSource(instance_data.db_data, is_video=is_video)}
             )
 
+        dm_items: List[dm.DatasetItem] = []
         for frame_data in instance_data.group_by_frame(include_empty=True):
-            image_args = {
-                'path': frame_data.name + ext,
-                'size': (frame_data.height, frame_data.width),
-            }
-
+            dm_media_args = { 'path': frame_data.name + ext }
             if dimension == DimensionType.DIM_3D:
-                dm_image = self._image_provider.get_image_for_frame(0, frame_data.id, **image_args)
-            elif include_images:
-                dm_image = self._image_provider.get_image_for_frame(0, frame_data.idx, **image_args)
+                dm_media: dm.PointCloud = self._image_provider.get_media_for_frame(
+                    0, frame_data.id, **dm_media_args
+                )
+
+                if not include_images:
+                    dm_media_args["extra_images"] = [
+                        dm.Image(path=osp.basename(image.path))
+                        for image in dm_media.extra_images
+                    ]
+                    dm_media = dm.PointCloud(**dm_media_args)
             else:
-                dm_image = dm.Image(**image_args)
+                dm_media_args['size'] = (frame_data.height, frame_data.width)
+                if include_images:
+                    dm_media: dm.Image = self._image_provider.get_media_for_frame(
+                        0, frame_data.idx, **dm_media_args
+                    )
+                else:
+                    dm_media = dm.Image(**dm_media_args)
+
             dm_anno = self._read_cvat_anno(frame_data, instance_meta['labels'])
 
+            dm_attributes = {'frame': frame_data.frame}
+
             if dimension == DimensionType.DIM_2D:
                 dm_item = dm.DatasetItem(
-                        id=osp.splitext(frame_data.name)[0],
-                        annotations=dm_anno, media=dm_image,
-                        attributes={'frame': frame_data.frame
-                    })
+                    id=osp.splitext(frame_data.name)[0],
+                    annotations=dm_anno, media=dm_media,
+                    attributes=dm_attributes
+                )
             elif dimension == DimensionType.DIM_3D:
-                attributes = {'frame': frame_data.frame}
                 if format_type == "sly_pointcloud":
-                    attributes["name"] = self._user["name"]
-                    attributes["createdAt"] = self._user["createdAt"]
-                    attributes["updatedAt"] = self._user["updatedAt"]
-                    attributes["labels"] = []
+                    dm_attributes["name"] = self._user["name"]
+                    dm_attributes["createdAt"] = self._user["createdAt"]
+                    dm_attributes["updatedAt"] = self._user["updatedAt"]
+                    dm_attributes["labels"] = []
                     for (idx, (_, label)) in enumerate(instance_meta['labels']):
-                        attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
-                        attributes["track_id"] = -1
+                        dm_attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
+                        dm_attributes["track_id"] = -1
 
                 dm_item = dm.DatasetItem(
                     id=osp.splitext(osp.split(frame_data.name)[-1])[0],
-                    annotations=dm_anno, media=PointCloud(dm_image[0]), related_images=dm_image[1],
-                    attributes=attributes
+                    annotations=dm_anno, media=dm_media,
+                    attributes=dm_attributes
                 )
 
             dm_items.append(dm_item)
@@ -1567,7 +1577,7 @@ def __init__(
         **kwargs
     ):
         dm.Extractor.__init__(
-            self, media_type=dm.Image if dimension == DimensionType.DIM_2D else PointCloud
+            self, media_type=dm.Image if dimension == DimensionType.DIM_2D else dm.PointCloud
         )
         CVATDataExtractorMixin.__init__(self, **kwargs)
 
@@ -1576,12 +1586,10 @@ def __init__(
         self._dimension = dimension
         self._format_type = format_type
 
-        dm_items: List[dm.DatasetItem] = []
-
         if self._dimension == DimensionType.DIM_3D or include_images:
             self._image_provider = IMAGE_PROVIDERS_BY_DIMENSION[self._dimension](
                 {
-                    task.id: ImageSource(task.data, is_video=task.mode == 'interpolation')
+                    task.id: MediaSource(task.data, is_video=task.mode == 'interpolation')
                     for task in project_data.tasks
                 }
             )
@@ -1592,43 +1600,57 @@ def __init__(
             for is_video in [task.mode == 'interpolation']
         }
 
+        dm_items: List[dm.DatasetItem] = []
         for frame_data in project_data.group_by_frame(include_empty=True):
-            image_args = {
-                'path': frame_data.name + ext_per_task[frame_data.task_id],
-                'size': (frame_data.height, frame_data.width),
-            }
+            dm_media_args = { 'path': frame_data.name + ext_per_task[frame_data.task_id] }
             if self._dimension == DimensionType.DIM_3D:
-                dm_image = self._image_provider.get_image_for_frame(
-                    frame_data.task_id, frame_data.id, **image_args)
-            elif include_images:
-                dm_image = self._image_provider.get_image_for_frame(
-                    frame_data.task_id, frame_data.idx, **image_args)
+                dm_media: dm.PointCloud = self._image_provider.get_media_for_frame(
+                    frame_data.task_id, frame_data.id, **dm_media_args
+                )
+
+                if not include_images:
+                    dm_media_args["extra_images"] = [
+                        dm.Image(path=osp.basename(image.path))
+                        for image in dm_media.extra_images
+                    ]
+                    dm_media = dm.PointCloud(**dm_media_args)
             else:
-                dm_image = dm.Image(**image_args)
+                dm_media_args['size'] = (frame_data.height, frame_data.width)
+                if include_images:
+                    dm_media: dm.Image = self._image_provider.get_media_for_frame(
+                        frame_data.task_id, frame_data.idx, **dm_media_args
+                    )
+                else:
+                    dm_media = dm.Image(**dm_media_args)
+
             dm_anno = self._read_cvat_anno(frame_data, project_data.meta[project_data.META_FIELD]['labels'])
+
+            dm_attributes = {'frame': frame_data.frame}
+
             if self._dimension == DimensionType.DIM_2D:
                 dm_item = dm.DatasetItem(
                     id=osp.splitext(frame_data.name)[0],
-                    annotations=dm_anno, media=dm_image,
+                    annotations=dm_anno, media=dm_media,
                     subset=frame_data.subset,
-                    attributes={'frame': frame_data.frame}
+                    attributes=dm_attributes,
                 )
-            else:
-                attributes = {'frame': frame_data.frame}
+            elif self._dimension == DimensionType.DIM_3D:
                 if format_type == "sly_pointcloud":
-                    attributes["name"] = self._user["name"]
-                    attributes["createdAt"] = self._user["createdAt"]
-                    attributes["updatedAt"] = self._user["updatedAt"]
-                    attributes["labels"] = []
+                    dm_attributes["name"] = self._user["name"]
+                    dm_attributes["createdAt"] = self._user["createdAt"]
+                    dm_attributes["updatedAt"] = self._user["updatedAt"]
+                    dm_attributes["labels"] = []
                     for (idx, (_, label)) in enumerate(project_data.meta[project_data.META_FIELD]['labels']):
-                        attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
-                        attributes["track_id"] = -1
+                        dm_attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
+                        dm_attributes["track_id"] = -1
 
                 dm_item = dm.DatasetItem(
                     id=osp.splitext(osp.split(frame_data.name)[-1])[0],
-                    annotations=dm_anno, media=PointCloud(dm_image[0]), related_images=dm_image[1],
-                    attributes=attributes, subset=frame_data.subset
+                    annotations=dm_anno, media=dm_media,
+                    subset=frame_data.subset,
+                    attributes=dm_attributes,
                 )
+
             dm_items.append(dm_item)
 
         self._items = dm_items