Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Datumaro format #7125

Merged
merged 36 commits into develop from zm/update-dm-format
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
68fec71
Add image paths in datumaro export without images, allow json import …
zhiltsov-max Nov 10, 2023
13f7917
Refactor conv_mask_to_poly parameter uses
zhiltsov-max Nov 10, 2023
8b4dd1b
Add tests
zhiltsov-max Nov 10, 2023
19ed5d6
Update changelog
zhiltsov-max Nov 10, 2023
1d3c0e8
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Nov 10, 2023
aad486e
Fix linter
zhiltsov-max Nov 10, 2023
84f90bc
Merge remote-tracking branch 'origin/zm/update-dm-format' into zm/update-dm-format
zhiltsov-max Nov 10, 2023
6e08d4b
Fix error
zhiltsov-max Nov 10, 2023
ecddc93
Add support for json import in datumaro 3d format
zhiltsov-max Nov 29, 2023
88dbded
Update function parameters info
zhiltsov-max Nov 29, 2023
c465fe6
Replace strtobool use
zhiltsov-max Nov 29, 2023
a9a6b7b
Update header
zhiltsov-max Mar 13, 2024
d132195
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 13, 2024
66fe48b
Revert mask_to_poly changes as out of scope
zhiltsov-max Mar 13, 2024
a9e22f0
Revert irrelevant changes
zhiltsov-max Mar 13, 2024
1d41dd7
Update dependencies
zhiltsov-max Mar 13, 2024
fb13347
Fixes
zhiltsov-max Mar 13, 2024
4f48bb9
Tests
zhiltsov-max Mar 13, 2024
a3ab674
Fixes
zhiltsov-max Mar 14, 2024
cb335a4
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 14, 2024
0bd42e3
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 18, 2024
cabf7ea
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 29, 2024
cd01372
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Apr 5, 2024
0805830
Rename files with changelog updates
zhiltsov-max Apr 5, 2024
1f4e505
Remove unused variable
zhiltsov-max Apr 5, 2024
9205097
Remove extra import
zhiltsov-max Apr 5, 2024
e36ac50
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Apr 16, 2024
082caf1
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Apr 17, 2024
e43f131
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max May 8, 2024
be65cdd
Add logic for project import fail when only annotations are uploaded …
zhiltsov-max Nov 15, 2024
8634ffa
Update tests
zhiltsov-max Nov 15, 2024
037b07d
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Nov 15, 2024
56db39b
Update tests after merge
zhiltsov-max Nov 15, 2024
66383bb
Update changelog
zhiltsov-max Nov 16, 2024
8b337d9
Refactor media providers
zhiltsov-max Nov 16, 2024
ea91c87
Refactor test
zhiltsov-max Nov 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Added

- Support for direct .json file import in Datumaro format
(<https://github.com/opencv/cvat/pull/7125>)
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Fixed

- Export without images in Datumaro format should include image info
(<https://github.com/opencv/cvat/pull/7125>)
158 changes: 90 additions & 68 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import numpy as np
import rq
from attr import attrib, attrs
from datumaro.components.media import PointCloud
from django.db.models import QuerySet
from django.utils import timezone

Expand Down Expand Up @@ -1312,27 +1311,27 @@ def add_task(self, task, files):
self._project_annotation.add_task(task, files, self)

@attrs(frozen=True, auto_attribs=True)
class ImageSource:
class MediaSource:
db_data: Data
is_video: bool = attrib(kw_only=True)

class ImageProvider:
def __init__(self, sources: Dict[int, ImageSource]) -> None:
class MediaProvider:
def __init__(self, sources: Dict[int, MediaSource]) -> None:
self._sources = sources

def unload(self) -> None:
pass

class ImageProvider2D(ImageProvider):
def __init__(self, sources: Dict[int, ImageSource]) -> None:
class MediaProvider2D(MediaProvider):
def __init__(self, sources: Dict[int, MediaSource]) -> None:
super().__init__(sources)
self._current_source_id = None
self._frame_provider = None

def unload(self) -> None:
self._unload_source()

def get_image_for_frame(self, source_id: int, frame_index: int, **image_kwargs):
def get_media_for_frame(self, source_id: int, frame_index: int, **image_kwargs) -> dm.Image:
source = self._sources[source_id]

if source.is_video:
Expand All @@ -1355,7 +1354,7 @@ def image_loader(_):
out_type=FrameProvider.Type.BUFFER)[0].getvalue()
return dm.ByteImage(data=image_loader, **image_kwargs)

def _load_source(self, source_id: int, source: ImageSource) -> None:
def _load_source(self, source_id: int, source: MediaSource) -> None:
if self._current_source_id == source_id:
return

Expand All @@ -1370,8 +1369,8 @@ def _unload_source(self) -> None:

self._current_source_id = None

class ImageProvider3D(ImageProvider):
def __init__(self, sources: Dict[int, ImageSource]) -> None:
class MediaProvider3D(MediaProvider):
def __init__(self, sources: Dict[int, MediaSource]) -> None:
super().__init__(sources)
self._images_per_source = {
source_id: {
Expand All @@ -1381,7 +1380,7 @@ def __init__(self, sources: Dict[int, ImageSource]) -> None:
for source_id, source in sources.items()
}

def get_image_for_frame(self, source_id: int, frame_id: int, **image_kwargs):
def get_media_for_frame(self, source_id: int, frame_id: int, **image_kwargs) -> dm.PointCloud:
source = self._sources[source_id]

point_cloud_path = osp.join(
Expand All @@ -1391,17 +1390,17 @@ def get_image_for_frame(self, source_id: int, frame_id: int, **image_kwargs):
image = self._images_per_source[source_id][frame_id]

related_images = [
path
dm.Image(path=path)
for rf in image.related_files.all()
for path in [osp.realpath(str(rf.path))]
if osp.isfile(path)
]

return point_cloud_path, related_images
return dm.PointCloud(point_cloud_path, extra_images=related_images)

IMAGE_PROVIDERS_BY_DIMENSION = {
DimensionType.DIM_3D: ImageProvider3D,
DimensionType.DIM_2D: ImageProvider2D,
IMAGE_PROVIDERS_BY_DIMENSION: Dict[DimensionType, MediaProvider] = {
DimensionType.DIM_3D: MediaProvider3D,
DimensionType.DIM_2D: MediaProvider2D,
}

class CVATDataExtractorMixin:
Expand All @@ -1410,7 +1409,7 @@ def __init__(self, *,
):
self.convert_annotations = convert_annotations or convert_cvat_anno_to_dm

self._image_provider: Optional[ImageProvider] = None
self._image_provider: Optional[MediaProvider] = None

def __enter__(self):
return self
Expand Down Expand Up @@ -1482,7 +1481,7 @@ def __init__(
**kwargs
):
dm.SourceExtractor.__init__(
self, media_type=dm.Image if dimension == DimensionType.DIM_2D else PointCloud
self, media_type=dm.Image if dimension == DimensionType.DIM_2D else dm.PointCloud
)
CVATDataExtractorMixin.__init__(self, **kwargs)

Expand All @@ -1491,7 +1490,6 @@ def __init__(
self._user = self._load_user_info(instance_meta) if dimension == DimensionType.DIM_3D else {}
self._dimension = dimension
self._format_type = format_type
dm_items = []

is_video = instance_meta['mode'] == 'interpolation'
ext = ''
Expand All @@ -1500,44 +1498,56 @@ def __init__(

if dimension == DimensionType.DIM_3D or include_images:
self._image_provider = IMAGE_PROVIDERS_BY_DIMENSION[dimension](
{0: ImageSource(instance_data.db_data, is_video=is_video)}
{0: MediaSource(instance_data.db_data, is_video=is_video)}
)

dm_items: List[dm.DatasetItem] = []
for frame_data in instance_data.group_by_frame(include_empty=True):
image_args = {
'path': frame_data.name + ext,
'size': (frame_data.height, frame_data.width),
}

dm_media_args = { 'path': frame_data.name + ext }
if dimension == DimensionType.DIM_3D:
dm_image = self._image_provider.get_image_for_frame(0, frame_data.id, **image_args)
elif include_images:
dm_image = self._image_provider.get_image_for_frame(0, frame_data.idx, **image_args)
dm_media: dm.PointCloud = self._image_provider.get_media_for_frame(
0, frame_data.id, **dm_media_args
)

if not include_images:
dm_media_args["extra_images"] = [
dm.Image(path=osp.basename(image.path))
for image in dm_media.extra_images
]
dm_media = dm.PointCloud(**dm_media_args)
else:
dm_image = dm.Image(**image_args)
dm_media_args['size'] = (frame_data.height, frame_data.width)
if include_images:
dm_media: dm.Image = self._image_provider.get_media_for_frame(
0, frame_data.idx, **dm_media_args
)
else:
dm_media = dm.Image(**dm_media_args)

dm_anno = self._read_cvat_anno(frame_data, instance_meta['labels'])

dm_attributes = {'frame': frame_data.frame}

if dimension == DimensionType.DIM_2D:
dm_item = dm.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, media=dm_image,
attributes={'frame': frame_data.frame
})
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, media=dm_media,
attributes=dm_attributes
)
elif dimension == DimensionType.DIM_3D:
attributes = {'frame': frame_data.frame}
if format_type == "sly_pointcloud":
attributes["name"] = self._user["name"]
attributes["createdAt"] = self._user["createdAt"]
attributes["updatedAt"] = self._user["updatedAt"]
attributes["labels"] = []
dm_attributes["name"] = self._user["name"]
dm_attributes["createdAt"] = self._user["createdAt"]
dm_attributes["updatedAt"] = self._user["updatedAt"]
dm_attributes["labels"] = []
for (idx, (_, label)) in enumerate(instance_meta['labels']):
attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
attributes["track_id"] = -1
dm_attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
dm_attributes["track_id"] = -1

dm_item = dm.DatasetItem(
id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, media=PointCloud(dm_image[0]), related_images=dm_image[1],
attributes=attributes
annotations=dm_anno, media=dm_media,
attributes=dm_attributes
)

dm_items.append(dm_item)
Expand Down Expand Up @@ -1567,7 +1577,7 @@ def __init__(
**kwargs
):
dm.Extractor.__init__(
self, media_type=dm.Image if dimension == DimensionType.DIM_2D else PointCloud
self, media_type=dm.Image if dimension == DimensionType.DIM_2D else dm.PointCloud
)
CVATDataExtractorMixin.__init__(self, **kwargs)

Expand All @@ -1576,12 +1586,10 @@ def __init__(
self._dimension = dimension
self._format_type = format_type

dm_items: List[dm.DatasetItem] = []

if self._dimension == DimensionType.DIM_3D or include_images:
self._image_provider = IMAGE_PROVIDERS_BY_DIMENSION[self._dimension](
{
task.id: ImageSource(task.data, is_video=task.mode == 'interpolation')
task.id: MediaSource(task.data, is_video=task.mode == 'interpolation')
for task in project_data.tasks
}
)
Expand All @@ -1592,43 +1600,57 @@ def __init__(
for is_video in [task.mode == 'interpolation']
}

dm_items: List[dm.DatasetItem] = []
for frame_data in project_data.group_by_frame(include_empty=True):
image_args = {
'path': frame_data.name + ext_per_task[frame_data.task_id],
'size': (frame_data.height, frame_data.width),
}
dm_media_args = { 'path': frame_data.name + ext_per_task[frame_data.task_id] }
if self._dimension == DimensionType.DIM_3D:
dm_image = self._image_provider.get_image_for_frame(
frame_data.task_id, frame_data.id, **image_args)
elif include_images:
dm_image = self._image_provider.get_image_for_frame(
frame_data.task_id, frame_data.idx, **image_args)
dm_media: dm.PointCloud = self._image_provider.get_media_for_frame(
frame_data.task_id, frame_data.id, **dm_media_args
)

if not include_images:
dm_media_args["extra_images"] = [
dm.Image(path=osp.basename(image.path))
for image in dm_media.extra_images
]
dm_media = dm.PointCloud(**dm_media_args)
else:
dm_image = dm.Image(**image_args)
dm_media_args['size'] = (frame_data.height, frame_data.width)
if include_images:
dm_media: dm.Image = self._image_provider.get_media_for_frame(
frame_data.task_id, frame_data.idx, **dm_media_args
)
else:
dm_media = dm.Image(**dm_media_args)

dm_anno = self._read_cvat_anno(frame_data, project_data.meta[project_data.META_FIELD]['labels'])

dm_attributes = {'frame': frame_data.frame}

if self._dimension == DimensionType.DIM_2D:
dm_item = dm.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, media=dm_image,
annotations=dm_anno, media=dm_media,
subset=frame_data.subset,
attributes={'frame': frame_data.frame}
attributes=dm_attributes,
)
else:
attributes = {'frame': frame_data.frame}
elif self._dimension == DimensionType.DIM_3D:
if format_type == "sly_pointcloud":
attributes["name"] = self._user["name"]
attributes["createdAt"] = self._user["createdAt"]
attributes["updatedAt"] = self._user["updatedAt"]
attributes["labels"] = []
dm_attributes["name"] = self._user["name"]
dm_attributes["createdAt"] = self._user["createdAt"]
dm_attributes["updatedAt"] = self._user["updatedAt"]
dm_attributes["labels"] = []
for (idx, (_, label)) in enumerate(project_data.meta[project_data.META_FIELD]['labels']):
attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
attributes["track_id"] = -1
dm_attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"], "type": label["type"]})
dm_attributes["track_id"] = -1

dm_item = dm.DatasetItem(
id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, media=PointCloud(dm_image[0]), related_images=dm_image[1],
attributes=attributes, subset=frame_data.subset
annotations=dm_anno, media=dm_media,
subset=frame_data.subset,
attributes=dm_attributes,
)

dm_items.append(dm_item)

self._items = dm_items
Expand Down
Loading
Loading