Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Datumaro format #7125

Merged
merged 36 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
68fec71
Add image paths in datumaro export without images, allow json import …
zhiltsov-max Nov 10, 2023
13f7917
Refactor conv_mask_to_poly parameter uses
zhiltsov-max Nov 10, 2023
8b4dd1b
Add tests
zhiltsov-max Nov 10, 2023
19ed5d6
Update changelog
zhiltsov-max Nov 10, 2023
1d3c0e8
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Nov 10, 2023
aad486e
Fix linter
zhiltsov-max Nov 10, 2023
84f90bc
Merge remote-tracking branch 'origin/zm/update-dm-format' into zm/upd…
zhiltsov-max Nov 10, 2023
6e08d4b
Fix error
zhiltsov-max Nov 10, 2023
ecddc93
Add support for json import in datumaro 3d format
zhiltsov-max Nov 29, 2023
88dbded
Update function parameters info
zhiltsov-max Nov 29, 2023
c465fe6
Replace strtobool use
zhiltsov-max Nov 29, 2023
a9a6b7b
Update header
zhiltsov-max Mar 13, 2024
d132195
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 13, 2024
66fe48b
Revert mask_to_poly changes as out of scope
zhiltsov-max Mar 13, 2024
a9e22f0
Revert irrelevant changes
zhiltsov-max Mar 13, 2024
1d41dd7
Update dependencies
zhiltsov-max Mar 13, 2024
fb13347
Fixes
zhiltsov-max Mar 13, 2024
4f48bb9
Tests
zhiltsov-max Mar 13, 2024
a3ab674
Fixes
zhiltsov-max Mar 14, 2024
cb335a4
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 14, 2024
0bd42e3
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 18, 2024
cabf7ea
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Mar 29, 2024
cd01372
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Apr 5, 2024
0805830
Rename files with changelog updates
zhiltsov-max Apr 5, 2024
1f4e505
Remove unused variable
zhiltsov-max Apr 5, 2024
9205097
Remove extra import
zhiltsov-max Apr 5, 2024
e36ac50
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Apr 16, 2024
082caf1
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Apr 17, 2024
e43f131
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max May 8, 2024
be65cdd
Add logic for project import fail when only annotations are uploaded …
zhiltsov-max Nov 15, 2024
8634ffa
Update tests
zhiltsov-max Nov 15, 2024
037b07d
Merge branch 'develop' into zm/update-dm-format
zhiltsov-max Nov 15, 2024
56db39b
Update tests after merge
zhiltsov-max Nov 15, 2024
66383bb
Update changelog
zhiltsov-max Nov 16, 2024
8b337d9
Refactor media providers
zhiltsov-max Nov 16, 2024
ea91c87
Refactor test
zhiltsov-max Nov 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions changelog.d/20231110_175126_mzhiltso_update_dm_format.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
### Added

- Support for direct .json file import in Datumaro format
(<https://github.com/opencv/cvat/pull/7125>)

### Fixed

- Export without images in Datumaro format should include image info
(<https://github.com/opencv/cvat/pull/7125>)
195 changes: 115 additions & 80 deletions cvat/apps/dataset_manager/bindings.py

Large diffs are not rendered by default.

16 changes: 12 additions & 4 deletions cvat/apps/dataset_manager/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
from datumaro.components.annotation import AnnotationType
from datumaro.plugins.coco_format.importer import CocoImporter

from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \
import_dm_annotations
from cvat.apps.dataset_manager.bindings import (
GetCVATDataExtractor, NoMediaInAnnotationFileError, import_dm_annotations, detect_dataset
)
from cvat.apps.dataset_manager.util import make_zip_archive

from .registry import dm_env, exporter, importer
Expand All @@ -35,6 +36,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
else:
if load_data_callback:
raise NoMediaInAnnotationFileError()

dataset = Dataset.import_from(src_file.name,
'coco_instances', env=dm_env)
import_dm_annotations(dataset, instance_data)
Expand All @@ -52,6 +56,8 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
def remove_extra_annotations(dataset):
for item in dataset:
# Boxes would have invalid (skeleton) labels, so remove them
# TODO: find a way to import boxes
annotations = [ann for ann in item.annotations
if ann.type != AnnotationType.bbox]
item.annotations = annotations
Expand All @@ -66,7 +72,9 @@ def remove_extra_annotations(dataset):
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
else:
dataset = Dataset.import_from(src_file.name,
'coco_person_keypoints', env=dm_env)
if load_data_callback:
raise NoMediaInAnnotationFileError()

dataset = Dataset.import_from(src_file.name, 'coco_person_keypoints', env=dm_env)
remove_extra_annotations(dataset)
import_dm_annotations(dataset, instance_data)
17 changes: 13 additions & 4 deletions cvat/apps/dataset_manager/formats/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,16 @@
from datumaro.util.image import Image
from defusedxml import ElementTree

from cvat.apps.dataset_manager.bindings import (ProjectData, TaskData, JobData, detect_dataset,
get_defaulted_subset,
import_dm_annotations,
match_dm_item)
from cvat.apps.dataset_manager.bindings import (
NoMediaInAnnotationFileError,
ProjectData,
TaskData,
JobData,
detect_dataset,
get_defaulted_subset,
import_dm_annotations,
match_dm_item
)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.frame_provider import FrameQuality, FrameOutputType, make_frame_provider

Expand Down Expand Up @@ -1456,4 +1462,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
for p in anno_paths:
load_anno(p, instance_data)
else:
if load_data_callback:
raise NoMediaInAnnotationFileError()

load_anno(src_file, instance_data)
54 changes: 27 additions & 27 deletions cvat/apps/dataset_manager/formats/datumaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,40 @@
#
# SPDX-License-Identifier: MIT

import zipfile
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import ItemTransform
from datumaro.util.image import Image

from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.bindings import (
GetCVATDataExtractor, import_dm_annotations, NoMediaInAnnotationFileError, detect_dataset
)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.models import DimensionType

from .registry import dm_env, exporter, importer

class DeleteImagePath(ItemTransform):
def transform_item(self, item):
image = None
if item.has_image and item.image.has_data:
image = Image(data=item.image.data, size=item.image.size)
return item.wrap(image=image, point_cloud='', related_images=[])


@exporter(name="Datumaro", ext="ZIP", version="1.0")
def _export(dst_file, temp_dir, instance_data, save_images=False):
with GetCVATDataExtractor(instance_data=instance_data, include_images=save_images) as extractor:
with GetCVATDataExtractor(
instance_data=instance_data, include_images=save_images
) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
if not save_images:
dataset.transform(DeleteImagePath)
dataset.export(temp_dir, 'datumaro', save_images=save_images)

make_zip_archive(temp_dir, dst_file)

@importer(name="Datumaro", ext="ZIP", version="1.0")
@importer(name="Datumaro", ext="JSON, ZIP", version="1.0")
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(temp_dir)

detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro'))
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)
detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro'))
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)
else:
if load_data_callback:
raise NoMediaInAnnotationFileError()

dataset = Dataset.import_from(src_file.name, 'datumaro', env=dm_env)

if load_data_callback is not None:
load_data_callback(dataset, instance_data)
Expand All @@ -52,19 +49,22 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
dimension=DimensionType.DIM_3D,
) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)

if not save_images:
dataset.transform(DeleteImagePath)
dataset.export(temp_dir, 'datumaro', save_images=save_images)

make_zip_archive(temp_dir, dst_file)

@importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
@importer(name="Datumaro 3D", ext="JSON, ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(temp_dir)

detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro'))
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)
else:
if load_data_callback:
raise NoMediaInAnnotationFileError()

detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro'))
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)
dataset = Dataset.import_from(src_file.name, 'datumaro', env=dm_env)

if load_data_callback is not None:
load_data_callback(dataset, instance_data)
Expand Down
71 changes: 66 additions & 5 deletions tests/python/rest_api/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing import Optional, Union

import pytest
from cvat_sdk.api_client import ApiClient, Configuration, models
from cvat_sdk.api_client import ApiClient, Configuration, exceptions, models
from cvat_sdk.api_client.api_client import Endpoint
from cvat_sdk.api_client.exceptions import ForbiddenException
from cvat_sdk.core.helpers import get_paginated_collection
Expand All @@ -37,8 +37,10 @@
from shared.utils.helpers import generate_image_files

from .utils import (
DATUMARO_FORMAT_FOR_DIMENSION,
CollectionSimpleFilterTestBase,
create_task,
export_dataset,
export_project_backup,
export_project_dataset,
)
Expand Down Expand Up @@ -991,6 +993,68 @@ def test_can_export_and_import_dataset_after_deleting_related_storage(

self._test_import_project(admin_user, project_id, "CVAT 1.1", import_data)

@pytest.mark.parametrize(
"dimension, format_name",
[
*DATUMARO_FORMAT_FOR_DIMENSION.items(),
("2d", "CVAT 1.1"),
("3d", "CVAT 1.1"),
("2d", "COCO 1.0"),
],
)
def test_cant_import_annotations_as_project(self, admin_user, tasks, format_name, dimension):
task = next(t for t in tasks if t.get("size") if t["dimension"] == dimension)

def _export_task(task_id: int, format_name: str) -> io.BytesIO:
with make_api_client(admin_user) as api_client:
return io.BytesIO(
export_dataset(
api_client.tasks_api,
api_version=2,
id=task_id,
format=format_name,
save_images=False,
)
)

if format_name in list(DATUMARO_FORMAT_FOR_DIMENSION.values()):
with zipfile.ZipFile(_export_task(task["id"], format_name)) as zip_file:
annotations = zip_file.read("annotations/default.json")

dataset_file = io.BytesIO(annotations)
dataset_file.name = "annotations.json"
elif format_name == "CVAT 1.1":
with zipfile.ZipFile(_export_task(task["id"], "CVAT for images 1.1")) as zip_file:
annotations = zip_file.read("annotations.xml")

dataset_file = io.BytesIO(annotations)
dataset_file.name = "annotations.xml"
elif format_name == "COCO 1.0":
with zipfile.ZipFile(_export_task(task["id"], format_name)) as zip_file:
annotations = zip_file.read("annotations/instances_default.json")

dataset_file = io.BytesIO(annotations)
dataset_file.name = "annotations.json"
else:
assert False

with make_api_client(admin_user) as api_client:
project, _ = api_client.projects_api.create(
project_write_request=models.ProjectWriteRequest(
name=f"test_annotations_import_as_project {format_name}"
)
)

import_data = {"dataset_file": dataset_file}

with pytest.raises(exceptions.ApiException, match="Dataset file should be zip archive"):
self._test_import_project(
admin_user,
project.id,
format_name=format_name,
data=import_data,
)

@pytest.mark.parametrize(
"export_format, subset_path_template",
[
Expand Down Expand Up @@ -1045,10 +1109,7 @@ def test_creates_subfolders_for_subsets_on_export(
len([f for f in zip_file.namelist() if f.startswith(folder_prefix)]) > 0
), f"No {folder_prefix} in {zip_file.namelist()}"

def test_export_project_with_honeypots(
self,
admin_user: str,
):
def test_export_project_with_honeypots(self, admin_user: str):
project_spec = {
"name": "Project with honeypots",
"labels": [{"name": "cat"}],
Expand Down
83 changes: 83 additions & 0 deletions tests/python/rest_api/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@
)

from .utils import (
DATUMARO_FORMAT_FOR_DIMENSION,
CollectionSimpleFilterTestBase,
compare_annotations,
create_task,
export_dataset,
export_task_backup,
export_task_dataset,
parse_frame_step,
Expand Down Expand Up @@ -969,6 +971,46 @@ def test_uses_subset_name(
subset_path in path for path in zip_file.namelist()
), f"No {subset_path} in {zip_file.namelist()}"

@pytest.mark.parametrize(
"dimension, mode", [("2d", "annotation"), ("2d", "interpolation"), ("3d", "annotation")]
)
def test_datumaro_export_without_annotations_includes_image_info(
self, admin_user, tasks, mode, dimension
):
task = next(
t for t in tasks if t.get("size") if t["mode"] == mode if t["dimension"] == dimension
)

with make_api_client(admin_user) as api_client:
dataset_file = io.BytesIO(
export_dataset(
api_client.tasks_api,
api_version=2,
id=task["id"],
format=DATUMARO_FORMAT_FOR_DIMENSION[dimension],
save_images=False,
)
)

with zipfile.ZipFile(dataset_file) as zip_file:
annotations = json.loads(zip_file.read("annotations/default.json"))

assert annotations["items"]
for item in annotations["items"]:
assert "media" not in item

if dimension == "2d":
assert osp.splitext(item["image"]["path"])[0] == item["id"]
assert not Path(item["image"]["path"]).is_absolute()
assert tuple(item["image"]["size"]) > (0, 0)
elif dimension == "3d":
assert osp.splitext(osp.basename(item["point_cloud"]["path"]))[0] == item["id"]
assert not Path(item["point_cloud"]["path"]).is_absolute()
for related_image in item["related_images"]:
assert not Path(related_image["path"]).is_absolute()
if "size" in related_image:
assert tuple(related_image["size"]) > (0, 0)


@pytest.mark.usefixtures("restore_db_per_function")
@pytest.mark.usefixtures("restore_cvat_data_per_function")
Expand Down Expand Up @@ -5181,6 +5223,47 @@ def test_import_annotations_after_deleting_related_cloud_storage(
task.import_annotations(self.import_format, file_path)
self._check_annotations(task_id)

@pytest.mark.parametrize("dimension", ["2d", "3d"])
def test_can_import_datumaro_json(self, admin_user, tasks, dimension):
task = next(
t
for t in tasks
if t.get("size")
if t["dimension"] == dimension and t.get("validation_mode") != "gt_pool"
)

with make_api_client(admin_user) as api_client:
original_annotations = json.loads(
api_client.tasks_api.retrieve_annotations(task["id"])[1].data
)

dataset_archive = io.BytesIO(
export_dataset(
api_client.tasks_api,
api_version=2,
id=task["id"],
format=DATUMARO_FORMAT_FOR_DIMENSION[dimension],
save_images=False,
)
)

with zipfile.ZipFile(dataset_archive) as zip_file:
annotations = zip_file.read("annotations/default.json")

with TemporaryDirectory() as tempdir:
annotations_path = Path(tempdir) / "annotations.json"
annotations_path.write_bytes(annotations)
self.client.tasks.retrieve(task["id"]).import_annotations(
DATUMARO_FORMAT_FOR_DIMENSION[dimension], annotations_path
)

with make_api_client(admin_user) as api_client:
updated_annotations = json.loads(
api_client.tasks_api.retrieve_annotations(task["id"])[1].data
)

assert compare_annotations(original_annotations, updated_annotations) == {}

@pytest.mark.parametrize(
"format_name",
[
Expand Down
Loading
Loading