From d577b209f66ad3e146d100b8504463c324211202 Mon Sep 17 00:00:00 2001
From: Dmitrii Lavrukhin
Date: Fri, 26 Jul 2024 16:08:17 +0400
Subject: [PATCH] progress reporting on import

---
 datumaro/plugins/yolo_format/extractor.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py
index fc3077cb9a..4efafa5c22 100644
--- a/datumaro/plugins/yolo_format/extractor.py
+++ b/datumaro/plugins/yolo_format/extractor.py
@@ -109,11 +109,15 @@ def __init__(
         # called 'classes', 'names' and 'backup'.
         subsets = {k: v for k, v in self._config.items() if k not in self.RESERVED_CONFIG_KEYS}
 
-        for subset_name, list_path in subsets.items():
+        pbars = self._ctx.progress_reporter.split(len(subsets))
+        for (subset_name, list_path), pbar in zip(subsets.items(), pbars):
             subset = YoloExtractor.Subset(subset_name, self)
             subset.items = OrderedDict(
                 (self.name_from_path(p), self.localize_path(p))
-                for p in self._iterate_over_image_paths(subset_name, list_path)
+                for p in pbar.iter(
+                    self._iterate_over_image_paths(subset_name, list_path),
+                    desc=f"Importing '{subset_name}'",
+                )
             )
             subsets[subset_name] = subset
 
@@ -125,7 +129,7 @@ def _iterate_over_image_paths(self, subset_name: str, list_path: str):
             raise InvalidAnnotationError(f"Can't find '{subset_name}' subset list file")
 
         with open(list_path, "r", encoding="utf-8") as f:
-            yield from (p for p in f if p.strip())
+            return [stripped for p in f if (stripped := p.strip())]
 
     @cached_property
     def _config(self) -> Dict[str, str]:
@@ -381,19 +385,19 @@ def _iterate_over_image_paths(
     ):
         if isinstance(subset_images_source, str):
             if subset_images_source.endswith(YoloPath.SUBSET_LIST_EXT):
-                yield from super()._iterate_over_image_paths(subset_name, subset_images_source)
+                return super()._iterate_over_image_paths(subset_name, subset_images_source)
             else:
                 path = osp.join(self._path, self.localize_path(subset_images_source))
                 if not osp.isdir(path):
                     raise InvalidAnnotationError(f"Can't find '{subset_name}' subset image folder")
-                yield from (
+                return [
                     osp.relpath(osp.join(root, file), self._path)
                     for root, dirs, files in os.walk(path)
                     for file in files
                     if osp.isfile(osp.join(root, file))
-                )
+                ]
         else:
-            yield from subset_images_source
+            return subset_images_source
 
 
 class Yolo8SegmentationExtractor(Yolo8Extractor):