open-mmlab · hhaAndroid · Nov 25, 2022 · Nov 23, 2022 · Nov 24, 2022 · Nov 24, 2022
diff --git a/tools/dataset_converters/labelme2coco.py b/tools/dataset_converters/labelme2coco.py
@@ -5,7 +5,8 @@
     $ python labelme2coco.py \
                 --img-dir /path/to/images \
                 --labels-dir /path/to/labels \
-                --out /path/to/coco_instances.json
+                --out /path/to/coco_instances.json \
+                [--class-id-txt /path/to/class_with_id.txt]
 
 Note:
     Labels dir file structure:
@@ -21,12 +22,26 @@
          ├── image1.jpg
          ├── image2.png
          └── ...
+
+    If `--class-id-txt` is set, its contents are used for the `categories`
+    field; otherwise the class ids are generated automatically from all the
+    labelme label files and saved to `class_with_id.txt` next to `--out`.
+
+    class_with_id.txt example, each line is "id class_name":
+    ```txt
+    1 cat
+    2 dog
+    3 bicycle
+    4 motorcycle
+
+    ```
 """
 import argparse
 import json
 from pathlib import Path
 
 import numpy as np
+from mmengine import track_iter_progress
 
 from mmyolo.utils.misc import IMG_EXTENSIONS
 
@@ -37,6 +52,8 @@ def parse_args():
     parser.add_argument(
         '--labels-dir', type=str, help='Dataset labels directory')
     parser.add_argument('--out', type=str, help='COCO label json output path')
+    parser.add_argument(
+        '--class-id-txt', default=None, type=str, help='All class id txt path')
     args = parser.parse_args()
     return args
 
@@ -76,16 +93,18 @@ def format_coco_annotations(points: list, image_id: int, annotations_id: int,
     return annotation_info
 
 
-def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
+def parse_labelme_to_coco(image_dir: str, labels_root: str,
+                          all_classes_id: dict) -> (dict, dict):
     """Gen COCO json format label from labelme format label.
 
     Args:
         image_dir (str): Image dir path.
         labels_root (str): Image label root path.
+        all_classes_id (dict): Class name to class id mapping, or None.
 
     Return:
         coco_json (dict): COCO json data.
-
+        category_to_id (dict): Mapping from category name to category id.
 
     COCO json example:
 
@@ -142,13 +161,20 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
 
     image_id = 0
     annotations_id = 0
-    category_to_id = dict()
-    categories_labels = []
+    if all_classes_id is None:
+        category_to_id = dict()
+        categories_labels = []
+    else:
+        category_to_id = all_classes_id
+        categories_labels = list(all_classes_id.keys())
+
+    # filter incorrect image file
+    img_file_list = [
+        img_file for img_file in Path(image_dir).iterdir()
+        if img_file.suffix.lower() in IMG_EXTENSIONS
+    ]
 
-    for img_file in Path(image_dir).iterdir():
-        # filter incorrect image file
-        if img_file.suffix.lower() not in IMG_EXTENSIONS:
-            continue
+    for img_file in track_iter_progress(img_file_list):
 
         # get label file according to the image file name
         label_path = Path(labels_root).joinpath(
@@ -159,7 +185,6 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
 
         # load labelme label
         with open(label_path, encoding='utf-8') as f:
-            print(f'Convert labelme to COCO from: {label_path}')
             labelme_data = json.load(f)
 
         image_id = image_id + 1  # coco id begin from 1
@@ -180,7 +205,9 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
 
             # Update coco 'categories' field
             class_name = label_shapes['label']
-            if class_name not in categories_labels:
+
+            if (all_classes_id is None) and (class_name
+                                             not in categories_labels):
                 # only update when not been added before
                 coco_json['categories'].append({
                     'id':
@@ -190,6 +217,12 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
                 categories_labels.append(class_name)
                 category_to_id[class_name] = len(categories_labels)
 
+            elif (all_classes_id is not None) and (class_name
+                                                   not in categories_labels):
+                # check class name
+                raise ValueError(f'Got unexpected class name {class_name}, '
+                                 'which is not in your `--class-id-txt`.')
+
             # get shape type and convert it to coco format
             shape_type = label_shapes['shape_type']
             if shape_type != 'rectangle':
@@ -206,37 +239,64 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
                 points, image_id, annotations_id, category_to_id[class_name])
             coco_json['annotations'].append(coco_annotations)
 
-    print('*' * 20)
     print(f'Total image = {image_id}')
     print(f'Total annotations = {annotations_id}')
     print(f'Number of categories = {len(categories_labels)}, '
           f'which is {categories_labels}')
 
-    return coco_json
+    return coco_json, category_to_id
 
 
-def convert_labelme_to_coco(image_dir: str, labels_dir: str, out_path: str):
+def convert_labelme_to_coco(image_dir: str, labels_dir: str, out_path: str,
+                            class_id_txt: str):
     """Convert labelme format label to COCO json format label.
 
     Args:
         image_dir (str): Image dir path.
         labels_dir (str): Image label path.
         out_path (str): COCO json file save path.
+        class_id_txt (str): Path to the class id txt file, or None.
     """
     assert Path(out_path).suffix == '.json'
 
+    if class_id_txt is not None:
+        assert Path(class_id_txt).suffix == '.txt'
+
+        all_classes_id = dict()
+        with open(class_id_txt, encoding='utf-8') as f:
+            txt_lines = f.read().splitlines()
+        assert len(txt_lines) > 0
+
+        for txt_line in txt_lines:
+            v, k = txt_line.split(' ')
+            all_classes_id.update({k: v})
+    else:
+        all_classes_id = None
+
     # convert to coco json
-    coco_json_data = parse_labelme_to_coco(image_dir, labels_dir)
+    coco_json_data, category_to_id = parse_labelme_to_coco(
+        image_dir, labels_dir, all_classes_id)
 
     # save json result
     Path(out_path).parent.mkdir(exist_ok=True, parents=True)
     print(f'Saving json to {out_path}')
-    json.dump(coco_json_data, open(out_path, 'w'), indent=4)
+    json.dump(coco_json_data, open(out_path, 'w'), indent=2)
+
+    if class_id_txt is None:
+        category_to_id_path = Path(out_path).with_name('class_with_id.txt')
+        print(f'Saving class id txt to {category_to_id_path}')
+        with open(category_to_id_path, 'w', encoding='utf-8') as f:
+            for k, v in category_to_id.items():
+                f.write(f'{v} {k}\n')
+    else:
+        print('Not Saving new class id txt, user should using '
+              f'{class_id_txt} for training config')
 
 
 def main():
     args = parse_args()
-    convert_labelme_to_coco(args.img_dir, args.labels_dir, args.out)
+    convert_labelme_to_coco(args.img_dir, args.labels_dir, args.out,
+                            args.class_id_txt)
     print('All done!')