Skip to content

[Improvement] Add output class_with_id.txt after labelme2coco #313

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 25, 2022
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 77 additions & 17 deletions tools/dataset_converters/labelme2coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
$ python labelme2coco.py \
--img-dir /path/to/images \
--labels-dir /path/to/labels \
--out /path/to/coco_instances.json
--out /path/to/coco_instances.json \
[--class-id-txt /path/to/class_with_id.txt]

Note:
Labels dir file structure:
Expand All @@ -21,12 +22,26 @@
├── image1.jpg
├── image2.png
└── ...

If `--class-id-txt` is set, its contents are used for the `categories`
field; if it is not set, the categories are generated automatically from
all the labelme label files and saved to `class_with_id.txt`.

class_with_id.txt example, each line is "id class_name":
```txt
1 cat
2 dog
3 bicycle
4 motorcycle

```
"""
import argparse
import json
from pathlib import Path

import numpy as np
from mmengine import track_iter_progress

from mmyolo.utils.misc import IMG_EXTENSIONS

Expand All @@ -37,6 +52,8 @@ def parse_args():
parser.add_argument(
'--labels-dir', type=str, help='Dataset labels directory')
parser.add_argument('--out', type=str, help='COCO label json output path')
parser.add_argument(
'--class-id-txt', default=None, type=str, help='All class id txt path')
args = parser.parse_args()
return args

Expand Down Expand Up @@ -76,16 +93,18 @@ def format_coco_annotations(points: list, image_id: int, annotations_id: int,
return annotation_info


def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
def parse_labelme_to_coco(image_dir: str, labels_root: str,
all_classes_id: dict) -> (dict, dict):
"""Gen COCO json format label from labelme format label.

Args:
image_dir (str): Image dir path.
labels_root (str): Image label root path.
all_classes_id (dict): Mapping from class name to class id, or None to build it from the labelme label files.

Return:
coco_json (dict): COCO json data.

category_to_id (dict): category id and name.

COCO json example:

Expand Down Expand Up @@ -142,13 +161,20 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:

image_id = 0
annotations_id = 0
category_to_id = dict()
categories_labels = []
if all_classes_id is None:
category_to_id = dict()
categories_labels = []
else:
category_to_id = all_classes_id
categories_labels = list(all_classes_id.keys())

# filter incorrect image file
img_file_list = [
img_file for img_file in Path(image_dir).iterdir()
if img_file.suffix.lower() in IMG_EXTENSIONS
]

for img_file in Path(image_dir).iterdir():
# filter incorrect image file
if img_file.suffix.lower() not in IMG_EXTENSIONS:
continue
for img_file in track_iter_progress(img_file_list):

# get label file according to the image file name
label_path = Path(labels_root).joinpath(
Expand All @@ -159,7 +185,6 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:

# load labelme label
with open(label_path, encoding='utf-8') as f:
print(f'Convert labelme to COCO from: {label_path}')
labelme_data = json.load(f)

image_id = image_id + 1 # coco id begin from 1
Expand All @@ -180,7 +205,9 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:

# Update coco 'categories' field
class_name = label_shapes['label']
if class_name not in categories_labels:

if (all_classes_id is None) and (class_name
not in categories_labels):
# only update when not been added before
coco_json['categories'].append({
'id':
Expand All @@ -190,6 +217,12 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
categories_labels.append(class_name)
category_to_id[class_name] = len(categories_labels)

elif (all_classes_id is not None) and (class_name
not in categories_labels):
# check class name
raise ValueError(f'Got unexpected class name {class_name}, '
'which is not in your `--class-id-txt`.')

# get shape type and convert it to coco format
shape_type = label_shapes['shape_type']
if shape_type != 'rectangle':
Expand All @@ -206,37 +239,64 @@ def parse_labelme_to_coco(image_dir: str, labels_root: str) -> dict:
points, image_id, annotations_id, category_to_id[class_name])
coco_json['annotations'].append(coco_annotations)

print('*' * 20)
print(f'Total image = {image_id}')
print(f'Total annotations = {annotations_id}')
print(f'Number of categories = {len(categories_labels)}, '
f'which is {categories_labels}')

return coco_json
return coco_json, category_to_id


def convert_labelme_to_coco(image_dir: str,
                            labels_dir: str,
                            out_path: str,
                            class_id_txt: str = None):
    """Convert labelme format label to COCO json format label.

    Args:
        image_dir (str): Image dir path.
        labels_dir (str): Image label path.
        out_path (str): COCO json file save path.
        class_id_txt (str, optional): Path of a txt file listing all
            classes, one "id class_name" pair per line. When None, the
            class ids are generated from the labelme label files and
            written to `class_with_id.txt` next to `out_path`.
            Defaults to None.

    Raises:
        ValueError: If a non-blank line of `class_id_txt` is not an
            integer id followed by a class name.
    """
    assert Path(out_path).suffix == '.json'

    if class_id_txt is not None:
        assert Path(class_id_txt).suffix == '.txt'

        with open(class_id_txt, encoding='utf-8') as f:
            # blank lines (e.g. a trailing empty line) carry no class info
            txt_lines = [
                line.strip() for line in f.read().splitlines()
                if line.strip()
            ]
        assert len(txt_lines) > 0

        all_classes_id = dict()
        for txt_line in txt_lines:
            # split only once so that class names may contain spaces
            class_info = txt_line.split(maxsplit=1)
            if len(class_info) != 2:
                raise ValueError(
                    f'Error parsing line "{txt_line}" of {class_id_txt}, '
                    'each line should be "id class_name".')
            class_id, class_name = class_info
            # COCO category ids are integers, not strings
            all_classes_id[class_name] = int(class_id)
    else:
        all_classes_id = None

    # convert to coco json
    coco_json_data, category_to_id = parse_labelme_to_coco(
        image_dir, labels_dir, all_classes_id)

    # save json result
    Path(out_path).parent.mkdir(exist_ok=True, parents=True)
    print(f'Saving json to {out_path}')
    with open(out_path, 'w') as f:
        json.dump(coco_json_data, f, indent=2)

    if class_id_txt is None:
        category_to_id_path = Path(out_path).with_name('class_with_id.txt')
        print(f'Saving class id txt to {category_to_id_path}')
        with open(category_to_id_path, 'w', encoding='utf-8') as f:
            for k, v in category_to_id.items():
                f.write(f'{v} {k}\n')
    else:
        print('Not Saving new class id txt, user should using '
              f'{class_id_txt} for training config')


def main():
    """Parse the command line arguments and run the labelme to COCO
    conversion once, forwarding the optional class id txt path."""
    args = parse_args()
    convert_labelme_to_coco(args.img_dir, args.labels_dir, args.out,
                            args.class_id_txt)
    print('All done!')


Expand Down