Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add dataset analysis script #172

Merged
merged 32 commits into from
Nov 3, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d23dea6
messages
Zheng-LinXiao Oct 17, 2022
27a815d
again
Zheng-LinXiao Oct 19, 2022
8057135
again1
Zheng-LinXiao Oct 19, 2022
9bd865f
again_2
Zheng-LinXiao Oct 19, 2022
69aaae3
again_3
Zheng-LinXiao Oct 20, 2022
d0525c1
again_4
Zheng-LinXiao Oct 20, 2022
5b52274
again_5
Zheng-LinXiao Oct 20, 2022
cd959b7
Update docs/zh_cn/user_guides/useful_tools.md
Zheng-LinXiao Oct 21, 2022
ffdbdf6
Update docs/zh_cn/user_guides/useful_tools.md
Zheng-LinXiao Oct 21, 2022
8513e81
Update docs/en/user_guides/useful_tools.md
Zheng-LinXiao Oct 21, 2022
cdb15cb
Update docs/en/user_guides/useful_tools.md
Zheng-LinXiao Oct 21, 2022
50f48ba
Update docs/en/user_guides/useful_tools.md
Zheng-LinXiao Oct 21, 2022
0f85995
Update docs/zh_cn/user_guides/useful_tools.md
Zheng-LinXiao Oct 21, 2022
32e4452
Update tools/analysis_tools/dataset_analysis.py
Zheng-LinXiao Oct 21, 2022
98e55b0
Update tools/analysis_tools/dataset_analysis.py
Zheng-LinXiao Oct 21, 2022
816ca18
Update tools/analysis_tools/dataset_analysis.py
Zheng-LinXiao Oct 21, 2022
654b454
Update tools/analysis_tools/dataset_analysis.py
Zheng-LinXiao Oct 21, 2022
2b0264a
Merge branch 'dev' into branchname
PeterH0323 Oct 21, 2022
70069c4
modify code
Zheng-LinXiao Oct 22, 2022
a7d676d
Update docs/en/user_guides/useful_tools.md
Zheng-LinXiao Oct 23, 2022
8899ac2
Modify document
Zheng-LinXiao Oct 24, 2022
3dc4622
Modify document
Zheng-LinXiao Oct 24, 2022
4d5765a
Merge branch 'branchname' of github.com:Zheng-LinXiao/mmyolo into bra…
Zheng-LinXiao Oct 24, 2022
87e7a2c
new code
Zheng-LinXiao Oct 31, 2022
23910d4
revise decuments and codes
Zheng-LinXiao Oct 31, 2022
d9faedb
Revise datails
Zheng-LinXiao Nov 1, 2022
b7c4271
Update tools/analysis_tools/dataset_analysis.py
Zheng-LinXiao Nov 1, 2022
5920ff0
modify func name
Zheng-LinXiao Nov 1, 2022
78da584
code
Zheng-LinXiao Nov 2, 2022
fafb44d
Documentation and code
Zheng-LinXiao Nov 2, 2022
4dfd80c
modify error meaasge
Zheng-LinXiao Nov 3, 2022
7780e4d
deleted height,
Zheng-LinXiao Nov 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions tools/analysis_tools/browse_dataset_distribution_function1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp

import matplotlib.pyplot as plt
from mmengine.config import Config
from pycocotools.coco import COCO


def parse_args():
    """Parse the command line of the category-distribution script.

    Returns:
        argparse.Namespace: Namespace with ``config`` (config file path),
        ``ann_file`` (``'train'`` or ``'val'``) and ``out_dir`` (directory
        the figure is saved to).
    """
    parser = argparse.ArgumentParser(
        description='Distribution of categories and bbox instances')
    parser.add_argument('config', help='config file path')
    # ``main`` only knows how to resolve these two values; rejecting anything
    # else here yields a usage error instead of a late failure.
    parser.add_argument(
        '--ann-file',
        default='train',
        choices=['train', 'val'],
        help='dataset ann-file')
    parser.add_argument(
        '--out-dir',
        default='./data',
        type=str,
        help='If there is no display interface, you can save it')
    args = parser.parse_args()
    return args


def main():
    """Plot the number of annotated instances per category as a bar chart.

    The annotation file is taken from the train or val dataloader of the
    config given on the command line. The chart is saved as
    ``<out-dir>/<dataset_type>.jpg``.

    Raises:
        ValueError: If ``--ann-file`` is neither ``'train'`` nor ``'val'``.
    """
    import os  # function scope: the file itself only imports ``os.path``

    args = parse_args()
    cfg = Config.fromfile(args.config)

    data_root = cfg.data_root
    dataset_type = cfg.dataset_type
    if args.ann_file == 'train':
        ann_file = cfg.train_dataloader.dataset.ann_file
    elif args.ann_file == 'val':
        ann_file = cfg.val_dataloader.dataset.ann_file
    else:
        # Without this branch ``ann_file`` would be unbound further down.
        raise ValueError(
            f"--ann-file must be 'train' or 'val', got {args.ann_file!r}")

    coco = COCO(osp.join(data_root, ann_file))
    categories = coco.loadCats(coco.getCatIds())

    # Select annotations by category id rather than by name. pycocotools
    # treats a bare ``str`` passed as ``catNms`` as a sequence, so the name
    # lookup becomes a substring test: asking for 'hot dog' would also return
    # the id of 'dog' and inflate the count.
    labels = [category['name'] for category in categories]
    instance_counts = [
        len(coco.getAnnIds(catIds=[category['id']]))
        for category in categories
    ]

    fig = plt.figure()
    plt.bar(labels, instance_counts, align='center')
    plt.xticks(rotation=70)
    # X,Y name and title
    plt.xlabel('Category')
    plt.ylabel('Number')
    plt.title(dataset_type)
    fig.set_size_inches(35, 18)

    # savefig does not create missing directories.
    os.makedirs(args.out_dir, exist_ok=True)
    fig.savefig(osp.join(args.out_dir, f'{dataset_type}.jpg'))
    plt.close(fig)


if __name__ == '__main__':
    main()
66 changes: 66 additions & 0 deletions tools/analysis_tools/browse_dataset_distribution_function2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp

import matplotlib.pyplot as plt
from mmengine.config import Config
from pycocotools.coco import COCO


def parse_args():
    """Parse the command line of the bbox width/height scatter script.

    Returns:
        argparse.Namespace: Namespace with ``config`` (config file path),
        ``ann_file`` (``'train'`` or ``'val'``) and ``out_dir`` (directory
        the figures are saved to).
    """
    parser = argparse.ArgumentParser(
        description='Width and height distribution of category and bbox')
    parser.add_argument('config', help='config file path')
    # ``main`` only knows how to resolve these two values; rejecting anything
    # else here yields a usage error instead of a late failure.
    parser.add_argument(
        '--ann-file',
        default='train',
        choices=['train', 'val'],
        help='dataset ann-file')
    parser.add_argument(
        '--out-dir',
        default='./data',
        type=str,
        help='If there is no display interface, you can save it')
    args = parser.parse_args()
    return args


def main():
    """Save one bbox width-vs-height scatter plot per category.

    The annotation file is taken from the train or val dataloader of the
    config given on the command line. Each figure is saved as
    ``<out-dir>/<category_name>.jpg``.

    Raises:
        ValueError: If ``--ann-file`` is neither ``'train'`` nor ``'val'``.
    """
    import os  # function scope: the file itself only imports ``os.path``

    args = parse_args()
    cfg = Config.fromfile(args.config)

    data_root = cfg.data_root
    if args.ann_file == 'train':
        ann_file = cfg.train_dataloader.dataset.ann_file
    elif args.ann_file == 'val':
        ann_file = cfg.val_dataloader.dataset.ann_file
    else:
        # Without this branch ``ann_file`` would be unbound further down.
        raise ValueError(
            f"--ann-file must be 'train' or 'val', got {args.ann_file!r}")

    coco = COCO(osp.join(data_root, ann_file))
    categories = coco.loadCats(coco.getCatIds())

    # savefig does not create missing directories.
    os.makedirs(args.out_dir, exist_ok=True)

    for category in categories:
        category_name = category['name']
        # Select by id: pycocotools treats a bare ``str`` passed as
        # ``catNms`` as a sequence, which turns the name lookup into a
        # substring test ('hot dog' would also pull in 'dog').
        annotations = coco.loadAnns(coco.getAnnIds(catIds=[category['id']]))

        # The index-2 and index-3 entries of each 'bbox' are plotted as width
        # and height. Fresh lists per category keep boxes of earlier
        # categories out of this category's plot.
        bbox_widths = [ann['bbox'][2] for ann in annotations]
        bbox_heights = [ann['bbox'][3] for ann in annotations]

        fig = plt.figure()
        plt.scatter(bbox_widths, bbox_heights)
        plt.xlabel('Width of bbox')
        plt.ylabel('Height of bbox')
        plt.title(f'Current Display Category:{category_name}')
        fig.set_size_inches(35, 18)
        fig.savefig(osp.join(args.out_dir, f'{category_name}.jpg'))
        # One figure per category: close it so open figures do not pile up.
        plt.close(fig)


if __name__ == '__main__':
    main()
81 changes: 81 additions & 0 deletions tools/analysis_tools/browse_dataset_distribution_function3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp

import matplotlib.pyplot as plt
from mmengine.config import Config
from pycocotools.coco import COCO


def parse_args():
    """Parse the command line of the bbox aspect-ratio script.

    Returns:
        argparse.Namespace: Namespace with ``config`` (config file path),
        ``ann_file`` (``'train'`` or ``'val'``) and ``out_dir`` (directory
        the figures are saved to).
    """
    parser = argparse.ArgumentParser(
        description='Proportional distribution of bbox width and height')
    parser.add_argument('config', help='config file path')
    # ``main`` only knows how to resolve these two values; rejecting anything
    # else here yields a usage error instead of a late failure.
    parser.add_argument(
        '--ann-file',
        default='train',
        choices=['train', 'val'],
        help='dataset ann-file')
    parser.add_argument(
        '--out-dir',
        default='./data',
        type=str,
        help='If there is no display interface, you can save it')
    args = parser.parse_args()
    return args


def main():
    """Save one width/height-ratio frequency plot per category.

    For every category the bbox width/height ratio is rounded to two
    decimals, and the number of boxes sharing each rounded ratio is drawn as
    a scatter plot saved to ``<out-dir>/<category_name>.jpg``.

    Raises:
        ValueError: If ``--ann-file`` is neither ``'train'`` nor ``'val'``.
    """
    import os  # function scope: the file itself only imports ``os.path``
    from collections import Counter

    args = parse_args()
    cfg = Config.fromfile(args.config)

    data_root = cfg.data_root
    if args.ann_file == 'train':
        ann_file = cfg.train_dataloader.dataset.ann_file
    elif args.ann_file == 'val':
        ann_file = cfg.val_dataloader.dataset.ann_file
    else:
        # Without this branch ``ann_file`` would be unbound further down.
        raise ValueError(
            f"--ann-file must be 'train' or 'val', got {args.ann_file!r}")

    coco = COCO(osp.join(data_root, ann_file))
    categories = coco.loadCats(coco.getCatIds())

    # savefig does not create missing directories.
    os.makedirs(args.out_dir, exist_ok=True)

    for category in categories:
        category_name = category['name']
        # Select by id: pycocotools treats a bare ``str`` passed as
        # ``catNms`` as a sequence, which turns the name lookup into a
        # substring test ('hot dog' would also pull in 'dog').
        annotations = coco.loadAnns(coco.getAnnIds(catIds=[category['id']]))

        # A fresh counter per category keeps ratios of earlier categories out
        # of this plot. Boxes without a positive height are skipped because
        # their ratio is undefined (division by zero).
        ratio_counts = Counter(
            round(ann['bbox'][2] / ann['bbox'][3], 2)
            for ann in annotations
            if ann['bbox'][3] > 0)
        ratios = sorted(ratio_counts)
        counts = [ratio_counts[ratio] for ratio in ratios]

        fig = plt.figure()
        plt.scatter(ratios, counts)
        plt.xlabel('The width to height ratio')
        plt.ylabel('Quantity of same width to height ratio')
        plt.title(category_name)
        fig.set_size_inches(35, 18)
        fig.savefig(osp.join(args.out_dir, f'{category_name}.jpg'))
        # One figure per category: close it so open figures do not pile up.
        plt.close(fig)


if __name__ == '__main__':
    main()
86 changes: 86 additions & 0 deletions tools/analysis_tools/browse_dataset_distribution_function4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp

import matplotlib.pyplot as plt
from mmengine.config import Config
from pycocotools.coco import COCO


def parse_args():
    """Parse the command line of the bbox size-distribution script.

    Returns:
        argparse.Namespace: Namespace with ``config`` (config file path),
        ``ann_file`` (``'train'`` or ``'val'``), ``size`` (``'small'``,
        ``'medium'``, ``'large'`` or ``None`` when omitted) and ``out_dir``
        (directory the figure is saved to).
    """
    parser = argparse.ArgumentParser(
        description='Classification and area distribution')
    parser.add_argument('config', help='train config file path')
    # ``main`` only knows how to resolve these two values; rejecting anything
    # else here yields a usage error instead of a late failure.
    parser.add_argument(
        '--ann-file',
        default='train',
        choices=['train', 'val'],
        help='dataset ann-file')
    # The default stays ``None`` so that omitting the option is still
    # accepted by the parser; argparse does not check defaults against
    # ``choices``.
    parser.add_argument(
        '--size',
        default=None,
        choices=['small', 'medium', 'large'],
        help='bbox size group to plot: small, medium or large')
    parser.add_argument(
        '--out-dir',
        default='./data',
        type=str,
        help='If there is no display interface, you can save it')

    args = parser.parse_args()
    return args


def _size_bucket(area):
    """Return 'small', 'medium' or 'large' for an annotation area."""
    if area < 1024:  # 32 * 32
        return 'small'
    if area < 9216:  # 96 * 96
        return 'medium'
    return 'large'


def main():
    """Plot, per category, how many boxes fall into the requested size group.

    ``--size`` selects the group (``small``: area < 1024, ``medium``:
    1024 <= area < 9216, ``large``: everything else). The bar chart is saved
    as ``<out-dir>/<size>.jpg``. If ``--size`` is missing or unknown, a hint
    is printed and nothing is plotted.

    Raises:
        ValueError: If ``--ann-file`` is neither ``'train'`` nor ``'val'``.
    """
    import os  # function scope: the file itself only imports ``os.path``

    args = parse_args()
    cfg = Config.fromfile(args.config)

    figure_name = args.size
    if figure_name not in ('small', 'medium', 'large'):
        # Bail out before loading annotations rather than saving an empty
        # figure named after the invalid value.
        print('Please enter the correct size, such as small, medium, large')
        return

    data_root = cfg.data_root
    if args.ann_file == 'train':
        ann_file = cfg.train_dataloader.dataset.ann_file
    elif args.ann_file == 'val':
        ann_file = cfg.val_dataloader.dataset.ann_file
    else:
        # Without this branch ``ann_file`` would be unbound further down.
        raise ValueError(
            f"--ann-file must be 'train' or 'val', got {args.ann_file!r}")

    coco = COCO(osp.join(data_root, ann_file))
    categories = coco.loadCats(coco.getCatIds())

    labels = []
    size_counts = []
    for category in categories:
        # Select by id: pycocotools treats a bare ``str`` passed as
        # ``catNms`` as a sequence, which turns the name lookup into a
        # substring test ('hot dog' would also pull in 'dog').
        annotations = coco.loadAnns(coco.getAnnIds(catIds=[category['id']]))
        labels.append(category['name'])
        # Count per category; a running total across categories would make
        # every bar include the boxes of all categories before it.
        size_counts.append(
            sum(1 for ann in annotations
                if _size_bucket(ann['area']) == figure_name))

    fig = plt.figure()
    plt.bar(labels, size_counts, align='center')
    plt.xticks(rotation=70)
    # X,Y name and title
    plt.xlabel('Category')
    plt.ylabel('Area')
    plt.title(f'{figure_name}')
    fig.set_size_inches(35, 18)

    # savefig does not create missing directories.
    os.makedirs(args.out_dir, exist_ok=True)
    fig.savefig(osp.join(args.out_dir, f'{figure_name}.jpg'))
    plt.close(fig)


if __name__ == '__main__':
    main()