-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathssd_target_vector_builder.py
139 lines (129 loc) · 6.14 KB
/
ssd_target_vector_builder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
from utils import class_to_one_hot_vector
class SSDTargetVectorBuilder(object):
    """Class for producing a target vector (as required by the SSD
    object detection model) for images given their bounding box
    annotations.

    Args:
        default_boxes_generator (DefaultBoxesGenerator): Instance of the
            `DefaultBoxesGenerator` class which provides default boxes in
            the form of instances of the `BoundingBox` class.
        class_to_index_map (dict): Dictionary for mapping class names
            (including the background class) to integer identifiers.
        background_class_name (string): The name of the "background"
            class, that is the class that corresponds to no object.
        iou_threshold (float, optional): Threshold on the IoU between a
            default box and a ground truth box for determining whether
            there's a match between the two or not. An IoU greater than or
            equal to the threshold indicates a match. Defaults to 0.5.
        label_smoothing_factor (float, optional): Factor used to smooth
            classification labels. Must be between 0 and 1. Defaults to
            0, which corresponds to not applying label smoothing.

    Raises:
        ValueError: If `label_smoothing_factor` is not between 0 and 1
            (both ends included).
    """

    def __init__(
        self,
        default_boxes_generator,
        class_to_index_map,
        background_class_name,
        iou_threshold=0.5,
        label_smoothing_factor=0.0
    ):
        # Enforce the documented range up front instead of silently
        # producing badly smoothed labels later on. The chained
        # comparison also rejects NaN.
        if not 0 <= label_smoothing_factor <= 1:
            raise ValueError(
                'label_smoothing_factor must be between 0 and 1, '
                'got {!r}'.format(label_smoothing_factor)
            )

        self.default_boxes_generator = default_boxes_generator
        self.class_to_index_map = class_to_index_map
        self.background_class_name = background_class_name
        self.iou_threshold = iou_threshold
        self.label_smoothing_factor = label_smoothing_factor

    def build_target_vector(self, annotation):
        """Builds a target vector for an image given its bounding box
        annotations.

        Args:
            annotation (dict): Dictionary containing annotations for a
                single image. The dictionary is expected to have a
                specific structure, that is:
                {
                    'width': The image's width,
                    'height': The image's height,
                    'objects': [
                        {
                            'class': The class label for this object,
                            'bounding_box': The bounding box for this
                                object. It must be an instance of the
                                `BoundingBox` class
                        },
                        ...
                    ]
                }

        Returns:
            tuple(numpy.array, numpy.array): A pair containing the
                target vector for classification and the target vector for
                localization. Both vectors are 1D numpy arrays.

        Raises:
            KeyError: If a class name (of a matched object or of the
                background class) is missing from `class_to_index_map`.
        """
        target_vector_for_classification = []
        target_vector_for_localization = []

        # The background target is identical for every unmatched default
        # box, so it is built at most once per call (lazily, on the first
        # unmatched box) and then reused. This assumes that
        # `class_to_one_hot_vector` returns the same values when called
        # with the same arguments.
        background_one_hot = None

        default_boxes = self.default_boxes_generator.generate_default_boxes(
            annotation['height'],
            annotation['width']
        )
        for default_box in default_boxes:
            # Match the default box with at most 1 ground truth box
            matched_box, matched_class = self._match_default_box(
                default_box,
                annotation['objects']
            )

            if matched_box is not None:
                # Set the target class for the default box as that of
                # the matched ground truth box
                target_vector_for_classification.extend(
                    self._one_hot_for_class(matched_class)
                )
                # Set the offsets for the default box as the offsets
                # from the matched ground truth box
                target_vector_for_localization.extend(
                    matched_box.deviation_relative_to(default_box)
                )
                continue

            # Set the target class for the default box as the
            # background class
            if background_one_hot is None:
                background_one_hot = tuple(
                    self._one_hot_for_class(self.background_class_name)
                )
            target_vector_for_classification.extend(background_one_hot)

            # This default box didn't match any ground truth box,
            # meaning that it is a "negative" box. Negative boxes do
            # not contribute to the loss, so technically it doesn't
            # matter how we set their target offsets. At the same
            # time, however, one must be able to identify the
            # negative boxes in order to factor them out while
            # computing the loss. The trick here is to use a special
            # value (`np.inf` in this case) for all 4 target offsets
            # so that during the computation of the loss we can
            # identify negative boxes by checking which values of
            # `target_vector_for_localization` equal that special
            # value
            target_vector_for_localization.extend((np.inf,) * 4)

        return (
            np.array(target_vector_for_classification),
            np.array(target_vector_for_localization)
        )

    def _match_default_box(self, default_box, objects):
        """Finds the ground truth box that best matches a default box.

        Args:
            default_box: The default box to match. It must provide an
                `intersection_over_union` method.
            objects (iterable): The annotated objects of the image, each
                one a dictionary with the keys 'class' and
                'bounding_box'.

        Returns:
            tuple: The pair (bounding box, class label) of the object
                whose IoU with the default box is the highest among those
                reaching `iou_threshold`, or `(None, None)` if no object
                reaches the threshold. In case of a tie the object that
                comes first wins.
        """
        best_iou = 0
        best_ground_truth_box = None
        best_ground_truth_box_class = None
        for obj in objects:
            # Compute the intersection over union between the
            # default box and the ground truth box
            ground_truth_box = obj['bounding_box']
            iou = default_box.intersection_over_union(ground_truth_box)
            # The strict comparison against `best_iou` keeps the first
            # candidate on ties and never accepts an IoU of 0
            if iou >= self.iou_threshold and iou > best_iou:
                # Update the best match for this default box
                best_iou = iou
                best_ground_truth_box = ground_truth_box
                best_ground_truth_box_class = obj['class']
        return best_ground_truth_box, best_ground_truth_box_class

    def _one_hot_for_class(self, class_name):
        """Builds the classification target for a class name by passing
        its integer identifier, the number of classes and the label
        smoothing factor to `class_to_one_hot_vector`.
        """
        return class_to_one_hot_vector(
            class_id=self.class_to_index_map[class_name],
            n_classes=len(self.class_to_index_map),
            smoothing_factor=self.label_smoothing_factor
        )