forked from unicode-org/ml-confusables-generator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_preprocessing.py
235 lines (193 loc) · 8.55 KB
/
data_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# Copyright (C) 2020 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
import numpy as np
import os
import tensorflow as tf
import tensorflow_addons as tfa
def _get_label(file_path, one_hot, num_classes, class_names):
    """Derive the label of a sample from its file path.

    The class name is taken as the part of the file name that precedes the
    first underscore (the naming scheme is expected to be the one produced by
    'source/generate_source_file.py').

    Args:
        file_path: Str, path to the file whose name encodes the class.
        one_hot: Bool, if true, return the label as a one-hot encoding.
        num_classes: Int, total number of classes (depth of the one-hot
            vector).
        class_names: List of Str, all class names; the position of a name in
            this list is its label index.

    Returns:
        If one_hot is true: tf.Tensor, one-hot encoding of the label, for
            example '[0,0,1,0,0]'.
        Otherwise: tf.Tensor, index of the label in class_names, for
            example '2'.
    """
    # The last path component is the file name.
    path_parts = tf.strings.split(file_path, os.path.sep)
    base_name = path_parts[-1]

    # Everything before the first underscore names the class.
    name_parts = tf.strings.split(base_name, '_')
    target_class = name_parts[0]

    # Smallest position in class_names that matches the class name.
    matches = tf.equal(class_names, target_class)
    idx = tf.reduce_min(tf.where(matches))

    if not one_hot:
        return idx
    return tf.one_hot(idx, num_classes)
def _decode_img(img):
    """Turn PNG-encoded bytes into a float32 image tensor.

    Args:
        img: tf.Tensor, PNG-encoded image of type String.

    Returns:
        tf.Tensor, decoded image converted to type float32.
    """
    # Decode the compressed PNG string into an image tensor.
    decoded = tf.io.decode_png(img)
    # Convert the pixel data type to float32.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return as_float
def process_path(file_path, one_hot, num_classes, class_names):
    """Load the image stored at a path together with its label.

    Args:
        file_path: Str, path to image file. The file name is also used to
            derive the label.
        one_hot: Bool, if true, return the label as a one-hot encoding.
        num_classes: Int, number of classes in total.
        class_names: List of Str, list of all class names.

    Returns:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, one-hot encoding of the label if one_hot is true,
            otherwise the label index.
    """
    # The label comes from the file name, the pixels from the file content.
    label = _get_label(file_path, one_hot, num_classes, class_names)
    encoded = tf.io.read_file(file_path)
    return _decode_img(encoded), label
def process_img_path(file_path):
    """Load an image and use its file name as the label.

    Counterpart of process_path for data without class labels: no class list
    is consulted, the last component of the path is returned as the label.

    Args:
        file_path: Str, path to image file.

    Returns:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, the file name (last path component) as a string.
    """
    path_parts = tf.strings.split(file_path, os.path.sep)
    file_name = path_parts[-1]
    encoded = tf.io.read_file(file_path)
    return _decode_img(encoded), file_name
def convert_format(img, label, grayscale_in, grayscale_out):
    """Convert an image between grayscale and RGB when the formats differ.

    The image is returned untouched when the input and output formats are
    the same.

    Args:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor. Passed through unchanged.
        grayscale_in: Bool, format of the input image. If true, handled as
            grayscale image. If false, handled as RGB image.
        grayscale_out: Bool, format of the output image. If true, produce a
            grayscale image. If false, produce an RGB image.

    Returns:
        img: tf.Tensor, image tensor in the requested format.
        label: tf.Tensor, label tensor.
    """
    if grayscale_in:
        if not grayscale_out:
            # Grayscale input, RGB output.
            img = tf.image.grayscale_to_rgb(img)
    elif grayscale_out:
        # RGB input, grayscale output.
        img = tf.image.rgb_to_grayscale(img)
    return img, label
def random_rotate(img, label, stddev):
    """Rotate image by x degrees, with x drawn at random.

    Variable x follows a normal distribution with mean 0 and 'stddev' as
    standard deviation. x is truncated to plus minus 2 standard deviations
    (re-sampled if not within range) and rounded to a whole number of
    degrees according to bankers rounding.

    Args:
        img: tf.Tensor, image tensor.
        label: tf.Tensor, label tensor. Passed through unchanged.
        stddev: Float, standard deviation of the rotation angle in degrees.

    Returns:
        img: tf.Tensor, image after rotation.
        label: tf.Tensor, label tensor.
    """
    # Values whose magnitude is more than 2 standard deviations from the
    # mean are dropped and re-picked.
    degree = tf.random.truncated_normal(shape=[], stddev=stddev)
    # Rounds half to even. Also known as bankers rounding.
    degree = tf.math.round(degree)
    # tfa.image.rotate takes its angle in radians, so the sampled degrees
    # must be converted before rotating.
    radians = degree * (np.pi / 180.0)
    return tfa.image.rotate(img, radians), label
def random_zoom(img, label, max_percent, stddev, img_height, img_width):
    """Crop to zoom in on the image by x percent, with x drawn at random.

    One centered crop is prepared for every whole percent from 0 (image
    unchanged) up to 'max_percent', each resized back to
    (img_height, img_width). x follows a normal distribution with mean 0 and
    'stddev' as standard deviation, truncated to plus minus 2 standard
    deviations (re-sampled if not within range). The absolute value of x is
    rounded according to bankers rounding and capped at the largest prepared
    zoom so that the selection can never fall outside the crops.

    Args:
        img: tf.Tensor, image tensor.
        label: tf.Tensor, label tensor. Passed through unchanged.
        max_percent: Float, the maximum percent to zoom in. For example, if
            max_percent = 4.0, at most, zoom in at 96%. Fractional parts are
            ignored; values below 0 are treated as 0 (no zoom).
        stddev: Float, standard deviation of the normal distribution.
        img_height: Int, height of the original image tensor.
        img_width: Int, width of the original image tensor.

    Returns:
        img: tf.Tensor, zoomed image of size (img_height, img_width).
        label: tf.Tensor, label tensor.
    """
    # Build the crop scales from integer percent steps. A float-step
    # np.arange has an unstable length (it may or may not reach the end
    # value), so the 0% and max_percent crops would not be reliably present.
    max_step = max(int(max_percent), 0)
    scales = 1.0 - np.arange(max_step + 1) / 100.0  # index k -> k% zoom

    # Centered boxes in normalized [y1, x1, y2, x2] coordinates.
    half = 0.5 * scales
    low = 0.5 - half
    high = 0.5 + half
    boxes = np.stack([low, low, high, high], axis=1)

    # Get one cropped-and-resized image per scale; every box refers to the
    # single image in the batch, hence all box indices are 0.
    crops = tf.image.crop_and_resize(
        [img], boxes=boxes,
        box_indices=np.zeros(len(scales), dtype=np.int32),
        crop_size=(img_height, img_width))

    # TODO: Change distribution here
    idx = tf.random.truncated_normal(shape=[], stddev=stddev)
    idx = tf.math.abs(idx)  # idx >= 0
    idx = tf.math.round(idx)  # Bankers rounding
    idx = tf.cast(idx, tf.dtypes.int32)
    # A large stddev can produce an index beyond the prepared crops; cap it
    # at the strongest available zoom instead of failing on the lookup.
    idx = tf.minimum(idx, len(scales) - 1)
    return crops[idx], label
def augment(img, label, random_rotate, rotate_stddev, random_zoom,
            zoom_percent, zoom_stddev, height, width):
    """Data augmentation. Two augmentation methods can be carried out:
        1. Random rotate: randomly rotate image by x degree, as implemented
            by the module-level function 'random_rotate'.
        2. Random zoom: crop (zoom in) on the image by x percent, as
            implemented by the module-level function 'random_zoom'.

    Args:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor.
        random_rotate: Bool, if true, randomly rotate image. Image dimension
            won't change.
        rotate_stddev: Float, standard deviation of rotation angle in degree,
            which follows normal distribution.
        random_zoom: Bool, if true, randomly zoom in on the image. Image
            dimension won't change.
        zoom_percent: Float, the maximum percentage to zoom in on the image.
        zoom_stddev: Float, standard deviation of zoom-in percentage, which
            follows half-normal distribution.
        height: Int, height of the image.
        width: Int, width of the image.

    Returns:
        img: tf.Tensor: image tensor in type tf.float32.
        label: tf.Tensor: label tensor.
    """
    # The Bool parameters 'random_rotate' and 'random_zoom' shadow the
    # module-level functions of the same name inside this body, so the
    # functions have to be looked up through the module namespace.
    module_scope = globals()

    # Randomly rotate by -2 to +2 standard deviations.
    if random_rotate:
        rotate_fn = module_scope['random_rotate']
        # The helper returns an (img, label) pair; unpack both.
        img, label = rotate_fn(img, label, rotate_stddev)

    # Randomly zoom in on image by maximum zoom_percent.
    if random_zoom:
        zoom_fn = module_scope['random_zoom']
        img, label = zoom_fn(img, label, zoom_percent, zoom_stddev,
                             height, width)

    return img, label
def resize(img, label, height, width):
    """Resize an image to the given height and width.

    Used to make images compatible with the input size of a Keras model.
    TODO: Add custom CNN models to avoid resizing

    Args:
        img: tf.Tensor, image tensor in type tf.float32.
        label: tf.Tensor, label tensor. Passed through unchanged.
        height: Int, intended output height of the image.
        width: Int, intended output width of the image.

    Returns:
        img: tf.Tensor, resized image tensor.
        label: tf.Tensor, label tensor.
    """
    target_size = (height, width)
    resized = tf.image.resize(img, target_size)
    return resized, label