-
Notifications
You must be signed in to change notification settings - Fork 0
/
data.py
58 lines (54 loc) · 1.97 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import sys
sys.path.insert(0,'../incubator-mxnet/python')
from mxnet.test_utils import download
import os.path as osp
def verified(file_path, sha1hash):
    """Tell whether the SHA-1 digest of a file equals an expected hex string.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    the whole file is never held in memory at once.

    Args:
        file_path: path of the file to hash.
        sha1hash: expected SHA-1 digest as a hexadecimal string.

    Returns:
        True when the computed digest equals ``sha1hash``; otherwise a
        warning is printed and False is returned.
    """
    import hashlib

    chunk_size = 1048576  # bytes requested per read (1 MiB)
    digest = hashlib.sha1()
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b'',
        # which is what a binary file yields at end-of-file.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)

    if digest.hexdigest() == sha1hash:
        return True
    print('Found hash mismatch in file {}, possibly due to incomplete download.'.format(file_path))
    return False
# URL template for the pikachu dataset files; '{}' receives the remote file name.
url_format = 'https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/dataset/pikachu/{}'
# Remote file name -> expected SHA-1 hex digest of that file.
hashes = {
    'train.rec': 'e6bcb6ffba1ac04ff8a9b1115e650af56ee969c8',
    'train.idx': 'dcf7318b2602c06428b9988470c731621716c393',
    'val.rec': 'd6c33f799b4d058e82f2cb5bd9a976f69d72d520',
}
# Fetch every file into the 'data' directory, skipping any that already
# exists locally with a matching digest.
for k, v in hashes.items():
    fname = 'pikachu_{}'.format(k)
    target = osp.join('data', fname)
    url = url_format.format(k)
    if osp.exists(target) and verified(target, v):
        continue  # local copy is present and its hash matches
    print('Downloading', target, url)
    download(url, fname=fname, dirname='data', overwrite=True)
import mxnet.image as image
# Edge length passed to get_iterators, which builds the iterator shape
# (3, data_shape, data_shape) from it.
data_shape = 256
# Batch size passed to get_iterators and forwarded to both iterators.
batch_size = 32
def get_iterators(data_shape, batch_size):
    """Build the training and validation ``ImageDetIter`` objects.

    Both iterators read the pikachu record files under ``./data``.

    Args:
        data_shape: edge length; each iterator is created with the shape
            ``(3, data_shape, data_shape)``.
        batch_size: batch size given to both iterators.

    Returns:
        A 4-tuple ``(train_iter, val_iter, class_names, num_class)``.
    """
    class_names = ['pikachu']
    num_class = len(class_names)

    # Keyword arguments that are the same for both iterators.
    common = {
        'batch_size': batch_size,
        'data_shape': (3, data_shape, data_shape),
        'mean': True,
    }

    # Training iterator: shuffled, with the random-crop settings enabled.
    train_iter = image.ImageDetIter(
        path_imgrec='./data/pikachu_train.rec',
        path_imgidx='./data/pikachu_train.idx',
        shuffle=True,
        rand_crop=1,
        min_object_covered=0.95,
        max_attempts=200,
        **common)

    # Validation iterator: not shuffled, no index file, no crop settings.
    val_iter = image.ImageDetIter(
        path_imgrec='./data/pikachu_val.rec',
        shuffle=False,
        **common)

    return train_iter, val_iter, class_names, num_class
# Build the iterators when this module is executed or imported. Note that the
# validation iterator returned by get_iterators is bound to the name `test_data`.
train_data, test_data, class_names, num_class = get_iterators(data_shape, batch_size)
# Pull one batch from the training iterator (presumably a smoke test of the
# pipeline; confirm). NOTE(review): no reset of the iterator is visible here.
batch = train_data.next()
# print(batch)