From a25c6b21593a77d790fdefe775ad1d3db82c4a0a Mon Sep 17 00:00:00 2001
From: Karan Jariwala
Date: Wed, 27 Mar 2019 15:49:43 -0700
Subject: [PATCH 1/4] Added RNN integration test for fit() API

---
 ci/docker/runtime_functions.sh                |  14 +
 tests/nightly/JenkinsfileForBinaries          |  16 ++
 tests/nightly/estimator/test_sentiment_rnn.py | 272 ++++++++++++++++++
 3 files changed, 302 insertions(+)
 create mode 100644 tests/nightly/estimator/test_sentiment_rnn.py

diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index de1b7795ce69..128ae2babc35 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -1296,6 +1296,20 @@ nightly_scala_demo_test_cpu() {
     bash bin/run_im.sh
 }
 
+nightly_estimator_rnn_gpu() {
+    set -ex
+    cd /work/mxnet/tests/nightly/estimator
+    export PYTHONPATH=/work/mxnet/python/
+    python test_sentiment_rnn.py --type gpu
+}
+
+nightly_estimator_rnn_cpu() {
+    set -ex
+    cd /work/mxnet/tests/nightly/estimator
+    export PYTHONPATH=/work/mxnet/python/
+    python test_sentiment_rnn.py --type cpu
+}
+
 # Deploy
 
 deploy_docs() {
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index 53e1c30e188f..c18f48a5a272 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -106,6 +106,22 @@ core_logic: {
           utils.docker_run('ubuntu_nightly_gpu', 'nightly_tutorial_test_ubuntu_python3_gpu', true, '1500m')
         }
       }
+    },
+    'estimator: RNN GPU': {
+      node(NODE_LINUX_GPU) {
+        ws('workspace/estimator-test-rnn-gpu') {
+          utils.unpack_and_init('gpu', mx_lib)
+          utils.docker_run('ubuntu_nightly_gpu', 'nightly_estimator_test_rnn_gpu', true)
+        }
+      }
+    },
+    'estimator: RNN CPU': {
+      node(NODE_LINUX_CPU) {
+        ws('workspace/estimator-test-rnn-cpu') {
+          utils.unpack_and_init('cpu', mx_lib)
+          utils.docker_run('ubuntu_nightly_cpu', 'nightly_estimator_test_rnn_cpu', true)
+        }
+      }
     }
   }
 }
diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py
new file mode 100644
index 000000000000..f0e72532b86f
--- /dev/null
+++ b/tests/nightly/estimator/test_sentiment_rnn.py
@@ -0,0 +1,272 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Gluon Text Sentiment Classification Example using RNN/CNN
+Example modified from below link:
+https://github.com/d2l-ai/d2l-en/blob/master/chapter_natural-language-processing/sentiment-analysis-rnn.md
+https://github.com/d2l-ai/d2l-en/blob/master/chapter_natural-language-processing/sentiment-analysis-cnn.md"""
+
+import argparse
+import os
+import tarfile
+import random
+import collections
+import mxnet as mx
+from mxnet import nd
+from mxnet.contrib import text
+from mxnet.gluon import data as gdata, loss as gloss, utils as gutils, nn, rnn
+from mxnet.gluon.estimator import estimator as est
+
+
+class TextCNN(nn.Block):
+    def __init__(self, vocab, embed_size, kernel_sizes, num_channels,
+                 **kwargs):
+        super(TextCNN, self).__init__(**kwargs)
+        self.embedding = nn.Embedding(len(vocab), embed_size)
+        # The embedding layer does not participate in training
+        self.constant_embedding = nn.Embedding(len(vocab), embed_size)
+        self.dropout = nn.Dropout(0.5)
+        self.decoder = nn.Dense(2)
+        # The max-over-time pooling layer has no weight, so it can share an
+        # instance
+        self.pool = nn.GlobalMaxPool1D()
+        # Create multiple one-dimensional convolutional layers
+        self.convs = nn.Sequential()
+        for c, k in zip(num_channels, kernel_sizes):
+            self.convs.add(nn.Conv1D(c, k, activation='relu'))
+
+    def forward(self, inputs):
+        # Concatenate the output of two embedding layers with shape of
+        # (batch size, number of words, word vector dimension) by word vector
+        embeddings = nd.concat(
+            self.embedding(inputs), self.constant_embedding(inputs), dim=2)
+        # According to the input format required by Conv1D, the word vector
+        # dimension, that is, the channel dimension of the one-dimensional
+        # convolutional layer, is transformed into the previous dimension
+        embeddings = embeddings.transpose((0, 2, 1))
+        # For each one-dimensional convolutional layer, after max-over-time
+        # pooling, an NDArray with the shape of (batch size, channel size, 1)
+        # can be obtained. Use the flatten function to remove the last
+        # dimension and then concatenate on the channel dimension
+        encoding = nd.concat(*[nd.flatten(
+            self.pool(conv(embeddings))) for conv in self.convs], dim=1)
+        # After applying the dropout method, use a fully connected layer to
+        # obtain the output
+        outputs = self.decoder(self.dropout(encoding))
+        return outputs
+
+
+class BiRNN(nn.Block):
+    def __init__(self, vocab, embed_size, num_hiddens, num_layers, **kwargs):
+        super(BiRNN, self).__init__(**kwargs)
+        self.embedding = nn.Embedding(len(vocab), embed_size)
+        # Set Bidirectional to True to get a bidirectional recurrent neural
+        # network
+        self.encoder = rnn.LSTM(num_hiddens, num_layers=num_layers,
+                                bidirectional=True, input_size=embed_size)
+        self.decoder = nn.Dense(2)
+
+    def forward(self, inputs):
+        # The shape of inputs is (batch size, number of words). Because LSTM
+        # needs to use sequence as the first dimension, the input is
+        # transformed and the word feature is then extracted. The output shape
+        # is (number of words, batch size, word vector dimension).
+        embeddings = self.embedding(inputs.T)
+        # The shape of states is (number of words, batch size, 2 * number of
+        # hidden units).
+        states = self.encoder(embeddings)
+        # Concatenate the hidden states of the initial time step and final
+        # time step to use as the input of the fully connected layer.
+        # Its shape is (batch size, 4 * number of hidden units)
+        encoding = nd.concat(states[0], states[-1])
+        outputs = self.decoder(encoding)
+        return outputs
+
+
+def download_imdb(data_dir='./data'):
+    '''
+    Download and extract the IMDB dataset
+    '''
+    url = ('http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz')
+    sha1 = '01ada507287d82875905620988597833ad4e0903'
+    if not os.path.exists(data_dir):
+        os.makedirs(data_dir)
+    file_path = os.path.join(data_dir, 'aclImdb_v1.tar.gz')
+    if not os.path.isfile(file_path):
+        file_path = gutils.download(url, data_dir, sha1_hash=sha1)
+    with tarfile.open(file_path, 'r') as f:
+        f.extractall(data_dir)
+
+
+def read_imdb(folder='train'):
+    '''
+    Read the IMDB dataset
+    '''
+    data = []
+    for label in ['pos', 'neg']:
+        folder_name = os.path.join('./data/aclImdb/', folder, label)
+        for file in os.listdir(folder_name):
+            with open(os.path.join(folder_name, file), 'rb') as f:
+                review = f.read().decode('utf-8').replace('\n', '').lower()
+                data.append([review, 1 if label == 'pos' else 0])
+    random.shuffle(data)
+    return data
+
+
+def get_tokenized_imdb(data):
+    '''
+    Tokenized the words
+    '''
+
+    def tokenizer(text):
+        return [tok.lower() for tok in text.split(' ')]
+
+    return [tokenizer(review) for review, _ in data]
+
+
+def get_vocab_imdb(data):
+    '''
+    Get the indexed tokens
+    '''
+    tokenized_data = get_tokenized_imdb(data)
+    counter = collections.Counter([tk for st in tokenized_data for tk in st])
+    return text.vocab.Vocabulary(counter, min_freq=5)
+
+
+def preprocess_imdb(data, vocab):
+    '''
+    Make the length of each comment 500 by truncating or adding 0s
+    '''
+    max_l = 500
+
+    def pad(x):
+        return x[:max_l] if len(x) > max_l else x + [0] * (max_l - len(x))
+
+    tokenized_data = get_tokenized_imdb(data)
+    features = nd.array([pad(vocab.to_indices(x)) for x in tokenized_data])
+    labels = nd.array([score for _, score in data])
+    return features, labels
+
+
+def test_estimator_cpu():
+    '''
+    Test estimator by doing one pass over each model with synthetic data
+    '''
+    models = ['TextCNN', 'BiRNN']
+    context = mx.cpu()
+    batch_size = 64
+    num_epochs = 1
+    lr = 0.01
+    embed_size = 100
+
+    train_data = mx.nd.random.randint(low=0, high=100, shape=(2 * batch_size, 500))
+    train_label = mx.nd.random.randint(low=0, high=2, shape=(2 * batch_size,))
+    val_data = mx.nd.random.randint(low=0, high=100, shape=(batch_size, 500))
+    val_label = mx.nd.random.randint(low=0, high=2, shape=(batch_size,))
+
+    train_dataloader = gdata.DataLoader(dataset=gdata.ArrayDataset(train_data, train_label),
+                                        batch_size=batch_size, shuffle=True)
+    val_dataloader = gdata.DataLoader(dataset=gdata.ArrayDataset(val_data, val_label),
+                                      batch_size=batch_size)
+    vocab_list = mx.nd.zeros(shape=(100,))
+
+    # Get the model
+    for model in models:
+        if model == 'TextCNN':
+            kernel_sizes, nums_channels = [3, 4, 5], [100, 100, 100]
+            net = TextCNN(vocab_list, embed_size, kernel_sizes, nums_channels)
+        else:
+            num_hiddens, num_layers = 100, 2
+            net = BiRNN(vocab_list, embed_size, num_hiddens, num_layers)
+        net.initialize(mx.init.Xavier(), ctx=context)
+        # Define trainer
+        trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
+        # Define loss and evaluation metrics
+        loss = gloss.SoftmaxCrossEntropyLoss()
+        acc = mx.metric.Accuracy()
+
+        # Define estimator
+        e = est.Estimator(net=net, loss=loss, metrics=acc,
+                          trainers=trainer, context=context)
+        # Begin training
+        e.fit(train_data=train_dataloader, val_data=val_dataloader,
+              epochs=num_epochs, batch_size=batch_size)
+
+
+def test_estimator_gpu():
+    '''
+    Test estimator by training Bidirectional RNN for 5 epochs on the IMDB dataset
+    and verify accuracy
+    '''
+    batch_size = 64
+    num_epochs = 5
+    lr = 0.01
+    embed_size = 100
+
+    # Set context
+    if mx.context.num_gpus() > 0:
+        ctx = mx.gpu(0)
+    else:
+        ctx = mx.cpu()
+
+    # data
+    download_imdb()
+    train_data, test_data = read_imdb('train'), read_imdb('test')
+    vocab = get_vocab_imdb(train_data)
+
+    train_set = gdata.ArrayDataset(*preprocess_imdb(train_data, vocab))
+    test_set = gdata.ArrayDataset(*preprocess_imdb(test_data, vocab))
+    train_dataloader = gdata.DataLoader(train_set, batch_size, shuffle=True)
+    test_dataloader = gdata.DataLoader(test_set, batch_size)
+
+    # Model
+    num_hiddens, num_layers = 100, 2
+    net = BiRNN(vocab, embed_size, num_hiddens, num_layers)
+    net.initialize(mx.init.Xavier(), ctx=ctx)
+
+    glove_embedding = text.embedding.create(
+        'glove', pretrained_file_name='glove.6B.100d.txt', vocabulary=vocab)
+
+    net.embedding.weight.set_data(glove_embedding.idx_to_vec)
+    net.embedding.collect_params().setattr('grad_req', 'null')
+
+    # Define Trainer
+    trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
+    # Define loss and evaluation metrics
+    loss = gloss.SoftmaxCrossEntropyLoss()
+    acc = mx.metric.Accuracy()
+
+    # Define estimator
+    e = est.Estimator(net=net, loss=loss, metrics=acc,
+                      trainers=trainer, context=ctx)
+    # Begin training
+    e.fit(train_data=train_dataloader, val_data=test_dataloader,
+          epochs=num_epochs)
+
+    assert e.train_stats['train_' + acc.name][num_epochs - 1] > 0.70
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='test gluon estimator')
+    parser.add_argument('--type', type=str, default='cpu')
+    opt = parser.parse_args()
+    if opt.type == 'cpu':
+        test_estimator_cpu()
+    elif opt.type == 'gpu':
+        test_estimator_gpu()
+    else:
+        raise RuntimeError("Unknown test type")

From ae81b08fd94686d0501fe4e3c3762f3fef154db5 Mon Sep 17 00:00:00 2001
From: Karan Jariwala
Date: Mon, 1 Apr 2019 16:12:01 -0700
Subject: [PATCH 2/4] Addressed review comments: change in JenkinFile, tmp directory, ctx with condense if/else, renamed imports

---
 tests/nightly/Jenkinsfile                     | 16 ++++
 tests/nightly/JenkinsfileForBinaries          | 16 ------
 tests/nightly/estimator/test_sentiment_rnn.py | 50 +++++++++----------
 3 files changed, 39 insertions(+), 43 deletions(-)

diff --git a/tests/nightly/Jenkinsfile b/tests/nightly/Jenkinsfile
index 758c86476f75..f76503c62daf 100755
--- a/tests/nightly/Jenkinsfile
+++ b/tests/nightly/Jenkinsfile
@@ -136,6 +136,22 @@ core_logic: {
           utils.docker_run('ubuntu_nightly_cpu', 'nightly_test_javascript', false)
         }
       }
+    },
+    'estimator: RNN GPU': {
+      node(NODE_LINUX_GPU) {
+        ws('workspace/estimator-test-rnn-gpu') {
+          utils.unpack_and_init('gpu', mx_lib)
+          utils.docker_run('ubuntu_nightly_gpu', 'nightly_estimator_test_rnn_gpu', true)
+        }
+      }
+    },
+    'estimator: RNN CPU': {
+      node(NODE_LINUX_CPU) {
+        ws('workspace/estimator-test-rnn-cpu') {
+          utils.unpack_and_init('cpu', mx_lib)
+          utils.docker_run('ubuntu_nightly_cpu', 'nightly_estimator_test_rnn_cpu', true)
+        }
+      }
     }
   }
 }
diff --git a/tests/nightly/JenkinsfileForBinaries b/tests/nightly/JenkinsfileForBinaries
index c18f48a5a272..53e1c30e188f 100755
--- a/tests/nightly/JenkinsfileForBinaries
+++ b/tests/nightly/JenkinsfileForBinaries
@@ -106,22 +106,6 @@ core_logic: {
           utils.docker_run('ubuntu_nightly_gpu', 'nightly_tutorial_test_ubuntu_python3_gpu', true, '1500m')
         }
       }
-    },
-    'estimator: RNN GPU': {
-      node(NODE_LINUX_GPU) {
-        ws('workspace/estimator-test-rnn-gpu') {
-          utils.unpack_and_init('gpu', mx_lib)
-          utils.docker_run('ubuntu_nightly_gpu', 'nightly_estimator_test_rnn_gpu', true)
-        }
-      }
-    },
-    'estimator: RNN CPU': {
-      node(NODE_LINUX_CPU) {
-        ws('workspace/estimator-test-rnn-cpu') {
-          utils.unpack_and_init('cpu', mx_lib)
-          utils.docker_run('ubuntu_nightly_cpu', 'nightly_estimator_test_rnn_cpu', true)
-        }
-      }
     }
   }
 }
diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py
index f0e72532b86f..371adb5d1eeb 100644
--- a/tests/nightly/estimator/test_sentiment_rnn.py
+++ b/tests/nightly/estimator/test_sentiment_rnn.py
@@ -26,9 +26,9 @@
 import random
 import collections
 import mxnet as mx
-from mxnet import nd
+from mxnet import nd, gluon
 from mxnet.contrib import text
-from mxnet.gluon import data as gdata, loss as gloss, utils as gutils, nn, rnn
+from mxnet.gluon import nn, rnn
 from mxnet.gluon.estimator import estimator as est
 
 
@@ -97,7 +97,7 @@ def forward(self, inputs):
         return outputs
 
 
-def download_imdb(data_dir='./data'):
+def download_imdb(data_dir='/tmp/data'):
     '''
     Download and extract the IMDB dataset
     '''
@@ -107,7 +107,7 @@ def download_imdb(data_dir='./data'):
         os.makedirs(data_dir)
     file_path = os.path.join(data_dir, 'aclImdb_v1.tar.gz')
     if not os.path.isfile(file_path):
-        file_path = gutils.download(url, data_dir, sha1_hash=sha1)
+        file_path = gluon.utils.download(url, data_dir, sha1_hash=sha1)
     with tarfile.open(file_path, 'r') as f:
         f.extractall(data_dir)
 
@@ -118,7 +118,7 @@ def read_imdb(folder='train'):
     '''
     data = []
     for label in ['pos', 'neg']:
-        folder_name = os.path.join('./data/aclImdb/', folder, label)
+        folder_name = os.path.join('/tmp/data/aclImdb/', folder, label)
         for file in os.listdir(folder_name):
             with open(os.path.join(folder_name, file), 'rb') as f:
                 review = f.read().decode('utf-8').replace('\n', '').lower()
@@ -178,9 +178,9 @@ def test_estimator_cpu():
     val_data = mx.nd.random.randint(low=0, high=100, shape=(batch_size, 500))
     val_label = mx.nd.random.randint(low=0, high=2, shape=(batch_size,))
 
-    train_dataloader = gdata.DataLoader(dataset=gdata.ArrayDataset(train_data, train_label),
+    train_dataloader = gluon.data.DataLoader(dataset=gluon.data.ArrayDataset(train_data, train_label),
                                         batch_size=batch_size, shuffle=True)
-    val_dataloader = gdata.DataLoader(dataset=gdata.ArrayDataset(val_data, val_label),
+    val_dataloader = gluon.data.DataLoader(dataset=gluon.data.ArrayDataset(val_data, val_label),
                                       batch_size=batch_size)
     vocab_list = mx.nd.zeros(shape=(100,))
 
@@ -196,7 +196,7 @@ def test_estimator_cpu():
         # Define trainer
         trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
         # Define loss and evaluation metrics
-        loss = gloss.SoftmaxCrossEntropyLoss()
+        loss = gluon.loss.SoftmaxCrossEntropyLoss()
         acc = mx.metric.Accuracy()
 
         # Define estimator
@@ -218,20 +218,17 @@ def test_estimator_gpu():
     embed_size = 100
 
     # Set context
-    if mx.context.num_gpus() > 0:
-        ctx = mx.gpu(0)
-    else:
-        ctx = mx.cpu()
+    ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
 
     # data
     download_imdb()
     train_data, test_data = read_imdb('train'), read_imdb('test')
     vocab = get_vocab_imdb(train_data)
 
-    train_set = gdata.ArrayDataset(*preprocess_imdb(train_data, vocab))
-    test_set = gdata.ArrayDataset(*preprocess_imdb(test_data, vocab))
-    train_dataloader = gdata.DataLoader(train_set, batch_size, shuffle=True)
-    test_dataloader = gdata.DataLoader(test_set, batch_size)
+    train_set = gluon.data.ArrayDataset(*preprocess_imdb(train_data, vocab))
+    test_set = gluon.data.ArrayDataset(*preprocess_imdb(test_data, vocab))
+    train_dataloader = gluon.data.DataLoader(train_set, batch_size, shuffle=True)
+    test_dataloader = gluon.data.DataLoader(test_set, batch_size)
 
     # Model
     num_hiddens, num_layers = 100, 2
@@ -247,7 +244,7 @@ def test_estimator_gpu():
     # Define Trainer
     trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
     # Define loss and evaluation metrics
-    loss = gloss.SoftmaxCrossEntropyLoss()
+    loss = gluon.loss.SoftmaxCrossEntropyLoss()
     acc = mx.metric.Accuracy()
 
     # Define estimator
@@ -260,13 +257,12 @@ def test_estimator_gpu():
     assert e.train_stats['train_' + acc.name][num_epochs - 1] > 0.70
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='test gluon estimator')
-    parser.add_argument('--type', type=str, default='cpu')
-    opt = parser.parse_args()
-    if opt.type == 'cpu':
-        test_estimator_cpu()
-    elif opt.type == 'gpu':
-        test_estimator_gpu()
-    else:
-        raise RuntimeError("Unknown test type")
+parser = argparse.ArgumentParser(description='test gluon estimator')
+parser.add_argument('--type', type=str, default='cpu')
+opt = parser.parse_args()
+if opt.type == 'cpu':
+    test_estimator_cpu()
+elif opt.type == 'gpu':
+    test_estimator_gpu()
+else:
+    raise RuntimeError("Unknown test type")

From a7dfeb3afd6abf9a1754f0c22d5292180c346064 Mon Sep 17 00:00:00 2001
From: Karan Jariwala
Date: Mon, 1 Apr 2019 16:20:04 -0700
Subject: [PATCH 3/4] CPU test doesn't require nvidiadocker container

---
 tests/nightly/Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nightly/Jenkinsfile b/tests/nightly/Jenkinsfile
index f76503c62daf..a65da2d0b87e 100755
--- a/tests/nightly/Jenkinsfile
+++ b/tests/nightly/Jenkinsfile
@@ -149,7 +149,7 @@ core_logic: {
       node(NODE_LINUX_CPU) {
         ws('workspace/estimator-test-rnn-cpu') {
           utils.unpack_and_init('cpu', mx_lib)
-          utils.docker_run('ubuntu_nightly_cpu', 'nightly_estimator_test_rnn_cpu', true)
+          utils.docker_run('ubuntu_nightly_cpu', 'nightly_estimator_test_rnn_cpu', false)
        }
      }
    }

From 60ec9b8c09b0466f7c4ea4379b23b9fa788ffb3d Mon Sep 17 00:00:00 2001
From: Karan Jariwala
Date: Tue, 2 Apr 2019 12:32:40 -0700
Subject: [PATCH 4/4] Modified the structure by removing the redundant code

---
 tests/nightly/estimator/test_sentiment_rnn.py | 102 ++++++++++--------
 1 file changed, 55 insertions(+), 47 deletions(-)

diff --git a/tests/nightly/estimator/test_sentiment_rnn.py b/tests/nightly/estimator/test_sentiment_rnn.py
index 371adb5d1eeb..7e42831786ce 100644
--- a/tests/nightly/estimator/test_sentiment_rnn.py
+++ b/tests/nightly/estimator/test_sentiment_rnn.py
@@ -29,7 +29,7 @@
 from mxnet import nd, gluon
 from mxnet.contrib import text
 from mxnet.gluon import nn, rnn
-from mxnet.gluon.estimator import estimator as est
+from mxnet.gluon.estimator import estimator
 
 
 class TextCNN(nn.Block):
@@ -162,16 +162,38 @@ def pad(x):
     return features, labels
 
 
-def test_estimator_cpu():
+def run(net, train_dataloader, test_dataloader, **kwargs):
+    '''
+    Train a test sentiment model
+    '''
+    num_epochs = kwargs['epochs']
+    ctx = kwargs['ctx']
+    batch_size = kwargs['batch_size']
+    lr = kwargs['lr']
+
+    # Define trainer
+    trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
+    # Define loss and evaluation metrics
+    loss = gluon.loss.SoftmaxCrossEntropyLoss()
+    acc = mx.metric.Accuracy()
+
+    # Define estimator
+    est = estimator.Estimator(net=net, loss=loss, metrics=acc,
+                              trainers=trainer, context=ctx)
+    # Begin training
+    est.fit(train_data=train_dataloader, val_data=test_dataloader,
+            epochs=num_epochs, batch_size=batch_size)
+    return est
+
+
+def test_estimator_cpu(**kwargs):
     '''
     Test estimator by doing one pass over each model with synthetic data
     '''
     models = ['TextCNN', 'BiRNN']
-    context = mx.cpu()
-    batch_size = 64
-    num_epochs = 1
-    lr = 0.01
-    embed_size = 100
+    ctx = kwargs['ctx']
+    batch_size = kwargs['batch_size']
+    embed_size = kwargs['embed_size']
 
     train_data = mx.nd.random.randint(low=0, high=100, shape=(2 * batch_size, 500))
     train_label = mx.nd.random.randint(low=0, high=2, shape=(2 * batch_size,))
@@ -179,9 +201,9 @@ def test_estimator_cpu():
     val_label = mx.nd.random.randint(low=0, high=2, shape=(batch_size,))
 
     train_dataloader = gluon.data.DataLoader(dataset=gluon.data.ArrayDataset(train_data, train_label),
-                                        batch_size=batch_size, shuffle=True)
+                                             batch_size=batch_size, shuffle=True)
     val_dataloader = gluon.data.DataLoader(dataset=gluon.data.ArrayDataset(val_data, val_label),
-                                      batch_size=batch_size)
+                                           batch_size=batch_size)
     vocab_list = mx.nd.zeros(shape=(100,))
 
     # Get the model
@@ -192,33 +214,20 @@ def test_estimator_cpu():
         else:
             num_hiddens, num_layers = 100, 2
             net = BiRNN(vocab_list, embed_size, num_hiddens, num_layers)
-        net.initialize(mx.init.Xavier(), ctx=context)
-        # Define trainer
-        trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
-        # Define loss and evaluation metrics
-        loss = gluon.loss.SoftmaxCrossEntropyLoss()
-        acc = mx.metric.Accuracy()
-
-        # Define estimator
-        e = est.Estimator(net=net, loss=loss, metrics=acc,
-                          trainers=trainer, context=context)
-        # Begin training
-        e.fit(train_data=train_dataloader, val_data=val_dataloader,
-              epochs=num_epochs, batch_size=batch_size)
-
-
-def test_estimator_gpu():
+        net.initialize(mx.init.Xavier(), ctx=ctx)
+
+        run(net, train_dataloader, val_dataloader, **kwargs)
+
+
+def test_estimator_gpu(**kwargs):
     '''
     Test estimator by training Bidirectional RNN for 5 epochs on the IMDB dataset
     and verify accuracy
     '''
-    batch_size = 64
-    num_epochs = 5
-    lr = 0.01
-    embed_size = 100
-
-    # Set context
-    ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
+    ctx = kwargs['ctx']
+    batch_size = kwargs['batch_size']
+    num_epochs = kwargs['epochs']
+    embed_size = kwargs['embed_size']
 
     # data
     download_imdb()
@@ -241,28 +250,27 @@ def test_estimator_gpu():
     net.embedding.weight.set_data(glove_embedding.idx_to_vec)
     net.embedding.collect_params().setattr('grad_req', 'null')
 
-    # Define Trainer
-    trainer = mx.gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
-    # Define loss and evaluation metrics
-    loss = gluon.loss.SoftmaxCrossEntropyLoss()
-    acc = mx.metric.Accuracy()
-
-    # Define estimator
-    e = est.Estimator(net=net, loss=loss, metrics=acc,
-                      trainers=trainer, context=ctx)
-    # Begin training
-    e.fit(train_data=train_dataloader, val_data=test_dataloader,
-          epochs=num_epochs)
+    est = run(net, train_dataloader, test_dataloader, **kwargs)
 
-    assert e.train_stats['train_' + acc.name][num_epochs - 1] > 0.70
+    assert est.train_stats['train_accuracy'][num_epochs - 1] > 0.70
 
 
 parser = argparse.ArgumentParser(description='test gluon estimator')
 parser.add_argument('--type', type=str, default='cpu')
 opt = parser.parse_args()
+kwargs = {
+    'batch_size': 64,
+    'lr': 0.01,
+    'embed_size': 100
+}
+
 if opt.type == 'cpu':
-    test_estimator_cpu()
+    kwargs['ctx'] = mx.cpu()
+    kwargs['epochs'] = 1
+    test_estimator_cpu(**kwargs)
 elif opt.type == 'gpu':
-    test_estimator_gpu()
+    kwargs['ctx'] = mx.gpu()
+    kwargs['epochs'] = 5
+    test_estimator_gpu(**kwargs)
 else:
     raise RuntimeError("Unknown test type")