diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py
index bd883d226a2d..a59b4df4fe44 100644
--- a/example/bayesian-methods/bdk_demo.py
+++ b/example/bayesian-methods/bdk_demo.py
@@ -58,7 +58,7 @@ def backward(self, out_grad, in_data, out_data, in_grad):
 
 
 class LogSoftmax(mx.operator.NumpyOp):
-    """Generate helper functions to evaluate softmax loss function"""
+    """Compute the logarithm of the softmax"""
     def __init__(self):
         super(LogSoftmax, self).__init__(False)
diff --git a/example/capsnet/capsulenet.py b/example/capsnet/capsulenet.py
index 05df9cdc56c4..4d455dbc504c 100644
--- a/example/capsnet/capsulenet.py
+++ b/example/capsnet/capsulenet.py
@@ -14,7 +14,10 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""Generate MXNet implementation of CapsNet"""
+"""MXNet implementation of CapsNet
+Reference 1: https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf
+Reference 2: https://arxiv.org/pdf/1710.09829.pdf
+"""
 import os
 import re
 import gzip
@@ -190,7 +193,7 @@ def __call__(self, num_update):
 
 
 def do_training(num_epoch, optimizer, kvstore, learning_rate, model_prefix, decay):
-    """Run training to CapsNet"""
+    """Perform CapsNet training"""
     summary_writer = SummaryWriter(args.tblog_dir)
     lr_scheduler = SimpleLRScheduler(learning_rate)
     optimizer_params = {'lr_scheduler': lr_scheduler}
diff --git a/example/ctc/multiproc_data.py b/example/ctc/multiproc_data.py
index 313ab4eec840..f4c667621f70 100644
--- a/example/ctc/multiproc_data.py
+++ b/example/ctc/multiproc_data.py
@@ -14,7 +14,6 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
 """Contains a class for handling multi-process data generation"""
 
 from __future__ import print_function
diff --git a/example/gluon/dc_gan/dcgan.py b/example/gluon/dc_gan/dcgan.py
index 970c35d54df4..93af13ababf3 100644
--- a/example/gluon/dc_gan/dcgan.py
+++ b/example/gluon/dc_gan/dcgan.py
@@ -14,28 +14,27 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+"""MXNet Gluon implementation of Deep Convolutional Generative Adversarial Networks (DCGAN)"""
 
-import matplotlib as mpl
-mpl.use('Agg')
-from matplotlib import pyplot as plt
-
+import logging
+from datetime import datetime
 import argparse
+import os
+import time
+import numpy as np
+import matplotlib as mpl
+mpl.use('Agg')  # select a non-interactive backend before pyplot is imported
+from matplotlib import pyplot as plt
 import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon import nn
 from mxnet import autograd
-import numpy as np
-import logging
-from datetime import datetime
-import os
-import time
-
 from inception_score import get_inception_score
 
 
 def fill_buf(buf, i, img, shape):
-    """
-    Reposition the images generated by the generator so that it can be saved as picture matrix.
+    """Reposition the images generated by the generator so that they can be saved as a picture matrix.
     :param buf: the images metric
     :param i: index of each image
     :param img: images generated by generator once
     :param shape: each image`s shape
@@ -48,12 +47,10 @@ def fill_buf(buf, i, img, shape):
     sx = (i%m)*shape[0]
     sy = (i//m)*shape[1]
     buf[sy:sy+shape[1], sx:sx+shape[0], :] = img
-    return None
 
 
 def visual(title, X, name):
-    """
-    Image visualization and preservation
+    """Visualize the images and save them to disk
     :param title: title
     :param X: images to visualized
     :param name: saved picture`s name
@@ -79,9 +76,11 @@ def visual(title, X, name):
 parser.add_argument('--batch-size', type=int, default=64, help='input batch size, default is 64')
 parser.add_argument('--nz', type=int, default=100, help='size of the latent z vector, default is 100')
 parser.add_argument('--ngf', type=int, default=64, help='the channel of each generator filter layer, default is 64.')
-parser.add_argument('--ndf', type=int, default=64, help='the channel of each descriminator filter layer, default is 64.')
+parser.add_argument('--ndf', type=int, default=64, help='the channel of each discriminator filter layer, '
+                                                        'default is 64.')
 parser.add_argument('--nepoch', type=int, default=25, help='number of epochs to train for, default is 25.')
-parser.add_argument('--niter', type=int, default=10, help='save generated images and inception_score per niter iters, default is 100.')
+parser.add_argument('--niter', type=int, default=10, help='save generated images and inception_score per niter iters, '
+                                                          'default is 10.')
 parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002')
 parser.add_argument('--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
 parser.add_argument('--cuda', action='store_true', help='enables cuda')
@@ -89,7 +88,8 @@ def visual(title, X, name):
 parser.add_argument('--netD', default='', help="path to netD (to continue training)")
 parser.add_argument('--outf', default='./results', help='folder to output images and model checkpoints')
 parser.add_argument('--check-point', default=True, help="save results at each epoch or not")
-parser.add_argument('--inception_score', type=bool, default=True, help='To record the inception_score, default is True.')
+parser.add_argument('--inception_score', type=bool, default=True, help='To record the inception_score, '
+                                                                       'default is True.')
 opt = parser.parse_args()
 print(opt)
@@ -115,6 +115,7 @@ def visual(title, X, name):
 
 
 def transformer(data, label):
+    """Resize, transpose and normalize the input image"""
     # resize to 64x64
     data = mx.image.imresize(data, 64, 64)
     # transpose from (64, 64, 3) to (3, 64, 64)
@@ -128,7 +129,17 @@ def transformer(data, label):
 
 
 # get dataset with the batch_size num each time
-def get_dataset(dataset):
+def get_dataset(dataset_name):
+    """Load the dataset and split it into train/valid data
+
+    :param dataset_name: the name of the dataset to load
+
+    Returns:
+    train_data: DataLoader
+        training dataset
+    val_data: DataLoader
+        validation dataset
+    """
     # mnist
     if dataset == "mnist":
         train_data = gluon.data.DataLoader(
@@ -152,6 +163,7 @@ def get_dataset(dataset):
 
 
 def get_netG():
+    """Build the generator network"""
     # build the generator
     netG = nn.Sequential()
     with netG.name_scope():
@@ -180,6 +192,7 @@ def get_netG():
 
 
 def get_netD():
+    """Build the discriminator network"""
     # build the discriminator
     netD = nn.Sequential()
     with netD.name_scope():
@@ -206,6 +219,7 @@ def get_netD():
 
 
 def get_configurations(netG, netD):
+    """Get the loss function and trainer configurations for netG and netD"""
     # loss
     loss = gluon.loss.SoftmaxCrossEntropyLoss()
@@ -233,6 +247,7 @@ def ins_save(inception_score):
 
 
 # main function
 def main():
+    """Entry point to dcgan"""
     print("|------- new changes!!!!!!!!!")
changes!!!!!!!!!") # to get the dataset and net configuration train_data, val_data = get_dataset(dataset) @@ -300,7 +315,7 @@ def main(): name, acc = metric.get() logging.info('discriminator loss = %f, generator loss = %f, binary training acc = %f at iter %d epoch %d' - % (mx.nd.mean(errD).asscalar(), mx.nd.mean(errG).asscalar(), acc, iter, epoch)) + , mx.nd.mean(errD).asscalar(), mx.nd.mean(errG).asscalar(), acc, iter, epoch) if iter % niter == 0: visual('gout', fake.asnumpy(), name=os.path.join(outf, 'fake_img_iter_%d.png' % iter)) visual('data', data.asnumpy(), name=os.path.join(outf, 'real_img_iter_%d.png' % iter)) @@ -316,13 +331,13 @@ def main(): name, acc = metric.get() metric.reset() - logging.info('\nbinary training acc at epoch %d: %s=%f' % (epoch, name, acc)) - logging.info('time: %f' % (time.time() - tic)) + logging.info('\nbinary training acc at epoch %d: %s=%f', epoch, name, acc) + logging.info('time: %f', time.time() - tic) # save check_point if check_point: - netG.save_parameters(os.path.join(outf,'generator_epoch_%d.params' %epoch)) - netD.save_parameters(os.path.join(outf,'discriminator_epoch_%d.params' % epoch)) + netG.save_parameters(os.path.join(outf, 'generator_epoch_%d.params' %epoch)) + netD.save_parameters(os.path.join(outf, 'discriminator_epoch_%d.params' % epoch)) # save parameter netG.save_parameters(os.path.join(outf, 'generator.params')) @@ -335,6 +350,6 @@ def main(): if __name__ == '__main__': if opt.inception_score: - print("Use inception_score to metric this DCgan model, the reusult is save as a picture named \"inception_score.png\"!") + print("Use inception_score to metric this DCgan model, the reusult is save as a picture " + "named \"inception_score.png\"!") main() - diff --git a/example/gluon/lstm_crf/lstm_crf.py b/example/gluon/lstm_crf/lstm_crf.py index 9c2218577312..011dcfbc4aea 100644 --- a/example/gluon/lstm_crf/lstm_crf.py +++ b/example/gluon/lstm_crf/lstm_crf.py @@ -14,46 +14,50 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +"""This example demonstrates how the LSTM-CRF model can be implemented +in Gluon to perform noun-phrase chunking as a sequence labeling task. +""" +import sys import mxnet as mx from mxnet import autograd as ag, ndarray as nd, gluon from mxnet.gluon import Block, nn, rnn import mxnet.optimizer as optim -import sys - -# This example demonstrates how the LSTM-CRF model can be implemented -# in Gluon to perform noun-phrase chunking as a sequence labeling task. mx.random.seed(1) + # Helper functions to make the code more readable. 
 def to_scalar(x):
     return int(x.asscalar())
 
+
 def argmax(vec):
     # return the argmax as a python int
     idx = nd.argmax(vec, axis=1)
     return to_scalar(idx)
 
-def prepare_sequence(seq, word2idx):
-    return nd.array([word2idx[w] for w in seq])
+
+def prepare_sequence(seq, word_to_idx):
+    return nd.array([word_to_idx[w] for w in seq])
+
 
 # Compute log sum exp is numerically more stable than multiplying probabilities
 def log_sum_exp(vec):
     max_score = nd.max(vec).asscalar()
     return nd.log(nd.sum(nd.exp(vec - max_score))) + max_score
 
+
 # Model
 class BiLSTM_CRF(Block):
-    def __init__(self, vocab_size, tag2idx, embedding_dim, hidden_dim):
+    """BiLSTM-CRF model for sequence labeling"""
+    def __init__(self, vocab_size, tag_to_idx, embedding_dim, hidden_dim):
         super(BiLSTM_CRF, self).__init__()
         with self.name_scope():
             self.embedding_dim = embedding_dim
             self.hidden_dim = hidden_dim
             self.vocab_size = vocab_size
-            self.tag2idx = tag2idx
-            self.tagset_size = len(tag2idx)
-
+            self.tag2idx = tag_to_idx
+            self.tagset_size = len(tag_to_idx)
             self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
             self.lstm = rnn.LSTM(hidden_dim // 2, num_layers=1, bidirectional=True)
@@ -62,9 +66,7 @@ def __init__(self, vocab_size, tag2idx, embedding_dim, hidden_dim):
 
         # Matrix of transition parameters. Entry i,j is the score of
         # transitioning *to* i *from* j.
-        self.transitions = self.params.get("crf_transition_matrix",
-                                           shape=(self.tagset_size, self.tagset_size))
-
+        self.transitions = self.params.get("crf_transition_matrix", shape=(self.tagset_size, self.tagset_size))
         self.hidden = self.init_hidden()
 
     def init_hidden(self):
@@ -98,24 +100,25 @@ def _forward_alg(self, feats):
             alpha = log_sum_exp(terminal_var)
         return alpha
 
-    def _get_lstm_features(self, sentence):
+    def _get_lstm_features(self, sentences):
         self.hidden = self.init_hidden()
-        length = sentence.shape[0]
-        embeds = self.word_embeds(sentence).reshape((length, 1, -1))
+        length = sentences.shape[0]
+        embeds = self.word_embeds(sentences).reshape((length, 1, -1))
         lstm_out, self.hidden = self.lstm(embeds, self.hidden)
         lstm_out = lstm_out.reshape((length, self.hidden_dim))
         lstm_feats = self.hidden2tag(lstm_out)
         return nd.split(lstm_feats, num_outputs=length, axis=0, squeeze_axis=True)
 
-    def _score_sentence(self, feats, tags):
+    def _score_sentence(self, feats, tags_array):
         # Gives the score of a provided tag sequence
         score = nd.array([0])
-        tags = nd.concat(nd.array([self.tag2idx[START_TAG]]), *tags, dim=0)
-        for i, feat in enumerate(feats):
+        tags_array = nd.concat(nd.array([self.tag2idx[START_TAG]]), *tags_array, dim=0)
+        for idx, feat in enumerate(feats):
             score = score + \
-                self.transitions.data()[to_scalar(tags[i+1]), to_scalar(tags[i])] + feat[to_scalar(tags[i+1])]
+                self.transitions.data()[to_scalar(tags_array[idx+1]),
+                                        to_scalar(tags_array[idx])] + feat[to_scalar(tags_array[idx+1])]
         score = score + self.transitions.data()[self.tag2idx[STOP_TAG],
-                                                to_scalar(tags[int(tags.shape[0]-1)])]
+                                                to_scalar(tags_array[int(tags_array.shape[0]-1)])]
         return score
 
     def _viterbi_decode(self, feats):
@@ -160,20 +163,21 @@ def _viterbi_decode(self, feats):
         best_path.reverse()
         return path_score, best_path
 
-    def neg_log_likelihood(self, sentence, tags):
-        feats = self._get_lstm_features(sentence)
+    def neg_log_likelihood(self, sentences, tags_list):
+        feats = self._get_lstm_features(sentences)
         forward_score = self._forward_alg(feats)
-        gold_score = self._score_sentence(feats, tags)
+        gold_score = self._score_sentence(feats, tags_list)
         return forward_score - gold_score
 
-    def forward(self, sentence):  # dont confuse this with _forward_alg above.
+    def forward(self, sentences):  # don't confuse this with _forward_alg above.
         # Get the emission scores from the BiLSTM
-        lstm_feats = self._get_lstm_features(sentence)
+        lstm_feats = self._get_lstm_features(sentences)
         # Find the best path, given the features.
         score, tag_seq = self._viterbi_decode(lstm_feats)
         return score, tag_seq
 
+
 # Run training
 START_TAG = "<START>"
 STOP_TAG = "<STOP>"
@@ -210,6 +214,7 @@ def forward(self, sentence):  # dont confuse this with _forward_alg above.
 
 for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
     neg_log_likelihood_acc = 0.
+    last_iter = 0
     for i, (sentence, tags) in enumerate(training_data):
         # Step 1. Get our inputs ready for the network, that is,
         # turn them into Variables of word indices.
@@ -226,7 +231,8 @@ def forward(self, sentence):  # dont confuse this with _forward_alg above.
         neg_log_likelihood.backward()
         optimizer.step(1)
         neg_log_likelihood_acc += neg_log_likelihood.mean()
-    print("Epoch [{}], Negative Log Likelihood {:.4f}".format(epoch, neg_log_likelihood_acc.asscalar()/(i+1)))
+        last_iter = i
+    print("Epoch [{}], Negative Log Likelihood {:.4f}".format(epoch, neg_log_likelihood_acc.asscalar()/(last_iter+1)))
 
 # Check predictions after training
 precheck_sent = prepare_sequence(training_data[0][0], word2idx)