diff --git a/example/gluon/word_language_model/README.md b/example/gluon/word_language_model/README.md
index 43d173b868ab..4a77950d01bc 100644
--- a/example/gluon/word_language_model/README.md
+++ b/example/gluon/word_language_model/README.md
@@ -28,7 +28,9 @@ python train.py --cuda --tied --nhid 650 --emsize 650 --epochs 40 --dropout 0.5
 ```
 python train.py --cuda --tied --nhid 1500 --emsize 1500 --epochs 60 --dropout 0.65 # Test ppl of 88.42
 ```
-
+```
+python train.py --export-model # hybridize and export model graph. See below for visualization options.
+```
@@ -38,7 +40,8 @@ usage: train.py [-h] [--model MODEL] [--emsize EMSIZE] [--nhid NHID]
                 [--nlayers NLAYERS] [--lr LR] [--clip CLIP] [--epochs EPOCHS]
                 [--batch_size N] [--bptt BPTT] [--dropout DROPOUT] [--tied]
                 [--cuda] [--log-interval N] [--save SAVE] [--gctype GCTYPE]
-                [--gcthreshold GCTHRESHOLD]
+                [--gcthreshold GCTHRESHOLD] [--hybridize] [--static-alloc]
+                [--static-shape] [--export-model]
 
 MXNet Autograd RNN/LSTM Language Model on Wikitext-2.
 
@@ -62,4 +65,23 @@ optional arguments:
                         `none` for now.
   --gcthreshold GCTHRESHOLD
                         threshold for 2bit gradient compression
+  --hybridize           whether to hybridize in mxnet>=1.3 (default=False)
+  --static-alloc        whether to use static-alloc hybridize in mxnet>=1.3
+                        (default=False)
+  --static-shape        whether to use static-shape hybridize in mxnet>=1.3
+                        (default=False)
+  --export-model        export a symbol graph and exit (default=False)
+```
+
+You may visualize the graph with `mxnet.viz.plot_network` without any additional dependencies. Alternatively, if [mxboard](https://github.com/awslabs/mxboard) is installed, use the following approach for interactive visualization.
+```python
+#!python
+import mxnet, mxboard
+with mxboard.SummaryWriter(logdir='./model-graph') as sw:
+    sw.add_graph(mxnet.sym.load('./model-symbol.json'))
+```
+```bash
+#!/bin/bash
+tensorboard --logdir=./model-graph/
 ```
+![model graph](./model-graph.png?raw=true "rnn model graph")
diff --git a/example/gluon/word_language_model/model-graph.png b/example/gluon/word_language_model/model-graph.png
new file mode 100644
index 000000000000..c621518c57be
Binary files /dev/null and b/example/gluon/word_language_model/model-graph.png differ
diff --git a/example/gluon/word_language_model/model.py b/example/gluon/word_language_model/model.py
index a810c416d0ce..ec6e700a854a 100644
--- a/example/gluon/word_language_model/model.py
+++ b/example/gluon/word_language_model/model.py
@@ -19,7 +19,7 @@
 from mxnet import gluon
 from mxnet.gluon import nn, rnn
 
-class RNNModel(gluon.Block):
+class RNNModel(gluon.HybridBlock):
     """A model with an encoder, recurrent layer, and a decoder."""
     def __init__(self, mode, vocab_size, num_embed, num_hidden,
@@ -53,7 +53,7 @@ def __init__(self, mode, vocab_size, num_embed, num_hidden,
 
         self.num_hidden = num_hidden
 
-    def forward(self, inputs, hidden):
+    def hybrid_forward(self, F, inputs, hidden):
         emb = self.drop(self.encoder(inputs))
         output, hidden = self.rnn(emb, hidden)
         output = self.drop(output)
diff --git a/example/gluon/word_language_model/train.py b/example/gluon/word_language_model/train.py
index 7f0a916b79bd..d08c07ec921d 100644
--- a/example/gluon/word_language_model/train.py
+++ b/example/gluon/word_language_model/train.py
@@ -58,6 +58,14 @@
                          takes `2bit` or `none` for now.')
 parser.add_argument('--gcthreshold', type=float, default=0.5,
                     help='threshold for 2bit gradient compression')
+parser.add_argument('--hybridize', action='store_true',
+                    help='whether to hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--static-alloc', action='store_true',
+                    help='whether to use static-alloc hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--static-shape', action='store_true',
+                    help='whether to use static-shape hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--export-model', action='store_true',
+                    help='export a symbol graph and exit (default=False)')
 args = parser.parse_args()
 print(args)
 
@@ -72,6 +80,15 @@
 else:
     context = mx.cpu(0)
 
+if args.export_model:
+    args.hybridize = True
+
+# optional parameters only for mxnet >= 1.3
+hybridize_optional = dict(filter(lambda kv:kv[1],
+    {'static_alloc':args.static_alloc, 'static_shape':args.static_shape}.items()))
+if args.hybridize:
+    print('hybridize_optional', hybridize_optional)
+
 dirname = './data'
 dirname = os.path.expanduser(dirname)
 if not os.path.exists(dirname):
@@ -114,6 +131,8 @@
 ntokens = len(vocab)
 
 model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid, args.nlayers,
                        args.dropout, args.tied)
+if args.hybridize:
+    model.hybridize(**hybridize_optional)
 model.initialize(mx.init.Xavier(), ctx=context)
 
 compression_params = None if args.gctype == 'none' else {'type': args.gctype, 'threshold': args.gcthreshold}
@@ -123,6 +142,8 @@
                         'wd': 0},
                        compression_params=compression_params)
 loss = gluon.loss.SoftmaxCrossEntropyLoss()
+if args.hybridize:
+    loss.hybridize(**hybridize_optional)
 
 ###############################################################################
 # Training code
@@ -177,6 +198,10 @@ def train():
                     epoch, i, cur_L, math.exp(cur_L)))
                 total_L = 0.0
 
+            if args.export_model:
+                model.export('model')
+                return
+
         val_L = eval(val_data)
 
         print('[Epoch %d] time cost %.2fs, valid loss %.2f, valid ppl %.2f'%(
@@ -193,6 +218,8 @@ def train():
 
 if __name__ == '__main__':
     train()
-    model.load_parameters(args.save, context)
-    test_L = eval(test_data)
-    print('Best test loss %.2f, test ppl %.2f'%(test_L, math.exp(test_L)))
+    if not args.export_model:
+        model.load_parameters(args.save, context)
+        test_L = eval(test_data)
+        print('Best test loss %.2f, test ppl %.2f'%(test_L, math.exp(test_L)))
+
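
For readers skimming the patch, the flow the new flags enable is: `hybridize()` switches a `HybridBlock` from imperative execution to a cached symbolic graph, and `export()` then writes that graph plus its parameters to disk, which is what `--export-model` triggers after the first training batch. Below is a minimal, self-contained sketch of that flow; `ToyBlock` is a hypothetical stand-in for the example's `RNNModel`, and only MXNet >= 1.3 APIs (`HybridBlock.hybridize`, `HybridBlock.export`) are assumed.

```python
# Minimal sketch of the hybridize/export flow wrapped by the new
# --hybridize/--static-alloc/--static-shape/--export-model flags.
# ToyBlock is a hypothetical stand-in for the example's RNNModel.
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn

class ToyBlock(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(ToyBlock, self).__init__(**kwargs)
        with self.name_scope():
            self.dense = nn.Dense(8)

    def hybrid_forward(self, F, x):
        # F is mx.nd before hybridization and mx.sym once the graph is cached.
        return F.relu(self.dense(x))

net = ToyBlock()
net.initialize(mx.init.Xavier())
# static_alloc/static_shape mirror the optional flags forwarded by train.py.
net.hybridize(static_alloc=True, static_shape=True)
net(mx.nd.ones((2, 4)))  # one forward pass is needed to build the cached graph
net.export('toy')        # writes toy-symbol.json and toy-0000.params
```

The forward pass before `export()` is not incidental: the cached graph only exists after a hybridized block has been called once, which is why the patch exports from inside the training loop rather than before it.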
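The README mentions `mxnet.viz.plot_network` as the mxboard-free alternative but does not show it; a hedged sketch follows. Rendering the figure does require the `graphviz` Python package, and the file name assumes `train.py --export-model` has already produced `model-symbol.json` as in the patch.

```python
# Sketch of the mxnet.viz.plot_network route mentioned in the README.
# Assumes graphviz is installed and model-symbol.json was written by
# `python train.py --export-model`.
import mxnet as mx

sym = mx.sym.load('./model-symbol.json')
graph = mx.viz.plot_network(sym, save_format='png')
graph.render('model-graph')  # writes model-graph.png via graphviz
```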