From 992c3c0dd90c0723de6934e826a49bad6569eeac Mon Sep 17 00:00:00 2001
From: Junru Shao
Date: Thu, 28 Feb 2019 17:41:39 -0800
Subject: [PATCH] [MXNET-1330] Bring nnvm::Tuple to mxnet::Tuple (#14270)

* Bring nnvm::Tuple to mxnet::Tuple
* Retrigger CI
* Fix issues caused by rebase
* Address comments from Jun
* Trigger CI
* Address comments from Da
* Retrigger due to flakiness
* Retrigger CI
---
amalgamation/prep_nnvm.sh | 1 - docs/architecture/overview.md | 22 +- docs/faq/add_op_in_backend.md | 8 +- docs/faq/new_op.md | 2 +- include/mxnet/base.h | 4 +- include/mxnet/executor.h | 4 +- include/mxnet/ndarray.h | 74 +- include/mxnet/op_attr_types.h | 2 +- include/mxnet/operator.h | 20 +- include/mxnet/operator_util.h | 8 +- include/mxnet/tensor_blob.h | 12 +- include/mxnet/tuple.h | 682 ++++++++++++++++++ plugin/caffe/caffe_blob.cc | 4 +- plugin/caffe/caffe_blob.h | 4 +- plugin/caffe/caffe_loss-inl.h | 12 +- plugin/caffe/caffe_loss.cc | 4 +- plugin/caffe/caffe_op-inl.h | 14 +- plugin/caffe/caffe_op.cc | 4 +- plugin/opencv/cv_api.cc | 6 +- plugin/sframe/iter_sframe.cc | 4 +- plugin/torch/torch_criterion-inl.h | 14 +- plugin/torch/torch_module-inl.h | 12 +- plugin/warpctc/warpctc-inl.h | 12 +- src/c_api/c_api.cc | 18 +- src/c_api/c_api_common.h | 4 +- src/c_api/c_api_executor.cc | 8 +- src/c_api/c_api_function.cc | 10 +- src/c_api/c_api_symbolic.cc | 10 +- src/c_api/c_predict_api.cc | 42 +- src/common/exec_utils.h | 8 +- src/common/serialization.h | 1 - src/common/utils.h | 14 +- src/executor/attach_op_execs_pass.cc | 6 +- src/executor/exec_pass.h | 53 +- src/executor/graph_executor.cc | 59 +- src/executor/graph_executor.h | 8 +- src/executor/infer_graph_attr_pass.cc | 8 +- src/executor/tensorrt_pass.cc | 8 +- src/executor/trt_graph_executor.cc | 21 +- src/executor/trt_graph_executor.h | 8 +- src/imperative/cached_op.cc | 20 +- src/imperative/imperative.cc | 4 +- src/imperative/imperative_utils.h | 20 +- src/io/image_aug_default.cc | 2 +- src/io/image_det_aug_default.cc | 2 +- src/io/image_io.cc | 17 +- src/io/image_iter_common.h | 2 +- src/io/inst_vector.h | 2 +- src/io/iter_batchloader.h | 6 +- src/io/iter_csv.cc | 8 +- src/io/iter_image_det_recordio.cc | 2 +- src/io/iter_image_recordio_2.cc | 4 +- src/io/iter_libsvm.cc | 12 +- src/io/iter_mnist.cc | 2 +- src/io/iter_sparse.h | 2 +- src/io/iter_sparse_batchloader.h | 8 +- src/io/iter_sparse_prefetcher.h | 2 +- src/kvstore/comm.h | 14 +- src/kvstore/comm_tree.h | 8 +- src/kvstore/kvstore_dist.h | 5 +- src/kvstore/kvstore_dist_server.h | 10 +- src/kvstore/kvstore_nccl.h | 6 +- src/ndarray/ndarray.cc | 62 +- src/ndarray/ndarray_function.h | 10 +- src/nnvm/gradient.cc | 281 ++++++++ src/nnvm/graph_algorithm.h | 131 ++++ src/nnvm/legacy_op_util.cc | 20 +- src/nnvm/plan_memory.cc | 412 +++++++++++ src/operator/batch_norm_v1-inl.h | 16 +- src/operator/batch_norm_v1.cc | 4 +- src/operator/bilinear_sampler-inl.h | 12 +- src/operator/bilinear_sampler.cc | 2 +- src/operator/contrib/adamw-inl.h | 8 +- src/operator/contrib/adamw.cc | 4 +- .../contrib/adaptive_avg_pooling-inl.h | 10 +- src/operator/contrib/adaptive_avg_pooling.cc | 2 +- src/operator/contrib/bilinear_resize-inl.h | 6 +- src/operator/contrib/bilinear_resize.cc | 2 +- src/operator/contrib/boolean_mask.cc | 2 +- src/operator/contrib/boolean_mask.cu | 2 +- src/operator/contrib/bounding_box-inl.h | 37 +- src/operator/contrib/bounding_box.cc | 6 +- src/operator/contrib/count_sketch-inl.h | 18 +- src/operator/contrib/count_sketch.cc | 4 +- .../contrib/deformable_convolution-inl.h | 44 +-
.../contrib/deformable_convolution.cc | 8 +- .../contrib/deformable_convolution.cu | 4 +- .../contrib/deformable_psroi_pooling-inl.h | 16 +- .../contrib/deformable_psroi_pooling.cc | 4 +- src/operator/contrib/dgl_graph.cc | 68 +- src/operator/contrib/fft-inl.h | 24 +- src/operator/contrib/fft.cc | 2 +- src/operator/contrib/ifft-inl.h | 22 +- src/operator/contrib/ifft.cc | 2 +- src/operator/contrib/index_copy-inl.h | 4 +- src/operator/contrib/index_copy.cc | 2 +- src/operator/contrib/krprod.cc | 8 +- src/operator/contrib/multi_proposal-inl.h | 10 +- src/operator/contrib/multibox_detection-inl.h | 21 +- src/operator/contrib/multibox_detection.cc | 4 +- src/operator/contrib/multibox_prior-inl.h | 13 +- src/operator/contrib/multibox_prior.cc | 4 +- src/operator/contrib/multibox_target-inl.h | 23 +- src/operator/contrib/multibox_target.cc | 4 +- src/operator/contrib/nn/deformable_im2col.cuh | 18 +- src/operator/contrib/nn/deformable_im2col.h | 20 +- src/operator/contrib/nnvm_to_onnx-inl.h | 6 +- src/operator/contrib/nnvm_to_onnx.cc | 30 +- src/operator/contrib/nnz.cc | 6 +- src/operator/contrib/optimizer_op.cc | 6 +- src/operator/contrib/proposal-inl.h | 10 +- src/operator/contrib/psroi_pooling-inl.h | 14 +- src/operator/contrib/psroi_pooling.cc | 4 +- src/operator/contrib/quadratic_op-inl.h | 4 +- src/operator/contrib/quadratic_op.cc | 2 +- src/operator/contrib/roi_align-inl.h | 2 +- src/operator/contrib/roi_align.cc | 8 +- src/operator/contrib/sync_batch_norm-inl.h | 16 +- src/operator/contrib/sync_batch_norm.cc | 4 +- src/operator/contrib/tensorrt.cc | 8 +- src/operator/control_flow.cc | 68 +- src/operator/convolution_v1-inl.h | 34 +- src/operator/convolution_v1.cc | 8 +- src/operator/convolution_v1.cu | 4 +- src/operator/correlation-inl.h | 12 +- src/operator/correlation.cc | 2 +- src/operator/crop-inl.h | 18 +- src/operator/cross_device_copy.cc | 8 +- src/operator/custom/custom.cc | 14 +- src/operator/custom/native_op-inl.h | 16 +- src/operator/custom/ndarray_op-inl.h | 10 +- src/operator/elemwise_op_common.h | 8 +- src/operator/grid_generator-inl.h | 22 +- src/operator/grid_generator.cc | 2 +- .../identity_attach_KL_sparse_reg-inl.h | 10 +- src/operator/image/image_random-inl.h | 24 +- src/operator/image/image_random.cc | 4 +- src/operator/image/resize-inl.h | 8 +- src/operator/image/resize.cc | 2 +- src/operator/instance_norm-inl.h | 14 +- src/operator/instance_norm.cc | 2 +- src/operator/l2_normalization-inl.h | 18 +- src/operator/l2_normalization.cc | 4 +- src/operator/leaky_relu-inl.h | 30 +- src/operator/leaky_relu.cc | 2 +- src/operator/loss_binary_op-inl.h | 6 +- src/operator/loss_binary_op.cc | 2 +- src/operator/make_loss-inl.h | 12 +- src/operator/make_loss.cc | 4 +- src/operator/nn/activation.cc | 2 +- src/operator/nn/batch_norm-inl.h | 4 +- src/operator/nn/batch_norm.cc | 20 +- src/operator/nn/batch_norm.cu | 4 +- src/operator/nn/concat.cc | 24 +- src/operator/nn/convolution-inl.h | 20 +- src/operator/nn/convolution.cc | 10 +- src/operator/nn/convolution.cu | 20 +- src/operator/nn/ctc_loss-inl.h | 14 +- src/operator/nn/ctc_loss.cc | 2 +- src/operator/nn/cudnn/cudnn_algoreg-inl.h | 6 +- src/operator/nn/cudnn/cudnn_batch_norm.cc | 16 +- src/operator/nn/cudnn/cudnn_convolution-inl.h | 55 +- .../nn/cudnn/cudnn_deconvolution-inl.h | 56 +- src/operator/nn/deconvolution-inl.h | 40 +- src/operator/nn/deconvolution.cc | 10 +- src/operator/nn/deconvolution.cu | 12 +- src/operator/nn/depthwise_convolution-inl.h | 4 +- src/operator/nn/dropout-inl.h | 12 +- src/operator/nn/dropout.cc | 6 +- 
src/operator/nn/fully_connected-inl.h | 8 +- src/operator/nn/fully_connected.cc | 12 +- src/operator/nn/im2col.cuh | 16 +- src/operator/nn/im2col.h | 22 +- src/operator/nn/layer_norm-inl.h | 8 +- src/operator/nn/layer_norm.cc | 14 +- src/operator/nn/lrn.cc | 8 +- src/operator/nn/mkldnn/mkldnn_base-inl.h | 10 +- .../nn/mkldnn/mkldnn_fully_connected.cc | 8 +- src/operator/nn/mkldnn/mkldnn_pooling-inl.h | 2 +- src/operator/nn/mkldnn/mkldnn_slice.cc | 4 +- src/operator/nn/pool.cuh | 44 +- src/operator/nn/pool.h | 160 ++-- src/operator/nn/pooling-inl.h | 44 +- src/operator/nn/pooling.cc | 16 +- src/operator/nn/softmax-inl.h | 8 +- src/operator/nn/softmax.cc | 12 +- src/operator/nn/upsampling-inl.h | 8 +- src/operator/nn/upsampling.cc | 8 +- .../nnpack/nnpack_fully_connected-inl.h | 4 +- src/operator/operator_common.h | 14 +- src/operator/operator_util.cc | 34 +- src/operator/optimizer_op-inl.h | 8 +- src/operator/optimizer_op.cc | 32 +- src/operator/pad-inl.h | 12 +- src/operator/pad.cc | 2 +- src/operator/pooling_v1-inl.h | 30 +- src/operator/pooling_v1.cc | 4 +- src/operator/quantization/dequantize-inl.h | 6 +- src/operator/quantization/dequantize.cc | 2 +- .../mkldnn/mkldnn_requantize-inl.h | 4 +- .../quantization/quantization_utils.h | 8 +- src/operator/quantization/quantize-inl.h | 10 +- src/operator/quantization/quantize.cc | 2 +- src/operator/quantization/quantize_v2-inl.h | 13 +- src/operator/quantization/quantize_v2.cc | 2 +- src/operator/quantization/quantized_concat.cc | 18 +- src/operator/quantization/quantized_conv.cc | 18 +- src/operator/quantization/quantized_conv.cu | 34 +- .../quantization/quantized_flatten-inl.h | 14 +- .../quantization/quantized_flatten.cc | 2 +- .../quantization/quantized_fully_connected.cc | 26 +- .../quantization/quantized_fully_connected.cu | 6 +- .../quantization/quantized_pooling.cc | 18 +- .../quantization/quantized_pooling.cu | 2 +- src/operator/quantization/requantize-inl.h | 4 +- src/operator/quantization/requantize.cc | 2 +- src/operator/random/multisample_op.cc | 2 +- src/operator/random/multisample_op.h | 16 +- src/operator/random/sample_multinomial_op.cc | 2 +- src/operator/random/sample_multinomial_op.h | 16 +- src/operator/random/sample_op.cc | 4 +- src/operator/random/sample_op.h | 18 +- src/operator/random/shuffle_op.cc | 4 +- src/operator/random/shuffle_op.cu | 2 +- src/operator/random/unique_sample_op.cc | 2 +- src/operator/random/unique_sample_op.h | 8 +- src/operator/regression_output-inl.h | 8 +- src/operator/regression_output.cc | 2 +- src/operator/rnn-inl.h | 20 +- src/operator/rnn.cc | 2 +- src/operator/roi_pooling-inl.h | 14 +- src/operator/roi_pooling.cc | 2 +- src/operator/sequence_last-inl.h | 16 +- src/operator/sequence_last.cc | 2 +- src/operator/sequence_mask-inl.h | 12 +- src/operator/sequence_mask.cc | 2 +- src/operator/sequence_reverse-inl.h | 12 +- src/operator/sequence_reverse.cc | 2 +- src/operator/slice_channel-inl.h | 16 +- src/operator/slice_channel.cc | 2 +- src/operator/softmax_output-inl.h | 20 +- src/operator/softmax_output.cc | 16 +- src/operator/spatial_transformer-inl.h | 20 +- src/operator/spatial_transformer.cc | 2 +- src/operator/subgraph/common.h | 12 +- src/operator/subgraph/mkldnn/mkldnn_conv.cc | 14 +- src/operator/subgraph_op_common.cc | 10 +- src/operator/subgraph_op_common.h | 6 +- src/operator/svm_output-inl.h | 16 +- src/operator/svm_output.cc | 2 +- src/operator/swapaxis-inl.h | 18 +- src/operator/swapaxis.cc | 2 +- src/operator/tensor/broadcast_reduce-inl.cuh | 12 +- 
src/operator/tensor/broadcast_reduce-inl.h | 9 +- src/operator/tensor/broadcast_reduce_op.h | 164 ++--- .../tensor/broadcast_reduce_op_index.cc | 4 +- .../tensor/broadcast_reduce_op_value.cc | 10 +- .../tensor/broadcast_reduce_op_value.cu | 2 +- src/operator/tensor/cast_storage-inl.cuh | 1 - src/operator/tensor/cast_storage-inl.h | 2 +- src/operator/tensor/cast_storage.cc | 2 +- src/operator/tensor/control_flow_op.cc | 2 +- src/operator/tensor/control_flow_op.h | 6 +- src/operator/tensor/diag_op-inl.h | 26 +- src/operator/tensor/diag_op.cc | 2 +- src/operator/tensor/dot-inl.h | 32 +- src/operator/tensor/dot.cc | 4 +- .../elemwise_binary_broadcast_op-inl.cuh | 2 +- .../tensor/elemwise_binary_broadcast_op.h | 44 +- src/operator/tensor/elemwise_binary_op-inl.h | 2 +- src/operator/tensor/elemwise_binary_op.h | 2 +- .../tensor/elemwise_binary_scalar_op.h | 2 +- .../tensor/elemwise_binary_scalar_op_basic.cc | 2 +- .../elemwise_binary_scalar_op_extended.cc | 2 +- src/operator/tensor/elemwise_sum.cc | 10 +- src/operator/tensor/elemwise_unary_op.h | 10 +- .../tensor/elemwise_unary_op_basic.cc | 34 +- src/operator/tensor/histogram-inl.h | 12 +- src/operator/tensor/histogram.cc | 2 +- src/operator/tensor/indexing_op.cc | 34 +- src/operator/tensor/indexing_op.cu | 10 +- src/operator/tensor/indexing_op.h | 86 +-- src/operator/tensor/init_op.cc | 16 +- src/operator/tensor/init_op.h | 28 +- src/operator/tensor/la_op.cc | 20 +- src/operator/tensor/la_op.h | 56 +- src/operator/tensor/matrix_op-inl.h | 281 ++++---- src/operator/tensor/matrix_op.cc | 42 +- src/operator/tensor/matrix_op.cu | 4 +- src/operator/tensor/ordering_op-inl.h | 38 +- src/operator/tensor/ordering_op.cc | 6 +- src/operator/tensor/ravel.cc | 4 +- src/operator/tensor/ravel.h | 20 +- src/operator/tensor/sparse_retain-inl.h | 6 +- src/operator/tensor/sparse_retain.cc | 2 +- src/operator/tensor/square_sum-inl.h | 6 +- src/profiler/profiler.h | 4 +- tests/cpp/include/test_core_op.h | 22 +- tests/cpp/include/test_legacy_op.h | 16 +- tests/cpp/include/test_mkldnn.h | 46 +- tests/cpp/include/test_ndarray_utils.h | 20 +- tests/cpp/include/test_op.h | 4 +- tests/cpp/include/test_op_runner.h | 12 +- tests/cpp/include/test_tune.h | 8 +- tests/cpp/include/test_util.h | 44 +- tests/cpp/misc/serialization.cc | 10 +- tests/cpp/operator/activation_perf.cc | 14 +- tests/cpp/operator/batchnorm_test.cc | 48 +- tests/cpp/operator/coreop_perf.cc | 6 +- tests/cpp/operator/dropout_perf.cc | 14 +- tests/cpp/operator/fully_conn_perf.cc | 25 +- tests/cpp/operator/mkldnn_operator_test.cc | 30 +- tests/cpp/operator/mkldnn_test.cc | 4 +- .../operator/runner/core_op_runner_test.cc | 44 +- tests/cpp/operator/slice_channel_perf.cc | 14 +- tests/cpp/operator/tune/operator_tune_test.cc | 6 +- 316 files changed, 3841 insertions(+), 2252 deletions(-) create mode 100644 include/mxnet/tuple.h create mode 100644 src/nnvm/gradient.cc create mode 100644 src/nnvm/graph_algorithm.h create mode 100644 src/nnvm/plan_memory.cc diff --git a/amalgamation/prep_nnvm.sh b/amalgamation/prep_nnvm.sh index 8e30481d9ebf..a8f63b6b4b1a 100755 --- a/amalgamation/prep_nnvm.sh +++ b/amalgamation/prep_nnvm.sh @@ -40,7 +40,6 @@ echo '#define MSHADOW_FORCE_STREAM #include "mshadow/tensor.h" #include "mxnet/base.h" #include "dmlc/json.h" -#include "nnvm/tuple.h" #include "mxnet/tensor_blob.h"' > temp cat nnvm.cc >> temp mv temp ../../../../amalgamation/nnvm.cc diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index fefa1e8a0a0c..e2e9ce2bd3eb 100644 --- 
a/docs/architecture/overview.md
+++ b/docs/architecture/overview.md
@@ -301,9 +301,9 @@ The `OperatorProperty` interface consists of:
 * **InferShape:**

 ```c++
-  virtual bool InferShape(std::vector<TShape> *in_shape,
-                          std::vector<TShape> *out_shape,
-                          std::vector<TShape> *aux_shape) const = 0;
+  virtual bool InferShape(mxnet::ShapeVector *in_shape,
+                          mxnet::ShapeVector *out_shape,
+                          mxnet::ShapeVector *aux_shape) const = 0;
 ```

 This interface has two purposes:
@@ -322,9 +322,9 @@ MXNet defines two interfaces to achieve this:

 ```c++
 virtual std::vector<ResourceRequest> ForwardResource(
-    const std::vector<TShape> &in_shape) const;
+    const mxnet::ShapeVector &in_shape) const;
 virtual std::vector<ResourceRequest> BackwardResource(
-    const std::vector<TShape> &in_shape) const;
+    const mxnet::ShapeVector &in_shape) const;
 ```

 The `ResourceRequest` structure (in `resource.h`) currently contains only a type flag:
@@ -473,7 +473,7 @@ To do so, you could define a `ConvolutionParam` structure, as follows:

 ```c++
 #include <dmlc/parameter.h>
 struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
-  TShape kernel, stride, pad;
+  mxnet::TShape kernel, stride, pad;
   uint32_t num_filter, num_group, workspace;
   bool no_bias;
 };
@@ -582,10 +582,10 @@ must be provided before any calculation occurs.
    let's check input data shape consistency and provide output shape.

    ```cpp
-    typedef TShape (*UnaryShapeFunction)(const TShape& src,
+    typedef mxnet::TShape (*UnaryShapeFunction)(const mxnet::TShape& src,
                                          const EnvArguments& env);
-    typedef TShape (*BinaryShapeFunction)(const TShape& lhs,
-                                          const TShape& rhs,
+    typedef mxnet::TShape (*BinaryShapeFunction)(const mxnet::TShape& lhs,
+                                                 const mxnet::TShape& rhs,
                                           const EnvArguments& env);
    ```
    You can use `mshadow::TShape` to check input data shape and designate output data shape.
@@ -611,9 +611,9 @@ In our smooth l1 loss example, it's okay to use the default behavior whereby the
 Written explicitly, it is:

    ```cpp
-    inline TShape SmoothL1Shape_(const TShape& src,
+    inline mxnet::TShape SmoothL1Shape_(const mxnet::TShape& src,
                                  const EnvArguments& env) {
-      return TShape(src);
+      return mxnet::TShape(src);
    }
    ```
diff --git a/docs/faq/add_op_in_backend.md b/docs/faq/add_op_in_backend.md
index 0e734d62bce2..15f4ed9fbab4 100644
--- a/docs/faq/add_op_in_backend.md
+++ b/docs/faq/add_op_in_backend.md
@@ -175,8 +175,8 @@ element-wise multiplication and addition.
 For our `quadratic` operator, shape inference possesses quite similar logic.

 ```cpp
 inline bool QuadraticOpShape(const nnvm::NodeAttrs& attrs,
-                             std::vector<TShape>* in_attrs,
-                             std::vector<TShape>* out_attrs) {
+                             mxnet::ShapeVector* in_attrs,
+                             mxnet::ShapeVector* out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
@@ -216,8 +216,8 @@ The function `QuadraticOpShape` posted here is for the purpose of illustration o

 ```cpp
 template<int n_in, int n_out>
 inline bool ElemwiseShape(const nnvm::NodeAttrs& attrs,
-                          std::vector<TShape> *in_attrs,
-                          std::vector<TShape> *out_attrs);
+                          mxnet::ShapeVector *in_attrs,
+                          mxnet::ShapeVector *out_attrs);
 ```

 The same logic goes for data type inference. We will leave the analysis of
diff --git a/docs/faq/new_op.md b/docs/faq/new_op.md
index 4d51eaf8059d..4c10708b944d 100644
--- a/docs/faq/new_op.md
+++ b/docs/faq/new_op.md
@@ -258,7 +258,7 @@ can add argument descriptions in bulk with `.add_arguments(ActivationParam::__FI

 #### FInferShape or TIsBackward (for Backward Only Ops)

-Normally operators need to have `FInferShape` with prototype `bool(const nnvm::NodeAttrs& attrs, std::vector<TShape> *in_attrs, std::vector<TShape> *out_attrs)`. `FInferShape` fills unknown shapes (`shape.ndim() == 0`) in in_attrs/out_attrs based on known shapes in in_attrs/out_attrs. Use `ElemwiseShape` for simple operators with uniform shapes.
+Normally operators need to have `FInferShape` with prototype `bool(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_attrs, mxnet::ShapeVector *out_attrs)`. `FInferShape` fills unknown shapes (`shape.ndim() == 0`) in in_attrs/out_attrs based on known shapes in in_attrs/out_attrs. Use `ElemwiseShape` for simple operators with uniform shapes.

 Operators that are only used for a backward pass can instead register `.set_attr("TIsBackward", true)` and their shapes will be copied from the corresponding forward operators.
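To make the updated prototype concrete, here is a minimal editor's sketch (an illustration added for this write-up, not a hunk of the patch) of a shape function for a hypothetical unary operator whose output simply mirrors its input. It follows the same pattern as `QuadraticOpShape` above; `SHAPE_ASSIGN_CHECK` is the helper MXNet's operator headers already provide:

```cpp
// Editor's sketch: FInferShape for a hypothetical unary op, written against
// the post-patch mxnet::ShapeVector signature. Not part of the patch itself.
inline bool UnaryMirrorShape(const nnvm::NodeAttrs& attrs,
                             mxnet::ShapeVector* in_attrs,
                             mxnet::ShapeVector* out_attrs) {
  CHECK_EQ(in_attrs->size(), 1U);
  CHECK_EQ(out_attrs->size(), 1U);
  // Propagate whichever side is already known; fail only on inconsistency.
  SHAPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
  SHAPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]);
  // Inference succeeds once the output shape is known
  // (shape.ndim() == 0 still marks an unknown shape in this release).
  return (*out_attrs)[0].ndim() != 0;
}
```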
diff --git a/include/mxnet/base.h b/include/mxnet/base.h
index 2ea6ebbdf3d7..b239cb1f7302 100644
--- a/include/mxnet/base.h
+++ b/include/mxnet/base.h
@@ -33,9 +33,9 @@
 #include "mshadow/tensor.h"
 // nnvm headers for symbolic construction.
 #include "nnvm/op.h"
-#include "nnvm/tuple.h"
 #include "nnvm/symbolic.h"
 #include "libinfo.h"
+#include "tuple.h"

 /*!
@@ -95,8 +95,6 @@ typedef mshadow::gpu gpu;
 typedef mshadow::index_t index_t;
 /*! \brief data type that will be used to store ndarray */
 typedef mshadow::default_real_t real_t;
-/*! \brief Shape data structure used to record shape information */
-using TShape = nnvm::TShape;
 /*! \brief operator structure from NNVM */
 using Op = nnvm::Op;
diff --git a/include/mxnet/executor.h b/include/mxnet/executor.h
index aec10091a540..24b23ed783f3 100644
--- a/include/mxnet/executor.h
+++ b/include/mxnet/executor.h
@@ -121,7 +121,7 @@ class Executor {
                            const bool allow_up_sizing,
                            const Context& default_ctx,
                            const std::map<std::string, Context>& ctx_map,
-                           const std::unordered_map<std::string, TShape>&
+                           const std::unordered_map<std::string, mxnet::TShape>&
                              provided_arg_shapes,
                            std::vector<NDArray>* in_args,
                            std::vector<NDArray>* arg_grads,
@@ -155,7 +155,7 @@
                               const std::vector<Context>& in_arg_ctxes,
                               const std::vector<Context>& arg_grad_ctxes,
                               const std::vector<Context>& aux_state_ctxes,
-                              const std::unordered_map<std::string, TShape>& arg_shape_map,
+                              const std::unordered_map<std::string, mxnet::TShape>& arg_shape_map,
                               const std::unordered_map<std::string, int>& arg_dtype_map,
                               const std::unordered_map<std::string, int>& arg_stype_map,
                               const std::vector<OpReqType>& grad_req_types,
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 5de42e19a657..feb562aa76fa 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -91,7 +91,7 @@ class NDArray {
   * \param delay_alloc whether delay the allocation
   * \param dtype data type of this ndarray
   */
-  NDArray(const TShape &shape, Context ctx,
+  NDArray(const mxnet::TShape &shape, Context ctx,
           bool delay_alloc = false, int dtype = mshadow::default_type_flag)
      : ptr_(std::make_shared<Chunk>(shape, ctx, delay_alloc, dtype)),
        shape_(shape), dtype_(dtype), storage_type_(kDefaultStorage),
@@ -99,10 +99,10 @@
  }
  /*! \brief constructor for NDArray with storage type */
-  NDArray(const NDArrayStorageType stype, const TShape &shape, Context ctx,
+  NDArray(const NDArrayStorageType stype, const mxnet::TShape &shape, Context ctx,
          bool delay_alloc = true, int dtype = mshadow::default_type_flag,
-          std::vector<int> aux_types = {}, std::vector<TShape> aux_shapes = {},
-          TShape storage_shape = TShape(mshadow::Shape1(0)));
+          std::vector<int> aux_types = {}, mxnet::ShapeVector aux_shapes = {},
+          mxnet::TShape storage_shape = mxnet::TShape(mshadow::Shape1(0)));
  /*!
* \brief constructs a new dynamic NDArray whose shape is unknown, * hence the NDArray is inherently lazily created @@ -110,7 +110,7 @@ class NDArray { * \param dtype data type of this ndarray */ explicit NDArray(Context ctx, int dtype = mshadow::default_type_flag) { - ptr_ = std::make_shared(TShape(mshadow::Shape1(0)), ctx, true, dtype); + ptr_ = std::make_shared(mxnet::TShape(mshadow::Shape1(0)), ctx, true, dtype); dtype_ = dtype; storage_type_ = kDefaultStorage; entry_ = {nullptr, 0, 0}; @@ -148,7 +148,7 @@ class NDArray { } /*! \brief create ndarray from shared memory */ - NDArray(int shared_pid, int shared_id, const TShape& shape, int dtype) + NDArray(int shared_pid, int shared_id, const mxnet::TShape& shape, int dtype) : ptr_(std::make_shared(shared_pid, shared_id, shape, dtype)), shape_(shape), dtype_(dtype), storage_type_(kDefaultStorage), entry_({nullptr, 0, 0}) { } @@ -163,7 +163,7 @@ class NDArray { * \param aux_data the memory content of static aux data * \param dev_id the device id this tensor sits at */ - NDArray(const NDArrayStorageType stype, const TShape &shape, + NDArray(const NDArrayStorageType stype, const mxnet::TShape &shape, const TBlob &data, const std::vector &aux_data, int dev_id) : ptr_(std::make_shared(stype, data, aux_data, dev_id)), shape_(shape), dtype_(data.type_flag_), storage_type_(stype), entry_({nullptr, 0, 0}) { @@ -172,7 +172,7 @@ class NDArray { * \brief initialize the NDArray, assuming it is not assigned a meaningful shape before * \param shape the shape of the NDArray */ - void Init(const TShape &shape) { + void Init(const mxnet::TShape &shape) { ptr_->Init(shape, this->dtype_); this->shape_ = shape; } @@ -210,7 +210,7 @@ class NDArray { /*! * \return the shape of current NDArray. */ - inline const TShape& shape() const { + inline const mxnet::TShape& shape() const { return shape_; } /*! @@ -218,7 +218,7 @@ class NDArray { * It is only intended for non-default storage. For row-sparse storage, it is the shape of * the tensor which stores the non-zero values. */ - inline const TShape &storage_shape() const { + inline const mxnet::TShape &storage_shape() const { CHECK(ptr_ != nullptr); CHECK_NE(storage_type(), kDefaultStorage) << "storage_shape() is not intended for kDefaultStorage."; @@ -230,14 +230,14 @@ class NDArray { * \param index the index of the aux data * \return the shape of aux data at given index */ - inline const TShape& aux_shape(size_t index) const { + inline const mxnet::TShape& aux_shape(size_t index) const { CHECK_NE(storage_type(), kDefaultStorage) << "aux_shape() is not intended for kDefaultStorage."; return ptr_->aux_shapes[index]; } /* \return the shapes of all aux data */ - const std::vector& aux_shapes() const { + const mxnet::ShapeVector& aux_shapes() const { CHECK_NE(storage_type(), kDefaultStorage) << "aux_shapes() is not intended for kDefaultStorage."; return ptr_->aux_shapes; @@ -257,7 +257,7 @@ class NDArray { * for the final result. After the operation is done, the exact size of * the shape is known and need to be reset using this function. */ - inline void set_aux_shape(size_t index, const TShape& shape) const { + inline void set_aux_shape(size_t index, const mxnet::TShape& shape) const { CHECK_NE(storage_type(), kDefaultStorage) << "set_aux_shape() is not intended for kDefaultStorage."; ptr_->set_aux_shape(index, shape); @@ -552,7 +552,7 @@ class NDArray { * \param dtype The data type. * \return NDArray in new shape and type. 
*/ - inline NDArray AsArray(const TShape &shape, int dtype) const { + inline NDArray AsArray(const mxnet::TShape &shape, int dtype) const { CHECK_EQ(storage_type(), kDefaultStorage) << "AsArray is intended only for kDefaultStorage."; CHECK_GE(ptr_->shandle.size, @@ -628,13 +628,13 @@ class NDArray { * \param shape new shape * \return NDArray in new shape */ - NDArray Reshape(const TShape &shape) const; + NDArray Reshape(const mxnet::TShape &shape) const; /*! * \brief Get an reshaped NDArray. Supports autograd recording * \param shape new shape * \return NDArray in new shape */ - NDArray ReshapeWithRecord(const TShape &shape); + NDArray ReshapeWithRecord(const mxnet::TShape &shape); /*! * \brief Return a copy of this NDArray without autograd history */ @@ -660,10 +660,10 @@ class NDArray { * This function can only be called by ndarray of default * storage type and effectively changes the ndarray's shape_. * Note: This function is named as this to avoid overload conflict - * with CheckAndAlloc(const std::vector &aux_shapes), since - * TShape tmp = some_shape is equivalent to TShape tmp = {some_shape}. + * with CheckAndAlloc(const mxnet::ShapeVector &aux_shapes), since + * mxnet::TShape tmp = some_shape is equivalent to mxnet::TShape tmp = {some_shape}. */ - void ReshapeAndAlloc(const TShape& shape) { + void ReshapeAndAlloc(const mxnet::TShape& shape) { CHECK_EQ(storage_type(), kDefaultStorage); CHECK(!is_none()); shape_ = shape; @@ -674,17 +674,17 @@ class NDArray { * \brief Alloc memory for non-default storage * aux_shape is only known at run time */ - inline void CheckAndAlloc(const std::vector &aux_shapes) const { + inline void CheckAndAlloc(const mxnet::ShapeVector &aux_shapes) const { CHECK_NE(storage_type(), kDefaultStorage) << "CheckAndAlloc(aux_shapes) is not intended for kDefaultStorage"; ptr_->CheckAndAlloc(shape_, aux_shapes, dtype_); } - inline void CheckAndAllocData(const TShape &storage_shape) const { + inline void CheckAndAllocData(const mxnet::TShape &storage_shape) const { CHECK_NE(storage_type(), kDefaultStorage) << "CheckAndAllocData is not intended for kDefaultStorage"; ptr_->CheckAndAllocData(storage_shape, dtype_); } - inline void CheckAndAllocAuxData(size_t i, const TShape &aux_shape) const { + inline void CheckAndAllocAuxData(size_t i, const mxnet::TShape &aux_shape) const { CHECK_NE(storage_type(), kDefaultStorage) << "CheckAndAllocAuxData is not intended for kDefaultStorage"; ptr_->CheckAndAllocAuxData(i, aux_shape); @@ -775,7 +775,7 @@ class NDArray { * which can be expensive. * It's used by FullyConnected right now. */ - NDArray MKLDNNDataReshape(const TShape &shape) const; + NDArray MKLDNNDataReshape(const mxnet::TShape &shape) const; /*! * \ Fix mkldnn memory descriptor mismatch from NDArray. @@ -844,16 +844,16 @@ class NDArray { // The shape of the chunk data. // This might not be the same shape as the NDArray, since the storage may be sparse. // The default value for storage_shape is {0} when an empty non-default NDArray is created. - TShape storage_shape; + mxnet::TShape storage_shape; // The shape of aux data. The default value for the shape depends on the type of storage. // If aux_shapes[i].Size() is zero, aux data i is empty. - std::vector aux_shapes; + mxnet::ShapeVector aux_shapes; /*! \brief default cosntructor */ Chunk() : static_data(true), delay_alloc(false) {} /*! 
\brief construct a new chunk */ - Chunk(TShape shape, Context ctx_, bool delay_alloc_, int dtype) + Chunk(mxnet::TShape shape, Context ctx_, bool delay_alloc_, int dtype) : static_data(false), delay_alloc(true), ctx(ctx_) { auto size = shape.Size(); storage_shape = shape; @@ -880,7 +880,7 @@ class NDArray { storage_shape = data.shape_; } - Chunk(int shared_pid, int shared_id, const TShape& shape, int dtype) + Chunk(int shared_pid, int shared_id, const mxnet::TShape& shape, int dtype) : static_data(false), delay_alloc(false) { var = Engine::Get()->NewVariable(); ctx = Context::CPUShared(0); @@ -892,9 +892,9 @@ class NDArray { storage_shape = shape; } // Constructor for a non-default storage chunk - Chunk(NDArrayStorageType storage_type_, const TShape &storage_shape_, Context ctx_, + Chunk(NDArrayStorageType storage_type_, const mxnet::TShape &storage_shape_, Context ctx_, bool delay_alloc_, int dtype, const std::vector &aux_types_, - const std::vector &aux_shapes_) + const mxnet::ShapeVector &aux_shapes_) : static_data(false), delay_alloc(delay_alloc_), storage_type(storage_type_), aux_types(aux_types_), ctx(ctx_), storage_shape(storage_shape_), aux_shapes(aux_shapes_) { @@ -944,7 +944,7 @@ class NDArray { } /*! \brief set the shape for ith aux data, and update storage shape if necessary */ - inline void set_aux_shape(const size_t i, const TShape& shape) { + inline void set_aux_shape(const size_t i, const mxnet::TShape& shape) { aux_shapes[i] = shape; if (storage_shape.ndim() > 0) { if (storage_type == kRowSparseStorage && i == rowsparse::kIdx) { @@ -989,20 +989,20 @@ class NDArray { } } /*! \brief initialize the shape and dtype, assuming it is not initialized before. */ - void Init(const TShape &shape, int dtype) { + void Init(const mxnet::TShape &shape, int dtype) { auto size = shape.Size(); storage_shape = shape; shandle.size = size * mshadow::mshadow_sizeof(dtype); this->CheckAndAlloc(); } - inline void CheckAndAlloc(const TShape &shape, const std::vector &aux_shapes, + inline void CheckAndAlloc(const mxnet::TShape &shape, const mxnet::ShapeVector &aux_shapes, int dtype) { // calculate size, perform allocation if (kRowSparseStorage == storage_type) { // For row sparse, aux_shape indicates the number of rows to allocate auto aux_shape = aux_shapes[rowsparse::kIdx]; CheckAndAllocAuxData(rowsparse::kIdx, aux_shape); - TShape storage_shape(shape); + mxnet::TShape storage_shape(shape); storage_shape[0] = aux_shape[0]; CheckAndAllocData(storage_shape, dtype); } else if (kCSRStorage == storage_type) { @@ -1017,12 +1017,12 @@ class NDArray { // storage shape is also updated // if data is already allocated, try reuse the storage. Otherwise, free the current one // and allocate new storage - void CheckAndAllocData(const TShape &shape, int dtype); + void CheckAndAllocData(const mxnet::TShape &shape, int dtype); #if MXNET_USE_MKLDNN == 1 // Have MKL memory reference to the data in the default storage // or create memory for MKLDNN. - void SetMKLMem(const TShape &shape, int dtype); + void SetMKLMem(const mxnet::TShape &shape, int dtype); // If the data is stored in MKLDNN layout, we reorder data in mkl_mem_ and // save the result in shandle. void Reorder2Default(); @@ -1037,7 +1037,7 @@ class NDArray { // aux shape is also updated // if aux data is already allocated, try reuse the storage. 
Otherwise, free the current one // and allocate new storage - inline void CheckAndAllocAuxData(size_t i, const TShape &shape) { + inline void CheckAndAllocAuxData(size_t i, const mxnet::TShape &shape) { CHECK_EQ(shape.ndim(), 1) << "shape must be 1D in CheckAndAllocAuxData"; CHECK_NE(storage_type, kUndefinedStorage) << "storage type cannot be kUndefinedStorage in CheckAndAllocAuxData"; @@ -1065,7 +1065,7 @@ class NDArray { /*! \brief internal data of NDArray */ std::shared_ptr ptr_{nullptr}; /*! \brief shape of current NDArray */ - TShape shape_; + mxnet::TShape shape_; /*! \brief byte offset in chunk */ size_t byte_offset_ = 0; /*! \brief type of data */ diff --git a/include/mxnet/op_attr_types.h b/include/mxnet/op_attr_types.h index 22bba301221d..889b5028a460 100644 --- a/include/mxnet/op_attr_types.h +++ b/include/mxnet/op_attr_types.h @@ -206,7 +206,7 @@ class OpStatePtr { */ using FCreateOpState = std::function& in_shape, + const mxnet::ShapeVector& in_shape, const std::vector& in_type)>; /*! * \brief Execution mode of this operator. diff --git a/include/mxnet/operator.h b/include/mxnet/operator.h index cfa162780495..1dc04244f673 100644 --- a/include/mxnet/operator.h +++ b/include/mxnet/operator.h @@ -50,7 +50,7 @@ namespace mxnet { * To add new operator(aka. layers of neural nets) to mxnet, developer need to create * a new OperatorProperty and its corresponding Operator. * - * \sa TBlob, TShape, OperatorProperty + * \sa TBlob, mxnet::TShape, OperatorProperty */ class Operator { public: @@ -192,15 +192,15 @@ class OperatorProperty { * common practice: set the shape of data input, and usually weight's shape can be inferred * * \param out_shape the shape of outputs of the operator - * InferShape will modify the vector to fill output TShape + * InferShape will modify the vector to fill output mxnet::TShape * \param aux_shape the shape of auxiliary states of the operator - * InferShape will modify the vector to fill output TShape + * InferShape will modify the vector to fill output mxnet::TShape * \return true if the shape inference is successful, false if there is not enough information. * \throws dmlc::Error if the known arg_shapes are inconsistent. */ - virtual bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const = 0; + virtual bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const = 0; /*! * \brief infer the data types of outputs and unknown input arguments * \param in_type the type of input arguments of the operator @@ -255,10 +255,10 @@ class OperatorProperty { * \param in_type dtype of the input ndarrays * \return the created operator */ - virtual Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + virtual Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { std::vector out_type, aux_type; - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; out_type.resize(this->ListOutputs().size()); out_shape.resize(this->ListOutputs().size()); aux_type.resize(this->ListAuxiliaryStates().size()); @@ -284,7 +284,7 @@ class OperatorProperty { * \return Additional resource request */ virtual std::vector ForwardResource( - const std::vector &in_shape) const { + const mxnet::ShapeVector &in_shape) const { return std::vector(); } /*! 
@@ -295,7 +295,7 @@ class OperatorProperty { * \return Additional resource request */ virtual std::vector BackwardResource( - const std::vector &in_shape) const { + const mxnet::ShapeVector &in_shape) const { return std::vector(); } /*! diff --git a/include/mxnet/operator_util.h b/include/mxnet/operator_util.h index bebe3f13ae45..941e76e6d70b 100644 --- a/include/mxnet/operator_util.h +++ b/include/mxnet/operator_util.h @@ -97,7 +97,7 @@ typedef void (*SourceFunction)(const EnvArguments& env, * \param env The Environment arguments. * \return The inferred result shape. */ -typedef TShape (*SourceShapeFunction)(const EnvArguments& env); +typedef mxnet::TShape (*SourceShapeFunction)(const EnvArguments& env); /*! * \brief Unary function that takes a src and save result to ret. @@ -119,7 +119,7 @@ typedef void (*UnaryFunction)(const TBlob& src, * \param env The Environment arguments. * \return The inferred result shape. */ -typedef TShape (*UnaryShapeFunction)(const TShape& src, +typedef mxnet::TShape (*UnaryShapeFunction)(const mxnet::TShape& src, const EnvArguments& env); /*! @@ -189,8 +189,8 @@ typedef void (*BinaryFunction)(const TBlob& lhs, * \param env The Environment arguments. * \return The inferred result shape. */ -typedef TShape (*BinaryShapeFunction)(const TShape& lhs, - const TShape& rhs, +typedef mxnet::TShape (*BinaryShapeFunction)(const mxnet::TShape& lhs, + const mxnet::TShape& rhs, const EnvArguments& env); /*! * \brief Gradient function that takes only output gradient and computes gradient wrt to input. diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h index 412877a58218..7d059025b03e 100755 --- a/include/mxnet/tensor_blob.h +++ b/include/mxnet/tensor_blob.h @@ -69,7 +69,7 @@ class TBlob { /*! \brief pointer to the data */ void *dptr_; /*! \brief shape of the tensor */ - TShape shape_; + mxnet::TShape shape_; /*! \brief type flag of the tensor blob */ int type_flag_; @@ -87,7 +87,7 @@ class TBlob { * \param dev_id the device id */ template - TBlob(DType *dptr, const TShape &shape, int dev_mask, int dev_id = -1) + TBlob(DType *dptr, const mxnet::TShape &shape, int dev_mask, int dev_id = -1) : dptr_(dptr), shape_(shape), type_flag_(mshadow::DataType::kFlag) { SetDLTensor(dev_mask, dev_id); @@ -100,7 +100,7 @@ class TBlob { * \param type_flag the type flag. Can be one of enum mshadow::dtype * \param dev_id the device id */ - TBlob(void *dptr, const TShape &shape, int dev_mask, int type_flag, int dev_id = -1) + TBlob(void *dptr, const mxnet::TShape &shape, int dev_mask, int type_flag, int dev_id = -1) : dptr_(dptr), shape_(shape), type_flag_(type_flag) { SetDLTensor(dev_mask, dev_id); } @@ -110,7 +110,7 @@ class TBlob { */ explicit TBlob(const DLTensor &dltensor) : dptr_(dltensor.data), - shape_(TShape(dltensor.shape, dltensor.shape + dltensor.ndim)), + shape_(mxnet::TShape(dltensor.shape, dltensor.shape + dltensor.ndim)), type_flag_(DLDataTypeTransform(dltensor.dtype)), dltensor_(dltensor) { // compactness check for DLTensor @@ -175,7 +175,7 @@ class TBlob { * \param shape desired shape * \return reshaped blob */ - inline TBlob reshape(const TShape& shape) const { + inline TBlob reshape(const mxnet::TShape& shape) const { CHECK_EQ(this->shape_.Size(), shape.Size()) << "Shape size mismatch " << this->shape_.Size() << " v.s. " << shape.Size(); TBlob ret(this->dptr_, shape, this->dev_mask(), this->type_flag_, this->dev_id()); @@ -417,7 +417,7 @@ class TBlob { } // namespace mxnet namespace dmlc { -// Add a few patches to support TShape in dmlc/parameter. 
+// Add a few patches to support mxnet::TShape in dmlc/parameter.
 DMLC_DECLARE_TYPE_NAME(mxnet::TShape, "Shape(tuple)");
 DMLC_DECLARE_TYPE_NAME(nnvm::Tuple<int>, "Shape(tuple)");
 DMLC_DECLARE_TYPE_NAME(nnvm::Tuple<dmlc::optional<int>>, "Shape(tuple)");
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
new file mode 100644
index 000000000000..7c1367333630
--- /dev/null
+++ b/include/mxnet/tuple.h
@@ -0,0 +1,682 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ *  Copyright (c) 2016 by Contributors
+ * \file mxnet/tuple.h
+ * \brief Data structure Tuple and TShape to store dynamic sized shapes.
+ */
+#ifndef MXNET_TUPLE_H_
+#define MXNET_TUPLE_H_
+
+#include <vector>
+#include <type_traits>
+#include <algorithm>
+#include <utility>
+#include <iostream>
+#include <string>
+#include "nnvm/op_attr_types.h"
+#include "nnvm/graph_attr_types.h"
+#include "nnvm/graph.h"
+#include "nnvm/pass.h"
+
+namespace mxnet {
+
+/*!
+ * \brief A dynamic sized array data structure that is optimized for storing
+ *        small number of elements with same type.
+ *
+ *  Data will be stored in stack when number of elements is small.
+ *  It is suitable to hold shape of Tensor.
+ *
+ * \tparam ValueType The type of data stored inside tuple.
+ * \sa TShape
+ */
+template<typename ValueType>
+class Tuple {
+ public:
+  /*! \brief default constructor */
+  Tuple() = default;
+  /*! \brief destructor */
+  inline ~Tuple() {
+    delete [] data_heap_;
+  }
+  /*!
+   * \brief copy constructor from another tuple
+   * \param s the source tuple
+   */
+  inline Tuple(const Tuple<ValueType>& s) {
+    this->assign(s.begin(), s.end());
+  }
+  /*!
+   * \brief constructor from initializer list
+   * \param init the initializer_list
+   */
+  inline Tuple(std::initializer_list<ValueType> init) {
+    this->assign(init.begin(), init.end());
+  }
+  /*!
+   * \brief constructor from vector
+   * \param init the vector
+   */
+  inline Tuple(std::vector<ValueType> init) {  // NOLINT(runtime/explicit)
+    this->assign(init.begin(), init.end());
+  }
+  /*!
+   * \brief move constructor from Tuple
+   * \param src the source shape
+   */
+
+  inline Tuple(Tuple<ValueType>&& src) {  // NOLINT(runtime/explicit)
+    this->swap(src);
+  }
+  /*!
+   * \brief construct the Tuple from content of iterator
+   * \param begin the beginning of iterator
+   * \param end end the end of the iterator
+   * \tparam RandomAccessIterator iterator type
+   */
+  template<typename RandomAccessIterator>
+  inline Tuple(RandomAccessIterator begin,
+               RandomAccessIterator end) {
+    this->assign(begin, end);
+  }
+  /*!
+   * \brief Assign content to tuple from iterator.
+   * \param begin the beginning of iterator
+   * \param end end the end of the iterator
+   * \tparam RandomAccessIterator iterator type
+   */
+  template<typename RandomAccessIterator>
+  inline void assign(RandomAccessIterator begin,
+                     RandomAccessIterator end) {
+    this->SetDim(end - begin);
+    std::copy(begin, end, this->begin());
+  }
+  /*!
+   * \brief Swap current object with other
+   * \param other another object to be swapped.
+   */
+  inline void swap(Tuple<ValueType>& other) {  // NOLINT(*)
+    std::swap(ndim_, other.ndim_);
+    std::swap(num_heap_allocated_, other.num_heap_allocated_);
+    std::swap(data_stack_, other.data_stack_);
+    std::swap(data_heap_, other.data_heap_);
+  }
+  /*!
+   * \brief assignment from another tuple.
+   * \param src source tuple
+   * \return reference of self
+   */
+  inline Tuple<ValueType>& operator=(const Tuple<ValueType>& src) {
+    this->assign(src.begin(), src.end());
+    return *this;
+  }
+  /*!
+   * \brief assignment from rvalue of another tuple.
+   * \param src source tuple
+   * \return reference of self
+   */
+  inline Tuple<ValueType>& operator=(Tuple<ValueType>&& src) {
+    Tuple<ValueType>(std::move(src)).swap(*this);
+    return *this;
+  }
+  /*!
+   * \brief assignment from initializer list
+   * \param init the source initializer list
+   * \return reference of self
+   */
+  inline Tuple<ValueType> &operator=(std::initializer_list<ValueType> init) {
+    this->assign(init.begin(), init.end());
+    return *this;
+  }
+  /*!
+   * \return whether two tuple equals
+   * \param s the tuple to compare against
+   */
+  inline bool operator==(const Tuple<ValueType> &s) const {
+    if (ndim_ != s.ndim_) return false;
+    return std::equal(begin(), end(), s.begin());
+  }
+  /*!
+   * \return whether two tuple not equal
+   * \param s the tuple to compare against
+   */
+  inline bool operator!=(const Tuple<ValueType> &s) const {
+    return !(*this == s);
+  }
+  /*! \return the begin data pointer to content of the tuple */
+  inline const ValueType *begin() const {
+    return ndim_ <= kStackCache ? data_stack_ : data_heap_;
+  }
+  /*! \return the begin data pointer to content of the tuple */
+  inline ValueType *begin() {
+    return ndim_ <= kStackCache ? data_stack_ : data_heap_;
+  }
+  /*! \return the data pointer to end of the tuple */
+  inline const ValueType* end() const {
+    return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_);
+  }
+  /*! \return the data pointer to end the tuple */
+  inline ValueType* end() {
+    return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_);
+  }
+  /*! \return number of dimension of the tuple */
+  inline uint32_t ndim() const {
+    return ndim_;
+  }
+  /*!
+   * \brief get corresponding index
+   * \param i dimension index
+   * \return the corresponding dimension size
+   */
+  inline ValueType& operator[](size_t i) {
+    return begin()[i];
+  }
+  /*!
+   * \brief get corresponding index
+   * \param i dimension index
+   * \return the corresponding dimension size
+   */
+  inline const ValueType& operator[](size_t i) const {
+    return begin()[i];
+  }
+  /*!
+   * \brief Save Tuple to JSON.
+   * \param writer JSONWriter
+   */
+  inline void Save(dmlc::JSONWriter* writer) const {
+    std::vector<ValueType> tmp(begin(), end());
+    writer->Write(tmp);
+  }
+  /*!
+   * \brief Load Tuple from JSON.
+   * \param reader JSONReader
+   */
+  inline void Load(dmlc::JSONReader* reader) {
+    std::vector<ValueType> tmp;
+    reader->Read(&tmp);
+    this->assign(tmp.begin(), tmp.end());
+  }
+  /*!
+   * \brief allow output string of tuple to ostream
+   * \param os the output stream
+   * \param t the tuple
+   * \return the ostream
+   */
+  friend std::ostream &operator<<(std::ostream &os, const Tuple<ValueType> &t) {
+    os << '[';
+    const ValueType* begin = t.begin();
+    const ValueType* end = t.end();
+    for (const ValueType* it = begin; it != end; ++it) {
+      if (it != begin) os << ',';
+      os << *it;
+    }
+    os << ']';
+    return os;
+  }
+  /*!
+   * \brief read tuple from the istream
+   * \param is the input stream
+   * \param t The tuple
+   * \return the istream
+   */
+  friend std::istream &operator>>(std::istream &is, Tuple<ValueType> &t) {
+    // get (
+    while (true) {
+      char ch = is.peek();
+      if (isdigit(ch) || ch == '-') {
+        ValueType idx;
+        if (is >> idx) {
+          t.assign(&idx, &idx + 1);
+        }
+        return is;
+      }
+      is.get();
+      if (ch == '(' || ch == '[') break;
+      if (!isspace(ch)) {
+        is.setstate(std::ios::failbit);
+        return is;
+      }
+    }
+    // Handle empty tuple
+    while (isspace(is.peek())) {
+      is.get();
+    }
+    if (is.peek() == ')' || is.peek() == ']') {
+      is.get();
+      return is;
+    }
+    // Handle non-empty tuple
+    ValueType idx;
+    std::vector<ValueType> tmp;
+    while (is >> idx) {
+      tmp.push_back(idx);
+      char ch;
+      do {
+        ch = is.get();
+      } while (isspace(ch));
+      if (std::is_integral<ValueType>::value && ch == 'L') {
+        ch = is.get();
+      }
+      if (ch == ',') {
+        while (true) {
+          ch = is.peek();
+          if (isspace(ch)) {
+            is.get(); continue;
+          }
+          if (ch == ')' || ch == ']') {
+            is.get(); break;
+          }
+          break;
+        }
+        if (ch == ')' || ch == ']') break;
+      } else if (ch == ')' || ch == ']') {
+        break;
+      } else {
+        is.setstate(std::ios::failbit);
+        return is;
+      }
+    }
+    t.assign(tmp.begin(), tmp.end());
+    return is;
+  }
+  /*!
+   * \brief save the content into binary stream
+   * \param strm the output stream
+   * \tparam DType data type that save to
+   * \tparam TStream any stream type that have write
+   */
+  template<typename DType = ValueType, typename TStream>
+  inline void Save(TStream *strm) const;
+  /*!
+   * \brief load the content from binary stream
+   * \param strm the output stream
+   * \tparam DType data type that load from
+   * \tparam TStream any stream type that have write
+   * \return whether the load is successful
+   */
+  template<typename DType = ValueType, typename TStream>
+  inline bool Load(TStream *strm);
+
+ protected:
+  // stack cache size
+  static const uint32_t kStackCache = 4;
+  /*! \brief number of dimension of the tuple */
+  uint32_t ndim_{0};
+  /*! \brief number of cells allocated in data_heap_ */
+  uint32_t num_heap_allocated_{0};
+  /*! \brief in stack space used to store shape when it is small */
+  ValueType data_stack_[kStackCache];
+  /*! \brief space to store shape when dimension is big*/
+  ValueType* data_heap_{nullptr};
+  // internal function to change the dimension
+  inline void SetDim(uint32_t ndim) {
+    if (ndim > kStackCache &&
+        ndim > num_heap_allocated_) {
+      delete [] data_heap_;
+      data_heap_ = new ValueType[ndim];
+      num_heap_allocated_ = ndim;
+    }
+    ndim_ = ndim;
+  }
+};
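Before the patch moves on to `TShape`, a quick editor's illustration (added for this write-up, not a hunk of the patch) of how the `Tuple` interface above behaves in practice; note that tuples of up to `kStackCache` (four) elements live entirely on the stack, and that `operator>>` accepts both `(...)` and `[...]` spellings:

```cpp
// Editor's sketch of mxnet::Tuple usage; assumes the new header is on the
// include path of an MXNet build that contains this patch.
#include <iostream>
#include <sstream>
#include "mxnet/tuple.h"

int main() {
  mxnet::Tuple<int> t{2, 3, 4};   // initializer-list constructor
  std::cout << t << '\n';         // operator<< prints [2,3,4]

  std::istringstream is("(5, 6, 7)");
  is >> t;                        // operator>> parses a tuple back in
  std::cout << t.ndim() << '\n';  // 3
  return 0;
}
```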
+
+/*!
+ * \brief A Shape class that is used to represent shape of each tensor.
+ */
+class TShape : public Tuple<dim_t> {
+ public:
+  /*! \brief default constructor */
+  TShape() = default;
+  /*!
+   * constructor to construct a shape with all 1.
+   * \param ndim the number of dimension
+   */
+  inline TShape(uint32_t ndim) {  // NOLINT(*)
+    this->SetDim(ndim);
+    std::fill_n(begin(), ndim, 1);
+  }
+  /*!
+   * \brief copy constructor of TShape
+   * \param s source shape.
+   */
+  inline TShape(const Tuple<dim_t>& s) {  // NOLINT(*)
+    this->assign(s.begin(), s.end());
+  }
+  /*!
+   * \brief constructor from initializer list
+   * \param init the initializer_list
+   */
+  inline TShape(std::initializer_list<dim_t> init) {
+    this->assign(init.begin(), init.end());
+  }
+  /*!
+   * \brief move constructor.
+   * \param s source shape.
+   */
+  inline TShape(Tuple<dim_t>&& s) {  // NOLINT(*)
+    this->swap(s);
+  }
+  /*!
+   * \brief construct the Tuple from content of iterator
+   * \param begin the beginning of iterator
+   * \param end end the end of the iterator
+   * \tparam RandomAccessIterator iterator type
+   */
+  template<typename RandomAccessIterator>
+  inline TShape(RandomAccessIterator begin,
+                RandomAccessIterator end) {
+    this->assign(begin, end);
+  }
+  /*!
+   * \brief assignment function from tshape
+   * \param src source shape.
+   * \return self.
+   */
+  inline TShape& operator=(const Tuple<dim_t>& src) {
+    this->assign(src.begin(), src.end());
+    return *this;
+  }
+  /*!
+   * \brief move assignment function from tshape
+   * \param src source shape.
+   * \return self.
+   */
+  inline TShape& operator=(Tuple<dim_t>&& src) {  // NOLINT(*)
+    TShape(std::move(src)).swap(*this);  // NOLINT(*)
+    return *this;
+  }
+  /*! \return total number of elements in the shape */
+  inline size_t Size() const {
+    dim_t size = 1;
+    const dim_t* start = begin(), *fin = end();
+    for (const dim_t* it = start; it != fin; ++it) {
+      size *= *it;
+    }
+    return size;
+  }
+  /*!
+   * \return product shape in [dimstart,dimend)
+   * \param dimstart start dimension
+   * \param dimend end dimension
+   */
+  inline size_t ProdShape(int dimstart, int dimend) const {
+    dim_t num = 1;
+    const dim_t *d = this->data();
+    for (int i = dimstart; i < dimend; ++i) {
+      num *= d[i];
+    }
+    return num;
+  }
+  /*! \return the begin data pointer to content of the tuple */
+  inline const dim_t *data() const {
+    return begin();
+  }
+  /*! \return the begin data pointer to content of the tuple */
+  inline dim_t *data() {
+    return begin();
+  }
+#ifdef MSHADOW_XINLINE
+  template<int dim>
+  inline TShape(const mshadow::Shape<dim> &s) {// NOLINT(*)
+    this->assign(s.shape_, s.shape_ + dim);
+  }
+
+  template<int dim>
+  inline TShape(mshadow::Shape<dim> &&s) {// NOLINT(*)
+    this->assign(s.shape_, s.shape_ + dim);
+  }
+  /*!
+   * \brief assignment from shape
+   * \param shape source shape
+   * \tparam dim shape dimension
+   * \return reference of self
+   */
+  template<int dim>
+  inline TShape &operator=(const mshadow::Shape<dim> &shape) {
+    this->assign(shape.shape_, shape.shape_ + dim);
+    return *this;
+  }
+  /*!
+   * \brief get the shape of tensor specifying dim
+   * \return the shape requested
+   * \tparam dim dimension of the tensor
+   */
+  template<int dim>
+  inline mshadow::Shape<dim> get() const {
+    CHECK_EQ(dim, static_cast<int>(ndim()))
+        << "dimension do not match target dimension " << dim << " vs " << ndim();
+    const dim_t *d = this->data();
+    mshadow::Shape<dim> s;
+    for (int i = 0; i < dim; ++i) {
+      s[i] = d[i];
+    }
+    return s;
+  }
+  /*!
+   * flatten the higher dimension to second dimension, return a 2D shape
+   * \return the flat 2d shape
+   */
+  inline mshadow::Shape<2> FlatTo2D(void) const {
+    mshadow::Shape<2> s;
+    if (ndim() == 0) return mshadow::Shape2(0, 0);
+    const dim_t *d = this->data();
+    s.shape_[1] = d[ndim() - 1];
+    dim_t ymax = 1;
+    for (size_t i = 1; i < ndim(); ++i) {
+      ymax *= d[i - 1];
+    }
+    s.shape_[0] = ymax;
+    return s;
+  }
+  /*!
+   * flatten the shape into three parts: [0, axis_begin), [axis_begin, axis_end], (axis_end, ndim)
+   * \param axis_begin The beginning axis specified.
+   * \param axis_end The ending axis specified.
+   * \return the flat 3d shape
+   */
+  inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const {
+    CHECK(axis_end >= axis_begin);
+    mshadow::Shape<3> s;
+    if (ndim() == 0) return mshadow::Shape3(0, 0, 0);
+    const dim_t *d = this->data();
+    s.shape_[0] = 1;
+    s.shape_[1] = 1;
+    s.shape_[2] = 1;
+
+    for (size_t i = 0; i < axis_begin; ++i) {
+      s.shape_[0] *= d[i];
+    }
+    for (size_t i = axis_begin; i <= axis_end; ++i) {
+      s.shape_[1] *= d[i];
+    }
+    for (size_t i = axis_end + 1; i < ndim(); ++i) {
+      s.shape_[2] *= d[i];
+    }
+    return s;
+  }
+  /*!
+   * flatten the axis before and after the specified axis, so it becomes 3D tensor
+   * \param axis The axis specified.
+   * \return the flat 3d shape
+   */
+  inline mshadow::Shape<3> FlatTo3D(size_t axis) const {
+    return FlatTo3D(axis, axis);
+  }
+  inline bool operator==(const TShape &s) const {
+    if (ndim() != s.ndim()) return false;
+    return std::equal(begin(), end(), s.begin());
+  }
+  inline bool operator!=(const TShape &s) const {
+    return !(*this == s);
+  }
+  /*!
+   * \return whether two shape equals
+   * \param s the shape to compare against
+   * \tparam dim dimension of the shape
+   */
+  template<int dim>
+  inline bool operator==(const mshadow::Shape<dim> &s) const {
+    if (ndim_ != dim) return false;
+    const dim_t *d = dim <= kStackCache ? data_stack_ : data_heap_;
+    for (size_t i = 0; i < dim; ++i) {
+      if (d[i] != s.shape_[i]) return false;
+    }
+    return true;
+  }
+  /*!
+   * \return whether two shape not equals
+   * \param s the shape to compare against
+   * \tparam dim dimension of the shape
+   */
+  template<int dim>
+  inline bool operator!=(const mshadow::Shape<dim> &s) const {
+    return !(*this == s);
+  }
+#endif
+};
+
+/*! \brief helper function to cast type of container elements */
+template<typename SrcIter, typename DstIter>
+inline DstIter ShapeTypeCast(const SrcIter begin,
+                             const SrcIter end,
+                             DstIter dst_begin) {
+  typedef typename std::iterator_traits<SrcIter>::value_type SrcDType;
+  typedef typename std::iterator_traits<DstIter>::value_type DstDType;
+  auto cast = [](const SrcDType& dim) { return static_cast<DstDType>(dim); };
+  return std::transform(begin, end, dst_begin, cast);
+}
+
+/*! \brief helper function to transform a container to TShape with type cast */
+template<typename SrcIter>
+inline TShape ShapeTypeCast(const SrcIter begin, const SrcIter end) {
+  size_t ndim = std::distance(begin, end);
+  TShape res(ndim);
+  ShapeTypeCast(begin, end, res.begin());
+  return res;
+}
+
+/*! \tparam ValueType The type of data stored inside tuple. */
+template<typename ValueType>
+template<typename DType, typename TStream>
+inline void Tuple<ValueType>::Save(TStream *strm) const {
+  strm->Write(&ndim_, sizeof(ndim_));
+  if (typeid(DType) == typeid(ValueType)) {
+    strm->Write(begin(), sizeof(ValueType) * ndim_);
+  } else {
+    std::vector<DType> buffer(ndim_);
+    ShapeTypeCast(begin(), end(), buffer.data());
+    strm->Write(buffer.data(), sizeof(DType) * ndim_);
+  }
+}
+
+/*! \tparam ValueType The type of data stored inside tuple. */
+template<typename ValueType>
+template<typename DType, typename TStream>
+inline bool Tuple<ValueType>::Load(TStream *strm) {
+  if (strm->Read(&ndim_, sizeof(ndim_)) != sizeof(ndim_)) return false;
+  this->SetDim(ndim_);
+  size_t nread = sizeof(DType) * ndim_;
+  if (typeid(DType) == typeid(ValueType)) {
+    if (strm->Read(begin(), nread) != nread) return false;
+  } else {
+    std::vector<DType> buffer(ndim_);
+    if (strm->Read(buffer.data(), nread) != nread) return false;
+    ShapeTypeCast(buffer.begin(), buffer.end(), begin());
+  }
+  return true;
+}
+
+}  // namespace mxnet
+
+namespace std {
+/*! \brief hash function for Tuple. */
+template<typename T>
+struct hash<mxnet::Tuple<T> > {
+  /*! \brief hash a Tuple into unsigned int */
+  size_t operator()(const mxnet::Tuple<T>& val) const {
+    std::hash<uint32_t> hash_uint;
+    size_t res = hash_uint(val.ndim());
+    for (uint32_t i = 0; i < val.ndim(); ++i) {
+      res = dmlc::HashCombine(res, val[i]);
+    }
+    return res;
+  }
+};
+
+/*! \brief hash function for TShape. */
+template<>
+struct hash<mxnet::TShape> {
+  /*! \brief hash a TShape into unsigned int */
+  size_t operator()(const mxnet::TShape& val) const {
+    std::hash<uint32_t> hash_uint;
+    size_t res = hash_uint(val.ndim());
+    for (uint32_t i = 0; i < val.ndim(); ++i) {
+      res = dmlc::HashCombine(res, val[i]);
+    }
+    return res;
+  }
+};
+}  // namespace std
+
+namespace dmlc {
+/*! \brief description for optional TShape */
+DMLC_DECLARE_TYPE_NAME(optional<mxnet::TShape>, "Shape or None");
+// avoid low version of MSVC
+#if !defined(_MSC_VER)
+template<typename T>
+struct type_name_helper<mxnet::Tuple<T> > {
+  static inline std::string value() {
+    return "tuple of <" + type_name<T>() + ">";
+  }
+};
+#endif
+}  // namespace dmlc
+
+namespace mxnet {
+/*!
+ * \brief The result holder of shape of each NodeEntry in the graph.
+ * \note Stored under graph.attrs["shape"], provided by Pass "InferShape"
+ *
+ * \code
+ *  Graph g = ApplyPass(src_graph, "InferShape");
+ *  const ShapeVector& shapes = g.GetAttr<ShapeVector>("shape");
+ *  // get shape by entry id
+ *  TShape entry_shape = shapes[g.indexed_graph().entry_id(my_entry)];
+ * \endcode
+ *
+ * \sa FInferShape
+ */
+using ShapeVector = std::vector<mxnet::TShape>;
+
+/*!
+ * \brief Shape inference function.
+ *  Update the shapes given the input shape information.
+ *  TShape.ndim() == -1 means the shape is still unknown.
+ *
+ * \note Register under "FInferShape",
+ *  by default do not update any shapes.
+ *
+ *  FInferShape is needed by shape inference
+ */
+using FInferShape = nnvm::FInferNodeEntryAttr<mxnet::TShape>;
+
+}  // namespace mxnet
+
+#endif  // MXNET_TUPLE_H_
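With the header complete, a short editor's illustration (added for this write-up, not a hunk of the patch) of the `TShape` conveniences and of the `std::hash` specializations above, which let shapes serve as keys of standard containers:

```cpp
// Editor's sketch of mxnet::TShape / ShapeVector usage; assumes an MXNet
// build containing this patch, with "mxnet/tuple.h" on the include path.
#include <iostream>
#include <unordered_map>
#include "mxnet/tuple.h"

int main() {
  mxnet::TShape s{2, 3, 4};
  std::cout << s.Size() << '\n';           // 24: product of all dimensions
  std::cout << s.ProdShape(1, 3) << '\n';  // 12: product of dims in [1, 3)

  mxnet::ShapeVector shapes{s, mxnet::TShape{8}};  // one TShape per entry

  // std::hash<mxnet::TShape> makes shapes usable as unordered_map keys.
  std::unordered_map<mxnet::TShape, int> seen;
  ++seen[s];
  std::cout << seen[s] << '\n';            // 1
  return 0;
}
```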
diff --git a/plugin/caffe/caffe_loss-inl.h b/plugin/caffe/caffe_loss-inl.h
index 60b03b1d923b..98c714612dca 100644
--- a/plugin/caffe/caffe_loss-inl.h
+++ b/plugin/caffe/caffe_loss-inl.h
@@ -222,9 +222,9 @@ class CaffeLossProp : public OperatorProperty {
   }
 
   /*brief Set up caffeop to infer output shape*/
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     using ::caffe::Blob;
     using std::vector;
@@ -236,7 +236,7 @@ class CaffeLossProp : public OperatorProperty {
 
     vector<Blob<float> *> bot_blobs, top_blobs;
     for (int i = 0; i < param_.num_data; ++i) {
-      TShape tshape = (*in_shape)[i];
+      mxnet::TShape tshape = (*in_shape)[i];
       if (tshape.ndim() == 0) return false;
       auto blob_ptr = new Blob<float>();
       blob_ptr->Reshape(caffe::TShape2Vector(tshape));
@@ -251,7 +251,7 @@ class CaffeLossProp : public OperatorProperty {
     // Initialize out shapes
     out_shape->clear();
     for (auto blob : top_blobs) {
-      TShape tshape = caffe::Vector2TShape(blob->shape());
+      mxnet::TShape tshape = caffe::Vector2TShape(blob->shape());
       out_shape->push_back(tshape);
     }
 
@@ -288,7 +288,7 @@ class CaffeLossProp : public OperatorProperty {
     return NULL;
   }
 
-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;
 
diff --git a/plugin/caffe/caffe_loss.cc b/plugin/caffe/caffe_loss.cc
index 5ce8bb247e86..47424d1cad80 100644
--- a/plugin/caffe/caffe_loss.cc
+++ b/plugin/caffe/caffe_loss.cc
@@ -47,10 +47,10 @@ Operator *CreateOp(CaffeLossParam param, int dtype) {
 }
 
 // DO_BIND_DISPATCH comes from static_operator_common.h
-Operator *CaffeLossProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+Operator *CaffeLossProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                                           std::vector<int> *in_type) const {
   std::vector<int> out_type, aux_type;
-  std::vector<TShape> out_shape, aux_shape;
+  mxnet::ShapeVector out_shape, aux_shape;
   out_type.resize(this->ListOutputs().size());
   out_shape.resize(this->ListOutputs().size());
   aux_type.resize(this->ListAuxiliaryStates().size());
diff --git a/plugin/caffe/caffe_op-inl.h b/plugin/caffe/caffe_op-inl.h
index 2c1c9bac170a..b4ab0926199c 100644
--- a/plugin/caffe/caffe_op-inl.h
+++ b/plugin/caffe/caffe_op-inl.h
@@ -274,9 +274,9 @@ class CaffeOpProp : public OperatorProperty {
    * \brief Set up caffeOp_ to infer weights & output shape
    * \brief Initialize param_'s in & out dims
    */
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     if (caffeOp_ == NULL)
       caffeOp_ = caffe::LayerRegistry<float>::CreateLayer(param_.prototxt);
     using namespace mshadow;
@@ -287,7 +287,7 @@ class CaffeOpProp : public OperatorProperty {
 
     vector<Blob<float> *> bot_blobs, top_blobs;
     for (int i = 0; i < param_.num_data; ++i) {
-      TShape tshape = (*in_shape)[i];
+      mxnet::TShape tshape = (*in_shape)[i];
       if (tshape.ndim() == 0) return false;
       auto blob_ptr = new Blob<float>();
       blob_ptr->Reshape(caffe::TShape2Vector(tshape));
@@ -302,13 +302,13 @@ class CaffeOpProp : public OperatorProperty {
     // Set weight shape
     CHECK_EQ(param_.num_weight, caffeOp_->blobs().size());
     for (int i = 0; i < param_.num_weight ; ++i) {
-      TShape tshape = caffe::Vector2TShape(caffeOp_->blobs()[i]->shape());
+      mxnet::TShape tshape = caffe::Vector2TShape(caffeOp_->blobs()[i]->shape());
       SHAPE_ASSIGN_CHECK(*in_shape, i + param_.num_data, tshape);
     }
     // Initialize out shapes
     out_shape->clear();
     for (auto blob : top_blobs) {
-      TShape tshape = caffe::Vector2TShape(blob->shape());
+      mxnet::TShape tshape = caffe::Vector2TShape(blob->shape());
out_shape->push_back(tshape); } @@ -334,7 +334,7 @@ class CaffeOpProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/plugin/caffe/caffe_op.cc b/plugin/caffe/caffe_op.cc index 9db9df04068d..715ae0b82d8e 100644 --- a/plugin/caffe/caffe_op.cc +++ b/plugin/caffe/caffe_op.cc @@ -47,10 +47,10 @@ Operator* CreateOp(CaffeOpParam param, int dtype) { } // DO_BIND_DISPATCH comes from static_operator_common.h -Operator *CaffeOpProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *CaffeOpProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { std::vector out_type, aux_type; - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; out_type.resize(this->ListOutputs().size()); out_shape.resize(this->ListOutputs().size()); aux_type.resize(this->ListAuxiliaryStates().size()); diff --git a/plugin/opencv/cv_api.cc b/plugin/opencv/cv_api.cc index 53eb8524c7cf..b0915fd40579 100644 --- a/plugin/opencv/cv_api.cc +++ b/plugin/opencv/cv_api.cc @@ -93,7 +93,7 @@ MXNET_DLL int MXCVImdecode(const unsigned char *img, const mx_uint len, } else { LOG(FATAL) << "Only supports png and jpg."; } - NDArray ndout(TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); + NDArray ndout(mxnet::TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); unsigned char *img_cpy = new unsigned char[len]; memcpy(img_cpy, img, sizeof(unsigned char)*len); Engine::Get()->PushSync([=](RunContext ctx){ @@ -124,7 +124,7 @@ MXNET_DLL int MXCVResize(NDArrayHandle src, const mx_uint w, const mx_uint h, CHECK_EQ(ndsrc.dtype(), mshadow::kUint8); mx_uint dims[3] = {h, w, ndsrc.shape()[2]}; - NDArray ndout(TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); + NDArray ndout(mxnet::TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); Engine::Get()->PushSync([=](RunContext ctx){ ndout.CheckAndAlloc(); @@ -156,7 +156,7 @@ MXNET_DLL int MXCVcopyMakeBorder(NDArrayHandle src, int h = ndsrc.shape()[0], w = ndsrc.shape()[1], c = ndsrc.shape()[2]; mx_uint dims[3] = {top+h+bot, left+w+right, c}; - NDArray ndout(TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); + NDArray ndout(mxnet::TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8); Engine::Get()->PushSync([=](RunContext ctx){ ndout.CheckAndAlloc(); diff --git a/plugin/sframe/iter_sframe.cc b/plugin/sframe/iter_sframe.cc index 9f09916b8166..6a6b03f9c2fb 100644 --- a/plugin/sframe/iter_sframe.cc +++ b/plugin/sframe/iter_sframe.cc @@ -50,8 +50,8 @@ struct SFrameParam : public dmlc::Parameter { std::string path_sframe; std::string data_field; std::string label_field; - TShape data_shape; - TShape label_shape; + mxnet::TShape data_shape; + mxnet::TShape label_shape; DMLC_DECLARE_PARAMETER(SFrameParam) { DMLC_DECLARE_FIELD(path_sframe).set_default("") .describe("Dataset Param: path to image dataset sframe"); diff --git a/plugin/torch/torch_criterion-inl.h b/plugin/torch/torch_criterion-inl.h index e0687ab39bff..2138bd8f1335 100644 --- a/plugin/torch/torch_criterion-inl.h +++ b/plugin/torch/torch_criterion-inl.h @@ -42,13 +42,13 @@ namespace mxnet { namespace op { struct TorchCriterionParam : public dmlc::Parameter { std::string lua_string; - TShape label_shape; + mxnet::TShape label_shape; float grad_scale; DMLC_DECLARE_PARAMETER(TorchCriterionParam) { DMLC_DECLARE_FIELD(lua_string) .describe("lua 
string that is called to generate the torch criterion object"); DMLC_DECLARE_FIELD(label_shape) - .set_default(TShape()) + .set_default(mxnet::TShape()) .enforce_nonzero() .describe("Shape of label (without batch size)."); DMLC_DECLARE_FIELD(grad_scale) @@ -183,18 +183,18 @@ class TorchCriterionProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2); - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; std::vector lshape; lshape.push_back(dshape[0]); lshape.insert(lshape.end(), param_.label_shape.data(), param_.label_shape.data() + param_.label_shape.ndim()); - TShape shape(lshape.begin(), lshape.end()); + mxnet::TShape shape(lshape.begin(), lshape.end()); SHAPE_ASSIGN_CHECK(*in_shape, 1, shape); out_shape->clear(); out_shape->push_back(Shape1(dshape[0])); diff --git a/plugin/torch/torch_module-inl.h b/plugin/torch/torch_module-inl.h index 7fb0440aa575..386f0e31fb43 100644 --- a/plugin/torch/torch_module-inl.h +++ b/plugin/torch/torch_module-inl.h @@ -347,9 +347,9 @@ class TorchModuleProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { if (torchState_ == nullptr) { this->InitTorchState(); } @@ -397,7 +397,7 @@ class TorchModuleProp : public OperatorProperty { THFloatTensor* param = reinterpret_cast(luaT_toudata(L, -1, TorchTensor::TensorType(mshadow::cpu::kDevMask))); long int* size = param->size; // NOLINT(*) - (*in_shape)[index++] = TShape(size, size + THFloatTensor_nDimension(param)); + (*in_shape)[index++] = mxnet::TShape(size, size + THFloatTensor_nDimension(param)); lua_pop(L, 1); } lua_pop(L, 2); @@ -408,7 +408,7 @@ class TorchModuleProp : public OperatorProperty { THFloatTensor* output = reinterpret_cast(luaT_toudata(L, -1, TorchTensor::TensorType(mshadow::cpu::kDevMask))); long int* size = output->size; // NOLINT(*) - (*out_shape)[0] = TShape(size, size + THFloatTensor_nDimension(output)); + (*out_shape)[0] = mxnet::TShape(size, size + THFloatTensor_nDimension(output)); } else { for (uint32_t data_index = 0; data_index < param_.num_outputs; ++data_index) { lua_pushnil(L); @@ -417,7 +417,7 @@ class TorchModuleProp : public OperatorProperty { THFloatTensor* out = reinterpret_cast(luaT_toudata(L, -1, TorchTensor::TensorType(mshadow::cpu::kDevMask))); long int* size = out->size; // NOLINT(*) - (*out_shape)[index++] = TShape(size, size + THFloatTensor_nDimension(out)); + (*out_shape)[index++] = mxnet::TShape(size, size + THFloatTensor_nDimension(out)); } } } diff --git a/plugin/warpctc/warpctc-inl.h b/plugin/warpctc/warpctc-inl.h index 5a540c57940a..37677d21fd14 100644 --- a/plugin/warpctc/warpctc-inl.h +++ b/plugin/warpctc/warpctc-inl.h @@ -247,14 +247,14 @@ class WarpCTCProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { 
using namespace mshadow; CHECK_EQ(in_shape->size(), 2) << "Input:[data, label]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; - TShape label_shape(dshape.ndim() - 1); + mxnet::TShape label_shape(dshape.ndim() - 1); label_shape[0] = param_.label_length * (dshape[0] / param_.input_length); SHAPE_ASSIGN_CHECK(*in_shape, warpctc_enum::kLabel, label_shape); @@ -276,7 +276,7 @@ class WarpCTCProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 7e03acccdfae..5a7329acaeab 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -170,7 +170,7 @@ int MXNDArrayCreate(const mx_uint *shape, NDArrayHandle *out) { API_BEGIN(); *out = new NDArray( - TShape(shape, shape + ndim), + mxnet::TShape(shape, shape + ndim), Context::Create(static_cast(dev_type), dev_id), delay_alloc != 0); API_END(); @@ -185,7 +185,7 @@ int MXNDArrayCreateEx(const mx_uint *shape, NDArrayHandle *out) { API_BEGIN(); *out = new NDArray( - TShape(shape, shape + ndim), + mxnet::TShape(shape, shape + ndim), Context::Create(static_cast(dev_type), dev_id), delay_alloc != 0, dtype); @@ -206,7 +206,7 @@ int MXNDArrayCreateSparseEx(int storage_type, NDArrayHandle *out) { API_BEGIN(); std::vector aux_types; - std::vector aux_shapes; + mxnet::ShapeVector aux_shapes; auto shape_start = aux_shape; for (size_t i = 0; i < num_aux; i++) { // types @@ -217,7 +217,7 @@ int MXNDArrayCreateSparseEx(int storage_type, } *out = new NDArray( NDArrayStorageType(storage_type), - TShape(shape, shape + ndim), + mxnet::TShape(shape, shape + ndim), Context::Create(static_cast(dev_type), dev_id), delay_alloc != 0, dtype, aux_types, aux_shapes); @@ -433,7 +433,7 @@ MXNET_DLL int MXNDArrayReshape(NDArrayHandle handle, NDArray *ptr = new NDArray(); API_BEGIN(); NDArray *arr = static_cast(handle); - TShape new_shape(dims, dims+ndim); + mxnet::TShape new_shape(dims, dims+ndim); int size = 1; int pos = -1; for (int i = 0; i < ndim; ++i) { @@ -473,7 +473,7 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, nnvm::Tuple shape(dims, dims+ndim); CHECK_GT(arr->shape().Size(), 0) << "Source ndarray's shape is undefined. 
Input shape: " << arr->shape(); - TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), reverse); + mxnet::TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), reverse); *ptr = arr->ReshapeWithRecord(new_shape); *out = ptr; API_END_HANDLE_ERROR(delete ptr); @@ -498,7 +498,7 @@ int MXNDArrayGetShape(NDArrayHandle handle, API_BEGIN(); NDArray *arr = static_cast(handle); if (!arr->is_none()) { - const TShape &s = arr->shape(); + const mxnet::TShape &s = arr->shape(); *out_dim = s.ndim(); std::vector& buffer = ret->arg_shape_buffer; buffer.resize(s.ndim()); @@ -789,7 +789,7 @@ int MXDataIterGetLabel(DataIterHandle handle, NDArrayHandle *out) { NDArray* pndarray = new NDArray(); // temp hack to make label 1D // TODO(tianjun) make label 1D when label_width=0 - TShape shape = db.data[1].shape(); + mxnet::TShape shape = db.data[1].shape(); if (shape[1] == 1) { *pndarray = db.data[1].Reshape(mshadow::Shape1(shape[0])); } else { @@ -1397,6 +1397,6 @@ int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shar int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape, mx_uint ndim, int dtype, NDArrayHandle *out) { API_BEGIN(); - *out = new NDArray(shared_pid, shared_id, TShape(shape, shape + ndim), dtype); + *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype); API_END(); } diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index ecb05bc78ca4..b5adfa37eca9 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -68,7 +68,7 @@ struct MXAPIThreadLocalEntry { /*! \brief holder for NDArray handles */ std::vector ndinputs, ndoutputs; /*! \brief result holder for returning shapes */ - std::vector arg_shapes, out_shapes, aux_shapes; + mxnet::ShapeVector arg_shapes, out_shapes, aux_shapes; /*! \brief result holder for returning type flags */ std::vector arg_types, out_types, aux_types; /*! 
\brief result holder for returning storage types */ @@ -83,7 +83,7 @@ struct MXAPIThreadLocalEntry { std::vector save_inputs, save_outputs; // helper function to setup return value of shape array inline static void SetupShapeArrayReturnWithBuffer( - const std::vector &shapes, + const mxnet::ShapeVector &shapes, std::vector *ndim, std::vector *data, std::vector *buffer) { diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc index 66566ed703eb..a2e8bb810e6f 100644 --- a/src/c_api/c_api_executor.cc +++ b/src/c_api/c_api_executor.cc @@ -408,10 +408,10 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, } // create shape map for in_args and aux_states - std::unordered_map arg_shape_map(num_provided_arg_shapes); + std::unordered_map arg_shape_map(num_provided_arg_shapes); for (mx_uint i = 0; i < num_provided_arg_shapes; ++i) { auto p = arg_shape_map.emplace(provided_arg_shape_names[i], - TShape(provided_arg_shape_data+provided_arg_shape_idx[i], + mxnet::TShape(provided_arg_shape_data+provided_arg_shape_idx[i], provided_arg_shape_data+provided_arg_shape_idx[i+1])); CHECK(p.second) << "Duplicate shapes are provided for argument " << provided_arg_shape_names[i] << " in simple_bind"; @@ -562,10 +562,10 @@ int MXExecutorReshape(int partial_shaping, API_BEGIN(); *out = nullptr; // ensure we can know whether to free executor on early abort // create shape map for in_args and aux_states - std::unordered_map kwargs(num_provided_arg_shapes); + std::unordered_map kwargs(num_provided_arg_shapes); for (mx_uint i = 0; i < num_provided_arg_shapes; ++i) { auto p = kwargs.emplace(provided_arg_shape_names[i], - TShape(provided_arg_shape_data+provided_arg_shape_idx[i], + mxnet::TShape(provided_arg_shape_data+provided_arg_shape_idx[i], provided_arg_shape_data+provided_arg_shape_idx[i+1])); CHECK(p.second) << "Duplicate shapes are provided for argument " << provided_arg_shape_names[i] << " in reshape of executor"; diff --git a/src/c_api/c_api_function.cc b/src/c_api/c_api_function.cc index 7091be2e72c5..50f9b32d6e47 100644 --- a/src/c_api/c_api_function.cc +++ b/src/c_api/c_api_function.cc @@ -37,7 +37,7 @@ namespace custom_function { struct CustomFunctionParam { size_t num_args, num_outs; std::shared_ptr info; - std::vector out_shapes; + std::vector out_shapes; std::vector out_dtypes; }; @@ -64,7 +64,7 @@ std::vector Gradient( OpStatePtr CreateState(const nnvm::NodeAttrs& attrs, Context ctx, - const std::vector& ishape, + const mxnet::ShapeVector& ishape, const std::vector& itype) { LOG(FATAL) << "Not reached"; return OpStatePtr::Create(nullptr); @@ -141,9 +141,9 @@ NNVM_REGISTER_OP(_CustomFunction) const CustomFunctionParam& params = nnvm::get(attrs.parsed); return params.num_outs; }) -.set_attr("FInferShape", - [](const NodeAttrs& attrs, std::vector *in_shape, - std::vector *out_shape) { +.set_attr("FInferShape", + [](const NodeAttrs& attrs, mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { const CustomFunctionParam& params = nnvm::get(attrs.parsed); *out_shape = params.out_shapes; return true; diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 9f0d2834fcce..e07716267288 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -520,18 +520,18 @@ int MXSymbolInferShape(SymbolHandle sym, MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); nnvm::Graph g = Symbol2Graph(*s); - nnvm::ShapeVector arg_shapes(g.indexed_graph().input_nodes().size(), TShape()); + mxnet::ShapeVector 
arg_shapes(g.indexed_graph().input_nodes().size(), mxnet::TShape()); if (keys == nullptr && num_args != 0) { std::vector read_only_args = mxnet::ReadOnlyArgIndices(g.indexed_graph()); CHECK_LE(num_args, read_only_args.size()); for (mx_uint i = 0; i < num_args; ++i) { - arg_shapes[read_only_args[i]] = nnvm::ShapeTypeCast( + arg_shapes[read_only_args[i]] = mxnet::ShapeTypeCast( arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]); } } else { - std::unordered_map kwargs; + std::unordered_map kwargs; for (mx_uint i = 0; i < num_args; ++i) { - kwargs[keys[i]] = nnvm::ShapeTypeCast( + kwargs[keys[i]] = mxnet::ShapeTypeCast( arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]); } mxnet::MatchArguments(g.indexed_graph(), kwargs, &arg_shapes, "InferShape"); @@ -544,7 +544,7 @@ int MXSymbolInferShape(SymbolHandle sym, } // copy back - CopyAttr(g.indexed_graph(), g.GetAttr("shape"), + CopyAttr(g.indexed_graph(), g.GetAttr("shape"), &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes)); // copy data back diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index bd599e0b6423..3b9f43d86079 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -46,7 +46,7 @@ struct MXAPIPredictor { // auxiliary arrays std::vector aux_arrays; // output shapes - std::vector out_shapes; + mxnet::ShapeVector out_shapes; // uint32_t buffer for output shapes std::vector out_shapes_buffer; // key to arguments @@ -61,7 +61,7 @@ struct MXAPIPredictor { struct MXAPINDList { std::vector keys; - std::vector shapes; + mxnet::ShapeVector shapes; std::vector shapes_buffer; std::vector indptr; std::vector data; @@ -168,17 +168,17 @@ int _CreatePartialOut(const char* symbol_json_str, } // shape inference and bind - std::unordered_map known_shape; + std::unordered_map known_shape; for (mx_uint i = 0; i < num_input_nodes; ++i) { known_shape[std::string(input_keys[i])] = - TShape(input_shape_data + input_shape_indptr[i], + mxnet::TShape(input_shape_data + input_shape_indptr[i], input_shape_data + input_shape_indptr[i + 1]); } std::vector arg_names = sym.ListInputNames(Symbol::kReadOnlyArgs); std::vector aux_names = sym.ListInputNames(Symbol::kAuxiliaryStates); - std::vector out_shapes(sym.ListOutputNames().size()); - std::vector aux_shapes(aux_names.size()); - std::vector arg_shapes; + mxnet::ShapeVector out_shapes(sym.ListOutputNames().size()); + mxnet::ShapeVector aux_shapes(aux_names.size()); + mxnet::ShapeVector arg_shapes; std::unordered_map key2arg; for (size_t i = 0; i < arg_names.size(); ++i) { std::string key = arg_names[i]; @@ -186,7 +186,7 @@ int _CreatePartialOut(const char* symbol_json_str, } try { - std::vector in_shapes; + mxnet::ShapeVector in_shapes; for (std::string key : sym.ListInputNames(Symbol::kAll)) { if (known_shape.count(key) != 0) { in_shapes.push_back(known_shape[key]); @@ -200,7 +200,7 @@ int _CreatePartialOut(const char* symbol_json_str, CHECK(infer_complete) << "The shape information of is not enough to get the shapes"; CopyAttr(g.indexed_graph(), - g.GetAttr("shape"), + g.GetAttr("shape"), &arg_shapes, &out_shapes, &aux_shapes); } catch (const mxnet::op::InferShapeError &err) { throw dmlc::Error(err.msg); @@ -348,22 +348,22 @@ int MXPredReshape(mx_uint num_input_nodes, API_BEGIN(); // shape inference - std::unordered_map new_shape; + std::unordered_map new_shape; for (mx_uint i = 0; i < num_input_nodes; ++i) { new_shape[std::string(input_keys[i])] = - TShape(input_shape_data + input_shape_indptr[i], + mxnet::TShape(input_shape_data + 
input_shape_indptr[i], input_shape_data + input_shape_indptr[i + 1]); } ret->sym = p->sym; std::vector arg_names = ret->sym.ListInputNames(Symbol::kReadOnlyArgs); std::vector aux_names = ret->sym.ListInputNames(Symbol::kAuxiliaryStates); - std::vector out_shapes(ret->sym.ListOutputNames().size()); - std::vector aux_shapes(aux_names.size()); - std::vector arg_shapes; + mxnet::ShapeVector out_shapes(ret->sym.ListOutputNames().size()); + mxnet::ShapeVector aux_shapes(aux_names.size()); + mxnet::ShapeVector arg_shapes; ret->key2arg = p->key2arg; try { - std::vector in_shapes; + mxnet::ShapeVector in_shapes; in_shapes.reserve(arg_names.size()); for (std::string key : ret->sym.ListInputNames(Symbol::kAll)) { if (new_shape.count(key) != 0) { @@ -378,7 +378,7 @@ int MXPredReshape(mx_uint num_input_nodes, CHECK(infer_complete) << "The shape information of is not enough to get the shapes"; CopyAttr(g.indexed_graph(), - g.GetAttr("shape"), + g.GetAttr("shape"), &arg_shapes, &out_shapes, &aux_shapes); } catch (const mxnet::op::InferShapeError &err) { throw dmlc::Error(err.msg); @@ -387,7 +387,7 @@ int MXPredReshape(mx_uint num_input_nodes, ret->arg_arrays = p->arg_arrays; ret->ctx = p->ctx; for (size_t i=0; i < arg_names.size(); ++i) { - TShape newShape = arg_shapes[i]; + mxnet::TShape newShape = arg_shapes[i]; NDArray &arr = p->arg_arrays[i]; if (new_shape.count(arg_names[i]) != 0) { ret->arg_arrays[i].ReshapeAndAlloc(newShape); @@ -399,7 +399,7 @@ int MXPredReshape(mx_uint num_input_nodes, } for (size_t i=0; i < aux_names.size(); ++i) { - TShape newShape = aux_shapes[i]; + mxnet::TShape newShape = aux_shapes[i]; NDArray &arr = p->aux_arrays[i]; CHECK_EQ(newShape.Size(), arr.shape().Size()) << "aux " << aux_names[i] @@ -435,7 +435,7 @@ int MXPredGetOutputShape(PredictorHandle handle, CHECK_LT(out_index, p->out_arrays.size()) << "Index exceed number of outputs"; - const TShape& s = p->out_shapes[out_index]; + const mxnet::TShape& s = p->out_shapes[out_index]; p->out_shapes_buffer.resize(s.ndim()); nnvm::ShapeTypeCast(s.begin(), s.end(), p->out_shapes_buffer.data()); *shape_data = p->out_shapes_buffer.data(); @@ -509,7 +509,7 @@ int MXNDListCreate(const char* nd_file_bytes, } ret->indptr.push_back(0); for (auto &array : arrays) { - TShape shape = array.shape(); + mxnet::TShape shape = array.shape(); size_t begin = ret->data.size(); size_t size = shape.Size(); ret->shapes.push_back(shape); @@ -534,7 +534,7 @@ int MXNDListGet(NDListHandle handle, << "Index out of range"; *out_key = p->keys[index].c_str(); *out_data = dmlc::BeginPtr(p->data) + p->indptr[index]; - const TShape& s = p->shapes[index]; + const mxnet::TShape& s = p->shapes[index]; p->shapes_buffer.resize(s.ndim()); nnvm::ShapeTypeCast(s.begin(), s.end(), p->shapes_buffer.data()); *out_shape = p->shapes_buffer.data(); diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h index 8de6f65253aa..279ecbd67f09 100644 --- a/src/common/exec_utils.h +++ b/src/common/exec_utils.h @@ -284,7 +284,7 @@ inline std::string storage_str(int storage_id) { */ inline void LogMemoryPlan(const nnvm::Graph& g) { const auto &idx = g.indexed_graph(); - const auto& vshape = g.GetAttr("shape"); + const auto& vshape = g.GetAttr("shape"); const auto& vtype = g.GetAttr("dtype"); const auto& vstorage = g.GetAttr("storage_id"); // find node range @@ -373,13 +373,13 @@ inline void LogInferStorage(const nnvm::Graph& g) { // prints a helpful message after shape inference errors in executor. 
inline void HandleInferShapeError(const size_t num_forward_inputs, const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes) { + const mxnet::ShapeVector& inferred_shapes) { int cnt = 10; std::ostringstream oss; for (size_t i = 0; i < num_forward_inputs; ++i) { const uint32_t nid = idx.input_nodes().at(i); const uint32_t eid = idx.entry_id(nid, 0); - const TShape& inferred_shape = inferred_shapes[eid]; + const mxnet::TShape& inferred_shape = inferred_shapes[eid]; if (inferred_shape.ndim() == 0 || inferred_shape.Size() == 0U) { const std::string& arg_name = idx[nid].source->attrs.name; oss << arg_name << ": " << inferred_shape << ", "; @@ -451,7 +451,7 @@ inline void HandleInferStorageTypeError(const size_t num_forward_inputs, * if enable_row_sparse_sharing is `True`, otherwise default storage only. */ inline NDArray ReshapeOrCreate(const std::string& name, - const TShape& dest_arg_shape, + const mxnet::TShape& dest_arg_shape, const int dest_arg_dtype, const NDArrayStorageType dest_arg_stype, const Context& ctx, diff --git a/src/common/serialization.h b/src/common/serialization.h index 8a1bcc6e6ed2..8192ee210a1c 100644 --- a/src/common/serialization.h +++ b/src/common/serialization.h @@ -30,7 +30,6 @@ #include #include #include -#include #include #include diff --git a/src/common/utils.h b/src/common/utils.h index b902b38b90ce..8e6966952890 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -118,10 +118,10 @@ void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, using namespace op::mxnet_op; CHECK_EQ(input.storage_type(), kCSRStorage) << "CheckFormatCSRImpl is for CSRNDArray"; - const TShape shape = input.shape(); - const TShape idx_shape = input.aux_shape(csr::kIdx); - const TShape indptr_shape = input.aux_shape(csr::kIndPtr); - const TShape storage_shape = input.storage_shape(); + const mxnet::TShape shape = input.shape(); + const mxnet::TShape idx_shape = input.aux_shape(csr::kIdx); + const mxnet::TShape indptr_shape = input.aux_shape(csr::kIndPtr); + const mxnet::TShape storage_shape = input.storage_shape(); if ((shape.ndim() != 2) || (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) || (indptr_shape[0] != shape[0] + 1) || @@ -172,7 +172,7 @@ void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, using namespace op::mxnet_op; CHECK_EQ(input.storage_type(), kRowSparseStorage) << "CheckFormatRSPImpl is for RSPNDArray"; - const TShape idx_shape = input.aux_shape(rowsparse::kIdx); + const mxnet::TShape idx_shape = input.aux_shape(rowsparse::kIdx); if (idx_shape[0] != input.storage_shape()[0]) { MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, { DType* err = err_cpu.dptr(); @@ -689,7 +689,7 @@ MSHADOW_XINLINE int ilog2ui(unsigned int a) { /*! * \brief Return an NDArray of all zeros. */ -inline NDArray InitZeros(const NDArrayStorageType stype, const TShape &shape, +inline NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype) { // NDArray with default storage if (stype == kDefaultStorage) { @@ -704,7 +704,7 @@ inline NDArray InitZeros(const NDArrayStorageType stype, const TShape &shape, /*! * \brief Helper to add a NDArray of zeros to a std::vector. 
*/ -inline void EmplaceBackZeros(const NDArrayStorageType stype, const TShape &shape, +inline void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype, std::vector *vec) { // NDArray with default storage diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc index fe3a6bd3d09f..b04d132ee9f6 100644 --- a/src/executor/attach_op_execs_pass.cc +++ b/src/executor/attach_op_execs_pass.cc @@ -263,7 +263,7 @@ class FComputeExExecutor : public OpExecutor { void CreateOpExecs(const Graph& g, OpExecVector* p_ret, size_t i) { using nnvm::DTypeVector; - using nnvm::ShapeVector; + using mxnet::ShapeVector; using nnvm::FMutateInputs; static auto& fcreate_op_state = nnvm::Op::GetAttr("FCreateOpState"); @@ -272,7 +272,7 @@ void CreateOpExecs(const Graph& g, OpExecVector* p_ret, size_t i) { static auto& is_layer_backward = nnvm::Op::GetAttr("TIsLayerOpBackward"); const auto& vdtype = g.GetAttr("dtype"); - const auto& vshape = g.GetAttr("shape"); + const auto& vshape = g.GetAttr("shape"); const auto& vctx = g.GetAttr("context"); const auto& dispatch_modes = g.GetAttr("dispatch_mode"); // get the graph @@ -293,7 +293,7 @@ void CreateOpExecs(const Graph& g, OpExecVector* p_ret, size_t i) { } CHECK(dispatch_modes[i] != DispatchMode::kUndefined); if (fcreate_op_state.count(op)) { - std::vector ishape; + mxnet::ShapeVector ishape; std::vector itype; for (const auto& e : inode.inputs) { ishape.emplace_back(vshape[idx.entry_id(e)]); diff --git a/src/executor/exec_pass.h b/src/executor/exec_pass.h index 52f7c790c77e..dd4132301346 100644 --- a/src/executor/exec_pass.h +++ b/src/executor/exec_pass.h @@ -173,7 +173,7 @@ Graph DetectInplaceAddTo(Graph g); * The index of ShapeVector is given by graph.indexed_graph().entry_id. */ Graph InferShape(Graph&& graph, - nnvm::ShapeVector&& shape_inputs = nnvm::ShapeVector(), + mxnet::ShapeVector&& shape_inputs = mxnet::ShapeVector(), const std::string& shape_attr_key = ""); /*! @@ -217,4 +217,55 @@ std::vector> GetTrtCompatibleSubsets(const Graph } // namespace exec } // namespace mxnet +namespace nnvm { +namespace pass { +/*! + * \brief Get the gradient graph whose outputs are gradients of xs wrt to ys. + * \param graph The input graph. + * \param ys The entries we want to take gradient from. + * \param xs The input to take gradient with respect to. + * \param ys_out_grad The symbol for additional gradient to be propagate back to y. + * \param aggregate_fun Aggregation function applied to aggregate the inputs. + * \param mirror_fun Optional mirror function to do mirror optimization and save memory. + * \param attr_hint_fun Optional, hint function to output a node that like src, but its attr is same as like. + * \param zero_ops Optional, list of operators that outputs a single zero array. The first one + * must be zeros_like. + * \param copy_op_str Optional, name of the copy operation required to handle duplicates + * on the edge of the graph + * \return A new graph, whose outputs correspond to inputs of xs. 
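+ *
+ * Editor's illustration (not part of this patch); a minimal call sketch
+ * mirroring the InitFullGraph call site updated below:
+ * \code
+ *   std::vector<const nnvm::Op*> zero_ops;
+ *   zero_ops.push_back(nnvm::Op::Get("zeros_like"));
+ *   zero_ops.push_back(nnvm::Op::Get("_zeros"));
+ *   nnvm::Graph g_grad = nnvm::pass::MXGradient(
+ *       g, symbol.outputs, xs, head_grad_entry_,
+ *       AggregateGradient, need_mirror, nullptr,
+ *       zero_ops, "_copy");
+ * \endcode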
+ */
+inline Graph MXGradient(
+    Graph graph,
+    std::vector<NodeEntry> ys,
+    std::vector<NodeEntry> xs,
+    std::vector<NodeEntry> ys_out_grad,
+    std::function<NodeEntry(std::vector<NodeEntry>&& inputs)> aggregate_fun = nullptr,
+    std::function<int(const Node& node)> mirror_fun = nullptr,
+    std::function<NodeEntry(const NodeEntry& src, const NodeEntry& like)>
+        attr_hint_fun = nullptr,
+    std::vector<const Op*> zero_ops = std::vector<const Op*>(),
+    std::string copy_op_str = std::string()) {
+  graph.attrs["grad_ys"] = std::make_shared<any>(std::move(ys));
+  graph.attrs["grad_xs"] = std::make_shared<any>(std::move(xs));
+  graph.attrs["grad_ys_out_grad"] = std::make_shared<any>(std::move(ys_out_grad));
+  if (aggregate_fun != nullptr) {
+    graph.attrs["grad_aggregate_fun"] = std::make_shared<any>(aggregate_fun);
+  }
+  if (mirror_fun != nullptr) {
+    graph.attrs["grad_mirror_fun"] = std::make_shared<any>(mirror_fun);
+  }
+  if (attr_hint_fun != nullptr) {
+    graph.attrs["attr_hint_fun"] = std::make_shared<any>(attr_hint_fun);
+  }
+  if (zero_ops.size()) {
+    graph.attrs["zero_ops"] = std::make_shared<any>(std::move(zero_ops));
+  }
+  if (copy_op_str != std::string()) {
+    graph.attrs["copy_op"] = std::make_shared<any>(std::move(copy_op_str));
+  }
+  return ApplyPass(std::move(graph), "MXGradient");
+}
+}  // namespace pass
+}  // namespace nnvm
+
 #endif  // MXNET_EXECUTOR_EXEC_PASS_H_
diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc
index 8302dc133c64..ca2cea093c5d 100644
--- a/src/executor/graph_executor.cc
+++ b/src/executor/graph_executor.cc
@@ -279,7 +279,7 @@ nnvm::Graph GraphExecutor::InitFullGraph(nnvm::Symbol symbol,
   zero_ops.push_back(nnvm::Op::Get("_zeros"));
 
   // take gradient
-  nnvm::Graph g_grad = nnvm::pass::Gradient(
+  nnvm::Graph g_grad = nnvm::pass::MXGradient(
       g, symbol.outputs, xs, head_grad_entry_,
       AggregateGradient, need_mirror, nullptr,
       zero_ops, "_copy");
@@ -325,7 +325,7 @@ void GraphExecutor::Init(nnvm::Symbol symbol,
   const auto& mutable_nodes = idx.mutable_input_nodes();
   size_t arg_top = 0, aux_top = 0;
   data_entry_.resize(idx.num_node_entries());
-  nnvm::ShapeVector arg_shapes;
+  mxnet::ShapeVector arg_shapes;
   nnvm::DTypeVector arg_dtypes;
   StorageTypeVector arg_stypes(idx.num_node_entries(), -1);
   for (size_t i = 0; i < num_forward_inputs_; ++i) {
@@ -367,11 +367,11 @@ void GraphExecutor::Init(nnvm::Symbol symbol,
   }
 
   // expand arg_shapes and arg_dtypes to contain backward inputs
-  arg_shapes.resize(idx.input_nodes().size(), TShape());
+  arg_shapes.resize(idx.input_nodes().size(), mxnet::TShape());
   g = InferShape(std::move(g), std::move(arg_shapes), "__shape__");
   if (g.GetAttr<size_t>("shape_num_unknown_nodes") != 0U) {
     HandleInferShapeError(num_forward_inputs_, g.indexed_graph(),
-                          g.GetAttr<nnvm::ShapeVector>("shape"));
+                          g.GetAttr<mxnet::ShapeVector>("shape"));
   }
 
   arg_dtypes.resize(idx.input_nodes().size(), -1);
@@ -401,7 +401,7 @@ void GraphExecutor::Init(nnvm::Symbol symbol,
  * shared data arrays are provided.
*/ void GraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes, + const mxnet::ShapeVector& inferred_shapes, const nnvm::DTypeVector& inferred_dtypes, const StorageTypeVector& inferred_stypes, const std::vector& in_arg_ctxes, @@ -419,7 +419,7 @@ void GraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, for (size_t i = 0; i < num_forward_inputs_; ++i) { const uint32_t nid = idx.input_nodes().at(i); const uint32_t eid = idx.entry_id(nid, 0); - const TShape& inferred_shape = inferred_shapes[eid]; + const mxnet::TShape& inferred_shape = inferred_shapes[eid]; const int inferred_dtype = inferred_dtypes[eid]; const NDArrayStorageType inferred_stype = (NDArrayStorageType) inferred_stypes[eid]; const std::string& arg_name = idx[nid].source->attrs.name; @@ -471,7 +471,7 @@ void GraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, * and shared_exec if available. */ void GraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes, + const mxnet::ShapeVector& inferred_shapes, const nnvm::DTypeVector& inferred_dtypes, const StorageTypeVector& inferred_stypes, const std::vector& in_arg_ctxes, @@ -491,7 +491,7 @@ void GraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, for (size_t i = 0; i < num_forward_inputs_; ++i) { const uint32_t nid = idx.input_nodes().at(i); const uint32_t eid = idx.entry_id(nid, 0); - const TShape& inferred_shape = inferred_shapes[eid]; + const mxnet::TShape& inferred_shape = inferred_shapes[eid]; const int inferred_dtype = inferred_dtypes[eid]; const NDArrayStorageType inferred_stype = (NDArrayStorageType) inferred_stypes[eid]; const std::string& arg_name = idx[nid].source->attrs.name; @@ -635,7 +635,7 @@ void GraphExecutor::FinishInitGraph(nnvm::Symbol symbol, if (vstorage_type[i] != kDefaultStorage) arg_storage_id[i] = kDynamicStorageID; } g.attrs["storage"] = std::make_shared(std::move(arg_storage_id)); - g = nnvm::ApplyPass(g, "PlanMemory"); + g = nnvm::ApplyPass(g, "MXPlanMemory"); } g = DetectInplaceAddTo(g); @@ -694,7 +694,7 @@ void GraphExecutor::Init(nnvm::Symbol symbol, const std::vector& in_arg_ctxes, const std::vector& arg_grad_ctxes, const std::vector& aux_state_ctxes, - const std::unordered_map& arg_shape_map, + const std::unordered_map& arg_shape_map, const std::unordered_map& arg_dtype_map, const std::unordered_map& arg_stype_map, const std::vector& grad_req_types, @@ -714,7 +714,7 @@ void GraphExecutor::Init(nnvm::Symbol symbol, // Initialize arg_shapes and arg_dtypes for shape and type inferences. // It contains all in_args and aux_states' shapes and types in a certain order. const nnvm::IndexedGraph& idx = g.indexed_graph(); - nnvm::ShapeVector arg_shapes(idx.input_nodes().size(), TShape()); + mxnet::ShapeVector arg_shapes(idx.input_nodes().size(), mxnet::TShape()); nnvm::DTypeVector arg_dtypes(idx.input_nodes().size(), -1); StorageTypeVector arg_stypes(idx.input_nodes().size(), kUndefinedStorage); for (size_t i = 0; i < num_forward_inputs_; ++i) { @@ -736,7 +736,7 @@ void GraphExecutor::Init(nnvm::Symbol symbol, g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); if (g.GetAttr("shape_num_unknown_nodes") != 0U) { HandleInferShapeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("shape")); + g.GetAttr("shape")); } g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__"); @@ -754,13 +754,13 @@ void GraphExecutor::Init(nnvm::Symbol symbol, // Create in_args, arg_grads, and aux_states using // the inferred shapes and dtypes. 
if (nullptr == shared_buffer) { // regular simple bind - InitArguments(idx, g.GetAttr("shape"), + InitArguments(idx, g.GetAttr("shape"), g.GetAttr("dtype"), g.GetAttr("storage_type"), in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes, grad_req_types, in_arg_vec, arg_grad_vec, aux_state_vec); } else { // simple bind using shared data arrays and shared_exec - InitArguments(idx, g.GetAttr("shape"), + InitArguments(idx, g.GetAttr("shape"), g.GetAttr("dtype"), g.GetAttr("storage_type"), in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes, @@ -788,7 +788,7 @@ Executor* GraphExecutor::Reshape(const bool partial_shaping, const bool allow_up_sizing, const Context& default_ctx, const std::map& ctx_map, - const std::unordered_map& + const std::unordered_map& provided_arg_shapes, std::vector* in_args, std::vector* arg_grads, @@ -799,7 +799,7 @@ Executor* GraphExecutor::Reshape(const bool partial_shaping, nnvm::Symbol symbol; symbol.outputs = g.outputs; const nnvm::IndexedGraph& idx = g.indexed_graph(); - nnvm::ShapeVector arg_shapes(idx.input_nodes().size(), TShape()); + mxnet::ShapeVector arg_shapes(idx.input_nodes().size(), mxnet::TShape()); for (size_t i = 0; i < num_forward_inputs_; ++i) { const uint32_t nid = idx.input_nodes().at(i); const std::string& name = idx[nid].source->attrs.name; @@ -811,9 +811,9 @@ Executor* GraphExecutor::Reshape(const bool partial_shaping, g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); if (g.GetAttr("shape_num_unknown_nodes") != 0U) { HandleInferShapeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("shape")); + g.GetAttr("shape")); } - const nnvm::ShapeVector& shape_vec = g.GetAttr("shape"); + const mxnet::ShapeVector& shape_vec = g.GetAttr("shape"); std::vector grad_req_types; size_t grad_top = 0; const size_t num_args = in_arg_map_.size(); @@ -824,7 +824,7 @@ Executor* GraphExecutor::Reshape(const bool partial_shaping, aux_states->reserve(num_aux); for (uint32_t nid : idx.input_nodes()) { std::string name = idx[nid].source->attrs.name; - const TShape& new_shape = shape_vec[idx.entry_id(nid, 0)]; + const mxnet::TShape& new_shape = shape_vec[idx.entry_id(nid, 0)]; if (idx.mutable_input_nodes().count(nid) == 0) { NDArray& arr = in_arg_map_.at(name); auto it = arg_grad_map_.find(name); @@ -927,13 +927,13 @@ Graph GraphExecutor::InitGraph(nnvm::Symbol symbol, // initialize the memory of each entries void GraphExecutor::InitDataEntryMemory(std::vector* shared_pool) { using nnvm::DTypeVector; - using nnvm::ShapeVector; + using mxnet::ShapeVector; using nnvm::StorageVector; // get the graph const auto& idx = graph_.indexed_graph(); // get the storage const auto& vdtype = graph_.GetAttr("dtype"); - const auto& vshape = graph_.GetAttr("shape"); + const auto& vshape = graph_.GetAttr("shape"); const auto& vstorage = graph_.GetAttr("storage_id"); const auto& vstorage_type = graph_.GetAttr("storage_type"); const auto& vctx = graph_.GetAttr("context"); @@ -1036,7 +1036,7 @@ void GraphExecutor::InitDataEntryMemory(std::vector* shared_pool) { size_t nword = (bytes + 3) / 4; CHECK_LE(nword, std::numeric_limits::max()); // allocate float arrays - TShape shape{static_cast(nword)}; + mxnet::TShape shape{static_cast(nword)}; // TODO(junwu): adding delay_alloc=true to create nd // is a temporary solution. 
NDArray nd(shape, ctx, true); @@ -1467,7 +1467,7 @@ GraphExecutor::CachedSegOpr GraphExecutor::CreateCachedSegOpr(size_t topo_start, // Infer shapes, dtypes, stypes, contexts for the forward graph static nnvm::Graph InferForwardAttrs(nnvm::Graph g, - nnvm::ShapeVector arg_shapes, + mxnet::ShapeVector arg_shapes, nnvm::DTypeVector arg_dtypes, StorageTypeVector arg_stypes, const Context& default_ctx, @@ -1481,7 +1481,7 @@ static nnvm::Graph InferForwardAttrs(nnvm::Graph g, g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); if (g.GetAttr("shape_num_unknown_nodes") != 0U) { HandleInferShapeError(num_forward_inputs, indexed_graph, - g.GetAttr("shape")); + g.GetAttr("shape")); } g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__"); if (g.GetAttr("dtype_num_unknown_nodes") != 0U) { @@ -1500,7 +1500,7 @@ static nnvm::Graph InferForwardAttrs(nnvm::Graph g, // This is a common function for bind and simple_bind flows. static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, const std::string& prop_name, - const nnvm::ShapeVector& arg_shapes, + const mxnet::ShapeVector& arg_shapes, const nnvm::DTypeVector& arg_dtypes, const StorageTypeVector& arg_stypes, const Context& default_ctx, @@ -1532,7 +1532,8 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, // This is for simple_bind flow. static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, const std::string& prop_name, - const std::unordered_map& arg_shape_map, + const std::unordered_map + & arg_shape_map, const std::unordered_map& arg_dtype_map, const std::unordered_map& arg_stype_map, const Context& default_ctx, @@ -1540,7 +1541,7 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, const std::vector& in_arg_ctxes, const std::vector& aux_state_ctxes) { const std::vector input_names = src.ListInputNames(Symbol::kAll); - nnvm::ShapeVector arg_shapes(input_names.size(), TShape()); + mxnet::ShapeVector arg_shapes(input_names.size(), mxnet::TShape()); nnvm::DTypeVector arg_dtypes(input_names.size(), -1); StorageTypeVector arg_stypes(input_names.size(), kUndefinedStorage); for (size_t i = 0; i < input_names.size(); ++i) { @@ -1574,7 +1575,7 @@ static nnvm::Symbol PartitionGraph(const nnvm::Symbol& src, const std::vector aux_names = src.ListInputNames(nnvm::Symbol::kAuxiliaryStates); CHECK_EQ(arg_names.size(), in_args->size()); CHECK_EQ(aux_names.size(), aux_states.size()); - nnvm::ShapeVector arg_shapes; // all input shapes + mxnet::ShapeVector arg_shapes; // all input shapes arg_shapes.reserve(input_names.size()); nnvm::DTypeVector arg_dtypes; // all input dtypes arg_dtypes.reserve(input_names.size()); @@ -1629,7 +1630,7 @@ Executor *Executor::SimpleBind(nnvm::Symbol symbol, const std::vector& in_arg_ctxes, const std::vector& arg_grad_ctxes, const std::vector& aux_state_ctxes, - const std::unordered_map& arg_shape_map, + const std::unordered_map& arg_shape_map, const std::unordered_map& arg_dtype_map, const std::unordered_map& arg_stype_map, const std::vector& grad_req_types, diff --git a/src/executor/graph_executor.h b/src/executor/graph_executor.h index c899a6f5b463..ed49e5bc8bc9 100644 --- a/src/executor/graph_executor.h +++ b/src/executor/graph_executor.h @@ -94,7 +94,7 @@ class GraphExecutor : public Executor { const std::vector& in_arg_ctxes, const std::vector& arg_grad_ctxes, const std::vector& aux_state_ctxes, - const std::unordered_map& arg_shape_map, + const std::unordered_map& arg_shape_map, const std::unordered_map& arg_dtype_map, const std::unordered_map& arg_stype_map, const std::vector& 
grad_req_types, @@ -111,7 +111,7 @@ class GraphExecutor : public Executor { const bool allow_up_sizing, const Context& default_ctx, const std::map& ctx_map, - const std::unordered_map& + const std::unordered_map& provided_arg_shapes, std::vector* in_args, std::vector* arg_grads, @@ -153,7 +153,7 @@ class GraphExecutor : public Executor { }; // Initialize in_args, arg_grads, and aux_states void InitArguments(const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes, + const mxnet::ShapeVector& inferred_shapes, const nnvm::DTypeVector& inferred_dtypes, const StorageTypeVector& inferred_stypes, const std::vector& in_arg_ctxes, @@ -166,7 +166,7 @@ class GraphExecutor : public Executor { // Initialize in_args, arg_grads and aux_states with // shared_buffer and shared_exec virtual void InitArguments(const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes, + const mxnet::ShapeVector& inferred_shapes, const nnvm::DTypeVector& inferred_dtypes, const StorageTypeVector& inferred_stypes, const std::vector& in_arg_ctxes, diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc index e4dd3f6677e4..af8094ad92af 100644 --- a/src/executor/infer_graph_attr_pass.cc +++ b/src/executor/infer_graph_attr_pass.cc @@ -589,7 +589,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, } nnvm::Graph InferShape(nnvm::Graph&& graph, - nnvm::ShapeVector&& shape_inputs, + mxnet::ShapeVector&& shape_inputs, const std::string& shape_attr_key) { using dmlc::any; if (shape_inputs.size() != 0) { @@ -598,11 +598,11 @@ nnvm::Graph InferShape(nnvm::Graph&& graph, if (shape_attr_key.length() != 0) { graph.attrs["shape_attr_key"] = std::make_shared(shape_attr_key); } - return InferShapeAttr( - std::move(graph), nnvm::TShape(), + return InferAttr( + std::move(graph), mxnet::TShape(), "FInferShape", "shape_inputs", "shape_attr_key", "shape", "shape_num_unknown_nodes", - [](const nnvm::TShape& s) { return s.ndim() == 0 || s.Size() == 0; }, + [](const mxnet::TShape& s) { return s.ndim() == 0 || s.Size() == 0; }, nullptr, true, nullptr); } diff --git a/src/executor/tensorrt_pass.cc b/src/executor/tensorrt_pass.cc index 762dc0de9db5..f847d59a1298 100644 --- a/src/executor/tensorrt_pass.cc +++ b/src/executor/tensorrt_pass.cc @@ -343,17 +343,17 @@ Graph UpdateSubgraphAttrs(Graph&& subgraph, const Graph& g, const auto& idx = g.indexed_graph(); const auto& sub_idx = subgraph.indexed_graph(); - const auto& shape = g.GetAttr("shape"); + const auto& shape = g.GetAttr("shape"); const auto& dtype = g.GetAttr("dtype"); const auto& storage_type = g.GetAttr("storage_type"); - const auto& shape_inputs = g.GetAttr("shape_inputs"); + const auto& shape_inputs = g.GetAttr("shape_inputs"); const auto& dtype_inputs = g.GetAttr("dtype_inputs"); const auto& storage_type_inputs = g.GetAttr("storage_type_inputs"); - nnvm::ShapeVector sub_shape(sub_idx.num_node_entries()); + mxnet::ShapeVector sub_shape(sub_idx.num_node_entries()); nnvm::DTypeVector sub_dtype(sub_idx.num_node_entries()); StorageTypeVector sub_storage_type(sub_idx.num_node_entries()); - nnvm::ShapeVector sub_shape_inputs(sub_idx.input_nodes().size()); + mxnet::ShapeVector sub_shape_inputs(sub_idx.input_nodes().size()); nnvm::DTypeVector sub_dtype_inputs(sub_idx.input_nodes().size()); StorageTypeVector sub_storage_type_inputs(sub_idx.input_nodes().size()); diff --git a/src/executor/trt_graph_executor.cc b/src/executor/trt_graph_executor.cc index 85ce16885c88..c923922d5184 100644 --- a/src/executor/trt_graph_executor.cc +++ 
b/src/executor/trt_graph_executor.cc @@ -60,7 +60,7 @@ void TrtGraphExecutor::Init(nnvm::Symbol symbol, std::vector *in_arg_ctxes, std::vector *arg_grad_ctxes, std::vector *aux_state_ctxes, - std::unordered_map *arg_shape_map, + std::unordered_map *arg_shape_map, std::unordered_map *arg_dtype_map, std::unordered_map *arg_stype_map, std::vector *grad_req_types, @@ -95,7 +95,7 @@ void TrtGraphExecutor::Init(nnvm::Symbol symbol, // Initialize arg_shapes and arg_dtypes for shape and type inferences. // It contains all in_args and aux_states' shapes and types in a certain order. const nnvm::IndexedGraph& idx = g.indexed_graph(); - nnvm::ShapeVector arg_shapes(idx.input_nodes().size(), TShape()); + mxnet::ShapeVector arg_shapes(idx.input_nodes().size(), mxnet::TShape()); nnvm::DTypeVector arg_dtypes(idx.input_nodes().size(), -1); StorageTypeVector arg_stypes(idx.input_nodes().size(), kUndefinedStorage); for (size_t i = 0; i < num_forward_inputs_; ++i) { @@ -117,7 +117,7 @@ void TrtGraphExecutor::Init(nnvm::Symbol symbol, g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); if (g.GetAttr("shape_num_unknown_nodes") != 0U) { HandleInferShapeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("shape")); + g.GetAttr("shape")); } g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__"); @@ -142,7 +142,7 @@ void TrtGraphExecutor::Init(nnvm::Symbol symbol, } } - InitArguments(g.indexed_graph(), g.GetAttr("shape"), + InitArguments(g.indexed_graph(), g.GetAttr("shape"), g.GetAttr("dtype"), g.GetAttr("storage_type"), *in_arg_ctxes, *arg_grad_ctxes, *aux_state_ctxes, @@ -165,7 +165,7 @@ void TrtGraphExecutor::Init(nnvm::Symbol symbol, * and shared_exec if available. */ void TrtGraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes, + const mxnet::ShapeVector& inferred_shapes, const nnvm::DTypeVector& inferred_dtypes, const StorageTypeVector& inferred_stypes, const std::vector& in_arg_ctxes, @@ -185,7 +185,7 @@ void TrtGraphExecutor::InitArguments(const nnvm::IndexedGraph& idx, for (size_t i = 0; i < num_forward_inputs_; ++i) { const uint32_t nid = idx.input_nodes().at(i); const uint32_t eid = idx.entry_id(nid, 0); - const TShape& inferred_shape = inferred_shapes[eid]; + const mxnet::TShape& inferred_shape = inferred_shapes[eid]; const int inferred_dtype = inferred_dtypes[eid]; const auto inferred_stype = (NDArrayStorageType) inferred_stypes[eid]; const std::string& arg_name = idx[nid].source->attrs.name; @@ -319,7 +319,7 @@ Graph TrtGraphExecutor::ReinitGraph(Graph&& g, const Context &default_ctx, std::vector *arg_grad_ctxes, std::vector *aux_state_ctxes, std::vector *grad_req_types, - std::unordered_map *arg_shape_map, + std::unordered_map *arg_shape_map, std::unordered_map *arg_dtype_map, std::unordered_map *arg_stype_map, std::unordered_map *params_map) { @@ -356,7 +356,7 @@ Graph TrtGraphExecutor::ReinitGraph(Graph&& g, const Context &default_ctx, num_forward_nodes_ = std::max( num_forward_nodes_, static_cast(idx.outputs()[i].node_id + 1)); } - nnvm::ShapeVector arg_shapes(idx.input_nodes().size(), TShape()); + mxnet::ShapeVector arg_shapes(idx.input_nodes().size(), mxnet::TShape()); nnvm::DTypeVector arg_dtypes(idx.input_nodes().size(), -1); StorageTypeVector arg_stypes(idx.input_nodes().size(), kUndefinedStorage); for (size_t i = 0; i < num_forward_inputs_; ++i) { @@ -378,7 +378,7 @@ Graph TrtGraphExecutor::ReinitGraph(Graph&& g, const Context &default_ctx, g = InferShape(std::move(g), std::move(arg_shapes), "__shape__"); if 
(g.GetAttr("shape_num_unknown_nodes") != 0U) { HandleInferShapeError(num_forward_inputs_, g.indexed_graph(), - g.GetAttr("shape")); + g.GetAttr("shape")); } g = InferType(std::move(g), std::move(arg_dtypes), "__dtype__"); @@ -415,7 +415,8 @@ Executor *TrtGraphExecutor::TensorRTBind(nnvm::Symbol symbol, std::vector *in_arg_ctxes, std::vector *arg_grad_ctxes, std::vector *aux_state_ctxes, - std::unordered_map *arg_shape_map, + std::unordered_map + *arg_shape_map, std::unordered_map *arg_dtype_map, std::unordered_map *arg_stype_map, std::vector *grad_req_types, diff --git a/src/executor/trt_graph_executor.h b/src/executor/trt_graph_executor.h index 96ac4426270a..a4ec5bf657ae 100644 --- a/src/executor/trt_graph_executor.h +++ b/src/executor/trt_graph_executor.h @@ -40,7 +40,7 @@ class TrtGraphExecutor : public GraphExecutor { std::vector *in_arg_ctxes, std::vector* arg_grad_ctxes, std::vector* aux_state_ctxes, - std::unordered_map* arg_shape_map, + std::unordered_map* arg_shape_map, std::unordered_map* arg_dtype_map, std::unordered_map* arg_stype_map, std::vector* grad_req_types, @@ -58,7 +58,7 @@ class TrtGraphExecutor : public GraphExecutor { std::vector *in_arg_ctxes, std::vector *arg_grad_ctxes, std::vector *aux_state_ctxes, - std::unordered_map *arg_shape_map, + std::unordered_map *arg_shape_map, std::unordered_map *arg_dtype_map, std::unordered_map *arg_stype_map, std::vector *grad_req_types, @@ -81,13 +81,13 @@ class TrtGraphExecutor : public GraphExecutor { std::vector *arg_grad_ctxes, std::vector *aux_state_ctxes, std::vector *grad_req_types, - std::unordered_map *arg_shape_map, + std::unordered_map *arg_shape_map, std::unordered_map *arg_dtype_map, std::unordered_map *arg_stype_map, std::unordered_map *params_map); void InitArguments(const nnvm::IndexedGraph& idx, - const nnvm::ShapeVector& inferred_shapes, + const mxnet::ShapeVector& inferred_shapes, const nnvm::DTypeVector& inferred_dtypes, const StorageTypeVector& inferred_stypes, const std::vector& in_arg_ctxes, diff --git a/src/imperative/cached_op.cc b/src/imperative/cached_op.cc index 8dd0a4deaac3..9db235bca532 100644 --- a/src/imperative/cached_op.cc +++ b/src/imperative/cached_op.cc @@ -175,7 +175,7 @@ CachedOp::CachedOp( CHECK_GT(xs.size(), 0) << "There are no inputs in computation graph that require gradients."; - grad_graph_ = pass::Gradient( + grad_graph_ = pass::MXGradient( fwd_graph_, fwd_graph_.outputs, xs, ograd_entries_, exec::AggregateGradient, nullptr, nullptr, zero_ops, "_copy"); @@ -405,8 +405,8 @@ bool CachedOp::SetBackwardGraph( g.attrs["backward_ref_count"] = std::make_shared(std::move(ref_count)); } - auto shapes = info->fwd_graph.GetAttr("shape"); - shapes.resize(idx.num_node_entries(), TShape()); + auto shapes = info->fwd_graph.GetAttr("shape"); + shapes.resize(idx.num_node_entries(), mxnet::TShape()); auto dtypes = info->fwd_graph.GetAttr("dtype"); dtypes.resize(idx.num_node_entries(), -1); auto stypes = info->fwd_graph.GetAttr("storage_type"); @@ -624,7 +624,7 @@ void CachedOp::StaticRunOps( const auto& op_execs = state.execs; std::vector ndinputs, ndoutputs; - nnvm::ShapeVector arg_shapes; + mxnet::ShapeVector arg_shapes; nnvm::DTypeVector arg_dtypes; std::vector req; @@ -739,7 +739,7 @@ OpStatePtr CachedOp::StaticForward( } const auto& dtypes = g.GetAttr("dtype"); - const auto& shapes = g.GetAttr("shape"); + const auto& shapes = g.GetAttr("shape"); const auto& stypes = g.GetAttr("storage_type"); for (size_t i = 0; i < outputs.size(); ++i) { @@ -816,7 +816,7 @@ OpStatePtr CachedOp::DynamicForward( 
mem_plan, arrays, &array_reqs); const auto& dtypes = g.GetAttr("dtype"); - const auto& shapes = g.GetAttr("shape"); + const auto& shapes = g.GetAttr("shape"); const auto& stypes = g.GetAttr("storage_type"); for (size_t i = 0; i < outputs.size(); ++i) { @@ -1230,7 +1230,7 @@ void CachedOpBackward(const OpStatePtr& state_ptr, OpStatePtr CreateCachedOpState(const NodeAttrs& attrs, Context ctx, - const std::vector& in_shapes, + const mxnet::ShapeVector& in_shapes, const std::vector& in_types) { const CachedOpPtr& op = nnvm::get(attrs.parsed); return OpStatePtr::Create(op); @@ -1335,10 +1335,10 @@ NNVM_REGISTER_OP(_CachedOp) return op->ListForwardOutputNames(); }) .set_attr("FCreateOpState", CreateCachedOpState) -.set_attr("FInferShape", +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_shapes, - std::vector *out_shapes) { + mxnet::ShapeVector *in_shapes, + mxnet::ShapeVector *out_shapes) { const CachedOpPtr& op = nnvm::get(attrs.parsed); return op::DefaultSubgraphOpShapeHelper(op->GetForwardSym(), in_shapes, out_shapes); }) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index a381b2384113..8d1f65518565 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -351,7 +351,7 @@ std::vector Imperative::Backward( << "There are no inputs in computation graph that require gradients."; } - Graph g_graph = pass::Gradient( + Graph g_graph = pass::MXGradient( graph, graph.outputs, xs, ograd_entries, exec::AggregateGradient, nullptr, nullptr, zero_ops, "_copy"); @@ -479,7 +479,7 @@ std::vector Imperative::Backward( array_reqs[eid] = x_reqs[i - num_forward_outputs]; } - const auto& shapes = graph.GetAttr("shape"); + const auto& shapes = graph.GetAttr("shape"); const auto& dtypes = graph.GetAttr("dtype"); const auto& stypes = graph.GetAttr("storage_type"); const auto& dispatch_modes = graph.GetAttr("dispatch_mode"); diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index a8db4810f7c1..5eecfe8c6f23 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -100,18 +100,18 @@ inline void SetShapeType(const Context& ctx, const std::vector& inputs, const std::vector& outputs, DispatchMode* dispatch_mode) { - static auto& infershape = nnvm::Op::GetAttr("FInferShape"); + static auto& infershape = nnvm::Op::GetAttr("FInferShape"); static auto& infertype = nnvm::Op::GetAttr("FInferType"); static auto& inferstorage = nnvm::Op::GetAttr("FInferStorageType"); MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); // infer shape - std::vector& in_shapes = ret->arg_shapes; + mxnet::ShapeVector& in_shapes = ret->arg_shapes; in_shapes.clear(); in_shapes.reserve(inputs.size()); for (auto& i : inputs) { in_shapes.push_back(i->shape()); } - std::vector& out_shapes = ret->out_shapes; + mxnet::ShapeVector& out_shapes = ret->out_shapes; out_shapes.clear(); out_shapes.reserve(outputs.size()); for (auto& i : outputs) { @@ -563,7 +563,7 @@ inline void PushOperator(const OpStatePtr& state, } } -inline bool CheckAndInferShape(nnvm::Graph* p_g, nnvm::ShapeVector&& shapes, +inline bool CheckAndInferShape(nnvm::Graph* p_g, mxnet::ShapeVector&& shapes, bool use_inputs, std::pair node_range = {0, 0}, std::pair entry_range = {0, 0}, @@ -575,9 +575,9 @@ inline bool CheckAndInferShape(nnvm::Graph* p_g, nnvm::ShapeVector&& shapes, nnvm::Graph& g = *p_g; if (use_inputs) { if (g.attrs.count("shape_inputs") && - g.GetAttr("shape_inputs") == shapes) return true; + g.GetAttr("shape_inputs") == shapes) 
return true; } else if (g.attrs.count("shape")) { - const auto& prev_shapes = g.GetAttr("shape"); + const auto& prev_shapes = g.GetAttr("shape"); CHECK_EQ(prev_shapes.size(), shapes.size()); bool match = true; for (size_t i = 0; i < shapes.size(); ++i) { @@ -773,11 +773,11 @@ inline MemoryPlanVector PlanMemory( } g.attrs["ref_count"] = std::make_shared(ref_count); g.attrs["storage"] = std::make_shared(std::move(storage)); - g = nnvm::ApplyPass(g, "PlanMemory"); + g = nnvm::ApplyPass(g, "MXPlanMemory"); if (detect_inplace_addto) g = exec::DetectInplaceAddTo(g); const auto& dtypes = g.GetAttr("dtype"); - const auto& shapes = g.GetAttr("shape"); + const auto& shapes = g.GetAttr("shape"); const auto& storage_inplace = g.GetAttr >("storage_inplace_index"); const auto& storage_ids = g.GetAttr("storage_id"); uint32_t entry_start = entry_range.first; @@ -818,7 +818,7 @@ inline std::multimap AllocateMemory( std::multimap&& pool = std::multimap()) { using namespace nnvm; const auto& dtypes = g.GetAttr("dtype"); - const auto& shapes = g.GetAttr("shape"); + const auto& shapes = g.GetAttr("shape"); const auto& stypes = g.GetAttr("storage_type"); std::multimap new_pool; @@ -840,7 +840,7 @@ inline std::multimap AllocateMemory( new_pool.insert(*iter); pool.erase(iter); } else { - NDArray buff(TShape({static_cast(mem_plan[i].size)}), + NDArray buff(mxnet::TShape({static_cast(mem_plan[i].size)}), default_ctx, true, mshadow::kUint8); *arrays[i] = buff.AsArray(shapes[i], dtypes[i]); new_pool.insert({mem_plan[i].size, buff}); diff --git a/src/io/image_aug_default.cc b/src/io/image_aug_default.cc index cd06de2b2ad1..5fb0b0f21e7e 100644 --- a/src/io/image_aug_default.cc +++ b/src/io/image_aug_default.cc @@ -96,7 +96,7 @@ struct DefaultImageAugmentParam : public dmlc::Parameter #include #include -#include #include #include @@ -189,7 +188,7 @@ void Imdecode(const nnvm::NodeAttrs& attrs, size_t len = inputs[0].shape().Size(); CHECK(len > 0) << "Input cannot be an empty buffer"; - TShape oshape(3); + mxnet::TShape oshape(3); oshape[2] = param.flag == 0 ? 1 : 3; if (get_jpeg_size(str_img, len, &oshape[1], &oshape[0])) { } else if (get_png_size(str_img, len, &oshape[1], &oshape[0])) { @@ -229,7 +228,7 @@ void Imread(const nnvm::NodeAttrs& attrs, CHECK(file.good()) << "Failed reading image file: '" << param.filename << "' " << strerror(errno); - TShape oshape(3); + mxnet::TShape oshape(3); oshape[2] = param.flag == 0 ? 
1 : 3; if (get_jpeg_size(buff.get(), fsize, &oshape[1], &oshape[0])) { } else if (get_png_size(buff.get(), fsize, &oshape[1], &oshape[0])) { @@ -271,8 +270,8 @@ struct ResizeParam : public dmlc::Parameter { DMLC_REGISTER_PARAMETER(ResizeParam); inline bool ResizeShape(const nnvm::NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { + mxnet::ShapeVector *ishape, + mxnet::ShapeVector *oshape) { const auto& param = nnvm::get(attrs.parsed); if (ishape->size() != 1 || (*ishape)[0].ndim() != 3) return false; @@ -319,8 +318,8 @@ struct MakeBorderParam : public dmlc::Parameter { DMLC_REGISTER_PARAMETER(MakeBorderParam); inline bool MakeBorderShape(const nnvm::NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { + mxnet::ShapeVector *ishape, + mxnet::ShapeVector *oshape) { const auto& param = nnvm::get(attrs.parsed); if (ishape->size() != 1 || (*ishape)[0].ndim() != 3) return false; @@ -382,7 +381,7 @@ NNVM_REGISTER_OP(_cvimresize) .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(op::ParamParser) -.set_attr("FInferShape", ResizeShape) +.set_attr("FInferShape", ResizeShape) .set_attr("FInferType", op::ElemwiseType<1, 1>) .set_attr("FCompute", Imresize) .add_argument("src", "NDArray", "source image") @@ -393,7 +392,7 @@ NNVM_REGISTER_OP(_cvcopyMakeBorder) .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(op::ParamParser) -.set_attr("FInferShape", MakeBorderShape) +.set_attr("FInferShape", MakeBorderShape) .set_attr("FInferType", op::ElemwiseType<1, 1>) .set_attr("FCompute", copyMakeBorder) .add_argument("src", "NDArray", "source image") diff --git a/src/io/image_iter_common.h b/src/io/image_iter_common.h index 10cd8ab4e5de..4bbcb9d21f9a 100644 --- a/src/io/image_iter_common.h +++ b/src/io/image_iter_common.h @@ -118,7 +118,7 @@ struct ImageRecParserParam : public dmlc::Parameter { /*! \brief label-width */ int label_width; /*! \brief input shape */ - TShape data_shape; + mxnet::TShape data_shape; /*! \brief number of threads */ int preprocess_threads; /*! \brief whether to remain silent */ diff --git a/src/io/inst_vector.h b/src/io/inst_vector.h index f06a4e4aabe9..91106e788cdd 100644 --- a/src/io/inst_vector.h +++ b/src/io/inst_vector.h @@ -187,7 +187,7 @@ class TBlobContainer : public TBlob { release(); } } - void resize(const TShape &shape, int type_flag) { + void resize(const mxnet::TShape &shape, int type_flag) { if (tensor_container_) { CHECK_EQ(this->type_flag_, type_flag); this->shape_ = shape; diff --git a/src/io/iter_batchloader.h b/src/io/iter_batchloader.h index be911f695c8e..69eb05f7d729 100644 --- a/src/io/iter_batchloader.h +++ b/src/io/iter_batchloader.h @@ -146,7 +146,7 @@ class BatchLoader : public IIterator { /*! \brief base iterator */ IIterator *base_; /*! \brief data shape */ - std::vector shape_; + mxnet::ShapeVector shape_; /*! \brief unit size */ std::vector unit_size_; // initialize the data holder by using from the first batch. 
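As an aside, a minimal sketch of the two mxnet::TShape constructors the loader code above relies on — assuming only the <mxnet/tuple.h> header this patch introduces, plus the standard library:

#include <mxnet/tuple.h>
#include <vector>

int main() {
  std::vector<mxnet::index_t> dims = {4, 3, 2};
  // range constructor, as used for dst_shape above
  mxnet::TShape from_range(dims.begin(), dims.end());
  // initializer-list construction of the same shape
  mxnet::TShape from_list{4, 3, 2};
  // Size() multiplies the dimensions: 4 * 3 * 2 = 24
  return (from_range == from_list && from_range.Size() == 24) ? 0 : 1;
}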
@@ -155,7 +155,7 @@ class BatchLoader : public IIterator { data_.resize(first_batch.data.size()); unit_size_.resize(first_batch.data.size()); for (size_t i = 0; i < first_batch.data.size(); ++i) { - TShape src_shape = first_batch.data[i].shape_; + mxnet::TShape src_shape = first_batch.data[i].shape_; int src_type_flag = first_batch.data[i].type_flag_; // init object attributes std::vector shape_vec; @@ -163,7 +163,7 @@ class BatchLoader : public IIterator { for (index_t dim = 0; dim < src_shape.ndim(); ++dim) { shape_vec.push_back(src_shape[dim]); } - TShape dst_shape(shape_vec.begin(), shape_vec.end()); + mxnet::TShape dst_shape(shape_vec.begin(), shape_vec.end()); shape_[i] = dst_shape; data_[i].resize(mshadow::Shape1(dst_shape.Size()), src_type_flag); unit_size_[i] = src_shape.Size(); diff --git a/src/io/iter_csv.cc b/src/io/iter_csv.cc index 5fd149535be2..0c1b82355410 100644 --- a/src/io/iter_csv.cc +++ b/src/io/iter_csv.cc @@ -37,11 +37,11 @@ struct CSVIterParam : public dmlc::Parameter { /*! \brief path to data csv file */ std::string data_csv; /*! \brief data shape */ - TShape data_shape; + mxnet::TShape data_shape; /*! \brief path to label csv file */ std::string label_csv; /*! \brief label shape */ - TShape label_shape; + mxnet::TShape label_shape; // declare parameters DMLC_DECLARE_PARAMETER(CSVIterParam) { DMLC_DECLARE_FIELD(data_csv) @@ -52,7 +52,7 @@ struct CSVIterParam : public dmlc::Parameter { .describe("The input CSV file or a directory path. " "If NULL, all labels will be returned as 0."); index_t shape1[] = {1}; - DMLC_DECLARE_FIELD(label_shape).set_default(TShape(shape1, shape1 + 1)) + DMLC_DECLARE_FIELD(label_shape).set_default(mxnet::TShape(shape1, shape1 + 1)) .describe("The shape of one label."); } }; @@ -148,7 +148,7 @@ class CSVIterTyped: public CSVIterBase { } private: - inline TBlob AsTBlob(const dmlc::Row& row, const TShape& shape) { + inline TBlob AsTBlob(const dmlc::Row& row, const mxnet::TShape& shape) { CHECK_EQ(row.length, shape.Size()) << "The data size in CSV do not match size of shape: " << "specified shape=" << shape << ", the csv row-length=" << row.length; diff --git a/src/io/iter_image_det_recordio.cc b/src/io/iter_image_det_recordio.cc index 8bfded75f098..876c07520f52 100644 --- a/src/io/iter_image_det_recordio.cc +++ b/src/io/iter_image_det_recordio.cc @@ -151,7 +151,7 @@ struct ImageDetRecParserParam : public dmlc::Parameter { /*! \brief label-width, use -1 for variable width */ int label_width; /*! \brief input shape */ - TShape data_shape; + mxnet::TShape data_shape; /*! \brief number of threads */ int preprocess_threads; /*! 
\brief whether to remain silent */ diff --git a/src/io/iter_image_recordio_2.cc b/src/io/iter_image_recordio_2.cc index 00c38198659f..5d5261b22611 100644 --- a/src/io/iter_image_recordio_2.cc +++ b/src/io/iter_image_recordio_2.cc @@ -278,12 +278,12 @@ inline bool ImageRecordIOParser2::ParseNext(DataBatch *out) { for (index_t dim = 0; dim < param_.data_shape.ndim(); ++dim) { shape_vec.push_back(param_.data_shape[dim]); } - TShape data_shape(shape_vec.begin(), shape_vec.end()); + mxnet::TShape data_shape(shape_vec.begin(), shape_vec.end()); shape_vec.clear(); shape_vec.push_back(batch_param_.batch_size); shape_vec.push_back(param_.label_width); - TShape label_shape(shape_vec.begin(), shape_vec.end()); + mxnet::TShape label_shape(shape_vec.begin(), shape_vec.end()); auto ctx = Context::CPU(0); auto dev_id = param_.device_id; diff --git a/src/io/iter_libsvm.cc b/src/io/iter_libsvm.cc index 8abb768ad4c8..3decc7b33e04 100644 --- a/src/io/iter_libsvm.cc +++ b/src/io/iter_libsvm.cc @@ -36,11 +36,11 @@ struct LibSVMIterParam : public dmlc::Parameter { /*! \brief path to data libsvm file */ std::string data_libsvm; /*! \brief data shape */ - TShape data_shape; + mxnet::TShape data_shape; /*! \brief path to label libsvm file */ std::string label_libsvm; /*! \brief label shape */ - TShape label_shape; + mxnet::TShape label_shape; /*! \brief partition the data into multiple parts */ int num_parts; /*! \brief the index of the part will read*/ @@ -55,7 +55,7 @@ struct LibSVMIterParam : public dmlc::Parameter { .describe("The input LibSVM label file or a directory path. " "If NULL, all labels will be read from ``data_libsvm``."); index_t shape1[] = {1}; - DMLC_DECLARE_FIELD(label_shape).set_default(TShape(shape1, shape1 + 1)) + DMLC_DECLARE_FIELD(label_shape).set_default(mxnet::TShape(shape1, shape1 + 1)) .describe("The shape of one label."); DMLC_DECLARE_FIELD(num_parts).set_default(1) .describe("partition the data into multiple parts"); @@ -153,7 +153,7 @@ class LibSVMIter: public SparseIIterator { return param_.label_shape.Size() > 1 ? kCSRStorage : kDefaultStorage; } - virtual const TShape GetShape(bool is_data) const { + virtual const mxnet::TShape GetShape(bool is_data) const { if (is_data) return param_.data_shape; return param_.label_shape; } @@ -161,13 +161,13 @@ class LibSVMIter: public SparseIIterator { private: inline TBlob AsDataBlob(const dmlc::Row& row) { const real_t* ptr = row.value; - TShape shape(mshadow::Shape1(row.length)); + mxnet::TShape shape(mshadow::Shape1(row.length)); return TBlob((real_t*) ptr, shape, cpu::kDevMask); // NOLINT(*) } inline TBlob AsIdxBlob(const dmlc::Row& row) { const uint64_t* ptr = row.index; - TShape shape(mshadow::Shape1(row.length)); + mxnet::TShape shape(mshadow::Shape1(row.length)); return TBlob((int64_t*) ptr, shape, cpu::kDevMask, mshadow::kInt64); // NOLINT(*) } diff --git a/src/io/iter_mnist.cc b/src/io/iter_mnist.cc index 139cf47d63ed..0163a62400f7 100644 --- a/src/io/iter_mnist.cc +++ b/src/io/iter_mnist.cc @@ -104,7 +104,7 @@ class MNISTIter: public IIterator { out_.batch_size = param_.batch_size; if (param_.shuffle) this->Shuffle(); if (param_.silent == 0) { - TShape s; + mxnet::TShape s; s = batch_data_.shape_; if (param_.flat) { LOG(INFO) << "MNISTIter: load " << (unsigned)img_.size(0) << " images, shuffle=" diff --git a/src/io/iter_sparse.h b/src/io/iter_sparse.h index beaf5c682998..22b1836be419 100644 --- a/src/io/iter_sparse.h +++ b/src/io/iter_sparse.h @@ -38,7 +38,7 @@ class SparseIIterator : public IIterator { /*! 
\brief storage type of the data or label */ virtual const NDArrayStorageType GetStorageType(bool is_data) const = 0; /*! \brief shape of the data or label */ - virtual const TShape GetShape(bool is_data) const = 0; + virtual const mxnet::TShape GetShape(bool is_data) const = 0; }; // class SparseIIterator } // namespace mxnet diff --git a/src/io/iter_sparse_batchloader.h b/src/io/iter_sparse_batchloader.h index 398d6e00fe7b..17c509a0f56b 100644 --- a/src/io/iter_sparse_batchloader.h +++ b/src/io/iter_sparse_batchloader.h @@ -108,14 +108,14 @@ class SparseBatchLoader : public BatchLoader, public SparseIIterator return sparse_base_->GetStorageType(is_data); } - virtual const TShape GetShape(bool is_data) const { - TShape inst_shape = sparse_base_->GetShape(is_data); + virtual const mxnet::TShape GetShape(bool is_data) const { + mxnet::TShape inst_shape = sparse_base_->GetShape(is_data); std::vector shape_vec; shape_vec.push_back(param_.batch_size); for (index_t dim = 0; dim < inst_shape.ndim(); ++dim) { shape_vec.push_back(inst_shape[dim]); } - return TShape(shape_vec.begin(), shape_vec.end()); + return mxnet::TShape(shape_vec.begin(), shape_vec.end()); } private: @@ -186,7 +186,7 @@ class SparseBatchLoader : public BatchLoader, public SparseIIterator // allocate buffer for (size_t i = 0; i < num_arrays; ++i) { // init object attributes - TShape dst_shape(mshadow::Shape1(buff_sizes[i])); + mxnet::TShape dst_shape(mshadow::Shape1(buff_sizes[i])); data_[i].resize(mshadow::Shape1(buff_sizes[i]), dtypes_[i]); CHECK(data_[i].dptr_ != nullptr); } diff --git a/src/io/iter_sparse_prefetcher.h b/src/io/iter_sparse_prefetcher.h index 3908f9bd3826..3f06052b0292 100644 --- a/src/io/iter_sparse_prefetcher.h +++ b/src/io/iter_sparse_prefetcher.h @@ -134,7 +134,7 @@ class SparsePrefetcherIter : public PrefetcherIter { return sparse_loader_->GetStorageType(is_data); } - virtual const TShape GetShape(bool is_data) const { + virtual const mxnet::TShape GetShape(bool is_data) const { return sparse_loader_->GetShape(is_data); } diff --git a/src/kvstore/comm.h b/src/kvstore/comm.h index 08f6155cb5b4..88e363b5de7d 100644 --- a/src/kvstore/comm.h +++ b/src/kvstore/comm.h @@ -50,7 +50,7 @@ class Comm { * \brief init key with the data shape and storage shape */ virtual void Init(int key, const NDArrayStorageType stype, - const TShape& shape, int dtype = mshadow::kFloat32) = 0; + const mxnet::TShape& shape, int dtype = mshadow::kFloat32) = 0; /** * \brief returns src[0] + .. 
+ src[src.size()-1] */ @@ -110,7 +110,7 @@ class CommCPU : public Comm { } virtual ~CommCPU() { } - void Init(int key, const NDArrayStorageType stype, const TShape& shape, + void Init(int key, const NDArrayStorageType stype, const mxnet::TShape& shape, int type = mshadow::kFloat32) override { // Delayed allocation - the dense merged buffer might not be used at all if push() // only sees sparse arrays @@ -456,7 +456,7 @@ class CommDevice : public Comm { virtual ~CommDevice() { } - void Init(int key, const NDArrayStorageType stype, const TShape& shape, + void Init(int key, const NDArrayStorageType stype, const mxnet::TShape& shape, int dtype = mshadow::kFloat32) override { sorted_key_attrs_.emplace_back(key, shape, dtype); inited_ = false; @@ -568,9 +568,9 @@ class CommDevice : public Comm { false, buf.merged.dtype()); buf.residual[i] = 0; int64_t small_size = gc_->GetCompressedSize(buf.merged.shape().Size()); - buf.compressed_recv_buf[i] = NDArray(TShape{small_size}, buf.merged.ctx(), + buf.compressed_recv_buf[i] = NDArray(mxnet::TShape{small_size}, buf.merged.ctx(), false, buf.merged.dtype()); - buf.compressed_send_buf[i] = NDArray(TShape{small_size}, src[i].ctx(), + buf.compressed_send_buf[i] = NDArray(mxnet::TShape{small_size}, src[i].ctx(), false, buf.merged.dtype()); } } @@ -673,7 +673,7 @@ class CommDevice : public Comm { } } - using KeyAttrs = std::tuple; + using KeyAttrs = std::tuple; // try to allocate buff on device evenly void InitMergeBuffer(const std::vector& devs) { std::sort(sorted_key_attrs_.begin(), sorted_key_attrs_.end(), []( @@ -688,7 +688,7 @@ class CommDevice : public Comm { for (auto& sorted_key_attr : sorted_key_attrs_) { const int key = std::get<0>(sorted_key_attr); - const TShape& shape = std::get<1>(sorted_key_attr); + const mxnet::TShape& shape = std::get<1>(sorted_key_attr); const int type = std::get<2>(sorted_key_attr); auto& buf = merge_buf_[key]; Context ctx; diff --git a/src/kvstore/comm_tree.h b/src/kvstore/comm_tree.h index 11d99c021917..11ca2d6528be 100644 --- a/src/kvstore/comm_tree.h +++ b/src/kvstore/comm_tree.h @@ -58,7 +58,7 @@ class CommDeviceTree : public CommDevice { virtual ~CommDeviceTree() { } - void Init(int key, const NDArrayStorageType stype, const TShape& shape, + void Init(int key, const NDArrayStorageType stype, const mxnet::TShape& shape, int dtype = mshadow::kFloat32) override { tree_sorted_key_attrs_.emplace_back(key, shape, dtype); sorted_key_attrs_.emplace_back(key, shape, dtype); @@ -385,7 +385,7 @@ class CommDeviceTree : public CommDevice { #endif } - using KeyAttrs = std::tuple; + using KeyAttrs = std::tuple; // try to allocate buff on device evenly void InitMergeBufferTree() { LOG(INFO) << "Using Tree"; @@ -402,7 +402,7 @@ class CommDeviceTree : public CommDevice { for (auto& tree_sorted_key_attr : tree_sorted_key_attrs_) { const int key = std::get<0>(tree_sorted_key_attr); - const TShape& shape = std::get<1>(tree_sorted_key_attr); + const mxnet::TShape& shape = std::get<1>(tree_sorted_key_attr); const int type = std::get<2>(tree_sorted_key_attr); if (key_dist.find(shape.Size()) == key_dist.end()) @@ -444,7 +444,7 @@ class CommDeviceTree : public CommDevice { // buf.merged enforces that we only visit each GPU once if (buf.merged.empty()) { - TShape shape_copy = shape; + mxnet::TShape shape_copy = shape; int total_size = shape.Size(); unsigned first_size = shape[0]; if (total_size > gpuarray_bound_ && first_size >= 2*devs_.size()) { diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h index 
23fbf67474ee..9fe41c51a2c6 100644 --- a/src/kvstore/kvstore_dist.h +++ b/src/kvstore/kvstore_dist.h @@ -383,8 +383,9 @@ class KVStoreDist : public KVStoreLocal { // Init the small buffer and residual_ buffer for quantize if (small_buf.is_none()) { - small_buf = NDArray(TShape{pskv.size}, comm_buf.ctx(), false, dtype); - res_buf = NDArray(TShape{static_cast(original_size)}, comm_buf.ctx(), false, dtype); + small_buf = NDArray(mxnet::TShape{pskv.size}, comm_buf.ctx(), false, dtype); + res_buf = NDArray(mxnet::TShape{static_cast(original_size)}, + comm_buf.ctx(), false, dtype); res_buf = 0; } gradient_compression_->Quantize(comm_buf, &small_buf, &res_buf, priority); diff --git a/src/kvstore/kvstore_dist_server.h b/src/kvstore/kvstore_dist_server.h index 372b58dbbf3d..0cb1a11e3fcc 100644 --- a/src/kvstore/kvstore_dist_server.h +++ b/src/kvstore/kvstore_dist_server.h @@ -459,7 +459,7 @@ class KVStoreDistServer { auto unit_len = req_data.lens[1] / num_bytes; CHECK_GT(unit_len, 0); size_t ds[] = {num_rows, (size_t) unit_len}; - TShape dshape(ds, ds + 2); + mxnet::TShape dshape(ds, ds + 2); CHECK_EQ(req_data.vals.size(), num_rows * unit_len * num_bytes); TBlob recv_blob; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { @@ -546,7 +546,7 @@ class KVStoreDistServer { // data TBlob idx_blob(indices.data(), mshadow::Shape1(num_rows), cpu::kDevMask); size_t ds[] = {(size_t) num_rows, (size_t) unit_len}; - TShape dshape(ds, ds + 2); + mxnet::TShape dshape(ds, ds + 2); TBlob recv_blob; MSHADOW_REAL_TYPE_SWITCH(type.dtype, DType, { recv_blob = TBlob(reinterpret_cast(req_data.vals.data()), @@ -620,12 +620,12 @@ class KVStoreDistServer { auto& stored = store_[key]; size_t ds[] = {(size_t)req_data.lens[1] / mshadow::mshadow_sizeof(type.dtype)}; - TShape dshape(ds, ds + 1); + mxnet::TShape dshape(ds, ds + 1); TBlob recv_blob(reinterpret_cast(req_data.vals.data()), dshape, cpu::kDevMask); NDArray recved = NDArray(recv_blob, 0); NDArray decomp_buf = decomp_buf_[key]; - dshape = TShape{(int64_t) original_size}; + dshape = mxnet::TShape{(int64_t) original_size}; if (decomp_buf.is_none()) { decomp_buf = NDArray(dshape, Context()); @@ -684,7 +684,7 @@ class KVStoreDistServer { // the operators with \a NDArray are actually finished if (req_meta.push) { size_t ds[] = {(size_t) req_data.lens[0] / mshadow::mshadow_sizeof(type.dtype)}; - TShape dshape(ds, ds + 1); + mxnet::TShape dshape(ds, ds + 1); TBlob recv_blob; MSHADOW_REAL_TYPE_SWITCH(type.dtype, DType, { recv_blob = TBlob(reinterpret_cast(req_data.vals.data()), dshape, cpu::kDevMask); diff --git a/src/kvstore/kvstore_nccl.h b/src/kvstore/kvstore_nccl.h index a4ba533917b8..0c1411002e7f 100644 --- a/src/kvstore/kvstore_nccl.h +++ b/src/kvstore/kvstore_nccl.h @@ -443,7 +443,7 @@ class KVStoreNCCL : public KVStoreLocal { } // Initialize single key - void InitKey(int key, const NDArrayStorageType stype, const TShape& shape, + void InitKey(int key, const NDArrayStorageType stype, const mxnet::TShape& shape, int dtype = mshadow::kFloat32) { if (stype == kDefaultStorage) { key_attrs_.push_back(std::make_tuple(key, shape, dtype)); @@ -492,11 +492,11 @@ class KVStoreNCCL : public KVStoreLocal { } } - using KeyAttrs = std::tuple; + using KeyAttrs = std::tuple; void InitMergeBuffer(const std::vector& devs) { for (size_t i = 0; i < key_attrs_.size(); ++i) { int key = std::get<0>(key_attrs_[i]); - TShape s = std::get<1>(key_attrs_[i]); + mxnet::TShape s = std::get<1>(key_attrs_[i]); int type = std::get<2>(key_attrs_[i]); auto& buf = merge_buf_[key]; // always use devs[0] as root diff 
--git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 648f9584618c..b09d38aa1863 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -50,9 +50,9 @@ DMLC_REGISTRY_ENABLE(::mxnet::NDArrayFunctionReg); namespace mxnet { -NDArray::NDArray(const NDArrayStorageType stype, const TShape &shape, Context ctx, +NDArray::NDArray(const NDArrayStorageType stype, const mxnet::TShape &shape, Context ctx, bool delay_alloc, int dtype, std::vector aux_types, - std::vector aux_shapes, TShape storage_shape) : shape_(shape), + mxnet::ShapeVector aux_shapes, mxnet::TShape storage_shape) : shape_(shape), dtype_(dtype), storage_type_(stype), entry_({nullptr, 0, 0}) { // Assign default aux types if not given if (aux_types.size() == 0 @@ -70,10 +70,10 @@ NDArray::NDArray(const NDArrayStorageType stype, const TShape &shape, Context ct if (aux_shapes.size() == 0 && stype != kDefaultStorage) { if (stype == kRowSparseStorage) { - aux_shapes = {TShape(mshadow::Shape1(0))}; + aux_shapes = {mxnet::TShape(mshadow::Shape1(0))}; } else if (stype == kCSRStorage) { // aux shapes for indptr and indices - aux_shapes = {TShape(mshadow::Shape1(0)), TShape(mshadow::Shape1(0))}; + aux_shapes = {mxnet::TShape(mshadow::Shape1(0)), mxnet::TShape(mshadow::Shape1(0))}; } else { LOG(FATAL) << "Unknown storage type " << stype; } @@ -129,7 +129,7 @@ NDArray::Chunk::~Chunk() { }, shandle.ctx, var); } -void NDArray::Chunk::CheckAndAllocData(const TShape &shape, int dtype) { +void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) { CHECK_NE(aux_shapes.size(), 0) << "data is expected to be allocated after aux_data"; auto dbytes = shape.Size() * mshadow::mshadow_sizeof(dtype); @@ -171,7 +171,7 @@ nnvm::Symbol NDArray::get_autograd_symbol() const { NDArray::NDArray(mkldnn::memory::primitive_desc mem_pd) : storage_type_(kDefaultStorage), entry_({nullptr, 0, 0}) { auto mem_desc = mem_pd.desc(); - shape_ = TShape(mem_desc.data.dims, mem_desc.data.dims + mem_desc.data.ndims); + shape_ = mxnet::TShape(mem_desc.data.dims, mem_desc.data.dims + mem_desc.data.ndims); dtype_ = get_mxnet_type(mem_desc.data.data_type); ptr_ = std::make_shared(shape_, Context::CPU(), true, dtype_); ptr_->CheckAndAlloc(mem_pd.get_size()); @@ -182,7 +182,7 @@ NDArray::NDArray(const std::shared_ptr &mkldnn_mem) : storage_type_(kDefaultStorage), entry_({nullptr, 0, 0}) { auto mem_pd = mkldnn_mem->get_primitive_desc(); auto mem_desc = mem_pd.desc(); - shape_ = TShape(mem_desc.data.dims, mem_desc.data.dims + mem_desc.data.ndims); + shape_ = mxnet::TShape(mem_desc.data.dims, mem_desc.data.dims + mem_desc.data.ndims); dtype_ = get_mxnet_type(mem_desc.data.data_type); ptr_ = std::make_shared(shape_, Context::CPU(), true, dtype_); ptr_->shandle.dptr = mkldnn_mem->get_data_handle(); @@ -192,7 +192,7 @@ NDArray::NDArray(const std::shared_ptr &mkldnn_mem) ptr_->static_data = true; } -NDArray NDArray::MKLDNNDataReshape(const TShape &shape) const { +NDArray NDArray::MKLDNNDataReshape(const mxnet::TShape &shape) const { CHECK(!is_none()) << "NDArray is not initialized"; CHECK_GE(shape_.Size(), shape.Size()) << "NDArray.Reshape: target shape size is larger current shape"; @@ -231,7 +231,7 @@ NDArray NDArray::MKLDNNDataReshape(const TShape &shape) const { #endif -NDArray NDArray::Reshape(const TShape &shape) const { +NDArray NDArray::Reshape(const mxnet::TShape &shape) const { CHECK(!is_none()) << "NDArray is not initialized"; CHECK_GE(shape_.Size(), shape.Size()) << "NDArray.Reshape: target shape size is larger current shape"; @@ -246,7 +246,7 @@ 
NDArray NDArray::Reshape(const TShape &shape) const { return ret; } -NDArray NDArray::ReshapeWithRecord(const TShape &shape) { +NDArray NDArray::ReshapeWithRecord(const mxnet::TShape &shape) { NDArray ret = this->Reshape(shape); if (!Imperative::Get()->is_recording()) return ret; @@ -300,7 +300,7 @@ NDArray NDArray::At(index_t idx) const { << "Storage type " << storage_type() << " doesn't support At()"; NDArray ret = this->Slice(idx, idx+1); if (shape_.ndim() > 1) { - return ret.Reshape(TShape(shape_.data()+1, shape_.data()+shape_.ndim())); + return ret.Reshape(mxnet::TShape(shape_.data()+1, shape_.data()+shape_.ndim())); } else { return ret; } @@ -311,7 +311,7 @@ NDArray NDArray::AtWithRecord(index_t idx) { << "Storage type " << storage_type() << " doesn't support At()"; NDArray ret = this->SliceWithRecord(idx, idx+1); if (shape_.ndim() > 1) { - return ret.ReshapeWithRecord(TShape(shape_.data()+1, shape_.data()+shape_.ndim())); + return ret.ReshapeWithRecord(mxnet::TShape(shape_.data()+1, shape_.data()+shape_.ndim())); } else { return ret; } @@ -325,13 +325,13 @@ NDArray NDArray::aux_ndarray(size_t i) const { CHECK_NE(storage_type(), kDefaultStorage); CHECK(i < ptr_->aux_shapes.size()); // create a delay_alloc default ndarray as output - NDArray ret(TShape(), ctx(), true, aux_type(i)); + NDArray ret(mxnet::TShape(), ctx(), true, aux_type(i)); ret.SyncCopyFromNDArray(*this, i); return ret; } NDArray NDArray::data_ndarray() const { - NDArray ret(TShape(), ctx(), true, dtype_); + NDArray ret(mxnet::TShape(), ctx(), true, dtype_); ret.SyncCopyFromNDArray(*this); return ret; } @@ -454,7 +454,7 @@ void NDArray::Chunk::MKLDNNDataReorder(const mkldnn::memory::primitive_desc &pd) mkl_mem_.reset(new MKLDNNMemory(pd, shandle.dptr)); } -void NDArray::Chunk::SetMKLMem(const TShape &shape, int dtype) { +void NDArray::Chunk::SetMKLMem(const mxnet::TShape &shape, int dtype) { // The shape of the array and the one of the MKL memory may mismatch. // For example, if the array stores parameters, the MKL memory may store data // in 5 dimensions while the NDArray stores data in 4 dimensions. @@ -549,7 +549,7 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder( // If they have different shapes, we need to reshape the array first. // Since this method will only be used inside an operator, we can call // MKLDNNDataReshape to reshape an array. 
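An aside on the invariant the reshape paths above rely on (a sketch, not part of the patch): NDArray::Reshape only checks that the target mxnet::TShape does not cover more elements than the source, so flattening is always safe.

#include <mxnet/ndarray.h>

// Collapse any dense NDArray to 1-D without copying; Size() is the total
// element count, so the CHECK_GE in Reshape above cannot fire.
mxnet::NDArray Flatten1D(const mxnet::NDArray& arr) {
  return arr.Reshape(mxnet::TShape(mshadow::Shape1(arr.shape().Size())));
}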
- TShape required_shape(desc2.data.ndims); + mxnet::TShape required_shape(desc2.data.ndims); for (int i = 0; i < desc2.data.ndims; i++) required_shape[i] = desc2.data.dims[i]; NDArray reshaped = MKLDNNDataReshape(required_shape); @@ -575,7 +575,7 @@ NDArray NDArray::Reorder2Default() const { // create new ndarray from mkldnn layout mkldnn::memory::desc from_desc = ptr_->mkl_mem_->GetPrimitiveDesc().desc(); - TShape tshape(from_desc.data.ndims); + mxnet::TShape tshape(from_desc.data.ndims); for (int i = 0; i < from_desc.data.ndims; i++) tshape[i] = from_desc.data.dims[i]; NDArray ret(tshape, ctx(), false, dtype()); mkldnn::memory::primitive_desc def_pd = ptr_->mkl_mem_->GetPrimitiveDesc(format); @@ -737,7 +737,7 @@ void NDArray::UpdateMKLDNNMemDesc(mkldnn::memory::format format) { void NDArray::SetTBlob() const { CHECK(ptr_ != nullptr); - TShape shape = shape_; + mxnet::TShape shape = shape_; char *dptr = static_cast(ptr_->shandle.dptr); auto stype = storage_type(); if (stype == kDefaultStorage) { @@ -1161,7 +1161,7 @@ void CopyFromToImpl(const NDArray& from, const NDArray& to, if (from_stype == to_stype) { casted_nd = from; // same stype, no need to cast from } else { // different stypes on different ctx needs an temporary casted_nd - const TShape& shape = from.shape(); + const mxnet::TShape& shape = from.shape(); if (to_stype == kDefaultStorage) { casted_nd = NDArray(shape, from_ctx); } else { @@ -1567,7 +1567,7 @@ NDArray &NDArray::operator/=(const real_t &src) { return ScalarOpApply(this, src); } -/* magic number for ndarray version 1, with int64_t TShape */ +/* magic number for ndarray version 1, with int64_t mxnet::TShape */ static const uint32_t NDARRAY_V1_MAGIC = 0xF993fac8; /* magic number for ndarray version 2, with storage type */ @@ -1643,14 +1643,14 @@ void NDArray::Save(dmlc::Stream *strm) const { } } -bool LegacyTShapeLoad(dmlc::Stream *strm, TShape *shape, const uint32_t magic) { +bool LegacyTShapeLoad(dmlc::Stream *strm, mxnet::TShape *shape, const uint32_t magic) { switch (magic) { case NDARRAY_V1_MAGIC: return shape->Load(strm); default: - // meet legacy TShape, magic is ndim here + // meet legacy mxnet::TShape, magic is ndim here uint32_t ndim = magic; - *shape = TShape(ndim); + *shape = mxnet::TShape(ndim); std::vector buffer(ndim); size_t nread = ndim * sizeof(uint32_t); if (strm->Read(buffer.data(), nread) != nread) return false; @@ -1661,7 +1661,7 @@ bool LegacyTShapeLoad(dmlc::Stream *strm, TShape *shape, const uint32_t magic) { bool NDArray::LegacyLoad(dmlc::Stream *strm, const uint32_t magic) { // load shape - TShape shape; + mxnet::TShape shape; if (!LegacyTShapeLoad(strm, &shape, magic)) return false; if (shape.ndim() == 0) { *this = NDArray(); return true; @@ -1703,13 +1703,13 @@ bool NDArray::Load(dmlc::Stream *strm) { const int32_t nad = num_aux_data(static_cast(stype)); // load storage shape - TShape sshape; + mxnet::TShape sshape; if (nad > 0) { if (!sshape.Load(strm)) return false; } // load shape - TShape shape; + mxnet::TShape shape; if (!shape.Load(strm)) return false; if (shape.ndim() == 0) { *this = NDArray(); return true; @@ -1725,7 +1725,7 @@ bool NDArray::Load(dmlc::Stream *strm) { // load aux_types and aux_shapes std::vector aux_types; - std::vector aux_shapes; + mxnet::ShapeVector aux_shapes; if (nad > 0) { aux_types.resize(nad); aux_shapes.resize(nad); @@ -1819,7 +1819,7 @@ NDArray NDArray::Copy(Context ctx) const { } void NDArray::SyncCopyFromCPU(const void *data, size_t size) const { - TShape dshape = this->shape(); + mxnet::TShape dshape = 
this->shape(); CHECK_EQ(dshape.Size(), size) << "Memory size do not match"; TBlob src((void*)data, dshape, cpu::kDevMask, this->dtype_, 0); // NOLINT(*) @@ -1876,7 +1876,7 @@ void NDArray::SyncCopyFromNDArray(const NDArray& src, int i, int j) { // get or create a dst tblob for copying src to it // if dst is a dense format and has not been allocated, allocate memory for it // else if dst is not initialized, allocate corresponding data blob for it - auto get_dst_data = [&](const TShape& src_shape) { + auto get_dst_data = [&](const mxnet::TShape& src_shape) { if (this->storage_type() == kDefaultStorage) { this->ReshapeAndAlloc(src_shape); } else if (!this->storage_initialized()) { @@ -1950,7 +1950,7 @@ void NDArray::SyncCopyFromNDArray(const NDArray& src, int i, int j) { } void NDArray::SyncCopyToCPU(void *data, size_t size) const { - TShape dshape = this->shape(); + mxnet::TShape dshape = this->shape(); CHECK_EQ(dshape.Size(), size) << "Memory size do not match"; TBlob dst(data, dshape, cpu::kDevMask, this->dtype_, 0); // NOLINT(*) @@ -2056,7 +2056,7 @@ void CopyFromToSimple( NNVM_REGISTER_OP(_copyto) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", op::ElemwiseShape<1, 1>) +.set_attr("FInferShape", op::ElemwiseShape<1, 1>) .set_attr("FInferType", [](const NodeAttrs& attrs, std::vector *in_type, std::vector *out_type) { return !op::type_is_none((*in_type)[0]) && !op::type_is_none((*out_type)[0]); diff --git a/src/ndarray/ndarray_function.h b/src/ndarray/ndarray_function.h index 97c23b67592a..70b626dbb9b7 100644 --- a/src/ndarray/ndarray_function.h +++ b/src/ndarray/ndarray_function.h @@ -38,7 +38,7 @@ namespace mxnet { /*! \brief namespace to support all possible Ndarray operator */ namespace ndarray { struct BinaryBase { - inline static TShape GetShape(const TShape &lshape, const TShape &rshape) { + inline static mxnet::TShape GetShape(const mxnet::TShape &lshape, const mxnet::TShape &rshape) { CHECK(lshape == rshape) << "operands shape mismatch"; CHECK(lshape.ndim() != 0) << "source operand have zero dimension shape"; return lshape; @@ -94,7 +94,7 @@ struct ClipMax : public BinaryBase { struct OneHotEncode { - inline static TShape GetShape(const TShape &index, const TShape &proptype) { + inline static mxnet::TShape GetShape(const mxnet::TShape &index, const mxnet::TShape &proptype) { CHECK(index.ndim() == 1 && proptype.ndim() == 2) << "OneHotEncode only support 1d index."; CHECK_EQ(index[0], proptype[0]) << "OneHotEncode shape inconsistent"; return proptype; @@ -102,7 +102,7 @@ struct OneHotEncode { }; struct MatChooseRowElem { - inline static TShape GetShape(const TShape &lshape, const TShape &rshape) { + inline static mxnet::TShape GetShape(const mxnet::TShape &lshape, const mxnet::TShape &rshape) { CHECK(lshape.ndim() == 2 && rshape.ndim() == 1) << "choose_row_element only support 2D Matrix and 1D index"; CHECK_EQ(lshape[0], rshape[0]) << "choose_row_element index and matrix shape mismatch"; @@ -111,7 +111,9 @@ struct MatChooseRowElem { }; struct MatFillRowElem { - inline static TShape GetShape(const TShape &lshape, const TShape &mshape, const TShape &rshape) { + inline static mxnet::TShape GetShape(const mxnet::TShape &lshape, + const mxnet::TShape &mshape, + const mxnet::TShape &rshape) { CHECK(lshape.ndim() == 2 && mshape.ndim() == 1 && rshape.ndim() == 1) << "fill_row_element only support 2D Matrix, 1D value and 1D index"; CHECK((lshape[0] == mshape[0]) && (mshape[0] == rshape[0])) diff --git a/src/nnvm/gradient.cc b/src/nnvm/gradient.cc new file mode 100644 index 
000000000000..4927191a5964
--- /dev/null
+++ b/src/nnvm/gradient.cc
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2016 by Contributors
+ * \file gradient.cc
+ * \brief Pass that takes the gradient of a graph.
+ *  This code was modified from the mxnet codebase by Min Lin.
+ */
+#include
+#include
+#include
+#include
+#include
+
+namespace nnvm {
+namespace pass {
+namespace {
+
+// default aggregate gradient function
+// requires the operators zeros and elemwise_sum to be present.
+NodeEntry DefaultAggregateGradient(std::vector<NodeEntry>&& v) {
+  if (v.size() == 1) {
+    return std::move(v[0]);
+  } else if (v.size() == 0) {
+    NodePtr zero_node = Node::Create();
+    zero_node->attrs.op = Op::Get("zeros");
+    zero_node->attrs.name = "zero_grad";
+    zero_node->attrs.op->attr_parser(&(zero_node->attrs));
+    return NodeEntry{zero_node, 0, 0};
+  } else {
+    NodePtr sum_node = Node::Create();
+    sum_node->attrs.op = Op::Get("elemwise_sum");
+    sum_node->inputs = std::move(v);
+    sum_node->attrs.name = "grad_sum";
+    sum_node->attrs.dict["num_args"] = std::to_string(sum_node->inputs.size());
+    sum_node->attrs.op->attr_parser(&(sum_node->attrs));
+    return NodeEntry{sum_node, 0, 0};
+  }
+}
+
+bool CheckGradAllZero(const std::vector<NodeEntry>& grads,
+                      const std::vector<const Op*>& zero_ops) {
+  if (!grads.size() || !zero_ops.size()) return false;
+  for (const auto& g : grads) {
+    bool found = false;
+    for (const auto& op : zero_ops) {
+      if (g.node->op() == op) {
+        found = true;
+        break;
+      }
+    }
+    if (!found) return false;
+  }
+  return true;
+}
+
+// helper entry
+struct GradEntry {
+#ifdef _MSC_VER
+  NodeEntry sum = NodeEntry{nullptr, 0, 0};
+#else
+  NodeEntry sum{nullptr, 0, 0};
+#endif
+  std::vector<NodeEntry> grads;
+  bool need_attr_hint{true};
+};
+
+Graph Gradient(Graph src) {
+  using nnvm::FGradient;
+  using MirrorFun = std::function<int (const Node& node)>;
+  using AttrHintFun = std::function<NodeEntry (const NodeEntry& src, const NodeEntry& like)>;
+
+  CHECK_NE(src.attrs.count("grad_ys"), 0U)
+      << "Gradient requires grad_ys to be present.";
+  CHECK_NE(src.attrs.count("grad_ys_out_grad"), 0U)
+      << "Gradient requires grad_ys_out_grad to be present.";
+  CHECK_NE(src.attrs.count("grad_xs"), 0U)
+      << "Gradient requires grad_xs to be present.";
+  const std::vector<NodeEntry>& ys =
+      src.GetAttr<std::vector<NodeEntry> >("grad_ys");
+  const std::vector<NodeEntry>& ys_out_grad =
+      src.GetAttr<std::vector<NodeEntry> >("grad_ys_out_grad");
+  const std::vector<NodeEntry>& xs =
+      src.GetAttr<std::vector<NodeEntry> >("grad_xs");
+  using AggFun = std::function<NodeEntry (std::vector<NodeEntry>&& inputs)>;
+  AggFun agg_fun = DefaultAggregateGradient;
+  if (src.attrs.count("grad_aggregate_fun") != 0) {
+    agg_fun = src.GetAttr<AggFun>("grad_aggregate_fun");
+  }
+  MirrorFun mirror_fun = nullptr;
+  if (src.attrs.count("grad_mirror_fun") != 0) {
+    mirror_fun = src.GetAttr<MirrorFun>("grad_mirror_fun");
+  }
+  AttrHintFun attr_hint_fun = nullptr;
+  if (src.attrs.count("attr_hint_fun") != 0) {
+    attr_hint_fun = src.GetAttr<AttrHintFun>("attr_hint_fun");
+  }
+  std::vector<const Op*> zero_ops;
+  if (src.attrs.count("zero_ops") != 0) {
+    zero_ops = src.GetAttr<std::vector<const Op*> >("zero_ops");
+  }
+  const Op* copy_op = (src.attrs.count("copy_op") != 0) ?
+      Op::Get(src.GetAttr<std::string>("copy_op")) :
+      nullptr;
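+  // (Editorial aside, not in the original patch.) The optional graph
+  // attributes fetched above tune the pass: a caller could, for example,
+  // register a mirror function that marks cheap ops for recomputation in
+  // the backward pass instead of keeping their outputs alive. A sketch,
+  // using the MirrorFun alias above and a hypothetical IsCheap() predicate:
+  //
+  //   g.attrs["grad_mirror_fun"] = std::make_shared<nnvm::any>(
+  //       MirrorFun([](const Node& n) { return IsCheap(n) ? 1 : 0; }));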
+
+  // topo sort
+  std::vector<NodePtr> topo_order;
+  std::unordered_map<Node*, std::vector<GradEntry> > output_grads;
+
+  DFSVisit(ys, [&](const NodePtr& node) {
+    if (output_grads.count(node.get()) == 0) {
+      output_grads[node.get()].resize(node->num_outputs());
+    }
+    topo_order.push_back(node);
+  });
+
+  CHECK_EQ(ys.size(), ys_out_grad.size());
+  for (size_t i = 0; i < ys.size(); ++i) {
+    NodeEntry ograd = ys_out_grad[i];
+    output_grads[ys[i].node.get()][ys[i].index].grads = { ograd };
+  }
+
+  // Check that all xs are reachable from ys
+  for (size_t i = 0; i < xs.size(); ++i) {
+    CHECK(output_grads.find(xs[i].node.get()) != output_grads.end())
+        << "Cannot differentiate with respect to the " << i+1 << "-th variable "
+        << "because it is unreachable from the outputs.";
+  }
+
+  // construct mirror nodes as a memory-reduction strategy if needed
+  std::unordered_map<Node*, NodePtr> mirror_map;
+  if (mirror_fun != nullptr) {
+    for (const NodePtr& n : topo_order) {
+      if (mirror_fun(*n)) {
+        NodePtr new_node = Node::Create();
+        *new_node = *n;
+        new_node->attrs.name += "_mirror";
+        for (auto& e : new_node->inputs) {
+          e.node = mirror_map.at(e.node.get());
+        }
+        for (auto& n : new_node->control_deps) {
+          n = mirror_map.at(n.get());
+        }
+        mirror_map[n.get()] = std::move(new_node);
+      } else {
+        mirror_map[n.get()] = n;
+      }
+    }
+  }
+
+  // traverse backward
+  static auto& grad_fun_map = Op::GetAttr<FGradient>("FGradient");
+  static auto& finfer_shape = Op::GetAttr<mxnet::FInferShape>("FInferShape");
+
+  std::vector<NodeEntry> out_agg_grads;
+  for (auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit) {
+    const NodePtr& ptr = *rit;
+    if (ptr->is_variable()) continue;
+    out_agg_grads.clear();
+    auto& out_grad_vec = output_grads.at(ptr.get());
+    for (uint32_t i = 0; i < out_grad_vec.size(); ++i) {
+      GradEntry& e = out_grad_vec[i];
+      e.sum = agg_fun(std::move(e.grads));
+      if (e.need_attr_hint && attr_hint_fun != nullptr) {
+        e.sum = attr_hint_fun(e.sum, NodeEntry{ptr, 0, i});
+      }
+      out_agg_grads.push_back(e.sum);
+    }
+    if ((*rit)->inputs.size() != 0) {
+      NodePtr fwd_node = (mirror_map.size() == 0 ?
ptr : mirror_map.at(ptr.get())); + std::vector input_grads; + if (grad_fun_map.count(ptr->op())) { + input_grads = grad_fun_map[ptr->op()](fwd_node, out_agg_grads); + CHECK_EQ((*rit)->inputs.size(), input_grads.size()) + << "Gradient function not returning enough gradient"; + } else if (CheckGradAllZero(out_agg_grads, zero_ops)) { + for (size_t i = 0; i < fwd_node->num_inputs(); ++i) { + std::ostringstream os; + if (1 == fwd_node->num_inputs()) { + os << fwd_node->attrs.name << "_backward"; + } else { + os << fwd_node->attrs.name << "_in" << i << "_backward"; + } + auto p = Node::Create(); + p->attrs.op = zero_ops[0]; + p->attrs.name = os.str(); + p->inputs.push_back(fwd_node->inputs[i]); + p->control_deps.emplace_back(fwd_node); + if (p->op()->attr_parser != nullptr) { + p->op()->attr_parser(&(p->attrs)); + } + input_grads.emplace_back(nnvm::NodeEntry{p, 0, 0}); + } + } else { + LOG(FATAL) << "Operator " << fwd_node->op()->name << " is non-differentiable " + << "because it didn't register FGradient attribute."; + } + auto git = input_grads.begin(); + for (auto it = (*rit)->inputs.begin(); it != (*rit)->inputs.end(); ++it, ++git) { + auto& ge = output_grads[it->node.get()][it->index]; + // if any of the backward op can do shape inference, the hint is not necessary. + if (finfer_shape.count(git->node->op())) { + ge.need_attr_hint = false; + } + ge.grads.emplace_back(std::move(*git)); + } + } + } + // take out the xs' grads + Graph ret; + ret.outputs.resize(xs.size()); + NodeEntryMap > unique_grads; + size_t counter = 0; + for (const NodeEntry& e : xs) { + GradEntry& entry = output_grads[e.node.get()][e.index]; + // aggregate sum if there haven't been + if (entry.sum.node.get() == nullptr) { + entry.sum = agg_fun(std::move(entry.grads)); + if (entry.need_attr_hint && attr_hint_fun != nullptr) { + entry.sum = attr_hint_fun(entry.sum, e); + } + } + if (copy_op != nullptr) { + auto kv = unique_grads.find(entry.sum); + if (kv == unique_grads.end()) { + unique_grads.emplace(std::move(entry.sum), std::make_pair(1, counter)); + } else { + NodePtr copy_node = Node::Create(); + std::ostringstream os; + os << entry.sum.node->attrs.name << "_" << kv->second.first << "_copy"; + kv->second.first++; + copy_node->attrs.op = copy_op; + copy_node->attrs.name = os.str(); + copy_node->inputs.emplace_back(entry.sum); + if (copy_node->attrs.op->attr_parser != nullptr) { + copy_node->attrs.op->attr_parser(&(copy_node->attrs)); + } + unique_grads.emplace(NodeEntry{std::move(copy_node), 0, 0}, std::make_pair(1, counter)); + } + } else { + ret.outputs[counter] = entry.sum; + } + ++counter; + } + if (copy_op != nullptr) { + for (const auto& kv : unique_grads) { + ret.outputs[kv.second.second] = kv.first; + } + } + return ret; +} + +// register pass +NNVM_REGISTER_PASS(MXGradient) +.describe("Return a gradient graph of src.attrs[\"ys\"] wrt src.attrs[\"xs\"]") +.set_body(Gradient) +.set_change_graph(true) +.depend_graph_attr("grad_ys") +.depend_graph_attr("grad_xs") +.depend_graph_attr("grad_ys_out_grad"); + +} // namespace +} // namespace pass +} // namespace nnvm diff --git a/src/nnvm/graph_algorithm.h b/src/nnvm/graph_algorithm.h new file mode 100644 index 000000000000..d1590c3b9846 --- /dev/null +++ b/src/nnvm/graph_algorithm.h @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2016 by Contributors
+ * \file graph_algorithm.h
+ * \brief This header contains graph algorithms on StaticGraph.
+ *  It is used to compute information such as whether two
+ *  operations can run in parallel, and helps allocation.
+*/
+#ifndef MXNET_NNVM_GRAPH_ALGORITHM_H_
+#define MXNET_NNVM_GRAPH_ALGORITHM_H_
+
+#include
+#include
+
+namespace nnvm {
+namespace pass {
+
+/*!
+ * \brief Find the best path in the DAG, with the reward defined
+ *  as the sum of the rewards of the nodes along the path.
+ * \param graph the original static graph.
+ * \param node_reward the reward of each node.
+ * \param path the output path of nodes.
+ * \return the total reward of the best path.
+ */
+inline uint32_t FindBestPath(
+    const IndexedGraph& graph,
+    const std::vector<uint32_t>& node_reward,
+    std::vector<uint32_t>* path) {
+  const uint32_t num_nodes = static_cast<uint32_t>(graph.num_nodes());
+  CHECK_EQ(num_nodes, node_reward.size());
+
+  std::vector<uint32_t> best_reward(node_reward.size(), 0);
+  std::vector<uint32_t> next_node(node_reward.size(), num_nodes);
+  uint32_t best_solution = 0, best_start_node = 0;
+
+  // traverse in reverse topo order
+  for (uint32_t i = static_cast<uint32_t>(graph.num_nodes()); i != 0; --i) {
+    const uint32_t nid = i - 1;
+    best_reward[nid] += node_reward[nid];
+    if (best_reward[nid] > best_solution) {
+      best_solution = best_reward[nid];
+      best_start_node = nid;
+    }
+    for (const auto& e : graph[nid].inputs) {
+      const uint32_t prev = e.node_id;
+      if (best_reward[nid] > best_reward[prev]) {
+        best_reward[prev] = best_reward[nid];
+        next_node[prev] = nid;
+      }
+    }
+  }
+  path->clear();
+  uint32_t reward = 0;
+  for (uint32_t nid = best_start_node; nid < num_nodes; nid = next_node[nid]) {
+    path->push_back(nid); reward += node_reward[nid];
+  }
+  CHECK_EQ(reward, best_solution);
+  return best_solution;
+}
+
+/*!
+ * \brief Color the nodes in the graph into group indices.
+ *  The coloring algorithm tries to assign node groups
+ *  such that nodes in the same group cannot run in parallel.
+ *
+ * \param graph the original indexed graph.
+ * \param node_importance the importance of each node.
+ * \param max_ncolor maximum number of colors allowed.
+ * \param color the color index of each node.
+ * \return the total number of colors.
+ */
+inline uint32_t ColorNodeGroup(
+    const IndexedGraph &graph,
+    std::vector<uint32_t> node_importance,
+    uint32_t max_ncolor,
+    std::vector<uint32_t> *color) {
+  CHECK_NE(max_ncolor, 0U);
+  CHECK_EQ(graph.num_nodes(), node_importance.size());
+
+  color->clear();
+  color->resize(graph.num_nodes(), max_ncolor);
+  uint32_t cindex;
+  // greedy algorithm: each round finds the path with the best reward
+  // and assigns it a new color.
+  // All the nodes on that path cannot run in parallel.
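+  // (Editorial worked example, not in the original patch.) On a diamond
+  // graph A -> {B, C} -> D with importance {1, 1, 1, 1} and max_ncolor = 2,
+  // the single FindBestPath round picks one of the two reward-3 paths, say
+  // A-C-D, and colors it 0; the loop below then exits with cindex = 1, and
+  // the leftover branch node B receives color 1. B and C, which can run in
+  // parallel, therefore land in different groups, as intended.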
+ for (cindex = 0; cindex < max_ncolor - 1; ++cindex) { + std::vector path; + uint32_t reward = FindBestPath(graph, node_importance, &path); + if (reward == 0) break; + for (uint32_t nid : path) { + if (node_importance[nid] != 0) { + CHECK_EQ(color->at(nid), max_ncolor); + color->at(nid) = cindex; + // make the importance 0 after color is decided. + node_importance[nid] = 0; + } + } + } + // assign i for rest of the node + for (uint32_t i = 0; i < graph.num_nodes(); ++i) { + if (color->at(i) == max_ncolor) { + color->at(i) = cindex; + } + } + return cindex + 1; +} + +} // namespace pass +} // namespace nnvm + +#endif // MXNET_NNVM_GRAPH_ALGORITHM_H_ diff --git a/src/nnvm/legacy_op_util.cc b/src/nnvm/legacy_op_util.cc index 4ab777b6adb7..16ad0053e29a 100644 --- a/src/nnvm/legacy_op_util.cc +++ b/src/nnvm/legacy_op_util.cc @@ -217,12 +217,12 @@ bool OpPropInferAttr(const NodeAttrs& attrs, } bool OpPropInferShape(const NodeAttrs& attrs, - std::vector *iattr, - std::vector *oattr) { + mxnet::ShapeVector *iattr, + mxnet::ShapeVector *oattr) { auto finfer = [](const OperatorProperty* op, - std::vector *in, - std::vector *out, - std::vector *aux) { + mxnet::ShapeVector *in, + mxnet::ShapeVector *out, + mxnet::ShapeVector *aux) { return op->InferShape(in, out, aux); }; return OpPropInferAttr(attrs, iattr, oattr, finfer); @@ -294,23 +294,23 @@ std::vector > OpPropInplaceOption(const NodeAttrs& attrs) { } std::vector OpPropResourceRequest(const NodeAttrs& attrs) { - std::vector ishape; + mxnet::ShapeVector ishape; auto& prop = nnvm::get(attrs.parsed); return prop.ptr->ForwardResource(ishape); } std::vector OpBackResourceRequest(const NodeAttrs& attrs) { - std::vector ishape; + mxnet::ShapeVector ishape; auto& prop = nnvm::get(attrs.parsed); return prop.ptr->BackwardResource(ishape); } OpStatePtr OpPropCreateLayerOp(const NodeAttrs& attrs, Context ctx, - const std::vector& ishape, + const mxnet::ShapeVector& ishape, const std::vector& itype) { auto& prop = nnvm::get(attrs.parsed); - std::vector is(ishape.begin(), ishape.begin() + prop.arguments.size()); + mxnet::ShapeVector is(ishape.begin(), ishape.begin() + prop.arguments.size()); std::vector it(itype.begin(), itype.begin() + prop.arguments.size()); return OpStatePtr::Create(prop.ptr->CreateOperatorEx(ctx, &is, &it), prop.ptr.get()); @@ -452,7 +452,7 @@ void RegisterLegacyOpProp() { op.set_attr("FListInputNames", OpPropListInputNames); op.set_attr("FListOutputNames", OpPropListOutputNames); op.set_attr("FNumVisibleOutputs", OpPropNumVisibleOutputs); - op.set_attr("FInferShape", OpPropInferShape); + op.set_attr("FInferShape", OpPropInferShape); op.set_attr("FInferType", OpPropInferType); op.set_attr("FMutateInputs", OpPropMutateInputs); op.set_attr("FInplaceOption", OpPropInplaceOption); diff --git a/src/nnvm/plan_memory.cc b/src/nnvm/plan_memory.cc new file mode 100644 index 000000000000..2b18f990c845 --- /dev/null +++ b/src/nnvm/plan_memory.cc @@ -0,0 +1,412 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2016 by Contributors
+ * \file plan_memory.cc
+ * \brief Assign memory tag to each of the data entries.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "graph_algorithm.h"
+
+namespace nnvm {
+namespace pass {
+
+namespace {
+  using namespace nnvm::top;
+// Return the size in bytes of a dtype flag.
+static int GetDTypeSize(int type_flag) {
+  switch (type_flag) {
+    case kUint8:
+    case kInt8:
+      return 1;
+    case kFloat16:
+    case kInt16:
+    case kUint16:
+      return 2;
+    case kFloat32:
+    case kInt32:
+    case kUint32:
+      return 4;
+    case kFloat64:
+    case kInt64:
+    case kUint64:
+      return 8;
+    default:
+      LOG(FATAL) << "unknown type_flag=" << type_flag;
+      return -1;
+  }
+}
+
+// simple graph-based allocator.
+class GraphAllocator {
+ public:
+  // storage id is an integer.
+  using StorageID = int;
+
+  // bad storage id
+  static const StorageID kBadStorageID = -1;
+  // external storage id
+  static const StorageID kExternalStorageID = -2;
+  // dynamic storage id
+  static const StorageID kDynamicStorageID = -3;
+
+  // request a free storage block
+  StorageID Request(int dev_id, int dtype, mxnet::TShape shape, uint32_t node_id) {
+    if (shape.ndim() == 0) return kBadStorageID;
+    // search for a memory block in [size / match_range_, size * match_range_)
+    // TODO(tqchen) add size of the dtype, assume 4 bytes for now
+    size_t size = shape.Size() * 4;
+    if (match_range_ == 0) return this->Alloc(dev_id, size);
+    auto begin = free_.lower_bound(size / match_range_);
+    auto mid = free_.lower_bound(size);
+    auto end = free_.upper_bound(size * match_range_);
+    // search for memory blocks larger than requested
+    for (auto it = mid; it != end; ++it) {
+      StorageEntry *e = it->second;
+      if (e->device_id != dev_id) continue;
+      if (node_color_.size() != 0 &&
+          node_color_[e->released_by_node] != node_color_[node_id]) continue;
+      // use exact matching strategy
+      e->max_bytes = std::max(size, e->max_bytes);
+      // found an exact match; erase from map and return
+      free_.erase(it);
+      return e->id;
+    }
+    // then search for memory blocks smaller than the requested space
+    for (auto it = mid; it != begin;) {
+      --it;
+      StorageEntry *e = it->second;
+      if (e->device_id != dev_id) continue;
+      if (node_color_.size() != 0 &&
+          node_color_[e->released_by_node] != node_color_[node_id]) continue;
+      // use exact matching strategy
+      e->max_bytes = std::max(size, e->max_bytes);
+      // erase from map and return
+      free_.erase(it);
+      return e->id;
+    }
+    // cannot find anything: return a new one.
+    return this->Alloc(dev_id, size);
+  }
+  // release a memory space.
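+  // (Editorial trace, not in the original patch.) Release does not return
+  // memory to the system; it re-inserts the entry into free_ keyed by
+  // max_bytes, so a later Request whose size range covers the block can
+  // reuse it. With match_range_ = 2 and the 4-byte element assumption above:
+  //
+  //   StorageID a = alloc.Request(0, dtype, mxnet::TShape{256}, 0);  // 1024 B
+  //   alloc.Release(a, 1);                   // free_ now holds {1024: a}
+  //   StorageID b = alloc.Request(0, dtype, mxnet::TShape{300}, 2);  // 1200 B
+  //   // search range is [600, 2400), so the 1024-byte block is reused: b == a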
+  void Release(StorageID id, uint32_t node_id) {
+    CHECK_NE(id, kBadStorageID);
+    if (id == kExternalStorageID || id == kDynamicStorageID) return;
+    StorageEntry *e = data_[id].get();
+    e->released_by_node = node_id;
+    free_.insert({e->max_bytes, e});
+  }
+
+  // total number of bytes allocated
+  size_t TotalAllocBytes() const {
+    size_t total = 0;
+    for (auto &p : data_) {
+      total += p->max_bytes;
+    }
+    return total;
+  }
+
+  // constructor
+  explicit GraphAllocator(const IndexedGraph* idx, const size_t match_range) : idx_(idx) {
+    this->Init(match_range, dmlc::GetEnv("NNVM_EXEC_NUM_TEMP", 1));
+  }
+
+ private:
+  // initialize the graph allocator
+  void Init(const size_t match_range, const uint32_t num_match_color) {
+    match_range_ = match_range;
+    num_match_color_ = num_match_color;
+    if (num_match_color_ > 1) {
+      std::vector<uint32_t> importance(idx_->num_nodes(), 0);
+      for (uint32_t nid = 0; nid < idx_->num_nodes(); ++nid) {
+        if ((*idx_)[nid].source->is_variable()) continue;
+        importance[nid] = 1;
+      }
+      num_match_color_ = pass::ColorNodeGroup(
+          *idx_, importance, num_match_color_, &node_color_);
+    }
+  }
+
+  StorageID Alloc(int dev_id, size_t size) {
+    StorageID id = static_cast<StorageID>(data_.size());
+    std::unique_ptr<StorageEntry> ptr(new StorageEntry());
+    ptr->id = id;
+    ptr->device_id = dev_id;
+    ptr->max_bytes = size;
+    data_.emplace_back(std::move(ptr));
+    return id;
+  }
+  // internal storage entry
+  struct StorageEntry {
+    // the id of the entry.
+    StorageID id;
+    // the device id of the storage.
+    int device_id;
+    // maximum size of storage requested.
+    size_t max_bytes{0};
+    // node index that released it last time
+    uint32_t released_by_node{0};
+  };
+  // scale used for rough match
+  size_t match_range_;
+  // number of colors for the color-based match algorithm
+  uint32_t num_match_color_{1};
+  // the size of each dtype
+  std::vector<size_t> dtype_size_dict_;
+  // free list of storage entries
+  std::multimap<size_t, StorageEntry*> free_;
+  // all the storage resources available
+  std::vector<std::unique_ptr<StorageEntry> > data_;
+  // color of nodes in the graph, used for auxiliary policy making.
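+  // (Editorial note, not in the original patch.) node_color_ is filled by
+  // pass::ColorNodeGroup from graph_algorithm.h when NNVM_EXEC_NUM_TEMP > 1;
+  // Request then refuses to reuse an entry released by a node of a different
+  // color, which keeps potentially parallel nodes from sharing a temporary.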
+  std::vector<uint32_t> node_color_;
+  // internal indexed graph
+  const IndexedGraph* idx_;
+};
+
+/*
+ * Internal method to perform the memory allocation for a graph.
+ */
+size_t AllocMemory(const Graph& ret, const IndexedGraph& idx,
+                   const std::pair<uint32_t, uint32_t>& node_range,
+                   StorageVector* storage_ptr,
+                   std::vector<int>* storage_inplace_index_ptr,
+                   const std::vector<uint32_t>& entry_ref_count,
+                   GraphAllocator* allocator) {
+  static auto& finplace_option = Op::GetAttr<FInplaceOption>("FInplaceOption");
+  static auto& finplace_identity = Op::GetAttr<FInplaceIdentity>("FInplaceIdentity");
+  static auto& fignore_inputs = Op::GetAttr<FIgnoreInputs>("FIgnoreInputs");
+
+  // Get references
+  auto &storage = *storage_ptr;
+  auto &storage_inplace_index = *storage_inplace_index_ptr;
+
+  // Get attributes from the graph
+  const mxnet::ShapeVector& shape_vec = ret.GetAttr<mxnet::ShapeVector>("shape");
+  const DTypeVector& dtype_vec = ret.GetAttr<DTypeVector>("dtype");
+  const DeviceVector* device_vec = nullptr;
+
+  if (ret.attrs.count("device") != 0) {
+    device_vec = &(ret.GetAttr<DeviceVector>("device"));
+  }
+  size_t num_not_allocated = 0;
+  std::vector<uint32_t> storage_ref_count(idx.num_node_entries(), 0);
+
+  for (uint32_t nid = node_range.first; nid < node_range.second; ++nid) {
+    const auto& inode = idx[nid];
+    if (inode.source->is_variable()) continue;
+    // check inplace option
+    if (finplace_option.count(inode.source->op()) != 0) {
+      auto inplace_pairs = finplace_option[inode.source->op()](inode.source->attrs);
+      std::vector<bool> identity;
+      if (finplace_identity.count(inode.source->op()) != 0) {
+        identity = finplace_identity[inode.source->op()](inode.source->attrs);
+        CHECK_EQ(identity.size(), inplace_pairs.size())
+            << "FInplaceOption and FInplaceIdentity returned vectors of different "
+            << "size for operator " << inode.source->op()->name;
+      } else {
+        identity = std::vector<bool>(inplace_pairs.size(), false);
+      }
+      std::vector<bool> taken(inode.inputs.size(), false);
+      for (size_t ipair = 0; ipair < inplace_pairs.size(); ++ipair) {
+        const auto& kv = inplace_pairs[ipair];
+        uint32_t eid_out = idx.entry_id(nid, kv.second);
+        uint32_t eid_in = idx.entry_id(inode.inputs[kv.first]);
+        auto sid_out = storage[eid_out];
+        auto sid_in = storage[eid_in];
+        bool ignore_all_inputs = (fignore_inputs.count(inode.source->op()) != 0 &&
+                                  fignore_inputs[inode.source->op()](
+                                      inode.source->attrs).size() == inode.source->num_inputs());
+        if (taken[kv.first] == false &&
+            sid_out == GraphAllocator::kBadStorageID &&
+            sid_in >= 0 &&
+            ((storage_ref_count[sid_in] == 1 && !ignore_all_inputs) || identity[ipair]) &&
+            entry_ref_count[eid_out] > 0 &&
+            shape_vec[eid_out].Size() == shape_vec[eid_in].Size() &&
+            (dtype_vec[eid_out] == dtype_vec[eid_in] ||
+             GetDTypeSize(dtype_vec[eid_out]) == GetDTypeSize(dtype_vec[eid_in]))) {
+          // inplace optimization
+          taken[kv.first] = true;
+          storage[eid_out] = sid_in;
+          // Reuse storage for the output and add the output's ref count
+          // to the storage. This will get subtracted later in the
+          // free-input section.
+          storage_ref_count[sid_in] += entry_ref_count[eid_out];
+          storage_inplace_index[eid_out] = kv.first;
+        }
+      }
+    }
+    // normal allocation
+    const int dev_id = (device_vec != nullptr) ?
+
+
+// function to plan memory
+Graph PlanMemory(Graph ret) {
+  // setup ref counter
+  const IndexedGraph& idx = ret.indexed_graph();
+  static auto& fignore_inputs = Op::GetAttr<FIgnoreInputs>("FIgnoreInputs");
+  std::pair<uint32_t, uint32_t> node_range = {0, idx.num_nodes()};
+  if (ret.attrs.count("node_range")) {
+    node_range = ret.MoveCopyAttr<std::pair<uint32_t, uint32_t> >("node_range");
+  }
+  // reference counter of each node
+  std::vector<uint32_t> ref_count;
+  // step 1: initialize reference count
+  if (ret.attrs.count("ref_count") != 0) {
+    ref_count = ret.MoveCopyAttr<std::vector<uint32_t> >("ref_count");
+  } else {
+    ref_count.resize(idx.num_node_entries(), 0);
+    for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
+      const auto& inode = idx[nid];
+      if (inode.source->is_variable()) continue;
+      for (const auto& e : inode.inputs) {
+        ++ref_count[idx.entry_id(e)];
+      }
+      // no dataflow dependency is needed for inputs that are ignored;
+      // revoke the dependency counter.
+      if (fignore_inputs.count(inode.source->op()) != 0) {
+        auto ignore_inputs = fignore_inputs[inode.source->op()](inode.source->attrs);
+        for (uint32_t i : ignore_inputs) {
+          --ref_count[idx.entry_id(inode.inputs[i])];
+        }
+      }
+    }
+    for (const auto& e : idx.outputs()) {
+      ++ref_count[idx.entry_id(e)];
+    }
+  }
+  // step 2: allocate memory.
+  StorageVector storage;
+  if (ret.attrs.count("storage") != 0) {
+    storage = ret.MoveCopyAttr<StorageVector>("storage");
+  } else {
+    storage.resize(idx.num_node_entries(), -1);
+  }
+
+  // Search for the best NNVM_EXEC_MATCH_RANGE parameter. This is turned off by default.
+  size_t min_allocated_bytes = -1;
+  size_t max_match_range = dmlc::GetEnv("NNVM_EXEC_MATCH_RANGE", 16);
+  size_t min_match_range =
+      dmlc::GetEnv("NNVM_AUTO_SEARCH_MATCH_RANGE", false) ? 1 : max_match_range;
+  for (size_t match_range = min_match_range; match_range <= max_match_range; match_range *= 2) {
+    // Make a copy of related fields
+    StorageVector storage_vec(storage);
+    std::vector<int> storage_inplace_index(idx.num_node_entries(), -1);
+
+    // the allocator
+    GraphAllocator allocator(&idx, match_range);
+
+    // number of entries that are not statically allocated.
+    size_t storage_num_not_allocated =
+        AllocMemory(ret, idx, node_range, &storage_vec, &storage_inplace_index,
+                    ref_count, &allocator);
+    size_t storage_allocated_bytes = allocator.TotalAllocBytes();
+
+    // Choose the plan which leads to minimal memory usage
+    if (min_allocated_bytes > storage_allocated_bytes) {
+      ret.attrs["storage_id"] = std::make_shared<any>(std::move(storage_vec));
+      ret.attrs["storage_inplace_index"] = std::make_shared<any>(std::move(storage_inplace_index));
+      ret.attrs["storage_allocated_bytes"] = std::make_shared<any>(storage_allocated_bytes);
+      ret.attrs["storage_num_not_allocated"] = std::make_shared<any>(storage_num_not_allocated);
+      min_allocated_bytes = storage_allocated_bytes;
+    }
+
+    if (max_match_range == 0) {
+      break;
+    }
+  }
+  return ret;
+}
+
+NNVM_REGISTER_PASS(MXPlanMemory)
+.describe("Plan the memory allocation of each node entry.")
+.set_body(PlanMemory)
+.set_change_graph(false)
+.depend_graph_attr("dtype")
+.depend_graph_attr("shape")
+.provide_graph_attr("storage_id")
+.provide_graph_attr("storage_inplace_index");
+
+}  // namespace
+}  // namespace pass
+}  // namespace nnvm
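With the pass registered, a caller drives it through the standard nnvm pass mechanism. The snippet below is a minimal sketch, not part of the patch: `PlanAndReport` is a hypothetical helper, and it assumes the graph's "shape" and "dtype" attributes have already been populated by the corresponding inference passes. The `match_range` sweep above is controlled from the environment, so setting `NNVM_AUTO_SEARCH_MATCH_RANGE=1` makes the loop try powers of two from 1 up to `NNVM_EXEC_MATCH_RANGE` (default 16) and keep the cheapest plan.

    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>
    #include <nnvm/graph.h>
    #include <nnvm/pass.h>

    // Hypothetical driver: run the memory planner and report its result.
    nnvm::Graph PlanAndReport(nnvm::Graph g) {
      g = nnvm::ApplyPass(std::move(g), "MXPlanMemory");
      // attributes provided by the pass (see provide_graph_attr above)
      const auto& storage_id = g.GetAttr<std::vector<int> >("storage_id");
      const size_t bytes = g.GetAttr<size_t>("storage_allocated_bytes");
      std::cout << storage_id.size() << " entries planned, "
                << bytes << " bytes allocated" << std::endl;
      return g;
    }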
diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h
index f4116e30186e..f407a5cce61b 100644
--- a/src/operator/batch_norm_v1-inl.h
+++ b/src/operator/batch_norm_v1-inl.h
@@ -255,15 +255,15 @@ class BatchNormV1Prop : public OperatorProperty {
     return param_.__DICT__();
   }
 
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]";
-    const TShape &dshape = in_shape->at(0);
+    const mxnet::TShape &dshape = in_shape->at(0);
     if (dshape.ndim() == 0) return false;
-    in_shape->at(1) = TShape(Shape1(dshape[1]));
-    in_shape->at(2) = TShape(Shape1(dshape[1]));
+    in_shape->at(1) = mxnet::TShape(Shape1(dshape[1]));
+    in_shape->at(2) = mxnet::TShape(Shape1(dshape[1]));
     out_shape->clear();
     out_shape->push_back(dshape);
     out_shape->push_back(Shape1(dshape[1]));
@@ -331,7 +331,7 @@ class BatchNormV1Prop : public OperatorProperty {
   }
 
   std::vector<ResourceRequest> BackwardResource(
-      const std::vector<TShape> &in_shape) const override {
+      const mxnet::ShapeVector &in_shape) const override {
     return {ResourceRequest::kTempSpace};
   }
 
@@ -363,7 +363,7 @@ class BatchNormV1Prop : public OperatorProperty {
     return NULL;
   }
 
-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;
 
   inline const BatchNormV1Param& getParam() const {
diff --git a/src/operator/batch_norm_v1.cc b/src/operator/batch_norm_v1.cc
index 2d19107eda1e..dc9f724aff18 100644
--- a/src/operator/batch_norm_v1.cc
+++ b/src/operator/batch_norm_v1.cc
@@ -35,9 +35,9 @@ Operator *CreateOp<cpu>(BatchNormV1Param param, int dtype) {
 }
 
 // DO_BIND_DISPATCH comes from operator_common.h
-Operator *BatchNormV1Prop::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+Operator *BatchNormV1Prop::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                                             std::vector<int> *in_type) const {
-  std::vector<TShape> out_shape, aux_shape;
+  mxnet::ShapeVector out_shape, aux_shape;
   std::vector<int> out_type, aux_type;
   CHECK(InferType(in_type, &out_type, &aux_type));
   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
diff --git a/src/operator/bilinear_sampler-inl.h b/src/operator/bilinear_sampler-inl.h
index 49a5b5e5d5d8..8b1ff38709b6 100644
--- a/src/operator/bilinear_sampler-inl.h
+++ b/src/operator/bilinear_sampler-inl.h
@@ -142,13 +142,13 @@ class BilinearSamplerProp : public OperatorProperty {
     return param_.__DICT__();
   }
 
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, grid]";
-    const TShape &dshape = (*in_shape)[bs::kData];
-    const TShape &lshape = (*in_shape)[bs::kGrid];
+    const mxnet::TShape &dshape = (*in_shape)[bs::kData];
+    const mxnet::TShape &lshape = (*in_shape)[bs::kGrid];
     if (dshape.ndim() == 0) return false;
     CHECK_EQ(dshape.ndim(), 4U) \
       << "input data should be 4D in batch-num_filter-y-x";
@@ -226,7 +226,7 @@ class BilinearSamplerProp : public OperatorProperty {
     return NULL;
   }
 
-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;
 
  private:
diff --git a/src/operator/bilinear_sampler.cc b/src/operator/bilinear_sampler.cc
index c435fdeca481..a2442bffa232 100644
--- a/src/operator/bilinear_sampler.cc
+++ b/src/operator/bilinear_sampler.cc
@@ -170,7 +170,7 @@ Operator* CreateOp<cpu>(BilinearSamplerParam param, int dtype) {
   return op;
 }
 
-Operator *BilinearSamplerProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+Operator *BilinearSamplerProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                                                 std::vector<int> *in_type) const {
   DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
 }
diff --git a/src/operator/contrib/adamw-inl.h b/src/operator/contrib/adamw-inl.h
index 66bd4f3f3ba4..07feaefe87aa 100644
--- a/src/operator/contrib/adamw-inl.h
+++ b/src/operator/contrib/adamw-inl.h
@@ -83,14 +83,14 @@ struct AdamWParam : public dmlc::Parameter<AdamWParam> {
 // total_in = 6: weight, grad, mean, var, weight32, rescale_grad (fp32)
 template<int n_in, int n_out, int total_in>
 inline bool MPUpdateInferShape(const nnvm::NodeAttrs& attrs,
-                               std::vector<TShape> *in_attrs,
-                               std::vector<TShape> *out_attrs) {
+                               mxnet::ShapeVector *in_attrs,
+                               mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), static_cast<size_t>(total_in)) << " in operator " << attrs.name;
   CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out)) << " in operator " << attrs.name;
   // rescale_grad.shape = (1,)
   SHAPE_ASSIGN_CHECK(*in_attrs, total_in - 1, mshadow::Shape1(1));
-  return ElemwiseAttr<TShape, shape_is_none, shape_assign, true, shape_string, n_in, n_out>(
-      attrs, in_attrs, out_attrs, TShape());
+  return ElemwiseAttr<mxnet::TShape, shape_is_none, shape_assign, true, shape_string, n_in, n_out>(
+      attrs, in_attrs, out_attrs, mxnet::TShape());
 }
 
 // rescale_grad is a reserved argument at position -1.
Example: diff --git a/src/operator/contrib/adamw.cc b/src/operator/contrib/adamw.cc index 874cce8d8772..f0716c6020f9 100644 --- a/src/operator/contrib/adamw.cc +++ b/src/operator/contrib/adamw.cc @@ -78,7 +78,7 @@ the update is skipped. .set_num_inputs(6) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", MPUpdateInferShape<2, 1, 6>) +.set_attr("FInferShape", MPUpdateInferShape<2, 1, 6>) .set_attr("FInferType", MPUpdateInferType<2, 1, 6>) .set_attr("FMutateInputs", [](const nnvm::NodeAttrs& attrs) { @@ -121,7 +121,7 @@ the update is skipped. .set_num_inputs(5) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", MPUpdateInferShape<4, 1, 5>) +.set_attr("FInferShape", MPUpdateInferShape<4, 1, 5>) .set_attr("FInferType", MPUpdateInferType<4, 1, 5>) .set_attr("FMutateInputs", [](const nnvm::NodeAttrs& attrs) { diff --git a/src/operator/contrib/adaptive_avg_pooling-inl.h b/src/operator/contrib/adaptive_avg_pooling-inl.h index 12284d9d85d2..0d66de0a5692 100644 --- a/src/operator/contrib/adaptive_avg_pooling-inl.h +++ b/src/operator/contrib/adaptive_avg_pooling-inl.h @@ -48,9 +48,9 @@ namespace mxnet { namespace op { struct AdaptiveAvgPoolParam : public dmlc::Parameter { - TShape output_size; + mxnet::TShape output_size; DMLC_DECLARE_PARAMETER(AdaptiveAvgPoolParam) { - DMLC_DECLARE_FIELD(output_size).set_default(TShape()) + DMLC_DECLARE_FIELD(output_size).set_default(mxnet::TShape()) .describe("int (output size) or a tuple of int for output (height, width)."); } }; @@ -118,13 +118,13 @@ inline void AdaptiveAvgPoolOpBackward(const nnvm::NodeAttrs& attrs, static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; CHECK_EQ(in_shape->size(), 1U) << "Input:[data]"; CHECK_EQ(out_shape->size(), 1U) << "Output:[data]"; const AdaptiveAvgPoolParam& param = nnvm::get(attrs.parsed); - TShape dshape(in_shape->at(0)); + mxnet::TShape dshape(in_shape->at(0)); if (dshape.ndim() == 0) return false; if (param.output_size.ndim() == 0) { dshape[2] = 1; diff --git a/src/operator/contrib/adaptive_avg_pooling.cc b/src/operator/contrib/adaptive_avg_pooling.cc index 720cf9844864..42c39cc157c6 100644 --- a/src/operator/contrib/adaptive_avg_pooling.cc +++ b/src/operator/contrib/adaptive_avg_pooling.cc @@ -215,7 +215,7 @@ The pooling kernel and stride sizes are automatically chosen for desired output .set_attr_parser(ParamParser) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", AdaptiveAvgPoolOpInferShape) +.set_attr("FInferShape", AdaptiveAvgPoolOpInferShape) .set_attr("FCompute", AdaptiveAvgPoolOpForward) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_contrib_AdaptiveAvgPooling2D"}) diff --git a/src/operator/contrib/bilinear_resize-inl.h b/src/operator/contrib/bilinear_resize-inl.h index 5a653d8a175c..46c8e1aa7c0d 100644 --- a/src/operator/contrib/bilinear_resize-inl.h +++ b/src/operator/contrib/bilinear_resize-inl.h @@ -127,13 +127,13 @@ inline void BilinearSampleOpBackward(const nnvm::NodeAttrs& attrs, static bool BilinearSampleOpInferShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; CHECK_EQ(in_shape->size(), 1U) << "Input:[data]"; CHECK_EQ(out_shape->size(), 1U) << "Output:[data]"; const BilinearSampleParam& param = nnvm::get(attrs.parsed); - TShape 
dshape(in_shape->at(0)); + mxnet::TShape dshape(in_shape->at(0)); if (dshape.ndim() == 0) return false; if (param.scale_height.has_value()) { dshape[2] = static_cast(param.scale_height.value() * in_shape->at(0)[2]); diff --git a/src/operator/contrib/bilinear_resize.cc b/src/operator/contrib/bilinear_resize.cc index 074f74aefcc9..1288e9d22691 100644 --- a/src/operator/contrib/bilinear_resize.cc +++ b/src/operator/contrib/bilinear_resize.cc @@ -176,7 +176,7 @@ for more details. .set_attr_parser(ParamParser) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", BilinearSampleOpInferShape) +.set_attr("FInferShape", BilinearSampleOpInferShape) .set_attr("FCompute", BilinearSampleOpForward) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_contrib_BilinearResize2D"}) diff --git a/src/operator/contrib/boolean_mask.cc b/src/operator/contrib/boolean_mask.cc index 18ba8c3fdcf6..e22c493d5e2c 100644 --- a/src/operator/contrib/boolean_mask.cc +++ b/src/operator/contrib/boolean_mask.cc @@ -136,7 +136,7 @@ inline void BooleanMaskForward(const nnvm::NodeAttrs& attrs, valid_num = prefix_sum[idx_size - 1]; }); // set the output shape forcefully - TShape s = data.shape(); + mxnet::TShape s = data.shape(); s[axis] = valid_num; const_cast(out).Init(s); // do the copy diff --git a/src/operator/contrib/boolean_mask.cu b/src/operator/contrib/boolean_mask.cu index 25a781ceec4b..04f61eea0384 100644 --- a/src/operator/contrib/boolean_mask.cu +++ b/src/operator/contrib/boolean_mask.cu @@ -80,7 +80,7 @@ inline void BooleanMaskForward(const nnvm::NodeAttrs& attrs, cudaMemcpyDeviceToHost)); CHECK(valid_num > 0) << "boolean_mask behavior not defined when all masks are 0"; // Set the output shape forcefully - TShape data_shape = data.shape(); + mxnet::TShape data_shape = data.shape(); data_shape[axis] = valid_num; const_cast(out).Init(data_shape); size_t input_size = data.shape().Size(); diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h index 031dd952d386..650e58d0e0cd 100644 --- a/src/operator/contrib/bounding_box-inl.h +++ b/src/operator/contrib/bounding_box-inl.h @@ -26,7 +26,6 @@ #define MXNET_OPERATOR_CONTRIB_BOUNDING_BOX_INL_H_ #include #include -#include #include #include #include @@ -89,8 +88,8 @@ struct BoxNMSParam : public dmlc::Parameter { }; // BoxNMSParam inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const BoxNMSParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 2U); @@ -98,7 +97,7 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs, return false; } - TShape& ishape = (*in_attrs)[0]; + mxnet::TShape& ishape = (*in_attrs)[0]; int indim = ishape.ndim(); CHECK(indim >= 2) << "input must have dim >= 2" @@ -137,7 +136,7 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs, CHECK_NE(id_index, score_index) << "id_index: " << id_index << " conflict with score_index: " << score_index; } - TShape oshape = ishape; + mxnet::TShape oshape = ishape; oshape[indim - 1] = 1; SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape); // out_shape[0] == in_shape SHAPE_ASSIGN_CHECK(*out_attrs, 1, oshape); // out_shape[1] @@ -398,7 +397,7 @@ void BoxNMSForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 2U) << "BoxNMS output: [output, temp]"; const BoxNMSParam& param = nnvm::get(attrs.parsed); Stream *s = ctx.get_stream(); - TShape in_shape = inputs[box_nms_enum::kData].shape_; + mxnet::TShape 
in_shape = inputs[box_nms_enum::kData].shape_; int indim = in_shape.ndim(); int num_batch = indim <= 2? 1 : in_shape.ProdShape(0, indim - 2); int num_elem = in_shape[indim - 2]; @@ -547,7 +546,7 @@ void BoxNMSBackward(const nnvm::NodeAttrs& attrs, CHECK_EQ(inputs.size(), 4U); CHECK_EQ(outputs.size(), 1U); Stream *s = ctx.get_stream(); - TShape in_shape = outputs[box_nms_enum::kData].shape_; + mxnet::TShape in_shape = outputs[box_nms_enum::kData].shape_; int indim = in_shape.ndim(); int num_batch = indim <= 2? 1 : in_shape.ProdShape(0, indim - 2); int num_elem = in_shape[indim - 2]; @@ -579,12 +578,12 @@ struct BoxOverlapParam : public dmlc::Parameter { }; // BoxOverlapParam inline bool BoxOverlapShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); - TShape& lshape = (*in_attrs)[0]; - TShape& rshape = (*in_attrs)[1]; + mxnet::TShape& lshape = (*in_attrs)[0]; + mxnet::TShape& rshape = (*in_attrs)[1]; CHECK_GE(lshape.ndim(), 2) << "lhs must have dim >= 2 " @@ -602,7 +601,7 @@ inline bool BoxOverlapShape(const nnvm::NodeAttrs& attrs, << rdim << " provided"; // assign output shape - TShape oshape(lshape.ndim() + rshape.ndim() - 2); + mxnet::TShape oshape(lshape.ndim() + rshape.ndim() - 2); int idx = 0; for (index_t i = 0; i < lshape.ndim() - 1; ++i) { oshape[idx++] = lshape[i]; @@ -648,8 +647,8 @@ void BoxOverlapForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); const BoxOverlapParam& param = nnvm::get(attrs.parsed); Stream *s = ctx.get_stream(); - TShape lshape = inputs[0].shape_; - TShape rshape = inputs[1].shape_; + mxnet::TShape lshape = inputs[0].shape_; + mxnet::TShape rshape = inputs[1].shape_; int lsize = lshape.ProdShape(0, lshape.ndim() - 1); int rsize = rshape.ProdShape(0, rshape.ndim() - 1); MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { @@ -703,19 +702,19 @@ struct BipartiteMatchingParam : public dmlc::Parameter { }; // BipartiteMatchingParam inline bool MatchingShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { // const BipartiteMatchingParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 2U); - TShape& dshape = (*in_attrs)[0]; + mxnet::TShape& dshape = (*in_attrs)[0]; CHECK_GE(dshape.ndim(), 2) << "score matrix must have dim >= 2 " << dshape.ndim() << " provided"; // assign output shape - TShape oshape(dshape.ndim() - 1); + mxnet::TShape oshape(dshape.ndim() - 1); for (index_t i = 0; i < dshape.ndim() - 1; ++i) { oshape[i] = dshape[i]; } @@ -772,7 +771,7 @@ void BipartiteMatchingForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 2U); const BipartiteMatchingParam& param = nnvm::get(attrs.parsed); Stream *s = ctx.get_stream(); - TShape dshape = inputs[0].shape_; + mxnet::TShape dshape = inputs[0].shape_; int row = dshape[dshape.ndim() - 2]; int col = dshape[dshape.ndim() - 1]; int batch_size = dshape.Size() / row / col; diff --git a/src/operator/contrib/bounding_box.cc b/src/operator/contrib/bounding_box.cc index 56925f94de55..d73f99245118 100644 --- a/src/operator/contrib/bounding_box.cc +++ b/src/operator/contrib/bounding_box.cc @@ -91,7 +91,7 @@ Examples:: .set_num_outputs(2) .set_attr_parser(ParamParser) .set_attr("FNumVisibleOutputs", BoxNMSNumVisibleOutputs) -.set_attr("FInferShape", BoxNMSShape) +.set_attr("FInferShape", 
BoxNMSShape) .set_attr("FInferType", ElemwiseType<1, 2>) .set_attr("FResourceRequest", [](const NodeAttrs& attrs) { @@ -135,7 +135,7 @@ NNVM_REGISTER_OP(_contrib_box_iou) [](const NodeAttrs& attrs) { return std::vector{"lhs", "rhs"}; }) -.set_attr("FInferShape", BoxOverlapShape) +.set_attr("FInferShape", BoxOverlapShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FCompute", BoxOverlapForward) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_contrib_box_iou"}) @@ -181,7 +181,7 @@ NNVM_REGISTER_OP(_contrib_bipartite_matching) [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.set_attr("FInferShape", MatchingShape) +.set_attr("FInferShape", MatchingShape) .set_attr("FInferType", ElemwiseType<1, 2>) .set_attr("FCompute", BipartiteMatchingForward) .set_attr("FGradient", diff --git a/src/operator/contrib/count_sketch-inl.h b/src/operator/contrib/count_sketch-inl.h index dd3bf54ab6a6..f3a294f6ad46 100644 --- a/src/operator/contrib/count_sketch-inl.h +++ b/src/operator/contrib/count_sketch-inl.h @@ -76,8 +76,8 @@ class CountSketchOp : public Operator { // h and s should be 1d vectors Tensor data = in_data[CountSketch::kData].FlatTo2D(s); - const TShape& hshape = in_data[CountSketch::kH].shape_; - const TShape& sshape = in_data[CountSketch::kS].shape_; + const mxnet::TShape& hshape = in_data[CountSketch::kH].shape_; + const mxnet::TShape& sshape = in_data[CountSketch::kS].shape_; Tensor h = in_data[CountSketch::kH].get_with_shape( Shape1(hshape.ProdShape(0, hshape.ndim())), s); Tensor ss = in_data[CountSketch::kS].get_with_shape( @@ -103,8 +103,8 @@ class CountSketchOp : public Operator { Tensor ograd = out_grad[CountSketch::kOut].FlatTo2D(s); Tensor dgrad = in_grad[CountSketch::kData].FlatTo2D(s); - const TShape& hshape = in_data[CountSketch::kH].shape_; - const TShape& sshape = in_data[CountSketch::kS].shape_; + const mxnet::TShape& hshape = in_data[CountSketch::kH].shape_; + const mxnet::TShape& sshape = in_data[CountSketch::kS].shape_; Tensor h = in_data[CountSketch::kH].get_with_shape( Shape1(hshape.ProdShape(0, hshape.ndim())), s); Tensor ss = in_data[CountSketch::kS].get_with_shape( @@ -144,12 +144,12 @@ class CountSketchProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) <<"Input:[data, h, s]"; - const TShape &dshape = (*in_shape)[CountSketch::kData]; + const mxnet::TShape &dshape = (*in_shape)[CountSketch::kData]; // require data to be known if (dshape.ndim() == 0) return false; @@ -229,7 +229,7 @@ class CountSketchProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/count_sketch.cc b/src/operator/contrib/count_sketch.cc index ca239b63246f..4b6504e564ee 100644 --- a/src/operator/contrib/count_sketch.cc +++ b/src/operator/contrib/count_sketch.cc @@ -32,9 +32,9 @@ Operator *CreateOp(CountSketchParam param, int dtype) { LOG(FATAL) << "CountSketch is only available for GPU."; return nullptr; } -Operator *CountSketchProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *CountSketchProp::CreateOperatorEx(Context ctx, 
mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferType(in_type, &out_type, &aux_type)); CHECK(InferShape(in_shape, &out_shape, &aux_shape)); diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h index 7328eb38308f..f50641fca6d6 100644 --- a/src/operator/contrib/deformable_convolution-inl.h +++ b/src/operator/contrib/deformable_convolution-inl.h @@ -57,10 +57,10 @@ namespace conv { } struct DeformableConvolutionParam : public dmlc::Parameter { - TShape kernel; - TShape stride; - TShape dilate; - TShape pad; + mxnet::TShape kernel; + mxnet::TShape stride; + mxnet::TShape dilate; + mxnet::TShape pad; uint32_t num_filter; uint32_t num_group; uint32_t num_deformable_group; @@ -69,11 +69,11 @@ struct DeformableConvolutionParam : public dmlc::Parameter layout; DMLC_DECLARE_PARAMETER(DeformableConvolutionParam) { DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) .describe("Convolution stride: (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(dilate).set_default(TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) .describe("Convolution dilate: (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) .describe("Zero pad for convolution: (h, w) or (d, h, w). Defaults to no padding."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Convolution filter(channel) number"); @@ -127,7 +127,7 @@ class DeformableConvolutionOp : public Operator { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); for (size_t i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_data[0].shape_[i + 1]; @@ -189,7 +189,7 @@ class DeformableConvolutionOp : public Operator { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_grad[conv::kData].shape_[i + 1]; @@ -265,7 +265,9 @@ class DeformableConvolutionOp : public Operator { } private: - void LayerSetUp(const TShape& ishape, const TShape& offset_shape, const TShape& oshape) { + void LayerSetUp(const mxnet::TShape& ishape, + const mxnet::TShape& offset_shape, + const mxnet::TShape& oshape) { channel_axis_ = 1; // hard code channel axis const index_t first_spatial_axis = channel_axis_ + 1; const index_t num_axes = param_.kernel.ndim() + 2; @@ -325,8 +327,8 @@ class DeformableConvolutionOp : public Operator { template Operator* CreateOp(DeformableConvolutionParam param, int dtype, - std::vector *in_shape, - std::vector *out_shape, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, Context ctx); #if DMLC_USE_CXX11 @@ -357,18 +359,18 @@ class DeformableConvolutionProp : public 
OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; if (!param_.no_bias) { CHECK_EQ(in_shape->size(), 4U) << "Input:[data, offset, weight, bias]"; } else { CHECK_EQ(in_shape->size(), 3U) << "Input:[data, offset, weight]"; } - out_shape->resize(1, TShape()); - const TShape &dshp = (*in_shape)[conv::kData]; - const TShape &oshp = (*in_shape)[conv::kOffset]; + out_shape->resize(1, mxnet::TShape()); + const mxnet::TShape &dshp = (*in_shape)[conv::kData]; + const mxnet::TShape &oshp = (*in_shape)[conv::kOffset]; if (dshp.ndim() == 0) return false; if (param_.kernel.ndim() == 2) { // 2d conv @@ -484,12 +486,12 @@ class DeformableConvolutionProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return{ ResourceRequest::kTempSpace }; } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return{ ResourceRequest::kTempSpace }; } @@ -498,7 +500,7 @@ class DeformableConvolutionProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/deformable_convolution.cc b/src/operator/contrib/deformable_convolution.cc index 78a7a1250d3c..8bb1ae23f40d 100644 --- a/src/operator/contrib/deformable_convolution.cc +++ b/src/operator/contrib/deformable_convolution.cc @@ -33,8 +33,8 @@ DMLC_REGISTER_PARAMETER(DeformableConvolutionParam); template<> Operator* CreateOp(DeformableConvolutionParam param, int dtype, - std::vector *in_shape, - std::vector *out_shape, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, Context ctx) { Operator *op = nullptr; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { @@ -45,9 +45,9 @@ Operator* CreateOp(DeformableConvolutionParam param, int dtype, // DO_BIND_DISPATCH comes from operator_common.h Operator *DeformableConvolutionProp::CreateOperatorEx(Context ctx, - std::vector *in_shape, + mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferType(in_type, &out_type, &aux_type)); CHECK(InferShape(in_shape, &out_shape, &aux_shape)); diff --git a/src/operator/contrib/deformable_convolution.cu b/src/operator/contrib/deformable_convolution.cu index f2200a9978ca..0e8151229a4a 100644 --- a/src/operator/contrib/deformable_convolution.cu +++ b/src/operator/contrib/deformable_convolution.cu @@ -33,8 +33,8 @@ namespace op { template<> Operator* CreateOp(DeformableConvolutionParam param, int dtype, - std::vector *in_shape, - std::vector *out_shape, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, Context ctx) { Operator *op = NULL; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { diff --git a/src/operator/contrib/deformable_psroi_pooling-inl.h b/src/operator/contrib/deformable_psroi_pooling-inl.h index d391f045a1b5..e466c065abbc 100644 --- a/src/operator/contrib/deformable_psroi_pooling-inl.h +++ b/src/operator/contrib/deformable_psroi_pooling-inl.h @@ -49,7 +49,7 @@ namespace deformablepsroipool { } // 
deformablepsroipool struct DeformablePSROIPoolingParam : public dmlc::Parameter { - // TShape pooled_size; + // mxnet::TShape pooled_size; float spatial_scale; int output_dim; int group_size; @@ -215,25 +215,25 @@ class DeformablePSROIPoolingProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; if (param_.no_trans) { CHECK_EQ(in_shape->size(), 2) << "Input:[data, rois]"; } else { CHECK_EQ(in_shape->size(), 3) << "Input:[data, rois, trans]"; // trans: [num_rois, 2, pooled_h, pooled_w] - TShape tshape = in_shape->at(deformablepsroipool::kTrans); + mxnet::TShape tshape = in_shape->at(deformablepsroipool::kTrans); CHECK_EQ(tshape.ndim(), 4) << "trans should be a 4D tensor of shape"; } // data: [batch_size, c, h, w] - TShape dshape = in_shape->at(deformablepsroipool::kData); + mxnet::TShape dshape = in_shape->at(deformablepsroipool::kData); CHECK_EQ(dshape.ndim(), 4) << "data should be a 4D tensor"; // bbox: [num_rois, 5] - TShape bshape = in_shape->at(deformablepsroipool::kBox); + mxnet::TShape bshape = in_shape->at(deformablepsroipool::kBox); CHECK_EQ(bshape.ndim(), 2) << "bbox should be a 2D tensor of shape [batch, 5]"; CHECK_EQ(bshape[1], 5) << "bbox should be a 2D tensor of shape [batch, 5]"; @@ -292,7 +292,7 @@ class DeformablePSROIPoolingProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; diff --git a/src/operator/contrib/deformable_psroi_pooling.cc b/src/operator/contrib/deformable_psroi_pooling.cc index 6aaf607f059a..d9d4cf8f78c5 100644 --- a/src/operator/contrib/deformable_psroi_pooling.cc +++ b/src/operator/contrib/deformable_psroi_pooling.cc @@ -89,9 +89,9 @@ namespace op { } Operator *DeformablePSROIPoolingProp::CreateOperatorEx( - Context ctx, std::vector *in_shape, + Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferType(in_type, &out_type, &aux_type)); CHECK(InferShape(in_shape, &out_shape, &aux_shape)); diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index a03cbef0b5ca..f19af84ce9c6 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -241,8 +241,8 @@ static bool CSRNeighborNonUniformSampleStorageType(const nnvm::NodeAttrs& attrs, * Check uniform Shape */ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const NeighborSampleParam& params = nnvm::get(attrs.parsed); @@ -259,7 +259,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, // Output bool success = true; - TShape out_shape(1); + mxnet::TShape out_shape(1); // We use the last element to store the actual // number of vertices in the subgraph. 
out_shape[0] = params.max_num_vertices + 1; @@ -270,7 +270,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, out_attrs->at(i).Size() != 0U; } // sub_csr - TShape out_csr_shape(2); + mxnet::TShape out_csr_shape(2); out_csr_shape[0] = params.max_num_vertices; out_csr_shape[1] = in_attrs->at(0)[1]; for (size_t i = 0; i < num_subgraphs; i++) { @@ -280,7 +280,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, out_attrs->at(i + num_subgraphs).Size() != 0U; } // sub_layer - TShape out_layer_shape(1); + mxnet::TShape out_layer_shape(1); out_layer_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_layer_shape); @@ -296,8 +296,8 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, * Check non-uniform Shape */ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const NeighborSampleParam& params = nnvm::get(attrs.parsed); @@ -317,7 +317,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, // Output bool success = true; - TShape out_shape(1); + mxnet::TShape out_shape(1); // We use the last element to store the actual // number of vertices in the subgraph. out_shape[0] = params.max_num_vertices + 1; @@ -328,7 +328,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, out_attrs->at(i).Size() != 0U; } // sub_csr - TShape out_csr_shape(2); + mxnet::TShape out_csr_shape(2); out_csr_shape[0] = params.max_num_vertices; out_csr_shape[1] = in_attrs->at(0)[1]; for (size_t i = 0; i < num_subgraphs; i++) { @@ -338,7 +338,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, out_attrs->at(i + num_subgraphs).Size() != 0U; } // sub_probability - TShape out_prob_shape(1); + mxnet::TShape out_prob_shape(1); out_prob_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_prob_shape); @@ -347,7 +347,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, out_attrs->at(i + 2*num_subgraphs).Size() != 0U; } // sub_layer - TShape out_layer_shape(1); + mxnet::TShape out_layer_shape(1); out_layer_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, out_prob_shape); @@ -679,8 +679,8 @@ static void SampleSubgraph(const NDArray &csr, } } // Construct sub_csr_graph - TShape shape_1(1); - TShape shape_2(1); + mxnet::TShape shape_1(1); + mxnet::TShape shape_2(1); shape_1[0] = num_edges; shape_2[0] = max_num_vertices+1; sub_csr.CheckAndAllocData(shape_1); @@ -809,7 +809,7 @@ of max_num_vertices, and the valid number of vertices is the same as the ones in return num_subgraphs * 3; }) .set_attr("FInferStorageType", CSRNeighborUniformSampleStorageType) -.set_attr("FInferShape", CSRNeighborUniformSampleShape) +.set_attr("FInferShape", CSRNeighborUniformSampleShape) .set_attr("FInferType", CSRNeighborUniformSampleType) .set_attr("FComputeEx", CSRNeighborUniformSampleComputeExCPU) .add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix") @@ -908,7 +908,7 @@ of max_num_vertices, and the valid number of vertices is the same as the ones in return num_subgraphs * 4; }) .set_attr("FInferStorageType", CSRNeighborNonUniformSampleStorageType) -.set_attr("FInferShape", 
CSRNeighborNonUniformSampleShape) +.set_attr("FInferShape", CSRNeighborNonUniformSampleShape) .set_attr("FInferType", CSRNeighborNonUniformSampleType) .set_attr("FComputeEx", CSRNeighborNonUniformSampleComputeExCPU) .add_argument("csr_matrix", "NDArray-or-Symbol", "csr matrix") @@ -951,8 +951,8 @@ static bool DGLSubgraphStorageType(const nnvm::NodeAttrs& attrs, } static bool DGLSubgraphShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const DGLSubgraphParam& params = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->at(0).ndim(), 2U); for (size_t i = 1; i < in_attrs->size(); i++) @@ -960,13 +960,13 @@ static bool DGLSubgraphShape(const nnvm::NodeAttrs& attrs, size_t num_g = params.num_args - 1; for (size_t i = 0; i < num_g; i++) { - TShape gshape(2); + mxnet::TShape gshape(2); gshape[0] = in_attrs->at(i + 1)[0]; gshape[1] = in_attrs->at(i + 1)[0]; out_attrs->at(i) = gshape; } for (size_t i = num_g; i < out_attrs->size(); i++) { - TShape gshape(2); + mxnet::TShape gshape(2); gshape[0] = in_attrs->at(i - num_g + 1)[0]; gshape[1] = in_attrs->at(i - num_g + 1)[0]; out_attrs->at(i) = gshape; @@ -1081,9 +1081,9 @@ static void GetSubgraph(const NDArray &csr_arr, const NDArray &varr, row_idx[i + 1] = col_idx.size(); } - TShape nz_shape(1); + mxnet::TShape nz_shape(1); nz_shape[0] = col_idx.size(); - TShape indptr_shape(1); + mxnet::TShape indptr_shape(1); indptr_shape[0] = row_idx.size(); // Store the non-zeros in a subgraph with edge attributes of new edge ids. @@ -1176,7 +1176,7 @@ edge Ids. return names; }) .set_attr("FInferStorageType", DGLSubgraphStorageType) -.set_attr("FInferShape", DGLSubgraphShape) +.set_attr("FInferShape", DGLSubgraphShape) .set_attr("FInferType", DGLSubgraphType) .set_attr("FComputeEx", DGLSubgraphComputeExCPU) .set_attr("key_var_num_args", "num_args") @@ -1188,8 +1188,8 @@ edge Ids. ///////////////////////// Edge Id /////////////////////////// inline bool EdgeIDShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 3U); CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(1).ndim(), 1U); @@ -1339,7 +1339,7 @@ The storage type of ``edge_id`` output depends on storage types of inputs [](const NodeAttrs& attrs) { return std::vector{"data", "u", "v"}; }) -.set_attr("FInferShape", EdgeIDShape) +.set_attr("FInferShape", EdgeIDShape) .set_attr("FInferType", EdgeIDType) .set_attr("FInferStorageType", EdgeIDStorageType) .set_attr("FComputeEx", EdgeIDForwardEx) @@ -1350,8 +1350,8 @@ The storage type of ``edge_id`` output depends on storage types of inputs ///////////////////////// DGL Adjacency /////////////////////////// inline bool DGLAdjacencyShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); @@ -1411,7 +1411,7 @@ the data value of float32. 
[](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", DGLAdjacencyShape) +.set_attr("FInferShape", DGLAdjacencyShape) .set_attr("FInferType", DGLAdjacencyType) .set_attr("FInferStorageType", DGLAdjacencyStorageType) .set_attr("FComputeEx", DGLAdjacencyForwardEx) @@ -1460,9 +1460,9 @@ static void CompactSubgraph(const NDArray &csr, const NDArray &vids, CHECK_NE(row_ids[i], -1); } - TShape nz_shape(1); + mxnet::TShape nz_shape(1); nz_shape[0] = num_elems; - TShape indptr_shape(1); + mxnet::TShape indptr_shape(1); CHECK_EQ(out_csr.shape()[0], graph_size); indptr_shape[0] = graph_size + 1; CHECK_GE(in_ptr_data.shape_[0], indptr_shape[0]); @@ -1522,8 +1522,8 @@ static bool SubgraphCompactStorageType(const nnvm::NodeAttrs& attrs, } static bool SubgraphCompactShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const SubgraphCompactParam& params = nnvm::get(attrs.parsed); size_t num_g = get_num_graphs(params); CHECK_EQ(num_g * 2, in_attrs->size()); @@ -1540,7 +1540,7 @@ static bool SubgraphCompactShape(const nnvm::NodeAttrs& attrs, } for (size_t i = 0; i < num_g; i++) { - TShape gshape(2); + mxnet::TShape gshape(2); gshape[0] = params.graph_sizes[i]; gshape[1] = params.graph_sizes[i]; out_attrs->at(i) = gshape; @@ -1620,7 +1620,7 @@ empty rows and empty columns. return names; }) .set_attr("FInferStorageType", SubgraphCompactStorageType) -.set_attr("FInferShape", SubgraphCompactShape) +.set_attr("FInferShape", SubgraphCompactShape) .set_attr("FInferType", SubgraphCompactType) .set_attr("FComputeEx", SubgraphCompactComputeExCPU) .set_attr("key_var_num_args", "num_args") diff --git a/src/operator/contrib/fft-inl.h b/src/operator/contrib/fft-inl.h index c5c8574f19e7..247f6290c02a 100644 --- a/src/operator/contrib/fft-inl.h +++ b/src/operator/contrib/fft-inl.h @@ -90,9 +90,9 @@ class FFTOp : public Operator { Stream *s = ctx.get_stream(); - // const TShape& oshape = out_data[fft::kOutComplex].shape_; - const TShape& ishape = in_data[fft::kData].shape_; - const TShape& oshape = out_data[fft::kOutComplex].shape_; + // const mxnet::TShape& oshape = out_data[fft::kOutComplex].shape_; + const mxnet::TShape& ishape = in_data[fft::kData].shape_; + const mxnet::TShape& oshape = out_data[fft::kOutComplex].shape_; Tensor data = in_data[fft::kData].get_with_shape( Shape2(n_ffts, dim_), s); Tensor out = out_data[fft::kOutComplex].get_with_shape( @@ -153,8 +153,8 @@ class FFTOp : public Operator { Stream *s = ctx.get_stream(); - const TShape& ishape = in_grad[fft::kData].shape_; - const TShape& oshape = out_grad[fft::kOutComplex].shape_; + const mxnet::TShape& ishape = in_grad[fft::kData].shape_; + const mxnet::TShape& oshape = out_grad[fft::kOutComplex].shape_; Tensor gdata = in_grad[fft::kData].get_with_shape( Shape2(n_ffts, dim_), s); Tensor grad = out_grad[fft::kOutComplex].get_with_shape( @@ -234,12 +234,12 @@ class FFTProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1) <<"Input:[data]"; - const TShape &dshape = (*in_shape)[fft::kData]; + const mxnet::TShape &dshape = (*in_shape)[fft::kData]; // require data to be known if (dshape.ndim() == 0) return false; @@ -289,12 +289,12 
@@ class FFTProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -311,7 +311,7 @@ class FFTProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/fft.cc b/src/operator/contrib/fft.cc index 4a4395836e3f..1262835cbb58 100644 --- a/src/operator/contrib/fft.cc +++ b/src/operator/contrib/fft.cc @@ -33,7 +33,7 @@ Operator *CreateOp(FFTParam param, int dtype) { return nullptr; } -Operator *FFTProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *FFTProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/contrib/ifft-inl.h b/src/operator/contrib/ifft-inl.h index da560c3c5178..e53c0f60fa9e 100644 --- a/src/operator/contrib/ifft-inl.h +++ b/src/operator/contrib/ifft-inl.h @@ -88,8 +88,8 @@ class IFFTOp : public Operator { } Stream *s = ctx.get_stream(); - const TShape& ishape = in_data[ifft::kData].shape_; - const TShape& oshape = out_data[ifft::kOut].shape_; + const mxnet::TShape& ishape = in_data[ifft::kData].shape_; + const mxnet::TShape& oshape = out_data[ifft::kOut].shape_; Tensor data = in_data[ifft::kData].get_with_shape( Shape2(n_iffts, dim_*2), s); Tensor out = out_data[ifft::kOut].get_with_shape( @@ -150,8 +150,8 @@ class IFFTOp : public Operator { Stream *s = ctx.get_stream(); - const TShape& ishape = in_grad[ifft::kData].shape_; - const TShape& oshape = out_grad[ifft::kOut].shape_; + const mxnet::TShape& ishape = in_grad[ifft::kData].shape_; + const mxnet::TShape& oshape = out_grad[ifft::kOut].shape_; Tensor gdata = in_grad[ifft::kData].get_with_shape( Shape2(n_iffts, dim_*2), s); Tensor grad = out_grad[ifft::kOut].get_with_shape( @@ -224,12 +224,12 @@ class IFFTProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1) <<"Input:[data]"; - const TShape &dshape = (*in_shape)[ifft::kData]; + const mxnet::TShape &dshape = (*in_shape)[ifft::kData]; // require data to be known if (dshape.ndim() == 0) return false; @@ -280,12 +280,12 @@ class IFFTProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -302,7 +302,7 @@ class IFFTProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/ifft.cc 
b/src/operator/contrib/ifft.cc index cb4605d8b787..f60220e6190f 100644 --- a/src/operator/contrib/ifft.cc +++ b/src/operator/contrib/ifft.cc @@ -34,7 +34,7 @@ Operator *CreateOp(IFFTParam param, int dtype) { return nullptr; } -Operator *IFFTProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *IFFTProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/contrib/index_copy-inl.h b/src/operator/contrib/index_copy-inl.h index 923fb0f4f138..d93bf47949a8 100644 --- a/src/operator/contrib/index_copy-inl.h +++ b/src/operator/contrib/index_copy-inl.h @@ -141,8 +141,8 @@ void IndexCopyBackward(const nnvm::NodeAttrs& attrs, } inline bool IndexCopyShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { // inputs[0]: original tensor // inputs[1]: index vector // inputs[2]: copied tensor diff --git a/src/operator/contrib/index_copy.cc b/src/operator/contrib/index_copy.cc index 70a32a1b2d99..bcf6c02d3d37 100644 --- a/src/operator/contrib/index_copy.cc +++ b/src/operator/contrib/index_copy.cc @@ -67,7 +67,7 @@ Examples:: )code" ADD_FILELINE) .set_num_inputs(3) .set_num_outputs(1) -.set_attr("FInferShape", IndexCopyShape) +.set_attr("FInferShape", IndexCopyShape) .set_attr("FInferType", IndexCopyType) .set_attr("FGradient", ElemwiseGradUseIn{"_contrib_backward_index_copy"}) .set_attr("FCompute", IndexCopyForward) diff --git a/src/operator/contrib/krprod.cc b/src/operator/contrib/krprod.cc index 8fc7661afb78..f0325645f4dd 100644 --- a/src/operator/contrib/krprod.cc +++ b/src/operator/contrib/krprod.cc @@ -42,8 +42,8 @@ namespace op { inline bool KhatriRaoShape( const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(out_attrs->size(), 1); CHECK_GE(in_attrs->size(), 1); @@ -51,7 +51,7 @@ inline bool KhatriRaoShape( // (when inputs_transposed is set to true/false) int num_columns = static_cast((*in_attrs)[0][1]); int num_rows = 1; - for (const TShape& attr_shape : (*in_attrs)) { + for (const mxnet::TShape& attr_shape : (*in_attrs)) { CHECK_EQ(num_columns, static_cast(attr_shape[1])); num_rows *= attr_shape[0]; } @@ -112,7 +112,7 @@ Example:: return ret; }) .set_num_outputs(1) -.set_attr("FInferShape", KhatriRaoShape) +.set_attr("FInferShape", KhatriRaoShape) .set_attr("FInferType", [](const nnvm::NodeAttrs& attrs, std::vector *in_attrs, diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h index e2ba7c48df2f..4b9a41c2fa87 100644 --- a/src/operator/contrib/multi_proposal-inl.h +++ b/src/operator/contrib/multi_proposal-inl.h @@ -102,12 +102,12 @@ class MultiProposalProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]"; - const TShape &dshape = in_shape->at(proposal::kClsProb); + const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb); if (dshape.ndim() == 0) return false; Shape<4> bbox_pred_shape; bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]); @@ -135,7 +135,7 @@ 
class MultiProposalProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } diff --git a/src/operator/contrib/multibox_detection-inl.h b/src/operator/contrib/multibox_detection-inl.h index fcf22727ffb1..977126ad269d 100644 --- a/src/operator/contrib/multibox_detection-inl.h +++ b/src/operator/contrib/multibox_detection-inl.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -87,7 +86,7 @@ class MultiBoxDetectionOp : public Operator { using namespace mshadow; using namespace mshadow::expr; CHECK_EQ(in_data.size(), 3U) << "Input: [cls_prob, loc_pred, anchor]"; - TShape ashape = in_data[mboxdet_enum::kAnchor].shape_; + mxnet::TShape ashape = in_data[mboxdet_enum::kAnchor].shape_; CHECK_EQ(out_data.size(), 1U); Stream *s = ctx.get_stream(); @@ -147,14 +146,14 @@ class MultiBoxDetectionProp : public OperatorProperty { return {"cls_prob", "loc_pred", "anchor"}; } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Inputs: [cls_prob, loc_pred, anchor]"; - TShape cshape = in_shape->at(mboxdet_enum::kClsProb); - TShape lshape = in_shape->at(mboxdet_enum::kLocPred); - TShape ashape = in_shape->at(mboxdet_enum::kAnchor); + mxnet::TShape cshape = in_shape->at(mboxdet_enum::kClsProb); + mxnet::TShape lshape = in_shape->at(mboxdet_enum::kLocPred); + mxnet::TShape ashape = in_shape->at(mboxdet_enum::kAnchor); CHECK_EQ(cshape.ndim(), 3U) << "Provided: " << cshape; CHECK_EQ(lshape.ndim(), 2U) << "Provided: " << lshape; CHECK_EQ(ashape.ndim(), 3U) << "Provided: " << ashape; @@ -162,7 +161,7 @@ class MultiBoxDetectionProp : public OperatorProperty { CHECK_EQ(cshape[2] * 4, lshape[1]) << "# anchors mismatch with # loc"; CHECK_GT(ashape[1], 0U) << "Number of anchors must > 0"; CHECK_EQ(ashape[2], 4U); - TShape oshape = TShape(3); + mxnet::TShape oshape = mxnet::TShape(3); oshape[0] = cshape[0]; oshape[1] = ashape[1]; oshape[2] = 6; // [id, prob, xmin, ymin, xmax, ymax] @@ -182,7 +181,7 @@ class MultiBoxDetectionProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -191,7 +190,7 @@ class MultiBoxDetectionProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/multibox_detection.cc b/src/operator/contrib/multibox_detection.cc index b4f66d8fcf1d..8d1082914df7 100644 --- a/src/operator/contrib/multibox_detection.cc +++ b/src/operator/contrib/multibox_detection.cc @@ -205,9 +205,9 @@ Operator *CreateOp(MultiBoxDetectionParam param, int dtype) { } Operator* MultiBoxDetectionProp::CreateOperatorEx(Context ctx, - std::vector *in_shape, + mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferShape(in_shape, &out_shape, &aux_shape)); CHECK(InferType(in_type, &out_type, &aux_type)); diff --git 
a/src/operator/contrib/multibox_prior-inl.h b/src/operator/contrib/multibox_prior-inl.h index 6602b43ca01f..3636a6016bd2 100644 --- a/src/operator/contrib/multibox_prior-inl.h +++ b/src/operator/contrib/multibox_prior-inl.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -169,19 +168,19 @@ class MultiBoxPriorProp: public OperatorProperty { return {"data"}; } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1) << "Inputs: [data]" << in_shape->size(); - TShape dshape = in_shape->at(mboxprior_enum::kData); + mxnet::TShape dshape = in_shape->at(mboxprior_enum::kData); CHECK_GE(dshape.ndim(), 4) << "Input data should be 4D: batch-channel-y-x"; int in_height = dshape[2]; CHECK_GT(in_height, 0) << "Input height should > 0"; int in_width = dshape[3]; CHECK_GT(in_width, 0) << "Input width should > 0"; // since input sizes are same in each batch, we could share MultiBoxPrior - TShape oshape = TShape(3); + mxnet::TShape oshape = mxnet::TShape(3); int num_sizes = param_.sizes.ndim(); int num_ratios = param_.ratios.ndim(); oshape[0] = 1; @@ -208,7 +207,7 @@ class MultiBoxPriorProp: public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/multibox_prior.cc b/src/operator/contrib/multibox_prior.cc index 579ea608aa9f..ee8f5bfac772 100644 --- a/src/operator/contrib/multibox_prior.cc +++ b/src/operator/contrib/multibox_prior.cc @@ -83,9 +83,9 @@ Operator* CreateOp(MultiBoxPriorParam param, int dtype) { return op; } -Operator* MultiBoxPriorProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator* MultiBoxPriorProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferShape(in_shape, &out_shape, &aux_shape)); CHECK(InferType(in_type, &out_type, &aux_type)); diff --git a/src/operator/contrib/multibox_target-inl.h b/src/operator/contrib/multibox_target-inl.h index daf870a1517a..f7a92882650c 100644 --- a/src/operator/contrib/multibox_target-inl.h +++ b/src/operator/contrib/multibox_target-inl.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -211,26 +210,26 @@ class MultiBoxTargetProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input: [anchor, label, clsPred]"; - TShape ashape = in_shape->at(mboxtarget_enum::kAnchor); + mxnet::TShape ashape = in_shape->at(mboxtarget_enum::kAnchor); CHECK_EQ(ashape.ndim(), 3) << "Anchor should be batch shared N*4 tensor"; CHECK_EQ(ashape[0], 1) << "Anchors are shared across batches, first dimension should be 1"; CHECK_GT(ashape[1], 0) << "Number boxes should be greater than 0"; CHECK_EQ(ashape[2], 4) << "Box dimension should be 4: [xmin, ymin, xmax, ymax]"; - TShape lshape = 
in_shape->at(mboxtarget_enum::kLabel); + mxnet::TShape lshape = in_shape->at(mboxtarget_enum::kLabel); CHECK_EQ(lshape.ndim(), 3) << "Label should be [batch, num_labels, label_width] tensor"; CHECK_GT(lshape[1], 0) << "Padded label should be greater than 0"; CHECK_GE(lshape[2], 5) << "Label width should be greater than or equal to 5"; - TShape pshape = in_shape->at(mboxtarget_enum::kClsPred); + mxnet::TShape pshape = in_shape->at(mboxtarget_enum::kClsPred); CHECK_EQ(pshape.ndim(), 3) << "Prediction: [batch, num_classes, num_anchors]"; CHECK_EQ(pshape[2], ashape[1]) << "Number of anchors mismatch"; - TShape loc_shape = Shape2(lshape[0], ashape.Size()); // batch - (num_box * 4) - TShape lm_shape = loc_shape; - TShape label_shape = Shape2(lshape[0], ashape[1]); // batch - num_box + mxnet::TShape loc_shape = Shape2(lshape[0], ashape.Size()); // batch - (num_box * 4) + mxnet::TShape lm_shape = loc_shape; + mxnet::TShape label_shape = Shape2(lshape[0], ashape[1]); // batch - num_box out_shape->clear(); out_shape->push_back(loc_shape); out_shape->push_back(lm_shape); @@ -257,7 +256,7 @@ class MultiBoxTargetProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -266,7 +265,7 @@ class MultiBoxTargetProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/contrib/multibox_target.cc b/src/operator/contrib/multibox_target.cc index 093234b59ec3..a1f2aac250ff 100644 --- a/src/operator/contrib/multibox_target.cc +++ b/src/operator/contrib/multibox_target.cc @@ -291,9 +291,9 @@ Operator *CreateOp(MultiBoxTargetParam param, int dtype) { return op; } -Operator* MultiBoxTargetProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator* MultiBoxTargetProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferShape(in_shape, &out_shape, &aux_shape)); CHECK(InferType(in_type, &out_type, &aux_type)); diff --git a/src/operator/contrib/nn/deformable_im2col.cuh b/src/operator/contrib/nn/deformable_im2col.cuh index 5914184d5bbe..5f206d23d8d7 100644 --- a/src/operator/contrib/nn/deformable_im2col.cuh +++ b/src/operator/contrib/nn/deformable_im2col.cuh @@ -283,8 +283,8 @@ __global__ void deformable_im2col_gpu_kernel(const int n, const DType* data_im, template inline void deformable_im2col(mshadow::Stream* s, const DType* data_im, const DType* data_offset, - const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, const TShape& dilation, + const mxnet::TShape& im_shape, const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, const mxnet::TShape& dilation, const uint32_t deformable_group, DType* data_col) { // num_axes should be smaller than block size index_t num_spatial_axes = kernel_shape.ndim(); @@ -381,9 +381,9 @@ __global__ void deformable_col2im_gpu_kernel(const int n, const DType* data_col, template inline void deformable_col2im(mshadow::Stream* s, const DType* data_col, const DType* data_offset, - const TShape& im_shape, const TShape& col_shape, const TShape& 
kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, const uint32_t deformable_group, + const mxnet::TShape& im_shape, const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, const uint32_t deformable_group, DType* grad_im, OpReqType req) { index_t num_spatial_axes = kernel_shape.ndim(); index_t im_size = im_shape.ProdShape(1, im_shape.ndim()); @@ -489,10 +489,10 @@ __global__ void deformable_col2im_coord_gpu_kernel(const int n, const DType* dat */ template inline void deformable_col2im_coord(mshadow::Stream* s, - const DType* data_col, const DType* data_im, const DType* data_offset, const TShape& im_shape, - const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, const uint32_t deformable_group, DType* grad_offset, OpReqType req) { + const DType* data_col, const DType* data_im, const DType* data_offset, const mxnet::TShape& im_shape, + const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, const uint32_t deformable_group, DType* grad_offset, OpReqType req) { index_t num_spatial_axes = kernel_shape.ndim(); index_t num_kernels = col_shape[1] * col_shape[2] * 2 * kernel_shape[0] * kernel_shape[1] * deformable_group; index_t channel_per_deformable_group = col_shape[0] / deformable_group; diff --git a/src/operator/contrib/nn/deformable_im2col.h b/src/operator/contrib/nn/deformable_im2col.h index 1c25982ed32c..1f96fe5b2366 100644 --- a/src/operator/contrib/nn/deformable_im2col.h +++ b/src/operator/contrib/nn/deformable_im2col.h @@ -87,8 +87,8 @@ namespace op { template inline void deformable_im2col(mshadow::Stream* s, const DType* data_im, const DType* data_offset, - const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, const TShape& dilation, + const mxnet::TShape& im_shape, const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, const mxnet::TShape& dilation, const uint32_t deformable_group, DType* data_col) { if (2 == kernel_shape.ndim()) { LOG(FATAL) << "only implemented in GPU"; @@ -115,9 +115,9 @@ inline void deformable_im2col(mshadow::Stream* s, template inline void deformable_col2im(mshadow::Stream* s, const DType* data_col, const DType* data_offset, - const TShape& im_shape, const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, const uint32_t deformable_group, + const mxnet::TShape& im_shape, const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, const uint32_t deformable_group, DType* grad_im, OpReqType req) { LOG(FATAL) << "only implemented in GPU"; } @@ -141,10 +141,12 @@ inline void deformable_col2im(mshadow::Stream* s, template inline void deformable_col2im_coord(mshadow::Stream* s, - const DType* data_col, const DType* data_im, const DType* data_offset, const TShape& im_shape, - const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, const uint32_t deformable_group, DType* grad_offset, OpReqType req) { + const DType* data_col, const DType* data_im, + const DType* data_offset, const mxnet::TShape& im_shape, + const 
mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, const uint32_t deformable_group, + DType* grad_offset, OpReqType req) { LOG(FATAL) << "only implemented in GPU"; } diff --git a/src/operator/contrib/nnvm_to_onnx-inl.h b/src/operator/contrib/nnvm_to_onnx-inl.h index 0994f7e632f3..052948521ba8 100644 --- a/src/operator/contrib/nnvm_to_onnx-inl.h +++ b/src/operator/contrib/nnvm_to_onnx-inl.h @@ -58,7 +58,7 @@ namespace op { namespace nnvm_to_onnx { enum class TypeIO { Inputs = 0, Outputs = 1 }; using NameToIdx_t = std::map; - using InferenceTuple_t = std::tuple; + using InferenceTuple_t = std::tuple; using InferenceMap_t = std::map; } // namespace nnvm_to_onnx @@ -96,14 +96,14 @@ using namespace nnvm; using namespace ::onnx; using int64 = ::google::protobuf::int64; -std::unordered_map GetPlaceholderShapes(const ShapeVector& shape_inputs, +std::unordered_map GetPlaceholderShapes(const ShapeVector& shape_inputs, const nnvm::IndexedGraph& ig); std::unordered_map GetOutputLookup(const nnvm::IndexedGraph& ig); void ConvertPlaceholder( const std::string& node_name, - const std::unordered_map& placeholder_shapes, + const std::unordered_map& placeholder_shapes, GraphProto* graph_proto); void ConvertConstant(GraphProto* graph_proto, diff --git a/src/operator/contrib/nnvm_to_onnx.cc b/src/operator/contrib/nnvm_to_onnx.cc index 58a465455d42..0417a085616a 100644 --- a/src/operator/contrib/nnvm_to_onnx.cc +++ b/src/operator/contrib/nnvm_to_onnx.cc @@ -72,7 +72,7 @@ op::ONNXParam ConvertNnvmGraphToOnnx( const nnvm::IndexedGraph& ig = g.indexed_graph(); const auto& storage_types = g.GetAttr("storage_type"); const auto& dtypes = g.GetAttr("dtype"); - const auto& shape_inputs = g.GetAttr("shape_inputs"); + const auto& shape_inputs = g.GetAttr("shape_inputs"); // TODO(kellens): At the moment this check always passes no matter the weight dtypes used in your // graph. 
We should first iterate over datatypes by name and ensure they're valid types @@ -104,7 +104,7 @@ op::ONNXParam ConvertNnvmGraphToOnnx( auto subgraph_name_id = subgraph_count.fetch_add(1); graph_proto->set_name("MXNetTRTSubgraph" + std::to_string(subgraph_name_id)); - std::unordered_map placeholder_shapes = + std::unordered_map placeholder_shapes = GetPlaceholderShapes(shape_inputs, ig); std::unordered_map output_lookup = GetOutputLookup(ig); uint32_t current_input = 0; @@ -189,10 +189,10 @@ void ConvertConvolution(NodeProto* node_proto, const NodeAttrs& attrs, node_proto->set_op_type("Conv"); - const TShape kernel = conv_param.kernel; - const TShape stride = conv_param.stride; - const TShape dilate = conv_param.dilate; - const TShape pad = conv_param.pad; + const mxnet::TShape kernel = conv_param.kernel; + const mxnet::TShape stride = conv_param.stride; + const mxnet::TShape dilate = conv_param.dilate; + const mxnet::TShape pad = conv_param.pad; const uint32_t num_group = conv_param.num_group; // const bool no_bias = conv_param.no_bias; const dmlc::optional layout = conv_param.layout; @@ -244,9 +244,9 @@ void ConvertPooling(NodeProto* node_proto, const NodeAttrs& attrs, const array_view& /*inputs*/) { const auto& pooling_param = nnvm::get(attrs.parsed); - const TShape kernel = pooling_param.kernel; - const TShape stride = pooling_param.stride; - const TShape pad = pooling_param.pad; + const mxnet::TShape kernel = pooling_param.kernel; + const mxnet::TShape stride = pooling_param.stride; + const mxnet::TShape pad = pooling_param.pad; const int pool_type = pooling_param.pool_type; const bool global_pool = pooling_param.global_pool; @@ -411,12 +411,12 @@ void ConvertElementwiseAdd(NodeProto* node_proto, const NodeAttrs& /*attrs*/, node_proto->set_op_type("Add"); } -std::unordered_map GetPlaceholderShapes( +std::unordered_map GetPlaceholderShapes( const ShapeVector& shape_inputs, const nnvm::IndexedGraph& ig) { - std::unordered_map placeholder_shapes; + std::unordered_map placeholder_shapes; for (uint32_t i = 0; i < shape_inputs.size(); ++i) { std::string name = ig[ig.input_nodes()[i]].source->attrs.name; - TShape shp = shape_inputs[i]; + mxnet::TShape shp = shape_inputs[i]; if (shp.ndim() > 0) { placeholder_shapes.emplace(name, shp); } @@ -441,7 +441,7 @@ std::unordered_map GetOutputLookup( void ConvertPlaceholder( const std::string& node_name, - const std::unordered_map& placeholder_shapes, + const std::unordered_map& placeholder_shapes, GraphProto* const graph_proto) { auto val_info_proto = graph_proto->add_input(); auto type_proto = val_info_proto->mutable_type()->mutable_tensor_type(); @@ -470,7 +470,7 @@ void ConvertConstant( const NDArray nd = shared_buffer->find(node_name)->second; const TBlob& blob = nd.data(); - const TShape shape = blob.shape_; + const mxnet::TShape shape = blob.shape_; for (auto& dim : shape) { initializer_proto->add_dims(static_cast(dim)); @@ -506,7 +506,7 @@ void ConvertOutput( const StorageTypeVector& storage_types, const DTypeVector& dtypes) { const nnvm::IndexedGraph& ig = g.indexed_graph(); uint32_t out_idx = ig.entry_id(ig.outputs()[out_iter->second]); - TShape out_shape = g.GetAttr("shape")[out_idx]; + mxnet::TShape out_shape = g.GetAttr("shape")[out_idx]; int storage_type = storage_types[out_idx]; int dtype = dtypes[out_idx]; diff --git a/src/operator/contrib/nnz.cc b/src/operator/contrib/nnz.cc index a94f52fdaa5e..940c9e02219e 100644 --- a/src/operator/contrib/nnz.cc +++ b/src/operator/contrib/nnz.cc @@ -55,8 +55,8 @@ static bool NNZType(const 
nnvm::NodeAttrs& attrs, } inline bool NNZShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); // csr_matrix is 2-D @@ -178,7 +178,7 @@ This operator only supports CSR matrix on CPU. .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", NNZShape) +.set_attr("FInferShape", NNZShape) .set_attr("FInferType", NNZType) .set_attr("FInferStorageType", NNZStorageType) .set_attr("FComputeEx", NNZComputeEx) diff --git a/src/operator/contrib/optimizer_op.cc b/src/operator/contrib/optimizer_op.cc index 96f431bc569d..9f948bad81b6 100644 --- a/src/operator/contrib/optimizer_op.cc +++ b/src/operator/contrib/optimizer_op.cc @@ -35,8 +35,8 @@ DMLC_REGISTER_PARAMETER(GroupAdagradParam); * \brief Shape inference function for Group AdaGrad. */ inline bool GroupAdagradShape(const nnvm::NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 3U); CHECK_EQ(out_attrs->size(), 1U); @@ -72,7 +72,7 @@ Note that non-zero values for the weight decay option are not supported. .set_num_inputs(3) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", GroupAdagradShape) +.set_attr("FInferShape", GroupAdagradShape) .set_attr("FInferType", ElemwiseType<3, 1>) .set_attr("FInferStorageType", GroupAdagradStorageType) .set_attr("FMutateInputs", diff --git a/src/operator/contrib/proposal-inl.h b/src/operator/contrib/proposal-inl.h index a1f9e49e6cab..9908ca96ec5f 100644 --- a/src/operator/contrib/proposal-inl.h +++ b/src/operator/contrib/proposal-inl.h @@ -100,12 +100,12 @@ class ProposalProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]"; - const TShape &dshape = in_shape->at(proposal::kClsProb); + const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb); if (dshape.ndim() == 0) return false; Shape<4> bbox_pred_shape; bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]); @@ -133,7 +133,7 @@ class ProposalProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } diff --git a/src/operator/contrib/psroi_pooling-inl.h b/src/operator/contrib/psroi_pooling-inl.h index fb20ef0bdddc..50d812882043 100644 --- a/src/operator/contrib/psroi_pooling-inl.h +++ b/src/operator/contrib/psroi_pooling-inl.h @@ -49,7 +49,7 @@ enum PSROIPoolingOpOutputs {kOut}; } // psroipool struct PSROIPoolingParam : public dmlc::Parameter { - // TShape pooled_size; + // mxnet::TShape pooled_size; float spatial_scale; int output_dim; int pooled_size; @@ -168,18 +168,18 @@ class PSROIPoolingProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; 
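// Note on the pattern below (it recurs in every InferShape touched by this
// patch): the attribute vectors are now mxnet::ShapeVector, an alias for
// std::vector<mxnet::TShape> that ships with the new include/mxnet/tuple.h,
// and an empty shape (ndim() == 0) still encodes "not yet inferred", so these
// functions return false until every required input shape is known.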
CHECK_EQ(in_shape->size(), 2) << "Input:[data, rois]"; // data: [batch_size, c, h, w] - TShape dshape = in_shape->at(psroipool::kData); + mxnet::TShape dshape = in_shape->at(psroipool::kData); CHECK_EQ(dshape.ndim(), 4) << "data should be a 4D tensor"; // bbox: [num_rois, 5] - TShape bshape = in_shape->at(psroipool::kBox); + mxnet::TShape bshape = in_shape->at(psroipool::kBox); CHECK_EQ(bshape.ndim(), 2) << "bbox should be a 2D tensor of shape [batch, 5]"; CHECK_EQ(bshape[1], 5) << "bbox should be a 2D tensor of shape [batch, 5]"; @@ -227,7 +227,7 @@ class PSROIPoolingProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; diff --git a/src/operator/contrib/psroi_pooling.cc b/src/operator/contrib/psroi_pooling.cc index c3b66a15852b..ad9ef6dee9b9 100644 --- a/src/operator/contrib/psroi_pooling.cc +++ b/src/operator/contrib/psroi_pooling.cc @@ -241,9 +241,9 @@ Operator *CreateOp(PSROIPoolingParam param, int dtype) { return op; } -Operator *PSROIPoolingProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *PSROIPoolingProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferType(in_type, &out_type, &aux_type)); CHECK(InferShape(in_shape, &out_shape, &aux_shape)); diff --git a/src/operator/contrib/quadratic_op-inl.h b/src/operator/contrib/quadratic_op-inl.h index a6fa260f10e8..e679fedc8e57 100644 --- a/src/operator/contrib/quadratic_op-inl.h +++ b/src/operator/contrib/quadratic_op-inl.h @@ -53,8 +53,8 @@ struct QuadraticParam : public dmlc::Parameter { }; inline bool QuadraticOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); diff --git a/src/operator/contrib/quadratic_op.cc b/src/operator/contrib/quadratic_op.cc index 043379e44074..a023c27a4a8e 100644 --- a/src/operator/contrib/quadratic_op.cc +++ b/src/operator/contrib/quadratic_op.cc @@ -55,7 +55,7 @@ The storage type of ``quadratic`` output depends on storage types of inputs [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", QuadraticOpShape) +.set_attr("FInferShape", QuadraticOpShape) .set_attr("FInferType", QuadraticOpType) .set_attr("FInferStorageType", QuadraticOpStorageType) .set_attr("FCompute", QuadraticOpForward) diff --git a/src/operator/contrib/roi_align-inl.h b/src/operator/contrib/roi_align-inl.h index 9f4d7ce48827..b28e437a7e09 100644 --- a/src/operator/contrib/roi_align-inl.h +++ b/src/operator/contrib/roi_align-inl.h @@ -44,7 +44,7 @@ enum ROIAlignOpOutputs {kOut}; struct ROIAlignParam : public dmlc::Parameter { - TShape pooled_size; + mxnet::TShape pooled_size; float spatial_scale; int sample_ratio; bool position_sensitive; diff --git a/src/operator/contrib/roi_align.cc b/src/operator/contrib/roi_align.cc index e584ea30325d..53ddba02bc7b 100644 --- a/src/operator/contrib/roi_align.cc +++ b/src/operator/contrib/roi_align.cc @@ -547,16 +547,16 @@ He, Kaiming, et al. "Mask R-CNN." 
ICCV, 2017 return std::vector{"output"}; }) .set_attr_parser(ParamParser) -.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_shape, std::vector *out_shape){ +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape){ using namespace mshadow; const ROIAlignParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), 2) << "Input:[data, rois]"; // data: [batch_size, c, h, w] - TShape dshape = in_shape->at(roialign::kData); + mxnet::TShape dshape = in_shape->at(roialign::kData); CHECK_EQ(dshape.ndim(), 4) << "data should be a 4D tensor"; // bbox: [num_rois, 5] - TShape bshape = in_shape->at(roialign::kBox); + mxnet::TShape bshape = in_shape->at(roialign::kBox); CHECK_EQ(bshape.ndim(), 2) << "bbox should be a 2D tensor of shape [batch, 5]"; CHECK_EQ(bshape[1], 5) << "bbox should be a 2D tensor of shape [batch, 5]"; // out: [num_rois, c, pooled_h, pooled_w] diff --git a/src/operator/contrib/sync_batch_norm-inl.h b/src/operator/contrib/sync_batch_norm-inl.h index 78f1c09dfe03..b94416640f55 100644 --- a/src/operator/contrib/sync_batch_norm-inl.h +++ b/src/operator/contrib/sync_batch_norm-inl.h @@ -469,15 +469,15 @@ class SyncBatchNormProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; - in_shape->at(1) = TShape(Shape1(dshape[1])); - in_shape->at(2) = TShape(Shape1(dshape[1])); + in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); + in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); out_shape->push_back(dshape); out_shape->push_back(Shape1(dshape[1])); @@ -545,7 +545,7 @@ class SyncBatchNormProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -577,7 +577,7 @@ class SyncBatchNormProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; inline const SyncBatchNormParam& getParam() const { diff --git a/src/operator/contrib/sync_batch_norm.cc b/src/operator/contrib/sync_batch_norm.cc index 1b465d88b69e..418688e7c98a 100644 --- a/src/operator/contrib/sync_batch_norm.cc +++ b/src/operator/contrib/sync_batch_norm.cc @@ -34,9 +34,9 @@ Operator *CreateOp(SyncBatchNormParam param, int dtype) { } // DO_BIND_DISPATCH comes from operator_common.h -Operator *SyncBatchNormProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *SyncBatchNormProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferType(in_type, &out_type, &aux_type)); CHECK(InferShape(in_shape, &out_shape, &aux_shape)); diff --git a/src/operator/contrib/tensorrt.cc b/src/operator/contrib/tensorrt.cc index 88a65fba3ea3..5b3df70fd825 100644 --- a/src/operator/contrib/tensorrt.cc +++ 
b/src/operator/contrib/tensorrt.cc @@ -63,7 +63,7 @@ OpStatePtr GetPtrMapping(nvinfer1::ICudaEngine* trt_engine, } OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context /*ctx*/, - const std::vector& /*ishape*/, + const mxnet::ShapeVector& /*ishape*/, const std::vector& /*itype*/) { const auto& node_param = nnvm::get(attrs.parsed); @@ -110,8 +110,8 @@ void TRTParamParser(nnvm::NodeAttrs* attrs) { attrs->parsed = std::move(param_); } -inline bool TRTInferShape(const NodeAttrs& attrs, std::vector* /*in_shape*/, - std::vector* out_shape) { +inline bool TRTInferShape(const NodeAttrs& attrs, mxnet::ShapeVector* /*in_shape*/, + mxnet::ShapeVector* out_shape) { const auto &node_param = nnvm::get(attrs.parsed); for (auto& el : node_param.output_map) { (*out_shape)[std::get<0>(el.second)] = std::get<1>(el.second); @@ -168,7 +168,7 @@ NNVM_REGISTER_OP(_trt_op) return node_param.output_map.size(); }) .set_attr_parser(TRTParamParser) - .set_attr("FInferShape", TRTInferShape) + .set_attr("FInferShape", TRTInferShape) .set_attr("FInferType", TRTInferType) .set_attr("FListInputNames", TRTListInputNames) .set_attr("FListOutputNames", TRTListOutputNames) diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc index 25c8f704cbc5..ac6fea7c143b 100644 --- a/src/operator/control_flow.cc +++ b/src/operator/control_flow.cc @@ -266,22 +266,22 @@ static void remap(const std::vector &op_in, size_t start, } } -static inline TShape SliceFirstDim(const TShape &s) { +static inline mxnet::TShape SliceFirstDim(const mxnet::TShape &s) { if (s.ndim() > 1) { - return TShape(s.begin() + 1, s.end()); + return mxnet::TShape(s.begin() + 1, s.end()); } else { - return TShape(mshadow::Shape1(1)); + return mxnet::TShape(mshadow::Shape1(1)); } } static bool ForeachShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { const ForeachParam& params = nnvm::get(attrs.parsed); CHECK_EQ(out_shape->size(), (size_t) params.num_outputs); CHECK_EQ(attrs.subgraphs.size(), 1U); - std::vector subg_in_shape(in_shape->size()); + mxnet::ShapeVector subg_in_shape(in_shape->size()); // data shape std::vector data_1d(params.in_data_locs.ndim(), false); for (size_t i = 0; i < params.in_data_locs.ndim(); i++) { @@ -297,9 +297,9 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, remap(*in_shape, params.in_data_locs.ndim() + params.in_state_locs.ndim(), params.remain_locs, &subg_in_shape); - std::vector subg_out_shape = *out_shape; + mxnet::ShapeVector subg_out_shape = *out_shape; for (int i = 0; i < params.num_out_data; i++) { - TShape shape = subg_out_shape[i]; + mxnet::TShape shape = subg_out_shape[i]; // If we don't have shape info, we don't need to do anything. 
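// (ndim() == 0 marks an unknown shape at this stage of the migration; for
// known outputs the loop strips the leading iteration axis via SliceFirstDim
// above, so the subgraph is inferred against per-step shapes.)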
if (shape.ndim() == 0) continue; @@ -320,7 +320,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, if (g_out_shape.ndim() == 0) continue; - auto out = TShape(g_out_shape.ndim() + 1); + auto out = mxnet::TShape(g_out_shape.ndim() + 1); out[0] = len; for (size_t i = 1; i < out.ndim(); i++) out[i] = g_out_shape[i - 1]; @@ -340,11 +340,11 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, continue; if (data_1d[i]) { - TShape s(1); + mxnet::TShape s(1); s[0] = len; SHAPE_ASSIGN_CHECK(*in_shape, i, s); } else { - auto in = TShape(shape.ndim() + 1); + auto in = mxnet::TShape(shape.ndim() + 1); in[0] = len; for (size_t i = 1; i < in.ndim(); i++) in[i] = shape[i - 1]; @@ -465,7 +465,7 @@ static bool BackwardForeachStorageType(const nnvm::NodeAttrs& attrs, static OpStatePtr CreateForeachState(const NodeAttrs& attrs, Context ctx, - const std::vector& ishape, + const mxnet::ShapeVector& ishape, const std::vector& itype) { const ForeachParam& params = nnvm::get(attrs.parsed); return OpStatePtr::Create(*attrs.subgraphs[0], params); @@ -727,11 +727,11 @@ static void WhileLoopGradComputeExCPU(const OpStatePtr& state_ptr, } static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { - using nnvm::ShapeVector; + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { + using mxnet::ShapeVector; const WhileLoopParam& params = nnvm::get(attrs.parsed); - static const std::function is_udf = is_shape_udf; + static const std::function is_udf = is_shape_udf; // sanity checks CHECK_EQ(in_shape->size() + 2U, (size_t) params.num_args); CHECK_EQ(out_shape->size(), (size_t) params.num_outputs); @@ -776,7 +776,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, g.attrs["shape"] = std::make_shared(std::move(shapes)); g = exec::InferShape(std::move(g)); // now `shapes' won't be used anymore, use new_shapes instead - const auto& new_shapes = g.GetAttr("shape"); + const auto& new_shapes = g.GetAttr("shape"); // copy subg_in back to in_shape for (size_t i = 0; i < subg_in.size(); ++i) { auto eid = idx.entry_id(input_nids[i], 0); @@ -799,7 +799,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, // when the shape is not fully inferred continue; } - auto out = TShape(g_out_shape.ndim() + 1); + auto out = mxnet::TShape(g_out_shape.ndim() + 1); out[0] = params.max_iterations; for (size_t i = 1; i < out.ndim(); i++) out[i] = g_out_shape[i - 1]; @@ -817,8 +817,8 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, } return g.GetAttr("shape_num_unknown_nodes") == 0; }; - ShapeVector cond_out_shape{TShape(1U)}; // this means: [(1, )] - ShapeVector func_out_shape(params.num_outputs); + mxnet::ShapeVector cond_out_shape{mxnet::TShape(1U)}; // this means: [(1, )] + mxnet::ShapeVector func_out_shape(params.num_outputs); CHECK(params.sync_in_out(in_shape, out_shape, is_udf)); bool succ_0 = infer_subg(attrs.subgraphs[0], &cond_out_shape, params.cond_input_locs, 0, false); CHECK(params.sync_in_out(in_shape, out_shape, is_udf)); @@ -898,7 +898,7 @@ static bool BackwardWhileLoopStorageType(const nnvm::NodeAttrs& attrs, static OpStatePtr CreateWhileLoopState(const NodeAttrs& attrs, Context ctx, - const std::vector& ishape, + const mxnet::ShapeVector& ishape, const std::vector& itype) { const WhileLoopParam& params = nnvm::get(attrs.parsed); return OpStatePtr::Create(params, *attrs.subgraphs[0], *attrs.subgraphs[1]); @@ -1034,11 +1034,11 @@ static void CondGradComputeExCPU(const OpStatePtr& state_ptr, } static bool CondShape(const 
nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { - using nnvm::ShapeVector; + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { + using mxnet::ShapeVector; const CondParam& params = nnvm::get(attrs.parsed); - static const std::function is_udf = is_shape_udf; + static const std::function is_udf = is_shape_udf; // sanity checks CHECK_EQ(in_shape->size() + 3U, (size_t) params.num_args); CHECK_EQ(out_shape->size(), (size_t) params.num_outputs); @@ -1051,8 +1051,8 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, const nnvm::Tuple &input_locs, bool fill_out_shape) { // create subg_in - ShapeVector subg_in; - ShapeVector &subg_out = *_subg_out; + mxnet::ShapeVector subg_in; + mxnet::ShapeVector &subg_out = *_subg_out; extract_by_loc(*in_shape, input_locs, &subg_in); // create an indexed graph nnvm::Graph g; @@ -1066,7 +1066,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(idx.input_nodes().size(), subg_in.size()); CHECK_EQ(idx.outputs().size(), subg_out.size()); // create empty shapes for inference - ShapeVector shapes(idx.num_node_entries()); + mxnet::ShapeVector shapes(idx.num_node_entries()); // copy subg_in into shapes for (size_t i = 0; i < subg_in.size(); ++i) { auto eid = idx.entry_id(input_nids[i], 0); @@ -1081,7 +1081,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, g.attrs["shape"] = std::make_shared(std::move(shapes)); g = exec::InferShape(std::move(g)); // now `shapes' won't be used anymore, use new_shapes instead - const auto& new_shapes = g.GetAttr("shape"); + const auto& new_shapes = g.GetAttr("shape"); // copy subg_in back to in_shape for (size_t i = 0; i < subg_in.size(); ++i) { auto eid = idx.entry_id(input_nids[i], 0); @@ -1107,7 +1107,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, } return g.GetAttr("shape_num_unknown_nodes") == 0; }; - ShapeVector cond_out_shape{TShape(1U)}; // this means: [(1, )] + ShapeVector cond_out_shape{mxnet::TShape(1U)}; // this means: [(1, )] ShapeVector then_out_shape(params.num_outputs); ShapeVector else_out_shape(params.num_outputs); bool succ_0 = infer_subg(attrs.subgraphs[0], &cond_out_shape, \ @@ -1234,7 +1234,7 @@ static bool BackwardCondStorageType(const nnvm::NodeAttrs& attrs, static OpStatePtr CreateCondState(const NodeAttrs& attrs, Context ctx, - const std::vector& ishape, + const mxnet::ShapeVector& ishape, const std::vector& itype) { const CondParam& params = nnvm::get(attrs.parsed); return OpStatePtr::Create( @@ -1279,7 +1279,7 @@ NNVM_REGISTER_OP(_foreach) }) .set_attr("FGradient", ForeachGradient) .set_attr("FCreateOpState", CreateForeachState) -.set_attr("FInferShape", ForeachShape) +.set_attr("FInferShape", ForeachShape) .set_attr("FInferType", ForeachType) .set_attr("FStatefulComputeEx", ForeachComputeExCPU) // Foreach operator works like an executor. Its code will always run on CPU. 
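The registrations in this file repeat the one mechanical pattern that runs through the whole patch: a shape hook that takes mxnet::ShapeVector arguments, registered under the FInferShape attribute in the mxnet namespace. A minimal sketch of the post-migration style follows; the _identity_like operator, the IdentityLikeShape function, and the header comments are illustrative assumptions, not part of this patch.

#include <mxnet/tuple.h>            // mxnet::TShape, mxnet::ShapeVector, mxnet::FInferShape
#include <nnvm/op.h>                // NNVM_REGISTER_OP
#include "../operator_common.h"     // SHAPE_ASSIGN_CHECK

namespace mxnet {
namespace op {

// Post-migration shape hook: both attribute vectors are mxnet::ShapeVector
// (std::vector<mxnet::TShape>) rather than std::vector of nnvm shapes.
inline bool IdentityLikeShape(const nnvm::NodeAttrs& attrs,
                              mxnet::ShapeVector* in_attrs,
                              mxnet::ShapeVector* out_attrs) {
  CHECK_EQ(in_attrs->size(), 1U);
  CHECK_EQ(out_attrs->size(), 1U);
  // Propagate in both directions so either side can seed the other.
  SHAPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
  SHAPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]);
  return (*out_attrs)[0].ndim() != 0;  // ndim() == 0 means "still unknown"
}

NNVM_REGISTER_OP(_identity_like)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<mxnet::FInferShape>("FInferShape", IdentityLikeShape);

}  // namespace op
}  // namespace mxnet

Returning false while a shape is still empty lets graph-level inference revisit the node on a later pass, which is why so many of the InferShape bodies in this patch begin with a check like `if (dshape.ndim() == 0) return false;`.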
@@ -1342,7 +1342,7 @@ NNVM_REGISTER_OP(_while_loop) }) .set_attr("FGradient", WhileLoopGradient) .set_attr("FCreateOpState", CreateWhileLoopState) -.set_attr("FInferShape", WhileLoopShape) +.set_attr("FInferShape", WhileLoopShape) .set_attr("FInferType", WhileLoopType) .set_attr("FStatefulComputeEx", WhileLoopComputeExCPU) .set_attr("FExecType", [](const NodeAttrs& attrs) { @@ -1405,7 +1405,7 @@ NNVM_REGISTER_OP(_cond) }) .set_attr("FGradient", CondGradient) .set_attr("FCreateOpState", CreateCondState) -.set_attr("FInferShape", CondShape) +.set_attr("FInferShape", CondShape) .set_attr("FInferType", CondType) .set_attr("FStatefulComputeEx", CondComputeExCPU) .set_attr("FExecType", [](const NodeAttrs& attrs) { diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h index 758ce12d8006..ed6748a9c85c 100644 --- a/src/operator/convolution_v1-inl.h +++ b/src/operator/convolution_v1-inl.h @@ -51,10 +51,10 @@ enum ConvolutionV1OpCudnnTune {kOff, kLimited, kFastest}; } struct ConvolutionV1Param : public dmlc::Parameter { - TShape kernel; - TShape stride; - TShape dilate; - TShape pad; + mxnet::TShape kernel; + mxnet::TShape stride; + mxnet::TShape dilate; + mxnet::TShape pad; uint32_t num_filter; uint32_t num_group; uint64_t workspace; @@ -64,11 +64,11 @@ struct ConvolutionV1Param : public dmlc::Parameter { dmlc::optional layout; DMLC_DECLARE_PARAMETER(ConvolutionV1Param) { DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) .describe("convolution stride: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(dilate).set_default(TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) .describe("convolution dilate: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(pad).set_default(TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) .describe("pad for convolution: (h, w) or (d, h, w)"); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("convolution filter(channel) number"); @@ -357,8 +357,8 @@ class ConvolutionV1Op : public Operator { template Operator* CreateOp(ConvolutionV1Param param, int dtype, - std::vector *in_shape, - std::vector *out_shape, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, Context ctx); #if DMLC_USE_CXX11 @@ -393,9 +393,9 @@ class ConvolutionV1Prop : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; if (!param_.no_bias) { CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; @@ -403,8 +403,8 @@ class ConvolutionV1Prop : public OperatorProperty { CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; } // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; - out_shape->resize(1, TShape()); - const TShape &dshp = (*in_shape)[conv_v1::kData]; + out_shape->resize(1, mxnet::TShape()); + const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData]; if (dshp.ndim() == 0) return false; if (param_.kernel.ndim() == 2) { // 2d conv_v1 @@ -530,12 +530,12 @@ class ConvolutionV1Prop : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } std::vector BackwardResource( - 
const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -544,7 +544,7 @@ class ConvolutionV1Prop : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/convolution_v1.cc b/src/operator/convolution_v1.cc index b6250a7a77f5..723dc867f52f 100644 --- a/src/operator/convolution_v1.cc +++ b/src/operator/convolution_v1.cc @@ -35,8 +35,8 @@ DMLC_REGISTER_PARAMETER(ConvolutionV1Param); template<> Operator* CreateOp(ConvolutionV1Param param, int dtype, - std::vector *in_shape, - std::vector *out_shape, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, Context ctx) { Operator *op = nullptr; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { @@ -47,9 +47,9 @@ Operator* CreateOp(ConvolutionV1Param param, int dtype, // DO_BIND_DISPATCH comes from operator_common.h Operator *ConvolutionV1Prop::CreateOperatorEx(Context ctx, - std::vector *in_shape, + mxnet::ShapeVector *in_shape, std::vector *in_type) const { - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; std::vector out_type, aux_type; CHECK(InferType(in_type, &out_type, &aux_type)); CHECK(InferShape(in_shape, &out_shape, &aux_shape)); diff --git a/src/operator/convolution_v1.cu b/src/operator/convolution_v1.cu index f3928ab451b6..81cf7f5b9ee4 100644 --- a/src/operator/convolution_v1.cu +++ b/src/operator/convolution_v1.cu @@ -34,8 +34,8 @@ namespace mxnet { namespace op { template<> Operator* CreateOp(ConvolutionV1Param param, int dtype, - std::vector *in_shape, - std::vector *out_shape, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, Context ctx) { Operator *op = NULL; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { diff --git a/src/operator/correlation-inl.h b/src/operator/correlation-inl.h index e1cc972d3bbb..3c7422365056 100644 --- a/src/operator/correlation-inl.h +++ b/src/operator/correlation-inl.h @@ -186,13 +186,13 @@ void Init(const std::vector >& kwargs) overr std::map GetParams() const override { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data1, data2]"; - TShape dshape1 = in_shape->at(Correlation::kData1); - TShape dshape2 = in_shape->at(Correlation::kData2); + mxnet::TShape dshape1 = in_shape->at(Correlation::kData1); + mxnet::TShape dshape2 = in_shape->at(Correlation::kData2); CHECK_EQ(dshape1.ndim(), 4U) << "data should be a 4D tensor"; CHECK_EQ(dshape2.ndim(), 4U) << "data should be a 4D tensor"; int paddedbottomheight; @@ -266,7 +266,7 @@ void Init(const std::vector >& kwargs) overr return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/correlation.cc b/src/operator/correlation.cc index d0c664ad4f9c..30a147177687 100644 --- a/src/operator/correlation.cc +++ b/src/operator/correlation.cc @@ -155,7 +155,7 @@ Operator *CreateOp(CorrelationParam param, int dtype) { }); return op; } -Operator* CorrelationProp::CreateOperatorEx(Context ctx, std::vector 
*in_shape, +Operator* CorrelationProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); } diff --git a/src/operator/crop-inl.h b/src/operator/crop-inl.h index b6e49975bd1c..325807abb44e 100644 --- a/src/operator/crop-inl.h +++ b/src/operator/crop-inl.h @@ -45,8 +45,8 @@ enum CropOpOutputs {kOut}; struct CropParam : public dmlc::Parameter { int num_args; - TShape offset; - TShape h_w; + mxnet::TShape offset; + mxnet::TShape h_w; bool center_crop; DMLC_DECLARE_PARAMETER(CropParam) { DMLC_DECLARE_FIELD(num_args).set_range(1, 3) @@ -54,9 +54,9 @@ struct CropParam : public dmlc::Parameter { "for crop height and width, else if equals two, then we will use the height" "and width of the second input symbol, we name crop_like here"); int shape[] = {0, 0}; - DMLC_DECLARE_FIELD(offset).set_default(TShape(shape, shape + 2)) + DMLC_DECLARE_FIELD(offset).set_default(mxnet::TShape(shape, shape + 2)) .describe("crop offset coordinate: (y, x)"); - DMLC_DECLARE_FIELD(h_w).set_default(TShape(shape, shape + 2)) + DMLC_DECLARE_FIELD(h_w).set_default(mxnet::TShape(shape, shape + 2)) .describe("crop height and width: (h, w)"); DMLC_DECLARE_FIELD(center_crop).set_default(false) .describe("If set to true, then it will use be the center_crop," @@ -169,12 +169,12 @@ class CropProp : public OperatorProperty { return ret; } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), static_cast(param_.num_args)); - TShape data_shape = in_shape->at(crop_enum::kData); + mxnet::TShape data_shape = in_shape->at(crop_enum::kData); if (data_shape.ndim() == 0) return false; CHECK_EQ(data_shape.ndim(), 4U) << \ "Input data should be 4D in batch-num_filter-y-x"; @@ -191,7 +191,7 @@ class CropProp : public OperatorProperty { crop_shape.push_back(param_.h_w[0]); crop_shape.push_back(param_.h_w[1]); } else if (param_.num_args == 2) { - TShape crop_like_shape = in_shape->at(crop_enum::kCropLike); + mxnet::TShape crop_like_shape = in_shape->at(crop_enum::kCropLike); crop_shape.push_back(crop_like_shape[2]); crop_shape.push_back(crop_like_shape[3]); } diff --git a/src/operator/cross_device_copy.cc b/src/operator/cross_device_copy.cc index 08a7d52a9ac4..f8c7ced5058d 100644 --- a/src/operator/cross_device_copy.cc +++ b/src/operator/cross_device_copy.cc @@ -49,11 +49,11 @@ class CrossDeviceCopyProp : public OperatorProperty { return std::map(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; out_shape->clear(); out_shape->push_back(dshape); diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 2643abbe9e5f..39cca4d7c436 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -128,8 +128,8 @@ void AttrParser(NodeAttrs* attrs) { } bool InferShape(const NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { const 
CustomParam& params = nnvm::get(attrs.parsed); size_t total = params.num_args + params.num_outs + params.num_auxs; @@ -153,19 +153,19 @@ bool InferShape(const NodeAttrs& attrs, params.info->contexts[kCustomOpPropInferShape])); for (size_t i = 0; i < params.num_args; ++i) { - SHAPE_ASSIGN_CHECK(*in_shape, i, TShape(shapes[i], shapes[i]+ndims[i])); + SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape(shapes[i], shapes[i]+ndims[i])); } size_t base = params.num_args; for (size_t i = 0; i < params.num_outs; ++i) { SHAPE_ASSIGN_CHECK(*out_shape, i, - TShape(shapes[base+i], shapes[base+i]+ndims[base+i])); + mxnet::TShape(shapes[base+i], shapes[base+i]+ndims[base+i])); } base = params.num_args + params.num_outs; for (size_t i = 0; i < params.num_auxs; ++i) { SHAPE_ASSIGN_CHECK(*in_shape, params.num_args+i, - TShape(shapes[base+i], shapes[base+i]+ndims[base+i])); + mxnet::TShape(shapes[base+i], shapes[base+i]+ndims[base+i])); } return true; } @@ -255,7 +255,7 @@ std::vector Gradient( OpStatePtr CreateState(const NodeAttrs& attrs, Context ctx, - const std::vector& in_shape, + const mxnet::ShapeVector& in_shape, const std::vector& in_type) { const CustomParam& params = nnvm::get(attrs.parsed); @@ -554,7 +554,7 @@ Please check the tutorial here: http://mxnet.io/faq/new_op.html. return params.num_outs; }) .set_attr_parser(AttrParser) -.set_attr("FInferShape", InferShape) +.set_attr("FInferShape", InferShape) .set_attr("FInferType", InferType) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { std::vector args = List(attrs); diff --git a/src/operator/custom/native_op-inl.h b/src/operator/custom/native_op-inl.h index 8da04abc0a39..6fbbae18afe8 100644 --- a/src/operator/custom/native_op-inl.h +++ b/src/operator/custom/native_op-inl.h @@ -130,13 +130,13 @@ class NativeOp : public Operator { std::vector shapes; std::vector shapes_buffer_; std::vector tags; - std::map > > buffer_map; + std::map > > buffer_map; virtual void SyncBuffer(const TBlob &tblob, const std::string &name, mshadow::Stream *stream) { using namespace mshadow; - std::map > >::iterator buffer = + std::map > >::iterator buffer = buffer_map.find(name); if (buffer == buffer_map.end() || buffer->second.first != tblob.shape_) { if (buffer != buffer_map.end()) { @@ -144,7 +144,7 @@ class NativeOp : public Operator { buffer_map.erase(buffer); } buffer_map[name] = - std::pair >(tblob.shape_, + std::pair >(tblob.shape_, NewTensor(tblob.shape_.FlatTo2D(), 0.0f, false)); @@ -220,9 +220,9 @@ class NativeOpProp : public OperatorProperty { } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { std::vector shapes; std::vector ndims; size_t size = 0; @@ -239,11 +239,11 @@ class NativeOpProp : public OperatorProperty { param_.pinfo->infer_shape(shapes.size(), ndims.data(), shapes.data(), param_.pinfo->p_infer_shape); for (unsigned i = 0; i < in_shape->size(); ++i) { - SHAPE_ASSIGN_CHECK(*in_shape, i, TShape(shapes[i], shapes[i]+ndims[i])); + SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape(shapes[i], shapes[i]+ndims[i])); } out_shape->clear(); for (unsigned i = param_.num_inputs_; i < shapes.size(); ++i) { - out_shape->push_back(TShape(shapes[i], shapes[i]+ndims[i])); + out_shape->push_back(mxnet::TShape(shapes[i], shapes[i]+ndims[i])); } return true; } diff --git a/src/operator/custom/ndarray_op-inl.h b/src/operator/custom/ndarray_op-inl.h index 
5490747d7d4d..4973be9a1e3d 100644 --- a/src/operator/custom/ndarray_op-inl.h +++ b/src/operator/custom/ndarray_op-inl.h @@ -122,9 +122,9 @@ class NDArrayOpProp : public OperatorProperty { } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { std::vector shapes; std::vector ndims; size_t size = 0; @@ -141,11 +141,11 @@ class NDArrayOpProp : public OperatorProperty { CHECK(param_.pinfo->infer_shape(shapes.size(), ndims.data(), shapes.data(), param_.pinfo->p_infer_shape)); for (unsigned i = 0; i < in_shape->size(); ++i) { - SHAPE_ASSIGN_CHECK(*in_shape, i, TShape(shapes[i], shapes[i]+ndims[i])); + SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape(shapes[i], shapes[i]+ndims[i])); } out_shape->clear(); for (unsigned i = param_.num_inputs_; i < shapes.size(); ++i) { - out_shape->push_back(TShape(shapes[i], shapes[i]+ndims[i])); + out_shape->push_back(mxnet::TShape(shapes[i], shapes[i]+ndims[i])); } return true; } diff --git a/src/operator/elemwise_op_common.h b/src/operator/elemwise_op_common.h index e622ce216ad0..2edaa55540c1 100644 --- a/src/operator/elemwise_op_common.h +++ b/src/operator/elemwise_op_common.h @@ -160,16 +160,16 @@ inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs, template inline bool ElemwiseShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { if (n_in != -1) { CHECK_EQ(in_attrs->size(), static_cast(n_in)) << " in operator " << attrs.name; } if (n_out != -1) { CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << attrs.name; } - return ElemwiseAttr( - attrs, in_attrs, out_attrs, TShape()); + return ElemwiseAttr( + attrs, in_attrs, out_attrs, mxnet::TShape()); } template diff --git a/src/operator/grid_generator-inl.h b/src/operator/grid_generator-inl.h index 258ec9ae9571..9083ae1009bc 100644 --- a/src/operator/grid_generator-inl.h +++ b/src/operator/grid_generator-inl.h @@ -50,7 +50,7 @@ enum GridGeneratorTransformType {kAffine, kWarp}; struct GridGeneratorParam : public dmlc::Parameter { int transform_type; - TShape target_shape; + mxnet::TShape target_shape; DMLC_DECLARE_PARAMETER(GridGeneratorParam) { int shape[] = {0, 0}; DMLC_DECLARE_FIELD(transform_type) @@ -59,7 +59,7 @@ struct GridGeneratorParam : public dmlc::Parameter { .describe("The type of transformation. For `affine`, input data should be an affine matrix " "of size (batch, 6). For `warp`, input data should be an optical flow of size " "(batch, 2, h, w)."); - DMLC_DECLARE_FIELD(target_shape).set_default(TShape(shape, shape + 2)) + DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape(shape, shape + 2)) .describe("Specifies the output shape (H, W). This is required if transformation type is " "`affine`. 
If transformation type is `warp`, this parameter is ignored."); } @@ -126,7 +126,7 @@ class GridGeneratorOp : public Operator { Assign(out, req[grid::kOut], (data + broadcast_with_axis(grid_dst, -1, data.shape_[0])) / broadcast_to(reshape(workspace, Shape4(1, 2, 1, 1)), - TShape(data.shape_)) - scalar(1)); + mxnet::TShape(data.shape_)) - scalar(1)); break; } } @@ -169,7 +169,7 @@ class GridGeneratorOp : public Operator { workspace[1] = scalar((DType(gdata.size(2)) - 1.0) / 2.0); Assign(gdata, req[grid::kData], grad / broadcast_to(reshape(workspace, Shape4(1, 2, 1, 1)), - TShape(gdata.shape_))); + mxnet::TShape(gdata.shape_))); break; } } @@ -209,12 +209,12 @@ class GridGeneratorProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1U) << "Input:[data]"; - const TShape &lshape = (*in_shape)[grid::kData]; + const mxnet::TShape &lshape = (*in_shape)[grid::kData]; if (lshape.ndim() == 0) return false; out_shape->clear(); switch (param_.transform_type) { @@ -300,7 +300,7 @@ class GridGeneratorProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { switch (param_.transform_type) { case grid::kAffine: { return{}; @@ -313,7 +313,7 @@ class GridGeneratorProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { switch (param_.transform_type) { case grid::kAffine: { return {}; @@ -330,7 +330,7 @@ class GridGeneratorProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/grid_generator.cc b/src/operator/grid_generator.cc index 96ec5d5a7e7a..b1b6025b01be 100644 --- a/src/operator/grid_generator.cc +++ b/src/operator/grid_generator.cc @@ -39,7 +39,7 @@ Operator* CreateOp(GridGeneratorParam param, int dtype) { return op; } -Operator *GridGeneratorProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *GridGeneratorProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/identity_attach_KL_sparse_reg-inl.h b/src/operator/identity_attach_KL_sparse_reg-inl.h index 591ea5956383..764853115a2c 100644 --- a/src/operator/identity_attach_KL_sparse_reg-inl.h +++ b/src/operator/identity_attach_KL_sparse_reg-inl.h @@ -129,12 +129,12 @@ class IdentityAttachKLSparseRegProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1U); - const TShape &dshape = in_shape->at(sparsereg::kData); + const mxnet::TShape &dshape = in_shape->at(sparsereg::kData); if (dshape.ndim() == 0) return false; out_shape->clear(); out_shape->push_back(dshape); @@ -179,7 +179,7 @@ class IdentityAttachKLSparseRegProp 
: public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h index 0f4d173be79a..c37324678120 100644 --- a/src/operator/image/image_random-inl.h +++ b/src/operator/image/image_random-inl.h @@ -87,21 +87,21 @@ void NormalizeBackwardImplCUDA(mshadow::Stream *s, // Shape and Type inference for image to tensor operator inline bool ToTensorShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - TShape &shp = (*in_attrs)[0]; + mxnet::TShape &shp = (*in_attrs)[0]; if (!shp.ndim()) return false; CHECK((shp.ndim() == 3) || (shp.ndim() == 4)) << "Input image must have shape (height, width, channels), or " << "(N, height, width, channels) but got " << shp; if (shp.ndim() == 3) { - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({shp[2], shp[0], shp[1]})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({shp[2], shp[0], shp[1]})); } else if (shp.ndim() == 4) { - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({shp[0], shp[3], shp[1], shp[2]})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({shp[0], shp[3], shp[1], shp[2]})); } return true; @@ -234,8 +234,8 @@ struct NormalizeParam : public dmlc::Parameter { // Shape inference inline bool NormalizeOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const NormalizeParam ¶m = nnvm::get(attrs.parsed); const auto& dshape = (*in_attrs)[0]; @@ -532,9 +532,9 @@ inline uint8_t saturate_cast(const float& src) { } inline bool ImageShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - TShape& dshape = (*in_attrs)[0]; + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + mxnet::TShape& dshape = (*in_attrs)[0]; CHECK_EQ(dshape.ndim(), 3) << "Input image must have shape (height, width, channels), but got " << dshape; auto nchannels = dshape[dshape.ndim()-1]; @@ -546,7 +546,7 @@ inline bool ImageShape(const nnvm::NodeAttrs& attrs, } template -void FlipImpl(const TShape &shape, DType *src, DType *dst) { +void FlipImpl(const mxnet::TShape &shape, DType *src, DType *dst) { int head = 1, mid = shape[axis], tail = 1; for (int i = 0; i < axis; ++i) head *= shape[i]; for (uint32_t i = axis+1; i < shape.ndim(); ++i) tail *= shape[i]; @@ -1067,7 +1067,7 @@ inline void RandomLighting(const nnvm::NodeAttrs &attrs, [](const NodeAttrs& attrs){ \ return std::vector >{{0, 0}}; \ }) \ - .set_attr("FInferShape", ImageShape) \ + .set_attr("FInferShape", ImageShape) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FGradient", ElemwiseGradUseNone{ "_copy" }) \ .add_argument("data", "NDArray-or-Symbol", "The input.") diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc index 810bffbdd7bb..0b95b198ae64 100644 --- a/src/operator/image/image_random.cc +++ b/src/operator/image/image_random.cc @@ -95,7 +95,7 @@ with values in the range [0, 1) [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", ToTensorShape) +.set_attr("FInferShape", ToTensorShape) .set_attr("FInferType", ToTensorType) .set_attr("FCompute", ToTensorOpForward) .set_attr("FGradient", ElemwiseGradUseNone{ "_copy" 
}) @@ -170,7 +170,7 @@ NNVM_REGISTER_OP(_image_normalize) [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", NormalizeOpShape) +.set_attr("FInferShape", NormalizeOpShape) .set_attr("FInferType", NormalizeOpType) .set_attr("FCompute", NormalizeOpForward) .set_attr("FInplaceOption", diff --git a/src/operator/image/resize-inl.h b/src/operator/image/resize-inl.h index 3e1310068073..de2189838d76 100644 --- a/src/operator/image/resize-inl.h +++ b/src/operator/image/resize-inl.h @@ -113,8 +113,8 @@ inline SizeParam GetHeightAndWidth(int data_h, } inline bool ResizeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { // input attrs should only be (h, w, c) or (n, h, w, c) CHECK((in_attrs->at(0).ndim() == 3U) || (in_attrs->at(0).ndim() == 4U)) << "Input image dimension should be 3 or 4 but got " @@ -124,11 +124,11 @@ inline bool ResizeShape(const nnvm::NodeAttrs& attrs, SizeParam size; if (ishape.ndim() == 3) { size = GetHeightAndWidth(ishape[H], ishape[W], param); - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({size.height, size.width, ishape[C]})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({size.height, size.width, ishape[C]})); } else { size = GetHeightAndWidth(ishape[kH], ishape[kW], param); SHAPE_ASSIGN_CHECK(*out_attrs, 0, - TShape({ishape[N], size.height, size.width, ishape[kC]})); + mxnet::TShape({ishape[N], size.height, size.width, ishape[kC]})); } return true; } diff --git a/src/operator/image/resize.cc b/src/operator/image/resize.cc index d3b28f08008f..d93769faa8b3 100644 --- a/src/operator/image/resize.cc +++ b/src/operator/image/resize.cc @@ -71,7 +71,7 @@ to the given size .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ResizeShape) +.set_attr("FInferShape", ResizeShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", Resize) .set_attr("FGradient", ElemwiseGradUseNone{ "_copy" }) diff --git a/src/operator/instance_norm-inl.h b/src/operator/instance_norm-inl.h index 258c164450d0..b7e579e2d066 100644 --- a/src/operator/instance_norm-inl.h +++ b/src/operator/instance_norm-inl.h @@ -203,15 +203,15 @@ class InstanceNormProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Input:[data]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; - in_shape->at(1) = TShape(Shape1(dshape[1])); - in_shape->at(2) = TShape(Shape1(dshape[1])); + in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); + in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); out_shape->push_back(dshape); out_shape->push_back(Shape2(dshape[0], dshape[1])); @@ -236,7 +236,7 @@ class InstanceNormProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -257,7 +257,7 @@ class InstanceNormProp : public OperatorProperty { return NULL; } - Operator *CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator *CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector 
*in_type) const override; private: diff --git a/src/operator/instance_norm.cc b/src/operator/instance_norm.cc index 9305a74b0e07..53bc7c986d9e 100644 --- a/src/operator/instance_norm.cc +++ b/src/operator/instance_norm.cc @@ -35,7 +35,7 @@ Operator* CreateOp(InstanceNormParam param, int dtype) { // DO_BIND_DISPATCH comes from operator_common.h Operator* InstanceNormProp::CreateOperatorEx(Context ctx, - std::vector* in_shape, + mxnet::ShapeVector* in_shape, std::vector* in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/l2_normalization-inl.h b/src/operator/l2_normalization-inl.h index c7e71424ada9..975e81f78c25 100644 --- a/src/operator/l2_normalization-inl.h +++ b/src/operator/l2_normalization-inl.h @@ -85,7 +85,7 @@ class L2NormalizationOp : public Operator { CHECK_EQ(in_data.size(), 1U); CHECK_EQ(out_data.size(), 2U); Stream *s = ctx.get_stream(); - TShape orig_shape = in_data[l2_normalization::kData].shape_; + mxnet::TShape orig_shape = in_data[l2_normalization::kData].shape_; if (param_.mode == l2_normalization::kInstance) { Shape<2> dshape = Shape2(orig_shape[0], orig_shape.ProdShape(1, orig_shape.ndim())); @@ -156,7 +156,7 @@ class L2NormalizationOp : public Operator { CHECK_EQ(req.size(), 1U); Stream *s = ctx.get_stream(); - TShape orig_shape = out_data[l2_normalization::kOut].shape_; + mxnet::TShape orig_shape = out_data[l2_normalization::kOut].shape_; if (param_.mode == l2_normalization::kInstance) { Shape<2> dshape = Shape2(orig_shape[0], orig_shape.ProdShape(1, orig_shape.ndim())); @@ -260,12 +260,12 @@ class L2NormalizationProp : public OperatorProperty { return dtype != -1; } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1U) << "L2Normalization layer only accepts data as input"; - const TShape &dshape = (*in_shape)[l2_normalization::kData]; + const mxnet::TShape &dshape = (*in_shape)[l2_normalization::kData]; // require data to be known if ((*in_shape)[l2_normalization::kData].ndim() == 0) return false; out_shape->clear(); @@ -274,7 +274,7 @@ class L2NormalizationProp : public OperatorProperty { out_shape->push_back(Shape1(dshape[0])); } else if (param_.mode == l2_normalization::kChannel) { CHECK_GE(dshape.ndim(), 3U) << "At lease 3 dimensions required in channel mode"; - TShape norm_shape = dshape; + mxnet::TShape norm_shape = dshape; norm_shape[1] = 1; out_shape->push_back(norm_shape); } else if (param_.mode == l2_normalization::kSpatial) { @@ -315,7 +315,7 @@ class L2NormalizationProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -324,7 +324,7 @@ class L2NormalizationProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/l2_normalization.cc b/src/operator/l2_normalization.cc index 6801a0a20576..92307af814d2 100644 --- a/src/operator/l2_normalization.cc +++ b/src/operator/l2_normalization.cc @@ -48,7 +48,7 @@ class L2NormalizationOpCPU : public L2NormalizationOp { CHECK_EQ(in_data.size(), 1U); CHECK_EQ(out_data.size(), 2U); Stream 
*s = ctx.get_stream(); - TShape orig_shape = in_data[l2_normalization::kData].shape_; + mxnet::TShape orig_shape = in_data[l2_normalization::kData].shape_; auto omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); if (this->param_.mode == l2_normalization::kInstance) { Shape<2> dshape = Shape2(orig_shape[0], @@ -133,7 +133,7 @@ Operator* CreateOp(L2NormalizationParam param, int dtype) { } // DO_BIND_DISPATCH comes from static_operator_common.h -Operator* L2NormalizationProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator* L2NormalizationProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, this->param_, in_type->at(0)); } diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h index fe2668959af1..c7fa3f0443ee 100644 --- a/src/operator/leaky_relu-inl.h +++ b/src/operator/leaky_relu-inl.h @@ -111,9 +111,9 @@ class LeakyReLUOp : public Operator { break; } case leakyrelu::kPReLU: { - TShape gshape = expand_shape(in_data[leakyrelu::kGamma].shape_, + mxnet::TShape gshape = expand_shape(in_data[leakyrelu::kGamma].shape_, in_data[leakyrelu::kData].shape_); - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; const int ndim = op::BinaryBroadcastShapeCompact(in_data[leakyrelu::kData].shape_, gshape, out_data[leakyrelu::kOut].shape_, @@ -237,9 +237,9 @@ class LeakyReLUOp : public Operator { break; } case leakyrelu::kPReLU: { - TShape gshape = expand_shape(in_grad[leakyrelu::kGamma].shape_, + mxnet::TShape gshape = expand_shape(in_grad[leakyrelu::kGamma].shape_, in_grad[leakyrelu::kData].shape_); - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; const bool need_bc = BinaryBroadcastShapeCompact(in_grad[leakyrelu::kData].shape_, gshape, out_grad[leakyrelu::kOut].shape_, @@ -297,8 +297,8 @@ class LeakyReLUOp : public Operator { static MSHADOW_XINLINE size_t minthree(const size_t a, const size_t b, const size_t c) { return a < b ? (a < c ? a : c) : (b < c ? 
b : c); } - static inline TShape expand_shape(const TShape& src, const TShape& dst) { - TShape result(dst.ndim()); + static inline mxnet::TShape expand_shape(const mxnet::TShape& src, const mxnet::TShape& dst) { + mxnet::TShape result(dst.ndim()); int s = src.ndim() - 1; for (int i = dst.ndim() - 1; i >= 0; i--) { if (s >= 0 && i <= 1 && (dst[i] == src[s] || src[s] == 1)) { @@ -328,21 +328,21 @@ class LeakyReLUProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; if (param_.act_type == leakyrelu::kPReLU) { CHECK_EQ(in_shape->size(), 2U) << "Input:[data, gamma]"; } else { CHECK_EQ(in_shape->size(), 1U) << "Input:[data]"; } - const TShape &dshape = in_shape->at(leakyrelu::kData); + const mxnet::TShape &dshape = in_shape->at(leakyrelu::kData); if (dshape.ndim() == 0) return false; if (param_.act_type == leakyrelu::kPReLU) { - const TShape &gshape = in_shape->at(leakyrelu::kGamma); + const mxnet::TShape &gshape = in_shape->at(leakyrelu::kGamma); if (gshape.ndim() == 0) { - in_shape->at(leakyrelu::kGamma) = TShape(Shape1(dshape[1])); + in_shape->at(leakyrelu::kGamma) = mxnet::TShape(Shape1(dshape[1])); } if (dshape == gshape) { SHAPE_ASSIGN_CHECK(*out_shape, 0, dshape); @@ -450,7 +450,7 @@ class LeakyReLUProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { if (param_.act_type == leakyrelu::kRReLU) { return {ResourceRequest::kRandom}; } else { @@ -459,7 +459,7 @@ class LeakyReLUProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -468,7 +468,7 @@ class LeakyReLUProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc index 45f9511c9085..214e41a84611 100644 --- a/src/operator/leaky_relu.cc +++ b/src/operator/leaky_relu.cc @@ -38,7 +38,7 @@ Operator *CreateOp(LeakyReLUParam param, int dtype) { return op; } -Operator *LeakyReLUProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *LeakyReLUProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); } diff --git a/src/operator/loss_binary_op-inl.h b/src/operator/loss_binary_op-inl.h index 1362997231a0..a3853c56359a 100644 --- a/src/operator/loss_binary_op-inl.h +++ b/src/operator/loss_binary_op-inl.h @@ -35,15 +35,15 @@ namespace op { // return a shape of scalar inline bool SoftmaxCrossEntropyShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ((*in_attrs)[0].ndim(), 2U) << "SoftmaxCrossEntropy only accept 2D data"; CHECK_EQ((*in_attrs)[1].ndim(), 1U) << "SoftmaxCrossEntropy only accept 1D label"; CHECK_EQ((*in_attrs)[0][0], (*in_attrs)[1][0]) << "SoftmaxCrossEntropy: data label shape mismatch"; - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape(1)); + 
SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1));
   return true;
 }
diff --git a/src/operator/loss_binary_op.cc b/src/operator/loss_binary_op.cc
index df8576cfbb83..696c8589a0dc 100644
--- a/src/operator/loss_binary_op.cc
+++ b/src/operator/loss_binary_op.cc
@@ -59,7 +59,7 @@ Example::
 )code" ADD_FILELINE)
 .set_num_inputs(2)
 .set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape", SoftmaxCrossEntropyShape)
+.set_attr<mxnet::FInferShape>("FInferShape", SoftmaxCrossEntropyShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
 .set_attr<FResourceRequest>("FResourceRequest",
   [](const NodeAttrs& attrs) {
diff --git a/src/operator/make_loss-inl.h b/src/operator/make_loss-inl.h
index b83e5b9b687b..d6f14b1f2d85 100644
--- a/src/operator/make_loss-inl.h
+++ b/src/operator/make_loss-inl.h
@@ -136,12 +136,12 @@ class MakeLossProp : public OperatorProperty {
     return param_.__DICT__();
   }

-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 1U);
-    const TShape &dshape = in_shape->at(make_loss_enum::kData);
+    const mxnet::TShape &dshape = in_shape->at(make_loss_enum::kData);
     if (dshape.ndim() == 0) return false;
     out_shape->clear();
     out_shape->push_back(dshape);
@@ -180,7 +180,7 @@ class MakeLossProp : public OperatorProperty {
   }

   std::vector<ResourceRequest> BackwardResource(
-      const std::vector<TShape> &in_shape) const override {
+      const mxnet::ShapeVector &in_shape) const override {
     if (param_.normalization == make_loss_enum::kValid) {
       return {ResourceRequest::kTempSpace};
     }
@@ -198,7 +198,7 @@ class MakeLossProp : public OperatorProperty {
     return NULL;
   }

-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;

 private:
diff --git a/src/operator/make_loss.cc b/src/operator/make_loss.cc
index 7e45f4ce4ff4..950f17ed955e 100644
--- a/src/operator/make_loss.cc
+++ b/src/operator/make_loss.cc
@@ -35,9 +35,9 @@ Operator *CreateOp(MakeLossParam param, int dtype) {
   return op;
 }

-Operator *MakeLossProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+Operator *MakeLossProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                                          std::vector<int> *in_type) const {
-  std::vector<TShape> out_shape, aux_shape;
+  mxnet::ShapeVector out_shape, aux_shape;
   std::vector<int> out_type, aux_type;
   CHECK(InferType(in_type, &out_type, &aux_type));
   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index fb920c31ce37..10e736258ab1 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -191,7 +191,7 @@ NNVM_REGISTER_OP(_backward_Activation)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardActStorageType)
 #endif
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<-1, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<-1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<-1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
   return std::vector<std::pair<int, int> >{{0, 0}};
diff --git a/src/operator/nn/batch_norm-inl.h b/src/operator/nn/batch_norm-inl.h
index 3f47d58bb8c3..70e91c164090 100644
--- a/src/operator/nn/batch_norm-inl.h
+++ b/src/operator/nn/batch_norm-inl.h
@@ -304,7 +304,7 @@ class BNTensor3 {
     }
   }

-  inline BNTensor3(DType *p, const TShape& shape, const int indexOfChannel)
+  inline BNTensor3(DType *p, const mxnet::TShape& shape, const int indexOfChannel)
     : dptr_(p)
     ,
indexOfChannel_(static_cast(indexOfChannel < 0 ? (static_cast(shape.ndim()) + indexOfChannel) @@ -393,7 +393,7 @@ class BNTensor3 { size_t shape_[COUNT]; }; -inline int GetRealAxis(const TShape& shape, int axis) { +inline int GetRealAxis(const mxnet::TShape& shape, int axis) { if (axis < 0) { axis += shape.ndim(); } diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index 6254a1e18662..511fe455e946 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -317,13 +317,13 @@ void BatchNormBackwardImpl(mshadow::Stream *, DMLC_REGISTER_PARAMETER(BatchNormParam); static bool BatchNormShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { const BatchNormParam& param = nnvm::get(attrs.parsed); using namespace mshadow; CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, MovingMean, MovingVar]"; CHECK_EQ(out_shape->size(), 3U); - const TShape &dshape = in_shape->at(batchnorm::kData); + const mxnet::TShape &dshape = in_shape->at(batchnorm::kData); const size_t channelAxis = static_cast(param.axis < 0 ? static_cast(dshape.ndim()) + param.axis @@ -336,10 +336,10 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, return false; } - in_shape->at(batchnorm::kGamma) = TShape(Shape1(channelCount)); - in_shape->at(batchnorm::kBeta) = TShape(Shape1(channelCount)); - in_shape->at(batchnorm::kInMovingMean) = TShape(Shape1(channelCount)); // kMovingMean - in_shape->at(batchnorm::kInMovingVar) = TShape(Shape1(channelCount)); // kMovingVar + in_shape->at(batchnorm::kGamma) = mxnet::TShape(Shape1(channelCount)); + in_shape->at(batchnorm::kBeta) = mxnet::TShape(Shape1(channelCount)); + in_shape->at(batchnorm::kInMovingMean) = mxnet::TShape(Shape1(channelCount)); // kMovingMean + in_shape->at(batchnorm::kInMovingVar) = mxnet::TShape(Shape1(channelCount)); // kMovingVar out_shape->clear(); out_shape->push_back(dshape); // kOut @@ -381,7 +381,7 @@ static bool BatchNormType(const nnvm::NodeAttrs& attrs, #if MXNET_USE_MKLDNN == 1 static inline bool SupportMKLDNNBN(const NDArray &input, const BatchNormParam ¶m) { - TShape shape = input.shape(); + mxnet::TShape shape = input.shape(); return SupportMKLDNN(input) && shape.ndim() == 4 && param.axis == mxnet::op::batchnorm::DEFAULT_AXIS && shape[param.axis] % 8 == 0 @@ -418,7 +418,7 @@ void BatchNormGradComputeExCPU(const nnvm::NodeAttrs &attrs, CHECK_EQ(inputs.size(), 8U); const BatchNormParam ¶m = nnvm::get(attrs.parsed); - TShape shape = inputs[0].shape(); + mxnet::TShape shape = inputs[0].shape(); // MKLDNN batchnorm only works well on the special MKLDNN layout. if (SupportMKLDNNBN(inputs[0], param) && (inputs[3].IsMKLDNNData() || inputs[0].IsMKLDNNData())) { @@ -591,7 +591,7 @@ then set ``gamma`` to 1 and its gradient to 0. 
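The BatchNormShape hunks above illustrate the contract every migrated shape function follows: fill in whatever can be deduced, return false while any shape is still unknown so the pass can retry. A minimal sketch against the post-patch types, assuming a channel axis of 1 (the helper name is illustrative, not code from this patch):

#include <mshadow/tensor.h>   // mshadow::Shape1
#include <mxnet/tuple.h>      // mxnet::TShape, mxnet::ShapeVector
#include <nnvm/node.h>        // nnvm::NodeAttrs

inline bool BNLikeShape(const nnvm::NodeAttrs& attrs,
                        mxnet::ShapeVector* in_shape,
                        mxnet::ShapeVector* out_shape) {
  const mxnet::TShape& dshape = in_shape->at(0);    // data
  if (dshape.ndim() == 0) return false;             // not known yet
  // Back-fill the 1-D parameter inputs from the data shape.
  in_shape->at(1) = mxnet::TShape(mshadow::Shape1(dshape[1]));  // gamma
  in_shape->at(2) = mxnet::TShape(mshadow::Shape1(dshape[1]));  // beta
  out_shape->clear();
  out_shape->push_back(dshape);                     // output mirrors data
  return true;
}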
.set_attr("FMutateInputs", [](const nnvm::NodeAttrs& attrs) { return std::vector{3, 4}; }) -.set_attr("FInferShape", BatchNormShape) +.set_attr("FInferShape", BatchNormShape) .set_attr("FInferType", BatchNormType) .set_attr("FInferStorageType", BatchNormStorageType) .set_attr("FCompute", BatchNormCompute) diff --git a/src/operator/nn/batch_norm.cu b/src/operator/nn/batch_norm.cu index 03962cbc0f33..1199ec7fcce5 100644 --- a/src/operator/nn/batch_norm.cu +++ b/src/operator/nn/batch_norm.cu @@ -664,7 +664,7 @@ void BatchNormCompute(const nnvm::NodeAttrs& attrs, std::vector in_data(inputs.begin(), inputs.begin() + 3); std::vector aux_states(inputs.begin() + 3, inputs.end()); int dtype = inputs[0].type_flag_; - TShape shape = inputs[0].shape_; + mxnet::TShape shape = inputs[0].shape_; param.axis = mxnet::op::batchnorm::GetRealAxis(shape, param.axis); #if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5 @@ -693,7 +693,7 @@ void BatchNormGradCompute(const nnvm::NodeAttrs& attrs, CHECK_EQ(inputs.size(), 8U); BatchNormParam param = nnvm::get(attrs.parsed); int dtype = inputs[0].type_flag_; - TShape shape = inputs[0].shape_; + mxnet::TShape shape = inputs[0].shape_; param.axis = mxnet::op::batchnorm::GetRealAxis(shape, param.axis); #if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5 diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 711fe9c49fa4..fa441c45321e 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -33,17 +33,17 @@ namespace mxnet { namespace op { static bool ConcatShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; const ConcatParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), static_cast(param_.num_args)); - TShape dshape; + mxnet::TShape dshape; index_t size = 0; bool has_zero = false; int axis = -1; for (int i = 0; i < param_.num_args; ++i) { - TShape tmp = (*in_shape)[i]; + mxnet::TShape tmp = (*in_shape)[i]; if (tmp.ndim()) { axis = CheckAxis(param_.dim, tmp.ndim()); has_zero = tmp[axis] == 0 || has_zero; @@ -53,7 +53,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, } } - TShape tmp = (*out_shape)[0]; + mxnet::TShape tmp = (*out_shape)[0]; if (tmp.ndim()) { axis = CheckAxis(param_.dim, tmp.ndim()); tmp[axis] = 0; @@ -79,17 +79,17 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, // The first (and sometimes the second) input may be unknown on the target axis. // If the two inputs are unknown, they always have the same shape. 
 static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs,
-                                std::vector<TShape> *in_shape,
-                                std::vector<TShape> *out_shape) {
+                                mxnet::ShapeVector *in_shape,
+                                mxnet::ShapeVector *out_shape) {
   using namespace mshadow;
   const ConcatParam& param_ = nnvm::get<ConcatParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), static_cast<size_t>(param_.num_args));
-  TShape dshape;
+  mxnet::TShape dshape;
   index_t size = 0;
   std::vector<int> zero_indices;
   int axis = -1;
   for (int i = 0; i < param_.num_args; ++i) {
-    TShape tmp = (*in_shape)[i];
+    mxnet::TShape tmp = (*in_shape)[i];
     if (tmp.ndim()) {
       axis = CheckAxis(param_.dim, tmp.ndim());
       if (tmp[axis] == 0) {
@@ -102,7 +102,7 @@
     }
   }

-  TShape tmp = (*out_shape)[0];
+  mxnet::TShape tmp = (*out_shape)[0];
   if (tmp.ndim()) {
     axis = CheckAxis(param_.dim, tmp.ndim());
     tmp[axis] = 0;
@@ -373,7 +373,7 @@ Example::
 .set_attr<bool>("TIsMKLDNN", true)
 #endif
 CONCAT_FORWARD_ATTRS
-.set_attr<nnvm::FInferShape>("FInferShape", ConcatShape)
+.set_attr<mxnet::FInferShape>("FInferShape", ConcatShape)
 .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate")
 .add_arguments(ConcatParam::__FIELDS__());

@@ -406,7 +406,7 @@ NNVM_REGISTER_OP(_rnn_param_concat)
 })
 #endif
 CONCAT_FORWARD_ATTRS
-.set_attr<nnvm::FInferShape>("FInferShape", RNNParamConcatShape)
+.set_attr<mxnet::FInferShape>("FInferShape", RNNParamConcatShape)
 .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate")
 .add_arguments(ConcatParam::__FIELDS__());

diff --git a/src/operator/nn/convolution-inl.h b/src/operator/nn/convolution-inl.h
index a5f384ec44a8..7ae34ae363b4 100644
--- a/src/operator/nn/convolution-inl.h
+++ b/src/operator/nn/convolution-inl.h
@@ -56,10 +56,10 @@ enum ConvolutionOpCudnnTune {kOff, kLimited, kFastest};
 }

 struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
-  TShape kernel;
-  TShape stride;
-  TShape dilate;
-  TShape pad;
+  mxnet::TShape kernel;
+  mxnet::TShape stride;
+  mxnet::TShape dilate;
+  mxnet::TShape pad;
   uint32_t num_filter;
   uint32_t num_group;
   uint64_t workspace;
@@ -69,11 +69,11 @@ struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(ConvolutionParam) {
     DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (w,), (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(stride).set_default(TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
     .describe("Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(dilate).set_default(TShape())
+    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape())
     .describe("Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(pad).set_default(TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
     .describe("Zero pad for convolution: (w,), (h, w) or (d, h, w).
Defaults to no padding."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Convolution filter(channel) number"); @@ -209,7 +209,7 @@ class ConvolutionOp { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_data[0].shape_[i+1]; @@ -295,7 +295,7 @@ class ConvolutionOp { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_grad[conv::kData].shape_[i+1]; @@ -339,7 +339,7 @@ class ConvolutionOp { } private: - void LayerSetUp(const TShape& ishape, const TShape& oshape) { + void LayerSetUp(const mxnet::TShape& ishape, const mxnet::TShape& oshape) { channel_axis_ = 1; // hard code channel axis const index_t first_spatial_axis = channel_axis_ + 1; const index_t num_axes = param_.kernel.ndim() + 2; diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index 53b0c1380ed3..527a0073930f 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -84,8 +84,8 @@ static void ConvolutionGradComputeExCPU(const nnvm::NodeAttrs& attrs, #endif static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; const ConvolutionParam& param_ = nnvm::get(attrs.parsed); if (!param_.no_bias) { @@ -94,8 +94,8 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; } // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; - out_shape->resize(1, TShape()); - const TShape &dshp = (*in_shape)[conv::kData]; + out_shape->resize(1, mxnet::TShape()); + const mxnet::TShape &dshp = (*in_shape)[conv::kData]; if (dshp.ndim() == 0) return false; if (param_.kernel.ndim() == 1) { @@ -477,7 +477,7 @@ There are other options to tune the performance. 
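ConvolutionShape above reduces to one formula per spatial axis once the data shape is known. A sketch of that arithmetic for the 1-D NCW case (helper name and layout are assumptions, not part of the patch):

#include <mxnet/tuple.h>

// Output extent per axis: floor((in + 2*pad - dilate*(k-1) - 1) / stride) + 1.
inline mxnet::TShape ConvOutShapeNCW(const mxnet::TShape& dshape,  // {N, C, W}
                                     int kernel, int stride, int dilate,
                                     int pad, int num_filter) {
  const int dilated_k = dilate * (kernel - 1) + 1;
  mxnet::TShape oshape(3);
  oshape[0] = dshape[0];                                   // batch
  oshape[1] = num_filter;                                  // output channels
  oshape[2] = (dshape[2] + 2 * pad - dilated_k) / stride + 1;
  return oshape;
}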
[](const NodeAttrs& attrs) { return std::vector{"output"}; }) -.set_attr("FInferShape", ConvolutionShape) +.set_attr("FInferShape", ConvolutionShape) .set_attr("FInferType", ConvolutionType) #if MXNET_USE_MKLDNN == 1 .set_attr("FInferStorageType", ConvStorageType) diff --git a/src/operator/nn/convolution.cu b/src/operator/nn/convolution.cu index daccc5518efc..010be8a208fb 100644 --- a/src/operator/nn/convolution.cu +++ b/src/operator/nn/convolution.cu @@ -39,8 +39,8 @@ template static CuDNNConvolutionOp& GetCuDNNConvOp(const ConvolutionParam& param, int forward_compute_type, int backward_compute_type, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, const RunContext& rctx, bool add_to_weight) { #if DMLC_CXX11_THREAD_LOCAL @@ -115,8 +115,8 @@ void ConvolutionCompute(const nnvm::NodeAttrs& attrs, param.kernel.ndim() == 2 && param.dilate == mshadow::Shape2(1, 1) && dtype == mshadow::kFloat32) { - std::vector in_shape(inputs.size()); - std::vector out_shape(1, outputs[0].shape_); + mxnet::ShapeVector in_shape(inputs.size()); + mxnet::ShapeVector out_shape(1, outputs[0].shape_); for (size_t i = 0; i < in_shape.size(); i++) in_shape[i] = inputs[i].shape_; DepthwiseConvolutionOp op; @@ -142,8 +142,8 @@ void ConvolutionCompute(const nnvm::NodeAttrs& attrs, op.Init(param); op.Forward(ctx, inputs, req, outputs); } else { - std::vector in_shape(inputs.size()); - std::vector out_shape(1, outputs[0].shape_); + mxnet::ShapeVector in_shape(inputs.size()); + mxnet::ShapeVector out_shape(1, outputs[0].shape_); for (size_t i = 0; i < in_shape.size(); i++) in_shape[i] = inputs[i].shape_; // req[conv::kWeight] is only set for backward, so assume the typical 'write' for now. @@ -195,8 +195,8 @@ void ConvolutionGradCompute(const nnvm::NodeAttrs& attrs, param.dilate == mshadow::Shape2(1, 1) && dtype == mshadow::kFloat32) { // The first element stores out grad. - std::vector in_shape(in_data.size()); - std::vector out_shape(1, out_grad.shape_); + mxnet::ShapeVector in_shape(in_data.size()); + mxnet::ShapeVector out_shape(1, out_grad.shape_); for (size_t i = 0; i < in_shape.size(); i++) in_shape[i] = in_data[i].shape_; DepthwiseConvolutionOp op; @@ -223,8 +223,8 @@ void ConvolutionGradCompute(const nnvm::NodeAttrs& attrs, op.Backward(ctx, std::vector{out_grad}, in_data, req, in_grad); } else { // The first element stores out grad. 
- std::vector in_shape(in_data.size()); - std::vector out_shape(1, out_grad.shape_); + mxnet::ShapeVector in_shape(in_data.size()); + mxnet::ShapeVector out_shape(1, out_grad.shape_); for (size_t i = 0; i < in_shape.size(); i++) in_shape[i] = in_data[i].shape_; auto add_to_weight = req[conv::kWeight] == kAddTo; diff --git a/src/operator/nn/ctc_loss-inl.h b/src/operator/nn/ctc_loss-inl.h index 754cf8471b5d..357888dc30f1 100644 --- a/src/operator/nn/ctc_loss-inl.h +++ b/src/operator/nn/ctc_loss-inl.h @@ -208,14 +208,14 @@ inline uint32_t CTCLossOpNumInputs(const NodeAttrs& attrs) { } inline bool CTCLossOpShape(const nnvm::NodeAttrs &attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { const CTCLossOpParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), CTCLossOpNumInputs(attrs)); CHECK_EQ(out_attrs->size(), 2U); - const TShape &dshape = (*in_attrs)[ctc_loss::kData]; - const TShape &lshape = (*in_attrs)[ctc_loss::kLabel]; + const mxnet::TShape &dshape = (*in_attrs)[ctc_loss::kData]; + const mxnet::TShape &lshape = (*in_attrs)[ctc_loss::kLabel]; CHECK_EQ(dshape.ndim(), 3U) << "The number of dimensions of data array must be 3."; CHECK_EQ(lshape.ndim(), 2U) << "The number of dimensions of labels array must be 2."; CHECK_EQ(dshape[1], lshape[0]) @@ -223,14 +223,14 @@ inline bool CTCLossOpShape(const nnvm::NodeAttrs &attrs, if (param.use_data_lengths) { int kInputLength = 2; - const TShape &dlshape = (*in_attrs)[kInputLength]; + const mxnet::TShape &dlshape = (*in_attrs)[kInputLength]; CHECK_EQ(dlshape.ndim(), 1U) << "Data length array must be a vector."; CHECK_EQ(dlshape[0], dshape[1]) << "The batch size for the data and data lengths must be the same."; } if (param.use_label_lengths) { int kLabelLength = 2 + param.use_data_lengths; - const TShape &llshape = (*in_attrs)[kLabelLength]; + const mxnet::TShape &llshape = (*in_attrs)[kLabelLength]; CHECK_EQ(llshape.ndim(), 1U) << "Label length array must be a vector."; CHECK_EQ(llshape[0], lshape[0]) << "The batch size for the labels and label lengths must be the same."; @@ -239,7 +239,7 @@ inline bool CTCLossOpShape(const nnvm::NodeAttrs &attrs, "the maximum sequence length of the " "data."; - TShape oshape(1); + mxnet::TShape oshape(1); oshape[0] = dshape[1]; // batch size SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); // forward output SHAPE_ASSIGN_CHECK(*out_attrs, 1, dshape); // grad output diff --git a/src/operator/nn/ctc_loss.cc b/src/operator/nn/ctc_loss.cc index d9c7606f2e28..f718b42bfaa4 100644 --- a/src/operator/nn/ctc_loss.cc +++ b/src/operator/nn/ctc_loss.cc @@ -110,7 +110,7 @@ information on the definition and the algorithm. 
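For reference, the shape relationships that CTCLossOpShape (migrated above) enforces can be stated in a few lines; the helper below is illustrative only, with the layouts taken from the checks in the hunk:

#include <mxnet/tuple.h>

// data  : (sequence_length T, batch_size N, alphabet_size C)
// label : (N, max_label_length L), with L <= T
// out 0 : (N,)  one loss value per batch element
// out 1 : same shape as data (gradient output)
inline mxnet::TShape CtcLossOutShape(const mxnet::TShape& dshape) {
  mxnet::TShape oshape(1);
  oshape[0] = dshape[1];   // N: one loss per batch element
  return oshape;
}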
[](const NodeAttrs& attrs) { return 1; }) -.set_attr("FInferShape", CTCLossOpShape) +.set_attr("FInferShape", CTCLossOpShape) .set_attr("FInferType", CTCLossOpType) .set_attr("FInferStorageType", CTCLossOpStorageType) .set_attr("FResourceRequest", [](const NodeAttrs& attrs) diff --git a/src/operator/nn/cudnn/cudnn_algoreg-inl.h b/src/operator/nn/cudnn/cudnn_algoreg-inl.h index 21d3a30ba7cd..cef9d6f86940 100644 --- a/src/operator/nn/cudnn/cudnn_algoreg-inl.h +++ b/src/operator/nn/cudnn/cudnn_algoreg-inl.h @@ -72,8 +72,8 @@ class CuDNNAlgoReg { CuDNNAlgo *)>; void FindOrElseRegister(const ParamType ¶m, - const std::vector &in_shape, - const std::vector &out_shape, + const mxnet::ShapeVector &in_shape, + const mxnet::ShapeVector &out_shape, cudnnDataType_t cudnn_data_type, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type, @@ -127,7 +127,7 @@ class CuDNNAlgoReg { struct ParamKey { ParamType param; - TShape data_shape, weight_shape, out_shape; + mxnet::TShape data_shape, weight_shape, out_shape; cudnnDataType_t cudnn_data_type; cudnnDataType_t cudnn_forward_compute_type; cudnnDataType_t cudnn_backward_compute_type; diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cc b/src/operator/nn/cudnn/cudnn_batch_norm.cc index f1d229dd5421..5632028dd769 100644 --- a/src/operator/nn/cudnn/cudnn_batch_norm.cc +++ b/src/operator/nn/cudnn/cudnn_batch_norm.cc @@ -32,16 +32,16 @@ namespace mxnet { namespace op { #if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 4 -static bool BatchNormShape(const nnvm::NodeAttrs& attrs, std::vector *in_shape, - std::vector *out_shape) { +static bool BatchNormShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, moving_mean, moving_var]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; - in_shape->at(1) = TShape(Shape1(dshape[1])); - in_shape->at(2) = TShape(Shape1(dshape[1])); - in_shape->at(3) = TShape(Shape1(dshape[1])); - in_shape->at(4) = TShape(Shape1(dshape[1])); + in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); + in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); + in_shape->at(3) = mxnet::TShape(Shape1(dshape[1])); + in_shape->at(4) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); out_shape->push_back(dshape); @@ -85,7 +85,7 @@ NNVM_REGISTER_OP(CuDNNBatchNorm) .set_attr("FMutateInputs", [](const nnvm::NodeAttrs& attrs) { return std::vector{3, 4}; }) -.set_attr("FInferShape", BatchNormShape) +.set_attr("FInferShape", BatchNormShape) .set_attr("FCompute", BatchNormCompute_CPU) .set_attr("FGradient", ElemwiseGradUseInOut{"_backward_CuDNNBatchNorm"}) .add_argument("data", "NDArray-or-Symbol", "Input data to batch normalization") diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index f68d2e3e8ead..e11f7cc81d25 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -59,8 +59,8 @@ class CuDNNConvolutionOp { void Init(const ConvolutionParam& param, int forward_compute_type, int backward_compute_type, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, const RunContext& rctx, bool add_to_weight) { using namespace mshadow; @@ -430,8 +430,8 @@ class CuDNNConvolutionOp { return converted; } - void InitDescriptors(const 
std::vector& in_shape, - const std::vector& out_shape, + void InitDescriptors(const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type) { using namespace mshadow; @@ -439,10 +439,10 @@ class CuDNNConvolutionOp { CHECK_EQ(in_shape.size(), expected); CHECK_EQ(out_shape.size(), 1U); - TShape dshape = in_shape[conv::kData]; - TShape wshape = in_shape[conv::kWeight]; - TShape oshape = out_shape[conv::kOut]; - TShape dstride, ostride; + mxnet::TShape dshape = in_shape[conv::kData]; + mxnet::TShape wshape = in_shape[conv::kWeight]; + mxnet::TShape oshape = out_shape[conv::kOut]; + mxnet::TShape dstride, ostride; #if CUDNN_MAJOR <= 6 wshape[0] /= param_.num_group; #endif @@ -456,9 +456,12 @@ class CuDNNConvolutionOp { #endif if (param_.kernel.ndim() == 1 || param_.kernel.ndim() == 2) { // 1d or 2d conv - auto pad = param_.kernel.ndim() == 2 ? param_.pad : TShape({0, param_.pad[0]}); - auto stride = param_.kernel.ndim() == 2 ? param_.stride : TShape({1, param_.stride[0]}); - auto dilate = param_.kernel.ndim() == 2 ? param_.dilate : TShape({1, param_.dilate[0]}); + auto pad = param_.kernel.ndim() == 2 ? + param_.pad : mxnet::TShape({0, param_.pad[0]}); + auto stride = param_.kernel.ndim() == 2 ? + param_.stride : mxnet::TShape({1, param_.stride[0]}); + auto dilate = param_.kernel.ndim() == 2 ? + param_.dilate : mxnet::TShape({1, param_.dilate[0]}); CUDNN_CALL(cudnnSetConvolution2dDescriptor(forward_conv_desc_, pad[0], pad[1], @@ -501,15 +504,15 @@ class CuDNNConvolutionOp { oshape = ConvertLayout(oshape.get<4>(), param_.layout.value(), kNCHW); } else { wshape = ConvertLayout(wshape.get<3>(), param_.layout.value(), kNCW); - wshape = TShape({wshape[0], wshape[1], 1, wshape[2]}); + wshape = mxnet::TShape({wshape[0], wshape[1], 1, wshape[2]}); dstride = ConvertLayout(Strides<3>(dshape), param_.layout.value(), kNCW); - dstride = TShape({dstride[0], dstride[1], dstride[1], dstride[2]}); + dstride = mxnet::TShape({dstride[0], dstride[1], dstride[1], dstride[2]}); dshape = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW); - dshape = TShape({dshape[0], dshape[1], 1, dshape[2]}); + dshape = mxnet::TShape({dshape[0], dshape[1], 1, dshape[2]}); ostride = ConvertLayout(Strides<3>(oshape), param_.layout.value(), kNCW); - ostride = TShape({ostride[0], ostride[1], ostride[1], ostride[2]}); + ostride = mxnet::TShape({ostride[0], ostride[1], ostride[1], ostride[2]}); oshape = ConvertLayout(oshape.get<3>(), param_.layout.value(), kNCW); - oshape = TShape({oshape[0], oshape[1], 1, oshape[2]}); + oshape = mxnet::TShape({oshape[0], oshape[1], 1, oshape[2]}); } CUDNN_CALL(cudnnSetFilter4dDescriptor(filter_desc_, dtype_, @@ -608,7 +611,7 @@ class CuDNNConvolutionOp { ostride_buffer.data())); if (!param_.no_bias) { - TShape bias = in_shape[conv::kBias]; + mxnet::TShape bias = in_shape[conv::kBias]; #if CUDNN_MAJOR >= 7 bias_offset_ = bias[0]; std::vector bias_shape = {1, @@ -634,8 +637,8 @@ class CuDNNConvolutionOp { } void CuDNNAlgoSetter(const RunContext& rctx, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type, CuDNNAlgo *fwd, @@ -845,8 +848,8 @@ class CuDNNConvolutionOp { } void SelectAlgo(const RunContext& rctx, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const 
mxnet::ShapeVector& out_shape, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type) { auto algo_setter = [&](CuDNNAlgo *fwd, @@ -966,7 +969,7 @@ class CuDNNConvolutionOp { &forward_workspace_byte_)); } - int *CastTShapeToIntPtr(const TShape& s, std::vector *buffer) { + int *CastTShapeToIntPtr(const mxnet::TShape& s, std::vector *buffer) { buffer->resize(s.ndim()); nnvm::ShapeTypeCast(s.begin(), s.end(), buffer->data()); return buffer->data(); @@ -993,12 +996,12 @@ class CuDNNConvolutionOp { return data_ptr; } - // Converts a TShape to a Shape<> of strides. + // Converts a mxnet::TShape to a Shape<> of strides. // e.g. {shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1} template - inline Shape Strides(const TShape &s) { + inline Shape Strides(const mxnet::TShape &s) { uint32_t ndim = s.ndim(); - TShape strides(ndim); + mxnet::TShape strides(ndim); for (uint32_t i = 0; i != ndim; ++i) strides[i] = s.ProdShape(i+1, ndim); return strides.get(); @@ -1032,7 +1035,7 @@ class CuDNNConvolutionOp { } // Given a tensor shape of this operation, return the number of features 'c' - int64_t Features(const TShape &dshape) { + int64_t Features(const mxnet::TShape &dshape) { int c = 0; switch (dshape.ndim()) { case 3: c = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW)[1]; break; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 72ba2c95fc6a..ec95d2be3309 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -55,8 +55,8 @@ class CuDNNDeconvolutionOp { void Init(DeconvolutionParam param, int forward_compute_type, int backward_compute_type, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, const RunContext& rctx, bool add_to_weight) { using namespace mshadow; @@ -348,8 +348,8 @@ class CuDNNDeconvolutionOp { return converted; } - inline void InitDescriptors(const std::vector &in_shape, - const std::vector &out_shape, + inline void InitDescriptors(const mxnet::ShapeVector &in_shape, + const mxnet::ShapeVector &out_shape, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type) { using namespace mshadow; @@ -357,10 +357,10 @@ class CuDNNDeconvolutionOp { CHECK_EQ(in_shape.size(), expected); CHECK_EQ(out_shape.size(), 1U); - TShape dshape = in_shape[deconv::kData]; - TShape wshape = in_shape[deconv::kWeight]; - TShape oshape = out_shape[deconv::kOut]; - TShape dstride, ostride; + mxnet::TShape dshape = in_shape[deconv::kData]; + mxnet::TShape wshape = in_shape[deconv::kWeight]; + mxnet::TShape oshape = out_shape[deconv::kOut]; + mxnet::TShape dstride, ostride; wshape[0] /= param_.num_group; #if CUDNN_MAJOR <= 5 // As of cuDNN_v6, the unsuffixed version of cudnnSetConvolution2dDescriptor() @@ -382,8 +382,10 @@ class CuDNNDeconvolutionOp { o_pad[0] = 0; o_pad[1] = o_pad_1D[0]; } - auto stride = param_.kernel.ndim() == 2 ? param_.stride : TShape({1, param_.stride[0]}); - auto dilate = param_.kernel.ndim() == 2 ? param_.dilate : TShape({1, param_.dilate[0]}); + auto stride = param_.kernel.ndim() == 2 ? + param_.stride : mxnet::TShape({1, param_.stride[0]}); + auto dilate = param_.kernel.ndim() == 2 ? 
+ param_.dilate : mxnet::TShape({1, param_.dilate[0]}); CUDNN_CALL(cudnnSetConvolution2dDescriptor(forward_conv_desc_, o_pad[0], @@ -427,15 +429,15 @@ class CuDNNDeconvolutionOp { oshape = ConvertLayout(oshape.get<4>(), param_.layout.value(), kNCHW); } else { wshape = ConvertLayout(wshape.get<3>(), param_.layout.value(), kNCW); - wshape = TShape({wshape[0], wshape[1], 1, wshape[2]}); + wshape = mxnet::TShape({wshape[0], wshape[1], 1, wshape[2]}); dstride = ConvertLayout(Strides<3>(dshape), param_.layout.value(), kNCW); - dstride = TShape({dstride[0], dstride[1], dstride[1], dstride[2]}); + dstride = mxnet::TShape({dstride[0], dstride[1], dstride[1], dstride[2]}); dshape = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW); - dshape = TShape({dshape[0], dshape[1], 1, dshape[2]}); + dshape = mxnet::TShape({dshape[0], dshape[1], 1, dshape[2]}); ostride = ConvertLayout(Strides<3>(oshape), param_.layout.value(), kNCW); - ostride = TShape({ostride[0], ostride[1], ostride[1], ostride[2]}); + ostride = mxnet::TShape({ostride[0], ostride[1], ostride[1], ostride[2]}); oshape = ConvertLayout(oshape.get<3>(), param_.layout.value(), kNCW); - oshape = TShape({oshape[0], oshape[1], 1, oshape[2]}); + oshape = mxnet::TShape({oshape[0], oshape[1], 1, oshape[2]}); } CUDNN_CALL(cudnnSetFilter4dDescriptor(filter_desc_, dtype_, @@ -521,7 +523,7 @@ class CuDNNDeconvolutionOp { CastTShapeToIntPtr(ostride, &ostride_buffer))); if (!param_.no_bias) { - TShape bias = in_shape[deconv::kBias]; + mxnet::TShape bias = in_shape[deconv::kBias]; bias_offset_ = bias[0] / param_.num_group; std::vector bias_shape = {1, static_cast(bias[0] / param_.num_group), @@ -540,8 +542,8 @@ class CuDNNDeconvolutionOp { } void CuDNNAlgoSetter(const RunContext& rctx, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type, CuDNNAlgo *fwd, @@ -755,8 +757,8 @@ class CuDNNDeconvolutionOp { } void SelectAlgo(const RunContext& rctx, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, cudnnDataType_t cudnn_forward_compute_type, cudnnDataType_t cudnn_backward_compute_type) { auto algo_setter = [&](CuDNNAlgo *fwd, @@ -884,7 +886,7 @@ class CuDNNDeconvolutionOp { back_filter_algo_workspace_size); } - int *CastTShapeToIntPtr(const TShape& s, std::vector *buffer) { + int *CastTShapeToIntPtr(const mxnet::TShape& s, std::vector *buffer) { buffer->resize(s.ndim()); nnvm::ShapeTypeCast(s.begin(), s.end(), buffer->data()); return buffer->data(); @@ -911,12 +913,12 @@ class CuDNNDeconvolutionOp { return data_ptr; } - // Converts a TShape to a Shape<> of strides. + // Converts a mxnet::TShape to a Shape<> of strides. // e.g. 
{shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1} template - inline Shape Strides(const TShape &s) { + inline Shape Strides(const mxnet::TShape &s) { uint32_t ndim = s.ndim(); - TShape strides(ndim); + mxnet::TShape strides(ndim); for (uint32_t i = 0; i != ndim; ++i) strides[i] = s.ProdShape(i+1, ndim); return strides.get(); @@ -943,7 +945,7 @@ class CuDNNDeconvolutionOp { // Given a tensor shape of this operation, return the number of features 'c' - int64_t Features(const TShape &dshape) { + int64_t Features(const mxnet::TShape &dshape) { int c = 0; switch (dshape.ndim()) { case 3: c = ConvertLayout(dshape.get<3>(), param_.layout.value(), kNCW)[1]; break; @@ -980,8 +982,8 @@ class CuDNNDeconvolutionOp { int forward_compute_type_; int backward_compute_type_; - const std::vector in_shapes_; - const std::vector out_shapes_; + const mxnet::ShapeVector in_shapes_; + const mxnet::ShapeVector out_shapes_; // Temp workspace size in bytes needed for Forward() operation. Note that // in deconvolution, this is handled by the cuDNN backprop-to-data kernel. diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index d89a489c0183..5248c1211ac7 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -49,12 +49,12 @@ namespace deconv { } struct DeconvolutionParam : public dmlc::Parameter { - TShape kernel; - TShape stride; - TShape dilate; - TShape pad; - TShape adj; - TShape target_shape; + mxnet::TShape kernel; + mxnet::TShape stride; + mxnet::TShape dilate; + mxnet::TShape pad; + mxnet::TShape adj; + mxnet::TShape target_shape; uint32_t num_filter; uint32_t num_group; uint64_t workspace; @@ -65,13 +65,13 @@ struct DeconvolutionParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(DeconvolutionParam) { DMLC_DECLARE_FIELD(kernel).describe("Deconvolution kernel size: (w,), (h, w) or (d, h, w). " "This is same as the kernel size used for the corresponding convolution"); - DMLC_DECLARE_FIELD(stride).set_default(TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) .describe("The stride used for the corresponding convolution: (w,), (h, w) or (d, h, w). " "Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(dilate).set_default(TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) .describe("Dilation factor for each dimension of the input: (w,), (h, w) or (d, h, w). " "Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) .describe("The amount of implicit zero padding added during convolution for each " "dimension of the input: " "(w,), (h, w) or (d, h, w). " @@ -79,11 +79,11 @@ struct DeconvolutionParam : public dmlc::Parameter { "If `target_shape` is set, " "`pad` will be ignored and a padding that will generate the target shape " "will be used. Defaults to no padding."); - DMLC_DECLARE_FIELD(adj).set_default(TShape()) + DMLC_DECLARE_FIELD(adj).set_default(mxnet::TShape()) .describe("Adjustment for output shape: (w,), (h, w) or (d, h, w). 
" "If `target_shape` is set, " "`adj` will be ignored and computed accordingly."); - DMLC_DECLARE_FIELD(target_shape).set_default(TShape()) + DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape()) .describe("Shape of the output tensor: (w,), (h, w) or (d, h, w)."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Number of output filters."); @@ -118,7 +118,7 @@ struct DeconvolutionParam : public dmlc::Parameter { } template - void InferPad(TShape input, index_t (&o_pad)[ndim], index_t (&o_adj)[ndim] ) const { + void InferPad(mxnet::TShape input, index_t (&o_pad)[ndim], index_t (&o_adj)[ndim] ) const { // Modified by Li.bs // Use tag to control the calculation of pad bool bCal = false; @@ -231,7 +231,7 @@ class DeconvolutionOp { Tensor out = TBlobTo4DTensor(out_data[deconv::kOut], s); index_t o_pad[2], o_adj[2]; if (param_.kernel.ndim() == 2) { - param_.InferPad(TShape({in_data_shape[2], in_data_shape[3]}), o_pad, o_adj); + param_.InferPad(mxnet::TShape({in_data_shape[2], in_data_shape[3]}), o_pad, o_adj); } else { index_t o_pad_1D[1], o_adj_1D[1]; param_.InferPad({in_data_shape[2]}, o_pad_1D, o_adj_1D); @@ -240,9 +240,9 @@ class DeconvolutionOp { o_adj[0] = 0; o_adj[1] = o_adj_1D[0]; } - auto stride = param_.kernel.ndim() == 2 ? param_.stride : TShape({1, param_.stride[0]}); - auto dilate = param_.kernel.ndim() == 2 ? param_.dilate : TShape({1, param_.dilate[0]}); - auto kernel = param_.kernel.ndim() == 2 ? param_.kernel : TShape({1, param_.kernel[0]}); + auto stride = param_.kernel.ndim() == 2 ? param_.stride : mxnet::TShape({1, param_.stride[0]}); + auto dilate = param_.kernel.ndim() == 2 ? param_.dilate : mxnet::TShape({1, param_.dilate[0]}); + auto kernel = param_.kernel.ndim() == 2 ? param_.kernel : mxnet::TShape({1, param_.kernel[0]}); auto kernel_size = kernel.Size(); Shape<3> wmat_shape = @@ -351,7 +351,7 @@ class DeconvolutionOp { index_t o_pad[2], o_adj[2]; if (param_.kernel.ndim() == 2) { - param_.InferPad(TShape({in_data_shape[2], in_data_shape[3]}), o_pad, o_adj); + param_.InferPad(mxnet::TShape({in_data_shape[2], in_data_shape[3]}), o_pad, o_adj); } else { index_t o_pad_1D[1], o_adj_1D[1]; param_.InferPad({in_data_shape[2]}, o_pad_1D, o_adj_1D); @@ -360,9 +360,9 @@ class DeconvolutionOp { o_adj[0] = 0; o_adj[1] = o_adj_1D[0]; } - auto stride = param_.kernel.ndim() == 2 ? param_.stride : TShape({1, param_.stride[0]}); - auto dilate = param_.kernel.ndim() == 2 ? param_.dilate : TShape({1, param_.dilate[0]}); - auto kernel = param_.kernel.ndim() == 2 ? param_.kernel : TShape({1, param_.kernel[0]}); + auto stride = param_.kernel.ndim() == 2 ? param_.stride : mxnet::TShape({1, param_.stride[0]}); + auto dilate = param_.kernel.ndim() == 2 ? param_.dilate : mxnet::TShape({1, param_.dilate[0]}); + auto kernel = param_.kernel.ndim() == 2 ? 
param_.kernel : mxnet::TShape({1, param_.kernel[0]}); auto kernel_size = kernel.Size(); Shape<3> wmat_shape = diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index 039c732c831d..27928b9b41c3 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -36,8 +36,8 @@ namespace mxnet { namespace op { static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { const DeconvolutionParam& param_ = nnvm::get(attrs.parsed); #if MXNET_USE_CUDNN == 0 if (param_.kernel.ndim() > 2) { @@ -52,8 +52,8 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, } else { CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; } - out_shape->resize(1, TShape()); - const TShape &dshape = (*in_shape)[deconv::kData]; + out_shape->resize(1, mxnet::TShape()); + const mxnet::TShape &dshape = (*in_shape)[deconv::kData]; if (dshape.ndim() == 0) return false; if (param_.kernel.ndim() == 1) { @@ -403,7 +403,7 @@ NNVM_REGISTER_OP(Deconvolution) [](const NodeAttrs& attrs) { return std::vector{"output"}; }) -.set_attr("FInferShape", DeconvolutionShape) +.set_attr("FInferShape", DeconvolutionShape) .set_attr("FInferType", DeconvolutionType) #if MXNET_USE_MKLDNN == 1 .set_attr("FInferStorageType", DeconvStorageType) diff --git a/src/operator/nn/deconvolution.cu b/src/operator/nn/deconvolution.cu index 1c3970b9e716..6332c1f9078f 100644 --- a/src/operator/nn/deconvolution.cu +++ b/src/operator/nn/deconvolution.cu @@ -37,8 +37,8 @@ template static CuDNNDeconvolutionOp &GetCuDNNDeconvOp(const DeconvolutionParam& param, int forward_compute_type, int backward_compute_type, - const std::vector& in_shape, - const std::vector& out_shape, + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape, const RunContext& rctx, bool add_to_weight) { #if DMLC_CXX11_THREAD_LOCAL @@ -109,8 +109,8 @@ void DeconvolutionCompute(const nnvm::NodeAttrs& attrs, op.Init(param); op.Forward(ctx, inputs, req, outputs); } else { - std::vector in_shape(inputs.size()); - std::vector out_shape(1, outputs[0].shape_); + mxnet::ShapeVector in_shape(inputs.size()); + mxnet::ShapeVector out_shape(1, outputs[0].shape_); for (size_t i = 0; i < in_shape.size(); i++) { in_shape[i] = inputs[i].shape_; } @@ -158,8 +158,8 @@ void DeconvolutionGradCompute(const nnvm::NodeAttrs& attrs, op.Init(param); op.Backward(ctx, std::vector{out_grad}, in_data, req, in_grad); } else { - std::vector in_shape(in_data.size()); - std::vector out_shape(1, out_grad.shape_); + mxnet::ShapeVector in_shape(in_data.size()); + mxnet::ShapeVector out_shape(1, out_grad.shape_); for (size_t i = 0; i < in_shape.size(); i++) { in_shape[i] = in_data[i].shape_; } diff --git a/src/operator/nn/depthwise_convolution-inl.h b/src/operator/nn/depthwise_convolution-inl.h index 69e6f693b852..9db2650491a8 100644 --- a/src/operator/nn/depthwise_convolution-inl.h +++ b/src/operator/nn/depthwise_convolution-inl.h @@ -42,8 +42,8 @@ template class DepthwiseConvolutionOp { public: void Init(const ConvolutionParam& param, - const std::vector& in_shape, - const std::vector& out_shape) { + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape) { args_.batch = in_shape[conv::kData][0]; args_.in_channel = in_shape[conv::kData][1]; args_.in_height = in_shape[conv::kData][2]; diff --git a/src/operator/nn/dropout-inl.h b/src/operator/nn/dropout-inl.h index 2a828994fb44..f184fbdc2282 100644 --- 
a/src/operator/nn/dropout-inl.h +++ b/src/operator/nn/dropout-inl.h @@ -64,7 +64,7 @@ const int MAX_DIM = 5; struct DropoutParam : public dmlc::Parameter { float p; int mode; - TShape axes; + mxnet::TShape axes; dmlc::optional cudnn_off; DMLC_DECLARE_PARAMETER(DropoutParam) { DMLC_DECLARE_FIELD(p).set_default(0.5) @@ -75,7 +75,7 @@ struct DropoutParam : public dmlc::Parameter { .add_enum("always", dropout::kAlways) .set_default(dropout::kTraining) .describe("Whether to only turn on dropout during training or to also turn on for inference."); - DMLC_DECLARE_FIELD(axes).set_default(TShape()) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) .describe("Axes for variational dropout kernel."); DMLC_DECLARE_FIELD(cudnn_off).set_default(dmlc::optional(true)) .describe("Whether to turn off cudnn in dropout operator. " @@ -370,7 +370,7 @@ class DropoutOp { mask.dptr(), this->pkeep_); // broadcast mul - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(in.shape_, mask.shape_, out.shape_, &new_lshape, &new_rshape, &new_oshape); @@ -438,7 +438,7 @@ class DropoutOp { return; } else { // broardcast mul - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(grad.shape_, mask.shape_, gdata.shape_, &new_lshape, &new_rshape, &new_oshape); @@ -475,7 +475,7 @@ class DropoutOp { /*! \brief Dropout mode */ dropout::DropoutOpMode mode_; /*! \brief Axes on which dropout mask is shared in the form of broadcast multiply */ - TShape axes_; + mxnet::TShape axes_; /*! \brief Flag to record whether forward is executed in pass-through mode */ bool dropout_passthrough_; #if MXNET_USE_CUDNN_DROPOUT @@ -491,7 +491,7 @@ class DropoutOp { static OpStatePtr CreateDropoutState(const nnvm::NodeAttrs &attrs, const Context ctx, - const std::vector &in_shapes, + const mxnet::ShapeVector &in_shapes, const std::vector &in_types) { const DropoutParam& param = nnvm::get(attrs.parsed); OpStatePtr state; diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc index d6cbeb4e561d..5fdc672d766e 100644 --- a/src/operator/nn/dropout.cc +++ b/src/operator/nn/dropout.cc @@ -89,12 +89,12 @@ Example:: [](const NodeAttrs& attrs) { return 1; }) -.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_shape, std::vector *out_shape){ +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape){ using namespace mshadow; CHECK_EQ(in_shape->size(), 1U); const DropoutParam& param = nnvm::get(attrs.parsed); - TShape dshape(in_shape->at(0)); + mxnet::TShape dshape(in_shape->at(0)); if (dshape.ndim() == 0) return false; out_shape->clear(); out_shape->push_back(dshape); diff --git a/src/operator/nn/fully_connected-inl.h b/src/operator/nn/fully_connected-inl.h index 2b75419d2a81..93d384d51e6f 100644 --- a/src/operator/nn/fully_connected-inl.h +++ b/src/operator/nn/fully_connected-inl.h @@ -84,8 +84,8 @@ void FCForward(const OpContext &ctx, const FullyConnectedParam ¶m, CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) << "Must init CuBLAS handle in stream"; #endif // __CUDACC__ - const TShape& ishape = in_data[fullc::kData].shape_; - const TShape& oshape = out_data[fullc::kOut].shape_; + const mxnet::TShape& ishape = in_data[fullc::kData].shape_; + const mxnet::TShape& oshape = out_data[fullc::kOut].shape_; Tensor wmat = in_data[fullc::kWeight].get(s); Tensor data, out; @@ -128,8 +128,8 @@ 
void FCBackward(const OpContext &ctx, const FullyConnectedParam ¶m, // TODO(bing): check the BLAS Handle, be careful // maybe need blas handle from context Stream *s = ctx.get_stream(); - const TShape& ishape = in_data[fullc::kData].shape_; - const TShape& oshape = out_grad[fullc::kOut].shape_; + const mxnet::TShape& ishape = in_data[fullc::kData].shape_; + const mxnet::TShape& oshape = out_grad[fullc::kOut].shape_; Tensor wmat = in_data[fullc::kWeight].get(s); Tensor data, grad, gdata; diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index a178b2759bf9..2e02de300e8f 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -33,8 +33,8 @@ namespace mxnet { namespace op { static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { const FullyConnectedParam& param = nnvm::get(attrs.parsed); using namespace mshadow; if (!param.no_bias) { @@ -43,8 +43,8 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; } CHECK_EQ(out_shape->size(), 1U); - TShape dshape = (*in_shape)[fullc::kData]; - TShape oshape = (*out_shape)[0]; + mxnet::TShape dshape = (*in_shape)[fullc::kData]; + mxnet::TShape oshape = (*out_shape)[0]; // require data to be known if (dshape.ndim() == 0) return false; @@ -63,7 +63,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, } if (!param.flatten) { - TShape result_shape(dshape); + mxnet::TShape result_shape(dshape); result_shape[dshape.ndim()-1] = param.num_hidden; SHAPE_ASSIGN_CHECK(*out_shape, 0, result_shape); } else { @@ -294,7 +294,7 @@ If ``no_bias`` is set to be true, then the ``bias`` term is ignored. 
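The FullyConnectedShape hunk below hinges on the `flatten` flag; a compact sketch of its two output rules, assuming the post-patch mxnet::TShape API (helper name is illustrative):

#include <mxnet/tuple.h>

inline mxnet::TShape FCOutShape(const mxnet::TShape& dshape,
                                int num_hidden, bool flatten) {
  if (flatten) {
    // (N, d1, d2, ...) collapses to (N, num_hidden).
    return mxnet::TShape({dshape[0], num_hidden});
  }
  // Otherwise only the trailing axis is replaced: (..., d_k) -> (..., num_hidden).
  mxnet::TShape result(dshape);
  result[result.ndim() - 1] = num_hidden;
  return result;
}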
return std::vector{ResourceRequest::kTempSpace}; }) #endif -.set_attr("FInferShape", FullyConnectedShape) +.set_attr("FInferShape", FullyConnectedShape) .set_attr("FInferType", FullyConnectedType) .set_attr("FCompute", FullyConnectedCompute) .set_attr("FComputeEx", FullyConnectedComputeExCPU) diff --git a/src/operator/nn/im2col.cuh b/src/operator/nn/im2col.cuh index f9b601c5971b..d013996a79b7 100644 --- a/src/operator/nn/im2col.cuh +++ b/src/operator/nn/im2col.cuh @@ -282,10 +282,10 @@ __global__ void im2col_nd_gpu_kernel(const int n, const DType* data_im, */ template inline void im2col(mshadow::Stream* s, - const DType* data_im, const TShape& im_shape, - const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, DType* data_col) { + const DType* data_im, const mxnet::TShape& im_shape, + const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, DType* data_col) { // num_axes should be smaller than block size index_t num_spatial_axes = kernel_shape.ndim(); CHECK_LT(num_spatial_axes, mshadow::cuda::kBaseThreadNum); @@ -468,10 +468,10 @@ __global__ void col2im_nd_gpu_kernel(const int n, const DType* data_col, */ template inline void col2im(mshadow::Stream* s, - const DType* data_col, const TShape& im_shape, - const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, DType* data_im, OpReqType req) { + const DType* data_col, const mxnet::TShape& im_shape, + const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, DType* data_im, OpReqType req) { index_t num_spatial_axes = kernel_shape.ndim(); index_t im_size = im_shape.ProdShape(1, im_shape.ndim()); // num_axes should be smaller than block size diff --git a/src/operator/nn/im2col.h b/src/operator/nn/im2col.h index ce4d9e31db9c..0059a420726d 100644 --- a/src/operator/nn/im2col.h +++ b/src/operator/nn/im2col.h @@ -148,9 +148,9 @@ inline void im2col_cpu(const DType* data_im, const int channels, */ template inline void im2col_nd_core_cpu(const DType* data_input, const bool im2col, - const TShape& im_shape, const TShape& col_shape, - const TShape& kernel_shape, const TShape& pad, const TShape& stride, - const TShape& dilation, DType* data_output, OpReqType req = mxnet::kWriteTo) { + const mxnet::TShape& im_shape, const mxnet::TShape& col_shape, + const mxnet::TShape& kernel_shape, const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, DType* data_output, OpReqType req = mxnet::kWriteTo) { if (mxnet::kNullOp == req) return; index_t num_spatial_axes = kernel_shape.ndim(); if (!im2col) { @@ -234,10 +234,10 @@ inline void im2col_nd_core_cpu(const DType* data_input, const bool im2col, */ template inline void im2col(mshadow::Stream* s, - const DType* data_im, const TShape& im_shape, - const TShape& col_shape, const TShape& kernel_shape, - const TShape& pad, const TShape& stride, - const TShape& dilation, DType* data_col) { + const DType* data_im, const mxnet::TShape& im_shape, + const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, + const mxnet::TShape& pad, const mxnet::TShape& stride, + const mxnet::TShape& dilation, DType* data_col) { if (2 == kernel_shape.ndim()) { im2col_cpu(data_im, im_shape[1], im_shape[2], im_shape[3], kernel_shape[0], kernel_shape[1], pad[0], pad[1], @@ -315,10 
@@ -315,10 +315,10 @@ inline void col2im_cpu(const DType* data_col, const int channels,
 */
 template <typename DType>
 inline void col2im(mshadow::Stream<cpu>* s,
-                   const DType* data_col, const TShape& im_shape,
-                   const TShape& col_shape, const TShape& kernel_shape,
-                   const TShape& pad, const TShape& stride,
-                   const TShape& dilation, DType* data_im, OpReqType req) {
+                   const DType* data_col, const mxnet::TShape& im_shape,
+                   const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape,
+                   const mxnet::TShape& pad, const mxnet::TShape& stride,
+                   const mxnet::TShape& dilation, DType* data_im, OpReqType req) {
   index_t num_spatial_axes = kernel_shape.ndim();
   if (2 == num_spatial_axes) {
     col2im_cpu(data_col, im_shape[1], im_shape[2], im_shape[3],
diff --git a/src/operator/nn/layer_norm-inl.h b/src/operator/nn/layer_norm-inl.h
index 18f088f758e4..dc4914bf2457 100644
--- a/src/operator/nn/layer_norm-inl.h
+++ b/src/operator/nn/layer_norm-inl.h
@@ -82,7 +82,7 @@ void LayerNormCompute(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(inputs.size(), 3U);
   Stream<xpu> *s = ctx.get_stream<xpu>();
   // Reshape gamma and beta to be broadcastable
-  TShape new_param_shape(inputs[0].shape_.begin(), inputs[0].shape_.end());
+  mxnet::TShape new_param_shape(inputs[0].shape_.begin(), inputs[0].shape_.end());
   for (int i = 0; i < inputs[0].ndim(); i++) {
     if (i != axis) {
       new_param_shape[i] = 1;
@@ -91,7 +91,7 @@ void LayerNormCompute(const nnvm::NodeAttrs& attrs,
   const TBlob gamma = inputs[1].reshape(new_param_shape);
   const TBlob beta = inputs[2].reshape(new_param_shape);
   // Compute necessary data for the reduce operation.
-  TShape red_src_shape, red_dst_shape;
+  mxnet::TShape red_src_shape, red_dst_shape;
   BroadcastReduceShapeCompact(inputs[0].shape_, outputs[layernorm::kMean].shape_,
                               &red_src_shape, &red_dst_shape);
   const TBlob in_data = inputs[0].reshape(red_src_shape);
@@ -172,7 +172,7 @@ void LayerNormGradCompute(const nnvm::NodeAttrs& attrs,
   CHECK(axis >= 0 && axis < inputs[0].ndim()) << "Channel axis out of range: " << param.axis;
   Stream<xpu> *s = ctx.get_stream<xpu>();
   // Reshape gamma to be broadcastable
-  TShape new_param_shape(inputs[0].shape_.begin(), inputs[0].shape_.end());
+  mxnet::TShape new_param_shape(inputs[0].shape_.begin(), inputs[0].shape_.end());
   for (int i = 0; i < inputs[0].ndim(); i++) {
     if (i != axis) {
       new_param_shape[i] = 1;
@@ -184,7 +184,7 @@ void LayerNormGradCompute(const nnvm::NodeAttrs& attrs,
   const TBlob mean = inputs[3];
   const TBlob std = inputs[4];
   // Prepare the necessary shapes for reduction
-  TShape red_src_shape, red_dst_shape, red_exclude_src_shape, red_exclude_dst_shape;
+  mxnet::TShape red_src_shape, red_dst_shape, red_exclude_src_shape, red_exclude_dst_shape;
   BroadcastReduceShapeCompact(ograd.shape_, mean.shape_, &red_src_shape, &red_dst_shape);
   BroadcastReduceShapeCompact(ograd.shape_, gamma.shape_,
                               &red_exclude_src_shape, &red_exclude_dst_shape);
diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc
index 3a24242419dc..d4c308398cb7 100644
--- a/src/operator/nn/layer_norm.cc
+++ b/src/operator/nn/layer_norm.cc
@@ -33,12 +33,12 @@ namespace op {
 DMLC_REGISTER_PARAMETER(LayerNormParam);
 static bool LayerNormShape(const nnvm::NodeAttrs& attrs,
-                           std::vector<TShape> *in_shape,
-                           std::vector<TShape> *out_shape) {
+                           mxnet::ShapeVector *in_shape,
+                           mxnet::ShapeVector *out_shape) {
   const LayerNormParam& param = nnvm::get<LayerNormParam>(attrs.parsed);
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]";
-  const TShape &dshape = in_shape->at(layernorm::kData);
+  const mxnet::TShape &dshape = in_shape->at(layernorm::kData);
   int axis = param.axis;
   if (axis < 0) {
     axis += static_cast<int>(dshape.ndim());
@@ -52,12 +52,12 @@ static bool LayerNormShape(const nnvm::NodeAttrs& attrs,
     return false;
   }
-  in_shape->at(layernorm::kGamma) = TShape(Shape1(channelCount));
-  in_shape->at(layernorm::kBeta) = TShape(Shape1(channelCount));
+  in_shape->at(layernorm::kGamma) = mxnet::TShape(Shape1(channelCount));
+  in_shape->at(layernorm::kBeta) = mxnet::TShape(Shape1(channelCount));
   out_shape->clear();
   out_shape->push_back(dshape);                // kOut
-  TShape moments_shape(dshape.begin(), dshape.end());
+  mxnet::TShape moments_shape(dshape.begin(), dshape.end());
   moments_shape[axis] = 1;
   out_shape->push_back(moments_shape);         // kMean
   out_shape->push_back(moments_shape);         // kInvstd
@@ -108,7 +108,7 @@ axis to be the last item in the input shape.
     const LayerNormParam& param = nnvm::get<LayerNormParam>(attrs.parsed);
     return param.output_mean_var ? 3 : 1;
 })
-.set_attr<nnvm::FInferShape>("FInferShape", LayerNormShape)
+.set_attr<mxnet::FInferShape>("FInferShape", LayerNormShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 3>)
 .set_attr<FCompute>("FCompute", LayerNormCompute<cpu>)
 .set_attr<nnvm::FGradient>("FGradient", [](const nnvm::NodePtr& n,
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 020cb479acc6..410bdab667e5 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -35,11 +35,11 @@ namespace mxnet {
 namespace op {
 bool LRNShape(const nnvm::NodeAttrs& attrs,
-              std::vector<TShape> *in_shape,
-              std::vector<TShape> *out_shape) {
+              mxnet::ShapeVector *in_shape,
+              mxnet::ShapeVector *out_shape) {
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
-  const TShape &dshape = in_shape->at(0);
+  const mxnet::TShape &dshape = in_shape->at(0);
   if (dshape.ndim() == 0) return false;
   out_shape->clear();
   out_shape->push_back(dshape);
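Editor's note: the registration-side change that accompanies shape functions like LayerNormShape and LRNShape is that the attribute is now keyed by the mxnet::FInferShape type rather than nnvm::FInferShape. A minimal invented registration, reusing the IdentityShape sketch above and assuming the usual operator-registration headers (nnvm/op.h, mxnet/op_attr_types.h), would read:

NNVM_REGISTER_OP(my_identity)
.set_num_inputs(1)
.set_num_outputs(1)
// Shape inference is now looked up through the mxnet::FInferShape attribute.
.set_attr<mxnet::FInferShape>("FInferShape", IdentityShape)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>);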
.set_attr("FNumVisibleOutputs", [](const NodeAttrs& attrs) { return 1; }) .set_attr_parser(ParamParser) -.set_attr("FInferShape", LRNShape) +.set_attr("FInferShape", LRNShape) .set_attr("FInferType", LRNType) #if MXNET_USE_MKLDNN == 1 .set_attr("FInferStorageType", LRNForwardInferStorageType) diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index bf220b847c0e..0a89c0f31981 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -115,7 +115,7 @@ struct data_type_enum { enum { type = mkldnn::memory::data_type::u8 }; }; -static inline bool SupportMKLDNNArray(int dtype, const TShape &shape) { +static inline bool SupportMKLDNNArray(int dtype, const mxnet::TShape &shape) { int ndim = shape.ndim(); bool support = ndim == 1 || ndim == 2 || ndim == 4; support = support && (dtype == mshadow::kFloat32 || dtype == mshadow::kInt32 @@ -127,7 +127,7 @@ static inline bool SupportStorageMKLDNN(int stype) { return stype == kDefaultStorage; } -static inline bool SupportMKLDNN(int dtype, const TShape &shape) { +static inline bool SupportMKLDNN(int dtype, const mxnet::TShape &shape) { int ndim = shape.ndim(); return dtype == mshadow::kFloat32 && (ndim == 1 || ndim == 2 || ndim == 4); } @@ -461,7 +461,7 @@ mkldnn_memory_format_t GetDefaultFormat(int num_dims); mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc pd, mkldnn_memory_format_t format); -inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims) { +inline bool same_shape(const mxnet::TShape &shape, const mkldnn_dims_t dims, int ndims) { if (shape.ndim() != (size_t)ndims) return false; for (int i = 0; i < ndims; i++) @@ -480,7 +480,7 @@ inline bool same_shape(const mkldnn::memory::desc &desc1, return true; } -inline bool same_shape(const TShape &shape, int dtype, +inline bool same_shape(const mxnet::TShape &shape, int dtype, const mkldnn::memory::desc &desc) { return same_shape(shape, desc.data.dims, desc.data.ndims) && get_mkldnn_type(dtype) == desc.data.data_type; @@ -553,7 +553,7 @@ class MKLDNNMemory { return mem->get_primitive_desc() == pd; } - bool SameFormat(const TShape &shape, int dtype) const { + bool SameFormat(const mxnet::TShape &shape, int dtype) const { return same_shape(shape, dtype, desc); } diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc index 5f672cd51fd5..05ef7ebd6573 100644 --- a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc +++ b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc @@ -182,8 +182,8 @@ void MKLDNNFCForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, const std::vector &out_data) { TmpMemMgr::Get()->Init(ctx.requested[fullc::kTempSpace]); const FullyConnectedParam& param = nnvm::get(attrs.parsed); - const TShape& ishape = in_data[fullc::kData].shape(); - const TShape& oshape = out_data[fullc::kOut].shape(); + const mxnet::TShape& ishape = in_data[fullc::kData].shape(); + const mxnet::TShape& oshape = out_data[fullc::kOut].shape(); NDArray weight = in_data[fullc::kWeight]; NDArray data = in_data[fullc::kData]; // If the input data is a view of an MKLDNN array, we should create a new @@ -232,8 +232,8 @@ void MKLDNNFCBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, TmpMemMgr::Get()->Init(ctx.requested[fullc::kTempSpace]); const std::vector &in_grad = outputs; const FullyConnectedParam& param = nnvm::get(attrs.parsed); - const TShape& ishape = inputs[fullc::kData + 1].shape(); - const TShape& 
+  const mxnet::TShape& ishape = inputs[fullc::kData + 1].shape();
+  const mxnet::TShape& oshape = inputs[fullc::kOut].shape();
   NDArray weight = inputs[fullc::kWeight + 1];
   NDArray data = inputs[fullc::kData + 1];
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
index de3d63e24f6c..9b9f0193979b 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
@@ -109,7 +109,7 @@ inline bool SupportMKLDNNPooling(const PoolingParam &param) {
 }
 inline bool SupportMKLDNNPooling(const PoolingParam &param,
-                                 const TShape &dshape) {
+                                 const mxnet::TShape &dshape) {
   bool ret = SupportMKLDNNPooling(param);
   if (!ret)
     return false;
diff --git a/src/operator/nn/mkldnn/mkldnn_slice.cc b/src/operator/nn/mkldnn/mkldnn_slice.cc
index f3c8a14e0c63..3f3d82020598 100644
--- a/src/operator/nn/mkldnn/mkldnn_slice.cc
+++ b/src/operator/nn/mkldnn/mkldnn_slice.cc
@@ -35,8 +35,8 @@ namespace op {
 MKLDNNSliceFwd::MKLDNNSliceFwd(const SliceParam &param,
                                const NDArray &in, const NDArray &out) {
-  const TShape ishape = in.shape();
-  const TShape oshape = out.shape();
+  const mxnet::TShape ishape = in.shape();
+  const mxnet::TShape oshape = out.shape();
   uint32_t N = ishape.ndim();
   mkldnn::memory::dims dims(N);
   mkldnn::memory::dims offsets(N);
diff --git a/src/operator/nn/pool.cuh b/src/operator/nn/pool.cuh
index 671bc7932ef9..e771b3681573 100644
--- a/src/operator/nn/pool.cuh
+++ b/src/operator/nn/pool.cuh
@@ -731,9 +731,9 @@ __global__ void unpool_sum_3d_gpu_kernel(const int nthreads, const DType* out_gr
 * \param count_include_pad for avg pooling, should 0 pad values be averaged in the window
 */
 template <typename DType, int p = 1>
-inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& ishape,
-                 const TShape& oshape, const TShape& kernel, const TShape& pad,
-                 const TShape& stride, const int pool_type, OpReqType req_type,
+inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const mxnet::TShape& ishape,
+                 const mxnet::TShape& oshape, const mxnet::TShape& kernel, const mxnet::TShape& pad,
+                 const mxnet::TShape& stride, const int pool_type, OpReqType req_type,
                  DType* out_data, const bool count_include_pad) {
   CHECK_EQ(req_type, kWriteTo) << "Only support req=kWriteTo in pooling operations";
   using namespace mxnet_op;
@@ -866,15 +866,15 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
 * \param layout I/O tensor layout, e.g. NCHW vs. NHWC
 */
 template <typename DType, int p = 1>
-inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& ishape,
-                 const TShape& oshape, const TShape& kernel, const TShape& pad,
-                 const TShape& stride, const int pool_type, OpReqType req_type,
+inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const mxnet::TShape& ishape,
+                 const mxnet::TShape& oshape, const mxnet::TShape& kernel, const mxnet::TShape& pad,
+                 const mxnet::TShape& stride, const int pool_type, OpReqType req_type,
                  DType* out_data, const bool count_include_pad, int layout) {
   if (kernel.ndim() == 1) {
     if (layout == mshadow::kNWC) {
       // standardize shapes to NCW to aid templated kernel invocation
-      TShape ishape_ncw = ConvertLayout(ishape.get<3>(), mshadow::kNWC, mshadow::kNCW);
-      TShape oshape_ncw = ConvertLayout(oshape.get<3>(), mshadow::kNWC, mshadow::kNCW);
+      mxnet::TShape ishape_ncw = ConvertLayout(ishape.get<3>(), mshadow::kNWC, mshadow::kNCW);
+      mxnet::TShape oshape_ncw = ConvertLayout(oshape.get<3>(), mshadow::kNWC, mshadow::kNCW);
       pool(s, in_data, ishape_ncw, oshape_ncw, kernel, pad, stride, pool_type, req_type,
            out_data, count_include_pad);
     } else if (layout == mshadow::kNCW) {
@@ -886,8 +886,8 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
   } else if (kernel.ndim() == 2) {
     if (layout == mshadow::kNHWC) {
       // standardize shapes to NCHW to aid templated kernel invocation
-      TShape ishape_nchw = ConvertLayout(ishape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
-      TShape oshape_nchw = ConvertLayout(oshape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
+      mxnet::TShape ishape_nchw = ConvertLayout(ishape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
+      mxnet::TShape oshape_nchw = ConvertLayout(oshape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
       pool(s, in_data, ishape_nchw, oshape_nchw, kernel, pad, stride, pool_type, req_type,
            out_data, count_include_pad);
     } else if (layout == mshadow::kNCHW) {
@@ -899,8 +899,8 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
   } else if (kernel.ndim() == 3) {
     if (layout == mshadow::kNDHWC) {
       // standardize shapes to NCDHW to aid templated kernel invocation
-      TShape ishape_ncdhw = ConvertLayout(ishape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
-      TShape oshape_ncdhw = ConvertLayout(oshape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
+      mxnet::TShape ishape_ncdhw = ConvertLayout(ishape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
+      mxnet::TShape oshape_ncdhw = ConvertLayout(oshape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
       pool(s, in_data, ishape_ncdhw, oshape_ncdhw, kernel, pad, stride, pool_type, req_type,
            out_data, count_include_pad);
     } else if (layout == mshadow::kNCDHW) {
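Editor's note: the pool()/unpool() wrappers above standardize channel-last shapes to channel-first before dispatching to the templated kernels. A hand-rolled equivalent of that permutation for the 1-D case, written against mxnet::TShape only (the real code uses mshadow's ConvertLayout; this helper is invented for illustration), would be:

// NWC -> NCW: move the channel axis ahead of the spatial axis.
inline mxnet::TShape NWCToNCW(const mxnet::TShape& s) {
  CHECK_EQ(s.ndim(), 3U);
  mxnet::TShape out(3);   // three dims, filled in below
  out[0] = s[0];  // N (batch)
  out[1] = s[2];  // C (channels)
  out[2] = s[1];  // W (width)
  return out;
}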
@@ -932,8 +932,8 @@ inline void pool(mshadow::Stream<gpu>* s, const DType* in_data, const TShape& is
 */
 template <typename DType, int p = 1>
 inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType* in_data,
-                   const DType* out_data, const TShape& ishape, const TShape& oshape,
-                   const TShape& kernel, const TShape& pad, const TShape& stride,
+                   const DType* out_data, const mxnet::TShape& ishape, const mxnet::TShape& oshape,
+                   const mxnet::TShape& kernel, const mxnet::TShape& pad, const mxnet::TShape& stride,
                    const int pool_type, OpReqType req_type, DType* in_grad,
                    const bool count_include_pad) {
   if (mxnet::kNullOp == req_type) return;
@@ -1086,15 +1086,15 @@ inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType*
 */
 template <typename DType, int p = 1>
 inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType* in_data,
-                   const DType* out_data, const TShape& ishape, const TShape& oshape,
-                   const TShape& kernel, const TShape& pad, const TShape& stride,
+                   const DType* out_data, const mxnet::TShape& ishape, const mxnet::TShape& oshape,
+                   const mxnet::TShape& kernel, const mxnet::TShape& pad, const mxnet::TShape& stride,
                    const int pool_type, OpReqType req_type, DType* in_grad,
                    const bool count_include_pad, int layout) {
   if (kernel.ndim() == 1) {
     if (layout == mshadow::kNWC) {
       // standardize shapes to NCW to aid templated kernel invocation
-      TShape ishape_ncw = ConvertLayout(ishape.get<3>(), mshadow::kNWC, mshadow::kNCW);
-      TShape oshape_ncw = ConvertLayout(oshape.get<3>(), mshadow::kNWC, mshadow::kNCW);
+      mxnet::TShape ishape_ncw = ConvertLayout(ishape.get<3>(), mshadow::kNWC, mshadow::kNCW);
+      mxnet::TShape oshape_ncw = ConvertLayout(oshape.get<3>(), mshadow::kNWC, mshadow::kNCW);
       unpool(s, out_grad, in_data, out_data, ishape_ncw, oshape_ncw, kernel, pad, stride,
              pool_type, req_type, in_grad, count_include_pad);
     } else if (layout == mshadow::kNCW) {
@@ -1106,8 +1106,8 @@ inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType*
   } else if (kernel.ndim() == 2) {
     if (layout == mshadow::kNHWC) {
       // standardize shapes to NCHW to aid templated kernel invocation
-      TShape ishape_nchw = ConvertLayout(ishape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
-      TShape oshape_nchw = ConvertLayout(oshape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
+      mxnet::TShape ishape_nchw = ConvertLayout(ishape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
+      mxnet::TShape oshape_nchw = ConvertLayout(oshape.get<4>(), mshadow::kNHWC, mshadow::kNCHW);
       unpool(s, out_grad, in_data, out_data, ishape_nchw, oshape_nchw, kernel, pad, stride,
              pool_type, req_type, in_grad, count_include_pad);
     } else if (layout == mshadow::kNCHW) {
@@ -1119,8 +1119,8 @@ inline void unpool(mshadow::Stream<gpu>* s, const DType* out_grad, const DType*
   } else if (kernel.ndim() == 3) {
     if (layout == mshadow::kNDHWC) {
       // standardize shapes to NCDHW to aid templated kernel invocation
-      TShape ishape_ncdhw = ConvertLayout(ishape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
-      TShape oshape_ncdhw = ConvertLayout(oshape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
+      mxnet::TShape ishape_ncdhw = ConvertLayout(ishape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
+      mxnet::TShape oshape_ncdhw = ConvertLayout(oshape.get<5>(), mshadow::kNDHWC, mshadow::kNCDHW);
       unpool(s, out_grad, in_data, out_data, ishape_ncdhw, oshape_ncdhw, kernel, pad, stride,
              pool_type, req_type, in_grad, count_include_pad);
     } else if (layout == mshadow::kNCDHW) {
diff --git a/src/operator/nn/pool.h b/src/operator/nn/pool.h
index 3c8c19a02607..4ea39c3db166 100644
--- a/src/operator/nn/pool.h
+++ b/src/operator/nn/pool.h
@@ -82,8 +82,9 @@ enum PoolingOpPadConventionType {kValid, kFull, kSame};
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_max_1d_ncw_cpu(const DType *in_data, const TShape &ishape, const TShape &oshape,
-                                const TShape &kernel, const TShape &pad, const TShape &stride,
+inline void pool_max_1d_ncw_cpu(const DType *in_data, const mxnet::TShape &ishape,
+                                const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                const mxnet::TShape &pad, const mxnet::TShape &stride,
                                 DType *out_data) {
   using mshadow::red::limits::MinValue;
   const int width = ishape[2];
@@ -118,9 +119,10 @@ inline void pool_max_1d_ncw_cpu(const DType *in_data, const TShape &ishape, cons
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_max_1d_nwc_cpu(const DType* in_data, const TShape& ishape, const TShape& oshape,
-                                const TShape& kernel, const TShape& pad, const TShape& stride,
-                                DType* out_data) {
+inline void pool_max_1d_nwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
+                                const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                const mxnet::TShape& pad, const mxnet::TShape& stride,
+                                DType* out_data) {
   using mshadow::red::limits::MinValue;
   const int width = ishape[1];
   const int pooled_width = oshape[1];
@@ -157,8 +159,9 @@ inline void pool_max_1d_nwc_cpu(const DType* in_data, const TShape& ishape, cons
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_max_2d_nchw_cpu(const DType *in_data, const TShape &ishape, const TShape &oshape,
-                                 const TShape &kernel, const TShape &pad, const TShape &stride,
+inline void pool_max_2d_nchw_cpu(const DType *in_data, const mxnet::TShape &ishape,
+                                 const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                 const mxnet::TShape &pad, const mxnet::TShape &stride,
                                  DType *out_data) {
   using mshadow::red::limits::MinValue;
   const int height = ishape[2], width = ishape[3];
@@ -202,9 +205,10 @@ inline void pool_max_2d_nchw_cpu(const DType *in_data, const TShape &ishape, con
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_max_2d_nhwc_cpu(const DType* in_data, const TShape& ishape, const TShape& oshape,
-                                 const TShape& kernel, const TShape& pad, const TShape& stride,
-                                 DType* out_data) {
+inline void pool_max_2d_nhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
+                                 const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                 const mxnet::TShape& pad, const mxnet::TShape& stride,
+                                 DType* out_data) {
   using mshadow::red::limits::MinValue;
   const int height = ishape[1], width = ishape[2];
   const int pooled_height = oshape[1], pooled_width = oshape[2];
@@ -250,8 +254,9 @@ inline void pool_max_2d_nhwc_cpu(const DType* in_data, const TShape& ishape, con
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_max_3d_ncdhw_cpu(const DType *in_data, const TShape &ishape, const TShape &oshape,
-                                  const TShape &kernel, const TShape &pad, const TShape &stride,
+inline void pool_max_3d_ncdhw_cpu(const DType *in_data, const mxnet::TShape &ishape,
+                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *out_data) {
   using mshadow::red::limits::MinValue;
   const int depth = ishape[2], height = ishape[3], width = ishape[4];
@@ -302,9 +307,10 @@ inline void pool_max_3d_ncdhw_cpu(const DType *in_data, const TShape &ishape, co
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_max_3d_ndhwc_cpu(const DType* in_data, const TShape& ishape, const TShape& oshape,
-                                  const TShape& kernel, const TShape& pad, const TShape& stride,
-                                  DType* out_data) {
+inline void pool_max_3d_ndhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
+                                  const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                  const mxnet::TShape& pad, const mxnet::TShape& stride,
+                                  DType* out_data) {
   using mshadow::red::limits::MinValue;
   const int depth = ishape[1], height = ishape[2], width = ishape[3];
   const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
@@ -357,8 +363,9 @@ inline void pool_max_3d_ndhwc_cpu(const DType* in_data, const TShape& ishape, co
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_sum_1d_ncw_cpu(const DType *in_data, const TShape &ishape, const TShape &oshape,
-                                const TShape &kernel, const TShape &pad, const TShape &stride,
+inline void pool_sum_1d_ncw_cpu(const DType *in_data, const mxnet::TShape &ishape,
+                                const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                const mxnet::TShape &pad, const mxnet::TShape &stride,
                                 DType *out_data,
                                 const bool get_avg = false, const bool count_include_pad = true) {
   using AccType = typename PoolingTypes<DType>::AccType;
@@ -397,10 +404,11 @@ inline void pool_sum_1d_ncw_cpu(const DType *in_data, const TShape &ishape, cons
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_sum_1d_nwc_cpu(const DType* in_data, const TShape& ishape, const TShape& oshape,
-                                const TShape& kernel, const TShape& pad, const TShape& stride,
-                                DType* out_data,
-                                const bool get_avg = false, const bool count_include_pad = true) {
+inline void pool_sum_1d_nwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
+                                const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                const mxnet::TShape& pad, const mxnet::TShape& stride,
+                                DType* out_data,
+                                const bool get_avg = false, const bool count_include_pad = true) {
   using AccType = typename PoolingTypes<DType>::AccType;
   const int width = ishape[1];
   const int pooled_width = oshape[1];
@@ -440,8 +448,9 @@ inline void pool_sum_1d_nwc_cpu(const DType* in_data, const TShape& ishape, cons
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_sum_2d_nchw_cpu(const DType *in_data, const TShape &ishape, const TShape &oshape,
-                                 const TShape &kernel, const TShape &pad, const TShape &stride,
+inline void pool_sum_2d_nchw_cpu(const DType *in_data, const mxnet::TShape &ishape,
+                                 const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                 const mxnet::TShape &pad, const mxnet::TShape &stride,
                                  DType *out_data,
                                  const bool get_avg = false, const bool count_include_pad = true) {
   using AccType = typename PoolingTypes<DType>::AccType;
@@ -488,10 +497,11 @@ inline void pool_sum_2d_nchw_cpu(const DType *in_data, const TShape &ishape, con
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_sum_2d_nhwc_cpu(const DType* in_data, const TShape& ishape, const TShape& oshape,
-                                 const TShape& kernel, const TShape& pad, const TShape& stride,
-                                 DType* out_data,
-                                 const bool get_avg = false, const bool count_include_pad = true) {
+inline void pool_sum_2d_nhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
+                                 const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                 const mxnet::TShape& pad, const mxnet::TShape& stride,
+                                 DType* out_data,
+                                 const bool get_avg = false, const bool count_include_pad = true) {
   using AccType = typename PoolingTypes<DType>::AccType;
   const int height = ishape[1], width = ishape[2];
   const int pooled_height = oshape[1], pooled_width = oshape[2];
@@ -541,8 +551,9 @@ inline void pool_sum_2d_nhwc_cpu(const DType* in_data, const TShape& ishape, con
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_sum_3d_ncdhw_cpu(const DType *in_data, const TShape &ishape, const TShape &oshape,
-                                  const TShape &kernel, const TShape &pad, const TShape &stride,
+inline void pool_sum_3d_ncdhw_cpu(const DType *in_data, const mxnet::TShape &ishape,
+                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *out_data,
                                   const bool get_avg = false, const bool count_include_pad = true) {
   using AccType = typename PoolingTypes<DType>::AccType;
@@ -599,10 +610,11 @@ inline void pool_sum_3d_ncdhw_cpu(const DType *in_data, const TShape &ishape, co
 * Do not call this kernel directly. Use the interface pool().
 */
 template <typename DType>
-inline void pool_sum_3d_ndhwc_cpu(const DType* in_data, const TShape& ishape, const TShape& oshape,
-                                  const TShape& kernel, const TShape& pad, const TShape& stride,
-                                  DType* out_data,
-                                  const bool get_avg = false, const bool count_include_pad = true) {
+inline void pool_sum_3d_ndhwc_cpu(const DType* in_data, const mxnet::TShape& ishape,
+                                  const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                  const mxnet::TShape& pad, const mxnet::TShape& stride,
+                                  DType* out_data,
+                                  const bool get_avg = false, const bool count_include_pad = true) {
   using AccType = typename PoolingTypes<DType>::AccType;
   const int depth = ishape[1], height = ishape[2], width = ishape[3];
   const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
@@ -663,9 +675,9 @@ inline void pool_sum_3d_ndhwc_cpu(const DType* in_data, const TShape& ishape, co
 */
 template <typename DType>
 inline void unpool_max_1d_ncw_cpu(const DType *out_grad, const DType *in_data,
-                                  const DType *out_data, const TShape &ishape,
-                                  const TShape &oshape, const TShape &kernel,
-                                  const TShape &pad, const TShape &stride,
+                                  const DType *out_data, const mxnet::TShape &ishape,
+                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *in_grad) {
   const int width = ishape[2];
   const int pooled_width = oshape[2];
@@ -707,9 +719,9 @@ inline void unpool_max_1d_ncw_cpu(const DType *out_grad, const DType *in_data,
 */
 template <typename DType>
 inline void unpool_max_1d_nwc_cpu(const DType* out_grad, const DType* in_data,
-                                  const DType* out_data, const TShape& ishape,
-                                  const TShape& oshape, const TShape& kernel,
-                                  const TShape& pad, const TShape& stride,
+                                  const DType* out_data, const mxnet::TShape& ishape,
+                                  const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                  const mxnet::TShape& pad, const mxnet::TShape& stride,
                                   DType* in_grad) {
   const int width = ishape[1];
   const int pooled_width = oshape[1];
@@ -755,9 +767,9 @@ inline void unpool_max_1d_nwc_cpu(const DType* out_grad, const DType* in_data,
 */
 template <typename DType>
 inline void unpool_max_2d_nchw_cpu(const DType *out_grad, const DType *in_data,
-                                   const DType *out_data, const TShape &ishape,
-                                   const TShape &oshape, const TShape &kernel,
-                                   const TShape &pad, const TShape &stride,
+                                   const DType *out_data, const mxnet::TShape &ishape,
+                                   const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                   const mxnet::TShape &pad, const mxnet::TShape &stride,
                                    DType *in_grad) {
   const int height = ishape[2], width = ishape[3];
   const int pooled_height = oshape[2], pooled_width = oshape[3];
@@ -811,9 +823,9 @@ inline void unpool_max_2d_nchw_cpu(const DType *out_grad, const DType *in_data,
 */
 template <typename DType>
 inline void unpool_max_2d_nhwc_cpu(const DType* out_grad, const DType* in_data,
-                                   const DType* out_data, const TShape& ishape,
-                                   const TShape& oshape, const TShape& kernel,
-                                   const TShape& pad, const TShape& stride,
+                                   const DType* out_data, const mxnet::TShape& ishape,
+                                   const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                   const mxnet::TShape& pad, const mxnet::TShape& stride,
                                    DType* in_grad) {
   const int height = ishape[1], width = ishape[2];
   const int pooled_height = oshape[1], pooled_width = oshape[2];
@@ -871,9 +883,9 @@ inline void unpool_max_2d_nhwc_cpu(const DType* out_grad, const DType* in_data,
 */
 template <typename DType>
 inline void unpool_max_3d_ncdhw_cpu(const DType *out_grad, const DType *in_data,
-                                    const DType *out_data, const TShape &ishape,
-                                    const TShape &oshape, const TShape &kernel,
-                                    const TShape &pad, const TShape &stride,
+                                    const DType *out_data, const mxnet::TShape &ishape,
+                                    const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                    const mxnet::TShape &pad, const mxnet::TShape &stride,
                                     DType *in_grad) {
   const int depth = ishape[2], height = ishape[3], width = ishape[4];
   const int pooled_depth = oshape[2], pooled_height = oshape[3], pooled_width = oshape[4];
@@ -935,9 +947,9 @@ inline void unpool_max_3d_ncdhw_cpu(const DType *out_grad, const DType *in_data,
 */
 template <typename DType>
 inline void unpool_max_3d_ndhwc_cpu(const DType* out_grad, const DType* in_data,
-                                    const DType* out_data, const TShape& ishape,
-                                    const TShape& oshape, const TShape& kernel,
-                                    const TShape& pad, const TShape& stride,
+                                    const DType* out_data, const mxnet::TShape& ishape,
+                                    const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                                    const mxnet::TShape& pad, const mxnet::TShape& stride,
                                     DType* in_grad) {
   const int depth = ishape[1], height = ishape[2], width = ishape[3];
   const int pooled_depth = oshape[1], pooled_height = oshape[2], pooled_width = oshape[3];
@@ -1004,8 +1016,9 @@ inline void unpool_max_3d_ndhwc_cpu(const DType* out_grad, const DType* in_data,
 template <typename DType>
 inline void unpool_sum_1d_ncw_cpu(const DType *out_grad, const DType *in_data,
                                   const DType *out_data,
-                                  const TShape &ishape, const TShape &oshape, const TShape &kernel,
-                                  const TShape &pad, const TShape &stride, DType *in_grad,
+                                  const mxnet::TShape &ishape, const mxnet::TShape &oshape,
+                                  const mxnet::TShape &kernel, const mxnet::TShape &pad,
+                                  const mxnet::TShape &stride, DType *in_grad,
                                   const bool is_avg = false, const bool count_include_pad = true) {
   const int width = ishape[2];
   const int pooled_width = oshape[2];
@@ -1043,9 +1056,9 @@ inline void unpool_sum_1d_ncw_cpu(const DType *out_grad, const DType *in_data,
 */
 template <typename DType>
 inline void unpool_sum_1d_nwc_cpu(const DType* out_grad, const DType* in_data,
-                                  const DType *out_data, const TShape &ishape,
-                                  const TShape &oshape, const TShape &kernel,
-                                  const TShape &pad, const TShape &stride,
+                                  const DType *out_data, const mxnet::TShape &ishape,
+                                  const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                  const mxnet::TShape &pad, const mxnet::TShape &stride,
                                   DType *in_grad,
                                   const bool is_avg = false, const bool count_include_pad = true) {
   const int width = ishape[1];
@@ -1088,9 +1101,9 @@ inline void unpool_sum_1d_nwc_cpu(const DType* out_grad, const DType* in_data,
 */
 template <typename DType>
 inline void unpool_sum_2d_nchw_cpu(const DType *out_grad, const DType *in_data,
-                                   const DType *out_data, const TShape &ishape,
-                                   const TShape &oshape, const TShape &kernel,
-                                   const TShape &pad, const TShape &stride,
+                                   const DType *out_data, const mxnet::TShape &ishape,
+                                   const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                   const mxnet::TShape &pad, const mxnet::TShape &stride,
                                    DType *in_grad,
                                    const bool is_avg = false, const bool count_include_pad = true) {
   const int height = ishape[2], width = ishape[3];
@@ -1141,9 +1154,9 @@ inline void unpool_sum_2d_nchw_cpu(const DType *out_grad, const DType *in_data,
 */
 template <typename DType>
 inline void unpool_sum_2d_nhwc_cpu(const DType* out_grad, const DType* in_data,
-                                   const DType *out_data, const TShape &ishape,
-                                   const TShape &oshape, const TShape &kernel,
-                                   const TShape &pad, const TShape &stride,
+                                   const DType *out_data, const mxnet::TShape &ishape,
+                                   const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                   const mxnet::TShape &pad, const mxnet::TShape &stride,
                                    DType *in_grad,
                                    const bool is_avg = false, const bool count_include_pad = true) {
   const int height = ishape[1], width = ishape[2];
@@ -1196,9 +1209,9 @@ inline void unpool_sum_2d_nhwc_cpu(const DType* out_grad, const DType* in_data,
 */
 template <typename DType>
 inline void unpool_sum_3d_ncdhw_cpu(const DType *out_grad, const DType *in_data,
-                                    const DType *out_data, const TShape &ishape,
-                                    const TShape &oshape, const TShape &kernel,
-                                    const TShape &pad, const TShape &stride,
+                                    const DType *out_data, const mxnet::TShape &ishape,
+                                    const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                    const mxnet::TShape &pad, const mxnet::TShape &stride,
                                     DType *in_grad,
                                     const bool is_avg = false, const bool count_include_pad = true) {
   const int depth = ishape[2], height = ishape[3], width = ishape[4];
@@ -1257,9 +1270,9 @@ inline void unpool_sum_3d_ncdhw_cpu(const DType *out_grad, const DType *in_data,
 */
 template <typename DType>
 inline void unpool_sum_3d_ndhwc_cpu(const DType* out_grad, const DType* in_data,
-                                    const DType *out_data, const TShape &ishape,
-                                    const TShape &oshape, const TShape &kernel,
-                                    const TShape &pad, const TShape &stride,
+                                    const DType *out_data, const mxnet::TShape &ishape,
+                                    const mxnet::TShape &oshape, const mxnet::TShape &kernel,
+                                    const mxnet::TShape &pad, const mxnet::TShape &stride,
                                     DType *in_grad,
                                     const bool is_avg = false, const bool count_include_pad = true) {
   const int depth = ishape[1], height = ishape[2], width = ishape[3];
@@ -1329,9 +1342,9 @@ inline void unpool_sum_3d_ndhwc_cpu(const DType* out_grad, const DType* in_data,
 * \param p_value value of p for Lp pooling
 */
 template <typename DType, int p = 1>
-inline void pool(mshadow::Stream<cpu>* s, const DType* in_data, const TShape& ishape,
-                 const TShape& oshape, const TShape& kernel, const TShape& pad,
-                 const TShape& stride, const int pool_type, OpReqType req_type,
+inline void pool(mshadow::Stream<cpu>* s, const DType* in_data, const mxnet::TShape& ishape,
+                 const mxnet::TShape& oshape, const mxnet::TShape& kernel, const mxnet::TShape& pad,
+                 const mxnet::TShape& stride, const int pool_type, OpReqType req_type,
                  DType* out_data, const bool count_include_pad, int layout) {
   CHECK_EQ(req_type, kWriteTo) << "Only support req=kWriteTo in pooling operations";
   if (kernel.ndim() == 1) {
@@ -1447,8 +1460,9 @@ inline void pool(mshadow::Stream<cpu>* s, const DType* in_data, const TShape& is
 */
 template <typename DType, int p = 1>
 inline void unpool(mshadow::Stream<cpu>* s, const DType* out_grad, const DType* in_data,
-                   const DType* out_data, const TShape& ishape, const TShape& oshape,
-                   const TShape& kernel, const TShape& pad, const TShape& stride,
+                   const DType* out_data, const mxnet::TShape& ishape,
+                   const mxnet::TShape& oshape, const mxnet::TShape& kernel,
+                   const mxnet::TShape& pad, const mxnet::TShape& stride,
                    const int pool_type, OpReqType req_type, DType* in_grad,
                    const bool count_include_pad, int layout) {
   if (mxnet::kNullOp == req_type) return;
diff --git a/src/operator/nn/pooling-inl.h b/src/operator/nn/pooling-inl.h
index af00fd5cfa3c..9e1e73bf19e2 100644
--- a/src/operator/nn/pooling-inl.h
+++ b/src/operator/nn/pooling-inl.h
@@ -44,9 +44,9 @@ namespace op {
 void PoolingParamParser(nnvm::NodeAttrs *attrs);
 struct PoolingParam : public dmlc::Parameter<PoolingParam> {
-  TShape kernel;
-  TShape stride;
-  TShape pad;
+  mxnet::TShape kernel;
+  mxnet::TShape stride;
+  mxnet::TShape pad;
   int pool_type;
   int pooling_convention;
   bool global_pool;
@@ -55,7 +55,7 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
   dmlc::optional<bool> count_include_pad;
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(PoolingParam) {
-    DMLC_DECLARE_FIELD(kernel).set_default(TShape())  // add default value here
+    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape())  // add default value here
     .enforce_nonzero()
     .describe("Pooling kernel size: (y, x) or (d, y, x)");
@@ -78,11 +78,11 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
     .add_enum("same", pool_enum::kSame)
     .describe("Pooling convention to be applied.");
-    DMLC_DECLARE_FIELD(stride).set_default(TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
     .enforce_nonzero()
     .describe("Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(pad).set_default(TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
     .describe("Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.");
     DMLC_DECLARE_FIELD(p_value).set_default(dmlc::optional<int>())
@@ -185,26 +185,26 @@ class PoolingOp {
                const OpReqType& req, const TBlob& out_data) {
     using namespace mshadow;
     Stream<xpu> *s = ctx.get_stream<xpu>();
-    const TShape& ishape = in_data.shape_;
-    TShape kernel = param_.kernel;
-    TShape padding = param_.pad;
-    TShape stride = param_.stride;
+    const mxnet::TShape& ishape = in_data.shape_;
+    mxnet::TShape kernel = param_.kernel;
+    mxnet::TShape padding = param_.pad;
+    mxnet::TShape stride = param_.stride;
     int layout = param_.GetLayout(ishape.ndim());
     if (param_.global_pool) {
       // with global pooling, kernel shape corresponds to input shape with 'N' and 'C' removed
       if (layout == mshadow::kNWC || layout == mshadow::kNHWC || layout == mshadow::kNDHWC) {
-        kernel = TShape(ishape.data() + 1,
-                        ishape.data() + ishape.ndim() - 1);
+        kernel = mxnet::TShape(ishape.data() + 1,
+                               ishape.data() + ishape.ndim() - 1);
       } else {
-        kernel = TShape(ishape.data() + 2,
-                        ishape.data() + ishape.ndim());
+        kernel = mxnet::TShape(ishape.data() + 2,
+                               ishape.data() + ishape.ndim());
       }
-      padding = TShape(ishape.ndim() - 2);
+      padding = mxnet::TShape(ishape.ndim() - 2);
       for (index_t i = 0; i < ishape.ndim() - 2; i++) {
        padding[i] = 0;
      }
-      stride = TShape(ishape.ndim() - 2);
+      stride = mxnet::TShape(ishape.ndim() - 2);
     }
     const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ?
                         param_.p_value.value() : 1;
@@ -242,26 +242,26 @@ class PoolingOp {
                const OpReqType& req, const TBlob& in_grad) {
     using namespace mshadow;
     Stream<xpu> *s = ctx.get_stream<xpu>();
-    const TShape& ishape = in_data.shape_;
-    TShape kernel = param_.kernel;
-    TShape padding = param_.pad;
-    TShape stride = param_.stride;
+    const mxnet::TShape& ishape = in_data.shape_;
+    mxnet::TShape kernel = param_.kernel;
+    mxnet::TShape padding = param_.pad;
+    mxnet::TShape stride = param_.stride;
     int layout = param_.GetLayout(ishape.ndim());
     if (param_.global_pool) {
       // with global pooling, kernel shape corresponds to input shape with 'N' and 'C' removed
       if (layout == mshadow::kNWC || layout == mshadow::kNHWC || layout == mshadow::kNDHWC) {
-        kernel = TShape(ishape.data() + 1,
-                        ishape.data() + ishape.ndim() - 1);
+        kernel = mxnet::TShape(ishape.data() + 1,
+                               ishape.data() + ishape.ndim() - 1);
      } else {
-        kernel = TShape(ishape.data() + 2,
-                        ishape.data() + ishape.ndim());
+        kernel = mxnet::TShape(ishape.data() + 2,
+                               ishape.data() + ishape.ndim());
      }
-      padding = TShape(ishape.ndim() - 2);
+      padding = mxnet::TShape(ishape.ndim() - 2);
       for (index_t i = 0; i < ishape.ndim() - 2; i++) {
        padding[i] = 0;
      }
-      stride = TShape(ishape.ndim() - 2);
+      stride = mxnet::TShape(ishape.ndim() - 2);
     }
     const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ?
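Editor's note: the global-pooling branch shown twice above (forward and backward) derives the kernel from the input shape by slicing off the batch and channel axes. Isolated as a helper, and assuming a channel-first layout, the idea is the following sketch (the helper name is invented; the range constructor and data() accessor are the ones used by the code above):

// Global pooling: the kernel spans every spatial dimension of the input.
// Assumes dims 0 and 1 of ishape are batch and channel (NCW/NCHW/NCDHW).
inline mxnet::TShape GlobalPoolKernel(const mxnet::TShape& ishape) {
  CHECK_GE(ishape.ndim(), 3U);
  return mxnet::TShape(ishape.data() + 2, ishape.data() + ishape.ndim());
}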
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 9e9af4d97fd9..2d16604baa20 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -91,14 +91,14 @@ static bool PoolingType(const nnvm::NodeAttrs& attrs,
 }
 static bool PoolingShape(const nnvm::NodeAttrs &attrs,
-                         std::vector<TShape> *in_shape,
-                         std::vector<TShape> *out_shape) {
+                         mxnet::ShapeVector *in_shape,
+                         mxnet::ShapeVector *out_shape) {
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), 1U);
   if (param.pool_type == pool_enum::kLpPooling) {
     CHECK(param.p_value.has_value());
   }
-  const TShape &dshape = (*in_shape)[0];
+  const mxnet::TShape &dshape = (*in_shape)[0];
   if (param.pooling_convention == pool_enum::kSame) {
     CHECK_EQ(dshape.ndim(), 3U)
       << "Pooling: Input data should be 3D in (batch, channel, x)"
@@ -117,7 +117,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
   if (dshape.ndim() == 0) return false;
   int layout = param.GetLayout(dshape.ndim());
   if (param.global_pool) {
-    TShape oshape = dshape;
+    mxnet::TShape oshape = dshape;
     size_t c_index = 0;
     switch (layout) {
       case mshadow::kNCW:
@@ -171,7 +171,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                                    param.stride[0]));
     }
     // Convert back from standard (NCW) layout space to the actual layout type
-    TShape oshape = (layout == mshadow::kNWC) ?
+    mxnet::TShape oshape = (layout == mshadow::kNWC) ?
       ConvertLayout(oshape_ncw, mshadow::kNCW, mshadow::kNWC) : oshape_ncw;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
@@ -209,7 +209,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                                    param.stride[1]));
     }
     // Convert back from standard (NCHW) layout space to the actual layout type
-    TShape oshape = (layout == mshadow::kNHWC) ?
+    mxnet::TShape oshape = (layout == mshadow::kNHWC) ?
       ConvertLayout(oshape_nchw, mshadow::kNCHW, mshadow::kNHWC) : oshape_nchw;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
@@ -251,7 +251,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                                    param.stride[2]));
     }
     // Convert back from standard (NCDHW) layout space to the actual layout type
-    TShape oshape = (layout == mshadow::kNDHWC) ?
+    mxnet::TShape oshape = (layout == mshadow::kNDHWC) ?
      ConvertLayout(oshape_ncdhw, mshadow::kNCDHW, mshadow::kNDHWC) : oshape_ncdhw;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
@@ -440,7 +440,7 @@ For each window ``X``, the mathematical expression for Lp pooling is:
 .set_attr<FInferStorageType>("FInferStorageType", PoolingStorageType)
 #endif
 .set_attr<nnvm::FInferType>("FInferType", PoolingType)
-.set_attr<nnvm::FInferShape>("FInferShape", PoolingShape)
+.set_attr<mxnet::FInferShape>("FInferShape", PoolingShape)
 .set_attr<FCompute>("FCompute", PoolingCompute<cpu>)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index 90950bc9e92e..096d87416081 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -332,8 +332,8 @@ static inline bool SoftmaxOpType(const nnvm::NodeAttrs& attrs,
 }
 static inline bool SoftmaxGradOpShape(const nnvm::NodeAttrs& attrs,
-                                      std::vector<TShape> *in_attrs,
-                                      std::vector<TShape> *out_attrs) {
+                                      mxnet::ShapeVector *in_attrs,
+                                      mxnet::ShapeVector *out_attrs) {
   if (softmax_has_dtype_override(attrs)) {
     return ElemwiseShape<3, 1>(attrs, in_attrs, out_attrs);
   } else {
@@ -409,7 +409,7 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
   int axis = CheckAxis(param.axis, inputs[0].ndim());
   const double temperature = param.temperature.has_value() ?
                              param.temperature.value() : 1.0;
-  TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
+  mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
   MXNET_REAL_ACC_TYPE_SWITCH(inputs[0].type_flag_, DType, AType, {
     MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, OType, {
       if (shape.ndim() == 2) {
@@ -440,7 +440,7 @@ void SoftmaxGradCompute(const nnvm::NodeAttrs& attrs,
   int axis = CheckAxis(param.axis, inputs[0].ndim());
   const double temperature = param.temperature.has_value() ?
                              param.temperature.value() : 1.0;
-  TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
+  mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
   int out_idx = softmax_has_dtype_override(attrs) ? 2 : 1;
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index c88f738c356d..b84dd93300f8 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -106,7 +106,7 @@ Example::
 .set_attr<nnvm::FInferType>("FInferType", SoftmaxOpType)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
     return std::vector<std::pair<int, int> >{{0, 0}};
@@ -118,7 +118,7 @@ NNVM_REGISTER_OP(_backward_softmax)
 .set_num_inputs(SoftmaxGradOpNumInputs)
 .set_num_outputs(1)
 .set_attr<nnvm::FListInputNames>("FListInputNames", SoftmaxGradOpInputNames)
-.set_attr<nnvm::FInferShape>("FInferShape", SoftmaxGradOpShape)
+.set_attr<mxnet::FInferShape>("FInferShape", SoftmaxGradOpShape)
 .set_attr<nnvm::FInferType>("FInferType", SoftmaxGradOpType)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", SoftmaxGradOpInplaceOption)
 .add_argument("args", "NDArray-or-Symbol[]", "Positional input arguments")
@@ -161,7 +161,7 @@ Example::
 .set_attr<nnvm::FInferType>("FInferType", SoftmaxOpType)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
     return std::vector<std::pair<int, int> >{{0, 0}};
@@ -173,7 +173,7 @@ NNVM_REGISTER_OP(_backward_softmin)
 .set_num_inputs(SoftmaxGradOpNumInputs)
 .set_num_outputs(1)
 .set_attr<nnvm::FListInputNames>("FListInputNames", SoftmaxGradOpInputNames)
-.set_attr<nnvm::FInferShape>("FInferShape", SoftmaxGradOpShape)
+.set_attr<mxnet::FInferShape>("FInferShape", SoftmaxGradOpShape)
 .set_attr<nnvm::FInferType>("FInferType", SoftmaxGradOpType)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", SoftmaxGradOpInplaceOption)
 .add_argument("args", "NDArray-or-Symbol[]", "Positional input arguments")
@@ -204,7 +204,7 @@ Examples::
 .set_attr<nnvm::FInferType>("FInferType", SoftmaxOpType)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
     return std::vector<std::pair<int, int> >{{0, 0}};
@@ -216,7 +216,7 @@ NNVM_REGISTER_OP(_backward_log_softmax)
 .set_num_inputs(SoftmaxGradOpNumInputs)
 .set_num_outputs(1)
 .set_attr<nnvm::FListInputNames>("FListInputNames", SoftmaxGradOpInputNames)
-.set_attr<nnvm::FInferShape>("FInferShape", SoftmaxGradOpShape)
+.set_attr<mxnet::FInferShape>("FInferShape", SoftmaxGradOpShape)
 .set_attr<nnvm::FInferType>("FInferType", SoftmaxGradOpType)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", SoftmaxGradOpInplaceOption)
 .add_argument("args", "NDArray-or-Symbol[]", "Positional input arguments")
diff --git a/src/operator/nn/upsampling-inl.h b/src/operator/nn/upsampling-inl.h
index feb44c894a7a..662ba78cd84a 100644
--- a/src/operator/nn/upsampling-inl.h
+++ b/src/operator/nn/upsampling-inl.h
@@ -177,13 +177,13 @@ static inline DeconvolutionParam GetDeconvolutionParam(const UpSamplingParam& pa
   p.num_filter = param.num_filter;
   p.no_bias = true;
   int shape[] = {1, 1};
-  p.dilate = TShape(shape, shape + 2);
+  p.dilate = mxnet::TShape(shape, shape + 2);
   shape[0] = shape[1] = kernel;
-  p.kernel = TShape(shape, shape + 2);
+  p.kernel = mxnet::TShape(shape, shape + 2);
   shape[0] = shape[1] = stride;
-  p.stride = TShape(shape, shape + 2);
+  p.stride = mxnet::TShape(shape, shape + 2);
   shape[0] = shape[1] = pad;
-  p.pad = TShape(shape, shape + 2);
+  p.pad = mxnet::TShape(shape, shape + 2);
   return p;
 }
diff --git a/src/operator/nn/upsampling.cc b/src/operator/nn/upsampling.cc
index b6b3d873df7d..d09017bf713e 100644
--- a/src/operator/nn/upsampling.cc
+++ b/src/operator/nn/upsampling.cc
@@ -32,11 +32,11 @@ namespace mxnet {
 namespace op {
 static bool UpSamplingShape(const nnvm::NodeAttrs& attrs,
-                            std::vector<TShape> *in_shape, std::vector<TShape> *out_shape) {
+                            mxnet::ShapeVector *in_shape,
+                            mxnet::ShapeVector *out_shape) {
   const UpSamplingParam& param_ = nnvm::get<UpSamplingParam>(attrs.parsed);
   CHECK_GE(in_shape->size(), 1U);
-  const TShape &dshape = (*in_shape)[0];
-  TShape oshape = dshape;
+  const mxnet::TShape &dshape = (*in_shape)[0];
+  mxnet::TShape oshape = dshape;
   if (param_.sample_type == up_enum::kNearest) {
     CHECK_EQ(in_shape->size(), static_cast<size_t>(param_.num_args));
     oshape[1] = 0;
@@ -136,7 +136,7 @@ NNVM_REGISTER_OP(UpSampling)
   [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"output"};
 })
-.set_attr<nnvm::FInferShape>("FInferShape", UpSamplingShape)
+.set_attr<mxnet::FInferShape>("FInferShape", UpSamplingShape)
 .set_attr<nnvm::FInferType>("FInferType", UpSamplingType)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   const UpSamplingParam& param = nnvm::get<UpSamplingParam>(n.parsed);
diff --git a/src/operator/nnpack/nnpack_fully_connected-inl.h b/src/operator/nnpack/nnpack_fully_connected-inl.h
index b6a60f760265..422334949c48 100644
--- a/src/operator/nnpack/nnpack_fully_connected-inl.h
+++ b/src/operator/nnpack/nnpack_fully_connected-inl.h
@@ -64,8 +64,8 @@ class NNPACKFullyConnectedOp : public FullyConnectedOp {
     size_t expected = param_.no_bias ? 2 : 3;
     CHECK_EQ(in_data.size(), expected);
     CHECK_EQ(out_data.size(), 1);
-    const TShape& ishape = in_data[fullc::kData].shape_;
-    const TShape& oshape = out_data[fullc::kOut].shape_;
+    const mxnet::TShape& ishape = in_data[fullc::kData].shape_;
+    const mxnet::TShape& oshape = out_data[fullc::kOut].shape_;
     Stream<cpu> *s = ctx.get_stream<cpu>();
     Tensor<cpu, 2, DType> data = in_data[fullc::kData].get_with_shape<cpu, 2, DType>(
         Shape2(ishape[0], ishape.ProdShape(1, ishape.ndim())), s);
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index b1822647cf01..f629534dabd0 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -104,7 +104,7 @@ struct InferStorageTypeError : public dmlc::Error {
 };
 /*! \brief check if shape is empty or contains unknown (0) dim. */
-inline bool shape_is_none(const TShape& x) {
+inline bool shape_is_none(const mxnet::TShape& x) {
   return x.ndim() == 0 || x.Size() == 0;
 }
@@ -119,12 +119,12 @@ inline bool storage_type_is_none(const int& x) {
 }
 /*! \brief check if shape is scalar({1}). */
-inline bool shape_is_scalar(const TShape& x) {
+inline bool shape_is_scalar(const mxnet::TShape& x) {
   return x.ndim() == 1 && x.Size() == 1;
 }
 /*! \brief get string representation of shape */
-inline std::string shape_string(const TShape& x) {
+inline std::string shape_string(const mxnet::TShape& x) {
   std::ostringstream os;
   os << x;
   return os.str();
@@ -158,7 +158,7 @@ inline std::string type_string(const int& x) {
 * \param x source shape.
 * \return whether x and y are compatible.
 */
-inline bool shape_assign(TShape *y, const TShape& x) {
+inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) {
   if (y->ndim() == 0) {
     *y = x;
     return true;
@@ -221,7 +221,7 @@ inline bool dispatch_mode_assign(DispatchMode *y, const DispatchMode& x) {
 */
 #define SHAPE_ASSIGN_CHECK(shape_array, index, shape)                       \
   {                                                                         \
-    if (!::mxnet::op::shape_assign(&(shape_array)[index], TShape(shape))) { \
+    if (!::mxnet::op::shape_assign(&(shape_array)[index], mxnet::TShape(shape))) { \
      std::ostringstream os;                                                \
      os << "Shape inconsistent, Provided = " << (shape_array)[index] << ','\
         << " inferred shape=" << shape;                                    \
@@ -556,13 +556,13 @@ class OpSignature {
 #endif
   }
-  void AddSign(const std::vector<TShape> &shapes) {
+  void AddSign(const mxnet::ShapeVector &shapes) {
     for (auto &shape : shapes) {
       AddSign(shape);
     }
   }
-  void AddSign(const TShape &shape) {
+  void AddSign(const mxnet::TShape &shape) {
     for (size_t i = 0; i < shape.ndim(); i++) {
       hash = hash * 2 + shape[i];
       eles.push_back(shape[i]);
diff --git a/src/operator/operator_util.cc b/src/operator/operator_util.cc
index 0c6f176a023a..b87428ca2b64 100644
--- a/src/operator/operator_util.cc
+++ b/src/operator/operator_util.cc
@@ -392,12 +392,12 @@ class SimpleOpPropBase : public OperatorProperty {
   }
   std::vector<ResourceRequest> ForwardResource(
-      const std::vector<TShape> &in_shape) const override {
+      const mxnet::ShapeVector &in_shape) const override {
     return source->resource_requests_;
   }
   std::vector<ResourceRequest> BackwardResource(
-      const std::vector<TShape> &in_shape) const override {
+      const mxnet::ShapeVector &in_shape) const override {
     return source->resource_requests_;
   }
@@ -468,7 +468,7 @@ void SimpleOpRegEntryImpl::RegisterSourceImperative() {
   }
   // shape inference.
   CHECK(source_shape_ != nullptr);
-  TShape dshape = source_shape_(env);
+  mxnet::TShape dshape = source_shape_(env);
   // check output shape.
   CHECK(!out->is_none());
   CHECK(out->shape() == dshape) << "target shape mismatch "
@@ -551,9 +551,9 @@ struct SimpleSourceOperator : public Operator {
 class SimpleSourceOpProp : public SimpleOpPropBase {
  public:
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     CHECK_EQ(in_shape->size(), 0) << in_shape->size();
     CHECK(source->source_shape_ != nullptr);
@@ -631,7 +631,7 @@ void SimpleOpRegEntryImpl::RegisterUnaryImperative() {
     << "operator " << this->name << " do not take keyword arguments";
   }
   // shape inference.
-  TShape dshape;
+  mxnet::TShape dshape;
   if (unary_shape_ != nullptr) {
     dshape = unary_shape_(src.shape(), env);
   } else {
@@ -768,12 +768,12 @@ struct SimpleUnaryOperator : public Operator {
 class SimpleUnaryOpProp : public SimpleOpPropBase {
  public:
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 1) << "Input:[data]";
-    const TShape &dshape = in_shape->at(0);
+    const mxnet::TShape &dshape = in_shape->at(0);
     if (dshape.ndim() == 0) return false;
     out_shape->clear();
     if (source->unary_shape_ == nullptr) {
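Editor's note: SHAPE_ASSIGN_CHECK above builds on mxnet::op::shape_assign, whose contract is worth spelling out: an unknown shape is overwritten, and an already-known shape must match. A usage sketch, relying only on the signatures shown above and the TShape(Shape1/Shape2) constructors used elsewhere in this patch:

mxnet::TShape y;                                    // unknown: ndim() == 0
mxnet::TShape x = mxnet::TShape(mshadow::Shape2(2, 3));
bool a = mxnet::op::shape_assign(&y, x);            // true, y becomes (2,3)
bool b = mxnet::op::shape_assign(&y, x);            // true, shapes agree
bool c = mxnet::op::shape_assign(&y, mxnet::TShape(mshadow::Shape1(5)));
                                                    // false, (2,3) vs (5)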
@@ -892,7 +892,7 @@ void SimpleOpRegEntryImpl::RegisterBinaryImperative() {
   }
   // shape inference.
-  TShape dshape;
+  mxnet::TShape dshape;
   if (binary_shape_ != nullptr) {
     dshape = binary_shape_(lhs.shape(), rhs.shape(), env);
   } else {
@@ -1046,13 +1046,13 @@ struct SimpleBinaryOperator : public Operator {
 class SimpleBinaryOpProp : public SimpleOpPropBase {
  public:
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 2) << "Input:[lhs, rhs]";
-    const TShape& lshape = in_shape->at(0);
-    const TShape& rshape = in_shape->at(1);
+    const mxnet::TShape& lshape = in_shape->at(0);
+    const mxnet::TShape& rshape = in_shape->at(1);
     out_shape->clear();
     if (source->binary_shape_ == nullptr) {
       if (in_shape->at(0).ndim() != 0) {
diff --git a/src/operator/optimizer_op-inl.h b/src/operator/optimizer_op-inl.h
index 223a1aa6c37d..49eb96b9f8b2 100644
--- a/src/operator/optimizer_op-inl.h
+++ b/src/operator/optimizer_op-inl.h
@@ -142,8 +142,8 @@ struct MultiSGDMomParam : public dmlc::Parameter<MultiSGDMomParam> {
 template <typename ParamType, int input_stride>
 inline bool MultiSGDShape(const nnvm::NodeAttrs& attrs,
-                          std::vector<TShape> *in_attrs,
-                          std::vector<TShape> *out_attrs) {
+                          mxnet::ShapeVector *in_attrs,
+                          mxnet::ShapeVector *out_attrs) {
   const ParamType& param = dmlc::get<ParamType>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), input_stride * param.num_weights);
   CHECK_EQ(out_attrs->size(), param.num_weights);
@@ -163,8 +163,8 @@ inline bool MultiSGDShape(const nnvm::NodeAttrs& attrs,
     << param.num_weights << ", and got " << param.wds.ndim();
   // Weights and gradients
   for (int i = 0; i < param.num_weights; ++i) {
-    std::vector<TShape> input_vec;
-    std::vector<TShape> output_vec({output_shapes[i]});
+    mxnet::ShapeVector input_vec;
+    mxnet::ShapeVector output_vec({output_shapes[i]});
     for (int j = 0; j < input_stride; ++j) {
       input_vec.push_back(input_shapes[i * input_stride + j]);
     }
diff --git a/src/operator/optimizer_op.cc b/src/operator/optimizer_op.cc
index 982995ad2f95..367b91b2646c 100644
--- a/src/operator/optimizer_op.cc
+++ b/src/operator/optimizer_op.cc
@@ -60,7 +60,7 @@ It updates the weights using::
 .set_num_inputs(2)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<SignSGDParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
 .set_attr<FCompute>("FCompute", SignSGDUpdate<cpu>)
 .add_argument("weight", "NDArray-or-Symbol", "Weight")
@@ -89,7 +89,7 @@ Where the parameter ``momentum`` is the decay rate of momentum estimates at each
 .set_num_inputs(3)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<SignumParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 1>)
 .set_attr<nnvm::FMutateInputs>("FMutateInputs",
   [](const nnvm::NodeAttrs& attrs) {
@@ -332,7 +332,7 @@ It updates the weights using::
     return static_cast<uint32_t>(param.num_weights);
 })
 .set_attr_parser(ParamParser<MultiSGDParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", MultiSGDShape<MultiSGDParam, 2>)
+.set_attr<mxnet::FInferShape>("FInferShape", MultiSGDShape<MultiSGDParam, 2>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<-1, -1>)
 .set_attr<nnvm::FListInputNames>("FListInputNames",
   [](const NodeAttrs& attrs) {
@@ -377,7 +377,7 @@ Where the parameter ``momentum`` is the decay rate of momentum estimates at each
     return static_cast<uint32_t>(param.num_weights);
 })
 .set_attr_parser(ParamParser<MultiSGDMomParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", MultiSGDShape<MultiSGDMomParam, 3>)
+.set_attr<mxnet::FInferShape>("FInferShape", MultiSGDShape<MultiSGDMomParam, 3>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<-1, -1>)
 .set_attr<nnvm::FListInputNames>("FListInputNames",
   [](const NodeAttrs& attrs) {
@@ -420,7 +420,7 @@ It updates the weights using::
     return static_cast<uint32_t>(param.num_weights);
 })
 .set_attr_parser(ParamParser<MultiSGDParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", MultiSGDShape<MultiSGDParam, 3>)
+.set_attr<mxnet::FInferShape>("FInferShape", MultiSGDShape<MultiSGDParam, 3>)
 .set_attr<nnvm::FInferType>("FInferType", MP_MultiSGD_InferType)
 .set_attr<nnvm::FListInputNames>("FListInputNames",
   [](const NodeAttrs& attrs) {
@@ -475,7 +475,7 @@ Where the parameter ``momentum`` is the decay rate of momentum estimates at each
     return static_cast<uint32_t>(param.num_weights);
 })
 .set_attr_parser(ParamParser<MultiSGDMomParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", MultiSGDShape<MultiSGDMomParam, 4>)
+.set_attr<mxnet::FInferShape>("FInferShape", MultiSGDShape<MultiSGDMomParam, 4>)
 .set_attr<nnvm::FInferType>("FInferType", MP_MultiSGD_InferType)
 .set_attr<nnvm::FListInputNames>("FListInputNames",
   [](const NodeAttrs& attrs) {
@@ -521,7 +521,7 @@ only the row slices whose indices appear in grad.indices are updated::
 .set_num_inputs(2)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<SGDParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<2, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)
 .set_attr<FInferStorageType>("FInferStorageType", SGDStorageType)
 .set_attr<FCompute>("FCompute", SGDUpdate<cpu>)
@@ -562,7 +562,7 @@ only the row slices whose indices appear in grad.indices are updated (for both w
 .set_num_inputs(3)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<SGDMomParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 1>)
 .set_attr<FInferStorageType>("FInferStorageType", StdOptStorageType<1, SGDMomParam>)
 .set_attr<nnvm::FMutateInputs>("FMutateInputs",
@@ -589,7 +589,7 @@ NNVM_REGISTER_OP(mp_sgd_update)
 .set_num_inputs(3)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<SGDParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
 .set_attr<nnvm::FInferType>("FInferType", MP_SGD_InferType<2, 1, 3>)
 .set_attr<FCompute>("FCompute", MP_SGDUpdate<cpu>)
 .set_attr<nnvm::FMutateInputs>("FMutateInputs",
@@ -606,7 +606,7 @@ NNVM_REGISTER_OP(mp_sgd_mom_update)
 .set_num_inputs(4)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<SGDMomParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<4, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<4, 1>)
 .set_attr<nnvm::FInferType>("FInferType", MP_SGD_InferType<2, 1, 4>)
 .set_attr<nnvm::FMutateInputs>("FMutateInputs",
   [](const nnvm::NodeAttrs& attrs) {
@@ -637,7 +637,7 @@ available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf.
 .set_num_inputs(5)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<FTMLParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<5, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<5, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<5, 1>)
 .set_attr<nnvm::FMutateInputs>("FMutateInputs",
   [](const nnvm::NodeAttrs& attrs) {
@@ -685,7 +685,7 @@ only the row slices whose indices appear in grad.indices are updated (for w, m a
 .set_num_inputs(4)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<AdamParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<4, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<4, 1>)
 .set_attr<FResourceRequest>("FResourceRequest",
   [](const NodeAttrs& attrs) {
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
@@ -743,7 +743,7 @@ Hinton suggests the momentum term :math:`\gamma` to be 0.9 and the learning rate
 .set_num_inputs(3)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<RMSPropParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 1>)
 .set_attr<nnvm::FMutateInputs>("FMutateInputs",
   [](const nnvm::NodeAttrs &attrs) {
@@ -782,7 +782,7 @@ to be 0.9 and the learning rate :math:`\eta` to be 0.0001.
diff --git a/src/operator/pad-inl.h b/src/operator/pad-inl.h
index 0b43e2d0cfd2..140d7099e817 100644
--- a/src/operator/pad-inl.h
+++ b/src/operator/pad-inl.h
@@ -50,7 +50,7 @@ enum PadOpOutputs { kOut };
 struct PadParam : public dmlc::Parameter<PadParam> {
   int mode;
   double constant_value;
-  TShape pad_width;
+  mxnet::TShape pad_width;
   DMLC_DECLARE_PARAMETER(PadParam) {
     DMLC_DECLARE_FIELD(mode)
     .add_enum("constant", pad_enum::kConstant)
@@ -200,12 +200,12 @@ class PadProp : public OperatorProperty {
     return dtype != -1;
   }

-  bool InferShape(std::vector<TShape> *in_shape, std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 1U) << "Can only be one input to symbol.";
-    const TShape &dshape = (*in_shape)[pad_enum::kData];
+    const mxnet::TShape &dshape = (*in_shape)[pad_enum::kData];
     auto rank = dshape.ndim();
     auto pad = param_.pad_width;
@@ -229,7 +229,7 @@ class PadProp : public OperatorProperty {
              "only supports padding sizes smaller than the input size.";
       }
     }
-    TShape oshape = dshape;
+    mxnet::TShape oshape = dshape;
     for (size_t i = 0; i < dshape.ndim(); ++i) {
       oshape[i] =
           param_.pad_width[2 * i] + param_.pad_width[2 * i + 1] + dshape[i];
@@ -258,7 +258,7 @@ class PadProp : public OperatorProperty {
     return NULL;
   }

-  Operator *CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator *CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;

  private:
diff --git a/src/operator/pad.cc b/src/operator/pad.cc
index 6c66b29082c4..9a5d7561ac01 100644
--- a/src/operator/pad.cc
+++ b/src/operator/pad.cc
@@ -674,7 +674,7 @@ Operator *CreateOp(PadParam param, int dtype) {
 }

 // DO_BIND_DISPATCH comes from operator_common.h
-Operator *PadProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+Operator *PadProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                                     std::vector<int> *in_type) const {
   DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
 }
diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h
index 8942ddc0d716..4e0ccc1caeb9 100644
--- a/src/operator/pooling_v1-inl.h
+++ b/src/operator/pooling_v1-inl.h
@@ -48,14 +48,14 @@ enum PoolingV1OpPadConventionType {kValid, kFull};
 }  // namespace pool_v1_enum

 struct PoolingV1Param : public dmlc::Parameter<PoolingV1Param> {
-  TShape kernel;
-  TShape stride;
-  TShape pad;
+  mxnet::TShape kernel;
+  mxnet::TShape stride;
+  mxnet::TShape pad;
   int pool_type;
   int pooling_convention;
   bool global_pool;
   DMLC_DECLARE_PARAMETER(PoolingV1Param) {
-    DMLC_DECLARE_FIELD(kernel).set_default(TShape())
+    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape())
     .enforce_nonzero()
     .describe("pooling kernel size: (y, x) or (d, y, x)");
@@ -73,11 +73,11 @@ struct PoolingV1Param : public dmlc::Parameter<PoolingV1Param> {
     .add_enum("valid", pool_v1_enum::kValid)
     .describe("Pooling convention to be applied.");

-    DMLC_DECLARE_FIELD(stride).set_default(TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
     .enforce_nonzero()
     .describe("stride: for pooling (y, x) or (d, y, x)");

-    DMLC_DECLARE_FIELD(pad).set_default(TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
     .describe("pad for pooling: (y, x) or (d, y, x)");
   }
 };
@@ -104,8 +104,8 @@ class PoolingV1Op : public Operator {
     }

     // reset padding size for global pooling
-    TShape padding = param_.pad;
-    // TShape kernel = param_.kernel;
+    mxnet::TShape padding = param_.pad;
+    // mxnet::TShape kernel = param_.kernel;
     if (param_.global_pool) {
       padding[0] = padding[1] = 0;
       // kernel[0] = kernel[1] = 0;
@@ -159,7 +159,7 @@ class PoolingV1Op : public Operator {
     }

     // reset padding size for global pooling
-    TShape padding = param_.pad;
+    mxnet::TShape padding = param_.pad;
     if (param_.global_pool) {
       padding[0] = padding[1] = 0;
     }
@@ -237,16 +237,16 @@ class PoolingV1Prop : public OperatorProperty {
     return param_.__DICT__();
   }

-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     CHECK_EQ(in_shape->size(), 1U);
-    const TShape &dshape = (*in_shape)[0];
+    const mxnet::TShape &dshape = (*in_shape)[0];
     CHECK_GE(dshape.ndim(), 4U) << "Pooling: Input data should be 4D in (batch, channel, y, x) "
                                 << "Or 5D in (batch, channel, d, y, x)";
     CHECK_LE(dshape.ndim(), 5U) << "Pooling: Input data should be 4D in (batch, channel, y, x) "
                                 << "Or 5D in (batch, channel, d, y, x)";
-    TShape oshape = dshape;
+    mxnet::TShape oshape = dshape;
     if (dshape.ndim() == 0) return false;
     if (param_.global_pool) {
       if (dshape.ndim() == 4) {
@@ -364,7 +364,7 @@ class PoolingV1Prop : public OperatorProperty {
     return NULL;
   }

-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;

  private:
diff --git a/src/operator/pooling_v1.cc b/src/operator/pooling_v1.cc
index afb51d762ddd..9e350e88c9ee 100644
--- a/src/operator/pooling_v1.cc
+++ b/src/operator/pooling_v1.cc
@@ -52,9 +52,9 @@ Operator *CreateOp(PoolingV1Param param, int dtype) {
 }

 // DO_BIND_DISPATCH comes from operator_common.h
-Operator* PoolingV1Prop::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+Operator* PoolingV1Prop::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                                           std::vector<int> *in_type) const {
-  std::vector<TShape> out_shape, aux_shape;
+  mxnet::ShapeVector out_shape, aux_shape;
   std::vector<int> out_type, aux_type;
   CHECK(InferType(in_type, &out_type, &aux_type));
   CHECK(InferShape(in_shape, &out_shape, &aux_shape));
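PoolingV1Op relies on mxnet::TShape being a cheap value type: it copies param_.pad and zeroes the copy for global pooling without disturbing the stored parameter. Condensed to its core (a sketch of the idiom above, assuming the usual 2-D spatial layout):

    // Sketch: copy a TShape, then mutate only the copy.
    mxnet::TShape padding = param_.pad;  // value copy; param_.pad is untouched
    if (param_.global_pool) {
      padding[0] = padding[1] = 0;       // global pooling ignores padding
    }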
diff --git a/src/operator/quantization/dequantize-inl.h b/src/operator/quantization/dequantize-inl.h
index 799e13665664..dcda5a8b4bef 100644
--- a/src/operator/quantization/dequantize-inl.h
+++ b/src/operator/quantization/dequantize-inl.h
@@ -93,13 +93,13 @@ void DequantizeCompute(const nnvm::NodeAttrs& attrs,
 }

 inline bool DequantizeShape(const nnvm::NodeAttrs& attrs,
-                            std::vector<TShape> *in_attrs,
-                            std::vector<TShape> *out_attrs) {
+                            mxnet::ShapeVector *in_attrs,
+                            mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 3U);
   CHECK_EQ(out_attrs->size(), 1U);

   for (size_t i = 1; i < 3; ++i) {
-    SHAPE_ASSIGN_CHECK(*in_attrs, i, TShape({1}));
+    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape({1}));
   }

   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
diff --git a/src/operator/quantization/dequantize.cc b/src/operator/quantization/dequantize.cc
index e20bc1722213..a4d57b9b4461 100644
--- a/src/operator/quantization/dequantize.cc
+++ b/src/operator/quantization/dequantize.cc
@@ -68,7 +68,7 @@ by keep zero centered for the quantized value:
 .set_attr_parser(ParamParser)
 .set_num_inputs(3)
 .set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape", DequantizeShape)
+.set_attr<mxnet::FInferShape>("FInferShape", DequantizeShape)
 .set_attr<nnvm::FInferType>("FInferType", DequantizeType)
 .set_attr<FInferStorageType>("FInferStorageType", DequantizeStorageType)
 #if MXNET_USE_MKLDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
index 409c53dd3b9a..45713589dd48 100644
--- a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
@@ -111,11 +111,11 @@ static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
     MKLDNNRequantizeForwardKer(attrs, ctx, inputs, req, outputs, real_range);
   // Model is not calibrated
   } else {
-    TShape src_shape, dst_shape;
+    mxnet::TShape src_shape, dst_shape;
     const size_t actual_float_size = sizeof(float);
     const size_t actual_quantized_size = sizeof(SrcDType);
     const size_t temp_reduce_size = ConfigReduce(s,
-        inputs[0].shape(), TShape({1}), &src_shape, &dst_shape);
+        inputs[0].shape(), mxnet::TShape({1}), &src_shape, &dst_shape);
     Tensor temp_space = ctx.requested[0].get_space_typed(
         Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s);
diff --git a/src/operator/quantization/quantization_utils.h b/src/operator/quantization/quantization_utils.h
index efc841009706..c540ea441431 100644
--- a/src/operator/quantization/quantization_utils.h
+++ b/src/operator/quantization/quantization_utils.h
@@ -174,10 +174,10 @@ struct QuantizationRangeForMultiplicationStruct {
 template
 inline size_t ConfigReduce(mshadow::Stream* s,
-                           const TShape& data_shape,
-                           const TShape& out_shape,
-                           TShape* src_shape,
-                           TShape* dst_shape) {
+                           const mxnet::TShape& data_shape,
+                           const mxnet::TShape& out_shape,
+                           mxnet::TShape* src_shape,
+                           mxnet::TShape* dst_shape) {
   BroadcastReduceShapeCompact(data_shape, out_shape, src_shape, dst_shape);
   constexpr int NDim = 2;
   CHECK_EQ(src_shape->ndim(), NDim);
diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 8b7a11cc5a89..747deadd68fe 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -110,18 +110,18 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs,
 }

 inline bool QuantizeShape(const nnvm::NodeAttrs& attrs,
-                          std::vector<TShape> *in_attrs,
-                          std::vector<TShape> *out_attrs) {
+                          mxnet::ShapeVector *in_attrs,
+                          mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 3U);
   CHECK_EQ(out_attrs->size(), 3U);

   for (size_t i = 1; i < 3; ++i) {
-    SHAPE_ASSIGN_CHECK(*in_attrs, i, TShape({1}));
+    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape({1}));
   }

   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
-  SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape{1});
-  SHAPE_ASSIGN_CHECK(*out_attrs, 2, TShape{1});
+  SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});
+  SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});
   return !shape_is_none(out_attrs->at(0));
 }
diff --git a/src/operator/quantization/quantize.cc b/src/operator/quantization/quantize.cc
index e486f058bfd5..c28d8c860924 100644
--- a/src/operator/quantization/quantize.cc
+++ b/src/operator/quantization/quantize.cc
@@ -79,7 +79,7 @@ where
   [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"data", "min_range", "max_range"};
   })
-.set_attr<nnvm::FInferShape>("FInferShape", QuantizeShape)
+.set_attr<mxnet::FInferShape>("FInferShape", QuantizeShape)
 .set_attr<nnvm::FInferType>("FInferType", QuantizeType)
 .set_attr<FInferStorageType>("FInferStorageType", QuantizeStorageType)
 #if MXNET_USE_MKLDNN == 1
diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h
index 5ae10a7e4fa8..7a0998383824 100644
--- a/src/operator/quantization/quantize_v2-inl.h
+++ b/src/operator/quantization/quantize_v2-inl.h
@@ -152,10 +152,11 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
       LOG(FATAL) << "quantize op only supports int8 and uint8 as output type";
     }
   } else {  // model is not calibrated
-    TShape src_shape, dst_shape;
+    mxnet::TShape src_shape, dst_shape;
     const size_t actual_float_size = sizeof(float);
     const size_t temp_reduce_size =
-        ConfigReduce(s, inputs[0].shape_, TShape({1}), &src_shape, &dst_shape);
+        ConfigReduce(s, inputs[0].shape_, mxnet::TShape({1}),
+                     &src_shape, &dst_shape);
     Tensor temp_space = ctx.requested[0].get_space_typed(
         Shape1(2 * actual_float_size + temp_reduce_size), s);
     const int dev_id = ctx.run_ctx.ctx.dev_id;
@@ -185,14 +186,14 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   }
 }

-static inline bool QuantizeV2Shape(const nnvm::NodeAttrs &attrs, std::vector<TShape> *in_attrs,
-                                   std::vector<TShape> *out_attrs) {
+static inline bool QuantizeV2Shape(const nnvm::NodeAttrs &attrs, mxnet::ShapeVector *in_attrs,
+                                   mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 3U);

   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
-  SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape{1});
-  SHAPE_ASSIGN_CHECK(*out_attrs, 2, TShape{1});
+  SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});
+  SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});
   return !shape_is_none(out_attrs->at(0));
 }
diff --git a/src/operator/quantization/quantize_v2.cc b/src/operator/quantization/quantize_v2.cc
index 21410933d35e..e221d580d228 100644
--- a/src/operator/quantization/quantize_v2.cc
+++ b/src/operator/quantization/quantize_v2.cc
@@ -80,7 +80,7 @@ If min_calib_range isn't presented, the output type will be int8.
 .set_attr<nnvm::FListInputNames>("FListInputNames",
   [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"data"};
   })
-.set_attr<nnvm::FInferShape>("FInferShape", QuantizeV2Shape)
+.set_attr<mxnet::FInferShape>("FInferShape", QuantizeV2Shape)
 .set_attr<nnvm::FInferType>("FInferType", QuantizeV2Type)
 .set_attr<FInferStorageType>("FInferStorageType", QuantizeV2StorageType)
 #if MXNET_USE_MKLDNN == 1
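A convention worth calling out in the quantization shape functions above: every min/max range argument is pinned to mxnet::TShape{1}, a rank-1 shape of extent one standing in for a scalar, while the data shape simply propagates. The recurring three-output contract, reduced to its essence:

    // Sketch of the recurring quantization shape contract.
    SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));   // data: same shape as input
    SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});  // min_range: scalar-shaped
    SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});  // max_range: scalar-shaped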
.set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", QuantizeV2Shape) +.set_attr("FInferShape", QuantizeV2Shape) .set_attr("FInferType", QuantizeV2Type) .set_attr("FInferStorageType", QuantizeV2StorageType) #if MXNET_USE_MKLDNN == 1 diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc index 3504df82d243..f5c1e8e6ceae 100644 --- a/src/operator/quantization/quantized_concat.cc +++ b/src/operator/quantization/quantized_concat.cc @@ -28,17 +28,17 @@ namespace mxnet { namespace op { -static bool ConcatShape(const nnvm::NodeAttrs& attrs, std::vector* in_shape, - std::vector* out_shape) { +static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape) { const ConcatParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), static_cast(param_.num_args * 3)); CHECK_EQ(out_shape->size(), 3U); - TShape dshape; + mxnet::TShape dshape; index_t size = 0; bool has_zero = false; int axis = -1; for (int i = 0; i < param_.num_args; ++i) { - TShape tmp = (*in_shape)[i]; + mxnet::TShape tmp = (*in_shape)[i]; if (tmp.ndim()) { axis = CheckAxis(param_.dim, tmp.ndim()); has_zero = tmp[axis] == 0 || has_zero; @@ -48,7 +48,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, std::vector* in_sh } } - TShape tmp = (*out_shape)[0]; + mxnet::TShape tmp = (*out_shape)[0]; if (tmp.ndim()) { axis = CheckAxis(param_.dim, tmp.ndim()); tmp[axis] = 0; @@ -67,10 +67,10 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, std::vector* in_sh << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0]; for (int i = param_.num_args; i < param_.num_args * 3; ++i) { - SHAPE_ASSIGN_CHECK(*in_shape, i, TShape{1}); + SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape{1}); } - SHAPE_ASSIGN_CHECK(*out_shape, 1, TShape{1}); - SHAPE_ASSIGN_CHECK(*out_shape, 2, TShape{1}); + SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape{1}); + SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape{1}); return dshape.Size() != 0; } @@ -128,7 +128,7 @@ If any input holds int8, then the output will be int8. 
Otherwise output will be return std::vector{"output", "min_output", "max_output"}; }) .set_attr("FInferType", ConcatType) -.set_attr("FInferShape", ConcatShape) +.set_attr("FInferShape", ConcatShape) .set_attr("key_var_num_args", "num_args") .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") .add_arguments(ConcatParam::__FIELDS__()); diff --git a/src/operator/quantization/quantized_conv.cc b/src/operator/quantization/quantized_conv.cc index ed62228b9249..7841c3acb47c 100644 --- a/src/operator/quantization/quantized_conv.cc +++ b/src/operator/quantization/quantized_conv.cc @@ -32,8 +32,8 @@ namespace mxnet { namespace op { bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { + mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape) { using namespace mshadow; const ConvolutionParam& param = nnvm::get(attrs.parsed); CHECK_EQ(param.num_group, 1U) << "quantized_conv only supports num_group=1 for now"; @@ -45,7 +45,7 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(param.kernel.ndim(), 2U) << "quantized_conv only supports 2D convolution for now"; CHECK(param.dilate.ndim() == 0U || param.dilate.Size() == 1U) << "quantized_conv only supports dilation=1 for all dimensions"; - const TShape& dshape = in_shape->at(0); + const mxnet::TShape& dshape = in_shape->at(0); CHECK_EQ(dshape.ndim(), 4U); if (dshape.ndim() == 0U) return false; @@ -55,7 +55,7 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(param.num_filter % 4, 0U) << "for 8bit cudnn conv, the number of channel must be multiple of 4"; - TShape wshape{0, 0, 0, 0}; + mxnet::TShape wshape{0, 0, 0, 0}; wshape[N] = param.num_filter; wshape[H] = param.kernel[0]; wshape[W] = param.kernel[1]; @@ -64,22 +64,22 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, const int start = param.no_bias? 2 : 3; const int end = param.no_bias? 
6 : 9; for (int i = start; i < end; ++i) { - SHAPE_ASSIGN_CHECK(*in_shape, i, TShape{1}); + SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape{1}); } if (!param.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, 2, Shape1(param.num_filter)); } auto AddPad = [](index_t dsize, index_t pad) { return dsize + 2 * pad; }; - TShape oshape{1, 1, 1, 1}; + mxnet::TShape oshape{1, 1, 1, 1}; oshape[N] = dshape[N]; oshape[C] = wshape[N]; oshape[H] = (AddPad(dshape[H], param.pad[0]) - wshape[H]) / param.stride[0] + 1; oshape[W] = (AddPad(dshape[W], param.pad[1]) - wshape[W]) / param.stride[1] + 1; SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape); - SHAPE_ASSIGN_CHECK(*out_shape, 1, TShape({1})); - SHAPE_ASSIGN_CHECK(*out_shape, 2, TShape({1})); + SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape({1})); + SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape({1})); return true; } @@ -157,7 +157,7 @@ and max thresholds representing the threholds for quantizing the float32 output [](const NodeAttrs& attrs) { return std::vector{"output", "min_output", "max_output"}; }) -.set_attr("FInferShape", QuantizedConvShape) +.set_attr("FInferShape", QuantizedConvShape) .set_attr("FInferType", QuantizedConvType) .set_attr("FInferStorageType", QuantizedConvStorageType) .set_attr("FResourceRequest", diff --git a/src/operator/quantization/quantized_conv.cu b/src/operator/quantization/quantized_conv.cu index a76782b8baa4..ee688c0648c8 100644 --- a/src/operator/quantization/quantized_conv.cu +++ b/src/operator/quantization/quantized_conv.cu @@ -62,8 +62,8 @@ class QuantizedCuDNNConvOp { void Init(const ConvolutionParam& param, const OpContext& ctx, - const std::vector& in_shape, - const std::vector& out_shape) { + const mxnet::ShapeVector& in_shape, + const mxnet::ShapeVector& out_shape) { param_ = param; CHECK_EQ(param_.kernel.ndim(), 2U) << "QuantizedCuDNNConvOp only supports 2D convolution for now"; @@ -106,9 +106,9 @@ class QuantizedCuDNNConvOp { const TBlob& data = in_data[0]; const TBlob& filter = in_data[1]; const TBlob& out = out_data[0]; - const TShape& dshape = data.shape_; - const TShape& fshape = filter.shape_; - const TShape& oshape = out.shape_; + const mxnet::TShape& dshape = data.shape_; + const mxnet::TShape& fshape = filter.shape_; + const mxnet::TShape& oshape = out.shape_; // allocate workspace const int dev_id = ctx.run_ctx.ctx.dev_id; @@ -123,24 +123,24 @@ class QuantizedCuDNNConvOp { ctx.requested[0].get_space_typed(mshadow::Shape1(total_temp_bytes), s); char* temp_dptr = temp_space.dptr_; TBlob data_(reinterpret_cast(temp_dptr), - TShape({dshape[N], dshape[H], dshape[W], dshape[C]}), + mxnet::TShape({dshape[N], dshape[H], dshape[W], dshape[C]}), dev_mask, DataType::kFlag, dev_id); temp_dptr += data_size * sizeof(SrcType); TBlob filter_(reinterpret_cast(temp_dptr), - TShape({fshape[N], fshape[H], fshape[W], fshape[C]}), + mxnet::TShape({fshape[N], fshape[H], fshape[W], fshape[C]}), dev_mask, DataType::kFlag, dev_id); temp_dptr += weight_size * sizeof(SrcType); // input: [NCHW] => [NHWC](batch, in_height, in_width, in_channels) // filter: [NCHW] => [NHWC](out_channels, filter_height, filter_width, in_channels) - TransposeImpl(ctx.run_ctx, data, data_, TShape({N, H, W, C})); - TransposeImpl(ctx.run_ctx, filter, filter_, TShape({N, H, W, C})); + TransposeImpl(ctx.run_ctx, data, data_, mxnet::TShape({N, H, W, C})); + TransposeImpl(ctx.run_ctx, filter, filter_, mxnet::TShape({N, H, W, C})); TBlob out_(reinterpret_cast(temp_dptr), - TShape({oshape[N], oshape[H], oshape[W], oshape[C]}), + mxnet::TShape({oshape[N], oshape[H], oshape[W], 
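The oshape[H]/oshape[W] lines in QuantizedConvShape above are the standard convolution output-size arithmetic. Factored out for one spatial dimension (a sketch; the helper name is invented, and dilation is ignored, matching the dilation==1 restriction the op enforces):

    // out = (in + 2*pad - kernel) / stride + 1, with floor (integer) division.
    inline index_t ConvOutDim(index_t in, index_t pad, index_t kernel, index_t stride) {
      return (in + 2 * pad - kernel) / stride + 1;
    }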
diff --git a/src/operator/quantization/quantized_conv.cu b/src/operator/quantization/quantized_conv.cu
index a76782b8baa4..ee688c0648c8 100644
--- a/src/operator/quantization/quantized_conv.cu
+++ b/src/operator/quantization/quantized_conv.cu
@@ -62,8 +62,8 @@ class QuantizedCuDNNConvOp {

   void Init(const ConvolutionParam& param,
             const OpContext& ctx,
-            const std::vector<TShape>& in_shape,
-            const std::vector<TShape>& out_shape) {
+            const mxnet::ShapeVector& in_shape,
+            const mxnet::ShapeVector& out_shape) {
     param_ = param;
     CHECK_EQ(param_.kernel.ndim(), 2U)
       << "QuantizedCuDNNConvOp only supports 2D convolution for now";
@@ -106,9 +106,9 @@ class QuantizedCuDNNConvOp {
     const TBlob& data = in_data[0];
     const TBlob& filter = in_data[1];
     const TBlob& out = out_data[0];
-    const TShape& dshape = data.shape_;
-    const TShape& fshape = filter.shape_;
-    const TShape& oshape = out.shape_;
+    const mxnet::TShape& dshape = data.shape_;
+    const mxnet::TShape& fshape = filter.shape_;
+    const mxnet::TShape& oshape = out.shape_;

     // allocate workspace
     const int dev_id = ctx.run_ctx.ctx.dev_id;
@@ -123,24 +123,24 @@ class QuantizedCuDNNConvOp {
         ctx.requested[0].get_space_typed(mshadow::Shape1(total_temp_bytes), s);
     char* temp_dptr = temp_space.dptr_;
     TBlob data_(reinterpret_cast(temp_dptr),
-                TShape({dshape[N], dshape[H], dshape[W], dshape[C]}),
+                mxnet::TShape({dshape[N], dshape[H], dshape[W], dshape[C]}),
                 dev_mask, DataType::kFlag, dev_id);
     temp_dptr += data_size * sizeof(SrcType);
     TBlob filter_(reinterpret_cast(temp_dptr),
-                  TShape({fshape[N], fshape[H], fshape[W], fshape[C]}),
+                  mxnet::TShape({fshape[N], fshape[H], fshape[W], fshape[C]}),
                   dev_mask, DataType::kFlag, dev_id);
     temp_dptr += weight_size * sizeof(SrcType);

     // input:  [NCHW] => [NHWC](batch, in_height, in_width, in_channels)
     // filter: [NCHW] => [NHWC](out_channels, filter_height, filter_width, in_channels)
-    TransposeImpl(ctx.run_ctx, data, data_, TShape({N, H, W, C}));
-    TransposeImpl(ctx.run_ctx, filter, filter_, TShape({N, H, W, C}));
+    TransposeImpl(ctx.run_ctx, data, data_, mxnet::TShape({N, H, W, C}));
+    TransposeImpl(ctx.run_ctx, filter, filter_, mxnet::TShape({N, H, W, C}));
     TBlob out_(reinterpret_cast(temp_dptr),
-               TShape({oshape[N], oshape[H], oshape[W], oshape[C]}),
+               mxnet::TShape({oshape[N], oshape[H], oshape[W], oshape[C]}),
               dev_mask, DataType::kFlag, dev_id);
     temp_dptr += output_size * sizeof(DstType);
     TBlob out_tcast(reinterpret_cast(temp_dptr),
-                    TShape({oshape[N], oshape[H], oshape[W], oshape[C]}),
+                    mxnet::TShape({oshape[N], oshape[H], oshape[W], oshape[C]}),
                     dev_mask, DataType::kFlag, dev_id);
     temp_dptr += output_size * sizeof(int32_t);
     // input: [NHWC](batch, in_height, in_width, in_channels)
@@ -165,7 +165,7 @@ class QuantizedCuDNNConvOp {
       Tensor out_tcast_tensor = out_tcast.FlatTo1D(s);
       Assign(out_tcast_tensor, kWriteTo, mshadow::expr::tcast(out_tensor));
       // output: [NHWC](batch, out_height, out_width, out_channels) => [NCHW]
-      TransposeImpl(ctx.run_ctx, out_tcast, out, TShape({0, 3, 1, 2}));
+      TransposeImpl(ctx.run_ctx, out_tcast, out, mxnet::TShape({0, 3, 1, 2}));
     } else {
       LOG(FATAL) << "quantized_conv only supports NCHW for now";
     }
@@ -193,11 +193,11 @@ class QuantizedCuDNNConvOp {
     }
   }

-  void InitDescriptors(const std::vector<TShape>& in_shape,
-                       const std::vector<TShape>& out_shape) {
-    const TShape& dshape = in_shape[0];
-    const TShape& kshape = in_shape[1];
-    const TShape& oshape = out_shape[0];
+  void InitDescriptors(const mxnet::ShapeVector& in_shape,
+                       const mxnet::ShapeVector& out_shape) {
+    const mxnet::TShape& dshape = in_shape[0];
+    const mxnet::TShape& kshape = in_shape[1];
+    const mxnet::TShape& oshape = out_shape[0];
     CUDNN_CALL(cudnnSetConvolution2dDescriptor(conv_desc_,
                                                param_.pad[0],
                                                param_.pad[1],
diff --git a/src/operator/quantization/quantized_flatten-inl.h b/src/operator/quantization/quantized_flatten-inl.h
index b7209fd28f5a..99a262de19ca 100644
--- a/src/operator/quantization/quantized_flatten-inl.h
+++ b/src/operator/quantization/quantized_flatten-inl.h
@@ -80,12 +80,12 @@ void QuantizedFlattenCompute(const nnvm::NodeAttrs& attrs,
 }

 inline bool QuantizedFlattenShape(const nnvm::NodeAttrs& attrs,
-                                  std::vector<TShape> *in_attrs,
-                                  std::vector<TShape> *out_attrs) {
+                                  mxnet::ShapeVector *in_attrs,
+                                  mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 3U);
   CHECK_EQ(out_attrs->size(), 3U);

-  const TShape &dshape = (*in_attrs)[0];
+  const mxnet::TShape &dshape = (*in_attrs)[0];
   if (shape_is_none(dshape)) return false;

   uint32_t target_dim = 1;
@@ -93,11 +93,11 @@ inline bool QuantizedFlattenShape(const nnvm::NodeAttrs& attrs,
     target_dim *= dshape[i];
   }

-  SHAPE_ASSIGN_CHECK(*in_attrs, 1, TShape{1});
-  SHAPE_ASSIGN_CHECK(*in_attrs, 2, TShape{1});
+  SHAPE_ASSIGN_CHECK(*in_attrs, 1, mxnet::TShape{1});
+  SHAPE_ASSIGN_CHECK(*in_attrs, 2, mxnet::TShape{1});
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape2(dshape[0], target_dim));
-  SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape{1});
-  SHAPE_ASSIGN_CHECK(*out_attrs, 2, TShape{1});
+  SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});
+  SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});
   return true;
 }
diff --git a/src/operator/quantization/quantized_flatten.cc b/src/operator/quantization/quantized_flatten.cc
index 3f426a59bdd2..f283d98cf10b 100644
--- a/src/operator/quantization/quantized_flatten.cc
+++ b/src/operator/quantization/quantized_flatten.cc
@@ -31,7 +31,7 @@ namespace op {
 NNVM_REGISTER_OP(_contrib_quantized_flatten)
 .set_num_inputs(3)
 .set_num_outputs(3)
-.set_attr<nnvm::FInferShape>("FInferShape", QuantizedFlattenShape)
+.set_attr<mxnet::FInferShape>("FInferShape", QuantizedFlattenShape)
 .set_attr<nnvm::FInferType>("FInferType", QuantizedFlattenType)
 .set_attr<FCompute>("FCompute", QuantizedFlattenCompute)
 .set_attr<nnvm::FListInputNames>("FListInputNames",
diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 64ce73ba1cf7..f51b6fdd1798 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -35,8 +35,8 @@ enum QuantizedfcOpResource {kTempSpace};
 }

 bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
-                                  std::vector<TShape> *in_shape,
-                                  std::vector<TShape> *out_shape) {
+                                  mxnet::ShapeVector *in_shape,
+                                  mxnet::ShapeVector *out_shape) {
   const FullyConnectedParam& param = nnvm::get<FullyConnectedParam>(attrs.parsed);
   CHECK(param.flatten) << "QuantizedFullyConnectedOp only supports flatten=true for now";
   using namespace mshadow;
@@ -46,21 +46,21 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
   CHECK(!shape_is_none(in_shape->at(0)))
     << "QuantizedFullyConnectedOp input data shape must be given";
-  const TShape& dshape = in_shape->at(0);
-  TShape wshape = Shape2(param.num_hidden, dshape.ProdShape(1, dshape.ndim()));
+  const mxnet::TShape& dshape = in_shape->at(0);
+  mxnet::TShape wshape = Shape2(param.num_hidden, dshape.ProdShape(1, dshape.ndim()));
   SHAPE_ASSIGN_CHECK(*in_shape, 1, wshape);
   if (!param.no_bias) {
-    TShape bshape = Shape1(param.num_hidden);
+    mxnet::TShape bshape = Shape1(param.num_hidden);
     SHAPE_ASSIGN_CHECK(*in_shape, 2, bshape);
   }

   for (size_t i = num_inputs; i < 3 * num_inputs; ++i) {
-    SHAPE_ASSIGN_CHECK(*in_shape, i, TShape{1});
+    SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape{1});
   }

-  SHAPE_ASSIGN_CHECK(*out_shape, 0, TShape({dshape[0], wshape[0]}));
-  SHAPE_ASSIGN_CHECK(*out_shape, 1, TShape({1}));
-  SHAPE_ASSIGN_CHECK(*out_shape, 2, TShape({1}));
+  SHAPE_ASSIGN_CHECK(*out_shape, 0, mxnet::TShape({dshape[0], wshape[0]}));
+  SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape({1}));
+  SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape({1}));
   return true;
 }
@@ -153,9 +153,9 @@ void QuantizedFullyConnectedForward(const nnvm::NodeAttrs& attrs,
   const NDArray& data = in_data[0];
   const NDArray& weight = in_data[1];
   const NDArray& out = out_data[0];
-  TShape dshape = data.shape();
-  TShape wshape = weight.shape();
-  TShape oshape = out.shape();
+  mxnet::TShape dshape = data.shape();
+  mxnet::TShape wshape = weight.shape();
+  mxnet::TShape oshape = out.shape();
   auto output_temp = out.data().dptr();
   auto weight_temp = weight.data().dptr();
   auto data_temp = data.data().dptr();
@@ -261,7 +261,7 @@ and max thresholds representing the threholds for quantizing the float32 output
   [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"output", "min_output", "max_output"};
   })
-.set_attr<nnvm::FInferShape>("FInferShape", QuantizedFullyConnectedShape)
+.set_attr<mxnet::FInferShape>("FInferShape", QuantizedFullyConnectedShape)
 .set_attr<nnvm::FInferType>("FInferType", QuantizedFullyConnectedType)
 .set_attr<FInferStorageType>("FInferStorageType", QuantizedFullyConnectedStorageType)
 .set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
diff --git a/src/operator/quantization/quantized_fully_connected.cu b/src/operator/quantization/quantized_fully_connected.cu
index beecc7598642..e8580e2e2c9d 100644
--- a/src/operator/quantization/quantized_fully_connected.cu
+++ b/src/operator/quantization/quantized_fully_connected.cu
@@ -70,9 +70,9 @@ void QuantizedFullyConnectedForwardGPU(const nnvm::NodeAttrs& attrs,
   const TBlob& data = inputs[0];
   const TBlob& weight = inputs[1];
   const TBlob& out = outputs[0];
-  TShape dshape = data.shape_;
-  TShape wshape = weight.shape_;
-  TShape oshape = out.shape_;
+  mxnet::TShape dshape = data.shape_;
+  mxnet::TShape wshape = weight.shape_;
+  mxnet::TShape oshape = out.shape_;

   // (m, n) * (k, n).T = (m, k)
   // A * B.T = C
diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc
index b9daf2592b7d..cdc98eeac6f6 100644
--- a/src/operator/quantization/quantized_pooling.cc
+++ b/src/operator/quantization/quantized_pooling.cc
@@ -31,12 +31,12 @@ namespace mxnet {
 namespace op {

 bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
-                           std::vector<TShape> *in_shape,
-                           std::vector<TShape> *out_shape) {
+                           mxnet::ShapeVector *in_shape,
+                           mxnet::ShapeVector *out_shape) {
   const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), 3U);
   if (shape_is_none(in_shape->at(0))) return false;
-  const TShape &dshape = (*in_shape)[0];
+  const mxnet::TShape &dshape = (*in_shape)[0];
   CHECK_EQ(dshape.ndim(), 4U)
       << "quantized_pooling: Input data should be 4D in "
       << "(batch, channel, y, x)";
@@ -45,7 +45,7 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
       << "QuantizedPoolingOp only supports NCHW layout for now, saw " << layout;
   // NCHW layout
   const int N = 0, H = 2, W = 3, C = 1;
-  TShape oshape(4);
+  mxnet::TShape oshape(4);
   CHECK_EQ(param.kernel.ndim(), 2) << "QuantizedPoolingOp only supports 2D pooling for now";
   CHECK(param.kernel[0] <= dshape[H] + 2 * param.pad[0])
       << "kernel size (" << param.kernel[0]
@@ -81,13 +81,13 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
     }
   }

-  SHAPE_ASSIGN_CHECK(*in_shape, 1, TShape{1});
-  SHAPE_ASSIGN_CHECK(*in_shape, 2, TShape{1});
+  SHAPE_ASSIGN_CHECK(*in_shape, 1, mxnet::TShape{1});
+  SHAPE_ASSIGN_CHECK(*in_shape, 2, mxnet::TShape{1});

   out_shape->clear();
   out_shape->push_back(oshape);
-  out_shape->push_back(TShape{1});
-  out_shape->push_back(TShape{1});
+  out_shape->push_back(mxnet::TShape{1});
+  out_shape->push_back(mxnet::TShape{1});
   return true;
 }
@@ -154,7 +154,7 @@ the float32 data into int8.
   [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"output", "min_output", "max_output"};
   })
-.set_attr<nnvm::FInferShape>("FInferShape", QuantizedPoolingShape)
+.set_attr<mxnet::FInferShape>("FInferShape", QuantizedPoolingShape)
 .set_attr<nnvm::FInferType>("FInferType", QuantizedPoolingType)
 .set_attr<FInferStorageType>("FInferStorageType", QuantizedPoolingStorageType)
 .set_attr<FNeedRequantize>("FNeedRequantize",
diff --git a/src/operator/quantization/quantized_pooling.cu b/src/operator/quantization/quantized_pooling.cu
index 2bbac5fc1512..a8fba87090ab 100644
--- a/src/operator/quantization/quantized_pooling.cu
+++ b/src/operator/quantization/quantized_pooling.cu
@@ -39,7 +39,7 @@ class QuantizedCuDNNPoolingOp {
     CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc_));
   }

-  void Init(const PoolingParam& param, const TShape& dshape, const TShape& oshape) {
+  void Init(const PoolingParam& param, const mxnet::TShape& dshape, const mxnet::TShape& oshape) {
     const int N = 0, H = 2, W = 3, C = 1;
     const cudnnDataType_t dtype = mshadow::DataType::kCudnnFlag;
     CHECK(param.kernel.ndim() == 2) << "Only support 2D pooling";
diff --git a/src/operator/quantization/requantize-inl.h b/src/operator/quantization/requantize-inl.h
index 148453e63257..21d58d4607eb 100644
--- a/src/operator/quantization/requantize-inl.h
+++ b/src/operator/quantization/requantize-inl.h
@@ -107,11 +107,11 @@ void RequantizeForward(const nnvm::NodeAttrs& attrs,
       inputs[0].dptr(), inputs[1].dptr(), inputs[2].dptr(),
       MaxAbs(param.min_calib_range.value(), param.max_calib_range.value()));
   } else {  // model is not calibrated
-    TShape src_shape, dst_shape;
+    mxnet::TShape src_shape, dst_shape;
     const size_t actual_float_size = sizeof(float);
     const size_t actual_quantized_size = sizeof(SrcDType);
     const size_t temp_reduce_size = ConfigReduce(
-        s, inputs[0].shape_, TShape({1}), &src_shape, &dst_shape);
+        s, inputs[0].shape_, mxnet::TShape({1}), &src_shape, &dst_shape);
     Tensor temp_space = ctx.requested[0].get_space_typed(
         Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s);
diff --git a/src/operator/quantization/requantize.cc b/src/operator/quantization/requantize.cc
index 68b1b65e4e7b..edfb58e5cbd5 100644
--- a/src/operator/quantization/requantize.cc
+++ b/src/operator/quantization/requantize.cc
@@ -61,7 +61,7 @@ inference accuracy.
 .set_attr_parser(ParamParser)
 .set_num_inputs(3)
 .set_num_outputs(3)
-.set_attr<nnvm::FInferShape>("FInferShape", QuantizeShape)
+.set_attr<mxnet::FInferShape>("FInferShape", QuantizeShape)
 .set_attr<nnvm::FInferType>("FInferType", RequantizeType)
 .set_attr<FInferStorageType>("FInferStorageType", RequantizeStorageType)
 #if MXNET_USE_MKLDNN == 1
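Several different mxnet::TShape constructors appear in the hunks above and below, and they are easy to conflate. A compact sketch with invented values (note in particular that TShape{1} and TShape({1}) build a rank-1 shape of extent 1, while TShape(3) builds a rank-3 shape whose extents are then filled in):

    mxnet::TShape scalar_like({1});   // initializer list: rank 1, extent 1
    mxnet::TShape also_scalar{1};     // brace form, used interchangeably above
    mxnet::TShape rank3(3);           // ndim constructor: rank 3, extents unset
    rank3[0] = 2; rank3[1] = 3; rank3[2] = 4;
    mxnet::TShape copy(rank3.begin(), rank3.end());  // iterator-range constructor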
diff --git a/src/operator/random/multisample_op.cc b/src/operator/random/multisample_op.cc
index a88db09442e8..240126b17b79 100644
--- a/src/operator/random/multisample_op.cc
+++ b/src/operator/random/multisample_op.cc
@@ -44,7 +44,7 @@ DMLC_REGISTER_PARAMETER(MultiSampleParam);
     [](const NodeAttrs& attrs) { \
       std::vector<std::string> v = {input_name_1, input_name_2}; v.resize(num_inputs); return v; \
     }) \
-  .set_attr<nnvm::FInferShape>("FInferShape", MultiSampleOpShape) \
+  .set_attr<mxnet::FInferShape>("FInferShape", MultiSampleOpShape) \
   .set_attr<nnvm::FInferType>("FInferType", MultiSampleOpType) \
   .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) { \
       return std::vector<ResourceRequest>{ResourceRequest::kParallelRandom, \
diff --git a/src/operator/random/multisample_op.h b/src/operator/random/multisample_op.h
index abd4a2c6c6d9..e9f266932e13 100644
--- a/src/operator/random/multisample_op.h
+++ b/src/operator/random/multisample_op.h
@@ -38,11 +38,11 @@ namespace mxnet {
 namespace op {

 struct MultiSampleParam : public dmlc::Parameter<MultiSampleParam> {
-  TShape shape;
+  mxnet::TShape shape;
   int dtype;
   DMLC_DECLARE_PARAMETER(MultiSampleParam) {
     DMLC_DECLARE_FIELD(shape)
-      .set_default(TShape())
+      .set_default(mxnet::TShape())
       .describe("Shape to be sampled from each random distribution.");
     DMLC_DECLARE_FIELD(dtype)
       .add_enum("None", -1)
@@ -56,8 +56,8 @@ struct MultiSampleParam : public dmlc::Parameter<MultiSampleParam> {
 };

 inline bool MultiSampleOpShape(const nnvm::NodeAttrs& attrs,
-                               std::vector<TShape>* in_attrs,
-                               std::vector<TShape>* out_attrs) {
+                               mxnet::ShapeVector* in_attrs,
+                               mxnet::ShapeVector* out_attrs) {
   CHECK_GT(in_attrs->size(), 0)
     << "sampling operator takes 1 or 2 arguments (" << in_attrs->size() << " given)";
   CHECK_LT(in_attrs->size(), 3)
@@ -65,17 +65,17 @@ inline bool MultiSampleOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1);
   // Get shape to be sampled for each parameter set.
   const MultiSampleParam& param = nnvm::get<MultiSampleParam>(attrs.parsed);
-  TShape sshape = param.shape;
+  mxnet::TShape sshape = param.shape;
   for (size_t i = 0; i < sshape.ndim(); ++i) {
     CHECK_GT(sshape[i], 0) << "shape parameter must be non-zero within each dimension";
   }
   // Examine output shape whether it is already defined.
-  TShape tshape((*out_attrs)[0]);
+  mxnet::TShape tshape((*out_attrs)[0]);
   // The illegal case of tshape.ndim() <= sshape.ndim() will
   // automatically crash when we back-propagate from inputs to outputs.
   if (tshape.ndim() > sshape.ndim()) {
     // Promote down by removing last dimensions which represent the samples.
-    tshape = TShape(tshape.begin(), tshape.begin()+(tshape.ndim()-sshape.ndim()));
+    tshape = mxnet::TShape(tshape.begin(), tshape.begin()+(tshape.ndim()-sshape.ndim()));
   }
   // Shape assignemnt/checking for inputs.
   for (const auto& in_attr : *in_attrs) {
@@ -88,7 +88,7 @@ inline bool MultiSampleOpShape(const nnvm::NodeAttrs& attrs,
     // Shape assignment/check for propagation from inputs to output.
     std::vector cshape(tshape.begin(), tshape.end());
     cshape.insert(cshape.end(), sshape.begin(), sshape.end());
-    TShape oshape(cshape.begin(), cshape.end());
+    mxnet::TShape oshape(cshape.begin(), cshape.end());
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
   }
   return true;
diff --git a/src/operator/random/sample_multinomial_op.cc b/src/operator/random/sample_multinomial_op.cc
index 1bacb023588f..7858b03ea87f 100644
--- a/src/operator/random/sample_multinomial_op.cc
+++ b/src/operator/random/sample_multinomial_op.cc
@@ -67,7 +67,7 @@ Examples::
     return param.get_prob ? 2U : 1U;
   })
 .set_attr_parser(ParamParser)
-.set_attr<nnvm::FInferShape>("FInferShape", SampleMultinomialOpShape)
+.set_attr<mxnet::FInferShape>("FInferShape", SampleMultinomialOpShape)
 .set_attr<nnvm::FInferType>("FInferType", SampleMultinomialOpType)
 .set_attr<FResourceRequest>("FResourceRequest",
   [](const nnvm::NodeAttrs& attrs) {
diff --git a/src/operator/random/sample_multinomial_op.h b/src/operator/random/sample_multinomial_op.h
index e0f0d685c8ce..e76cd646b850 100644
--- a/src/operator/random/sample_multinomial_op.h
+++ b/src/operator/random/sample_multinomial_op.h
@@ -36,12 +36,12 @@ namespace mxnet {
 namespace op {

 struct SampleMultinomialParam : public dmlc::Parameter<SampleMultinomialParam> {
-  TShape shape;
+  mxnet::TShape shape;
   bool get_prob;
   int dtype;
   DMLC_DECLARE_PARAMETER(SampleMultinomialParam) {
     DMLC_DECLARE_FIELD(shape)
-      .set_default(TShape())
+      .set_default(mxnet::TShape())
       .describe("Shape to be sampled from each random distribution.");
     DMLC_DECLARE_FIELD(get_prob)
       .set_default(false)
@@ -61,13 +61,13 @@ struct SampleMultinomialParam : public dmlc::Parameter<SampleMultinomialParam> {

 inline bool SampleMultinomialOpShape(const nnvm::NodeAttrs& attrs,
-                                     std::vector<TShape>* in_attrs,
-                                     std::vector<TShape>* out_attrs) {
+                                     mxnet::ShapeVector* in_attrs,
+                                     mxnet::ShapeVector* out_attrs) {
   const SampleMultinomialParam& param = nnvm::get<SampleMultinomialParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), param.get_prob ? 2U : 1U);
-  const TShape& ishape = (*in_attrs)[0];
+  const mxnet::TShape& ishape = (*in_attrs)[0];
   if (!ishape.ndim()) return false;

   MSHADOW_TYPE_SWITCH(param.dtype, DType, {
@@ -80,13 +80,13 @@ inline bool SampleMultinomialOpShape(const nnvm::NodeAttrs& attrs,
       SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape);
       if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, param.shape);
     } else {
-      SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape(1));
-      if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape(1));
+      SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1));
+      if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1));
     }
     return true;
   }

-  TShape oshape(ishape.ndim() - 1 + param.shape.ndim());
+  mxnet::TShape oshape(ishape.ndim() - 1 + param.shape.ndim());
   for (size_t i = 0; i < ishape.ndim() - 1; ++i) {
     oshape[i] = ishape[i];
   }
diff --git a/src/operator/random/sample_op.cc b/src/operator/random/sample_op.cc
index b065615e1fb1..56a162be5da4 100644
--- a/src/operator/random/sample_op.cc
+++ b/src/operator/random/sample_op.cc
@@ -51,7 +51,7 @@ DMLC_REGISTER_PARAMETER(SampleGenNegBinomialLikeParam);
   .set_num_inputs(0) \
   .set_num_outputs(1) \
   .set_attr_parser(ParamParser) \
-  .set_attr<nnvm::FInferShape>("FInferShape", InitShape) \
+  .set_attr<mxnet::FInferShape>("FInferShape", InitShape) \
   .set_attr<nnvm::FInferType>("FInferType", SampleOpType) \
   .set_attr<FResourceRequest>("FResourceRequest", SampleResource) \
   .add_arguments(ParamType::__FIELDS__()) \
@@ -64,7 +64,7 @@ DMLC_REGISTER_PARAMETER(SampleGenNegBinomialLikeParam);
   .set_num_inputs(1) \
   .set_num_outputs(1) \
   .set_attr_parser(ParamParser) \
-  .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
+  .set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>) \
   .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>) \
   .set_attr<FResourceRequest>("FResourceRequest", SampleResource) \
   .set_attr<nnvm::FIgnoreInputs>("FIgnoreInputs", \
diff --git a/src/operator/random/sample_op.h b/src/operator/random/sample_op.h
index b12dfafbcfc8..b327ee266603 100644
--- a/src/operator/random/sample_op.h
+++ b/src/operator/random/sample_op.h
@@ -40,7 +40,7 @@ namespace op {

 struct SampleOpParam {
-  TShape shape;
+  mxnet::TShape shape;
   std::string ctx;
   int dtype;
 };
@@ -91,7 +91,7 @@ struct SampleUniformParam : public dmlc::Parameter<SampleUniformParam>,
     DMLC_DECLARE_FIELD(high).set_default(1.0f)
     .describe("Upper bound of the distribution.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -116,7 +116,7 @@ struct SampleNormalParam : public dmlc::Parameter<SampleNormalParam>,
     DMLC_DECLARE_FIELD(scale).set_default(1.0f)
     .describe("Standard deviation of the distribution.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -141,7 +141,7 @@ struct SampleGammaParam : public dmlc::Parameter<SampleGammaParam>,
     DMLC_DECLARE_FIELD(beta).set_default(1.0f)
     .describe("Beta parameter (scale) of the gamma distribution.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -164,7 +164,7 @@ struct SampleExponentialParam : public dmlc::Parameter<SampleExponentialParam>,
     DMLC_DECLARE_FIELD(lam).set_default(1.0f)
     .describe("Lambda parameter (rate) of the exponential distribution.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -187,7 +187,7 @@ struct SamplePoissonParam : public dmlc::Parameter<SamplePoissonParam>,
     DMLC_DECLARE_FIELD(lam).set_default(1.0f)
     .describe("Lambda parameter (rate) of the Poisson distribution.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -212,7 +212,7 @@ struct SampleNegBinomialParam : public dmlc::Parameter<SampleNegBinomialParam>,
     DMLC_DECLARE_FIELD(p).set_default(1.0f)
     .describe("Failure probability in each experiment.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -237,7 +237,7 @@ struct SampleGenNegBinomialParam : public dmlc::Parameter<SampleGenNegBinomialParam>,
     DMLC_DECLARE_FIELD(high)
     .describe("Upper bound of the distribution.");
     DMLC_DECLARE_FIELD(shape)
-    .set_default(TShape())
+    .set_default(mxnet::TShape())
     .describe("Shape of the output.");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
struct SampleUniqueZifpianParam : public dmlc::Parameter inline bool SampleUniqueShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ParamType& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 2U); diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h index 59cbde3de202..8b63a8a2cff6 100644 --- a/src/operator/regression_output-inl.h +++ b/src/operator/regression_output-inl.h @@ -52,11 +52,11 @@ struct RegressionOutputParam : public dmlc::Parameter { }; inline bool RegressionOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { using namespace mshadow; CHECK_EQ(in_attrs->size(), 2U) << "Input:[data, label]"; - const TShape &dshape = in_attrs->at(0); + const mxnet::TShape &dshape = in_attrs->at(0); if (dshape.ndim() == 0) return false; auto &lshape = (*in_attrs)[1]; if (lshape.ndim() == 0) { @@ -219,7 +219,7 @@ inline void RegressionBackwardCSRImpl(mshadow::Stream *s, using namespace mshadow; using namespace mxnet_op; using namespace csr; - const TShape dshape = data.shape(); + const mxnet::TShape dshape = data.shape(); const nnvm::dim_t num_rows = dshape[0]; const nnvm::dim_t row_length = dshape[1]; CHECK_EQ(label.aux_type(kIndPtr), label.aux_type(kIdx)) diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc index 5632baca0d4d..a337ec1ca1ad 100644 --- a/src/operator/regression_output.cc +++ b/src/operator/regression_output.cc @@ -35,7 +35,7 @@ [](const NodeAttrs& attrs) { \ return std::vector{"data", "label"}; \ }) \ - .set_attr("FInferShape", RegressionOpShape) \ + .set_attr("FInferShape", RegressionOpShape) \ .set_attr("FGradient", RegressionOpGrad{__bwdop$}) \ .set_attr("FInferType", ElemwiseType<2, 1>) \ .set_attr("FInplaceOption", \ diff --git a/src/operator/rnn-inl.h b/src/operator/rnn-inl.h index 545e31bd8ff8..71ad331786ae 100644 --- a/src/operator/rnn-inl.h +++ b/src/operator/rnn-inl.h @@ -666,16 +666,16 @@ class RNNProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; if (param_.mode == rnn_enum::kLstm) { CHECK_EQ(in_shape->size(), 4U) << "Input:[data, parameters, state, cell_state]"; } else { CHECK_EQ(in_shape->size(), 3U) << "Input:[data, parameters, state]"; } - const TShape &dshape = (*in_shape)[rnn_enum::kData]; + const mxnet::TShape &dshape = (*in_shape)[rnn_enum::kData]; if (dshape.ndim() == 0) return false; CHECK_EQ(dshape.ndim(), 3U) \ << "Input data should be rank-3 tensor of dim [sequence length, batch size, input size]"; @@ -705,7 +705,7 @@ class RNNProp : public OperatorProperty { out_shape->clear(); // output: [sequence len, batch, output size] - TShape oshape = dshape; + mxnet::TShape oshape = dshape; if (param_.projection_size.has_value()) { oshape[2] = numDirections * param_.projection_size.value(); } else { @@ -716,7 +716,7 @@ class RNNProp : public OperatorProperty { return true; } else { // outStateShape: [layer_num, batch, state size] - TShape outStateShape = dshape; + mxnet::TShape outStateShape = dshape; outStateShape[0] = total_layers; outStateShape[1] = batch_size; if 
(param_.projection_size.has_value()) { @@ -727,7 +727,7 @@ class RNNProp : public OperatorProperty { out_shape->push_back(outStateShape); // Deal with lstm cell state if (param_.mode == rnn_enum::kLstm) { - TShape cellStateShape = dshape; + mxnet::TShape cellStateShape = dshape; cellStateShape[0] = total_layers; cellStateShape[1] = batch_size; cellStateShape[2] = param_.state_size; @@ -796,12 +796,12 @@ class RNNProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -810,7 +810,7 @@ class RNNProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 82b03c0fafcb..621b9eb110e7 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -37,7 +37,7 @@ Operator *CreateOp(RNNParam param, int dtype) { } Operator *RNNProp::CreateOperatorEx(Context ctx, - std::vector *in_shape, + mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/roi_pooling-inl.h b/src/operator/roi_pooling-inl.h index 2f83a8ff3295..ce0efe9b07c9 100644 --- a/src/operator/roi_pooling-inl.h +++ b/src/operator/roi_pooling-inl.h @@ -48,7 +48,7 @@ enum ROIPoolingOpOutputs {kOut, kMaxIdx}; } // roipool struct ROIPoolingParam : public dmlc::Parameter { - TShape pooled_size; + mxnet::TShape pooled_size; float spatial_scale; DMLC_DECLARE_PARAMETER(ROIPoolingParam) { DMLC_DECLARE_FIELD(pooled_size) @@ -167,18 +167,18 @@ class ROIPoolingProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, rois]"; // data: [batch_size, c, h, w] - TShape dshape = in_shape->at(roipool::kData); + mxnet::TShape dshape = in_shape->at(roipool::kData); CHECK_EQ(dshape.ndim(), 4U) << "data should be a 4D tensor"; // bbox: [num_rois, 5] - TShape bshape = in_shape->at(roipool::kBox); + mxnet::TShape bshape = in_shape->at(roipool::kBox); CHECK_EQ(bshape.ndim(), 2U) << "bbox should be a 2D tensor of shape [batch, 5]"; CHECK_EQ(bshape[1], 5U) << "bbox should be a 2D tensor of shape [batch, 5]"; @@ -229,7 +229,7 @@ class ROIPoolingProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/roi_pooling.cc b/src/operator/roi_pooling.cc index 7f15dcb406dc..8862d0db1401 100644 --- a/src/operator/roi_pooling.cc +++ b/src/operator/roi_pooling.cc @@ -241,7 +241,7 @@ Operator *CreateOp(ROIPoolingParam param, int dtype) { return op; } -Operator *ROIPoolingProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *ROIPoolingProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { 
DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); } diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index 61506c2af3de..b4db80bdd721 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -246,15 +246,15 @@ class SequenceLastProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), param_.use_sequence_length ? 2U : 1U) << "Input:[data, sequence_length]"; CHECK((param_.axis == 0) || (param_.axis == 1)) << "Current implementation expects axis to be 0 or 1."; - const TShape &dshape = (*in_shape)[seq_last::kData]; + const mxnet::TShape &dshape = (*in_shape)[seq_last::kData]; CHECK_GT(dshape.ndim(), 1U) << "The data array must be of rank 2 or greater."; // seq length vector is same as batch size @@ -263,11 +263,11 @@ class SequenceLastProp : public OperatorProperty { SHAPE_ASSIGN_CHECK(*in_shape, seq_last::kSequenceLength, Shape1(sbatch)); // calculate output size - TShape shape_o(dshape.ndim() - 1); + mxnet::TShape shape_o(dshape.ndim() - 1); shape_o[0] = sbatch; for (index_t i = 1; i < shape_o.ndim(); ++i) shape_o[i] = dshape[i + 1]; - const TShape &oshape = shape_o; + const mxnet::TShape &oshape = shape_o; out_shape->clear(); out_shape->push_back(oshape); return true; @@ -297,12 +297,12 @@ class SequenceLastProp : public OperatorProperty { std::string TypeString() const override { return "SequenceLast"; } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -320,7 +320,7 @@ class SequenceLastProp : public OperatorProperty { return NULL; } - Operator *CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator *CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/sequence_last.cc b/src/operator/sequence_last.cc index f2388a8efbf3..44869c518504 100644 --- a/src/operator/sequence_last.cc +++ b/src/operator/sequence_last.cc @@ -40,7 +40,7 @@ Operator *CreateOp(SequenceLastParam param, int dtype, int itype) { // DO_BIND_DISPATCH comes from operator_common.h Operator *SequenceLastProp::CreateOperatorEx(Context ctx, - std::vector *in_shape, + mxnet::ShapeVector *in_shape, std::vector *in_type) const { if (in_type->size() >= 2 && (*in_type)[1] != -1) { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]); diff --git a/src/operator/sequence_mask-inl.h b/src/operator/sequence_mask-inl.h index c2584abd4178..372cf57e03dc 100644 --- a/src/operator/sequence_mask-inl.h +++ b/src/operator/sequence_mask-inl.h @@ -239,13 +239,13 @@ class SequenceMaskProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), param_.use_sequence_length ? 
2U : 1U) << "Input:[data, sequence_length]"; - const TShape &dshape = (*in_shape)[seq_mask::kData]; + const mxnet::TShape &dshape = (*in_shape)[seq_mask::kData]; CHECK_GT(dshape.ndim(), 1U) << "The data array must be of rank 2 or greater."; CHECK((param_.axis == 0) || (param_.axis == 1)) @@ -256,7 +256,7 @@ class SequenceMaskProp : public OperatorProperty { if (param_.use_sequence_length) SHAPE_ASSIGN_CHECK(*in_shape, seq_mask::kSequenceLength, Shape1(sbatch)); - const TShape &oshape = dshape; + const mxnet::TShape &oshape = dshape; out_shape->clear(); out_shape->push_back(oshape); return true; @@ -295,7 +295,7 @@ class SequenceMaskProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -317,7 +317,7 @@ class SequenceMaskProp : public OperatorProperty { return NULL; } - Operator *CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator *CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/sequence_mask.cc b/src/operator/sequence_mask.cc index 76e58386b8ad..c3bf12d3a862 100644 --- a/src/operator/sequence_mask.cc +++ b/src/operator/sequence_mask.cc @@ -40,7 +40,7 @@ Operator *CreateOp(SequenceMaskParam param, int dtype, int itype) { // DO_BIND_DISPATCH comes from operator_common.h Operator *SequenceMaskProp::CreateOperatorEx(Context ctx, - std::vector *in_shape, + mxnet::ShapeVector *in_shape, std::vector *in_type) const { if (in_type->size() >= 2 && (*in_type)[1] != -1) { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0], (*in_type)[1]); diff --git a/src/operator/sequence_reverse-inl.h b/src/operator/sequence_reverse-inl.h index eb9f71ccce9e..03210d325699 100644 --- a/src/operator/sequence_reverse-inl.h +++ b/src/operator/sequence_reverse-inl.h @@ -220,14 +220,14 @@ class SequenceReverseProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), param_.use_sequence_length ? 
2U : 1U) << "Input:[data, sequence_length]"; CHECK_EQ(param_.axis, 0) << "Current implementation expects axis to be 0."; - const TShape &dshape = (*in_shape)[seq_reverse::kData]; + const mxnet::TShape &dshape = (*in_shape)[seq_reverse::kData]; CHECK_GT(dshape.ndim(), 1U) << "The data array must be of rank 2 or greater."; // seq length vector is same as batch size @@ -235,7 +235,7 @@ class SequenceReverseProp : public OperatorProperty { SHAPE_ASSIGN_CHECK(*in_shape, seq_reverse::kSequenceLength, Shape1(dshape[1])); - const TShape &oshape = dshape; + const mxnet::TShape &oshape = dshape; out_shape->clear(); out_shape->push_back(oshape); return true; @@ -275,7 +275,7 @@ class SequenceReverseProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -284,7 +284,7 @@ class SequenceReverseProp : public OperatorProperty { return NULL; } - Operator *CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator *CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/sequence_reverse.cc b/src/operator/sequence_reverse.cc index 9225b6b5dae2..0621d3470d80 100644 --- a/src/operator/sequence_reverse.cc +++ b/src/operator/sequence_reverse.cc @@ -40,7 +40,7 @@ Operator *CreateOp(SequenceReverseParam param, int dtype, int itype) { // DO_BIND_DISPATCH comes from operator_common.h Operator *SequenceReverseProp::CreateOperatorEx( - Context ctx, std::vector *in_shape, + Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { if (in_type->size() >= 2 && (*in_type)[1] != -1) { diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h index 3b14a26ea649..6125782d525b 100644 --- a/src/operator/slice_channel-inl.h +++ b/src/operator/slice_channel-inl.h @@ -188,13 +188,13 @@ class SliceChannelProp : public OperatorProperty { return true; } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 1U); - TShape dshape = in_shape->at(slice_enum::kData); - TShape ishape = in_shape->at(slice_enum::kData); + mxnet::TShape dshape = in_shape->at(slice_enum::kData); + mxnet::TShape ishape = in_shape->at(slice_enum::kData); if (dshape.ndim() == 0) return false; if (param_.axis >= 0) { CHECK_LT(static_cast(param_.axis), dshape.ndim()); @@ -223,7 +223,7 @@ class SliceChannelProp : public OperatorProperty { for (int d = real_axis; d < static_cast(dshape.ndim()) - 1; ++d) { dshape[d] = dshape[d+1]; } - dshape = TShape(&dshape[0], &dshape[dshape.ndim()-1]); + dshape = mxnet::TShape(&dshape[0], &dshape[dshape.ndim()-1]); } CHECK_EQ(static_cast((*out_shape).size()), param_.num_outputs) << "Size of output shape mismatch!"; @@ -231,7 +231,7 @@ class SliceChannelProp : public OperatorProperty { SHAPE_ASSIGN_CHECK(*out_shape, i, dshape); // Perform incomplete shape inference. // We can back-calculate the inshape based on the out_shape. 
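Note: every OperatorProperty in this patch receives the same mechanical signature rewrite as the hunks above and below. With the angle-bracketed template arguments restored (this copy of the patch has dropped them), the pattern presumably reads:

    // before: shape vectors spelled out per call site
    bool InferShape(std::vector<TShape> *in_shape,
                    std::vector<TShape> *out_shape,
                    std::vector<TShape> *aux_shape) const override;
    // after: mxnet::ShapeVector, presumably the std::vector<mxnet::TShape>
    // alias introduced by the new include/mxnet/tuple.h
    bool InferShape(mxnet::ShapeVector *in_shape,
                    mxnet::ShapeVector *out_shape,
                    mxnet::ShapeVector *aux_shape) const override;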
- TShape back_calculate_dshape = ishape; + mxnet::TShape back_calculate_dshape = ishape; if (param_.squeeze_axis && (dshape.ndim() == ishape.ndim() - 1)) { for (int d = 0; d < real_axis; ++d) { back_calculate_dshape[d] = (*out_shape)[i][d]; @@ -275,7 +275,7 @@ class SliceChannelProp : public OperatorProperty { return nullptr; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/slice_channel.cc b/src/operator/slice_channel.cc index 7c633bb8196f..b051b9b90309 100644 --- a/src/operator/slice_channel.cc +++ b/src/operator/slice_channel.cc @@ -38,7 +38,7 @@ Operator* CreateOp(SliceChannelParam param, int dtype) { } Operator* SliceChannelProp::CreateOperatorEx(Context ctx, - std::vector* in_shape, + mxnet::ShapeVector* in_shape, std::vector* in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h index 5a01d3a73a95..c5ad90ab95bc 100644 --- a/src/operator/softmax_output-inl.h +++ b/src/operator/softmax_output-inl.h @@ -331,23 +331,23 @@ class SoftmaxOutputProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; // label.shape == data.shape: use probability as label if (dshape != (*in_shape)[softmaxout_enum::kLabel]) { if (param_.multi_output) { - TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]); - TShape lshape2(dshape.ndim() - 1); + mxnet::TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]); + mxnet::TShape lshape2(dshape.ndim() - 1); lshape2[0] = dshape[0]; for (index_t i = 2; i < dshape.ndim(); ++i) lshape2[i-1] = dshape[i]; - TShape lshape3 = dshape; + mxnet::TShape lshape3 = dshape; lshape3[1] = 1; if (in_shape->at(softmaxout_enum::kLabel).ndim() == 0) { in_shape->at(softmaxout_enum::kLabel) = lshape1; @@ -361,7 +361,7 @@ class SoftmaxOutputProp : public OperatorProperty { throw InferShapeError(os.str(), softmaxout_enum::kLabel); } } else { - TShape label_shape(dshape.ndim() - 1); + mxnet::TShape label_shape(dshape.ndim() - 1); for (index_t i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, label_shape); @@ -427,7 +427,7 @@ class SoftmaxOutputProp : public OperatorProperty { } std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } @@ -436,7 +436,7 @@ class SoftmaxOutputProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; protected: diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc index 322ac0b93426..c34e9095f4c1 100644 --- a/src/operator/softmax_output.cc +++ b/src/operator/softmax_output.cc @@ -79,23 +79,23 @@ static bool SoftmaxOutputType(const nnvm::NodeAttrs& attrs, } static 
bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; const SoftmaxOutputParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; - const TShape &dshape = in_shape->at(0); + const mxnet::TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; // label.shape == data.shape: use probability as label if (dshape != (*in_shape)[softmaxout_enum::kLabel]) { if (param.multi_output) { - TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]); - TShape lshape2(dshape.ndim() - 1); + mxnet::TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]); + mxnet::TShape lshape2(dshape.ndim() - 1); lshape2[0] = dshape[0]; for (index_t i = 2; i < dshape.ndim(); ++i) lshape2[i-1] = dshape[i]; - TShape lshape3 = dshape; + mxnet::TShape lshape3 = dshape; lshape3[1] = 1; if (in_shape->at(softmaxout_enum::kLabel).ndim() == 0) { in_shape->at(softmaxout_enum::kLabel) = lshape1; @@ -109,7 +109,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, throw InferShapeError(os.str(), softmaxout_enum::kLabel); } } else { - TShape label_shape(dshape.ndim() - 1); + mxnet::TShape label_shape(dshape.ndim() - 1); for (index_t i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, label_shape); @@ -242,7 +242,7 @@ NNVM_REGISTER_OP(SoftmaxOutput) .set_attr("FListOutputNames", [](const NodeAttrs& attrs) { return std::vector{"output"}; }) -.set_attr("FInferShape", SoftmaxOutputShape) +.set_attr("FInferShape", SoftmaxOutputShape) .set_attr("FInferType", SoftmaxOutputType) .set_attr("FCompute", SoftmaxOutputCompute) .set_attr("FGradient", SoftmaxOutputGrad{"_backward_SoftmaxOutput"}) diff --git a/src/operator/spatial_transformer-inl.h b/src/operator/spatial_transformer-inl.h index a7ecdaecb103..9e5dee842d0d 100644 --- a/src/operator/spatial_transformer-inl.h +++ b/src/operator/spatial_transformer-inl.h @@ -51,13 +51,13 @@ enum SpatialTransformerSamplerType {kBilinear}; } struct SpatialTransformerParam : public dmlc::Parameter { - TShape target_shape; + mxnet::TShape target_shape; int transform_type; int sampler_type; dmlc::optional cudnn_off; DMLC_DECLARE_PARAMETER(SpatialTransformerParam) { int shape[] = {0, 0}; - DMLC_DECLARE_FIELD(target_shape).set_default(TShape(shape, shape + 2)) + DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape(shape, shape + 2)) .describe("output shape(h, w) of spatial transformer: (y, x)"); DMLC_DECLARE_FIELD(transform_type).add_enum("affine", st::kAffine) .describe("transformation type"); @@ -181,15 +181,15 @@ class SpatialTransformerProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, loc]"; CHECK_EQ(param_.transform_type, st::kAffine) << "only supports affine transform currently"; CHECK_EQ(param_.sampler_type, st::kBilinear) << "only supports bilinear sampling currently"; - const TShape &dshape = (*in_shape)[st::kData]; - const TShape &lshape = (*in_shape)[st::kLoc]; + const mxnet::TShape &dshape = (*in_shape)[st::kData]; + const mxnet::TShape &lshape = (*in_shape)[st::kLoc]; if 
(dshape.ndim() == 0) return false; CHECK_EQ(dshape.ndim(), 4U) \ << "input data should be 4D in batch-num_filter-y-x"; @@ -263,13 +263,13 @@ class SpatialTransformerProp : public OperatorProperty { } std::vector ForwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } #if CUDNN_MAJOR >= 5 std::vector BackwardResource( - const std::vector &in_shape) const override { + const mxnet::ShapeVector &in_shape) const override { return {ResourceRequest::kTempSpace}; } #endif @@ -279,7 +279,7 @@ class SpatialTransformerProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/spatial_transformer.cc b/src/operator/spatial_transformer.cc index 2dcb427ef036..6c413f884df9 100644 --- a/src/operator/spatial_transformer.cc +++ b/src/operator/spatial_transformer.cc @@ -160,7 +160,7 @@ Operator* CreateOp(SpatialTransformerParam param, int dtype) { return op; } -Operator *SpatialTransformerProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *SpatialTransformerProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/subgraph/common.h b/src/operator/subgraph/common.h index 4e1cd66b8b68..814e83743514 100644 --- a/src/operator/subgraph/common.h +++ b/src/operator/subgraph/common.h @@ -50,8 +50,8 @@ inline std::vector DefaultSubgraphOpListOutputs(const nnvm::NodeAtt } inline bool DefaultSubgraphOpShapeHelper(const nnvm::Symbol& subgraph_sym, - std::vector *in_shapes, - std::vector *out_shapes) { + mxnet::ShapeVector *in_shapes, + mxnet::ShapeVector *out_shapes) { using namespace exec; nnvm::Graph g; g.outputs = subgraph_sym.outputs; @@ -60,7 +60,7 @@ inline bool DefaultSubgraphOpShapeHelper(const nnvm::Symbol& subgraph_sym, CHECK_EQ(idx_g.outputs().size(), out_shapes->size()); // Put the input and output shapes to the shape vector. - nnvm::ShapeVector shapes(idx_g.num_node_entries()); + mxnet::ShapeVector shapes(idx_g.num_node_entries()); const auto &input_nids = idx_g.input_nodes(); CHECK_EQ(input_nids.size(), in_shapes->size()); for (size_t i = 0; i < in_shapes->size(); i++) { @@ -78,7 +78,7 @@ inline bool DefaultSubgraphOpShapeHelper(const nnvm::Symbol& subgraph_sym, g = exec::InferShape(std::move(g)); // Copy the inferred shape back to the input shapes and the output shapes. 
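Note: before the hunk resumes, a minimal sketch of the round-trip this helper performs — assuming nnvm stores graph attributes as dmlc::any, which is how the lines below read once their template arguments are restored:

    g.attrs["shape"] = std::make_shared<dmlc::any>(std::move(shapes));  // hand shapes to the pass
    g = exec::InferShape(std::move(g));                                 // run the shape-inference pass
    shapes = g.GetAttr<mxnet::ShapeVector>("shape");                    // read the inferred vector back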
-  shapes = g.GetAttr<nnvm::ShapeVector>("shape");
+  shapes = g.GetAttr<mxnet::ShapeVector>("shape");
   // assign to in_shapes
   for (size_t i = 0; i < in_shapes->size(); ++i) {
     const auto eid = idx_g.entry_id(input_nids[i], 0);
@@ -94,8 +94,8 @@ inline bool DefaultSubgraphOpShapeHelper(const nnvm::Symbol& subgraph_sym,
 }
 
 inline bool DefaultSubgraphOpShape(const nnvm::NodeAttrs& attrs,
-                                   std::vector<TShape> *in_shapes,
-                                   std::vector<TShape> *out_shapes) {
+                                   mxnet::ShapeVector *in_shapes,
+                                   mxnet::ShapeVector *out_shapes) {
   return DefaultSubgraphOpShapeHelper(*attrs.subgraphs[0], in_shapes, out_shapes);
 }

diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv.cc b/src/operator/subgraph/mkldnn/mkldnn_conv.cc
index 499d7390eaad..e53ab2538a90 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_conv.cc
+++ b/src/operator/subgraph/mkldnn/mkldnn_conv.cc
@@ -480,7 +480,7 @@ static std::vector<std::string> SgMKLDNNConvListOutputNames(
 static OpStatePtr CreateSgMKLDNNConvState(const nnvm::NodeAttrs &attrs,
                                           Context ctx,
-                                          const std::vector<TShape> &in_shapes,
+                                          const mxnet::ShapeVector &in_shapes,
                                           const std::vector<int> &in_types) {
   return OpStatePtr::Create<SgMKLDNNConvOperator>(attrs);
 }
 
@@ -510,15 +510,15 @@ static void FilterMinMaxIndice(const MKLDNNConvParam &mkldnn_param,
 }
 
 static bool SgMKLDNNConvInferShape(const nnvm::NodeAttrs &attrs,
-                                   std::vector<TShape> *in_shapes,
-                                   std::vector<TShape> *out_shapes) {
+                                   mxnet::ShapeVector *in_shapes,
+                                   mxnet::ShapeVector *out_shapes) {
   auto const &param = nnvm::get<MKLDNNConvFusionParam>(attrs.parsed);
   if (param.full_conv_param.mkldnn_param.quantized) {
     std::unordered_set<size_t> minmax_indice;
-    std::vector<TShape> base_in_shapes;
-    std::vector<TShape> base_out_shapes;
+    mxnet::ShapeVector base_in_shapes;
+    mxnet::ShapeVector base_out_shapes;
 
-    FilterMinMaxIndice<TShape>(param.full_conv_param.mkldnn_param, in_shapes,
+    FilterMinMaxIndice<mxnet::TShape>(param.full_conv_param.mkldnn_param, in_shapes,
                                out_shapes, &base_in_shapes, &base_out_shapes,
                                &minmax_indice);
     bool result =
@@ -684,7 +684,7 @@ NNVM_REGISTER_OP(_sg_mkldnn_conv)
 .set_attr<nnvm::FListInputNames>("FListInputNames", SgMKLDNNConvListInputNames)
 .set_attr<nnvm::FListOutputNames>("FListOutputNames", SgMKLDNNConvListOutputNames)
 .set_attr<FCreateOpState>("FCreateOpState", CreateSgMKLDNNConvState)
-.set_attr<nnvm::FInferShape>("FInferShape", SgMKLDNNConvInferShape)
+.set_attr<mxnet::FInferShape>("FInferShape", SgMKLDNNConvInferShape)
 .set_attr<nnvm::FInferType>("FInferType", SgMKLDNNConvInferType)
 .set_attr<FInferStorageType>("FInferStorageType", SgMKLDNNConvOpStorageType)
 .set_attr<FStatefulComputeEx>("FStatefulComputeEx", SgMKLDNNConvOpForward)

diff --git a/src/operator/subgraph_op_common.cc b/src/operator/subgraph_op_common.cc
index 4b8f63abd4ce..8934438d428a 100644
--- a/src/operator/subgraph_op_common.cc
+++ b/src/operator/subgraph_op_common.cc
@@ -119,8 +119,8 @@ bool InferSubgraphStorage(const nnvm::Symbol &subgraph,
 }
 
 bool InferSubgraphShape(const nnvm::Symbol &subgraph,
-                        std::vector<TShape> *in_shape,
-                        std::vector<TShape> *out_shape) {
+                        mxnet::ShapeVector *in_shape,
+                        mxnet::ShapeVector *out_shape) {
   nnvm::Graph g;
   g.outputs = subgraph.outputs;
   const auto& idx = g.indexed_graph();
@@ -128,7 +128,7 @@ bool InferSubgraphShape(const nnvm::Symbol &subgraph,
   CHECK_EQ(idx.outputs().size(), out_shape->size());
   // Put the input and output shapes to the shape vector.
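Note: the hunk context elides the body of the copy loop; presumably each graph input's shape lands in the entry slot entry_id(input_nids[i], 0), along the lines of:

    for (size_t i = 0; i < in_shape->size(); i++) {
      shapes[idx.entry_id(input_nids[i], 0)] = (*in_shape)[i];  // assumed elided body
    }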
-  nnvm::ShapeVector shapes(idx.num_node_entries());
+  mxnet::ShapeVector shapes(idx.num_node_entries());
   const auto &input_nids = idx.input_nodes();
   CHECK_EQ(input_nids.size(), in_shape->size());
   for (size_t i = 0; i < in_shape->size(); i++) {
@@ -145,7 +145,7 @@ bool InferSubgraphShape(const nnvm::Symbol &subgraph,
   g.attrs["shape"] = std::make_shared<dmlc::any>(std::move(shapes));
   g = exec::InferShape(std::move(g));
 
-  const auto& shapes1 = g.GetAttr<nnvm::ShapeVector>("shape");
+  const auto& shapes1 = g.GetAttr<mxnet::ShapeVector>("shape");
   // Inferring the shape in the subgraph may infer the shape of the inputs.
   // We need to copy the inferred input shapes back.
   CHECK_EQ(input_nids.size(), in_shape->size());
@@ -177,7 +177,7 @@ bool as_bool_scalar(const NDArray &a) {
   return false;
 }
 
-bool is_shape_udf(const TShape &x) {
+bool is_shape_udf(const mxnet::TShape &x) {
   return x.ndim() == 0 || x.Size() == 0;
 }

diff --git a/src/operator/subgraph_op_common.h b/src/operator/subgraph_op_common.h
index c316fca91d95..91adf576dc07 100644
--- a/src/operator/subgraph_op_common.h
+++ b/src/operator/subgraph_op_common.h
@@ -44,8 +44,8 @@ bool InferSubgraphDataType(const nnvm::Symbol &subgraph, std::vector<int> *in_ty
  * subgraph.
  */
 bool InferSubgraphShape(const nnvm::Symbol &subgraph,
-                        std::vector<TShape> *in_shape,
-                        std::vector<TShape> *out_shape);
+                        mxnet::ShapeVector *in_shape,
+                        mxnet::ShapeVector *out_shape);
 
 /*
  * Infer the storage types of inputs and outputs of an operator that contains a
@@ -59,7 +59,7 @@ bool InferSubgraphStorage(const nnvm::Symbol &subgraph,
 
 bool as_bool_scalar(const NDArray &a);
 
-bool is_shape_udf(const TShape &x);
+bool is_shape_udf(const mxnet::TShape &x);
 
 bool is_stype_udf(const int &x);

diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h
index 011b9ad10284..1609764f0ebe 100644
--- a/src/operator/svm_output-inl.h
+++ b/src/operator/svm_output-inl.h
@@ -99,7 +99,7 @@ class SVMOutputOp : public Operator {
     CHECK_GE(in_grad.size(), 1U);
     CHECK_GE(req.size(), 1U);
     Stream<xpu> *s = ctx.get_stream<xpu>();
-    const TShape& label_shape = in_data[svm_enum::kLabel].shape_;
+    const mxnet::TShape& label_shape = in_data[svm_enum::kLabel].shape_;
     Tensor<xpu, 1, DType> label = in_data[svm_enum::kLabel].get_with_shape<xpu, 1, DType>(
         Shape1(label_shape.ProdShape(0, label_shape.ndim())), s);
@@ -137,14 +137,14 @@ class SVMOutputProp : public OperatorProperty {
     return param_.__DICT__();
   }
 
-  bool InferShape(std::vector<TShape> *in_shape,
-                  std::vector<TShape> *out_shape,
-                  std::vector<TShape> *aux_shape) const override {
+  bool InferShape(mxnet::ShapeVector *in_shape,
+                  mxnet::ShapeVector *out_shape,
+                  mxnet::ShapeVector *aux_shape) const override {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]";
-    const TShape &dshape = in_shape->at(0);
+    const mxnet::TShape &dshape = in_shape->at(0);
     if (dshape.ndim() == 0) return false;
-    TShape label_shape(dshape.ndim() - 1);
+    mxnet::TShape label_shape(dshape.ndim() - 1);
     for (index_t i = 0; i + 1 < dshape.ndim(); ++i)
       label_shape[i] = dshape[i];
     SHAPE_ASSIGN_CHECK(*in_shape, svm_enum::kLabel, label_shape);
@@ -203,7 +203,7 @@
   }
 
   std::vector<ResourceRequest> BackwardResource(
-      const std::vector<TShape> &in_shape) const override {
+      const mxnet::ShapeVector &in_shape) const override {
     return {ResourceRequest::kTempSpace};
   }
 
@@ -212,7 +212,7 @@
     return NULL;
   }
 
-  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
+  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
                              std::vector<int> *in_type) const override;
 
  protected:

diff --git
a/src/operator/svm_output.cc b/src/operator/svm_output.cc index a291f7298706..a52aa4779176 100644 --- a/src/operator/svm_output.cc +++ b/src/operator/svm_output.cc @@ -79,7 +79,7 @@ Operator *CreateOp(SVMOutputParam param, int dtype) { } // DO_BIND_DISPATCH comes from operator_common.h -Operator *SVMOutputProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator *SVMOutputProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]); } diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h index 7b0e2fa602d9..ce835084ab32 100644 --- a/src/operator/swapaxis-inl.h +++ b/src/operator/swapaxis-inl.h @@ -68,7 +68,7 @@ class SwapAxisOp : public Operator { } void Reshape2Five(mshadow::Shape<5> *inter_shape, - const TShape &shape, + const mxnet::TShape &shape, uint32_t dim1, uint32_t dim2) { using namespace mshadow; using namespace mshadow::expr; @@ -113,8 +113,8 @@ class SwapAxisOp : public Operator { TBlob data_out = out_data[swapaxisenum::kData]; OpReqType out_req = req[swapaxisenum::kData]; - TShape shape_in = data_in.shape_; - TShape shape_out = data_out.shape_; + mxnet::TShape shape_in = data_in.shape_; + mxnet::TShape shape_out = data_out.shape_; Shape<5> inter_shape; @@ -181,15 +181,15 @@ class SwapAxisProp : public OperatorProperty { return param_.__DICT__(); } - bool InferShape(std::vector *in_shape, - std::vector *out_shape, - std::vector *aux_shape) const override { + bool InferShape(mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape, + mxnet::ShapeVector *aux_shape) const override { CHECK_EQ(in_shape->size(), 1U); - TShape &shape0 = (*in_shape)[swapaxisenum::kData]; + mxnet::TShape &shape0 = (*in_shape)[swapaxisenum::kData]; out_shape->clear(); out_shape->push_back(shape0); - TShape &shape1 = (*out_shape)[swapaxisenum::kOut]; + mxnet::TShape &shape1 = (*out_shape)[swapaxisenum::kOut]; std::swap(shape1[param_.dim1], shape1[param_.dim2]); @@ -229,7 +229,7 @@ class SwapAxisProp : public OperatorProperty { return NULL; } - Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const override; private: diff --git a/src/operator/swapaxis.cc b/src/operator/swapaxis.cc index b78062fde8be..45bcca4db9ae 100644 --- a/src/operator/swapaxis.cc +++ b/src/operator/swapaxis.cc @@ -38,7 +38,7 @@ Operator* CreateOp(SwapAxisParam param, int dtype) { return op; } -Operator* SwapAxisProp::CreateOperatorEx(Context ctx, std::vector *in_shape, +Operator* SwapAxisProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape, std::vector *in_type) const { DO_BIND_DISPATCH(CreateOp, param_, in_type->at(0)); } diff --git a/src/operator/tensor/broadcast_reduce-inl.cuh b/src/operator/tensor/broadcast_reduce-inl.cuh index 33bf72798fd6..5d6c49ff8882 100644 --- a/src/operator/tensor/broadcast_reduce-inl.cuh +++ b/src/operator/tensor/broadcast_reduce-inl.cuh @@ -366,8 +366,8 @@ static inline uint64_t calc_num_load(const int X, const int Y, const int* stride } template -ReduceImplConfig ConfigureReduceImpl(const TShape& small, const TShape& big, const TShape* lhs, - const TShape* rhs) { +ReduceImplConfig ConfigureReduceImpl(const mxnet::TShape& small, const mxnet::TShape& big, const mxnet::TShape* lhs, + const mxnet::TShape* rhs) { ReduceImplConfig config; @@ -636,16 +636,16 @@ void Reduce(Stream *s, const TBlob& small, const OpReqType req, } template -size_t ReduceWorkspaceSize(Stream *s, const 
TShape& small, const OpReqType req, - const TShape& big) { +size_t ReduceWorkspaceSize(Stream *s, const mxnet::TShape& small, const OpReqType req, + const mxnet::TShape& big) { if (req == kNullOp) return 0; ReduceImplConfig config = ConfigureReduceImpl(small, big, NULL, NULL); return config.workspace_size; } template -size_t ReduceWorkspaceSize(Stream *s, const TShape& small, const OpReqType req, - const TShape& big, const TShape& lhs, const TShape& rhs) { +size_t ReduceWorkspaceSize(Stream *s, const mxnet::TShape& small, const OpReqType req, + const mxnet::TShape& big, const mxnet::TShape& lhs, const mxnet::TShape& rhs) { if (req == kNullOp) return 0; ReduceImplConfig config = ConfigureReduceImpl(small, big, &lhs, &rhs); return config.workspace_size; diff --git a/src/operator/tensor/broadcast_reduce-inl.h b/src/operator/tensor/broadcast_reduce-inl.h index 141d2fb83d0d..0f6913e6e9df 100644 --- a/src/operator/tensor/broadcast_reduce-inl.h +++ b/src/operator/tensor/broadcast_reduce-inl.h @@ -260,14 +260,15 @@ void ReduceWithExtraMem(Stream* s, const TBlob& small, const OpReqType req, } template -size_t ReduceWorkspaceSize(Stream *s, const TShape& small, const OpReqType req, - const TShape& big) { +size_t ReduceWorkspaceSize(Stream *s, const mxnet::TShape& small, const OpReqType req, + const mxnet::TShape& big) { return 0; } template -size_t ReduceWorkspaceSize(Stream *s, const TShape& small, const OpReqType req, - const TShape& big, const TShape& lhs, const TShape& rhs) { +size_t ReduceWorkspaceSize(Stream *s, const mxnet::TShape& small, const OpReqType req, + const mxnet::TShape& big, const mxnet::TShape& lhs, + const mxnet::TShape& rhs) { return 0; } diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index 6aeeadfe820d..b13906af6624 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -37,11 +37,11 @@ namespace mxnet { namespace op { struct ReduceAxesParam : public dmlc::Parameter { - dmlc::optional axis; + dmlc::optional axis; bool keepdims; bool exclude; DMLC_DECLARE_PARAMETER(ReduceAxesParam) { - DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional()) + DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional()) .describe(R"code(The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a @@ -66,12 +66,12 @@ struct ReduceAxesParam : public dmlc::Parameter { struct NormParam : public dmlc::Parameter { int ord; - dmlc::optional axis; + dmlc::optional axis; bool keepdims; DMLC_DECLARE_PARAMETER(NormParam) { DMLC_DECLARE_FIELD(ord).set_default(2) .describe("Order of the norm. Currently ord=1 and ord=2 is supported."); - DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional()) + DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional()) .describe(R"code(The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. 
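Note: the parameter structs in this file change only inside the dmlc::optional<> template argument. A sketch of NormParam with the assumed pre-patch spelling shown in comments:

    struct NormParam : public dmlc::Parameter<NormParam> {
      int ord;
      dmlc::optional<mxnet::TShape> axis;  // was: dmlc::optional<TShape>
      bool keepdims;
      DMLC_DECLARE_PARAMETER(NormParam) {
        DMLC_DECLARE_FIELD(ord).set_default(2)
            .describe("Order of the norm. Currently ord=1 and ord=2 is supported.");
        DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional<mxnet::TShape>())  // was: dmlc::optional<TShape>()
            .describe("The axis or axes along which to perform the reduction.");
        // keepdims is declared as before; this patch does not touch it
      }
    };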
@@ -126,20 +126,20 @@ struct PickParam : public dmlc::Parameter { }; struct BroadcastAxesParam : public dmlc::Parameter { - TShape axis; - TShape size; + mxnet::TShape axis; + mxnet::TShape size; DMLC_DECLARE_PARAMETER(BroadcastAxesParam) { - DMLC_DECLARE_FIELD(axis).set_default(TShape()) + DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape()) .describe("The axes to perform the broadcasting."); - DMLC_DECLARE_FIELD(size).set_default(TShape()) + DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape()) .describe("Target sizes of the broadcasting axes."); } }; struct BroadcastToParam : public dmlc::Parameter { - TShape shape; + mxnet::TShape shape; DMLC_DECLARE_PARAMETER(BroadcastToParam) { - DMLC_DECLARE_FIELD(shape).set_default(TShape()) + DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape()) .describe("The shape of the desired array." " We can set the dim to zero if it's same as the original." " E.g `A = broadcast_to(B, shape=(10, 0, 0))` " @@ -148,12 +148,12 @@ struct BroadcastToParam : public dmlc::Parameter { }; struct BroadcastLikeParam : public dmlc::Parameter { - dmlc::optional lhs_axes; - dmlc::optional rhs_axes; + dmlc::optional lhs_axes; + dmlc::optional rhs_axes; DMLC_DECLARE_PARAMETER(BroadcastLikeParam) { - DMLC_DECLARE_FIELD(lhs_axes).set_default(dmlc::optional()) + DMLC_DECLARE_FIELD(lhs_axes).set_default(dmlc::optional()) .describe("Axes to perform broadcast on in the first input array"); - DMLC_DECLARE_FIELD(rhs_axes).set_default(dmlc::optional()) + DMLC_DECLARE_FIELD(rhs_axes).set_default(dmlc::optional()) .describe("Axes to copy from the second input array"); } }; @@ -164,7 +164,7 @@ inline int CheckAxis(int axis, int ndim) { return (axis + ndim)%ndim; } -inline TShape AxisShapeCompact(TShape shape, int *axis, bool allow_2d) { +inline mxnet::TShape AxisShapeCompact(mxnet::TShape shape, int *axis, bool allow_2d) { int ndim = static_cast(shape.ndim()); index_t leading = 1, trailing = 1, M = shape[*axis]; for (int i = 0; i < *axis; ++i) leading *= shape[i]; @@ -181,23 +181,24 @@ inline TShape AxisShapeCompact(TShape shape, int *axis, bool allow_2d) { return mshadow::Shape3(leading, M, trailing); } -inline TShape ReduceAxisShapeImpl(const TShape& ishape, const dmlc::optional& axis, - bool keepdims) { +inline mxnet::TShape ReduceAxisShapeImpl(const mxnet::TShape& ishape, + const dmlc::optional& axis, + bool keepdims) { if (!axis || ishape.ndim() == 1) { if (keepdims) { - return TShape(ishape.ndim()); + return mxnet::TShape(ishape.ndim()); } return mshadow::Shape1(1); } int new_axis = CheckAxis(axis.value(), ishape.ndim()); if (keepdims) { - TShape oshape = ishape; + mxnet::TShape oshape = ishape; oshape[new_axis] = 1; return oshape; } - TShape oshape(ishape.ndim() - 1); + mxnet::TShape oshape(ishape.ndim() - 1); for (int i = 0; i < new_axis; ++i) oshape[i] = ishape[i]; for (int i = new_axis+1; i < static_cast(ishape.ndim()); ++i) { oshape[i-1] = ishape[i]; @@ -206,11 +207,11 @@ inline TShape ReduceAxisShapeImpl(const TShape& ishape, const dmlc::optional *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - TShape& ishape = (*in_attrs)[0]; + mxnet::TShape& ishape = (*in_attrs)[0]; if (ishape.ndim() == 0) return false; const ReduceAxisParam& param = nnvm::get(attrs.parsed); @@ -219,18 +220,19 @@ inline bool ReduceAxisShape(const nnvm::NodeAttrs& attrs, return true; } -inline TShape ReduceAxesShapeImpl(const TShape& ishape, const dmlc::optional& axis, - bool 
keepdims, bool exclude) { - // if axis doesn't have value, treat it same TShape(). +inline mxnet::TShape ReduceAxesShapeImpl(const mxnet::TShape& ishape, + const dmlc::optional& axis, + bool keepdims, bool exclude) { + // if axis doesn't have value, treat it same mxnet::TShape(). if (!axis.has_value() || axis.value().ndim() == 0) { if (keepdims) { - return TShape(ishape.ndim()); + return mxnet::TShape(ishape.ndim()); } else { - return TShape(1); + return mxnet::TShape(1); } } // axis has value - TShape axes(axis.value()); + mxnet::TShape axes(axis.value()); for (index_t i = 0; i < axes.ndim(); i++) { if (axes[i] < 0) { axes[i] += ishape.ndim(); @@ -250,13 +252,13 @@ inline TShape ReduceAxesShapeImpl(const TShape& ishape, const dmlc::optional(1, ishape.ndim() - axes.ndim())); + oshape = mxnet::TShape(std::max(1, ishape.ndim() - axes.ndim())); } if (keepdims && exclude) { @@ -288,8 +290,8 @@ inline TShape ReduceAxesShapeImpl(const TShape& ishape, const dmlc::optional *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); if ((*in_attrs)[0].ndim() == 0) return false; @@ -301,8 +303,8 @@ inline bool ReduceAxesShape(const nnvm::NodeAttrs& attrs, } inline bool NormShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); if ((*in_attrs)[0].ndim() == 0) return false; @@ -314,15 +316,15 @@ inline bool NormShape(const nnvm::NodeAttrs& attrs, } inline bool BroadcastAxesShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); if ((*in_attrs)[0].ndim() == 0) return false; const BroadcastAxesParam& param = nnvm::get(attrs.parsed); CHECK_EQ(param.axis.ndim() , param.size.ndim()); - TShape &ishape = (*in_attrs)[0]; - TShape oshape = ishape; + mxnet::TShape &ishape = (*in_attrs)[0]; + mxnet::TShape oshape = ishape; for (index_t i = 0; i < param.axis.ndim(); ++i) { CHECK_EQ(oshape[param.axis[i]], 1U) << "Broadcasting axis must have size 1"; oshape[param.axis[i]] = param.size[i]; @@ -332,16 +334,16 @@ inline bool BroadcastAxesShape(const nnvm::NodeAttrs& attrs, } inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - TShape& ishape = (*in_attrs)[0]; + mxnet::TShape& ishape = (*in_attrs)[0]; if (ishape.ndim() == 0) return false; const BroadcastToParam& param = nnvm::get(attrs.parsed); CHECK_EQ(ishape.ndim(), param.shape.ndim()) << "Operand of shape " << ishape << " cannot be broadcasted to " << param.shape; - TShape oshape = param.shape; + mxnet::TShape oshape = param.shape; for (index_t i = 0; i < ishape.ndim(); ++i) { if (oshape[i] != 0) { CHECK(ishape[i] == oshape[i] || ishape[i] == 1) @@ -355,26 +357,26 @@ inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs, } inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); - TShape& lhs_shape = (*in_attrs)[0]; - TShape& rhs_shape = (*in_attrs)[1]; 
+ mxnet::TShape& lhs_shape = (*in_attrs)[0]; + mxnet::TShape& rhs_shape = (*in_attrs)[1]; if ((lhs_shape.ndim() == 0) || (lhs_shape.ndim() == 0)) { return false; } const BroadcastLikeParam& param = nnvm::get(attrs.parsed); - TShape oshape; + mxnet::TShape oshape; // lhs or rhs or both params were not specified if (!param.lhs_axes.has_value() || !param.rhs_axes.has_value()) { CHECK_EQ(lhs_shape.ndim(), rhs_shape.ndim()) << "Operand of shape " << lhs_shape << " cannot be broadcasted to " << rhs_shape; - oshape = TShape(rhs_shape); + oshape = mxnet::TShape(rhs_shape); for (index_t i = 0; i < lhs_shape.ndim(); ++i) { if (rhs_shape[i] != 0) { CHECK(lhs_shape[i] == rhs_shape[i] || lhs_shape[i] == 1) @@ -393,7 +395,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, CHECK(lhs_axes.ndim() > 0) << "Empty axes tuple is not allowed"; - oshape = TShape(lhs_shape); + oshape = mxnet::TShape(lhs_shape); for (index_t i = 0; i < lhs_axes.ndim(); ++i) { auto copyfrom = lhs_axes[i]; if (copyfrom < 0) { @@ -419,11 +421,11 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, return true; } -inline void BroadcastReduceShapeCompact(const TShape& big, const TShape& small, - TShape *new_big, TShape *new_small) { +inline void BroadcastReduceShapeCompact(const mxnet::TShape& big, const mxnet::TShape& small, + mxnet::TShape *new_big, mxnet::TShape *new_small) { index_t idim = std::max(big.ndim(), MXNET_SPECIAL_MAX_NDIM); - *new_big = TShape(idim); - *new_small = TShape(idim); + *new_big = mxnet::TShape(idim); + *new_small = mxnet::TShape(idim); index_t j = 0; if (small.Size() == 1) { (*new_big)[j++] = big.Size(); @@ -482,7 +484,7 @@ inline bool ReduceAxesOpForwardStorage(const nnvm::NodeAttrs& attrs, dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode, DispatchMode::kFCompute); } - TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); + mxnet::TShape axis = param.axis.has_value() ? 
param.axis.value() : mxnet::TShape(); if (!dispatched && in_stype == kCSRStorage && axis.ndim() == 1 && (axis[0] == 0 || axis[0] == 1) && !param.keepdims && !param.exclude) { // If input is csr and axis is 0 or 1, and neither of keepdims or exclude @@ -512,7 +514,7 @@ void SearchAxisCompute(const nnvm::NodeAttrs& attrs, if (!param.axis) LOG(FATAL) << "Global reduction not supported yet"; int axis = CheckAxis(param.axis.value(), inputs[0].shape_.ndim()); - TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, false); + mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, false); MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { Tensor out = outputs[0].get_with_shape( Shape2(shape[0], shape[2]), s); @@ -529,11 +531,11 @@ void ReduceAxesComputeImpl(const OpContext& ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs, - const TShape& small) { + const mxnet::TShape& small) { using namespace mshadow; using namespace mshadow::expr; - TShape src_shape, dst_shape; + mxnet::TShape src_shape, dst_shape; BroadcastReduceShapeCompact(inputs[0].shape_, small, &src_shape, &dst_shape); Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { @@ -562,7 +564,7 @@ void ReduceAxesCompute(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { const ReduceAxesParam& param = nnvm::get(attrs.parsed); - TShape small; + mxnet::TShape small; if (param.keepdims) { small = outputs[0].shape_; } else { @@ -686,7 +688,7 @@ struct ReduceCsrKernel { template void ReduceCsrImpl(mshadow::Stream* s, const OpContext& ctx, const NDArray& input, const OpReqType req, - NDArray* output, const TShape reduce_axis) { + NDArray* output, const mxnet::TShape reduce_axis) { if (req == kNullOp) return; int64_t out_data_size = 0; if (reduce_axis[0] == 0) { @@ -783,7 +785,7 @@ void ReduceCsr(const nnvm::NodeAttrs& attrs, mshadow::Stream* s, const OpCo const NDArray& input, const OpReqType req, NDArray* output) { const ReduceAxesParam& param = nnvm::get(attrs.parsed); CHECK(param.axis.has_value()); - const TShape axis = param.axis.value(); + const mxnet::TShape axis = param.axis.value(); CHECK_EQ(axis.ndim(), 1U) << "sum(csr)/mean(csr) only supports axis 0 or 1"; CHECK(axis[0] == 0 || axis[0] == 1) << "sum(csr)/mean(csr) only support axis 0 or 1"; @@ -813,14 +815,14 @@ void ReduceAxesOpForwardEx(const nnvm::NodeAttrs& attrs, const OpContext& ctx, template void ReduceAxesBackwardUseInOutImpl(const OpContext& ctx, - const TShape &small, + const mxnet::TShape &small, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { using namespace mshadow; using namespace mshadow::expr; - TShape src_shape, dst_shape; + mxnet::TShape src_shape, dst_shape; BroadcastReduceShapeCompact(outputs[0].shape_, small, &src_shape, &dst_shape); Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { @@ -863,7 +865,7 @@ void ReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs, using namespace mshadow; using namespace mshadow::expr; const ReduceAxesParam& param = nnvm::get(attrs.parsed); - TShape small; + mxnet::TShape small; if (param.keepdims) { small = inputs[0].shape_; } else { @@ -878,10 +880,10 @@ inline void BroadcastComputeImpl(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs, - const TShape& small) { + const mxnet::TShape& small) { using namespace mshadow; using namespace mshadow::expr; - TShape src_shape, dst_shape; + mxnet::TShape src_shape, 
dst_shape; BroadcastReduceShapeCompact(outputs[0].shape_, small, &dst_shape, &src_shape); Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { @@ -920,7 +922,7 @@ inline void ReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs, using namespace mshadow; using namespace mshadow::expr; const ReduceAxesParam& param = nnvm::get(attrs.parsed); - TShape small; + mxnet::TShape small; if (param.keepdims) { small = inputs[0].shape_; } else { @@ -976,7 +978,7 @@ inline bool LpNormStorageType(const nnvm::NodeAttrs& attrs, DispatchMode::kFCompute); } if (param.ord == 2) { - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); + const mxnet::TShape axis = param.axis.has_value() ? param.axis.value() : mxnet::TShape(); if (!dispatched && (in_stype == kRowSparseStorage || in_stype == kCSRStorage) && axis.ndim() == 0 && param.ord == 2) { // l2 norm: rsp/csr, axis = () -> dns @@ -1081,7 +1083,7 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs, CHECK(param.ord == 1 || param.ord == 2) << "norm only supports ord=1 and ord=2"; if (req[0] == kNullOp) return; - TShape small; + mxnet::TShape small; if (param.keepdims) { small = outputs[0].shape_; } else { @@ -1107,14 +1109,14 @@ void LpNormGradCompute(const nnvm::NodeAttrs& attrs, if (req[0] == kNullOp) return; const NormParam& param = nnvm::get(attrs.parsed); - TShape small; + mxnet::TShape small; if (param.keepdims) { small = inputs[0].shape_; } else { small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, false); } if (param.ord == 1) { - TShape src_shape, dst_shape; + mxnet::TShape src_shape, dst_shape; BroadcastReduceShapeCompact(outputs[0].shape_, small, &src_shape, &dst_shape); Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { @@ -1213,16 +1215,16 @@ struct pick_grad { }; inline bool PickOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2); CHECK_EQ(out_attrs->size(), 1); - const TShape& ishape = (*in_attrs)[0]; + const mxnet::TShape& ishape = (*in_attrs)[0]; if (ishape.ndim() == 0) return false; const PickParam& param = nnvm::get(attrs.parsed); if (!param.axis) LOG(FATAL) << "axis=None is not supported by pick yet. 
Must specify an axis."; - TShape oshape = ReduceAxisShapeImpl(ishape, param.axis, param.keepdims); + mxnet::TShape oshape = ReduceAxisShapeImpl(ishape, param.axis, param.keepdims); SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); if (!(*in_attrs)[1].ndim()) return false; if ((*in_attrs)[1].ndim() == ishape.ndim()) { @@ -1258,7 +1260,7 @@ void PickOpForward(const nnvm::NodeAttrs& attrs, mshadow::Stream *s = ctx.get_stream(); const PickParam& param = nnvm::get(attrs.parsed); - const TShape& ishape = inputs[0].shape_; + const mxnet::TShape& ishape = inputs[0].shape_; index_t axis = CheckAxis(param.axis.value(), ishape.ndim()); int leading = 1, trailing = 1, M = ishape[axis]; for (index_t i = 0; i < axis; ++i) leading *= ishape[i]; @@ -1305,7 +1307,7 @@ void PickOpBackward(const nnvm::NodeAttrs& attrs, mshadow::Stream *s = ctx.get_stream(); const PickParam& param = nnvm::get(attrs.parsed); - const TShape& ishape = outputs[0].shape_; + const mxnet::TShape& ishape = outputs[0].shape_; const index_t axis = CheckAxis(param.axis.value(), ishape.ndim()); int leading = 1, trailing = 1, M = ishape[axis]; for (index_t i = 0; i < axis; ++i) leading *= ishape[i]; @@ -1346,7 +1348,7 @@ void PickOpBackward(const nnvm::NodeAttrs& attrs, .set_num_inputs(1) \ .set_num_outputs(1) \ .set_attr_parser(ParamParser) \ - .set_attr("FInferShape", ReduceAxisShape) \ + .set_attr("FInferShape", ReduceAxisShape) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .add_argument("data", "NDArray-or-Symbol", "The input") \ .add_arguments(ReduceAxisParam::__FIELDS__()) @@ -1356,7 +1358,7 @@ void PickOpBackward(const nnvm::NodeAttrs& attrs, .set_num_inputs(1) \ .set_num_outputs(1) \ .set_attr_parser(AxesParamParser) \ - .set_attr("FInferShape", ReduceAxesShape) \ + .set_attr("FInferShape", ReduceAxesShape) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .add_argument("data", "NDArray-or-Symbol", "The input") \ .add_arguments(ReduceAxesParam::__FIELDS__()) diff --git a/src/operator/tensor/broadcast_reduce_op_index.cc b/src/operator/tensor/broadcast_reduce_op_index.cc index c18a8bcf9126..ed9a90d04f30 100644 --- a/src/operator/tensor/broadcast_reduce_op_index.cc +++ b/src/operator/tensor/broadcast_reduce_op_index.cc @@ -103,7 +103,7 @@ Examples:: param.keepdims = false; attrs->parsed = param; }) -.set_attr("FInferShape", ReduceAxisShape) +.set_attr("FInferShape", ReduceAxisShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", SearchAxisCompute) .add_argument("data", "NDArray-or-Symbol", "The input array"); @@ -158,7 +158,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"data", "index"}; }) -.set_attr("FInferShape", PickOpShape) +.set_attr("FInferShape", PickOpShape) .set_attr("FInferType", PickOpType) .set_attr("FCompute", PickOpForward) .set_attr("FGradient", diff --git a/src/operator/tensor/broadcast_reduce_op_value.cc b/src/operator/tensor/broadcast_reduce_op_value.cc index 1fdbb8920572..52fd61aa110e 100644 --- a/src/operator/tensor/broadcast_reduce_op_value.cc +++ b/src/operator/tensor/broadcast_reduce_op_value.cc @@ -59,7 +59,7 @@ void L2NormComputeEx(const nnvm::NodeAttrs& attrs, const NormParam& param = nnvm::get(attrs.parsed); mshadow::Stream* s = ctx.get_stream(); const NDArrayStorageType istype = inputs[0].storage_type(); - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); + const mxnet::TShape axis = param.axis.has_value() ? 
param.axis.value() : mxnet::TShape(); if ((istype == kRowSparseStorage || istype == kCSRStorage) && axis.ndim() == 0 && param.ord == 2) { // l2 norm on the entire array @@ -238,7 +238,7 @@ Example:: )code" ADD_FILELINE) .set_attr_parser(ParamParser) .add_arguments(BroadcastAxesParam::__FIELDS__()) -.set_attr("FInferShape", BroadcastAxesShape) +.set_attr("FInferShape", BroadcastAxesShape) .set_attr("FCompute", BroadcastCompute); MXNET_OPERATOR_REGISTER_BROADCAST(broadcast_to) @@ -262,7 +262,7 @@ So with `shape=(2,0)`, we will obtain the same result as in the above example. )code" ADD_FILELINE) .set_attr_parser(ParamParser) .add_arguments(BroadcastToParam::__FIELDS__()) -.set_attr("FInferShape", BroadcastToShape) +.set_attr("FInferShape", BroadcastToShape) .set_attr("FCompute", BroadcastCompute); // backward op for broadcast. @@ -315,7 +315,7 @@ For example:: )code" ADD_FILELINE) .set_attr_parser(ParamParser) .add_arguments(BroadcastLikeParam::__FIELDS__()) -.set_attr("FInferShape", BroadcastLikeShape) +.set_attr("FInferShape", BroadcastLikeShape) .set_attr("FCompute", BroadcastCompute); NNVM_REGISTER_OP(norm) @@ -351,7 +351,7 @@ Examples:: .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", NormShape) +.set_attr("FInferShape", NormShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInferStorageType", LpNormStorageType) .set_attr("FGradient", ReduceGrad{ "_backward_norm" }) diff --git a/src/operator/tensor/broadcast_reduce_op_value.cu b/src/operator/tensor/broadcast_reduce_op_value.cu index 881f52090a7a..2d91c5074496 100644 --- a/src/operator/tensor/broadcast_reduce_op_value.cu +++ b/src/operator/tensor/broadcast_reduce_op_value.cu @@ -39,7 +39,7 @@ void L2NormComputeEx(const nnvm::NodeAttrs& attrs, const NormParam& param = nnvm::get(attrs.parsed); mshadow::Stream* s = ctx.get_stream(); const NDArrayStorageType istype = inputs[0].storage_type(); - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); + const mxnet::TShape axis = param.axis.has_value() ? 
param.axis.value() : mxnet::TShape(); if ((istype == kRowSparseStorage || istype == kCSRStorage) && axis.ndim() == 0 && param.ord == 2) { // l2 norm on the entire array diff --git a/src/operator/tensor/cast_storage-inl.cuh b/src/operator/tensor/cast_storage-inl.cuh index 39e522664f06..ee1531dbd94e 100644 --- a/src/operator/tensor/cast_storage-inl.cuh +++ b/src/operator/tensor/cast_storage-inl.cuh @@ -28,7 +28,6 @@ #include #include #include -#include #include "./util/tensor_util-inl.h" #include "../mxnet_op.h" #include "./util/tensor_util-inl.cuh" diff --git a/src/operator/tensor/cast_storage-inl.h b/src/operator/tensor/cast_storage-inl.h index cdb6246313b4..93606fcde86f 100644 --- a/src/operator/tensor/cast_storage-inl.h +++ b/src/operator/tensor/cast_storage-inl.h @@ -340,7 +340,7 @@ void CastStorageCsrCsrImpl(const OpContext& ctx, const NDArray& csr, FillZerosCsrImpl(s, *output); return; } - std::vector aux_shapes({csr.aux_shape(csr::kIndPtr), csr.aux_shape(csr::kIdx)}); + mxnet::ShapeVector aux_shapes({csr.aux_shape(csr::kIndPtr), csr.aux_shape(csr::kIdx)}); output->CheckAndAlloc(aux_shapes); const TBlob& val = output->data(); const TBlob& indptr = output->aux_data(csr::kIndPtr); diff --git a/src/operator/tensor/cast_storage.cc b/src/operator/tensor/cast_storage.cc index afea9b8a3ced..5d93979a5bb7 100644 --- a/src/operator/tensor/cast_storage.cc +++ b/src/operator/tensor/cast_storage.cc @@ -72,7 +72,7 @@ Example:: .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInferStorageType", CastStorageInferStorageType) .set_attr("FResourceRequest", diff --git a/src/operator/tensor/control_flow_op.cc b/src/operator/tensor/control_flow_op.cc index 164fd6a66ac7..5a05253478c8 100644 --- a/src/operator/tensor/control_flow_op.cc +++ b/src/operator/tensor/control_flow_op.cc @@ -61,7 +61,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"condition", "x", "y"}; }) -.set_attr("FInferShape", WhereOpShape) +.set_attr("FInferShape", WhereOpShape) .set_attr("FInferType", WhereOpType) .set_attr("FInferStorageType", WhereOpForwardStorageType) .set_attr("FCompute", WhereOpForward) diff --git a/src/operator/tensor/control_flow_op.h b/src/operator/tensor/control_flow_op.h index 9d0e8cf90817..96696b244bc3 100644 --- a/src/operator/tensor/control_flow_op.h +++ b/src/operator/tensor/control_flow_op.h @@ -170,13 +170,13 @@ struct where_batch_backward { }; inline bool WhereOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 3U) << "where operator takes 3 arguments (" << in_attrs->size() << " given)"; CHECK_EQ(out_attrs->size(), 1U); - TShape tshape((*in_attrs)[1]); + mxnet::TShape tshape((*in_attrs)[1]); if (!shape_assign(&tshape, (*in_attrs)[2])) return false; if (!shape_assign(&tshape, (*out_attrs)[0])) return false; SHAPE_ASSIGN_CHECK(*in_attrs, 1, tshape); diff --git a/src/operator/tensor/diag_op-inl.h b/src/operator/tensor/diag_op-inl.h index 23123cfab752..1e3c1c9701d4 100644 --- a/src/operator/tensor/diag_op-inl.h +++ b/src/operator/tensor/diag_op-inl.h @@ -61,11 +61,11 @@ struct DiagParam : public dmlc::Parameter { } }; -inline TShape DiagShapeImpl(const TShape& ishape, const int k, +inline mxnet::TShape DiagShapeImpl(const mxnet::TShape& ishape, const int k, const int32_t axis1, const int32_t axis2) 
{ if (ishape.ndim() == 1) { auto s = ishape[0] + std::abs(k); - return TShape({s, s}); + return mxnet::TShape({s, s}); } int32_t x1 = CheckAxis(axis1, ishape.ndim()); @@ -92,7 +92,7 @@ inline TShape DiagShapeImpl(const TShape& ishape, const int k, } int32_t n_dim = static_cast(ishape.ndim()) - 1; - TShape oshape(n_dim); + mxnet::TShape oshape(n_dim); // remove axis1 and axis2 and append the new axis to the end uint32_t idx = 0; @@ -108,19 +108,19 @@ inline TShape DiagShapeImpl(const TShape& ishape, const int k, } inline bool DiagOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - const TShape& ishape = (*in_attrs)[0]; + const mxnet::TShape& ishape = (*in_attrs)[0]; if (ishape.ndim() == 0) { return false; } const DiagParam& param = nnvm::get(attrs.parsed); - TShape oshape = DiagShapeImpl(ishape, + mxnet::TShape oshape = DiagShapeImpl(ishape, param.k, param.axis1, param.axis2); @@ -186,8 +186,8 @@ struct diag_gen { template void DiagOpProcess(const TBlob& in_data, const TBlob& out_data, - const TShape& ishape, - const TShape& oshape, + const mxnet::TShape& ishape, + const mxnet::TShape& oshape, index_t dsize, const DiagParam& param, mxnet_op::Stream *s, @@ -296,8 +296,8 @@ void DiagOpForward(const nnvm::NodeAttrs& attrs, Stream *s = ctx.get_stream(); const TBlob& in_data = inputs[0]; const TBlob& out_data = outputs[0]; - const TShape& ishape = inputs[0].shape_; - const TShape& oshape = outputs[0].shape_; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; const DiagParam& param = nnvm::get(attrs.parsed); DiagOpProcess(in_data, out_data, ishape, oshape, out_data.Size(), param, s, req); @@ -317,8 +317,8 @@ void DiagOpBackward(const nnvm::NodeAttrs& attrs, const TBlob& in_data = inputs[0]; const TBlob& out_data = outputs[0]; - const TShape& ishape = inputs[0].shape_; - const TShape& oshape = outputs[0].shape_; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; const DiagParam& param = nnvm::get(attrs.parsed); DiagOpProcess(in_data, out_data, oshape, ishape, in_data.Size(), param, s, req); diff --git a/src/operator/tensor/diag_op.cc b/src/operator/tensor/diag_op.cc index 9dcdb63d831a..4d14cbc29140 100644 --- a/src/operator/tensor/diag_op.cc +++ b/src/operator/tensor/diag_op.cc @@ -92,7 +92,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", DiagOpShape) +.set_attr("FInferShape", DiagOpShape) .set_attr("FInferType", DiagOpType) .set_attr("FCompute", DiagOpForward) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_diag"}) diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h index 69f87ae42f97..163b4426cb2b 100644 --- a/src/operator/tensor/dot-inl.h +++ b/src/operator/tensor/dot-inl.h @@ -1200,33 +1200,37 @@ inline void DotDnsCsrDnsImpl(const OpContext& ctx, const cpu& cpu_dev, } inline bool DotShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const DotParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); - TShape& lshape = (*in_attrs)[0]; - TShape& rshape = (*in_attrs)[1]; + mxnet::TShape& lshape = (*in_attrs)[0]; + mxnet::TShape& rshape = (*in_attrs)[1]; if (lshape.ndim() == 1 && rshape.ndim() == 1) { 
CHECK(!param.transpose_a && !param.transpose_b) << "Cannot transpose vectors"; CHECK_EQ(lshape[0], rshape[0]) << "dot shape error: " << lshape << " X " << rshape; SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape1(1)); } else { bool Ta = param.transpose_a, Tb = param.transpose_b; - TShape L[2], R[2]; + mxnet::TShape L[2], R[2]; if (Ta) { L[0] = mshadow::Shape1(lshape[0]); - L[1] = lshape.ndim() > 1 ? TShape(&lshape[1], &lshape[lshape.ndim()]) : TShape(1); + L[1] = lshape.ndim() > 1 ? + mxnet::TShape(&lshape[1], &lshape[lshape.ndim()]) : mxnet::TShape(1); } else { - L[0] = lshape.ndim() > 1 ? TShape(&lshape[0], &lshape[lshape.ndim()-1]) : TShape(1); + L[0] = lshape.ndim() > 1 ? + mxnet::TShape(&lshape[0], &lshape[lshape.ndim()-1]) : mxnet::TShape(1); L[1] = mshadow::Shape1(lshape[lshape.ndim()-1]); } if (Tb) { - R[0] = rshape.ndim() > 1 ? TShape(&rshape[0], &rshape[rshape.ndim()-1]) : TShape(1); + R[0] = rshape.ndim() > 1 ? + mxnet::TShape(&rshape[0], &rshape[rshape.ndim()-1]) : mxnet::TShape(1); R[1] = mshadow::Shape1(rshape[rshape.ndim()-1]); } else { R[0] = mshadow::Shape1(rshape[0]); - R[1] = rshape.ndim() > 1 ? TShape(&rshape[1], &rshape[rshape.ndim()]) : TShape(1); + R[1] = rshape.ndim() > 1 ? + mxnet::TShape(&rshape[1], &rshape[rshape.ndim()]) : mxnet::TShape(1); } if (L[!Ta].Size() != 0 && R[Tb].Size() != 0) { @@ -1236,7 +1240,7 @@ inline bool DotShape(const nnvm::NodeAttrs& attrs, std::vector buf; if (lshape.ndim() > 1) buf.insert(buf.end(), &L[Ta][0], &L[Ta][L[Ta].ndim()]); if (rshape.ndim() > 1) buf.insert(buf.end(), &R[!Tb][0], &R[!Tb][R[!Tb].ndim()]); - TShape oshape(buf.begin(), buf.end()); + mxnet::TShape oshape(buf.begin(), buf.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); } return true; @@ -1468,13 +1472,13 @@ void BatchDotBackward_(const nnvm::NodeAttrs& attrs, } inline bool BatchDotShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); const DotParam& param = nnvm::get(attrs.parsed); - TShape& lshape = (*in_attrs)[0]; - TShape& rshape = (*in_attrs)[1]; + mxnet::TShape& lshape = (*in_attrs)[0]; + mxnet::TShape& rshape = (*in_attrs)[1]; if (lshape.ndim() == 3 && rshape.ndim() == 3) { CHECK(lshape[0] == rshape[0]) << "batch_dot shape error(batch_size must be equal): " << lshape << " X " << rshape diff --git a/src/operator/tensor/dot.cc b/src/operator/tensor/dot.cc index d45551d383b8..7d7b6c06c846 100644 --- a/src/operator/tensor/dot.cc +++ b/src/operator/tensor/dot.cc @@ -82,7 +82,7 @@ above patterns, ``dot`` will fallback and generate output with default storage. 
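Note: in the registrations below, several -/+ set_attr pairs print identically because only the angle-bracketed attribute type changed (lost in this copy of the patch); the DotShape line presumably reads:

    // before: .set_attr<nnvm::FInferShape>("FInferShape", DotShape)
    // after:  .set_attr<mxnet::FInferShape>("FInferShape", DotShape)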
[](const NodeAttrs& attrs) { return std::vector{"lhs", "rhs"}; }) -.set_attr("FInferShape", DotShape) +.set_attr("FInferShape", DotShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FInferStorageType", DotForwardInferStorageType) .set_attr("FResourceRequest", @@ -130,7 +130,7 @@ which is computed by:: [](const NodeAttrs& attrs) { return std::vector{"lhs", "rhs"}; }) -.set_attr("FInferShape", BatchDotShape) +.set_attr("FInferShape", BatchDotShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FResourceRequest", [](const NodeAttrs& attrs) { diff --git a/src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh b/src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh index d02004d75d35..8469f59e2e9c 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh +++ b/src/operator/tensor/elemwise_binary_broadcast_op-inl.cuh @@ -40,7 +40,7 @@ BinaryBroadcastBackwardUseNone(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { using namespace broadcast; - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(outputs[0].shape_, outputs[1].shape_, inputs[0].shape_, &new_lshape, &new_rshape, &new_oshape); if (!ndim) { diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h index 304422038b89..1d2b7c9c1163 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op.h +++ b/src/operator/tensor/elemwise_binary_broadcast_op.h @@ -40,12 +40,12 @@ namespace mxnet { namespace op { inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); - TShape& lhs = (*in_attrs)[0]; - TShape& rhs = (*in_attrs)[1]; + mxnet::TShape& lhs = (*in_attrs)[0]; + mxnet::TShape& rhs = (*in_attrs)[1]; // avoid pre-mature shape inference. 
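Note: for intuition, a standalone sketch (not patch code; hypothetical helper name) of the broadcasting rule BinaryBroadcastShape enforces below — trailing axes are aligned, and two sizes may differ only when one of them is 1:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Returns the broadcast output shape, or an empty vector if incompatible.
    std::vector<int> BroadcastShape(const std::vector<int>& a, const std::vector<int>& b) {
      const size_t n = std::max(a.size(), b.size());
      std::vector<int> out(n, 1);
      for (size_t i = 0; i < n; ++i) {
        // Axes missing on one side are treated as size 1.
        const int x = (i < n - a.size()) ? 1 : a[i - (n - a.size())];
        const int y = (i < n - b.size()) ? 1 : b[i - (n - b.size())];
        if (x != y && x != 1 && y != 1) return {};  // incompatible pair
        out[i] = std::max(x, y);
      }
      return out;
    }

    int main() {
      const auto s = BroadcastShape({2, 1, 3}, {4, 3});  // -> (2, 4, 3)
      if (!s.empty()) std::printf("(%d, %d, %d)\n", s[0], s[1], s[2]);
      return 0;
    }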
if (lhs.ndim() == 0 || rhs.ndim() == 0) return false; @@ -54,7 +54,7 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, lhs); return true; } - TShape out(std::max(lhs.ndim(), rhs.ndim())); + mxnet::TShape out(std::max(lhs.ndim(), rhs.ndim())); index_t bl = out.ndim() - lhs.ndim(); index_t br = out.ndim() - rhs.ndim(); for (index_t i = 0; i < out.ndim(); ++i) { @@ -142,14 +142,14 @@ inline bool BinaryBroadcastAddStorageType(const nnvm::NodeAttrs& attrs, LOG(FATAL) << "NDim too large "; \ } -inline int BinaryBroadcastShapeCompact(const TShape& lshape, const TShape& rshape, - const TShape& oshape, TShape *new_lshape, - TShape *new_rshape, TShape *new_oshape) { +inline int BinaryBroadcastShapeCompact(const mxnet::TShape& lshape, const mxnet::TShape& rshape, + const mxnet::TShape& oshape, mxnet::TShape *new_lshape, + mxnet::TShape *new_rshape, mxnet::TShape *new_oshape) { if (lshape == rshape) return 0; index_t odim = std::max(oshape.ndim(), broadcast::MAX_DIM); - *new_lshape = TShape(odim); - *new_rshape = TShape(odim); - *new_oshape = TShape(odim); + *new_lshape = mxnet::TShape(odim); + *new_rshape = mxnet::TShape(odim); + *new_oshape = mxnet::TShape(odim); index_t bl = oshape.ndim() - lshape.ndim(); index_t br = oshape.ndim() - rshape.ndim(); index_t j = 0, lprod = 1, rprod = 1, oprod = 1; @@ -293,7 +293,7 @@ void BinaryBroadcastCompute(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(inputs[0].shape_, inputs[1].shape_, outputs[0].shape_, &new_lshape, &new_rshape, &new_oshape); if (!ndim) { @@ -384,9 +384,9 @@ void BinaryBroadcastCsrDnsDnsImpl(const OpContext& ctx, const NDArray& dns, const OpReqType req, const NDArray& output, - const TShape& new_csrshape, - const TShape& new_dnsshape, - const TShape& new_oshape, + const mxnet::TShape& new_csrshape, + const mxnet::TShape& new_dnsshape, + const mxnet::TShape& new_oshape, const int ndim, const bool reverse) { using namespace mshadow; @@ -501,7 +501,7 @@ void BinaryBroadcastComputeDenseEx(const nnvm::NodeAttrs& attrs, bool reverse = (lhs_stype == kDefaultStorage); const NDArray& dns = (reverse) ? lhs : rhs; const NDArray& csr = (reverse) ? 
rhs : lhs; - TShape new_csrshape, new_dnsshape, new_oshape; + mxnet::TShape new_csrshape, new_dnsshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(csr.shape(), dns.shape(), out.shape(), &new_csrshape, &new_dnsshape, &new_oshape); @@ -531,7 +531,7 @@ BinaryBroadcastBackwardUseNone(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { using namespace broadcast; - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(outputs[0].shape_, outputs[1].shape_, inputs[0].shape_, &new_lshape, &new_rshape, &new_oshape); if (!ndim) { @@ -568,9 +568,9 @@ inline void BinaryBroadcastBackwardUseInImpl(const OpContext& ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs, - const TShape& new_lshape, - const TShape& new_rshape, - const TShape& new_oshape) { + const mxnet::TShape& new_lshape, + const mxnet::TShape& new_rshape, + const mxnet::TShape& new_oshape) { using namespace mshadow; using namespace mshadow::expr; using namespace broadcast; @@ -599,7 +599,7 @@ void BinaryBroadcastBackwardUseIn(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { - TShape new_lshape, new_rshape, new_oshape; + mxnet::TShape new_lshape, new_rshape, new_oshape; const bool need_bc = BinaryBroadcastShapeCompact(outputs[0].shape_, outputs[1].shape_, inputs[0].shape_, &new_lshape, &new_rshape, &new_oshape) != 0; @@ -623,7 +623,7 @@ void BinaryBroadcastBackwardUseIn(const nnvm::NodeAttrs& attrs, [](const NodeAttrs& attrs) { \ return std::vector{"lhs", "rhs"}; \ }) \ - .set_attr("FInferShape", BinaryBroadcastShape) \ + .set_attr("FInferShape", BinaryBroadcastShape) \ .set_attr("FInferType", ElemwiseType<2, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ diff --git a/src/operator/tensor/elemwise_binary_op-inl.h b/src/operator/tensor/elemwise_binary_op-inl.h index 42f907f71be4..f47c5d3cc8fa 100644 --- a/src/operator/tensor/elemwise_binary_op-inl.h +++ b/src/operator/tensor/elemwise_binary_op-inl.h @@ -240,7 +240,7 @@ void ElemwiseBinaryOp::RspRspOp(mshadow::Stream *s, CHECK_LE(iter_out, num_rows_r); } DCHECK_LE(iter_out, num_rows_l + num_rows_r); // Make sure that we didn't overrun - nnvm::TShape new_shape = output.aux_shape(rowsparse::kIdx); + mxnet::TShape new_shape = output.aux_shape(rowsparse::kIdx); CHECK_LE(iter_out, new_shape.Size()); if (!rhs_is_dense && !lhs_is_dense && !lhs_in_place && !rhs_in_place && !scatter) { // Reduce the first-dimension size by the number of common rows diff --git a/src/operator/tensor/elemwise_binary_op.h b/src/operator/tensor/elemwise_binary_op.h index 9b451fa69357..2fe3fd9919cf 100644 --- a/src/operator/tensor/elemwise_binary_op.h +++ b/src/operator/tensor/elemwise_binary_op.h @@ -726,7 +726,7 @@ class ElemwiseBinaryOp : public OpBase { [](const NodeAttrs& attrs) { \ return std::vector{"lhs", "rhs"}; \ }) \ - .set_attr("FInferShape", ElemwiseShape<2, 1>) \ + .set_attr("FInferShape", ElemwiseShape<2, 1>) \ .set_attr("FInferType", ElemwiseType<2, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ diff --git a/src/operator/tensor/elemwise_binary_scalar_op.h b/src/operator/tensor/elemwise_binary_scalar_op.h index 1a8adedbceed..c78841641214 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op.h +++ b/src/operator/tensor/elemwise_binary_scalar_op.h @@ -324,7 +324,7 @@ class BinaryScalarOp : public UnaryOp { .set_attr_parser([](NodeAttrs* attrs) { \ attrs->parsed 
= std::stod(attrs->dict["scalar"]); \ }) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ diff --git a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc index 1eb3da65eef3..ae356deff0a1 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_scalar_op_basic.cc @@ -33,7 +33,7 @@ .set_attr_parser([](NodeAttrs* attrs) { \ attrs->parsed = std::stod(attrs->dict["scalar"]); \ }) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FInferStorageType", \ BinaryScalarStorageTypeWithDenseResultStorageType) \ diff --git a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc index dbe3c4f6219f..f027665a549b 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc +++ b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc @@ -111,7 +111,7 @@ Example:: attrs->parsed = 1.0; } }) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ diff --git a/src/operator/tensor/elemwise_sum.cc b/src/operator/tensor/elemwise_sum.cc index 85b58b6e0f3e..f1ec8b5ad387 100644 --- a/src/operator/tensor/elemwise_sum.cc +++ b/src/operator/tensor/elemwise_sum.cc @@ -60,11 +60,11 @@ std::vector ElementWiseSumGrad( } bool ElementWiseSumShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(out_attrs->size(), 1); - return ElemwiseAttr( - attrs, in_attrs, out_attrs, TShape()); + return ElemwiseAttr( + attrs, in_attrs, out_attrs, mxnet::TShape()); } bool ElementWiseSumType(const nnvm::NodeAttrs& attrs, @@ -182,7 +182,7 @@ The storage type of ``add_n`` output depends on storage types of inputs #if MXNET_USE_MKLDNN == 1 .set_attr("TIsMKLDNN", true) #endif -.set_attr("FInferShape", ElementWiseSumShape) +.set_attr("FInferShape", ElementWiseSumShape) .set_attr("FInferType", ElementWiseSumType) .set_attr("FInferStorageType", ElementWiseSumForwardInferStorageType) .set_attr("FGradient", ElementWiseSumGrad) diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 8d5ad055b118..3085f6d2256a 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -73,7 +73,7 @@ class OpBase { const NDArray* clone_from = nullptr) { if (req != kNullOp) { if (clone_from) { - const TShape& ishape = clone_from->storage_shape(); + const mxnet::TShape& ishape = clone_from->storage_shape(); dest->CheckAndAllocData(ishape); CHECK_EQ(dest->storage_type(), clone_from->storage_type()); for (size_t i = 0, n = clone_from->aux_shapes().size(); i < n; ++i) { @@ -144,7 +144,7 @@ class OpBase { const TBlob& blob) { const size_t dim = blob.shape_.ndim(); if (dim) { - TShape shape({blob.shape_[0], 1}); + mxnet::TShape shape({blob.shape_[0], 1}); for (size_t i = 1; i < dim; ++i) { shape[1] *= blob.shape_[i]; } @@ -181,7 +181,7 @@ class UnaryOp : public OpBase { CHECK_EQ(outputs.size(), static_cast(n_out)) << " in operator " << attrs.name; static_assert(n_in > 0 && n_out > 0, "Invalid input 
and/or output count values"); - const TShape& isshape = inputs[0].storage_shape(); + const mxnet::TShape& isshape = inputs[0].storage_shape(); if (!shape_is_none(isshape)) { NDArray *output = nullptr; for (size_t i = 0, n = inputs.size(); i < n; ++i) { @@ -192,7 +192,7 @@ class UnaryOp : public OpBase { CHECK_EQ(output->shape(), inputs[i].shape()); CHECK_EQ(output->storage_type(), input.storage_type()); CHECK_EQ(output->aux_shapes().size(), input.aux_shapes().size()); - std::vector aux_shapes; + mxnet::ShapeVector aux_shapes; const size_t aux_shape_count = input.aux_shapes().size(); aux_shapes.reserve(aux_shape_count); for (size_t j = 0; j < aux_shape_count; ++j) { @@ -554,7 +554,7 @@ struct ReshapeLikeParam : public dmlc::Parameter { NNVM_REGISTER_OP(__name$) \ .set_num_inputs(1) \ .set_num_outputs(1) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ .set_attr("FInferType", ElemwiseType<1, 1>) \ .set_attr("FInplaceOption", \ [](const NodeAttrs& attrs){ \ diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index d0079b545dd8..4aaf4dfd33c4 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -349,7 +349,7 @@ NNVM_REGISTER_OP(_identity_with_attr_like_rhs) [](const NodeAttrs& attrs) { return std::vector(1, 1); }) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FComputeEx", UnaryOp::IdentityComputeFirstItemEx) -.set_attr("FInferShape", ElemwiseShape<2, 1>) +.set_attr("FInferShape", ElemwiseShape<2, 1>) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FInferStorageType", IdentityAttrLikeRhsStorageType) .set_attr( @@ -392,8 +392,8 @@ void ReshapeLikeRangeCanonicalize(int ndims, const char *side, CHECK(*cbegin >= 0) << "Invalid begin for " << side << "_begin=" << begin; } -void GetReshapeLikeParams(const ReshapeLikeParam ¶m, const TShape &lshape, - const TShape &rshape, int *lhs_begin, int *lhs_end, +void GetReshapeLikeParams(const ReshapeLikeParam ¶m, const mxnet::TShape &lshape, + const mxnet::TShape &rshape, int *lhs_begin, int *lhs_end, int *rhs_begin, int *rhs_end) { // LHS params ReshapeLikeRangeCanonicalize(lshape.ndim(), "lhs", param.lhs_begin, @@ -404,18 +404,18 @@ void GetReshapeLikeParams(const ReshapeLikeParam ¶m, const TShape &lshape, } bool ReshapeLikeShapeCompute(const nnvm::NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ReshapeLikeParam ¶m = nnvm::get(attrs.parsed); - const TShape &lshape = (*in_attrs)[0]; - const TShape &rshape = (*in_attrs)[1]; + const mxnet::TShape &lshape = (*in_attrs)[0]; + const mxnet::TShape &rshape = (*in_attrs)[1]; int lhs_begin, lhs_end, rhs_begin, rhs_end; GetReshapeLikeParams(param, lshape, rshape, &lhs_begin, &lhs_end, &rhs_begin, &rhs_end); int lhsrank = static_cast(lshape.ndim()); int orank = lhsrank + (rhs_end - rhs_begin) - (lhs_end - lhs_begin); - TShape oshape(orank); + mxnet::TShape oshape(orank); for (int i = 0; i < lhs_begin; ++i) oshape[i] = lshape[i]; @@ -480,7 +480,7 @@ Negative indices are supported, and `None` can be used for either `lhs_end` or ` .set_attr("FIgnoreInputs", [](const NodeAttrs& attrs) { return std::vector(1, 1); }) .set_attr("FCompute", UnaryOp::IdentityCompute) -.set_attr("FInferShape", ReshapeLikeShapeCompute) +.set_attr("FInferShape", ReshapeLikeShapeCompute) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr( "FGradient", [](const 
nnvm::NodePtr& n, @@ -522,13 +522,13 @@ Example:: .set_num_outputs(1) .set_attr("FCompute", ShapeComputeCPU) .set_attr("FGradient", MakeZeroGradNodes) -.set_attr("FInferShape", +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - TShape target_shape(1); + mxnet::TShape target_shape(1); target_shape[0] = in_attrs->at(0).ndim(); SHAPE_ASSIGN_CHECK(*out_attrs, 0, target_shape); return !shape_is_none(out_attrs->at(0)); @@ -574,10 +574,10 @@ Example:: .set_num_outputs(1) .set_attr("FCompute", SizeComputeCPU) .set_attr("FGradient", MakeZeroGradNodes) -.set_attr("FInferShape", +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); SHAPE_ASSIGN_CHECK(*out_attrs, 0, 1U); @@ -609,7 +609,7 @@ Example:: )code" ADD_FILELINE) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", CastType) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ diff --git a/src/operator/tensor/histogram-inl.h b/src/operator/tensor/histogram-inl.h index 40acb55d1d41..51d0bdb6c2b6 100644 --- a/src/operator/tensor/histogram-inl.h +++ b/src/operator/tensor/histogram-inl.h @@ -73,8 +73,8 @@ struct FillBinBoundsKernel { }; inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { HistogramParam param = nnvm::get(attrs.parsed); const bool has_cnt = param.bin_cnt.has_value(); const bool has_range = param.range.has_value(); @@ -87,17 +87,17 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, // if cnt is specified, the output histogram has shape (cnt,) // while output bins has shape (cnt+1,) const int bin_cnt = param.bin_cnt.value(); - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({bin_cnt})); - SHAPE_ASSIGN_CHECK(*out_attrs, 1, TShape({bin_cnt + 1})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({bin_cnt})); + SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape({bin_cnt + 1})); } else { // if cnt is not specified, the output histogram has shape (bins.Size() - 1) // while output bins has same shape as input bins - TShape oshape = (*in_attrs)[1]; + mxnet::TShape oshape = (*in_attrs)[1]; CHECK_EQ(oshape.ndim(), 1U) << "bins argument should be an 1D vector"; CHECK_GE(oshape.Size(), 2U) << "number of bounds should be >= 2"; - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({(oshape[0] - 1)})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({(oshape[0] - 1)})); SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1)); } diff --git a/src/operator/tensor/histogram.cc b/src/operator/tensor/histogram.cc index 3c4eaa158829..754475bff9ad 100644 --- a/src/operator/tensor/histogram.cc +++ b/src/operator/tensor/histogram.cc @@ -151,7 +151,7 @@ Example:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.set_attr("FInferShape", HistogramOpShape) +.set_attr("FInferShape", HistogramOpShape) .set_attr("FInferType", HistogramOpType) .set_attr("FCompute", HistogramOpForward) .add_argument("data", "NDArray-or-Symbol", "Input ndarray") diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc index 
564171d2c3fd..a0254ead4572 100644 --- a/src/operator/tensor/indexing_op.cc +++ b/src/operator/tensor/indexing_op.cc @@ -78,8 +78,8 @@ void EmbeddingOpForwardDnsImpl(mshadow::Stream* s, const OpReqType req, const TBlob& output) { using namespace mxnet_op; - const TShape& ishape = data.shape_; - const TShape& oshape = output.shape_; + const mxnet::TShape& ishape = data.shape_; + const mxnet::TShape& oshape = output.shape_; MSHADOW_TYPE_SWITCH(output.type_flag_, DType, { MSHADOW_TYPE_SWITCH(data.type_flag_, IType, { @@ -284,9 +284,9 @@ void TakeOpForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(inputs.size(), 2U); CHECK_EQ(outputs.size(), 1U); - const TShape& idxshape = inputs[take_::kIdx].shape_; - const TShape& arrshape = inputs[take_::kArr].shape_; - const TShape& oshape = outputs[take_::kOut].shape_; + const mxnet::TShape& idxshape = inputs[take_::kIdx].shape_; + const mxnet::TShape& arrshape = inputs[take_::kArr].shape_; + const mxnet::TShape& oshape = outputs[take_::kOut].shape_; Stream *s = ctx.get_stream(); const int actual_axis = param.axis + ((param.axis < 0) ? arrshape.ndim() : 0); @@ -524,7 +524,7 @@ The storage type of weight can be either row_sparse or default. [](const NodeAttrs& attrs) { return std::vector{"data", "weight"}; }) -.set_attr("FInferShape", EmbeddingOpShape) +.set_attr("FInferShape", EmbeddingOpShape) .set_attr("FInferType", EmbeddingOpType) .set_attr("FInferStorageType", EmbeddingOpForwardStorageType) .set_attr("FResourceRequest", @@ -604,7 +604,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{ResourceRequest::kTempSpace}; }) -.set_attr("FInferShape", EmbeddingOpShape) +.set_attr("FInferShape", EmbeddingOpShape) .set_attr("FInferType", EmbeddingOpType) .set_attr("FInferStorageType", SparseEmbeddingOpForwardStorageType) .set_attr("FComputeEx", SparseEmbeddingOpForwardEx) @@ -700,7 +700,7 @@ The storage type of ``take`` output depends upon the input storage type: [](const NodeAttrs& attrs) { return std::vector{"a", "indices"}; }) -.set_attr("FInferShape", TakeOpShape) +.set_attr("FInferShape", TakeOpShape) .set_attr("FInferType", TakeOpType) .set_attr("FInferStorageType", TakeOpForwardStorageType) .set_attr("FResourceRequest", @@ -757,7 +757,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"a", "indices"}; }) -.set_attr("FInferShape", BatchTakeOpShape) +.set_attr("FInferShape", BatchTakeOpShape) .set_attr("FInferType", BatchTakeOpType) .set_attr("FCompute", BatchTakeOpForward) .add_argument("a", "NDArray-or-Symbol", "The input array") @@ -804,7 +804,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"indices"}; }) -.set_attr("FInferShape", OneHotOpShape) +.set_attr("FInferShape", OneHotOpShape) .set_attr("FInferType", OneHotOpType) .set_attr("FCompute", OneHotOpForward) .set_attr("FGradient", MakeZeroGradNodes) @@ -844,7 +844,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"data", "indices"}; }) -.set_attr("FInferShape", GatherNDShape) +.set_attr("FInferShape", GatherNDShape) .set_attr("FInferType", GatherNDType) .set_attr("FCompute", GatherNDForward) .set_attr("FGradient", @@ -919,7 +919,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"data", "indices"}; }) -.set_attr("FInferShape", ScatterNDShape) +.set_attr("FInferShape", ScatterNDShape) .set_attr("FInferType", ScatterNDType) .set_attr("FCompute", ScatterNDForward) .set_attr("FGradient", @@ -982,7 +982,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"data", "indices"}; }) -.set_attr("FInferShape", ScatterNDShape) 
+.set_attr("FInferShape", ScatterNDShape) .set_attr("FInferType", ScatterNDType) .set_attr("FCompute", GatherNDBackward) .set_attr("FGradient", @@ -1029,15 +1029,15 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"lhs", "rhs", "indices"}; }) -.set_attr("FInferShape", +.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 3U); CHECK_EQ(out_attrs->size(), 1U); SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); - std::vector tmp_in_attrs = {in_attrs->at(1), in_attrs->at(2)}; + mxnet::ShapeVector tmp_in_attrs = {in_attrs->at(1), in_attrs->at(2)}; if (!ScatterNDShape(attrs, &tmp_in_attrs, out_attrs)) { return false; } diff --git a/src/operator/tensor/indexing_op.cu b/src/operator/tensor/indexing_op.cu index bad3e5a1a6c5..94fe377ebbc7 100644 --- a/src/operator/tensor/indexing_op.cu +++ b/src/operator/tensor/indexing_op.cu @@ -170,8 +170,8 @@ void EmbeddingOpForwardDnsImpl(mshadow::Stream* s, const OpReqType req, const TBlob& output) { using namespace mxnet_op; - const TShape& ishape = data.shape_; - const TShape& oshape = output.shape_; + const mxnet::TShape& ishape = data.shape_; + const mxnet::TShape& oshape = output.shape_; MSHADOW_TYPE_SWITCH(output.type_flag_, DType, { MSHADOW_TYPE_SWITCH(data.type_flag_, IType, { @@ -475,9 +475,9 @@ void TakeOpForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(inputs.size(), 2U); CHECK_EQ(outputs.size(), 1U); - const TShape& idxshape = inputs[take_::kIdx].shape_; - const TShape& arrshape = inputs[take_::kArr].shape_; - const TShape& oshape = outputs[take_::kOut].shape_; + const mxnet::TShape& idxshape = inputs[take_::kIdx].shape_; + const mxnet::TShape& arrshape = inputs[take_::kArr].shape_; + const mxnet::TShape& oshape = outputs[take_::kOut].shape_; Stream *s = ctx.get_stream(); const int actual_axis = param.axis + ((param.axis < 0) ? 
arrshape.ndim() : 0); diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h index fba331e25705..8979531fef4e 100644 --- a/src/operator/tensor/indexing_op.h +++ b/src/operator/tensor/indexing_op.h @@ -141,17 +141,17 @@ inline void AddTakeGradLargeBatch(mshadow::Tensor dst, } template inline bool EmbeddingOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { using namespace mshadow; - const TShape &dshape = (*in_attrs)[embedding::kData]; + const mxnet::TShape &dshape = (*in_attrs)[embedding::kData]; if (dshape.ndim() == 0) return false; const ParamType& param = nnvm::get(attrs.parsed); SHAPE_ASSIGN_CHECK(*in_attrs, embedding::kWeight, Shape2(param.input_dim, param.output_dim)); out_attrs->clear(); - TShape oshape(dshape.ndim()+1); + mxnet::TShape oshape(dshape.ndim()+1); for (size_t i = 0; i < dshape.ndim(); ++i) { oshape[i] = dshape[i]; } @@ -521,8 +521,8 @@ void EmbeddingOpBackward(const nnvm::NodeAttrs& attrs, << "Embedding layer doesn't support calculate data gradient"; CHECK_EQ(outputs[1].type_flag_, inputs[0].type_flag_); - const TShape& ishape = inputs[1].shape_; - const TShape& oshape = inputs[0].shape_; + const mxnet::TShape& ishape = inputs[1].shape_; + const mxnet::TShape& oshape = inputs[0].shape_; Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[1].type_flag_, DType, { @@ -677,11 +677,11 @@ struct TakeParam: public dmlc::Parameter { }; inline bool TakeOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { using namespace mshadow; - const TShape &arrshape = (*in_attrs)[take_::kArr]; - const TShape &idxshape = (*in_attrs)[take_::kIdx]; + const mxnet::TShape &arrshape = (*in_attrs)[take_::kArr]; + const mxnet::TShape &idxshape = (*in_attrs)[take_::kIdx]; if (idxshape.ndim() == 0U || idxshape.Size() == 0U) return false; const TakeParam& param = nnvm::get(attrs.parsed); if (param.mode == take_::kRaise) { @@ -693,7 +693,7 @@ inline bool TakeOpShape(const nnvm::NodeAttrs& attrs, out_attrs->clear(); const index_t actual_axis = param.axis + ((param.axis < 0) ? 
arrshape.ndim() : 0); - TShape oshape(idxshape.ndim() + arrshape.ndim() - 1); + mxnet::TShape oshape(idxshape.ndim() + arrshape.ndim() - 1); for (index_t i = 0; i < idxshape.ndim(); ++i) { oshape[i + actual_axis] = idxshape[i]; } @@ -838,9 +838,9 @@ void TakeOpBackwardImpl(mshadow::Stream* s, using namespace mxnet_op; using namespace mshadow; CHECK(axis != 0) << "axis == 0 case should be dispatched to the legacy implementation"; - const TShape& arrshape = arr.shape_; - const TShape& idxshape = idx.shape_; - const TShape& oshape = ograd.shape_; + const mxnet::TShape& arrshape = arr.shape_; + const mxnet::TShape& idxshape = idx.shape_; + const mxnet::TShape& oshape = ograd.shape_; MSHADOW_TYPE_SWITCH(idx.type_flag_, IType, { // get size of temporary storage for sort int* src_indptr_ptr = nullptr; @@ -910,9 +910,9 @@ void TakeOpBackwardImpl(mshadow::Stream* s, using namespace mxnet_op; using namespace mshadow; CHECK(axis != 0) << "axis == 0 case should be dispatched to the legacy implementation"; - const TShape& arrshape = arr.shape_; - const TShape& idxshape = idx.shape_; - const TShape& oshape = ograd.shape_; + const mxnet::TShape& arrshape = arr.shape_; + const mxnet::TShape& idxshape = idx.shape_; + const mxnet::TShape& oshape = ograd.shape_; MSHADOW_TYPE_SWITCH(idx.type_flag_, IType, { // get size of temporary storage for sort char* temp_storage_ptr = nullptr; @@ -1019,9 +1019,9 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs, // inputs are specified in the .cc file, which are the gradients from // the upper layer and the input index // outputs are the gradients of inputs in the feed-forward pass - const TShape& idxshape = inputs[1].shape_; - const TShape& arrshape = outputs[0].shape_; - const TShape& oshape = inputs[0].shape_; + const mxnet::TShape& idxshape = inputs[1].shape_; + const mxnet::TShape& arrshape = outputs[0].shape_; + const mxnet::TShape& oshape = inputs[0].shape_; if (req[take_::kIdx] != kNullOp) { mxnet_op::Kernel::Launch( @@ -1065,8 +1065,8 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs, } inline bool BatchTakeOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { LOG(INFO) << "batch_take is deprecated. 
Please use pick instead."; CHECK_EQ(in_attrs->size(), 2U) << "BatchTake op requires two inputs"; if ((*in_attrs)[1].ndim() != 0) { @@ -1163,13 +1163,13 @@ inline void GetOneHotParams(const OneHotParam& param, int* depth, double* on_val } inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const OneHotParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); // The shape of indices - const TShape& ishape = (*in_attrs)[0]; + const mxnet::TShape& ishape = (*in_attrs)[0]; int depth = 0; double on_value = 1.0; @@ -1177,7 +1177,7 @@ inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs, int dtype = mshadow::kFloat32; GetOneHotParams(param, &depth, &on_value, &off_value, &dtype); - TShape oshape(ishape.ndim() + 1); + mxnet::TShape oshape(ishape.ndim() + 1); for (index_t i = 0; i < ishape.ndim(); ++i) { oshape[i] = ishape[i]; } @@ -1251,13 +1251,13 @@ void OneHotOpForward(const nnvm::NodeAttrs& attrs, } inline bool GatherNDShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); // The shape of indices - const TShape& dshape = (*in_attrs)[0]; - const TShape& ishape = (*in_attrs)[1]; + const mxnet::TShape& dshape = (*in_attrs)[0]; + const mxnet::TShape& ishape = (*in_attrs)[1]; if (shape_is_none(dshape) || shape_is_none(ishape)) return false; @@ -1270,7 +1270,7 @@ inline bool GatherNDShape(const nnvm::NodeAttrs& attrs, CHECK_LE(ishape[0], 10) << "gather_nd supports indexing along at most 10 dimensions."; - TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0]); + mxnet::TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0]); for (size_t i = 0; i < ishape.ndim() - 1; ++i) oshape[i] = ishape[i+1]; for (int i = 0; i < dshape.ndim() - ishape[0]; ++i) { @@ -1319,8 +1319,8 @@ void GatherNDForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); if (req[0] == kNullOp) return; mshadow::Stream *s = ctx.get_stream(); - const TShape& dshape = inputs[0].shape_; - const TShape& ishape = inputs[1].shape_; + const mxnet::TShape& dshape = inputs[0].shape_; + const mxnet::TShape& ishape = inputs[1].shape_; int M = ishape[0]; int N = ishape.Size() / M; int K = dshape.ProdShape(M, dshape.ndim()); @@ -1337,7 +1337,7 @@ void GatherNDForward(const nnvm::NodeAttrs& attrs, struct ScatterNDParam : public dmlc::Parameter { - TShape shape; + mxnet::TShape shape; DMLC_DECLARE_PARAMETER(ScatterNDParam) { DMLC_DECLARE_FIELD(shape) .describe("Shape of output."); @@ -1345,17 +1345,17 @@ struct ScatterNDParam : public dmlc::Parameter { }; inline bool ScatterNDShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); const auto& params = dmlc::get(attrs.parsed); SHAPE_ASSIGN_CHECK(*out_attrs, 0, params.shape); - const TShape& dshape = (*in_attrs)[0]; - const TShape& ishape = (*in_attrs)[1]; - const TShape& oshape = (*out_attrs)[0]; + const mxnet::TShape& dshape = (*in_attrs)[0]; + const mxnet::TShape& ishape = (*in_attrs)[1]; + const mxnet::TShape& oshape = (*out_attrs)[0]; if (shape_is_none(dshape) || shape_is_none(ishape) || shape_is_none(oshape)) return false; @@ -1422,8 +1422,8 @@ void ScatterNDForward(const 
nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); if (req[0] == kNullOp) return; mshadow::Stream *s = ctx.get_stream(); - const TShape& oshape = outputs[0].shape_; - const TShape& ishape = inputs[1].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; + const mxnet::TShape& ishape = inputs[1].shape_; dim_t M = ishape[0]; dim_t N = ishape.Size() / M; dim_t K = oshape.ProdShape(M, oshape.ndim()); @@ -1479,8 +1479,8 @@ void GatherNDBackward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); if (req[0] == kNullOp) return; mshadow::Stream *s = ctx.get_stream(); - const TShape& oshape = outputs[0].shape_; - const TShape& ishape = inputs[1].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; + const mxnet::TShape& ishape = inputs[1].shape_; dim_t M = ishape[0]; dim_t N = ishape.Size() / M; dim_t K = oshape.ProdShape(M, oshape.ndim()); diff --git a/src/operator/tensor/init_op.cc b/src/operator/tensor/init_op.cc index 8554ba854178..341748b50abe 100644 --- a/src/operator/tensor/init_op.cc +++ b/src/operator/tensor/init_op.cc @@ -39,7 +39,7 @@ NNVM_REGISTER_OP(_zeros_without_dtype) .set_num_inputs(0) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", InitShape) +.set_attr("FInferShape", InitShape) .set_attr("FInferType", InitType) .set_attr("FInferStorageType", InitStorageType) @@ -52,7 +52,7 @@ NNVM_REGISTER_OP(_zeros) .set_num_inputs(0) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", InitShape) +.set_attr("FInferShape", InitShape) .set_attr("FInferType", InitType) .set_attr("FInferStorageType", InitStorageType) .set_attr("FCompute", FillCompute) @@ -64,7 +64,7 @@ NNVM_REGISTER_OP(_eye) .set_num_inputs(0) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", InitEyeShape) +.set_attr("FInferShape", InitEyeShape) .set_attr("FInferType", InitType) .set_attr("FCompute", EyeFill) .add_arguments(EyeParam::__FIELDS__()); @@ -74,7 +74,7 @@ NNVM_REGISTER_OP(_ones) .set_num_inputs(0) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", InitShape) +.set_attr("FInferShape", InitShape) .set_attr("FInferType", InitType) .set_attr("FCompute", FillCompute) .add_arguments(InitOpParam::__FIELDS__()); @@ -84,7 +84,7 @@ NNVM_REGISTER_OP(_full) .set_num_inputs(0) .set_num_outputs(1) .set_attr_parser(ParamParser) - .set_attr("FInferShape", InitShape) + .set_attr("FInferShape", InitShape) .set_attr("FInferType", InitType) .set_attr("FCompute", InitFillWithScalarCompute) .add_arguments(InitOpWithScalarParam::__FIELDS__()); @@ -94,7 +94,7 @@ NNVM_REGISTER_OP(_arange) .set_num_inputs(0) .set_num_outputs(1) .set_attr_parser(RangeParamParser) -.set_attr("FInferShape", RangeShape) +.set_attr("FInferShape", RangeShape) .set_attr("FInferType", InitType) .set_attr("FCompute", RangeCompute) .add_arguments(RangeParam::__FIELDS__()); @@ -121,7 +121,7 @@ Examples:: )code") .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInferStorageType", ElemwiseStorageType<1, 1, false, true, true>) .set_attr("FIgnoreInputs", @@ -146,7 +146,7 @@ Examples:: )code") .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FIgnoreInputs", [](const NodeAttrs& attrs) { return std::vector(1, 0); }) diff --git a/src/operator/tensor/init_op.h 
b/src/operator/tensor/init_op.h index e9e67cb1a4c5..680431dfecd1 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -44,12 +44,12 @@ namespace mxnet { namespace op { struct InitOpParam : public dmlc::Parameter { - TShape shape; + mxnet::TShape shape; std::string ctx; int dtype; DMLC_DECLARE_PARAMETER(InitOpParam) { DMLC_DECLARE_FIELD(shape) - .set_default(TShape()) + .set_default(mxnet::TShape()) .describe("The shape of the output"); DMLC_DECLARE_FIELD(ctx) .set_default("") @@ -62,12 +62,12 @@ struct InitOpParam : public dmlc::Parameter { }; struct InitOpWithoutDTypeParam : public dmlc::Parameter { - TShape shape; + mxnet::TShape shape; std::string ctx; int dtype; DMLC_DECLARE_PARAMETER(InitOpWithoutDTypeParam) { DMLC_DECLARE_FIELD(shape) - .set_default(TShape()) + .set_default(mxnet::TShape()) .describe("The shape of the output"); DMLC_DECLARE_FIELD(ctx) .set_default("") @@ -115,8 +115,8 @@ struct EyeParam : public dmlc::Parameter { template inline bool InitEyeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ParamType& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 1U); @@ -175,13 +175,13 @@ struct RangeParam : public dmlc::Parameter { /*! \brief Initialize and fill output with an arbitrary value */ struct InitOpWithScalarParam : dmlc::Parameter { - TShape shape; + mxnet::TShape shape; std::string ctx; int dtype; double value; DMLC_DECLARE_PARAMETER(InitOpWithScalarParam) { DMLC_DECLARE_FIELD(shape) - .set_default(TShape()) + .set_default(mxnet::TShape()) .describe("The shape of the output"); DMLC_DECLARE_FIELD(ctx) .set_default("") @@ -208,8 +208,8 @@ inline void RangeParamParser(nnvm::NodeAttrs* attrs) { template inline bool InitShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ParamType& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 1U); @@ -370,7 +370,7 @@ void FillZerosRspImpl(mshadow::Stream *, const NDArray& dst) { CHECK_EQ(dst.storage_type(), kRowSparseStorage) << "dst should be an RSP NDArray"; if (dst.storage_initialized()) { // reset the shapes if it's not zeros (set_aux_shape() will set storage_shape to zero as well) - dst.set_aux_shape(rowsparse::kIdx, TShape(mshadow::Shape1(0))); + dst.set_aux_shape(rowsparse::kIdx, mxnet::TShape(mshadow::Shape1(0))); } } @@ -485,8 +485,8 @@ void RangeCompute(const nnvm::NodeAttrs& attrs, inline bool RangeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const RangeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 1U); @@ -508,7 +508,7 @@ inline bool RangeShape(const nnvm::NodeAttrs& attrs, } const double out_size = std::ceil((param.stop.value() - param.start) / param.step) * param.repeat; - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({static_cast(out_size)})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({static_cast(out_size)})); return true; } diff --git a/src/operator/tensor/la_op.cc b/src/operator/tensor/la_op.cc index 0f3c2954a0f6..252bdf8d9460 100644 --- a/src/operator/tensor/la_op.cc +++ b/src/operator/tensor/la_op.cc @@ -90,7 +90,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { 
return std::vector{"A", "B", "C"}; } ) -.set_attr("FInferShape", LaMatrixMultMacOpShape) +.set_attr("FInferShape", LaMatrixMultMacOpShape) .set_attr("FInferType", ElemwiseType<3, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{2, 0}}; }) @@ -165,7 +165,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A", "B"}; } ) -.set_attr("FInferShape", LaMatrixMultMacOpShape) +.set_attr("FInferShape", LaMatrixMultMacOpShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FCompute", LaOpGemmForward) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_linalg_gemm2"}) @@ -216,7 +216,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A"}; } ) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{0, 0}}; }) @@ -277,7 +277,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A"}; } ) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{0, 0}}; }) @@ -336,7 +336,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A", "B"}; } ) -.set_attr("FInferShape", LaTriangMatrixMultOpShape) +.set_attr("FInferShape", LaTriangMatrixMultOpShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{1, 0}}; }) @@ -399,7 +399,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A", "B"}; } ) -.set_attr("FInferShape", LaTriangMatrixMultOpShape) +.set_attr("FInferShape", LaTriangMatrixMultOpShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{1, 0}}; }) @@ -447,7 +447,7 @@ Examples:: .set_num_outputs(1) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A"}; } ) -.set_attr("FInferShape", LaReduceShape<2>) +.set_attr("FInferShape", LaReduceShape<2>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", LaOpForward) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_linalg_sumlogdiag"}) @@ -504,7 +504,7 @@ Examples:: .set_attr_parser(ParamParser) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A"}; } ) -.set_attr("FInferShape", LaSyrkShape) +.set_attr("FInferShape", LaSyrkShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", LaOpForward) .set_attr("FGradient", ElemwiseGradUseIn{"_backward_linalg_syrk"}) @@ -571,7 +571,7 @@ Examples:: .set_num_outputs(2) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A"}; } ) -.set_attr("FInferShape", LaLQFactShape) +.set_attr("FInferShape", LaLQFactShape) .set_attr("FInferType", ElemwiseType<1, 2>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{0, 0}}; }) @@ -640,7 +640,7 @@ Examples:: .set_num_outputs(2) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"A"}; } ) -.set_attr("FInferShape", LaEigFactShape) +.set_attr("FInferShape", LaEigFactShape) 
.set_attr("FInferType", ElemwiseType<1, 2>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs) { return std::vector>{{0, 0}}; }) diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h index 0327dd19b72c..5e18e0ef5a25 100644 --- a/src/operator/tensor/la_op.h +++ b/src/operator/tensor/la_op.h @@ -131,8 +131,8 @@ struct LaSyrkParam : public dmlc::Parameter { // Common function for shape inference for matrix mult and matrix mac. inline bool LaMatrixMultMacOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_GE(in_attrs->size(), 2); CHECK_EQ(out_attrs->size(), 1); bool transpose_a(false), transpose_b(false); @@ -167,7 +167,7 @@ inline bool LaMatrixMultMacOpShape(const nnvm::NodeAttrs& attrs, << "Incompatible matrix dimensions for multiplication"; oshape[axis] = (transpose_a ? (*in_attrs)[0][ndim-1] : (*in_attrs)[0][axis]); oshape[ndim-1] = (transpose_b ? (*in_attrs)[1][axis] : (*in_attrs)[1][ndim-1]); - TShape tshape(oshape.begin(), oshape.end()); + mxnet::TShape tshape(oshape.begin(), oshape.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape); if ( in_attrs->size() > 2 ) { // Infer/check shape of third operand of a mac. @@ -180,8 +180,8 @@ inline bool LaMatrixMultMacOpShape(const nnvm::NodeAttrs& attrs, } inline bool LaTriangMatrixMultOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { const LaTriangMatrixMultParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 2); CHECK_EQ(out_attrs->size(), 1); @@ -210,7 +210,7 @@ inline bool LaTriangMatrixMultOpShape(const nnvm::NodeAttrs& attrs, oshape[ndim-2] = (param.transpose ? (*in_attrs)[0][ndim-1] : (*in_attrs)[0][ndim-2]); oshape[ndim-1] = (*in_attrs)[1][ndim-1]; } - TShape tshape(oshape.begin(), oshape.end()); + mxnet::TShape tshape(oshape.begin(), oshape.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape); return true; } @@ -230,9 +230,9 @@ inline bool LaTriangMatrixMultOpShape(const nnvm::NodeAttrs& attrs, ishape2[odim-1] = (*out_attrs)[0][odim-1]; ishape1[odim-2] = ishape1[odim-1] = ishape2[odim-2] = (*out_attrs)[0][odim-2]; } - TShape tshape1(ishape1.begin(), ishape1.end()); + mxnet::TShape tshape1(ishape1.begin(), ishape1.end()); SHAPE_ASSIGN_CHECK(*in_attrs, 0, tshape1); - TShape tshape2(ishape2.begin(), ishape2.end()); + mxnet::TShape tshape2(ishape2.begin(), ishape2.end()); SHAPE_ASSIGN_CHECK(*in_attrs, 1, tshape2); return true; } @@ -241,8 +241,8 @@ inline bool LaTriangMatrixMultOpShape(const nnvm::NodeAttrs& attrs, template inline bool LaReduceShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { // Shape for reduction of the dim lowest dimensions to a scalar. // Can only deduct in forward direction. CHECK_EQ(in_attrs->size(), 1); @@ -257,18 +257,18 @@ inline bool LaReduceShape(const nnvm::NodeAttrs& attrs, oshape[i] = (*in_attrs)[0][i]; } // Will reduce all matrices/vectors to a scalar. 
- TShape tshape(oshape.begin(), oshape.end()); + mxnet::TShape tshape(oshape.begin(), oshape.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape); return true; } // Shape inference function for linalg_syrk inline bool LaSyrkShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1); CHECK_EQ(out_attrs->size(), 1); - const TShape& in_attr = (*in_attrs)[0]; + const mxnet::TShape& in_attr = (*in_attrs)[0]; bool transpose = nnvm::get(attrs.parsed).transpose; const int ndim = in_attr.ndim(); if ( ndim >= 2 ) { @@ -279,7 +279,7 @@ inline bool LaSyrkShape(const nnvm::NodeAttrs& attrs, } oshape[ndim-2] = (transpose ? in_attr[ndim-1] : in_attr[ndim-2]); oshape[ndim-1] = oshape[ndim-2]; - TShape tshape(oshape.begin(), oshape.end()); + mxnet::TShape tshape(oshape.begin(), oshape.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape); return true; } @@ -290,13 +290,13 @@ inline bool LaSyrkShape(const nnvm::NodeAttrs& attrs, // Shape inference function for linalg_gelqf // Inputs: A. Outputs: Q, L inline bool LaLQFactShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1); CHECK_EQ(out_attrs->size(), 2); - const TShape& in_a = (*in_attrs)[0]; - const TShape& out_q = (*out_attrs)[0]; - const TShape& out_l = (*out_attrs)[1]; + const mxnet::TShape& in_a = (*in_attrs)[0]; + const mxnet::TShape& out_q = (*out_attrs)[0]; + const mxnet::TShape& out_l = (*out_attrs)[1]; if ( in_a.ndim() >= 2 ) { // Forward shape inference. const int ndim(in_a.ndim()); @@ -309,7 +309,7 @@ inline bool LaLQFactShape(const nnvm::NodeAttrs& attrs, oshape_l[i] = in_a[i]; } oshape_l[ndim-1] = in_a[ndim-2]; - TShape tshape_l(oshape_l.begin(), oshape_l.end()); + mxnet::TShape tshape_l(oshape_l.begin(), oshape_l.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 1, tshape_l); return true; } @@ -333,13 +333,13 @@ inline bool LaLQFactShape(const nnvm::NodeAttrs& attrs, // Shape inference function for linalg_syevd // Inputs: A. Outputs: U, L inline bool LaEigFactShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1); CHECK_EQ(out_attrs->size(), 2); - const TShape& in_a = (*in_attrs)[0]; - const TShape& out_u = (*out_attrs)[0]; - const TShape& out_l = (*out_attrs)[1]; + const mxnet::TShape& in_a = (*in_attrs)[0]; + const mxnet::TShape& out_u = (*out_attrs)[0]; + const mxnet::TShape& out_l = (*out_attrs)[1]; if ( in_a.ndim() >= 2 ) { // Forward shape inference. const int ndim(in_a.ndim()); @@ -351,7 +351,7 @@ inline bool LaEigFactShape(const nnvm::NodeAttrs& attrs, for ( int i = 0; i < ndim-1; ++i ) { oshape_l[i] = in_a[i]; } - TShape tshape_l(oshape_l.begin(), oshape_l.end()); + mxnet::TShape tshape_l(oshape_l.begin(), oshape_l.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 1, tshape_l); return true; } @@ -384,7 +384,7 @@ mshadow::Tensor LaOpFlatten(const TBlob& blob, } // Collapse ranges [0,axis-1] and [axis+1,ndim-2]. 
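// --- Annotation (not part of the patch) --------------------------------
// Sketch of the factorization shape rules above (assuming, as linalg_syevd
// requires, a square trailing matrix for the eigendecomposition case): for
// linalg_gelqf on A of shape (..., m, n), Q matches A and L is (..., m, m);
// for linalg_syevd, U matches A and the eigenvalue output drops the last
// axis, giving (..., m).
#include <cassert>
#include <vector>

void FactShapes(const std::vector<long>& a, std::vector<long>* q,
                std::vector<long>* l_lq, std::vector<long>* l_eig) {
  const int nd = static_cast<int>(a.size());
  *q = a;                                      // Q (or U) matches A
  *l_lq = a;
  (*l_lq)[nd - 1] = a[nd - 2];                 // (..., m, m)
  l_eig->assign(a.begin(), a.end() - 1);       // (..., m)
}

int main() {
  std::vector<long> q, llq, leig;
  FactShapes({7, 3, 3}, &q, &llq, &leig);
  assert(llq == (std::vector<long>{7, 3, 3}));
  assert(leig == (std::vector<long>{7, 3}));
}
// ------------------------------------------------------------------------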
CHECK_EQ(dim, 4); - TShape shape(dim); + mxnet::TShape shape(dim); shape[0] = 1; for (int i = 0; i < axis; ++i) { shape[0] *= blob.shape_[i]; diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 28ed4215e0a7..3a58c1200ae0 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -47,7 +47,7 @@ namespace mxnet { namespace op { struct ReshapeParam : public dmlc::Parameter { - TShape target_shape; + mxnet::TShape target_shape; bool keep_highest; nnvm::Tuple shape; bool reverse; @@ -59,7 +59,7 @@ struct ReshapeParam : public dmlc::Parameter { .set_default(false) .describe("If true then the special values are inferred from right to left"); DMLC_DECLARE_FIELD(target_shape) - .set_default(TShape()) + .set_default(mxnet::TShape()) .describe("(Deprecated! Use ``shape`` instead.) " "Target new shape. One and only one dim can be 0, " "in which case it will be inferred from the rest of dims"); @@ -71,8 +71,8 @@ struct ReshapeParam : public dmlc::Parameter { }; template -inline TShape InferReshapeShape(const nnvm::Tuple& shape, - const TShape& dshape, bool reverse) { +inline mxnet::TShape InferReshapeShape(const nnvm::Tuple& shape, + const mxnet::TShape& dshape, bool reverse) { std::vector dshape_vec; std::vector param_shape_vec(shape.begin(), shape.end()); for (index_t i = 0; i < dshape.ndim(); ++i) { @@ -148,11 +148,11 @@ inline TShape InferReshapeShape(const nnvm::Tuple& shape, std::reverse(dshape_vec.begin(), dshape_vec.end()); std::reverse(tmp.begin(), tmp.end()); } - TShape oshape(tmp.begin(), tmp.end()); + mxnet::TShape oshape(tmp.begin(), tmp.end()); return oshape; } -inline bool ReverseReshapeInferShape(TShape *in, const TShape& out) { +inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out) { if (in->Size() && out.Size()) { return true; } else if (!out.Size()) { @@ -176,14 +176,14 @@ inline bool ReverseReshapeInferShape(TShape *in, const TShape& out) { } inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ReshapeParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); - TShape &dshape = (*in_attrs)[0]; + mxnet::TShape &dshape = (*in_attrs)[0]; if (dshape.ndim() == 0) return false; - TShape oshape; + mxnet::TShape oshape; if (param_.shape.ndim() != 0) { oshape = InferReshapeShape(param_.shape, dshape, param_.reverse); } else if (param_.target_shape.ndim()) { @@ -218,11 +218,11 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, } inline bool FlattenShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); - const TShape &dshape = (*in_attrs)[0]; + const mxnet::TShape &dshape = (*in_attrs)[0]; if (dshape.ndim() == 0) return false; uint32_t target_dim = 1; for (uint32_t i = 1; i < dshape.ndim(); ++i) { @@ -233,9 +233,9 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs, } struct TransposeParam : public dmlc::Parameter { - TShape axes; + mxnet::TShape axes; DMLC_DECLARE_PARAMETER(TransposeParam) { - DMLC_DECLARE_FIELD(axes).set_default(TShape()) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) .describe("Target axis order. 
By default the axes will be inverted."); } }; @@ -244,7 +244,7 @@ template void TransposeImpl(RunContext ctx, const TBlob& src, const TBlob& ret, - const TShape& axes) { + const mxnet::TShape& axes) { using namespace mshadow; using namespace mshadow::expr; CHECK_EQ(src.type_flag_, ret.type_flag_); @@ -310,7 +310,7 @@ void Transpose(const nnvm::NodeAttrs& attrs, const TransposeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; if (param.axes.ndim() == 0) { - TShape axes = TShape(inputs[0].ndim()); + mxnet::TShape axes = mxnet::TShape(inputs[0].ndim()); for (index_t i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; } @@ -321,14 +321,14 @@ void Transpose(const nnvm::NodeAttrs& attrs, } inline bool TransposeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const TransposeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - TShape& shp = (*in_attrs)[0]; + mxnet::TShape& shp = (*in_attrs)[0]; CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions"; - TShape ret(shp.ndim()); + mxnet::TShape ret(shp.ndim()); if (param.axes.ndim() == 0) { for (index_t i = 0; i < shp.ndim(); ++i) { ret[i] = shp[shp.ndim()-1-i]; @@ -357,8 +357,8 @@ struct ExpandDimParam : public dmlc::Parameter { inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ExpandDimParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); @@ -366,8 +366,8 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, return false; } - TShape& ishape = (*in_attrs)[0]; - TShape& oshape = (*out_attrs)[0]; + mxnet::TShape& ishape = (*in_attrs)[0]; + mxnet::TShape& oshape = (*out_attrs)[0]; int indim = ishape.ndim(); bool unknown_ishape = false; if (0 == indim) { @@ -382,7 +382,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, CHECK(axis >= 0 && axis <= indim) << "axis must be in the range [" << -indim << ", " << indim << "] (" << param.axis << " provided)"; - TShape ret(indim + 1); + mxnet::TShape ret(indim + 1); for (int i = 0; i < axis; ++i) { ret[i] = (unknown_ishape? 
0 : ishape[i]); } @@ -392,7 +392,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, } SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret); - ret = TShape(indim); + ret = mxnet::TShape(indim); for (int i = 0; i < axis; ++i) ret[i] = oshape[i]; for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i]; SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret); @@ -482,7 +482,7 @@ void SliceCsrIndPtrImpl(const int begin, const int end, RunContext ctx, * Slice a CSR NDArray for first dimension */ template -void SliceDimOneCsrImpl(const TShape &begin, const TShape &end, const OpContext& ctx, +void SliceDimOneCsrImpl(const mxnet::TShape &begin, const mxnet::TShape &end, const OpContext& ctx, const NDArray &in, const NDArray &out) { using namespace mshadow; using namespace mxnet_op; @@ -571,7 +571,7 @@ struct SliceDimTwoCsrAssign { * Slice a CSR NDArray for two dimensions */ template -void SliceDimTwoCsrImpl(const TShape &begin, const TShape &end, const OpContext& ctx, +void SliceDimTwoCsrImpl(const mxnet::TShape &begin, const mxnet::TShape &end, const OpContext& ctx, const NDArray &in, const NDArray &out); @@ -582,11 +582,11 @@ void SliceCsrImpl(const SliceParam ¶m, const OpContext& ctx, CHECK_NE(req, kAddTo) << "kAddTo for Slice on CSR input is not supported"; CHECK_NE(req, kWriteInplace) << "kWriteInplace for Slice on CSR input is not supported"; - const TShape ishape = in.shape(); - const TShape oshape = out.shape(); + const mxnet::TShape ishape = in.shape(); + const mxnet::TShape oshape = out.shape(); uint32_t N = ishape.ndim(); - TShape begin(N), end(N); + mxnet::TShape begin(N), end(N); for (uint32_t i = 0; i < N; ++i) { int s = 0; if (param.begin[i]) { @@ -629,7 +629,7 @@ void SliceEx(const nnvm::NodeAttrs& attrs, } template -inline void GetIndexRange(const TShape& dshape, +inline void GetIndexRange(const mxnet::TShape& dshape, const nnvm::Tuple>& param_begin, const nnvm::Tuple>& param_end, const nnvm::Tuple>& param_step, @@ -706,7 +706,7 @@ inline void GetIndexRange(const TShape& dshape, inline void SetSliceOpOutputDimSize(const index_t i, const int b, const int e, const int s, - TShape* oshape) { + mxnet::TShape* oshape) { if (e != b) { if (s > 0) { CHECK_LT(b, e) << "slicing with begin=[" << i << "]=" << b << ", end[" << i << "]=" @@ -721,14 +721,14 @@ inline void SetSliceOpOutputDimSize(const index_t i, const int b, } inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - const TShape& dshape = (*in_attrs)[0]; + const mxnet::TShape& dshape = (*in_attrs)[0]; if (dshape.ndim() == 0) return false; const SliceParam& param = nnvm::get(attrs.parsed); - TShape oshape = dshape; + mxnet::TShape oshape = dshape; MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { common::StaticArray begin, end, step; @@ -935,13 +935,13 @@ void SliceOpBackward(const nnvm::NodeAttrs& attrs, } inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); - const TShape& dshape = (*in_attrs)[0]; + const mxnet::TShape& dshape = (*in_attrs)[0]; if (dshape.ndim() == 0U || dshape.Size() == 0U) return false; - TShape vshape = dshape; // vshape is the value shape on the right hand side + mxnet::TShape vshape = dshape; // vshape is the value shape on the right hand side const SliceParam& 
param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { common::StaticArray begin, end, step; @@ -1017,11 +1017,11 @@ struct SliceAssignScalarParam : public dmlc::Parameter { }; inline bool SliceAssignScalarOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - const TShape& dshape = (*in_attrs)[0]; + const mxnet::TShape& dshape = (*in_attrs)[0]; if (dshape.ndim() == 0U || dshape.Size() == 0U) return false; SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape); return true; @@ -1080,7 +1080,7 @@ void SliceAssignScalarOpForward(const nnvm::NodeAttrs& attrs, LOG(FATAL) << "_crop_assign_scalar only supports kWriteTo and kWriteInplace"; } - TShape vshape = data.shape_; + mxnet::TShape vshape = data.shape_; const SliceAssignScalarParam& param = nnvm::get(attrs.parsed); MXNET_NDIM_SWITCH(data.ndim(), ndim, { common::StaticArray begin, end, step; @@ -1113,7 +1113,7 @@ struct SliceAxisParam : public dmlc::Parameter { } }; -inline void GetSliceAxisParams(const SliceAxisParam& param, const TShape& ishape, +inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape& ishape, int* axis, index_t* begin, index_t* end) { *axis = param.axis; if (*axis < 0) { @@ -1150,16 +1150,16 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const TShape& ishape } inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const SliceAxisParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - TShape& ishape = (*in_attrs)[0]; + mxnet::TShape& ishape = (*in_attrs)[0]; int axis; index_t begin, end; GetSliceAxisParams(param, ishape, &axis, &begin, &end); - TShape shape(ishape.ndim()); + mxnet::TShape shape(ishape.ndim()); for (index_t i = 0; i < ishape.ndim(); ++i) { if (static_cast(i) == axis) { shape[i] = static_cast(end - begin); @@ -1255,9 +1255,9 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs, } struct SliceLikeParam : public dmlc::Parameter { - TShape axes; + mxnet::TShape axes; DMLC_DECLARE_PARAMETER(SliceLikeParam) { - DMLC_DECLARE_FIELD(axes).set_default(TShape()) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) .describe("List of axes on which input data will be sliced according to the " "corresponding size of the second input. By default will slice on " "all axes. 
Negative axes are supported."); @@ -1265,13 +1265,13 @@ struct SliceLikeParam : public dmlc::Parameter { }; inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const SliceLikeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 2U); CHECK_EQ(out_attrs->size(), 1U); - TShape& ishape = (*in_attrs)[0]; - TShape& from_shape = (*in_attrs)[1]; + mxnet::TShape& ishape = (*in_attrs)[0]; + mxnet::TShape& from_shape = (*in_attrs)[1]; if (param.axes.ndim() == 0) { CHECK_EQ(ishape.ndim(), from_shape.ndim()) << "By default slice_axis performs slice on all axes, but ndim mismatch " @@ -1283,7 +1283,7 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, } SHAPE_ASSIGN_CHECK(*out_attrs, 0, from_shape); } else { - TShape shape(ishape); + mxnet::TShape shape(ishape); for (index_t i = 0; i < param.axes.ndim(); ++i) { int axis = static_cast(param.axes[i]); if (axis < 0) { @@ -1305,9 +1305,9 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, return true; } -inline void SliceLikeInferRanges(const TShape& dshape, - const TShape& fshape, - const TShape& axes, +inline void SliceLikeInferRanges(const mxnet::TShape& dshape, + const mxnet::TShape& fshape, + const mxnet::TShape& axes, nnvm::Tuple>* param_begin, nnvm::Tuple>* param_end, nnvm::Tuple>* param_step) { @@ -1356,8 +1356,8 @@ void SliceLikeForward(const nnvm::NodeAttrs& attrs, mshadow::Stream *s = ctx.get_stream(); const TBlob& data = inputs[0]; const TBlob& out = outputs[0]; - const TShape& ishape = data.shape_; - const TShape& from_shape = inputs[1].shape_; + const mxnet::TShape& ishape = data.shape_; + const mxnet::TShape& from_shape = inputs[1].shape_; nnvm::Tuple> param_begin; nnvm::Tuple> param_end; nnvm::Tuple> param_step; @@ -1404,8 +1404,8 @@ void SliceLikeBackward(const nnvm::NodeAttrs& attrs, LOG(FATAL) << "_slice_like_backward does not support kWriteInplace"; } - const TShape& ishape = ograd.shape_; - const TShape& from_shape = outputs[1].shape_; + const mxnet::TShape& ishape = ograd.shape_; + const mxnet::TShape& from_shape = outputs[1].shape_; nnvm::Tuple> param_begin; nnvm::Tuple> param_end; nnvm::Tuple> param_step; @@ -1543,7 +1543,7 @@ struct RepeatParam : public dmlc::Parameter { * \brief Helper function for getting user input params for the operator repeat. * Sanity check the user input values. 
*/ -inline void GetRepeatParams(const RepeatParam& param, const TShape& ishape, +inline void GetRepeatParams(const RepeatParam& param, const mxnet::TShape& ishape, int* repeats, dmlc::optional* axisOpt) { *repeats = param.repeats; CHECK_GE(*repeats, 0) << "repeats cannot be a negative number"; @@ -1559,18 +1559,18 @@ inline void GetRepeatParams(const RepeatParam& param, const TShape& ishape, } inline bool RepeatOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const RepeatParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - const TShape& ishape = (*in_attrs)[0]; + const mxnet::TShape& ishape = (*in_attrs)[0]; int repeats = 0; dmlc::optional axisOpt; GetRepeatParams(param, ishape, &repeats, &axisOpt); // If 0 repeats, return an empty 0 dim array if (0 == repeats) { - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape()); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape()); return true; } @@ -1581,7 +1581,7 @@ inline bool RepeatOpShape(const nnvm::NodeAttrs& attrs, if (axis < 0) { axis += ndims; } - TShape shape(ishape.ndim()); + mxnet::TShape shape(ishape.ndim()); for (index_t i = 0; i < ishape.ndim(); ++i) { if (static_cast(i) == axis) { shape[i] = static_cast(repeats) * ishape[i]; @@ -1591,7 +1591,7 @@ inline bool RepeatOpShape(const nnvm::NodeAttrs& attrs, } SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); } else { // If axis is not provided by the user, return a flat 1D array of size = in.size*repeats - TShape shape(1); + mxnet::TShape shape(1); shape[0] = ishape.Size() * static_cast(repeats); SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); } @@ -1614,12 +1614,13 @@ inline bool RepeatOpType(const nnvm::NodeAttrs& attrs, * \brief Reshape the input and output tensors for * using broadcast_to to achieve the functionality * of operator repeat. - * \return a pair of TShape's, first is the reshaped + * \return a pair of mxnet::TShape's, first is the reshaped * input shape, second is the reshaped output shape.
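* (A worked example for illustration: with ishape = (2, 3), axis = 0 and repeats = 2, the returned pair is rshape = (2, 1, 3) and bshape = (2, 2, 3); broadcasting rshape to bshape and collapsing the first two dims gives the repeated output of shape (4, 3).)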
*/ -inline std::pair ReshapeInputOutputForRepeatOp(const TShape& ishape, - const dmlc::optional& axisOpt, - const int repeats) { +inline std::pair ReshapeInputOutputForRepeatOp( + const mxnet::TShape& ishape, + const dmlc::optional& axisOpt, + const int repeats) { if (static_cast(axisOpt)) { int axis = axisOpt.value(); int ndim = static_cast(ishape.ndim()); @@ -1629,9 +1630,9 @@ inline std::pair ReshapeInputOutputForRepeatOp(const TShape& ish CHECK(axis >= 0 && axis < static_cast(ishape.ndim())) << "Invalid input of axis"; // reshape the input tensor by adding a dim at the (axis+1)-th dim - TShape rshape(ishape.ndim()+1); + mxnet::TShape rshape(ishape.ndim()+1); // the shape we want to broadcast to - TShape bshape(rshape.ndim()); + mxnet::TShape bshape(rshape.ndim()); int i = 0; while (i <= axis) { rshape[i] = bshape[i] = ishape[i]; @@ -1650,11 +1651,11 @@ inline std::pair ReshapeInputOutputForRepeatOp(const TShape& ish // reshape the tensor into shape (ishape.Size(), 1) // then add one dim at axis = 1 and broadcast to // shape (ishape.Size(), repeats) - TShape rshape(2); + mxnet::TShape rshape(2); rshape[0] = ishape.Size(); rshape[1] = 1; - TShape bshape(2); + mxnet::TShape bshape(2); bshape[0] = rshape[0]; bshape[1] = repeats; return std::make_pair(rshape, bshape); @@ -1668,7 +1669,7 @@ void RepeatOpForward(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { const TBlob& iTBlob = inputs[0]; - const TShape& ishape = iTBlob.shape_; + const mxnet::TShape& ishape = iTBlob.shape_; if (ishape.ndim() == 0) return; int repeats = 0; @@ -1677,7 +1678,8 @@ void RepeatOpForward(const nnvm::NodeAttrs& attrs, GetRepeatParams(param, ishape, &repeats, &axisOpt); if (0 == repeats) return; - std::pair rshapes = ReshapeInputOutputForRepeatOp(ishape, axisOpt, repeats); + std::pair rshapes = \ + ReshapeInputOutputForRepeatOp(ishape, axisOpt, repeats); // reshaped input tblob TBlob iblob(inputs[0].dptr_, rshapes.first, inputs[0].dev_mask(), @@ -1711,7 +1713,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs, CHECK_EQ(inputs.size(), 1U); CHECK_EQ(outputs.size(), 1U); - const TShape& oshape = outputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; if (oshape.ndim() == 0) return; int repeats = 0; @@ -1720,7 +1722,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs, GetRepeatParams(param, oshape, &repeats, &axisOpt); if (0 == repeats) return; - std::pair rshapes = + std::pair rshapes = ReshapeInputOutputForRepeatOp(oshape, axisOpt, repeats); // reshaped output grad tblob @@ -1738,7 +1740,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs, } struct TileParam : public dmlc::Parameter { - TShape reps; + mxnet::TShape reps; DMLC_DECLARE_PARAMETER(TileParam) { DMLC_DECLARE_FIELD(reps) .describe("The number of times for repeating the tensor a. 
Each dim size of reps" @@ -1750,13 +1752,13 @@ struct TileParam : public dmlc::Parameter { }; inline bool TileOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); const TileParam& param = nnvm::get(attrs.parsed); - const TShape& ishape = (*in_attrs)[0]; - const TShape& reps = param.reps; + const mxnet::TShape& ishape = (*in_attrs)[0]; + const mxnet::TShape& reps = param.reps; // If reps is empty, return a identical input array if (reps.ndim() == 0 || ishape.ndim() == 0) { SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape); @@ -1765,7 +1767,7 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs, for (size_t i = 0; i < reps.ndim(); ++i) { CHECK_GT(reps[i], 0) << "invalid reps=" << i << ", dim size must be greater than zero"; } - TShape oshape(std::max(ishape.ndim(), reps.ndim())); + mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim())); int i1 = static_cast(ishape.ndim()) - 1; int i2 = static_cast(reps.ndim()) - 1; for (int i = static_cast(oshape.ndim()) - 1; i >= 0; --i) { @@ -1797,20 +1799,21 @@ inline bool TileOpType(const nnvm::NodeAttrs& attrs, * \brief Reshape the input and output tensors for * using broadcast_to to achieve the funcitonality * of operator tile. - * \return a pair of TShape's, first is the reshaped + * \return a pair of mxnet::TShape's, first is the reshaped * input shape, second is the reshaped output shape. */ -inline std::pair ReshapeInputOutputForTileOp(const TShape& ishape, - const TShape& reps) { +inline std::pair ReshapeInputOutputForTileOp( + const mxnet::TShape& ishape, + const mxnet::TShape& reps) { if (ishape.ndim() == 0 || reps.ndim() == 0) { return std::make_pair(ishape, ishape); } // The shape we want to broadcast to - TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2); + mxnet::TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2); // The shape of the input tensor after adding new axes before each dim - TShape rshape(bshape.ndim()); + mxnet::TShape rshape(bshape.ndim()); int i1 = static_cast(ishape.ndim()) - 1; int i2 = static_cast(reps.ndim()) - 1; @@ -1853,15 +1856,15 @@ void TileOpForward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); if (inputs[0].Size() == 0) return; - const TShape& ishape = inputs[0].shape_; - const TShape& reps = nnvm::get(attrs.parsed).reps; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& reps = nnvm::get(attrs.parsed).reps; // If any one of the number in reps is zero, return immediately for (index_t i = 0; i < reps.ndim(); ++i) { if (0 == reps[i]) return; } - std::pair rshapes = ReshapeInputOutputForTileOp(ishape, reps); + std::pair rshapes = ReshapeInputOutputForTileOp(ishape, reps); // reshaped input tblob TBlob iblob(inputs[0].dptr_, rshapes.first, inputs[0].dev_mask(), @@ -1895,15 +1898,15 @@ void TileOpBackward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); if (inputs[0].Size() == 0) return; - const TShape& oshape = outputs[0].shape_; - const TShape& reps = nnvm::get(attrs.parsed).reps; + const mxnet::TShape& oshape = outputs[0].shape_; + const mxnet::TShape& reps = nnvm::get(attrs.parsed).reps; // If any one of the number in reps is zero, return immediately for (index_t i = 0; i < reps.ndim(); ++i) { if (0 == reps[i]) return; } - std::pair rshapes = ReshapeInputOutputForTileOp(oshape, reps); + std::pair rshapes = ReshapeInputOutputForTileOp(oshape, reps); // reshaped output grad tblob TBlob 
oblob(outputs[0].dptr_, rshapes.first, outputs[0].dev_mask(), @@ -1984,7 +1987,7 @@ void ReverseOpForward(const nnvm::NodeAttrs& attrs, CHECK_LT(param.axis.ndim(), REVERSE_MAX_DIM); Stream *s = ctx.get_stream(); - const TShape& ishape = inputs[0].shape_; + const mxnet::TShape& ishape = inputs[0].shape_; std::vector stride_(param.axis.ndim()); std::vector trailing_(param.axis.ndim()); @@ -2046,17 +2049,17 @@ struct StackParam : public dmlc::Parameter { inline bool StackOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const StackParam& param = dmlc::get(attrs.parsed); - TShape dshape; - for (const TShape& i : (*in_attrs)) { + mxnet::TShape dshape; + for (const mxnet::TShape& i : (*in_attrs)) { shape_assign(&dshape, i); } if (dshape.ndim() == 0) return false; - TShape oshape(dshape.ndim() + 1); + mxnet::TShape oshape(dshape.ndim() + 1); int axis = CheckAxis(param.axis, oshape.ndim()); for (int i = 0; i < axis; ++i) { oshape[i] = dshape[i]; @@ -2140,10 +2143,10 @@ void StackOpBackward(const nnvm::NodeAttrs& attrs, } struct SqueezeParam : public dmlc::Parameter { - dmlc::optional axis; + dmlc::optional axis; DMLC_DECLARE_PARAMETER(SqueezeParam) { DMLC_DECLARE_FIELD(axis) - .set_default(dmlc::optional()) + .set_default(dmlc::optional()) .describe("Selects a subset of the single-dimensional entries in the shape." " If an axis is selected with shape entry greater than one, an error is raised."); } @@ -2153,7 +2156,7 @@ struct SqueezeParam : public dmlc::Parameter { // move all the zeros to the last of the shape array // and keep the relative order of the non-zero values. // Returns the new shape size after moving all zeros to the end. -inline size_t SqueezeShapeHelper(TShape* shape) { +inline size_t SqueezeShapeHelper(mxnet::TShape* shape) { CHECK(shape != nullptr); size_t count = 0; for (size_t i = 0; i < shape->ndim(); ++i) { @@ -2167,18 +2170,18 @@ inline size_t SqueezeShapeHelper(TShape* shape) { } inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const SqueezeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); - const TShape& dshape = in_attrs->at(0); + const mxnet::TShape& dshape = in_attrs->at(0); const int dndim = dshape.ndim(); if (shape_is_none(dshape)) return false; - TShape oshape = dshape; + mxnet::TShape oshape = dshape; if (param.axis.has_value()) { // preprocess axis - TShape axes = param.axis.value(); + mxnet::TShape axes = param.axis.value(); for (size_t i = 0; i < axes.ndim(); ++i) { if (axes[i] < 0) { axes[i] += dndim; @@ -2203,7 +2206,7 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, oshape[0] = 1; oshape_size = 1; } - SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape(oshape.data(), oshape.data()+oshape_size)); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(oshape.data(), oshape.data()+oshape_size)); return true; } @@ -2216,16 +2219,16 @@ struct DepthToSpaceParam : public dmlc::Parameter { }; inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { const DepthToSpaceParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Depth To Space requires exactly 
4D tensor"; - TShape expected_out(4); + mxnet::TShape expected_out(4); - TShape& in_shape = in_attrs->at(0); + mxnet::TShape& in_shape = in_attrs->at(0); int block = param.block_size; CHECK_NE(block, 0) << "block_size must be a positive integer value"; CHECK_NE(in_shape[1], 0) << "Depth dimension:1 cannot be 0"; @@ -2380,16 +2383,16 @@ void DepthToSpaceOpForward(const nnvm::NodeAttrs& attrs, } inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { const DepthToSpaceParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Space To Depth requires exactly 4D tensor"; - TShape expected_out(in_attrs->at(0).ndim()); + mxnet::TShape expected_out(in_attrs->at(0).ndim()); - TShape& in_shape = in_attrs->at(0); + mxnet::TShape& in_shape = in_attrs->at(0); int block = param.block_size; CHECK_NE(block, 0) << "block_size must be a positive integer value"; CHECK_NE(in_shape[0], 0) @@ -2534,7 +2537,7 @@ enum SplitOpInputs {kData}; } // namespace split_enum struct SplitParam : public dmlc::Parameter { - TShape indices; + mxnet::TShape indices; int axis; bool squeeze_axis; int sections; @@ -2555,8 +2558,8 @@ struct SplitParam : public dmlc::Parameter { } }; // struct SplitParam -inline TShape GetSplitIndices(const TShape& ishape, int axis, int sections) { - TShape indices(sections+1); +inline mxnet::TShape GetSplitIndices(const mxnet::TShape& ishape, int axis, int sections) { + mxnet::TShape indices(sections+1); indices[0] = 0; int64_t section_size = ishape[axis] / sections; for (int i = 0; i < sections; ++i) { @@ -2581,13 +2584,13 @@ inline bool SplitOpType(const nnvm::NodeAttrs& attrs, } inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { using namespace mshadow; const SplitParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); - TShape dshape = in_attrs->at(split_enum::kData); - TShape ishape = in_attrs->at(split_enum::kData); + mxnet::TShape dshape = in_attrs->at(split_enum::kData); + mxnet::TShape ishape = in_attrs->at(split_enum::kData); if (dshape.ndim() == 0) return false; if (param.axis >= 0) { CHECK_LT(static_cast(param.axis), dshape.ndim()); @@ -2598,15 +2601,15 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, if (real_axis < 0) { real_axis += dshape.ndim(); } - const TShape indices = + const mxnet::TShape indices = (param.sections > 0) ? GetSplitIndices(ishape, real_axis, param.sections) : param.indices; int num_outputs = (param.sections > 0) ? 
indices.ndim() - 1 : indices.ndim(); // Pre-compute squeezed output shape for future usage - TShape squeezed_dshape = dshape; + mxnet::TShape squeezed_dshape = dshape; for (int d = real_axis; d < static_cast(squeezed_dshape.ndim()) - 1; ++d) { squeezed_dshape[d] = squeezed_dshape[d+1]; } - squeezed_dshape = TShape(&squeezed_dshape[0], &squeezed_dshape[squeezed_dshape.ndim()-1]); + squeezed_dshape = mxnet::TShape(&squeezed_dshape[0], &squeezed_dshape[squeezed_dshape.ndim()-1]); // Assign shape to every output for (int i = 0; i < num_outputs; ++i) { int start = indices[i]; @@ -2623,7 +2626,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, i, dshape); } } - TShape back_calculate_dshape = ishape; + mxnet::TShape back_calculate_dshape = ishape; back_calculate_dshape[real_axis] = 0; for (int d = 0; d < real_axis; ++d) { back_calculate_dshape[d] = (*out_attrs)[0][d]; @@ -2739,8 +2742,8 @@ inline void SplitOpForward(const nnvm::NodeAttrs& attrs, } size_t workspace_size = 0; - const TShape& ishape = input_data.shape_; - const TShape split_pts = + const mxnet::TShape& ishape = input_data.shape_; + const mxnet::TShape split_pts = (param.sections > 0) ? GetSplitIndices(ishape, real_axis, param.sections) : param.indices; std::vector indices; for (const auto& section : split_pts) { @@ -2803,8 +2806,8 @@ inline void SplitOpBackward(const nnvm::NodeAttrs& attrs, } size_t workspace_size = 0; - const TShape& ishape = input_grad.shape_; - const TShape split_pts = + const mxnet::TShape& ishape = input_grad.shape_; + const mxnet::TShape split_pts = (param.sections > 0) ? GetSplitIndices(ishape, real_axis, param.sections) : param.indices; std::vector indices; for (const auto& section : split_pts) { diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 3a244ac89790..3bca330f98b0 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -34,8 +34,8 @@ namespace op { template<> -void SliceDimTwoCsrImpl(const TShape &begin, const TShape &end, const OpContext& ctx, - const NDArray &in, const NDArray &out) { +void SliceDimTwoCsrImpl(const mxnet::TShape &begin, const mxnet::TShape &end, + const OpContext& ctx, const NDArray &in, const NDArray &out) { using namespace mshadow; using namespace mxnet_op; using namespace csr; @@ -223,7 +223,7 @@ If the argument `reverse` is set to 1, then the special values are inferred from .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ReshapeShape) +.set_attr("FInferShape", ReshapeShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_reshape"}) .set_attr("FCompute", UnaryOp::IdentityCompute) @@ -315,7 +315,7 @@ Example:: )code" ADD_FILELINE) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", FlattenShape) +.set_attr("FInferShape", FlattenShape) .set_attr("FInferType", ElemwiseType<1, 1>) #if MXNET_USE_MKLDNN == 1 .set_attr("FInferStorageType", FlattenStorageType) @@ -371,7 +371,7 @@ Examples:: .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", TransposeShape) +.set_attr("FInferShape", TransposeShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { @@ -381,7 +381,7 @@ Examples:: "transpose", n, ograds, {}, std::unordered_map()); } else { - TShape axes = TShape(param.axes.ndim()); + mxnet::TShape axes = mxnet::TShape(param.axes.ndim()); for (index_t i = 0; i 
< axes.ndim(); ++i) { axes[param.axes[i]] = i; } @@ -407,7 +407,7 @@ will return a new array with shape ``(2,1,3,4)``. .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ExpandDimShape) +.set_attr("FInferShape", ExpandDimShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ @@ -495,7 +495,7 @@ Example:: [1., 3.]] )code" ADD_FILELINE) .set_attr_parser(ParamParser) -.set_attr("FInferShape", SliceOpShape) +.set_attr("FInferShape", SliceOpShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FResourceRequest", [](const NodeAttrs& attrs) { @@ -530,7 +530,7 @@ NNVM_REGISTER_OP(_slice_assign) return std::vector{"lhs", "rhs"}; }) .set_attr_parser(ParamParser) -.set_attr("FInferShape", SliceAssignOpShape) +.set_attr("FInferShape", SliceAssignOpShape) .set_attr("FInferType", ElemwiseType<2, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ @@ -551,7 +551,7 @@ NNVM_REGISTER_OP(_slice_assign_scalar) .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", SliceAssignScalarOpShape) +.set_attr("FInferShape", SliceAssignScalarOpShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FInplaceOption", [](const NodeAttrs& attrs){ @@ -587,7 +587,7 @@ Examples:: .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", SliceAxisShape) +.set_attr("FInferShape", SliceAxisShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", SliceAxis) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_slice_axis"}) @@ -660,7 +660,7 @@ Example:: [](const NodeAttrs& attrs) { return std::vector{"data", "shape_like"}; }) -.set_attr("FInferShape", SliceLikeShape) +.set_attr("FInferShape", SliceLikeShape) .set_attr("FInferType", [](const nnvm::NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { @@ -714,7 +714,7 @@ parameter values: .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", Clip) .set_attr("FComputeEx", ClipEx) @@ -791,7 +791,7 @@ The parameter ``axis`` specifies the axis along which to perform repeat:: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", RepeatOpShape) +.set_attr("FInferShape", RepeatOpShape) .set_attr("FInferType", RepeatOpType) .set_attr("FCompute", RepeatOpForward) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_repeat"}) @@ -852,7 +852,7 @@ three cases: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", TileOpShape) +.set_attr("FInferShape", TileOpShape) .set_attr("FInferType", TileOpType) .set_attr("FCompute", TileOpForward) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_tile"}) @@ -898,7 +898,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector {ResourceRequest::kTempSpace}; }) -.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferShape", ElemwiseShape<1, 1>) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", ReverseOpForward) .set_attr("FGradient", ElemwiseGradUseNone{ "_backward_reverse" }) @@ -949,7 +949,7 @@ Examples:: return ret; }) .set_attr("key_var_num_args", "num_args") -.set_attr("FInferShape", StackOpShape) +.set_attr("FInferShape", StackOpShape) .set_attr("FInferType", ElemwiseType<-1, 1>) .set_attr("FCompute", StackOpForward) .set_attr("FGradient", 
ElemwiseGradUseNone{"_backward_stack"}) @@ -990,7 +990,7 @@ Examples:: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", SqueezeShape) +.set_attr("FInferShape", SqueezeShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", UnaryOp::IdentityCompute) .set_attr("FGradient", ElemwiseGradUseNone{"_backward_squeeze"}) @@ -1045,7 +1045,7 @@ Example:: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", DepthToSpaceOpShape) +.set_attr("FInferShape", DepthToSpaceOpShape) .set_attr("FInferType", DepthToSpaceOpType) .set_attr("FCompute", DepthToSpaceOpForward) .set_attr("FResourceRequest", @@ -1099,7 +1099,7 @@ Example:: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", SpaceToDepthOpShape) +.set_attr("FInferShape", SpaceToDepthOpShape) .set_attr("FInferType", SpaceToDepthOpType) .set_attr("FCompute", SpaceToDepthOpForward) .set_attr("FResourceRequest", @@ -1185,7 +1185,7 @@ Example:: [](const NodeAttrs& attrs) { return std::vector{"data"}; }) -.set_attr("FInferShape", SplitOpShape) +.set_attr("FInferShape", SplitOpShape) .set_attr("FInferType", SplitOpType) .set_attr("FCompute", SplitOpForward) .set_attr("FResourceRequest", diff --git a/src/operator/tensor/matrix_op.cu b/src/operator/tensor/matrix_op.cu index 87311276da26..b382c55ce74a 100644 --- a/src/operator/tensor/matrix_op.cu +++ b/src/operator/tensor/matrix_op.cu @@ -67,8 +67,8 @@ struct SliceMarkCsrIndPtr { template<> -void SliceDimTwoCsrImpl(const TShape &begin, const TShape &end, const OpContext& ctx, - const NDArray &in, const NDArray &out) { +void SliceDimTwoCsrImpl(const mxnet::TShape &begin, const mxnet::TShape &end, + const OpContext& ctx, const NDArray &in, const NDArray &out) { using namespace mshadow; using namespace mxnet_op; using namespace csr; diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h index 1847a533d6ea..5a95e05ffb65 100644 --- a/src/operator/tensor/ordering_op-inl.h +++ b/src/operator/tensor/ordering_op-inl.h @@ -128,9 +128,9 @@ struct ArgSortParam : public dmlc::Parameter { } }; -inline void ParseTopKParam(const TShape& src_shape, const TopKParam& param, TShape *target_shape, - int *batch_size, int *element_num, int *axis, int *k, - bool *do_transpose, bool *is_ascend) { +inline void ParseTopKParam(const mxnet::TShape& src_shape, const TopKParam& param, + mxnet::TShape *target_shape, int *batch_size, int *element_num, + int *axis, int *k, bool *do_transpose, bool *is_ascend) { *do_transpose = false; *k = param.k; *is_ascend = param.is_ascend; @@ -387,7 +387,7 @@ void TopKImpl(const RunContext &ctx, bool is_ascend = false; int k = 0; size_t alignment = std::max(sizeof(DType), sizeof(int)); - TShape target_shape; + mxnet::TShape target_shape; ParseTopKParam(src.shape_, param, &target_shape, &batch_size, &element_num, &axis, &k, &do_transpose, &is_ascend); CHECK_LE(element_num, mxnet::common::MaxIntegerValue()) @@ -479,7 +479,7 @@ void TopKImpl(const RunContext &ctx, element_num)), 0, k), Shape1(batch_size * k)); if (do_transpose) { - TShape src_shape = src.shape_.FlatTo3D(axis); + mxnet::TShape src_shape = src.shape_.FlatTo3D(axis); CHECK_EQ(sel_indices.CheckContiguous(), true); sel_indices = transpose_indices(sel_indices, Shape3(src_shape[0], src_shape[2], src_shape[1]), Shape3(0, 2, 1)); @@ -610,7 +610,7 @@ void TopKBackwardImpl(const OpContext &ctx, bool do_transpose = false; bool is_ascend = false; int k = 0; - TShape target_shape; + mxnet::TShape 
target_shape; ParseTopKParam(outputs[0].shape_, param, &target_shape, &batch_size, &element_num, &axis, &k, &do_transpose, &is_ascend); CHECK_LE(element_num, mxnet::common::MaxIntegerValue()) @@ -632,11 +632,11 @@ void TopKBackwardImpl(const OpContext &ctx, batch_shift.dptr_); if (do_transpose) { Tensor indices = inputs[2].FlatTo1D(s); - TShape src_shape = outputs[0].shape_.FlatTo3D(axis); + mxnet::TShape src_shape = outputs[0].shape_.FlatTo3D(axis); sel_indices = reshape(transpose( broadcast_to(inplace_reshape(batch_shift, Shape3(src_shape[0], src_shape[2], 1)), - TShape(Shape3(src_shape[0], src_shape[2], k))), + mxnet::TShape(Shape3(src_shape[0], src_shape[2], k))), Shape3(0, 2, 1)), Shape1(batch_size * k)); sel_indices += tcast(indices); @@ -647,7 +647,7 @@ void TopKBackwardImpl(const OpContext &ctx, inputs[2].get_with_shape(Shape2(batch_size, k), s); sel_indices = reshape(tcast(indices) + broadcast_to(inplace_reshape(batch_shift, Shape2(batch_size, 1)), - TShape(Shape2(batch_size, k))), + mxnet::TShape(Shape2(batch_size, k))), Shape1(batch_size * k)); } CHECK_EQ(sel_indices.CheckContiguous(), true); @@ -742,8 +742,8 @@ inline bool TopKType(const nnvm::NodeAttrs& attrs, } inline bool TopKShapeImpl(const TopKParam& param, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); if (param.ret_typ == topk_enum::kReturnIndices || param.ret_typ == topk_enum::kReturnMask) { @@ -751,13 +751,13 @@ inline bool TopKShapeImpl(const TopKParam& param, } else { CHECK_EQ(out_attrs->size(), 2U); } - TShape& in_shape = (*in_attrs)[0]; + mxnet::TShape& in_shape = (*in_attrs)[0]; int batch_size, element_num; // number of batches + the size of each batch int axis = 0; bool do_transpose = false; bool is_ascend = false; int k = 0; - TShape target_shape; + mxnet::TShape target_shape; ParseTopKParam(in_shape, param, &target_shape, &batch_size, &element_num, &axis, &k, &do_transpose, &is_ascend); if (param.ret_typ == topk_enum::kReturnIndices || @@ -771,8 +771,8 @@ inline bool TopKShapeImpl(const TopKParam& param, } inline bool TopKShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const TopKParam& param = nnvm::get(attrs.parsed); return TopKShapeImpl(param, in_attrs, out_attrs); } @@ -800,8 +800,8 @@ inline bool SortType(const nnvm::NodeAttrs& attrs, } inline bool SortShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const SortParam& param = nnvm::get(attrs.parsed); TopKParam topk_param; topk_param.axis = param.axis; @@ -821,8 +821,8 @@ inline bool ArgSortType(const nnvm::NodeAttrs& attrs, } inline bool ArgSortShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { const ArgSortParam& param = nnvm::get(attrs.parsed); TopKParam topk_param; topk_param.axis = param.axis; diff --git a/src/operator/tensor/ordering_op.cc b/src/operator/tensor/ordering_op.cc index fb0029959718..4adfac29fec1 100644 --- a/src/operator/tensor/ordering_op.cc +++ b/src/operator/tensor/ordering_op.cc @@ -65,7 +65,7 @@ Examples:: .set_num_inputs(1) .set_num_outputs(TopKNumOutputs) .set_attr_parser(ParamParser) -.set_attr("FInferShape", TopKShape) +.set_attr("FInferShape", TopKShape) .set_attr("FInferType", TopKType) 
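// (Aside: a minimal sketch of a shape-inference function under the signature this patch
// introduces; MyOpShape and its identity rule are hypothetical, not part of the change.)
//   inline bool MyOpShape(const nnvm::NodeAttrs& attrs,
//                         mxnet::ShapeVector* in_attrs,
//                         mxnet::ShapeVector* out_attrs) {
//     CHECK_EQ(in_attrs->size(), 1U);
//     CHECK_EQ(out_attrs->size(), 1U);
//     SHAPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);  // output shape == input shape
//     return true;
//   }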
.set_attr("FNumVisibleOutputs", TopKNumVisibleOutputs) .set_attr("FCompute", TopK) @@ -128,7 +128,7 @@ Examples:: .set_num_inputs(1) .set_num_outputs(2) .set_attr_parser(ParamParser) -.set_attr("FInferShape", SortShape) +.set_attr("FInferShape", SortShape) .set_attr("FInferType", SortType) .set_attr("FNumVisibleOutputs", [](const NodeAttrs& attrs) { return 1; }) .set_attr("FCompute", Sort) @@ -178,7 +178,7 @@ Examples:: .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) -.set_attr("FInferShape", ArgSortShape) +.set_attr("FInferShape", ArgSortShape) .set_attr("FInferType", ArgSortType) .set_attr("FCompute", ArgSort) .set_attr("FGradient", MakeZeroGradNodes) diff --git a/src/operator/tensor/ravel.cc b/src/operator/tensor/ravel.cc index 7bbfac5d58c0..0a66ea80fca9 100644 --- a/src/operator/tensor/ravel.cc +++ b/src/operator/tensor/ravel.cc @@ -46,7 +46,7 @@ Examples:: { return std::vector{ResourceRequest::kTempSpace}; }) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"data"}; } ) -.set_attr("FInferShape", RavelOpShape) +.set_attr("FInferShape", RavelOpShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", RavelForward) .set_attr("FGradient", MakeZeroGradNodes) @@ -70,7 +70,7 @@ Examples:: { return std::vector{ResourceRequest::kTempSpace}; }) .set_attr("FListInputNames", [](const NodeAttrs& attrs) { return std::vector{"data"}; } ) -.set_attr("FInferShape", UnravelOpShape) +.set_attr("FInferShape", UnravelOpShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCompute", UnravelForward) .set_attr("FGradient", MakeZeroGradNodes) diff --git a/src/operator/tensor/ravel.h b/src/operator/tensor/ravel.h index 1eb61e1b6819..6d337dcef701 100644 --- a/src/operator/tensor/ravel.h +++ b/src/operator/tensor/ravel.h @@ -37,19 +37,19 @@ namespace mxnet { namespace op { struct RavelParam : public dmlc::Parameter { - TShape shape; + mxnet::TShape shape; DMLC_DECLARE_PARAMETER(RavelParam) { DMLC_DECLARE_FIELD(shape) - .set_default(TShape()) + .set_default(mxnet::TShape()) .describe("Shape of the array into which the multi-indices apply."); } }; inline bool RavelOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { using namespace mshadow; - const TShape& shape = nnvm::get(attrs.parsed).shape; + const mxnet::TShape& shape = nnvm::get(attrs.parsed).shape; CHECK_EQ(in_attrs->size(), 1); CHECK_EQ(out_attrs->size(), 1); CHECK_GT(shape.ndim(), 0) << "Empty shape parameter for ravel operator."; @@ -69,10 +69,10 @@ inline bool RavelOpShape(const nnvm::NodeAttrs& attrs, } inline bool UnravelOpShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { using namespace mshadow; - const TShape& shape = nnvm::get(attrs.parsed).shape; + const mxnet::TShape& shape = nnvm::get(attrs.parsed).shape; CHECK_EQ(in_attrs->size(), 1); CHECK_EQ(out_attrs->size(), 1); CHECK_GT(shape.ndim(), 0) << "Empty shape parameter for unravel operator."; @@ -126,7 +126,7 @@ void RavelForward(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { using namespace mshadow; Stream *s = ctx.get_stream(); - const TShape& shape = nnvm::get(attrs.parsed).shape; + const mxnet::TShape& shape = nnvm::get(attrs.parsed).shape; std::vector buffer(shape.data(), shape.data()+shape.ndim()); Tensor work = ctx.requested[0].get_space_typed(Shape1(shape.ndim()), s); @@ -147,7 +147,7 @@ void 
UnravelForward(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { using namespace mshadow; Stream *s = ctx.get_stream(); - const TShape& shape = nnvm::get(attrs.parsed).shape; + const mxnet::TShape& shape = nnvm::get(attrs.parsed).shape; std::vector buffer(shape.data(), shape.data()+shape.ndim()); Tensor work = ctx.requested[0].get_space_typed(Shape1(shape.ndim()), s); diff --git a/src/operator/tensor/sparse_retain-inl.h b/src/operator/tensor/sparse_retain-inl.h index 52401beba316..951bf80b81b8 100644 --- a/src/operator/tensor/sparse_retain-inl.h +++ b/src/operator/tensor/sparse_retain-inl.h @@ -44,13 +44,13 @@ enum SparseRetainOpOutputs {kOut}; } // namespace sr inline bool SparseRetainOpShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 2U) << "sparse_retain operator takes 2 arguments (" << in_attrs->size() << " given)"; CHECK_EQ(out_attrs->size(), 1U); - TShape tshape((*in_attrs)[sr::kArr]); + mxnet::TShape tshape((*in_attrs)[sr::kArr]); shape_assign(&tshape, (*out_attrs)[sr::kOut]); SHAPE_ASSIGN_CHECK(*in_attrs, sr::kArr, tshape); SHAPE_ASSIGN_CHECK(*out_attrs, sr::kOut, tshape); diff --git a/src/operator/tensor/sparse_retain.cc b/src/operator/tensor/sparse_retain.cc index 8630457e6a5e..007541b00b37 100644 --- a/src/operator/tensor/sparse_retain.cc +++ b/src/operator/tensor/sparse_retain.cc @@ -57,7 +57,7 @@ The storage type of ``retain`` output depends on storage types of inputs [](const NodeAttrs& attrs) { return std::vector{"data", "indices"}; }) -.set_attr("FInferShape", SparseRetainOpShape) +.set_attr("FInferShape", SparseRetainOpShape) .set_attr("FInferType", SparseRetainOpType) .set_attr("FInferStorageType", SparseRetainForwardInferStorageType) .set_attr("FComputeEx", SparseRetainOpForwardEx) diff --git a/src/operator/tensor/square_sum-inl.h b/src/operator/tensor/square_sum-inl.h index 162b3c8ad12b..016b383117bc 100644 --- a/src/operator/tensor/square_sum-inl.h +++ b/src/operator/tensor/square_sum-inl.h @@ -53,7 +53,7 @@ inline bool SquareSumForwardInferStorageType(const nnvm::NodeAttrs& attrs, const auto& in_stype = in_attrs->at(0); auto& out_stype = out_attrs->at(0); bool dispatched = false; - const TShape axis = param.axis.has_value() ? param.axis.value() : TShape(); + const mxnet::TShape axis = param.axis.has_value() ? 
param.axis.value() : mxnet::TShape(); if (!dispatched && in_stype == kRowSparseStorage && axis.ndim() > 0 && axis[0] == 1 && param.keepdims) { // sum per row and keep dims @@ -267,7 +267,7 @@ void SquareSumRspImpl(const nnvm::NodeAttrs& attrs, if (req == kNullOp) return; const ReduceAxesParam& param = nnvm::get(attrs.parsed); CHECK(param.axis.has_value()); - const TShape axis = param.axis.value(); + const mxnet::TShape axis = param.axis.value(); CHECK_EQ(axis.ndim(), 1U) << "_square_sum(row_sparse_matrix) only supports axis=0 or 1"; CHECK(axis[0] == 0 || axis[0] == 1) << "_square_sum(row_sparse_matrix) only supports axis=0 or 1"; @@ -382,7 +382,7 @@ void SquareSumRspGradImpl(const nnvm::NodeAttrs& attrs, if (req == kNullOp) return; const ReduceAxesParam& param = nnvm::get(attrs.parsed); CHECK(param.axis.has_value()); - const TShape axis = param.axis.value(); + const mxnet::TShape axis = param.axis.value(); CHECK_EQ(axis.ndim(), 1U) << "_square_sum(row_sparse_matrix) only supports axis=0/1"; CHECK(axis[0] == 0 || axis[0] == 1) << "_square_sum(row_sparse_matrix) only supports axis=0 or 1"; diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h index ba99c811ac98..adea941bda13 100644 --- a/src/profiler/profiler.h +++ b/src/profiler/profiler.h @@ -1081,8 +1081,8 @@ struct ProfileOperator : public ProfileEvent { * \brief Operator attributes */ struct Attributes { - std::vector inputs_; - std::vector outputs_; + std::vector inputs_; + std::vector outputs_; std::unordered_map attr_; std::string to_string() const { std::stringstream ss; diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h index f59a9e8d74dc..bf35834c5d5f 100644 --- a/tests/cpp/include/test_core_op.h +++ b/tests/cpp/include/test_core_op.h @@ -104,7 +104,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param ctx Context to use when creating the array/tensor * \return The created NDArray */ - NDArray CreateRandArray(const TShape& shape, const RunContext& run_ctx, int dtype) const { + NDArray CreateRandArray(const mxnet::TShape& shape, const RunContext& run_ctx, int dtype) const { CHECK_GT(shape.Size(), 0); // Check it's a valid shape NDArray array(shape, run_ctx.ctx, true, dtype); array.CheckAndAlloc(); @@ -118,7 +118,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param ctx Context to use when creating the array/tensor * \return The created NDArray */ - NDArray CreateZeroArray(const TShape& shape, const RunContext& run_ctx, int dtype) const { + NDArray CreateZeroArray(const mxnet::TShape& shape, const RunContext& run_ctx, int dtype) const { CHECK_GT(shape.Size(), 0); // Check it's a valid shape NDArray array(shape, run_ctx.ctx, true, dtype); array.CheckAndAlloc(); @@ -266,7 +266,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer * \param isGPU Is this going to be on the GPU? * \param shapes Array of input shapes */ - CoreOpExecutor(const bool isGPU, const std::vector& shapes) + CoreOpExecutor(const bool isGPU, const mxnet::ShapeVector& shapes) : input_shapes_(shapes) , op_(nullptr) { ctx_.is_train = true; @@ -397,7 +397,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer // Generic, all shapes the same. Probably this will need to be adjusted for more complex // operators such as dot - std::vector input_shapes; + std::vector input_shapes; if (!input_shapes_.empty()) { for (size_t i = 0, n = num_inputs; i < n; ++i) { input_shapes.emplace_back(i < input_shapes_.size() ? 
input_shapes_[i] @@ -466,10 +466,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer // Output arrays if (outputs_.empty()) { - std::vector output_shapes; - static auto& finfer_shape = Op::GetAttr("FInferShape"); + std::vector output_shapes; + static auto& finfer_shape = Op::GetAttr("FInferShape"); if (finfer_shape.count(op_)) { - nnvm::FInferShape call_infer_shapes = finfer_shape[op_]; + mxnet::FInferShape call_infer_shapes = finfer_shape[op_]; output_shapes.resize(inferred_num_outputs); call_infer_shapes(attrs_, &input_shapes, &output_shapes); input_shapes_ = input_shapes; @@ -482,9 +482,9 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer for (int i = 0; i < num_inputs; ++i) { const int map_key = bwd_node_ptr->inputs[i].index; CHECK(index2array.find(map_key) != index2array.end()); - const nnvm::TShape &shp = index2array[map_key]->shape(); + const mxnet::TShape &shp = index2array[map_key]->shape(); input_shapes.push_back(shp); - const nnvm::TShape ss = input_shapes[i]; + const mxnet::TShape ss = input_shapes[i]; } } else { // TODO(cjolivier) @@ -788,7 +788,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer /*! * \brief Input data shape */ - std::vector input_shapes_; + mxnet::ShapeVector input_shapes_; /* * \brief Pointer to the operator object */ @@ -863,7 +863,7 @@ template inline void BasicRunCoreOpBidirectional(const bool isGPU, bool verbose, const kwargs_t& op_kwargs, - const std::vector& shapes, + const mxnet::ShapeVector& shapes, const char *op_name, const char *backward_op_name = "") { test::op::CoreOpExecutor op(isGPU, shapes); diff --git a/tests/cpp/include/test_legacy_op.h b/tests/cpp/include/test_legacy_op.h index 7fd407e39807..fdb52cf6e4e0 100644 --- a/tests/cpp/include/test_legacy_op.h +++ b/tests/cpp/include/test_legacy_op.h @@ -67,7 +67,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer typedef AccReal AccRealType; /*! 
\brief Manage test blobs and context */ - LegacyOperatorExecutor(const bool isGPU, const std::vector& topShapes) + LegacyOperatorExecutor(const bool isGPU, const mxnet::ShapeVector& topShapes) #if !MXNET_USE_CUDA : isGPU_(false) #else @@ -102,7 +102,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer const size_t output_count = opProp.ListOutputs().size(); const size_t aux_count = opProp.ListAuxiliaryStates().size(); // Figure out what sort of blobs we need to allocate - std::vector out_shape, aux_shape; + mxnet::ShapeVector out_shape, aux_shape; out_shape.resize(output_count); aux_shape.resize(aux_count); opProp.InferShape(&shape_input_vec_, &out_shape, &aux_shape); @@ -134,7 +134,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer } // Get the resource of temporal space - std::vector inputShapes; + mxnet::ShapeVector inputShapes; for (size_t x = 0, n = shape_input_vec_.size(); x < n; ++x) { inputShapes.emplace_back(shape_input_vec_[x]); } @@ -166,7 +166,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer } // Get the resource of temporal space - std::vector ishapes; + mxnet::ShapeVector ishapes; allocateResources(opProp.BackwardResource(ishapes)); resetBackward(); @@ -303,7 +303,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer Stream& os = *_os; os << "static const std::vector< std::vector< std::vector > > ___" << label << "_data_shape_"; - const TShape& shape = shape_input_vec_[0]; + const mxnet::TShape& shape = shape_input_vec_[0]; for (size_t i = 0, n = shape.ndim(); i < n; ++i) { os << shape[i] << "_"; } @@ -398,7 +398,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer /*! \brief Input and output blobs */ OpContext opContext_; - std::vector shape_input_vec_; + mxnet::ShapeVector shape_input_vec_; struct OpData { std::vector blob_input_vec_; @@ -533,7 +533,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer /*! \brief Locally allocate a managed TBlob and insert into the supplied vector */ static TBlob *allocateBlob(std::list> *standalone_blobs, std::vector *dest, - const TShape& shape, + const mxnet::TShape& shape, const bool isGPU, const int dtype) { test::StandaloneBlob *blob = new test::StandaloneBlob(shape, isGPU, dtype); @@ -544,7 +544,7 @@ class LegacyOperatorExecutor : public OperatorDataInitializer } /*! 
\brief Locally allocate a managed TBlob and insert into the supplied vector */ - inline TBlob *allocateBlob(std::vector *dest, const TShape& shape, + inline TBlob *allocateBlob(std::vector *dest, const mxnet::TShape& shape, const bool isGPU, const int dtype) { return allocateBlob(&standalone_blobs_, dest, shape, isGPU, dtype); } diff --git a/tests/cpp/include/test_mkldnn.h b/tests/cpp/include/test_mkldnn.h index c705a6004aa9..a379dab7bf90 100644 --- a/tests/cpp/include/test_mkldnn.h +++ b/tests/cpp/include/test_mkldnn.h @@ -37,7 +37,7 @@ using namespace mxnet; -inline static mkldnn::memory::primitive_desc GetMemPD(const TShape s, int dtype, +inline static mkldnn::memory::primitive_desc GetMemPD(const mxnet::TShape s, int dtype, mkldnn::memory::format format) { mkldnn::memory::dims dims(s.ndim()); for (size_t i = 0; i < dims.size(); i++) @@ -49,7 +49,7 @@ inline static mkldnn::memory::primitive_desc GetMemPD(const TShape s, int dtype, inline static mkldnn::memory::primitive_desc GetExpandedMemPD( mkldnn::memory::primitive_desc pd, float scale, int dim = 0) { CHECK(dim < pd.desc().data.ndims) << "dimension cannot be larger than total dimensions of input"; - nnvm::TShape s(pd.desc().data.ndims); + mxnet::TShape s(pd.desc().data.ndims); for (size_t i = 0; i < pd.desc().data.ndims; i++) s[i] = pd.desc().data.dims[i]; s[dim] = static_cast(s[dim] * scale); @@ -58,7 +58,7 @@ inline static mkldnn::memory::primitive_desc GetExpandedMemPD( } struct TestArrayShapes { - std::vector shapes; + std::vector shapes; std::vector pds; }; @@ -85,7 +85,7 @@ inline static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive arr->WaitToRead(); } -inline static bool IsSameShape(mkldnn::memory::primitive_desc pd, TShape shape) { +inline static bool IsSameShape(mkldnn::memory::primitive_desc pd, mxnet::TShape shape) { if (pd.desc().data.ndims != shape.ndim()) return false; for (size_t i = 0; i < shape.ndim(); i++) if (pd.desc().data.dims[i] != shape[i]) return false; @@ -161,11 +161,11 @@ inline static std::vector GetMKLDNNFormat(size_t num_dim inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = false) { int dtype = mshadow::DataType::kFlag; - std::vector shapes; + mxnet::ShapeVector shapes; std::vector pds; { // 1D - TShape s(1); + mxnet::TShape s(1); s[0] = 279936; shapes.push_back(s); pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x)); @@ -175,7 +175,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals } { // 2D - TShape s(2); + mxnet::TShape s(2); s[0] = 96; s[1] = 2916; shapes.push_back(s); @@ -187,12 +187,12 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals } { // 4D - TShape s1(4); + mxnet::TShape s1(4); s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 54; shapes.push_back(s1); pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw)); - TShape s2(4); + mxnet::TShape s2(4); s2[0] = 96; s2[1] = 3; s2[2] = 11; s2[3] = 11; shapes.push_back(s2); pds.push_back(GetMemPD(s2, dtype, mkldnn::memory::format::oihw)); @@ -204,7 +204,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals } { // 5D - TShape s(5); + mxnet::TShape s(5); s[0] = 96; s[1] = 1; s[2] = 3; s[3] = 11; s[4] = 11; shapes.push_back(s); pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::goihw)); @@ -256,10 +256,10 @@ enum ArrayTypes { }; -inline NDArray CreateKernelNDArray(TShape kernel, int num_filters, TShape input, +inline NDArray CreateKernelNDArray(mxnet::TShape kernel, int num_filters, 
mxnet::TShape input, bool is_deconv = false) { CHECK_EQ(kernel.ndim(), 2) << "mkldnn only supports 2d filters on 4d inputs"; - TShape target_shape(4); + mxnet::TShape target_shape(4); target_shape[0] = is_deconv ? input[1] : num_filters; target_shape[1] = is_deconv ? num_filters : input[1]; target_shape[2] = kernel[0]; @@ -271,7 +271,7 @@ inline NDArray CreateKernelNDArray(TShape kernel, int num_filters, TShape input, return arr; } -inline NDArray CreateBiasNDArray(TShape target_shape) { +inline NDArray CreateBiasNDArray(mxnet::TShape target_shape) { int dtype = mshadow::DataType::kFlag; NDArray arr(target_shape, Context()); auto pd = GetMemPD(target_shape, dtype, mkldnn::memory::format::x); @@ -299,8 +299,8 @@ inline std::string CreateShapeString(int value, int dim) { } inline void PrintVerifyMsg(const NDArrayAttrs &arr1, const NDArrayAttrs &arr2) { - TShape t1 = arr1.arr.shape(); - TShape t2 = arr2.arr.shape(); + mxnet::TShape t1 = arr1.arr.shape(); + mxnet::TShape t2 = arr2.arr.shape(); std::stringstream ss; std::cout << "Verifying: " << arr1.desc.c_str() << " " << t1 << " with " << arr2.desc.c_str() << " " << t2 << "\n"; @@ -332,7 +332,7 @@ inline std::vector GetTestInputArrays( int types = ArrayTypes::All, bool rand = false, std::vector scale = {1}, bool spatial_data_format = false) { TestArrayShapes tas = GetTestArrayShapes(spatial_data_format); - std::vector shapes = tas.shapes; + std::vector shapes = tas.shapes; std::vector pds = tas.pds; std::vector in_arrs; @@ -443,10 +443,10 @@ inline std::vector GetTestInputArrays( * Optional num_inputs / dim args can be passed to modify input shape (used for Concat test) */ inline std::vector GetTestOutputArrays( - const TShape &shp, + const mxnet::TShape &shp, const std::vector &pds, std::vectorscale = {1}, bool rand = true, int types = ArrayTypes::All) { - TShape shape = shp; + mxnet::TShape shape = shp; for (int dim = 0; dim < scale.size(); dim++) shape[dim] = static_cast(shape[dim] * scale[dim]); @@ -461,7 +461,7 @@ inline std::vector GetTestOutputArrays( InitDefaultArray(&in_arrs.back().arr, rand); } - TShape tmp_shape = shape; + mxnet::TShape tmp_shape = shape; if (types & ArrayTypes::NormalReshaped) { // Type 4. tmp_shape[0] = shape[0] * 2; @@ -470,7 +470,7 @@ inline std::vector GetTestOutputArrays( in_arrs.emplace_back(arr0.Slice(1, shape[0] + 1), "Reshaped NDArray"); } - nnvm::TShape s(1); + mxnet::TShape s(1); if (types & ArrayTypes::NormalReused) { // Type 5. // Get a reused version. @@ -528,7 +528,7 @@ inline std::vector GetTestOutputArrays( // Type 8, 9. // Get a reused version. 
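// (Descriptive note: s is a flattened 1-D shape holding shape.Size() elements, so
// AsArray below can reinterpret the same underlying buffer with the original
// multi-dimensional shape without copying.)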
- nnvm::TShape s(1); + mxnet::TShape s(1); s[0] = shape.Size(); NDArray arr = NDArray(s, Context()); arr = arr.AsArray(shape, arr.dtype()); @@ -553,7 +553,7 @@ inline std::vector GetTestOutputArrays( * Determines axis ndarrays are concatenated by * Used to verify concat/concat backwards operator */ -inline int GetDim(TShape input_shape, TShape output_shape) { +inline int GetDim(mxnet::TShape input_shape, mxnet::TShape output_shape) { CHECK(input_shape.Size() != output_shape.Size()); for (size_t i = 0; i < input_shape.ndim(); i++) { if (input_shape[i] != output_shape[i]) @@ -566,7 +566,7 @@ inline int GetDim(TShape input_shape, TShape output_shape) { * Calculates the size of continuous block of array inside larger concatenated array * Used to verify concat/concat backwards operator */ -inline int GetBlockSize(TShape shape, int dim) { +inline int GetBlockSize(mxnet::TShape shape, int dim) { int block_size = 1; for (int i = shape.ndim() - 1; i >= dim; i--) block_size *= shape[i]; diff --git a/tests/cpp/include/test_ndarray_utils.h b/tests/cpp/include/test_ndarray_utils.h index f5ab96794ada..8a53298f4811 100644 --- a/tests/cpp/include/test_ndarray_utils.h +++ b/tests/cpp/include/test_ndarray_utils.h @@ -60,7 +60,7 @@ inline float RandFloat() { } // Get an NDArray with provided indices, prepared for a RowSparse NDArray. -inline NDArray RspIdxND(const TShape shape, const Context ctx, +inline NDArray RspIdxND(const mxnet::TShape shape, const Context ctx, const std::vector &values) { NDArray nd(shape, ctx, false, ROW_SPARSE_IDX_TYPE); size_t num_val = values.size(); @@ -74,7 +74,7 @@ inline NDArray RspIdxND(const TShape shape, const Context ctx, } // Get a dense NDArray with provided values. -inline NDArray DnsND(const TShape shape, const Context ctx, std::vector vs) { +inline NDArray DnsND(const mxnet::TShape shape, const Context ctx, std::vector vs) { NDArray nd(shape, ctx, false); size_t num_val = shape.Size(); // generate random values @@ -109,8 +109,10 @@ static void inline CopyBlob(mshadow::Stream *s, } // Get a RowSparse NDArray with provided indices and values -inline NDArray RspND(const TShape shape, const Context ctx, const std::vector idx, - std::vector vals) { +inline NDArray RspND(const mxnet::TShape shape, + const Context ctx, + const std::vector idx, + std::vector vals) { CHECK(shape.ndim() <= 2) << "High dimensional row sparse not implemented yet"; index_t num_rows = idx.size(); index_t num_cols = vals.size() / idx.size(); @@ -122,7 +124,7 @@ inline NDArray RspND(const TShape shape, const Context ctx, const std::vector aux_shapes = {mshadow::Shape1(num_rows)}; + mxnet::ShapeVector aux_shapes = {mshadow::Shape1(num_rows)}; NDArray nd(kRowSparseStorage, shape, ctx, false, mshadow::default_type_flag, {}, aux_shapes); @@ -213,7 +215,7 @@ class Array { public: Array() = default; - explicit Array(const TShape &shape) + explicit Array(const mxnet::TShape &shape) : shape_(shape) {} explicit Array(const NDArray &arr) @@ -223,7 +225,7 @@ class Array { void clear() { items_.clear(); - shape_ = TShape(0); + shape_ = mxnet::TShape(0); } static inline bool IsNear(const DType v1, const DType v2) { return fabs(v2 - v1) <= EPSILON; } @@ -288,7 +290,7 @@ class Array { case kUndefinedStorage: default: LOG(ERROR) << "Unsupported storage type: " << storageType; - return NDArray(TShape(0), ctx); + return NDArray(mxnet::TShape(0), ctx); } } @@ -337,7 +339,7 @@ class Array { } private: - TShape shape_; + mxnet::TShape shape_; TItems items_; }; diff --git a/tests/cpp/include/test_op.h 
b/tests/cpp/include/test_op.h index 7a0c6d3878ee..d581e88357de 100644 --- a/tests/cpp/include/test_op.h +++ b/tests/cpp/include/test_op.h @@ -281,8 +281,8 @@ static test::op::OpInfo createOpAndInfoF(const k return info; } -inline std::vector ShapesOf(const std::vector& arrays) { - std::vector res; +inline mxnet::ShapeVector ShapesOf(const std::vector& arrays) { + mxnet::ShapeVector res; res.reserve(arrays.size()); for (const NDArray& ar : arrays) { res.emplace_back(ar.shape()); diff --git a/tests/cpp/include/test_op_runner.h b/tests/cpp/include/test_op_runner.h index 1e00e30a1b34..b46065bb5cdb 100644 --- a/tests/cpp/include/test_op_runner.h +++ b/tests/cpp/include/test_op_runner.h @@ -64,7 +64,7 @@ class OperatorRunner { test::op::OpInfo RunGenericOperatorForward( bool isGPU, - const std::vector& inputShapes, + const mxnet::ShapeVector& inputShapes, const std::vector > &kwargs, const size_t count = 1) { #if MXNET_USE_CUDA @@ -107,7 +107,7 @@ class OperatorRunner { */ test::op::OpInfo RunBidirectional( bool isGPU, - const std::vector& inputShapes, + const mxnet::ShapeVector& inputShapes, const std::vector > &kwargs, const size_t count = 1) { test::op::OpInfo info = @@ -137,7 +137,7 @@ class OperatorRunner { const test::op::kwargs_t& kwargs, int dim = 0, size_t count = 1, - const std::vector& timing_shapes = {}, + const mxnet::ShapeVector& timing_shapes = {}, bool backward = true) { if (mxnet::test::quick_test) { total_iterations_ = 2; @@ -193,7 +193,7 @@ class OperatorRunner { info = RunGenericOperatorForward(isGPU, !timing_shapes.empty() ? timing_shapes - : std::vector({TShape({batchSize, + : mxnet::ShapeVector({mxnet::TShape({batchSize, channels, width})}), kwargs, @@ -203,7 +203,7 @@ class OperatorRunner { info = RunGenericOperatorForward(isGPU, !timing_shapes.empty() ? timing_shapes - : std::vector({ TShape({batchSize, + : mxnet::ShapeVector({ mxnet::TShape({batchSize, channels, height, width})}), @@ -214,7 +214,7 @@ class OperatorRunner { info = RunGenericOperatorForward(isGPU, !timing_shapes.empty() ? timing_shapes - : std::vector({ TShape({batchSize, + : mxnet::ShapeVector({ mxnet::TShape({batchSize, channels, depth, height, diff --git a/tests/cpp/include/test_tune.h b/tests/cpp/include/test_tune.h index f5e15cc1811b..9f5a2e04c54e 100644 --- a/tests/cpp/include/test_tune.h +++ b/tests/cpp/include/test_tune.h @@ -67,7 +67,7 @@ class TuningTester { using bool_mode_pair = std::pair; - using shape_vect = std::vector; + using shape_vect = mxnet::ShapeVector; using shape_vec_to_bool_map = std::map; private: @@ -99,7 +99,7 @@ class TuningTester { // Do the performance runs const char *pu = isGPU ? 
"GPU" : "CPU"; - for (const std::vector &this_run_shapes : shapes) { + for (const mxnet::ShapeVector &this_run_shapes : shapes) { test::perf::timing_map_t tmap = runner.TimingTest(std::string(op_name) + " Operator " + pu, isGPU, false, kwargs, 0, calls_per_iteration_, @@ -189,13 +189,13 @@ class TuningTester { if (verbose || test::csv) { if (!test::csv) { for (size_t x = 0, n = shapes.size(); x < n; ++x) { - const TShape &shape = shapes[x]; + const mxnet::TShape &shape = shapes[x]; if (x) { std::cout << ", "; } std::cout << shape; } - const TShape &lhs_shape = shapes[0]; + const mxnet::TShape &lhs_shape = shapes[0]; std::cout << " lhs=" << test::pretty_num(lhs_shape.Size()) << " items"; std::cout << "\t(" << TimingDirectionAsString(direction) << ")" << std::endl; } else { diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index dee89039d3ef..aec3ddc5a59b 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -50,7 +50,7 @@ extern bool performance_run; extern bool csv; template -inline size_t shapeMemorySize(const TShape& shape) { +inline size_t shapeMemorySize(const mxnet::TShape& shape) { return shape.Size() * sizeof(DType); } @@ -87,7 +87,7 @@ class BlobMemory { class StandaloneBlob : public TBlob { public: - inline StandaloneBlob(const TShape& shape, const bool isGPU, const int dtype) + inline StandaloneBlob(const mxnet::TShape& shape, const bool isGPU, const int dtype) : TBlob(nullptr, shape, isGPU ? gpu::kDevMask : cpu::kDevMask, dtype) , memory_(std::make_shared(isGPU)) { MSHADOW_TYPE_SWITCH(dtype, DType, { @@ -261,12 +261,12 @@ inline void dump(Stream *os, const TBlob& blob, const char *suffix = "f") { /*! \brief Return reference to data at position indexes */ -inline index_t getMult(const TShape& shape, const index_t axis) { +inline index_t getMult(const mxnet::TShape& shape, const index_t axis) { return axis < shape.ndim() ? shape[axis] : 1; } /*! \brief offset, given indices such as bn, channel, depth, row, column */ -inline index_t offset(const TShape& shape, const std::vector& indices) { +inline index_t offset(const mxnet::TShape& shape, const std::vector& indices) { const size_t dim = shape.ndim(); CHECK_LE(indices.size(), dim); size_t offset = 0; @@ -314,8 +314,8 @@ inline std::string repeatedStr(const char *s, const signed int count, /*! 
 /*! \brief Pretty print a shape with optional label */
 template
-inline StreamType& print_shape(StreamType *_os, const std::string& label, const TShape& shape,
-                               const bool add_endl = true) {
+inline StreamType& print_shape(StreamType *_os, const std::string& label,
+                               const mxnet::TShape& shape, const bool add_endl = true) {
   if (!label.empty()) {
     *_os << label << ": ";
   }
@@ -355,14 +355,14 @@ inline StreamType& print_blob_(const RunContext& ctx,
   if (dim == 1) {
     // probably a 1d tensor (mshadow::Tensor is deprecated)
-    TBlob changed(blob.dptr(), TShape(3), blob.dev_mask(), blob.dev_id());
+    TBlob changed(blob.dptr(), mxnet::TShape(3), blob.dev_mask(), blob.dev_id());
     changed.shape_[0] = 1;
     changed.shape_[1] = 1;
     changed.shape_[2] = blob.shape_[0];
     return print_blob_(ctx, &os, changed, false, false, add_endl);
   } else if (dim == 2) {
     // probably a 2d tensor (mshadow::Tensor is deprecated)
-    TBlob changed(blob.dptr(), TShape(4), blob.dev_mask(), blob.dev_id());
+    TBlob changed(blob.dptr(), mxnet::TShape(4), blob.dev_mask(), blob.dev_id());
     changed.shape_[0] = 1;
     changed.shape_[1] = 1;
     changed.shape_[2] = blob.shape_[0];
@@ -504,35 +504,35 @@ inline StreamType& print(const RunContext& ctx, StreamType *_os,
   switch (arr.storage_type()) {
     case kRowSparseStorage: {
       // data
-      const TShape& shape = arr.shape();
+      const mxnet::TShape& shape = arr.shape();
       print_shape(_os, "[row_sparse] main shape", shape, false);
-      const TShape& storage_shape = arr.storage_shape();
+      const mxnet::TShape& storage_shape = arr.storage_shape();
       const bool is_one_row = storage_shape[0] < 2;
       print_shape(_os, "storage shape", storage_shape, false);
       print(ctx, _os, arr.data(), true, true, !is_one_row);

       // indices
-      const TShape& indices_shape = arr.aux_shape(rowsparse::kIdx);
+      const mxnet::TShape& indices_shape = arr.aux_shape(rowsparse::kIdx);
       print_shape(_os, "indices shape", indices_shape, false);
       print(ctx, _os, arr.aux_data(rowsparse::kIdx), true, true, false) << std::endl;
       break;
     }
     case kCSRStorage: {
       // data
-      const TShape& shape = arr.shape();
+      const mxnet::TShape& shape = arr.shape();
       print_shape(_os, "[CSR] main shape", shape, false);
-      const TShape& storage_shape = arr.storage_shape();
+      const mxnet::TShape& storage_shape = arr.storage_shape();
       const bool is_one_row = storage_shape[0] < 2;
       print_shape(_os, "storage shape", storage_shape, false);
       print(ctx, _os, arr.data(), true, true, !is_one_row);

       // row ptrs
-      const TShape& ind_ptr_shape = arr.aux_shape(csr::kIndPtr);
+      const mxnet::TShape& ind_ptr_shape = arr.aux_shape(csr::kIndPtr);
       print_shape(_os, "row ptrs shape", ind_ptr_shape, false);
       print(ctx, _os, arr.aux_data(csr::kIndPtr), true, true, false) << std::endl;

       // col indices
-      const TShape& indices_shape = arr.aux_shape(csr::kIdx);
+      const mxnet::TShape& indices_shape = arr.aux_shape(csr::kIdx);
       print_shape(_os, "col indices shape", indices_shape, false);
       print(ctx, _os, arr.aux_data(csr::kIdx), true, true, false) << std::endl;
@@ -540,7 +540,7 @@ inline StreamType& print(const RunContext& ctx, StreamType *_os,
     }
     case kDefaultStorage: {
       // data
-      const TShape& shape = arr.shape();
+      const mxnet::TShape& shape = arr.shape();
       const bool is_one_row = shape[0] < 2;
       print_shape(_os, "[dense] main shape", shape, !is_one_row);
       print(ctx, _os, arr.data(), true, true, !is_one_row) << std::endl;
@@ -696,13 +696,13 @@ inline ScalarType rangedRand(const ScalarType min, const ScalarType max) {
 }

 /*!
- * \brief Deterministically compare TShape objects as less-than,
+ * \brief Deterministically compare mxnet::TShape objects as less-than,
  * for use in stl sorted key such as map and set
  * \param s1 First shape
  * \param s2 Second shape
  * \return true if s1 is less than s2
  */
-inline bool operator < (const nnvm::TShape &s1, const nnvm::TShape &s2) {
+inline bool operator < (const mxnet::TShape &s1, const mxnet::TShape &s2) {
   if (s1.Size() == s2.Size()) {
     if (s1.ndim() == s2.ndim()) {
       for (size_t i = 0, n = s1.ndim(); i < n; ++i) {
@@ -719,13 +719,14 @@ inline bool operator < (const nnvm::TShape &s1, const nnvm::TShape &s2) {
 }

 /*!
- * \brief Deterministically compare a vector of TShape objects as less-than,
+ * \brief Deterministically compare a vector of mxnet::TShape objects as less-than,
  * for use in stl sorted key such as map and set
  * \param v1 First vector of shapes
  * \param v2 Second vector of shapes
  * \return true if v1 is less than v2
  */
-inline bool operator < (const std::vector<TShape>& v1, const std::vector<TShape>& v2) {
+inline bool operator < (const std::vector<mxnet::TShape>& v1,
+                        const std::vector<mxnet::TShape>& v2) {
   if (v1.size() == v2.size()) {
     for (size_t i = 0, n = v1.size(); i < n; ++i) {
       if (v1[i] == v2[i]) {
@@ -742,7 +743,8 @@ inline bool operator < (const std::vector
-  bool operator()(const std::vector<TShape>& v1, const std::vector<TShape>& v2) const {
+  bool operator()(const std::vector<mxnet::TShape>& v1,
+                  const std::vector<mxnet::TShape>& v2) const {
     if (v1.size() == v2.size()) {
       for (size_t i = 0, n = v1.size(); i < n; ++i) {
         if (v1[i] == v2[i]) {
diff --git a/tests/cpp/misc/serialization.cc b/tests/cpp/misc/serialization.cc
index 96f8b6c3a3a7..77014238c2fa 100644
--- a/tests/cpp/misc/serialization.cc
+++ b/tests/cpp/misc/serialization.cc
@@ -45,13 +45,13 @@ TEST(SerializerTest, InputMapCorrect) {
 }

 TEST(SerializerTest, OutputMapCorrect) {
-  std::map<std::string, std::tuple<uint32_t, TShape, int, int> > output_map;
-  output_map.emplace("output_0", std::make_tuple(1, TShape({23, 12, 63, 432}), 0, 1));
-  output_map.emplace("another_output", std::make_tuple(2, TShape({23, 123}), 14, -23));
-  output_map.emplace("last_output", std::make_tuple(0, TShape({0}), -1, 0));
+  std::map<std::string, std::tuple<uint32_t, mxnet::TShape, int, int> > output_map;
+  output_map.emplace("output_0", std::make_tuple(1, mxnet::TShape({23, 12, 63, 432}), 0, 1));
+  output_map.emplace("another_output", std::make_tuple(2, mxnet::TShape({23, 123}), 14, -23));
+  output_map.emplace("last_output", std::make_tuple(0, mxnet::TShape({0}), -1, 0));
   std::string serialized_data;
   common::Serialize(output_map, &serialized_data);
-  std::map<std::string, std::tuple<uint32_t, TShape, int, int> > deserialized_output_map;
+  std::map<std::string, std::tuple<uint32_t, mxnet::TShape, int, int> > deserialized_output_map;
   common::Deserialize(&deserialized_output_map, serialized_data);
   ASSERT_EQ(output_map.size(), deserialized_output_map.size());
   for (auto& p : output_map) {
diff --git a/tests/cpp/operator/activation_perf.cc b/tests/cpp/operator/activation_perf.cc
index bba8a3ec5722..29deda92e01b 100644
--- a/tests/cpp/operator/activation_perf.cc
+++ b/tests/cpp/operator/activation_perf.cc
@@ -39,7 +39,7 @@ const kwargs_t basic_activation_args = { };
  */
 TEST(ACTIVATION_PERF, ExecuteBidirectional) {
   using namespace std;
-  TShape shape({5, 5});
+  mxnet::TShape shape({5, 5});
   vector activations = {
     "relu",
     "sigmoid",
@@ -70,11 +70,11 @@ TEST(ACTIVATION_PERF, TimingCPU) {
   kwargs.push_back({"act_type", "tanh"});
   kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Activation", "_backward_Activation");
-  TShape shape({10, 10, 10, 10});
+  mxnet::TShape shape({10, 10, 10, 10});
   test::op::CoreOperatorRunner runner;
   runner.RunBidirectional(false, { shape }, kwargs, 1);

-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -89,7 +89,7 @@ TEST(ACTIVATION_PERF, TimingCPU) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest("Activation Operator CPU", false, false, kwargs, 2, 10, { shape });
   }
 }
@@ -104,17 +104,17 @@ TEST(ACTIVATION_PERF, TimingGPU) {
   kwargs.push_back({"act_type", "tanh"});
   kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Activation", "_backward_Activation");
-  TShape shape({10, 10, 10, 10});
+  mxnet::TShape shape({10, 10, 10, 10});
   test::op::CoreOperatorRunner runner;
   runner.RunBidirectional(true, { shape }, kwargs, 1);

-  std::vector<TShape> shapes = {
+  std::vector<mxnet::TShape> shapes = {
     {1, 1, 28, 28},
     {1, 3, 28, 28},
     {50, 1, 18, 32},
     {50, 3, 18, 32},
     {20, 3, 128, 128}
   };
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest("Activation Operator GPU", true, false, kwargs, 2, 10, { shape });
   }
 }
diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc
index 2f9de742a35a..8beebfb1582e 100644
--- a/tests/cpp/operator/batchnorm_test.cc
+++ b/tests/cpp/operator/batchnorm_test.cc
@@ -105,7 +105,7 @@ class BNOperatorExecutor : public test::op::CoreOpExecutor {
  public:
   using Super::ctx;

-  BNOperatorExecutor(const bool isGPU, const TShape& inputShape,
+  BNOperatorExecutor(const bool isGPU, const mxnet::TShape& inputShape,
                      const test::op::kwargs_t& kwargs,
                      const bool hasWeightAndBias = false)
     : test::op::CoreOpExecutor(isGPU, { inputShape })
@@ -664,7 +664,7 @@ static StreamType& dumpB(StreamType *os,
 template
 static test::op::OpInfo TestBatchNormOperatorForward(
   bool isGPU,
-  const TShape& inputShape,
+  const mxnet::TShape& inputShape,
   const std::vector >& kwargs,
   const size_t count = 1) {
 #if MXNET_USE_CUDA
@@ -712,7 +712,7 @@ template
 testForwardAndBackward(
   const bool isGPU1,
   const bool isGPU2,
-  const TShape &inputShape,
+  const mxnet::TShape &inputShape,
   const test::op::kwargs_t& kwargs,
   const size_t count = 1,
   const size_t cycleCount = CYCLE_COUNT) {
@@ -781,7 +781,7 @@ static test::op::OpInfoPair test
 template
 static test::op::OpInfoPair testForwardAndBackward(const bool isGPU,
-                                                   const TShape &inputShape,
+                                                   const mxnet::TShape &inputShape,
                                                    const test::op::kwargs_t kwargs,
                                                    const size_t count = 1,
                                                    const size_t cycleCount = CYCLE_COUNT
@@ -821,7 +821,7 @@ struct BatchNormCoreOpProp : public mxnet::test::op::CoreOpProp {
 template
 static test::op::OpInfoPair
 testBNForwardAndBackward2D(const bool isGPU,
-                           const TShape &inputShape,
+                           const mxnet::TShape &inputShape,
                            const test::op::kwargs_t& kwargs) {
   CHECK_EQ(inputShape.ndim(), 4);  // V1 can only handle 2D
   return testForwardAndBackward(
@@ -831,7 +831,7 @@
 template
 static test::op::OpInfoPair
 testBNForwardAndBackward(const bool isGPU,
-                         const TShape &inputShape,
+                         const mxnet::TShape &inputShape,
                          const test::op::kwargs_t& kwargs) {
   return testForwardAndBackward(
     isGPU, isGPU, inputShape, kwargs);
@@ -1066,10 +1066,10 @@ inline std::ostream& operator << (std::ostream& os, const test::op::kwargs_t& kw
 #if 0
 TEST(BATCH_NORM, TestIterAll) {
-  TShape shapes[] = {
-    TShape({BATCH_SIZE, CHANNELS, DH}),
-    TShape({BATCH_SIZE, CHANNELS, DH, DW}),
-    TShape({BATCH_SIZE, CHANNELS, DEPTH, DH, DW})
+  mxnet::TShape shapes[] = {
+    mxnet::TShape({BATCH_SIZE, CHANNELS, DH}),
+    mxnet::TShape({BATCH_SIZE, CHANNELS, DH, DW}),
+    mxnet::TShape({BATCH_SIZE, CHANNELS, DEPTH, DH, DW})
   };
   int pass = 0;
   const char *tof[2] = { "False", "True" };
"True" }; @@ -1082,7 +1082,7 @@ TEST(BATCH_NORM, TestIterAll) { if (x3) { kwargs.push_back({ "cudnn_off", "True" }); } - for (TShape shape : shapes) { + for (mxnet::TShape shape : shapes) { for (bool g1 : { false, true }) { for (bool g2 : { false, true }) { for (int type : v2_types) { @@ -1122,7 +1122,7 @@ TEST(BATCH_NORM, TestBackward3D) { MSHADOW_REAL_TYPE_SWITCH_EX( mshadow::kFloat32, DType, AccReal, { - const TShape inputShape({2, 3, 2, 3, 5}); + const mxnet::TShape inputShape({2, 3, 2, 3, 5}); test::op::OpInfo> info = TestBatchNormOperatorForward>( false, inputShape, blank_kwargs); @@ -1140,7 +1140,7 @@ class ChannelAxisTestData { void loadOrSave(const RunContext& run_ctx, const TBlob& blob, int channel_axis, const Mode mode) { test::CAccessAsCPU cpu_blob(run_ctx, blob, true); mxnet::op::batchnorm::BNTensor3 tensor3(cpu_blob(), channel_axis); - const TShape &shape = blob.shape_; + const mxnet::TShape &shape = blob.shape_; CHECK_GT(shape.ndim(), 0); if (channel_axis < 0) { channel_axis = shape.ndim() + channel_axis; @@ -1264,7 +1264,7 @@ static void testSaveAndLoad(const std::vector& dims, ChannelAxisTestData data; data.channel_data_ = inputChannelData; - TShape shape(dims.size()); + mxnet::TShape shape(dims.size()); for (size_t i = 0, n = dims.size(); i < n; ++i) { shape[i] = index_t(dims[i]); } @@ -1312,7 +1312,7 @@ TEST(BATCH_NORM, TestChannelAxisSaveAndLoad) { } /*! \brief Insert the channel field `channelCount` into the shape at `channelAxis` position */ -static TShape MakeShape(const std::vector& shape, +static mxnet::TShape MakeShape(const std::vector& shape, signed int channelAxis, const size_t channelCount) { if (channelAxis < 0) { @@ -1320,7 +1320,7 @@ static TShape MakeShape(const std::vector& shape, } CHECK_LT(channelAxis, shape.size() + 1); const index_t dim = index_t(shape.size()) + 1; - TShape newShape(dim); + mxnet::TShape newShape(dim); for (size_t x = 0; x < static_cast(channelAxis); ++x) { newShape[x] = index_t(shape[x]); } @@ -1386,8 +1386,8 @@ static void runChannelAxisTest( test::op::kwargs_t kwargs = base_kwargs; // Insert the channel field into the shape at channelAxis position - const TShape shape_c1 = MakeShape(shape, channelAxis1, channelCount); - const TShape shape_c2 = MakeShape(shape, channelAxis2, channelCount); + const mxnet::TShape shape_c1 = MakeShape(shape, channelAxis1, channelCount); + const mxnet::TShape shape_c2 = MakeShape(shape, channelAxis2, channelCount); // Create operator 1 with ChannelAxis2 (normally the experimental one) kwargs.push_back({"axis", std::to_string(channelAxis1)}); @@ -1575,7 +1575,7 @@ TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const TShape inputShape({1, 1, 2, 1}); + const mxnet::TShape inputShape({1, 1, 2, 1}); testForwardAndBackward>( false, true, inputShape, blank_kwargs); @@ -1591,7 +1591,7 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); + const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW}); testForwardAndBackward>( false, true, inputShape, blank_kwargs); @@ -1609,7 +1609,7 @@ TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_nfg) { MSHADOW_REAL_TYPE_SWITCH_EX( type, DType, AccReal, { - const TShape inputShape({1, 1, 2, 1}); + const mxnet::TShape inputShape({1, 1, 2, 1}); testForwardAndBackward>( false, true, inputShape, nonfixgamma_kwargs); @@ -1625,7 +1625,7 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_nfg) { 
   MSHADOW_REAL_TYPE_SWITCH_EX(
     type, DType, AccReal,
     {
-      const TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
+      const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
       testForwardAndBackward>(
         false, true, inputShape, nonfixgamma_kwargs);
@@ -1643,7 +1643,7 @@ TEST(BATCH_NORM, Test2DBackwardMixed_gpu_cpu_ugs) {
   MSHADOW_REAL_TYPE_SWITCH_EX(
     type, DType, AccReal,
     {
-      const TShape inputShape({2, 3, 2, 2});
+      const mxnet::TShape inputShape({2, 3, 2, 2});
       testForwardAndBackward>(
         false, true, inputShape, useglobalstats_kwargs_nocudnn);
@@ -1659,7 +1659,7 @@ TEST(BATCH_NORM, Test2DBackwardMixedComplex_gpu_cpu_ugs) {
   MSHADOW_REAL_TYPE_SWITCH_EX(
     type, DType, AccReal,
     {
-      const TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
+      const mxnet::TShape inputShape({BATCH_SIZE, CHANNELS, DH, DW});
       testForwardAndBackward>(
         false, true, inputShape, useglobalstats_kwargs);
diff --git a/tests/cpp/operator/coreop_perf.cc b/tests/cpp/operator/coreop_perf.cc
index 31ecebdfee13..14ef625e6915 100644
--- a/tests/cpp/operator/coreop_perf.cc
+++ b/tests/cpp/operator/coreop_perf.cc
@@ -38,7 +38,7 @@ static void RunCoreOpBidirectional(const bool isGPU,
                                    const kwargs_t& op_kwargs,
                                    const char *op_name,
                                    const char *backward_op_name = "") {
-  const TShape shape({5, 5});
+  const mxnet::TShape shape({5, 5});
   test::op::CoreOpExecutor op(isGPU, { shape });
   op.set_verbose(false);
@@ -69,7 +69,7 @@ static void RunCoreOpTimingTest(const bool isGPU,
   runner.RunBidirectional(false, { {20, 3, 128, 128} }, kwargs, 1);

   // Do the performance runs
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -85,7 +85,7 @@ static void RunCoreOpTimingTest(const bool isGPU,
     };
   }
   const char *pu = isGPU ? "GPU" : "CPU";
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest(std::string(op_name) + " Operator " + pu, isGPU, false, kwargs,
                       2, 10, { shape });
   }
diff --git a/tests/cpp/operator/dropout_perf.cc b/tests/cpp/operator/dropout_perf.cc
index 4afd56fe586a..2a1754e2606f 100644
--- a/tests/cpp/operator/dropout_perf.cc
+++ b/tests/cpp/operator/dropout_perf.cc
@@ -38,7 +38,7 @@ const kwargs_t basic_dropout_args = { };
  * \brief Generic bidirectional sanity test
  */
 TEST(DROPOUT_PERF, ExecuteBidirectional) {
-  TShape shape({5, 5});
+  mxnet::TShape shape({5, 5});
   kwargs_t kwargs = basic_dropout_args;
   kwargs.push_back({"mode", "always"});
   test::op::CoreOperatorRunner runner;
@@ -55,12 +55,12 @@ TEST(DROPOUT_PERF, TimingCPU) {
   kwargs_t kwargs = basic_dropout_args;
   // Which math function is arbitrary since it will have roughly constant timing among approaches
   kwargs.push_back({"mode", "always"});
-  TShape shape({10, 10, 10, 10});
+  mxnet::TShape shape({10, 10, 10, 10});
   test::op::CoreOperatorRunner runner;
   kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
   runner.RunBidirectional(false, { shape }, kwargs, 1);

-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -75,7 +75,7 @@ TEST(DROPOUT_PERF, TimingCPU) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
     runner.TimingTest("Dropout Operator CPU", false, false, kwargs, 2, 10,
                       { shape }, false);
@@ -90,19 +90,19 @@ TEST(DROPOUT_PERF, TimingGPU) {
   kwargs_t kwargs = basic_dropout_args;
   // Which math function is arbitrary since it will have roughly constant timing among approaches
   kwargs.push_back({"mode", "always"});
-  TShape shape({10, 10, 10, 10});
+  mxnet::TShape shape({10, 10, 10, 10});
   test::op::CoreOperatorRunner runner;
   kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
   runner.RunBidirectional(false, { shape }, kwargs, 1);

-  std::vector<TShape> shapes = {
+  std::vector<mxnet::TShape> shapes = {
     {1, 1, 28, 28},
     {1, 3, 28, 28},
     {50, 1, 18, 32},
     {50, 3, 18, 32},
     {20, 3, 128, 128}
   };
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "Dropout", "_backward_Dropout");
     runner.TimingTest("Dropout Operator GPU", true, false, kwargs, 2, 10,
                       { shape }, false);
diff --git a/tests/cpp/operator/fully_conn_perf.cc b/tests/cpp/operator/fully_conn_perf.cc
index e574ae2b4379..9fd70261dc93 100644
--- a/tests/cpp/operator/fully_conn_perf.cc
+++ b/tests/cpp/operator/fully_conn_perf.cc
@@ -26,7 +26,6 @@
 #include
 #include
-#include <nnvm/tuple.h>
 #include "../../src/operator/nn/fully_connected-inl.h"
 #include "../include/test_op_runner.h"
 #include "../include/test_core_op.h"
@@ -40,8 +39,8 @@ const kwargs_t basic_fullyconn_args = { {"num_hidden", "250"}, {"no_bias", "true
  * \brief Generic bidirectional sanity test
  */
 TEST(FULLY_CONNECTED, ExecuteBidirectionalFullyConnected) {
-  TShape shape1({5, 5});
-  TShape shape2({250, 5});
+  mxnet::TShape shape1({5, 5});
+  mxnet::TShape shape2({250, 5});
   kwargs_t kwargs = basic_fullyconn_args;
   test::op::CoreOperatorRunner runner;
   runner.set_verbose(true);
@@ -55,13 +54,13 @@ TEST(FULLY_CONNECTED, ExecuteBidirectionalFullyConnected) {
  */
 TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) {
   kwargs_t kwargs = basic_fullyconn_args;
-  TShape shape1({10, 10, 10, 10});
-  TShape shape2({250, 1000});
+  mxnet::TShape shape1({10, 10, 10, 10});
+  mxnet::TShape shape2({250, 1000});
   test::op::CoreOperatorRunner runner;
   kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "FullyConnected",
                                                     "_backward_FullyConnected");
   runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1);
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -76,8 +75,8 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape& shape : shapes) {
-    TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))});
+  for (const mxnet::TShape& shape : shapes) {
+    mxnet::TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))});
     kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "FullyConnected",
                                                       "_backward_FullyConnected");
     runner.TimingTest("Fully connected CPU", false, false, kwargs, 2, 10,
@@ -91,13 +90,13 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingCPU) {
  */
 TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) {
   kwargs_t kwargs = basic_fullyconn_args;
-  TShape shape1({10, 10, 10, 10});
-  TShape shape2({250, 1000});
+  mxnet::TShape shape1({10, 10, 10, 10});
+  mxnet::TShape shape2({250, 1000});
   test::op::CoreOperatorRunner runner;
   kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "FullyConnected",
                                                     "_backward_FullyConnected");
   runner.RunBidirectional(false, { shape1, shape2 }, kwargs, 1);
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -112,8 +111,8 @@ TEST(FULLY_CONNECTED, FullyConnectedTimingGPU) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape& shape : shapes) {
-    TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))});
+  for (const mxnet::TShape& shape : shapes) {
+    mxnet::TShape shape2({250, static_cast(shape.ProdShape(1, shape.ndim()))});
     kwargs = test::op::CoreOpExecutor::ArgsWithOpName(kwargs, "FullyConnected",
                                                       "_backward_FullyConnected");
     runner.TimingTest("Fully connected GPU", true, false, kwargs, 2, 10,
diff --git a/tests/cpp/operator/mkldnn_operator_test.cc b/tests/cpp/operator/mkldnn_operator_test.cc
index 3bf3228a4b44..559ab5da0ccc 100644
--- a/tests/cpp/operator/mkldnn_operator_test.cc
+++ b/tests/cpp/operator/mkldnn_operator_test.cc
@@ -437,7 +437,7 @@ void VerifyConcatResult(const std::vector &in_arrs,
                         const std::vector &out_arrs) {
   int num_inputs = in_arrs.size();
   int input_size = in_arrs[0]->shape().Size();
-  TShape input_shape = in_arrs[0]->shape();
+  mxnet::TShape input_shape = in_arrs[0]->shape();
   NDArray output = out_arrs[0]->Reorder2Default();
   size_t total_size = output.shape().Size();
   EXPECT_EQ(input_size * num_inputs, total_size);
@@ -462,7 +462,7 @@ void VerifyConcatBackwardsResult(const std::vector &in_arrs,
   // in_arrs is larger array, out_arr is smaller
   int num_inputs = out_arrs.size();
   int input_size = out_arrs[0]->shape().Size();
-  TShape input_shape = out_arrs[0]->shape();
+  mxnet::TShape input_shape = out_arrs[0]->shape();
   NDArray output = in_arrs[0]->Reorder2Default();
   size_t total_size = output.shape().Size();
   EXPECT_EQ(input_size * num_inputs, total_size);
@@ -879,7 +879,7 @@ void TestOpExBN(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs) {
 }

 // Computes second dimension of FC weight matrix based on input shape
-uint32_t GetFCWeightDim2(const nnvm::TShape arr) {
+uint32_t GetFCWeightDim2(const mxnet::TShape arr) {
   uint32_t dim = 1;
   for (int i = 1; i < arr.ndim(); i++) {
     dim *= arr[i];
@@ -916,13 +916,13 @@ void TestFullyConnectedOp(const OpAttrs &forward_attrs, const OpAttrs &backwards
     if (in_shape.ndim() < 2)
       continue;

-    nnvm::TShape wt_shape(2);
+    mxnet::TShape wt_shape(2);
     wt_shape[0] = num_hid;
     wt_shape[1] = GetFCWeightDim2(in_shape);
     NDArray weights(wt_shape, Context());
     InitDefaultArray(&weights, false);

-    nnvm::TShape bias_shape(1);
+    mxnet::TShape bias_shape(1);
     bias_shape[0] = num_hid;
     NDArray bias(bias_shape, Context());
     InitDefaultArray(&bias, false);
@@ -931,7 +931,7 @@ void TestFullyConnectedOp(const OpAttrs &forward_attrs, const OpAttrs &backwards
     inputs[1] = &weights;
     inputs[2] = &bias;

-    nnvm::TShape out_shape(2);
+    mxnet::TShape out_shape(2);
     out_shape[0] = in_shape[0];
     out_shape[1] = num_hid;
@@ -1018,9 +1018,9 @@ void TestConvOp(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs,
   P param;
   param.Init(forward_attrs.attrs.dict);
-  TShape kernel = param.kernel;
-  TShape padding = param.pad;
-  TShape stride = param.stride;
+  mxnet::TShape kernel = param.kernel;
+  mxnet::TShape padding = param.pad;
+  mxnet::TShape stride = param.stride;
   int num_filter = param.num_filter;

   std::vector in_arrs = GetTestInputArrays(
@@ -1032,7 +1032,7 @@ void TestConvOp(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs,
     auto in_arr = in_arrs[i1];

     // can only conv only 4D inputs
-    TShape input_shape = in_arr.arr.shape();
+    mxnet::TShape input_shape = in_arr.arr.shape();
     if (input_shape.ndim() != kernel.ndim() + 2)
       continue;
@@ -1056,7 +1056,7 @@ void TestConvOp(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs,
                     scale_vector, true, forward_attrs.output_types);
     }
     NDArray ndkernel = CreateKernelNDArray(kernel, num_filter, in_arr.arr.shape(), is_deconv);
-    TShape bias_shape = {num_filter};
+    mxnet::TShape bias_shape = {num_filter};
     NDArray ndbias = CreateBiasNDArray(bias_shape);
     inputs[0] = &in_arr.arr;
     inputs[1] = &ndkernel;
@@ -1144,9 +1144,9 @@ void TestPoolingOp(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs)
   mxnet::op::PoolingParam param;
   param.Init(forward_attrs.attrs.dict);
-  TShape kernel = param.kernel;
-  TShape padding = param.pad;
-  TShape stride = param.stride;
+  mxnet::TShape kernel = param.kernel;
+  mxnet::TShape padding = param.pad;
+  mxnet::TShape stride = param.stride;

   std::vector in_arrs = GetTestInputArrays();
   std::vector> out_arrs(forward_attrs.num_outputs);
@@ -1156,7 +1156,7 @@ void TestPoolingOp(const OpAttrs &forward_attrs, const OpAttrs &backwards_attrs)
     auto in_arr = in_arrs[i1];

     // can only pool only 3D and 4D inputs
-    TShape input_shape = in_arr.arr.shape();
+    mxnet::TShape input_shape = in_arr.arr.shape();
     if (input_shape.ndim() != kernel.ndim() + 2)
       continue;
     // cannot pool if ndarray and mkldnn memory have different ndim
diff --git a/tests/cpp/operator/mkldnn_test.cc b/tests/cpp/operator/mkldnn_test.cc
index 31e762f21720..1e7f09005c93 100644
--- a/tests/cpp/operator/mkldnn_test.cc
+++ b/tests/cpp/operator/mkldnn_test.cc
@@ -129,7 +129,7 @@ static void VerifyMem(const mkldnn::memory &mem) {
 TEST(MKLDNN_NDArray, GetDataReorder) {
   TestArrayShapes tas = GetTestArrayShapes();
-  std::vector<nnvm::TShape> shapes = tas.shapes;
+  mxnet::ShapeVector shapes = tas.shapes;
   std::vector pds = tas.pds;
@@ -373,7 +373,7 @@ TEST(MKLDNN_NDArray, GetTestInputArraysConcat) {
 }

 TEST(MKLDNN_NDArray, GetTestOutputArraysConcat) {
   auto shapes_pds = GetTestArrayShapes();
-  std::vector<nnvm::TShape> shapes; shapes = shapes_pds.shapes;
+  std::vector<mxnet::TShape> shapes; shapes = shapes_pds.shapes;
   std::vector pds = shapes_pds.pds;
   for (auto &shape : shapes) {
     for (int dim = 0; dim < 5; dim++) {
diff --git a/tests/cpp/operator/runner/core_op_runner_test.cc b/tests/cpp/operator/runner/core_op_runner_test.cc
index 6cc2baddae28..96458cd1c713 100644
--- a/tests/cpp/operator/runner/core_op_runner_test.cc
+++ b/tests/cpp/operator/runner/core_op_runner_test.cc
@@ -58,7 +58,7 @@ inline std::vector AsVect(const TT& t) {
  * \brief Generic bidirectional sanity test for simple unary op
  */
 TEST(CORE_OP_RUNNER, ExecuteBidirectionalSimpleUnaryList) {
-  TShape shape({5, 5});
+  mxnet::TShape shape({5, 5});
   kwargs_t kwargs = basic_args;

   for (const std::pair& i : test_unary_operators) {
@@ -90,7 +90,7 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalList) {
     const char *op_name = i.first.c_str();
     const char *backward_op_name = i.second.c_str();

-    TShape shape({5, 5});
+    mxnet::TShape shape({5, 5});
     kwargs_t kwargs = basic_args;

     test::op::CoreOpExecutor op(false, AsVect(shape));
@@ -119,7 +119,7 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalDotProduct) {
   kwargs_t kwargs = basic_args;

-  test::op::CoreOpExecutor op(false, { TShape({ 2, 3 }), TShape({ 3, 2 }) });
+  test::op::CoreOpExecutor op(false, { mxnet::TShape({ 2, 3 }), mxnet::TShape({ 3, 2 }) });
   op.set_verbose(false);

   op.Init(op.ArgsWithOpName(kwargs, op_name, backward_op_name));
@@ -137,7 +137,7 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunnerSimpleUnary) {
   typedef float DType;
-  TShape shape({5, 5});
+  mxnet::TShape shape({5, 5});
   for (const std::pair& i : test_unary_operators) {
     const char *op_name = i.first.c_str();
     const char *backward_op_name = i.second.c_str();
@@ -149,7 +149,7 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunner) {
   typedef float DType;
-  TShape shape({5, 5});
+  mxnet::TShape shape({5, 5});
   for (const std::pair& i : test_binary_operators) {
     const char *op_name = i.first.c_str();
     const char *backward_op_name = i.second.c_str();
@@ -168,7 +168,7 @@ TEST(CORE_OP_RUNNER, ExecuteBidirectionalRunnerDotProduct) {
   const char *backward_op_name = "_backward_dot";
   test::op::CoreOperatorRunner runner;
   runner.RunBidirectional(false,
-                          { TShape({ 2, 3 }), TShape({ 3, 2 }) },
+                          { mxnet::TShape({ 2, 3 }), mxnet::TShape({ 3, 2 }) },
                           test::op::CoreOpExecutor::ArgsWithOpName(basic_args,
                                                                    op_name,
                                                                    backward_op_name),
@@ -186,9 +186,9 @@ TEST(CORE_OP_RUNNER, TimingCPUSimpleUnary) {
   const kwargs_t kwargs = test::op::CoreOpExecutor::ArgsWithOpName(basic_args, op_name);

   test::op::CoreOperatorRunner runner;
-  runner.RunBidirectional(false, { TShape({10, 10, 10, 10}) }, kwargs, 1);  // prime code and cache
+  runner.RunBidirectional(false, { mxnet::TShape({10, 10, 10, 10}) }, kwargs, 1);

-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -203,7 +203,7 @@ TEST(CORE_OP_RUNNER, TimingCPUSimpleUnary) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest(std::string(op_name) + "Operator CPU", false, false, kwargs, 2, 10,
                       { shape });
   }
@@ -219,9 +219,9 @@ TEST(CORE_OP_RUNNER, TimingCPUBinary) {
                                                  basic_args, op_name, backward_op_name);
   test::op::CoreOperatorRunner runner;
-  runner.RunBidirectional(false, { TShape({10, 10, 10, 10}) }, kwargs, 1);  // prime code and cache
+  runner.RunBidirectional(false, { mxnet::TShape({10, 10, 10, 10}) }, kwargs, 1);

-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -236,7 +236,7 @@ TEST(CORE_OP_RUNNER, TimingCPUBinary) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest(std::string(op_name) + "Operator CPU", false, false, kwargs, 2, 10,
                       { shape });
   }
@@ -257,16 +257,16 @@ TEST(CORE_OP_RUNNER, TimingCPUBinaryDotProduct) {
   test::op::CoreOperatorRunner runner;
   runner.RunBidirectional(false, { {2, 3}, {3, 2} }, kwargs, 1);  // prime code and cache
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = { {28, 28}, {18, 32}, {128, 24}, {128, 256} };
   } else {
     shapes = { {28, 28}, {128, 24} };
   }
-  std::vector<TShape> input_shapes(2);
-  for (const TShape &shape : shapes) {
+  mxnet::ShapeVector input_shapes(2);
+  for (const mxnet::TShape &shape : shapes) {
     input_shapes[0] = shape;
-    input_shapes[1] = TShape({shape[1], shape[0]});
+    input_shapes[1] = mxnet::TShape({shape[1], shape[0]});
     runner.TimingTest(std::string(op_name) + " Operator CPU", false, false, kwargs, 2, 10,
                       input_shapes);
   }
@@ -281,11 +281,11 @@ TEST(CORE_OP_RUNNER, TimingGPUSimpleUnary) {
   test::op::CoreOperatorRunner runner;
   runner.RunBidirectional(false,
-                          { TShape({10, 10, 10, 10}) },
+                          { mxnet::TShape({10, 10, 10, 10}) },
                           kwargs,
                           1);  // prime code and cache
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -300,7 +300,7 @@ TEST(CORE_OP_RUNNER, TimingGPUSimpleUnary) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest(std::string(op_name) + "Operator GPU", true, false, kwargs, 2, 10,
                       { shape });
   }
 }
@@ -315,11 +315,11 @@ TEST(CORE_OP_RUNNER, TimingGPUBinary) {
   test::op::CoreOperatorRunner runner;
   runner.RunBidirectional(true,
-                          { TShape({10, 10, 10, 10}) },
+                          { mxnet::TShape({10, 10, 10, 10}) },
                           kwargs,
                           1);  // prime code and cache
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 1, 28, 28},
@@ -334,7 +334,7 @@ TEST(CORE_OP_RUNNER, TimingGPUBinary) {
       {50, 3, 18, 32},
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest(std::string(op_name) + "Operator GPU", true, false, kwargs, 2, 10,
                       { shape });
   }
 }
diff --git a/tests/cpp/operator/slice_channel_perf.cc b/tests/cpp/operator/slice_channel_perf.cc
index dc42d2a5d437..638613ea1ec9 100644
--- a/tests/cpp/operator/slice_channel_perf.cc
+++ b/tests/cpp/operator/slice_channel_perf.cc
@@ -38,7 +38,7 @@ const kwargs_t basic_activation_args = { };
  * \brief Generic bidirectional sanity test
  */
 TEST(SLICE_CHANNEL_PERF, ExecuteBidirectional) {
-  TShape shape({1, 160, 200});
+  mxnet::TShape shape({1, 160, 200});
   kwargs_t kwargs = basic_activation_args;
   kwargs.push_back({"num_outputs", "160"});
   test::op::LegacyOpRunner runner;
@@ -54,9 +54,9 @@ TEST(SLICE_CHANNEL_PERF, TimingCPU) {
   kwargs.push_back({"num_outputs", "160"});
   test::op::LegacyOpRunner runner;
   runner.RunBidirectional(false,
-                          { TShape({1, 160, 200}) },
+                          { mxnet::TShape({1, 160, 200}) },
                           kwargs, 1);  // prime code and cache
-  std::vector<TShape> shapes;
+  std::vector<mxnet::TShape> shapes;
   if (test::performance_run) {
     shapes = {
       {1, 160, 200},
@@ -71,7 +71,7 @@ TEST(SLICE_CHANNEL_PERF, TimingCPU) {
       {1, 160, 200}
     };
   }
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest("SliceChannel Operator CPU", false, false, kwargs, 2, 10, { shape });
   }
 }
@@ -87,16 +87,16 @@ TEST(SLICE_CHANNEL_PERF, TimingGPU) {
   test::OperatorRunner> runner;
   runner.RunBidirectional(true,
-                          { TShape({1, 160, 200}) },
+                          { mxnet::TShape({1, 160, 200}) },
                           kwargs, 1);  // prime code and cache
-  std::vector<TShape> shapes = {
+  std::vector<mxnet::TShape> shapes = {
     {1, 160, 200},
     {1, 160, 200},
     {1, 160, 200},
     {1, 160, 200},
     {1, 160, 200}
   };
-  for (const TShape &shape : shapes) {
+  for (const mxnet::TShape &shape : shapes) {
     runner.TimingTest("SliceChannel Operator GPU", true, false, kwargs, 2, 10, { shape });
   }
 }
diff --git a/tests/cpp/operator/tune/operator_tune_test.cc b/tests/cpp/operator/tune/operator_tune_test.cc
index 3c45b5e31446..00a062698b17 100644
--- a/tests/cpp/operator/tune/operator_tune_test.cc
+++ b/tests/cpp/operator/tune/operator_tune_test.cc
@@ -41,8 +41,8 @@ TEST(OMP_TUNING, ShowAllTunedOps) {

 using kwargs_t = test::op::kwargs_t;

-static std::vector<std::vector<TShape>> tuning_shapes() {
-  std::vector<std::vector<TShape>> shapes;
+static std::vector<mxnet::ShapeVector> tuning_shapes() {
+  std::vector<mxnet::ShapeVector> shapes;
   if (test::performance_run || test::csv) {
     shapes = {
       {{1, 1, 28, 28}},
@@ -127,7 +127,7 @@ static float EvaluateTune(const bool verbose = true) {
   std::cout << "******************************" << std::endl;

   // Do the performance runs
-  std::vector<std::vector<TShape>> shapes = tuning_shapes();
+  std::vector<mxnet::ShapeVector> shapes = tuning_shapes();

   tuningTester.TestTunedOperator({}, verbose, shapes,
                                  binary_operators[i].first.c_str(),