diff --git a/CMakeLists.txt b/CMakeLists.txt
index 688dd42c54fe..d3e6c7440e16 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,7 +74,6 @@ option(USE_JEMALLOC "Build with Jemalloc support" OFF)
 option(USE_LIBJPEG_TURBO "Use libjpeg-turbo" OFF)
 option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
 option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
-option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
 option(USE_CPP_PACKAGE "Build C++ Package" OFF)
 option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
 option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
@@ -521,39 +520,6 @@ if(USE_OPERATOR_TUNING AND USE_OPENMP)
   add_definitions(-DMXNET_USE_OPERATOR_TUNING=1)
 endif()
 
-if(USE_PLUGIN_CAFFE)
-  if(NOT USE_CUDA)
-    set(CPU_ONLY ON)
-    add_definitions(-DCPU_ONLY=1)
-  endif()
-  if(NOT DEFINED CAFFE_PATH)
-    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/caffe)
-      set(CAFFE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/caffe)
-    else()
-      set(CAFFE_PATH $ENV{CAFFE_PATH})
-    endif()
-  endif()
-  list(APPEND CMAKE_MODULE_PATH ${CAFFE_PATH}/cmake)
-  include_directories(${CAFFE_PATH}/include)
-  include_directories(${CAFFE_PATH}/build/src)
-  include_directories(${CMAKE_BINARY_DIR}/caffe/include)
-  link_directories(${CAFFE_PATH}/build/lib)
-  if(NOT DEFINED CAFFE_PATH)
-    message(FATAL_ERROR "Please set CAFFE_PATH to point to the caffe source installation")
-  endif()
-  FILE(GLOB_RECURSE PLUGINS_SOURCE "plugin/caffe/*.cc" "plugin/caffe/*.h")
-  FILE(GLOB_RECURSE PLUGINS_CUSRC "plugin/caffe/*.cu")
-  list(APPEND SOURCE ${PLUGINS_SOURCE})
-  list(APPEND CUDA ${PLUGINS_CUSRC})
-  include_directories(${CMAKE_BINARY_DIR}/include)
-  add_definitions(-DMXNET_USE_CAFFE=1)
-  list(APPEND mxnet_LINKER_LIBS
-    protobuf boost_system boost_thread boost_filesystem
-    gflags glog caffe
-    ${Caffe_LINKER_LIBS}
-)
-endif()
-
 if (NOT (EXTRA_OPERATORS STREQUAL ""))
     mxnet_source_group("Extra"   GLOB_RECURSE "${EXTRA_OPERATORS}/*.cc")
     mxnet_source_group("Extra\\Cuda"   GLOB_RECURSE "${EXTRA_OPERATORS}/*.cu")
@@ -640,14 +606,6 @@ if(USE_CUDA)
   link_directories(${CUDAToolkit_LIBRARY_DIR})
 endif()
 
-# unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as well
-if(USE_PLUGIN_CAFFE)
-  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/caffe)
-    add_subdirectory(caffe)
-  endif()
-endif()
-
-
 if(MSVC)
   set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /EHsc")
   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /EHsc /Gy")
diff --git a/docs/static_site/src/pages/api/faq/caffe.md b/docs/static_site/src/pages/api/faq/caffe.md
deleted file mode 100644
index ba84b8b590be..000000000000
--- a/docs/static_site/src/pages/api/faq/caffe.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-layout: page_category
-title:  Convert from Caffe to MXNet
-category: faq
-faq_c: Deployment Environments
-question: How to convert a Caffe model to MXNet?
-permalink: /api/faq/caffe
----
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-# How to | Convert from Caffe to MXNet
-
-Key topics covered include the following:
-
-- [Calling Caffe operators in MXNet](#calling-caffe-operators-in-mxnet)
-
-## Calling Caffe operators in MXNet
-
-MXNet supports calling most Caffe operators,
-including network layer, data layer, and loss function, directly. It is
-particularly useful if there are customized operators implemented in Caffe, then
-we do not need to re-implement them in MXNet.
-
-### How to install
-
-This feature requires Caffe. In particular, we need to re-compile Caffe before
-[PR #4527](https://github.com/BVLC/caffe/pull/4527) is merged into Caffe. There
-are the steps of how to rebuild Caffe:
-
-1. Download [Caffe](https://github.com/BVLC/caffe). E.g. `git clone
-   https://github.com/BVLC/caffe`
-2. Download the
-   [patch for the MXNet interface](https://github.com/BVLC/caffe/pull/4527.patch)
-   and apply to Caffe. E.g.
-   ```bash
-   cd caffe && wget https://github.com/BVLC/caffe/pull/4527.patch && git apply 4527.patch
-   ```
-3. Build and install Caffe by following the
-   [official guide](https://caffe.berkeleyvision.org/installation.html).
-
-Next we need to compile MXNet with Caffe supports
-
-1. Copy `make/config.mk` (for Linux) or `make/osx.mk`
-   (for Mac) into the MXNet root folder as `config.mk` if you have not done it yet
-2. Open the copied `config.mk` and uncomment these two lines
-   ```bash
-   CAFFE_PATH = $(HOME)/caffe
-   MXNET_PLUGINS += plugin/caffe/caffe.mk
-   ```
-   Modify `CAFFE_PATH` to your Caffe installation, if necessary.
-3. Then build with 8 threads `make clean && make -j8`.
-
-### How to use
-
-This Caffe plugin adds three components into MXNet:
-
-- `sym.CaffeOp` : Caffe neural network layer
-- `sym.CaffeLoss` : Caffe loss functions
-- `io.CaffeDataIter` : Caffe data layer
-
-#### Use `sym.CaffeOp`
-The following example shows the definition of a 10 classes multi-layer perceptron:
-
-```Python
-data = mx.sym.Variable('data')
-fc1  = mx.sym.CaffeOp(data_0=data, num_weight=2, name='fc1', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
-act1 = mx.sym.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
-fc2  = mx.sym.CaffeOp(data_0=act1, num_weight=2, name='fc2', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
-act2 = mx.sym.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
-fc3 = mx.sym.CaffeOp(data_0=act2, num_weight=2, name='fc3', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
-```
-
-Let's break it down. First, `data = mx.sym.Variable('data')` defines a variable
-as a placeholder for input.  Then, it's fed through Caffe operators with `fc1 =
-mx.sym.CaffeOp(...)`. `CaffeOp` accepts several arguments:
-
-- The inputs to Caffe operators are named as `data_i` for *i=0, ..., num_data-1*
-- `num_data` is the number of inputs. In default it is 1, and therefore
-skipped in the above example.
-- `num_out` is the number of outputs. In default it is 1 and also skipped.
-- `num_weight` is the number of weights (`blobs_`).  Its default value is 0. We
-need to explicitly specify it for a non-zero value.
-- `prototxt` is the protobuf configuration string.
-
-#### Use `sym.CaffeLoss`
-
-Using Caffe loss is similar.
-We can replace the MXNet loss with Caffe loss.
-We can replace
-
-Replacing the last line of the above example with the following two lines we can
-call Caffe loss instead of MXNet loss.
-
-```Python
-label = mx.sym.Variable('softmax_label')
-mlp = mx.sym.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
-```
-
-Similar to `CaffeOp`, `CaffeLoss` has arguments `num_data` (2 in default) and
-`num_out` (1 in default). But there are two differences
-
-1. Inputs are `data` and `label`. And we need to explicitly create a variable
-   placeholder for label, which is implicitly done in MXNet loss.
-2. `grad_scale` is the weight of this loss.
-
-#### Use `io.CaffeDataIter`
-
-We can also wrap a Caffe data layer into MXNet's data iterator. Below is an
-example for creating a data iterator for MNIST
-
-```python
-train = mx.io.CaffeDataIter(
-    prototxt =
-    'layer { \
-        name: "mnist" \
-        type: "Data" \
-        top: "data" \
-        top: "label" \
-        include { \
-            phase: TEST \
-        } \
-        transform_param { \
-            scale: 0.00390625 \
-        } \
-        data_param { \
-            source: "caffe/examples/mnist/mnist_test_lmdb" \
-            batch_size: 100 \
-            backend: LMDB \
-        } \
-    }',
-    flat           = flat,
-    num_examples   = 60000,
-)
-```
diff --git a/include/mxnet/libinfo.h b/include/mxnet/libinfo.h
index ade1c731afcf..dd7790059de1 100644
--- a/include/mxnet/libinfo.h
+++ b/include/mxnet/libinfo.h
@@ -115,10 +115,6 @@
 #define MXNET_USE_F16C MSHADOW_USE_F16C
 #endif
 
-#ifndef MXNET_USE_CAFFE
-#define MXNET_USE_CAFFE 0
-#endif
-
 #ifndef MXNET_USE_DIST_KVSTORE
 #define MXNET_USE_DIST_KVSTORE 0
 #endif
@@ -183,9 +179,7 @@ enum : unsigned {
   OPENCV,
 
   // Misc
-  CAFFE,
   DIST_KVSTORE,
-  CXX14,
   INT64_TENSOR_SIZE,
 
   // Signal handler to print stack traces on exceptions
diff --git a/plugin/caffe/README.md b/plugin/caffe/README.md
deleted file mode 100644
index 7e60f2e83564..000000000000
--- a/plugin/caffe/README.md
+++ /dev/null
@@ -1,58 +0,0 @@
-<!--- Licensed to the Apache Software Foundation (ASF) under one -->
-<!--- or more contributor license agreements.  See the NOTICE file -->
-<!--- distributed with this work for additional information -->
-<!--- regarding copyright ownership.  The ASF licenses this file -->
-<!--- to you under the Apache License, Version 2.0 (the -->
-<!--- "License"); you may not use this file except in compliance -->
-<!--- with the License.  You may obtain a copy of the License at -->
-
-<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
-
-<!--- Unless required by applicable law or agreed to in writing, -->
-<!--- software distributed under the License is distributed on an -->
-<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
-<!--- KIND, either express or implied.  See the License for the -->
-<!--- specific language governing permissions and limitations -->
-<!--- under the License. -->
-
-# How to use Caffe operator in MXNet
-
-[Caffe](http://caffe.berkeleyvision.org/) has been a well-known and widely-used deep learning framework. Now MXNet has supported calling most caffe operators(layers) and loss functions directly in its symbolic graph! Using one's own customized caffe layer is also effortless.
-
-Besides Caffe, MXNet has already embedded Torch modules and its tensor mathematical functions. ([link](https://github.com/dmlc/mxnet/blob/master/docs/faq/torch.md))
-
-This blog demonstrates two steps to use Caffe op in MXNet:
-
-* How to install MXNet with Caffe support.
-
-* How to embed Caffe op into MXNet's symbolic graph.
-
-## Install Caffe With MXNet interface
-* Download offical Caffe repository [BVLC/Caffe](https://github.com/BVLC/caffe).
-* Download [caffe patch for mxnet interface] (https://github.com/BVLC/caffe/pull/4527.patch). Move patch file under your caffe root folder and apply the patch by `git apply patch_file_name`.
-* Install caffe following [official guide](http://caffe.berkeleyvision.org/installation.html).
-
-## Compile with Caffe
-* In mxnet folder, open `config.mk` (if you haven't already, copy `make/config.mk` (Linux) or `make/osx.mk` (Mac) into MXNet root folder as `config.mk`) and uncomment the lines `CAFFE_PATH = $(HOME)/caffe` and `MXNET_PLUGINS += plugin/caffe/caffe.mk`. Modify `CAFFE_PATH` to your caffe installation if necessary. 
-* Run `make clean && make` to build with caffe support.
-
-## Caffe Operator (Layer)
-Caffe's neural network operator and loss functions are supported by MXNet through `mxnet.symbol.CaffeOp` and `mxnet.symbol.CaffeLoss` respectively.
-For example, the following code shows multi-layer perception network for classifying MNIST digits ([full code](https://github.com/dmlc/mxnet/blob/master/example/caffe/caffe_net.py)):
-
-### Python
-```Python
-data = mx.symbol.Variable('data')
-label = mx.symbol.Variable('softmax_label')
-fc1  = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")
-act1 = mx.symbol.CaffeOp(data_0=fc1, prototxt="layer{type:\"TanH\"}")
-fc2  = mx.symbol.CaffeOp(data_0=act1, num_weight=2, name='fc2', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 64} }")
-act2 = mx.symbol.CaffeOp(data_0=fc2, prototxt="layer{type:\"TanH\"}")
-fc3 = mx.symbol.CaffeOp(data_0=act2, num_weight=2, name='fc3', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 10}}")
-mlp = mx.symbol.CaffeLoss(data=fc3, label=label, grad_scale=1, name='softmax', prototxt="layer{type:\"SoftmaxWithLoss\"}")
-```
-
-Let's break it down. First `data = mx.symbol.Variable('data')` defines a variable as placeholder for input.
-Then it's fed through Caffe operators with `fc1  = mx.symbol.CaffeOp(data_0=data, num_weight=2, name='fc1', prototxt="layer{type:\"InnerProduct\" inner_product_param{num_output: 128} }")`.
-
-The inputs to caffe op are named as data_i for i=0 ... num_data-1 as `num_data` is the number of inputs. You may skip the argument, as the example does, if its value is 1. While `num_weight` is number of `blobs_`(weights). Its default value is 0, as many ops maintain no weight. `prototxt` is the configuration string.
diff --git a/plugin/caffe/caffe.mk b/plugin/caffe/caffe.mk
deleted file mode 100644
index c115e473be9d..000000000000
--- a/plugin/caffe/caffe.mk
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-CFLAGS += -I$(CAFFE_PATH)/include -I$(CAFFE_PATH)/build/src -I$(CAFFE_PATH)/build/include
-LDFLAGS += -lprotobuf -lboost_system -lboost_thread -lboost_filesystem -lgflags -lglog -L$(CAFFE_PATH)/build/lib -lcaffe
-
-ifeq ($(USE_CUDNN), 1)
-	CFLAGS += -DUSE_CUDNN=1
-endif
-
-ifeq ($(USE_CUDA), 0)
-	CFLAGS += -DCPU_ONLY=1
-endif
-
-CAFFE_SRC = $(wildcard plugin/caffe/*.cc)
-PLUGIN_OBJ += $(patsubst %.cc, build/%.o, $(CAFFE_SRC))
-CAFFE_CUSRC = $(wildcard plugin/caffe/*.cu)
-PLUGIN_CUOBJ += $(patsubst %.cu, build/%_gpu.o, $(CAFFE_CUSRC))
diff --git a/plugin/caffe/caffe_blob.cc b/plugin/caffe/caffe_blob.cc
deleted file mode 100644
index 6a75439f3e4e..000000000000
--- a/plugin/caffe/caffe_blob.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_blob.cc
- * \brief Implementations of SetDataGradToBlob given various device/dimension
- * \author Haoran Wang
-*/
-#include "caffe_blob.h"
-namespace mxnet {
-namespace op {
-namespace caffe {
-
-template<>
-void SetDataGradToBlob<mshadow::cpu, float>(caffeMemoryTypes memType,
-                            std::vector<::caffe::Blob<float>*>::iterator blob,
-                            std::vector<TBlob>::const_iterator itr) {
-  float *data_ptr = reinterpret_cast<float*>((*itr).dptr_);
-  if (memType == Data)
-    (*blob)->set_cpu_data(data_ptr);
-  else
-    MXCAFFEBLOB(*blob, float)->set_cpu_diff(data_ptr);
-}
-
-template<>
-void SetDataGradToBlob<mshadow::cpu, double>(caffeMemoryTypes memType,
-                            std::vector<::caffe::Blob<double>*>::iterator blob,
-                            std::vector<TBlob>::const_iterator itr) {
-  double *data_ptr = reinterpret_cast<double*>((*itr).dptr_);
-  if (memType == Data)
-    (*blob)->set_cpu_data(data_ptr);
-  else
-    MXCAFFEBLOB(*blob, double)->set_cpu_diff(data_ptr);
-}
-
-template<>
-void SetDataGradToBlob<mshadow::gpu, float>(caffeMemoryTypes memType,
-                            std::vector<::caffe::Blob<float>*>::iterator blob,
-                            std::vector<TBlob>::const_iterator itr) {
-  float *data_ptr = reinterpret_cast<float*>((*itr).dptr_);
-  if (memType == Data)
-    (*blob)->set_gpu_data(data_ptr);
-  else
-    MXCAFFEBLOB(*blob, float)->set_gpu_diff(data_ptr);
-}
-
-template<>
-void SetDataGradToBlob<mshadow::gpu, double>(caffeMemoryTypes memType,
-                            std::vector<::caffe::Blob<double>*>::iterator blob,
-                            std::vector<TBlob>::const_iterator itr) {
-  double *data_ptr = reinterpret_cast<double*>((*itr).dptr_);
-  if (memType == Data)
-    (*blob)->set_gpu_data(data_ptr);
-  else
-    MXCAFFEBLOB(*blob, double)->set_gpu_diff(data_ptr);
-}
-
-mxnet::TShape Vector2TShape(const std::vector<int> &vec_int) {
-  std::vector<mshadow::index_t> vec;
-  for (uint32_t i = 0; i < vec_int.size(); ++i)
-    vec.push_back(vec_int[i]);
-  // 0-dim represents scalar in caffe
-  if (vec_int.size() == 0)
-    vec.push_back(1);
-  return {vec.begin(), vec.end()};
-}
-
-std::vector<int> TShape2Vector(const mxnet::TShape &tshape) {
-  std::vector<int> s;
-  for (uint32_t i =0 ; i < tshape.ndim(); ++i)
-    s.push_back(tshape[i]);
-  return s;
-}
-
-}  // namespace caffe
-}  // namespace op
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_blob.h b/plugin/caffe/caffe_blob.h
deleted file mode 100644
index 6243b5dc8c88..000000000000
--- a/plugin/caffe/caffe_blob.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_blob.h
- * \brief conversion between tensor and caffeBlob
- * \author Haoran Wang
-*/
-#ifndef PLUGIN_CAFFE_CAFFE_BLOB_H_
-#define PLUGIN_CAFFE_CAFFE_BLOB_H_
-
-#include <mxnet/tensor_blob.h>
-#include <vector>
-#include <caffe/blob.hpp>
-#include <caffe/layer.hpp>
-
-namespace mxnet {
-namespace op {
-
-namespace caffe {
-
-// Declare Memory Type for Caffe blob
-enum caffeMemoryTypes {Data, Grad, Non};
-
-mxnet::TShape Vector2TShape(const std::vector<int> &vec_int);
-std::vector<int> TShape2Vector(const mxnet::TShape &tshape);
-
-// implementation of tensor to blob, called by TensorToBlob
-template<typename Device, typename Dtype>
-void SetDataGradToBlob(caffeMemoryTypes memType,
-                       typename std::vector< ::caffe::Blob<Dtype>*>::iterator blob,
-                       typename std::vector<TBlob>::const_iterator itr);
-
-/**
- * \brief The interface to convert mxnet's tensor to caffe's blob
- * \brief called in caffe_operator_inl.h
- */
-template<typename Device, typename Dtype>
-void TBlob2CaffeBlob(caffeMemoryTypes memType,
-                     typename std::vector< ::caffe::Blob<Dtype>*>::iterator blob,
-                     typename std::vector<TBlob>::const_iterator tblob,
-                     int n = 1) {
-  for (int i = 0; i < n; ++i, ++blob, ++tblob) {
-    (*blob)->Reshape(TShape2Vector((*tblob).shape_));
-    SetDataGradToBlob<Device, Dtype>(memType, blob, tblob);
-  }
-}
-
-template<typename Dtype>
-void SetOpBlobs(::caffe::Layer<Dtype> *caffeOp,
-                const std::vector< ::caffe::Blob<Dtype>*>& weights) {
-  CHECK_EQ(caffeOp->blobs().size(), weights.size());
-  for (int i = 0; i < weights.size(); ++i)
-    caffeOp->blobs()[i].reset(weights[i]);
-}
-
-/**!
- * \brief Workaround for missing functions in ::caffe::Blob
- * \warning Do not add or override any virtual functions in this class
- * @tparam Dtype
- */
-template<class Dtype>
-class CaffeBlobFriend : public ::caffe::Blob<Dtype> {
- public:
-  inline void set_cpu_diff(Dtype* diff) {
-    CHECK(diff);
-    this->diff_->set_cpu_data(diff);
-  }
-
-  inline void set_gpu_diff(Dtype* diff) {
-    CHECK(diff);
-    this->diff_->set_gpu_data(diff);
-  }
-};
-
-#define MXCAFFEBLOB(__object$, __type$) \
-  (static_cast<mxnet::op::caffe::CaffeBlobFriend<__type$> *>(__object$))
-
-/**!
- * \brief Workaround for missing functions in ::caffe::Layer
- * \warning Do not add or override any virtual functions in this class
- * @tparam Dtype
- */
-template <typename Dtype>
-class CaffeLayerFriend : public ::caffe::Layer<Dtype> {
-  explicit CaffeLayerFriend(const ::caffe::LayerParameter& param) = delete;
- public:
-  inline void SetPhase(::caffe::Phase p) {
-    this->phase_ = p;
-  }
-};
-
-#define MXCAFFELAYER(__object$, __type$) \
-  (static_cast<mxnet::op::caffe::CaffeLayerFriend<__type$> *>(__object$))
-
-}  // namespace caffe
-}  // namespace op
-}  // namespace mxnet
-
-#endif  // PLUGIN_CAFFE_CAFFE_BLOB_H_
diff --git a/plugin/caffe/caffe_common.cc b/plugin/caffe/caffe_common.cc
deleted file mode 100644
index dd445efbd659..000000000000
--- a/plugin/caffe/caffe_common.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_common.h
- * \brief Common functions for caffeOp and caffeLoss symbols
- * \author Haoran Wang
-*/
-#include<mshadow/tensor.h>
-#include<caffe/common.hpp>
-#include"caffe_common.h"
-
-namespace mxnet {
-namespace op {
-namespace caffe {
-
-// Cpu implementation of set_mode
-template<>
-void CaffeMode::SetMode<mshadow::cpu>() {
-  ::caffe::Caffe::set_mode(::caffe::Caffe::CPU);
-}
-
-// Gpu implementation of set_mode
-template<>
-void CaffeMode::SetMode<mshadow::gpu>() {
-  ::caffe::Caffe::set_mode(::caffe::Caffe::GPU);
-}
-
-}  // namespace caffe
-}  // namespace op
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_common.h b/plugin/caffe/caffe_common.h
deleted file mode 100644
index 211d8c44d518..000000000000
--- a/plugin/caffe/caffe_common.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_common.h
- * \brief Common functions for caffeOp and caffeLoss symbols
- * \author Haoran Wang
-*/
-
-#ifndef PLUGIN_CAFFE_CAFFE_COMMON_H_
-#define PLUGIN_CAFFE_CAFFE_COMMON_H_
-
-#include <mxnet/operator.h>
-#include <dmlc/type_traits.h>
-
-#include <caffe/proto/caffe.pb.h>
-
-#include <vector>
-#include <iostream>
-#include <exception>
-
-#include <caffe/layer.hpp>
-#include <caffe/blob.hpp>
-#include <caffe/layer_factory.hpp>
-
-namespace mxnet {
-namespace op {
-namespace caffe {
-
-/**
- * \brief The class sets caffe's mode before doing forward/backward
- * \tparam xpu The device that the op will be executed on.
- */
-class CaffeMode {
- public:
-  template<typename xpu> static void SetMode();
-};
-
-// Initialization funciton called by caffeOp & caffeLoss
-template<typename Dtype>
-void InitCaffeBlobs(std::vector< ::caffe::Blob<Dtype>*>* v, int n_num) {
-  for (index_t i=0; i < n_num; ++i)
-    v->push_back(new ::caffe::Blob<Dtype>());
-}
-
-template<typename Dtype>
-void DelCaffeBlobs(std::vector< ::caffe::Blob<Dtype>*>* v, int n_num) {
-  for (index_t i=0; i < n_num; ++i)
-    delete v->at(i);
-}
-
-
-struct NULLDeleter {template<typename T> void operator()(T*){}};
-
-template <typename Dtype>
-void Deleter(::caffe::Layer<Dtype> *ptr) {
-}
-
-template <typename Dtype>
-class LayerRegistry {
- public:
-  static ::caffe::Layer<Dtype> * CreateLayer(const ::caffe::LayerParameter& param) {
-    ::caffe::shared_ptr< ::caffe::Layer<Dtype> > ptr =
-      ::caffe::LayerRegistry<Dtype>::CreateLayer(param);
-    // avoid caffe::layer destructor, which deletes the weights layer owns
-    new ::caffe::shared_ptr< ::caffe::Layer<Dtype> >(ptr);
-    return ptr.get();
-  }
-};
-
-}  // namespace caffe
-}  // namespace op
-}  // namespace mxnet
-
-/*! \brief override type_name for caffe::LayerParameter */
-namespace dmlc {
-  DMLC_DECLARE_TYPE_NAME(::caffe::LayerParameter, "caffe-layer-parameter");
-}
-
-#endif  // PLUGIN_CAFFE_CAFFE_COMMON_H_
diff --git a/plugin/caffe/caffe_data_iter.cc b/plugin/caffe/caffe_data_iter.cc
deleted file mode 100644
index 552b9dce9f3d..000000000000
--- a/plugin/caffe/caffe_data_iter.cc
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2015 by Contributors
- * \file caffe_data_iter.cc
- * \brief register mnist iterator
-*/
-#include <sys/time.h>
-#include <caffe/proto/caffe.pb.h>
-#include <dmlc/parameter.h>
-#include <atomic>
-
-#include "caffe_common.h"
-#include "caffe_stream.h"
-#include "caffe_fieldentry.h"
-#include "caffe_blob.h"
-#include "../../src/io/inst_vector.h"
-#include "../../src/io/iter_prefetcher.h"
-
-#define CHECK_NEXT_TIMING
-
-#ifdef CHECK_NEXT_TIMING
-#define IF_CHECK_TIMING(__t$) __t$
-#else
-#define IF_CHECK_TIMING(__t$)
-#endif
-
-namespace mxnet {
-namespace io {
-
-struct CaffeDataParam : public dmlc::Parameter<CaffeDataParam> {
-  /*! \brief protobuf text */
-  ::caffe::LayerParameter prototxt;
-  /*! \brief number of iterations per epoch */
-  int num_examples;
-  /*! \brief data mode */
-  bool flat;
-
-  DMLC_DECLARE_PARAMETER(CaffeDataParam) {
-    DMLC_DECLARE_FIELD(prototxt).set_default("layer{}")
-      .describe("Caffe's layer parameter");
-    DMLC_DECLARE_FIELD(flat).set_default(false)
-      .describe("Augmentation Param: Whether to flat the data into 1D.");
-    DMLC_DECLARE_FIELD(num_examples).set_lower_bound(1).set_default(10000)
-      .describe("Number of examples in the epoch.");
-  }
-};
-
-template<typename Dtype>
-class CaffeDataIter : public IIterator<TBlobBatch> {
- public:
-  explicit CaffeDataIter(int type_flag) : batch_size_(0), channels_(1), width_(1), height_(1)
-                               , type_flag_(type_flag), loc_(0)
-  {}
-  virtual ~CaffeDataIter(void) {}
-
-  // intialize iterator loads data in
-  virtual void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) {
-    std::map<std::string, std::string> kmap(kwargs.begin(), kwargs.end());
-    param_.InitAllowUnknown(kmap);
-
-    // Caffe seems to understand phase inside an "include {}" block
-    if (!param_.prototxt.has_phase()) {
-      if (param_.prototxt.include().size()) {
-        if (param_.prototxt.include(0).has_phase()) {
-          param_.prototxt.set_phase(param_.prototxt.include(0).phase());
-        }
-      }
-    }
-
-    std::string type = param_.prototxt.type();
-    caffe_data_layer_ = caffe::LayerRegistry<Dtype>::CreateLayer(param_.prototxt);
-    CHECK(caffe_data_layer_ != nullptr) << "Failed creating caffe data layer";
-    const size_t top_size = param_.prototxt.top_size();
-    if (top_size > 0) {
-      if (top_size > NR_SUPPORTED_TOP_ITEMS) {
-        LOG(WARNING)
-          << "Too may \"top\" items, only two (one data, one label) are currently supported";
-      }
-      top_.reserve(top_size);
-      for (size_t x = 0; x < top_size; ++x) {
-        ::caffe::Blob<Dtype> *blob = new ::caffe::Blob<Dtype>();
-        cleanup_blobs_.push_back(std::unique_ptr<::caffe::Blob<Dtype>>(blob));
-        top_.push_back(blob);
-      }
-      caffe_data_layer_->SetUp(bottom_, top_);
-      const std::vector<int> &shape = top_[DATA]->shape();
-      const size_t shapeDimCount = shape.size();
-      if (shapeDimCount > 0) {
-        batch_size_ = shape[0];
-        if (shapeDimCount > 1) {
-          channels_ = shape[1];
-          if (shapeDimCount > 2) {
-            width_ = shape[2];
-            if (shapeDimCount > 3) {
-              height_ = shape[3];
-            }
-          }
-        }
-      }
-
-      if (top_size > DATA) {
-        if (param_.flat) {
-          batch_data_ = TBlob(nullptr, mshadow::Shape2(batch_size_,
-                                                       channels_ * width_ * height_),
-                              cpu::kDevCPU, type_flag_);
-        } else {
-          batch_data_ = TBlob(nullptr, mxnet::TShape(top_[DATA]->shape().begin(),
-                                                     top_[DATA]->shape().end()),
-                              cpu::kDevCPU, type_flag_);
-        }
-      }
-      out_.data.clear();
-      if (top_size > LABEL) {
-          batch_label_ = TBlob(nullptr, mxnet::TShape(top_[LABEL]->shape().begin(),
-                                                      top_[LABEL]->shape().end()),
-                               cpu::kDevCPU, type_flag_);
-      }
-      out_.batch_size = batch_size_;
-    }
-  }
-
-  virtual void BeforeFirst(void) {
-    loc_ = 0;
-  }
-
-  virtual bool Next(void) {
-    // MxNet iterator is expected to return CPU-accessible memory
-    if (::caffe::Caffe::mode() != ::caffe::Caffe::CPU) {
-      ::caffe::Caffe::set_mode(::caffe::Caffe::CPU);
-      CHECK_EQ(::caffe::Caffe::mode(), ::caffe::Caffe::CPU);
-    }
-    caffe_data_layer_->Forward(bottom_, top_);
-    CHECK_GT(batch_size_, 0) << "batch size must be greater than zero";
-    CHECK_EQ(out_.batch_size, batch_size_) << "Internal Error: batch size mismatch";
-
-    if (loc_ + batch_size_ <= param_.num_examples) {
-      batch_data_.dptr_ = top_[DATA]->mutable_cpu_data();
-      batch_label_.dptr_ = top_[LABEL]->mutable_cpu_data();
-
-      out_.data.clear();
-      out_.data.push_back(batch_data_);
-      out_.data.push_back(batch_label_);
-      loc_ += batch_size_;
-      return true;
-    }
-
-    return false;
-  }
-
-  virtual const TBlobBatch &Value(void) const {
-    return out_;
-  }
-
- private:
-  /*! \brief indexes into top_ */
-  enum { DATA = 0, LABEL, NR_SUPPORTED_TOP_ITEMS };
-
-  /*! \brief MNISTCass iter params */
-  CaffeDataParam param_;
-  /*! \brief Shape scalar values */
-  index_t batch_size_, channels_, width_, height_;
-  /*! \brief Caffe data layer */
-  boost::shared_ptr<caffe::Layer<Dtype> >  caffe_data_layer_;
-  /*! \brief batch data blob */
-  mxnet::TBlob batch_data_;
-  /*! \brief batch label blob */
-  mxnet::TBlob batch_label_;
-  /*! \brief Output blob data for this iteration */
-  TBlobBatch out_;
-  /*! \brief Bottom and top connection-point blob data */
-  std::vector<::caffe::Blob<Dtype>*> bottom_, top_;
-  /*! \brief Cleanup these blobs on exit */
-  std::list<std::unique_ptr<::caffe::Blob<Dtype>>> cleanup_blobs_;
-  /*! \brief type flag of the tensor blob */
-  const int type_flag_;
-  /*! \brief Blobs done so far */
-  std::atomic<size_t>  loc_;
-};  // class CaffeDataIter
-
-class CaffeDataIterWrapper : public PrefetcherIter {
- public:
-  CaffeDataIterWrapper() : PrefetcherIter(NULL), next_time_(0) {}
-  virtual ~CaffeDataIterWrapper() {
-    IF_CHECK_TIMING(
-      if (next_time_.load() > 0) {
-        LOG(WARNING) << "Caffe data loader was blocked for "
-                     << next_time_.load()
-                     << " ms waiting for incoming data";
-      }
-    )
-  }
-  virtual void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) {
-    // We need to init prefetcher args in order to get dtype
-    this->param_.InitAllowUnknown(kwargs);
-    if (!this->param_.dtype) this->param_.dtype = mshadow::kFloat32;
-    switch (this->param_.dtype.value()) {
-      case mshadow::kFloat32:
-        this->loader_.reset(new CaffeDataIter<float>(this->param_.dtype.value()));
-        break;
-      case mshadow::kFloat64:
-        this->loader_.reset(new CaffeDataIter<double>(this->param_.dtype.value()));
-        break;
-      case mshadow::kFloat16:
-        LOG(FATAL) << "float16 layer is not supported by caffe";
-        return;
-      case mshadow::kBfloat16:
-        LOG(FATAL) << "bfloat16 layer is not supported by caffe";
-        return;
-      default:
-        LOG(FATAL) << "Unsupported type " << this->param_.dtype.value();
-        return;
-    }
-    PrefetcherIter::Init(kwargs);
-    this->param_.prefetch_buffer = 1;
-  }
-  virtual void BeforeFirst(void) {
-    return PrefetcherIter::BeforeFirst();
-  }
-  virtual bool Next(void) {
-    IF_CHECK_TIMING(
-      const uint64_t start_time = GetTickCountMS();
-    )
-    const bool rc = PrefetcherIter::Next();
-    IF_CHECK_TIMING(
-      const uint64_t diff_time  = GetTickCountMS() - start_time;
-      next_time_.fetch_add(diff_time);
-    )
-    return rc;
-  }
-
- protected:
-  IF_CHECK_TIMING(
-    static uint64_t GetTickCountMS() {
-      struct timeval tv;
-      gettimeofday(&tv, 0);
-      return uint64_t( tv.tv_sec ) * 1000 + tv.tv_usec / 1000;
-    }
-  )
-
-  /*! \brief milliseconds spent in Next() */
-  std::atomic<uint64_t> next_time_;
-};  // class CaffeDataIterWrapper
-
-DMLC_REGISTER_PARAMETER(CaffeDataParam);
-
-MXNET_REGISTER_IO_ITER(CaffeDataIter)
-.describe("Create MxNet iterator for a Caffe data layer.")
-.add_arguments(CaffeDataParam::__FIELDS__())
-.add_arguments(PrefetcherParam::__FIELDS__())
-.set_body([]() {
-    return new CaffeDataIterWrapper();
-});
-
-}  // namespace io
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_fieldentry.h b/plugin/caffe/caffe_fieldentry.h
deleted file mode 100644
index f97b76519e0e..000000000000
--- a/plugin/caffe/caffe_fieldentry.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_fieldentry.h
- * \brief Implement FieldEntry<caffe::LayerParameter>
- * \author Haoran Wang
- */
-#ifndef PLUGIN_CAFFE_CAFFE_FIELDENTRY_H_
-#define PLUGIN_CAFFE_CAFFE_FIELDENTRY_H_
-
-#include <caffe/proto/caffe.pb.h>
-#include <dmlc/parameter.h>
-#include <dmlc/base.h>
-#include <dmlc/json.h>
-#include <dmlc/logging.h>
-#include <dmlc/type_traits.h>
-#include <google/protobuf/message.h>
-#include <google/protobuf/text_format.h>
-
-#include <cstddef>
-#include <cstdlib>
-#include <sstream>
-#include <limits>
-#include <map>
-#include <set>
-#include <typeinfo>
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <utility>
-
-#include <caffe/util/io.hpp>
-namespace dmlc {
-namespace parameter {
-
-// specialize define for Layer Parameter
-template<>
-class FieldEntry<caffe::LayerParameter>
-    : public FieldEntryBase<FieldEntry<caffe::LayerParameter>, caffe::LayerParameter> {
- public:
-  // parent class
-  typedef FieldEntryBase<FieldEntry<caffe::LayerParameter>, caffe::LayerParameter> Parent;
-
-
-  bool ReadProtoFromTextContent(const std::string& text,
-                                ::google::protobuf::Message* proto) const {
-    bool success = google::protobuf::TextFormat::ParseFromString(text, proto);
-    return success;
-  }
-
-  /**
-   * /brief Customize set method for LayerParameter
-   * /tparam value string of caffe's layer configuration
-   * */
-  virtual void Set(void *head, const std::string &value) const {
-    caffe::NetParameter net_param;
-    if (!ReadProtoFromTextContent(value, &net_param))
-      CHECK(false)<< "Caffe Net Prototxt: " << value << "Initialized Failed";
-
-    CHECK_EQ(net_param.layer_size(), 1) << "Prototxt" << value <<" more than a layer";
-    caffe::LayerParameter *layer_param = new caffe::LayerParameter(net_param.layer(0));
-    this->Get(head) = (*layer_param);
-  }
-
-  virtual void PrintValue(std::ostream &os, caffe::LayerParameter value) const { // NOLINT(*)
-  }
-
-  virtual void PrintDefaultValueString(std::ostream &os) const {  // NOLINT(*)
-    std::string s;
-    caffe::NetParameter np;
-    // Avoid wasting time making a copy -- just push in out default object's pointer
-    np.mutable_layer()->AddAllocated(const_cast<::caffe::LayerParameter *>(&default_value_));
-    google::protobuf::TextFormat::PrintToString(np, &s);
-    np.mutable_layer()->ReleaseLast();
-    os << '\'' << s << '\'';
-  }
-
-  // override set_default
-  inline FieldEntry<caffe::LayerParameter> &set_default(const std::string &value) {
-    caffe::NetParameter net_param;
-    if (!ReadProtoFromTextContent(value, &net_param))
-      CHECK(false)<< "Caffe Net Prototxt: " << value << "Initialized Failed";
-
-    CHECK_EQ(net_param.layer_size(), 1) << "Protoxt " << value <<" is more than one layer";
-    default_value_ = caffe::LayerParameter(net_param.layer(0));
-    has_default_ = true;
-    // return self to allow chaining
-    return this->self();
-  }
-};
-
-}  // namespace parameter
-}  // namespace dmlc
-
-#endif  // PLUGIN_CAFFE_CAFFE_FIELDENTRY_H_
diff --git a/plugin/caffe/caffe_loss-inl.h b/plugin/caffe/caffe_loss-inl.h
deleted file mode 100644
index 98c714612dca..000000000000
--- a/plugin/caffe/caffe_loss-inl.h
+++ /dev/null
@@ -1,303 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_loss-inl.h
- * \brief Caffe Operator
- * \author Haoran Wang
-*/
-#ifndef PLUGIN_CAFFE_CAFFE_LOSS_INL_H_
-#define PLUGIN_CAFFE_CAFFE_LOSS_INL_H_
-
-#include <caffe/proto/caffe.pb.h>
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-
-#include <map>
-#include <vector>
-#include <string>
-#include <utility>
-
-#include "../../src/operator/operator_common.h"
-#include "caffe_common.h"
-#include "caffe_stream.h"
-#include "caffe_fieldentry.h"
-#include "caffe_blob.h"
-
-namespace mxnet {
-namespace op {
-
-struct CaffeLossParam : public dmlc::Parameter<CaffeLossParam> {
-  ::caffe::LayerParameter prototxt;
-  int num_data, num_out;
-  float grad_scale;
-
-  DMLC_DECLARE_PARAMETER(CaffeLossParam) {
-    DMLC_DECLARE_FIELD(prototxt).set_default("layer{}")
-    .describe("Caffe's layer parameter");
-    DMLC_DECLARE_FIELD(num_data).set_range(0, 100).set_default(2)
-    .describe("Operator input number");
-    DMLC_DECLARE_FIELD(num_out).set_range(0, 100).set_default(1)
-    .describe("Operator output number");
-    DMLC_DECLARE_FIELD(grad_scale)
-    .set_default(1.0f)
-    .describe("Scale the gradient by a float factor (a.k.a weight of this loss).");
-  }
-};
-
-/**
- * \brief this is the implementation of caffe operator in caffe.
- * \tparam xpu the device that the op will be executed on.
- */
-template<typename xpu, typename Dtype>
-class CaffeLoss : public Operator {
- public:
-  explicit CaffeLoss(CaffeLossParam p):param_(p),
-                                       setup_(false) {
-    std::string type = param_.prototxt.type();
-    caffeOp_ = caffe::LayerRegistry<Dtype>::CreateLayer(param_.prototxt);
-    grad_scale_ = (Dtype)param_.grad_scale;
-
-    caffe::InitCaffeBlobs<Dtype>(&bot_, param_.num_data);
-    caffe::InitCaffeBlobs<Dtype>(&top_, param_.num_out);
-    flags_.resize(param_.num_data);
-  }
-
-  ~CaffeLoss() {
-    caffe::DelCaffeBlobs(&bot_, param_.num_data);
-    caffe::DelCaffeBlobs(&top_, param_.num_out);
-  }
-
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    // Set mode before forward
-    caffe::CaffeMode::SetMode<xpu>();
-    using ::caffe::Blob;
-    using std::vector;
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    for (uint32_t i = 0; i < req.size(); ++i)
-      CHECK_EQ(req[i], kWriteTo);
-
-    CHECK_EQ(in_data.size(), param_.num_data);
-    CHECK_EQ(out_data.size(), param_.num_out);
-
-#if defined(__CUDACC__)
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    // TODO(Haoran): when need cublas handle in stream?
-    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
-          << "Must init CuBLAS handle in stream";
-#endif  // __CUDACC__
-
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Data,
-                                      bot_.begin(),
-                                      in_data.begin(),
-                                      param_.num_data);
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Data,
-                                      top_.begin(),
-                                      out_data.begin(),
-                                      param_.num_out);
-    CaffeOpSetup();
-    if (ctx.is_train)
-      MXCAFFELAYER(caffeOp_, Dtype)->SetPhase(::caffe::TRAIN);
-    else
-      MXCAFFELAYER(caffeOp_, Dtype)->SetPhase(::caffe::TEST);
-    caffeOp_->Forward(bot_, top_);
-
-#if defined(__CUDACC__)
-    // Sync cpu data to gpu data
-    for (uint32_t i = 0; i < top_.size(); ++i)
-      top_[i]->gpu_data();
-
-    CHECK_EQ(cudaStreamSynchronize(NULL), cudaSuccess);
-#endif  // __CUDACC__
-  }
-
-  // Set up caffe op with real data
-  void CaffeOpSetup() {
-    if (!setup_) {
-      setup_ = true;
-      caffeOp_->SetUp(bot_, top_);
-    }
-  }
-
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    // Set mode before backward
-    caffe::CaffeMode::SetMode<xpu>();
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(out_grad.size(), param_.num_out);
-    for (int i = 0; i < param_.num_data; ++i)
-      CHECK(req[i] != kAddTo) << "caffe doesn't accm diff on bottom data";
-    CHECK(in_data.size() == param_.num_data);
-
-#if defined(__CUDACC__)
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    // TODO(Haoran): when need cublas handle in stream?
-    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
-          << "Must init CuBLAS handle in stream";
-#endif  // __CUDACC__
-
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Grad,
-                                      bot_.begin(),
-                                      in_grad.begin(),
-                                      param_.num_data);
-    // Pass grad scale to caffe blob
-    MXCAFFEBLOB(top_[0], Dtype)->set_cpu_diff(&grad_scale_);
-
-    // Set BP flag
-    for (int i = 0; i < param_.num_data; ++i)
-      flags_[i] = req[i] != kNullOp;
-
-    caffeOp_->Backward(top_, flags_, bot_);
-
-#if defined(__CUDACC__)
-    // Sync cpu diff to gpu diff
-    for (uint32_t i = 0; i < bot_.size(); ++i)
-      bot_[i]->gpu_diff();
-
-    CHECK_EQ(cudaStreamSynchronize(NULL), cudaSuccess);
-#endif  // __CUDACC__
-  }
-
- private:
-  CaffeLossParam param_;
-  ::caffe::Layer<Dtype> *caffeOp_;
-  Dtype grad_scale_;
-  std::vector< ::caffe::Blob<Dtype> *> bot_, top_;
-  std::vector<bool> flags_;
-  bool setup_;
-};  // class CaffeLoss
-
-// Decalre Factory function, used for dispatch specialization
-template<typename xpu>
-Operator* CreateOp(CaffeLossParam param, int);
-
-#if DMLC_USE_CXX11
-class CaffeLossProp : public OperatorProperty {
- public:
-  std::vector<std::string> ListArguments() const override {
-    return {"data", "label"};
-  }
-
-  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-    CHECK_EQ(param_.num_out, 1);
-    CHECK_EQ(param_.num_data, 2);
-
-    // Fetch grad_scale from prototxt
-    if ((param_.prototxt.loss_weight_size() > 0))
-      param_.grad_scale = param_.prototxt.loss_weight(0);
-  }
-
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-
-  /*brief Set up caffeop to infer output shape*/
-  bool InferShape(mxnet::ShapeVector *in_shape,
-                  mxnet::ShapeVector *out_shape,
-                  mxnet::ShapeVector *aux_shape) const override {
-    using namespace mshadow;
-    using ::caffe::Blob;
-    using std::vector;
-    if (caffeOp_ == NULL)
-      caffeOp_ = caffe::LayerRegistry<float>::CreateLayer(param_.prototxt);
-
-    CHECK_GE(in_shape->size(), param_.num_data);
-    // Initialize empty bottom & top blobs for caffeOp setup
-    vector<Blob<float> *> bot_blobs, top_blobs;
-
-    for (int i = 0; i < param_.num_data; ++i) {
-      mxnet::TShape tshape = (*in_shape)[i];
-      if (tshape.ndim() == 0) return false;
-      auto blob_ptr = new Blob<float>();
-      blob_ptr->Reshape(caffe::TShape2Vector(tshape));
-      bot_blobs.push_back(blob_ptr);
-    }
-
-    for (int i = 0; i < param_.num_out; ++i)
-      top_blobs.push_back(new Blob<float>());
-
-    caffeOp_->SetUp(bot_blobs, top_blobs);
-    CHECK_EQ(in_shape->size(), caffeOp_->blobs().size() + param_.num_data);
-    // Initialize out shapes
-    out_shape->clear();
-    for (auto blob : top_blobs) {
-      mxnet::TShape tshape = caffe::Vector2TShape(blob->shape());
-      out_shape->push_back(tshape);
-    }
-
-    for (auto blob_ptr : bot_blobs)
-      delete blob_ptr;
-    for (auto blob_ptr : top_blobs)
-      delete blob_ptr;
-
-    return true;
-  }
-
-  OperatorProperty* Copy() const override {
-    auto copy_prop = new CaffeLossProp();
-    copy_prop->param_ = this->param_;
-    return copy_prop;
-  }
-
-  std::string TypeString() const override {
-    return "CaffeLoss";
-  }
-
-  std::vector<int> DeclareBackwardDependency(
-    const std::vector<int> &out_grad,
-    const std::vector<int> &in_data,
-    const std::vector<int> &out_data) const override {
-    std::vector<int> dep;
-    dep.insert(dep.end(), in_data.begin(), in_data.end());
-    dep.insert(dep.end(), out_data.begin(), out_data.end());
-    return dep;
-  }
-
-  Operator* CreateOperator(Context ctx) const override {
-    LOG(FATAL) << "Not Implemented.";
-    return NULL;
-  }
-
-  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
-                             std::vector<int> *in_type) const override;
-
-
- private:
-  mutable CaffeLossParam param_;
-  mutable ::caffe::Layer<float> *caffeOp_;
-};  // class CaffeLossSymbol
-#endif
-
-}  // namespace op
-}  // namespace mxnet
-#endif  // PLUGIN_CAFFE_CAFFE_LOSS_INL_H_
diff --git a/plugin/caffe/caffe_loss.cc b/plugin/caffe/caffe_loss.cc
deleted file mode 100644
index c2d1c1b9bab9..000000000000
--- a/plugin/caffe/caffe_loss.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_loss.cc
- * \brief caffe loss
- * \author Haoran Wang
-*/
-#include "./caffe_loss-inl.h"
-
-namespace mxnet {
-namespace op {
-template<>
-Operator *CreateOp<cpu>(CaffeLossParam param, int dtype) {
-  Operator *op = NULL;
-  switch (dtype) {
-  case mshadow::kFloat32:
-    op = new CaffeLoss<cpu, float>(param);
-    break;
-  case mshadow::kFloat64:
-    op = new CaffeLoss<cpu, double>(param);
-    break;
-  case mshadow::kFloat16:
-    LOG(FATAL) << "float16 layer is not supported by caffe";
-    break;
-  case mshadow::kBfloat16:
-    LOG(FATAL) << "bfloat16 layer is not supported by caffe";
-    return;
-  default:
-    LOG(FATAL) << "Unsupported type " << dtype;
-  }
-  return op;
-}
-
-// DO_BIND_DISPATCH comes from static_operator_common.h
-Operator *CaffeLossProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
-                                     std::vector<int> *in_type) const {
-  std::vector<int> out_type, aux_type;
-  mxnet::ShapeVector out_shape, aux_shape;
-  out_type.resize(this->ListOutputs().size());
-  out_shape.resize(this->ListOutputs().size());
-  aux_type.resize(this->ListAuxiliaryStates().size());
-  aux_shape.resize(this->ListAuxiliaryStates().size());
-  CHECK(InferType(in_type, &out_type, &aux_type));
-  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
-  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
-}
-
-DMLC_REGISTER_PARAMETER(CaffeLossParam);
-
-MXNET_REGISTER_OP_PROPERTY(CaffeLoss, CaffeLossProp)
-.describe("Caffe loss layer")
-.add_arguments(CaffeLossParam::__FIELDS__());
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_loss.cu b/plugin/caffe/caffe_loss.cu
deleted file mode 100644
index ff81e1c1ffa6..000000000000
--- a/plugin/caffe/caffe_loss.cu
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_loss_gpu.cc
- * \brief caffe loss
- * \author Haoran Wang
-*/
-#include "./caffe_loss-inl.h"
-
-namespace mxnet {
-namespace op {
-template<>
-Operator* CreateOp<gpu>(CaffeLossParam param, int dtype) {
-  Operator *op = NULL;
-  switch (dtype) {
-  case mshadow::kFloat32:
-    op = new CaffeLoss<gpu, float>(param);
-    break;
-  case mshadow::kFloat64:
-    op = new CaffeLoss<gpu, double>(param);
-    break;
-  case mshadow::kFloat16:
-    LOG(FATAL) << "float16 layer is not supported by caffe";
-    break;
-  case mshadow::kBfloat16:
-    LOG(FATAL) << "bfloat16 layer is not supported by caffe";
-    break;
-  default:
-    LOG(FATAL) << "Unsupported type " << dtype;
-  }
-  return op;
-}
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_op-inl.h b/plugin/caffe/caffe_op-inl.h
deleted file mode 100644
index b4ab0926199c..000000000000
--- a/plugin/caffe/caffe_op-inl.h
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_op-inl.h
- * \brief Caffe Operator
- * \author Haoran Wang
-*/
-#ifndef PLUGIN_CAFFE_CAFFE_OP_INL_H_
-#define PLUGIN_CAFFE_CAFFE_OP_INL_H_
-
-#include <dmlc/logging.h>
-#include <dmlc/parameter.h>
-#include <mxnet/operator.h>
-#include <caffe/proto/caffe.pb.h>
-
-#include <map>
-#include <vector>
-#include <string>
-#include <utility>
-
-#include "../../src/operator/operator_common.h"
-#include "caffe_common.h"
-#include "caffe_stream.h"
-#include "caffe_fieldentry.h"
-#include "caffe_blob.h"
-
-namespace mxnet {
-namespace op {
-
-struct CaffeOpParam : public dmlc::Parameter<CaffeOpParam> {
-  ::caffe::LayerParameter prototxt;
-  int num_data, num_weight, num_out;
-
-  DMLC_DECLARE_PARAMETER(CaffeOpParam) { DMLC_DECLARE_FIELD(prototxt).set_default("layer{}")
-    .describe("Caffe's layer parameter");
-    DMLC_DECLARE_FIELD(num_data).set_default(1)
-    .describe("Operator input number");
-    DMLC_DECLARE_FIELD(num_weight).set_default(0)
-    .describe("Weight number");
-    DMLC_DECLARE_FIELD(num_out).set_default(1)
-    .describe("Operator output number");
-  }
-};
-
-
-/**
- * \brief this is the implementation of caffe operator in caffe.
- * \tparam xpu the device that the op will be executed on.
- */
-template<typename xpu, typename Dtype>
-class CaffeOp : public Operator {
- public:
-  explicit CaffeOp(CaffeOpParam p):param_(p),
-                                   init_w_(false),
-                                   init_wd_(false),
-                                   setup_(false) {
-    std::string type = param_.prototxt.type();
-    caffeOp_ = caffe::LayerRegistry<Dtype>::CreateLayer(param_.prototxt);
-
-    caffe::InitCaffeBlobs<Dtype>(&bot_, param_.num_data);
-    caffe::InitCaffeBlobs<Dtype>(&top_, param_.num_out);
-    caffe::InitCaffeBlobs<Dtype>(&wei_, param_.num_weight);
-    flags_.resize(param_.num_data);
-  }
-
-  ~CaffeOp() {
-    caffe::DelCaffeBlobs(&bot_, param_.num_data);
-    caffe::DelCaffeBlobs(&top_, param_.num_out);
-    caffe::DelCaffeBlobs(&wei_, param_.num_weight);
-  }
-
-  virtual void Forward(const OpContext &ctx,
-                       const std::vector<TBlob> &in_data,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &out_data,
-                       const std::vector<TBlob> &aux_args) {
-    // Set mode before forward
-    caffe::CaffeMode::SetMode<xpu>();
-    using ::caffe::Blob;
-    using std::vector;
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    for (uint32_t i = 0; i < req.size(); ++i)
-      CHECK_EQ(req[i], kWriteTo);
-    int expected_num_data = param_.num_weight + param_.num_data;
-    CHECK_EQ(in_data.size(), expected_num_data);
-    CHECK_EQ(out_data.size(), param_.num_out);
-
-#if defined(__CUDACC__)
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    // TODO(Haoran): when need cublas handle in stream?
-    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
-          << "Must init CuBLAS handle in stream";
-#endif  // __CUDACC__
-
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Data,
-                                       bot_.begin(),
-                                       in_data.begin(),
-                                       param_.num_data);
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Data,
-                                       top_.begin(),
-                                       out_data.begin(),
-                                       param_.num_out);
-    CaffeOpSetup();
-    // Init caffe's weight pointer
-    if (!init_w_) {
-      init_w_ = true;
-      caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Data,
-                                         wei_.begin(),
-                                         in_data.begin() + param_.num_data,
-                                         param_.num_weight);
-      caffe::SetOpBlobs(caffeOp_, wei_);
-    }
-    if (ctx.is_train)
-      MXCAFFELAYER(caffeOp_, Dtype)->SetPhase(::caffe::TRAIN);
-    else
-      MXCAFFELAYER(caffeOp_, Dtype)->SetPhase(::caffe::TEST);
-    caffeOp_->Forward(bot_, top_);
-
-#if defined(__CUDACC__)
-    // Sync cpu data to gpu data
-    for (uint32_t i = 0; i < top_.size(); ++i)
-      top_[i]->gpu_data();
-
-    CHECK_EQ(cudaStreamSynchronize(NULL), cudaSuccess);
-#endif  // __CUDACC__
-  }
-
-  // Set up caffe op with real data
-  void CaffeOpSetup() {
-    if (!setup_) {
-      setup_ = true;
-      caffeOp_->SetUp(bot_, top_);
-    }
-  }
-
-  virtual void Backward(const OpContext &ctx,
-                        const std::vector<TBlob> &out_grad,
-                        const std::vector<TBlob> &in_data,
-                        const std::vector<TBlob> &out_data,
-                        const std::vector<OpReqType> &req,
-                        const std::vector<TBlob> &in_grad,
-                        const std::vector<TBlob> &aux_args) {
-    // Set mode before backward
-    caffe::CaffeMode::SetMode<xpu>();
-    using namespace mshadow;
-    using namespace mshadow::expr;
-    CHECK_EQ(out_grad.size(), param_.num_out);
-    for (int i = 0; i < param_.num_data; ++i)
-      CHECK(req[i] != kAddTo) << "caffe doesn't accm diff on bottom data";
-
-    int expected_num_data = param_.num_weight + param_.num_data;
-    CHECK(in_data.size() == expected_num_data && in_grad.size() == expected_num_data);
-    CHECK_EQ(req.size(), expected_num_data);
-
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-#if defined(__CUDACC__)
-    // TODO(Haoran): when need cublas handle in stream?
-    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
-          << "Must init CuBLAS handle in stream";
-#endif  // __CUDACC__
-
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Grad,
-                                       bot_.begin(),
-                                       in_grad.begin(),
-                                       param_.num_data);
-    caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Grad,
-                                       top_.begin(),
-                                       out_grad.begin(),
-                                       param_.num_out);
-
-    // Init caffe's gradient pointer
-    if (!init_wd_) {
-      init_wd_ = true;
-      caffe::TBlob2CaffeBlob<xpu, Dtype>(caffe::Grad,
-                                         wei_.begin(),
-                                         in_grad.begin() + param_.num_data,
-                                         param_.num_weight);
-    }
-
-    // Handle OpReqType of weights
-    for (int i = param_.num_data; i < expected_num_data; ++i)
-      HandleOpReq(s, req[i], in_grad[i]);
-
-    // Set BP flag
-    for (int i = 0; i < param_.num_data; ++i)
-      flags_[i] = req[i] != kNullOp;
-
-    caffeOp_->Backward(top_, flags_, bot_);
-
-#if defined(__CUDACC__)
-    // Sync cpu diff to gpu diff
-    for (uint32_t i = 0; i < bot_.size(); ++i)
-      bot_[i]->gpu_diff();
-
-    CHECK_EQ(cudaStreamSynchronize(NULL), cudaSuccess);
-#endif  // __CUDACC__
-  }
-
-  void HandleOpReq(mshadow::Stream<xpu>*s, OpReqType req, const TBlob& in_g) {
-    if ((req == kWriteInplace) || (req == kWriteTo)) {
-      mshadow::Tensor<xpu, 2, Dtype> grad = in_g.FlatTo2D<xpu, Dtype>(s);
-      grad = 0;
-    }
-  }
-
- private:
-  CaffeOpParam param_;
-  ::caffe::Layer<Dtype> *caffeOp_;
-  std::vector< ::caffe::Blob<Dtype> *> bot_, top_, wei_;
-  std::vector<bool> flags_;
-  bool init_w_, init_wd_, setup_;
-};  // class CaffeOp
-
-// Decalre Factory function, used for dispatch specialization
-template<typename xpu>
-Operator* CreateOp(CaffeOpParam param, int);
-
-#if DMLC_USE_CXX11
-class CaffeOpProp : public OperatorProperty {
- public:
-  std::vector<std::string> ListArguments() const override {
-    std::vector<std::string> res;
-    for (int i = 0; i < param_.num_data; ++i)
-      res.push_back(std::string("data_") + std::to_string(i));
-
-    for (int i = 0; i < param_.num_weight; ++i) {
-      if (i == 0)
-        res.push_back(std::to_string(i) + "_weight");
-      else
-        res.push_back(std::to_string(i) + "_bias");
-    }
-    return res;
-  }
-
-  std::vector<std::string> ListOutputs() const override {
-    if (param_.num_out > 1) {
-      std::vector<std::string> ret;
-      for (int i = 0; i < param_.num_out; ++i)
-        ret.push_back("output" + std::to_string(i));
-      return ret;
-    } else {
-      return {"output"};
-    }
-  }
-
-  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
-    param_.Init(kwargs);
-  }
-
-  std::map<std::string, std::string> GetParams() const override {
-    return param_.__DICT__();
-  }
-
-  /*
-   * \brief Set up caffeOp_ to infer weights & output shape
-   * \brief Initialize param_'s in & out dims
-   */
-  bool InferShape(mxnet::ShapeVector *in_shape,
-                  mxnet::ShapeVector *out_shape,
-                  mxnet::ShapeVector *aux_shape) const override {
-    if (caffeOp_ == NULL)
-      caffeOp_ = caffe::LayerRegistry<float>::CreateLayer(param_.prototxt);
-    using namespace mshadow;
-    using ::caffe::Blob;
-    using std::vector;
-    CHECK_GE(in_shape->size(), param_.num_data);
-    // Initialize emtryp bottom & top blobs for caffeop
-    vector<Blob<float> *> bot_blobs, top_blobs;
-
-    for (int i = 0; i < param_.num_data; ++i) {
-      mxnet::TShape tshape = (*in_shape)[i];
-      if (tshape.ndim() == 0) return false;
-      auto blob_ptr = new Blob<float>();
-      blob_ptr->Reshape(caffe::TShape2Vector(tshape));
-      bot_blobs.push_back(blob_ptr);
-    }
-
-    for (int i = 0; i < param_.num_out; ++i)
-      top_blobs.push_back(new Blob<float>());
-
-    caffeOp_->SetUp(bot_blobs, top_blobs);
-    CHECK_EQ(in_shape->size(), caffeOp_->blobs().size() + param_.num_data);
-    // Set weight shape
-    CHECK_EQ(param_.num_weight, caffeOp_->blobs().size());
-    for (int i = 0; i < param_.num_weight ; ++i) {
-      mxnet::TShape tshape = caffe::Vector2mxnet::TShape(caffeOp_->blobs()[i]->shape());
-      SHAPE_ASSIGN_CHECK(*in_shape, i + param_.num_data, tshape);
-    }
-    // Initialize out shapes
-    out_shape->clear();
-    for (auto blob : top_blobs) {
-      mxnet::TShape tshape = caffe::Vector2mxnet::TShape(blob->shape());
-      out_shape->push_back(tshape);
-    }
-
-    for (auto blob_ptr : bot_blobs)
-      delete blob_ptr;
-    for (auto blob_ptr : top_blobs)
-      delete blob_ptr;
-    return true;
-  }
-
-  OperatorProperty* Copy() const override {
-    auto copy_prop = new CaffeOpProp();
-    copy_prop->param_ = this->param_;
-    return copy_prop;
-  }
-
-  std::string TypeString() const override {
-    return "CaffeOp";
-  }
-
-  Operator* CreateOperator(Context ctx) const override {
-    LOG(FATAL) << "Not Implemented.";
-    return NULL;
-  }
-
-  Operator* CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
-                             std::vector<int> *in_type) const override;
-
- private:
-  mutable CaffeOpParam param_;
-  mutable ::caffe::Layer<float> *caffeOp_;
-};  // class CaffeOpSymbol
-#endif
-
-}  // namespace op
-}  // namespace mxnet
-#endif  // PLUGIN_CAFFE_CAFFE_OP_INL_H_
diff --git a/plugin/caffe/caffe_op.cc b/plugin/caffe/caffe_op.cc
deleted file mode 100644
index db80f4a90f74..000000000000
--- a/plugin/caffe/caffe_op.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_op.cc
- * \brief caffe operator
- * \author Haoran Wang
-*/
-#include "./caffe_op-inl.h"
-namespace mxnet {
-namespace op {
-
-template<>
-Operator* CreateOp<cpu>(CaffeOpParam param, int dtype) {
-  Operator *op = NULL;
-  switch (dtype) {
-  case mshadow::kFloat32:
-    op = new CaffeOp<cpu, float>(param);
-    break;
-  case mshadow::kFloat64:
-    op = new CaffeOp<cpu, double>(param);
-    break;
-  case mshadow::kFloat16:
-    LOG(FATAL) << "float16 layer is not supported by caffe";
-    break;
-  case mshadow::kBfloat16:
-    LOG(FATAL) << "bfloat16 layer is not supported by caffe";
-    break;
-  default:
-    LOG(FATAL) << "Unsupported type " << dtype;
-  }
-  return op;
-}
-
-// DO_BIND_DISPATCH comes from static_operator_common.h
-Operator *CaffeOpProp::CreateOperatorEx(Context ctx, mxnet::ShapeVector *in_shape,
-                                     std::vector<int> *in_type) const {
-  std::vector<int> out_type, aux_type;
-  mxnet::ShapeVector out_shape, aux_shape;
-  out_type.resize(this->ListOutputs().size());
-  out_shape.resize(this->ListOutputs().size());
-  aux_type.resize(this->ListAuxiliaryStates().size());
-  aux_shape.resize(this->ListAuxiliaryStates().size());
-  CHECK(InferType(in_type, &out_type, &aux_type));
-  CHECK(InferShape(in_shape, &out_shape, &aux_shape));
-  DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
-}
-
-DMLC_REGISTER_PARAMETER(CaffeOpParam);
-
-MXNET_REGISTER_OP_PROPERTY(CaffeOp, CaffeOpProp)
-.describe("Apply caffe operator")
-.add_argument("data", "Symbol[]", "List of tensors")
-.add_arguments(CaffeOpParam::__FIELDS__());
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_op.cu b/plugin/caffe/caffe_op.cu
deleted file mode 100644
index 7d4017b33ad5..000000000000
--- a/plugin/caffe/caffe_op.cu
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_operator_gpu.cc
- * \brief caffe operator
- * \author Haoran Wang
-*/
-#include "./caffe_op-inl.h"
-namespace mxnet {
-namespace op {
-
-template<>
-Operator *CreateOp<gpu>(CaffeOpParam param, int dtype) {
-  Operator *op = NULL;
-  switch (dtype) {
-  case mshadow::kFloat32:
-    op = new CaffeOp<gpu, float>(param);
-    break;
-  case mshadow::kFloat64:
-    op = new CaffeOp<gpu, double>(param);
-    break;
-  case mshadow::kFloat16:
-    LOG(FATAL) << "float16 layer is not supported by caffe";
-    break;
-  case mshadow::kBfloat16:
-    LOG(FATAL) << "bfloat16 layer is not supported by caffe";
-    break;
-  default:
-    LOG(FATAL) << "Unsupported type " << dtype;
-  }
-  return op;
-}
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/plugin/caffe/caffe_stream.cc b/plugin/caffe/caffe_stream.cc
deleted file mode 100644
index 823948a8aa2f..000000000000
--- a/plugin/caffe/caffe_stream.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_stream.cc
- * \brief define stream opertors >> and <<
- * \author Haoran Wang
-*/
-#include"caffe_stream.h"
-
-namespace dmlc {
-namespace parameter {
-  std::istringstream &operator>>(std::istringstream &is, ::caffe::LayerParameter &para_) {
-    return is;
-  }
-  std::ostream &operator<<(std::ostream &os, ::caffe::LayerParameter &para_) {
-    return os;
-  }
-}
-}
diff --git a/plugin/caffe/caffe_stream.h b/plugin/caffe/caffe_stream.h
deleted file mode 100644
index 228e3727daed..000000000000
--- a/plugin/caffe/caffe_stream.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * Copyright (c) 2016 by Contributors
- * \file caffe_stream.h
- * \brief define stream opertors >> and <<
- * \author Haoran Wang
-*/
-#ifndef PLUGIN_CAFFE_CAFFE_STREAM_H_
-#define PLUGIN_CAFFE_CAFFE_STREAM_H_
-
-#include<caffe/proto/caffe.pb.h>
-#include<iostream>
-namespace dmlc {
-namespace parameter {
-  std::istringstream &operator>>(std::istringstream &is, ::caffe::LayerParameter &para_);
-  std::ostream &operator<<(std::ostream &os, ::caffe::LayerParameter &para_);
-}
-}
-
-#endif  // PLUGIN_CAFFE_CAFFE_STREAM_H_
diff --git a/python/mxnet/gluon/metric.py b/python/mxnet/gluon/metric.py
index 5b081ceac4d8..08ee1411824c 100644
--- a/python/mxnet/gluon/metric.py
+++ b/python/mxnet/gluon/metric.py
@@ -1804,15 +1804,6 @@ def __init__(self, name='torch',
             name, output_names=output_names, label_names=label_names)
 
 
-@register
-class Caffe(Loss):
-    """Dummy metric for caffe criterions."""
-    def __init__(self, name='caffe',
-                 output_names=None, label_names=None):
-        super(Caffe, self).__init__(
-            name, output_names=output_names, label_names=label_names)
-
-
 @register
 @use_np
 class CustomMetric(EvalMetric):
diff --git a/python/mxnet/runtime.py b/python/mxnet/runtime.py
index 27500e7eb772..28525ae65edf 100644
--- a/python/mxnet/runtime.py
+++ b/python/mxnet/runtime.py
@@ -40,8 +40,7 @@
     [✖ CUDA, ✖ CUDNN, ✖ NCCL, ✖ CUDA_RTC, ✖ TENSORRT, ✔ CPU_SSE, ✔ CPU_SSE2, ✔ CPU_SSE3,
     ✔ CPU_SSE4_1, ✔ CPU_SSE4_2, ✖ CPU_SSE4A, ✔ CPU_AVX, ✖ CPU_AVX2, ✔ OPENMP, ✖ SSE,
     ✔ F16C, ✔ JEMALLOC, ✔ BLAS_OPEN, ✖ BLAS_ATLAS, ✖ BLAS_MKL, ✖ BLAS_APPLE, ✔ LAPACK,
-    ✖ MKLDNN, ✔ OPENCV, ✖ CAFFE, ✖ DIST_KVSTORE, ✖ CXX14, ✖ INT64_TENSOR_SIZE,
-    ✔ SIGNAL_HANDLER, ✔ DEBUG, ✖ TVM_OP]
+    ✖ MKLDNN, ✔ OPENCV, ✖ DIST_KVSTORE, ✖ INT64_TENSOR_SIZE, ✔ SIGNAL_HANDLER, ✔ DEBUG, ✖ TVM_OP]
 
 
 """
diff --git a/src/libinfo.cc b/src/libinfo.cc
index 211444e857d2..d14aaf5769b2 100644
--- a/src/libinfo.cc
+++ b/src/libinfo.cc
@@ -84,7 +84,6 @@ class FeatureSet {
     feature_bits.set(OPENCV, MXNET_USE_OPENCV);
 
     // Misc
-    feature_bits.set(CAFFE, MXNET_USE_CAFFE);
     feature_bits.set(DIST_KVSTORE, MXNET_USE_DIST_KVSTORE);
     feature_bits.set(INT64_TENSOR_SIZE, MXNET_USE_INT64_TENSOR_SIZE);
     feature_bits.set(SIGNAL_HANDLER, MXNET_USE_SIGNAL_HANDLER);
@@ -155,9 +154,7 @@ const std::vector<std::string> EnumNames::names = {
   "LAPACK",
   "MKLDNN",
   "OPENCV",
-  "CAFFE",
   "DIST_KVSTORE",
-  "CXX14",
   "INT64_TENSOR_SIZE",
   "SIGNAL_HANDLER",
   "DEBUG",
diff --git a/tests/jenkins/run_test.sh b/tests/jenkins/run_test.sh
deleted file mode 100755
index 59516d46bcd0..000000000000
--- a/tests/jenkins/run_test.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-# Exit script with error if any errors occur
-
-echo "BUILD make"
-cp make/config.mk .
-echo "USE_CUDA=1" >> config.mk
-echo "USE_CUDA_PATH=/usr/local/cuda" >> config.mk
-echo "USE_CUDNN=1" >> config.mk
-echo "DEV=1" >> config.mk
-echo "EXTRA_OPERATORS=example/ssd/operator" >> config.mk
-echo "USE_CPP_PACKAGE=1" >> config.mk
-
-set -e
-
-make -j$(nproc) || exit -1
-
-echo "BUILD cpp_test"
-make -j$(nproc) test || exit -1
-export MXNET_ENGINE_INFO=true
-./build/tests/cpp/mxnet_test
-
-export MXNET_ENGINE_INFO=false
-export PYTHONPATH=$(pwd)/python
-
-echo "BUILD python_test"
-pytest --verbose tests/python/unittest || exit -1
-pytest --verbose tests/python/gpu/test_operator_gpu.py || exit -1
-pytest --verbose tests/python/train || exit -1
-
-echo "BUILD scala_test"
-export PATH=$PATH:/opt/apache-maven/bin
-cd scala-package
-mvn install || exit -1
-
-# echo "BUILD julia_test"
-# export MXNET_HOME="${PWD}"
-# /home/ubuntu/julia/bin/julia -e 'try Pkg.clone("MXNet"); catch end; Pkg.checkout("MXNet"); Pkg.build("MXNet"); Pkg.test("MXNet")' || exit -1
diff --git a/tests/jenkins/run_test_amzn_linux_gpu.sh b/tests/jenkins/run_test_amzn_linux_gpu.sh
deleted file mode 100755
index a257b9684ba0..000000000000
--- a/tests/jenkins/run_test_amzn_linux_gpu.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-# Exit script with error if any errors occur
-
-echo "BUILD make"
-cp make/config.mk .
-echo "USE_CUDA=0" >> config.mk
-echo "USE_CUDNN=0" >> config.mk
-echo "USE_BLAS=openblas" >> config.mk
-echo "USE_CPP_PACKAGE=1" >> config.mk
-echo "ADD_CFLAGS += -I/usr/include/openblas" >>config.mk
-echo "GTEST_PATH=/usr/local/gtest" >> config.mk
-echo 'export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH' >> ~/.profile
-echo 'export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH' >> ~/.profile
-JAVA_HOME=`/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.*.amzn1.x86_64[-1]`
-echo 'export JAVA_HOME=${JAVA_HOME}' >> ~/.profile
-echo 'export JRE_HOME=${JAVA_HOME}/jre' >> ~/.profile
-echo 'export PATH=$PATH:/apache-maven-3.3.9/bin/:/usr/bin:${JAVA_HOME}/bin' >> ~/.profile
-source ~/.profile
-user=`id -u -n`
-
-set -e
-
-make -j 4
-
-echo "BUILD cpp_test"
-make -j 4 test
-export MXNET_ENGINE_INFO=true
-./build/tests/cpp/mxnet_test
-
-echo "BUILD valgrind_test"
-valgrind ./build/tests/cpp/mxnet_test
-
-export MXNET_ENGINE_INFO=false
-export PYTHONPATH=${PWD}/python
-
-echo "BUILD python_test"
-pytest --verbose tests/python/unittest
-pytest --verbose tests/python/train
-
-#echo "BUILD julia_test"
-#export MXNET_HOME="${PWD}"
-#julia -e 'try Pkg.clone("MXNet"); catch end; Pkg.checkout("MXNet"); Pkg.build("MXNet"); Pkg.test("MXNet")' || exit -1
-
-echo "BUILD scala_test"
-cd scala-package
-mvn integration-test
diff --git a/tests/jenkins/run_test_ubuntu.sh b/tests/jenkins/run_test_ubuntu.sh
deleted file mode 100755
index 835bce9aeef7..000000000000
--- a/tests/jenkins/run_test_ubuntu.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-set -e
-
-echo "BUILD make"
-
-WITH_CAFFE_PLUGIN=0
-
-if [ "$WITH_CAFFE_PLUGIN" == "1" ]; then
-# Check out caffe
-  git clone https://github.com/BVLC/caffe
-  mkdir -p caffe/build
-  cd caffe/build
-  cmake ..
-  make -j$(nproc)
-  cd ../..
-fi
-
-cp make/config.mk .
-echo "USE_CUDA=1" >> config.mk
-echo "USE_CUDA_PATH=/usr/local/cuda" >> config.mk
-echo "USE_CUDNN=1" >> config.mk
-echo "DEV=1" >> config.mk
-echo "EXTRA_OPERATORS=example/ssd/operator" >> config.mk
-echo "USE_CPP_PACKAGE=1" >> config.mk
-
-if [ "$WITH_CAFFE_PLUGIN" == "1" ]; then
-    echo "CAFFE_PATH = $(pwd)/caffe" >> config.mk
-    echo "MXNET_PLUGINS += plugin/caffe/caffe.mk" >> config.mk
-fi
-
-user=`id -u -n`
-
-make -j$(nproc)
-
-export PYTHONPATH=${PWD}/python
-
-echo "BUILD python_test"
-pytest --verbose tests/python/unittest || exit 1
-pytest --verbose tests/python/gpu/test_operator_gpu.py || exit 1
-pytest --verbose tests/python/train || exit 1
-
-echo "BUILD scala_test"
-export PATH=$PATH:/opt/apache-maven/bin
-cd scala-package
-mvn integration-test || exit 1
-
diff --git a/tests/python/unittest/test_runtime.py b/tests/python/unittest/test_runtime.py
index d7017cfbfbb2..f1811554ba2d 100644
--- a/tests/python/unittest/test_runtime.py
+++ b/tests/python/unittest/test_runtime.py
@@ -26,7 +26,7 @@ def test_features():
     features = Features()
     print(features)
     assert 'CUDA' in features
-    assert len(features) >= 30
+    assert len(features) >= 20
 
 
 def test_is_singleton():