From 42fd939fdf30b7b613b727b806dbb5755208e4f0 Mon Sep 17 00:00:00 2001 From: Sam Skalicky Date: Thu, 23 Jul 2020 17:37:58 +0000 Subject: [PATCH 01/25] initial commit --- include/mxnet/lib_api.h | 3 +- src/c_api/c_api.cc | 84 +++++++++++++++++++++++++++++++---------- 2 files changed, 66 insertions(+), 21 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index c8ba712a9ec4..1eb7ccea7f09 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -49,7 +49,7 @@ #endif /* Make sure to update the version number everytime you make changes */ -#define MX_LIBRARY_VERSION 7 +#define MX_LIBRARY_VERSION 8 /*! * \brief For loading multiple custom op libraries in Linux, exporting same symbol multiple @@ -561,6 +561,7 @@ class OpResource { #define MX_STR_SUBGRAPH_SYM_JSON "subgraph_sym_json" #define MX_STR_DTYPE "__ext_dtype__" #define MX_STR_SHAPE "__ext_shape__" +#define MX_STR_EXTRA_INPUTS "__ext_extra_inputs__" /* \brief get shape value from list of shapes string * diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 53ff1e41c7f6..776926330546 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -469,9 +469,27 @@ void registerOperators(void *lib, int verbose) { &num_in, &num_out)) << "Error calling ParseAttrs::num_inputs for custom operator '" << name_str << "'"; - return num_in; + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + + return num_in + extra_inputs; }; + // lambda function to call parse attributes and return the number of inputs for subgraph ops + auto num_subgraph_inputs = [=](const NodeAttrs& attrs) { + // get number of inputs for subgraph + int num_in = DefaultSubgraphOpNumInputs(attrs); + + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + + return num_in + extra_inputs; + } + // lambda function to call parse attributes and return the number of outputs auto num_outputs = [=](const NodeAttrs& attrs) { // convert attributes to vector of char* @@ -506,7 +524,13 @@ void registerOperators(void *lib, int verbose) { &num_in, &num_out)) << "Error calling ParseAttrs::num_outputs for custom operator '" << name_str << "'"; // for backward passes, inputs + outputs + input gradients (one for each output) - return num_in + 2 * num_out; + + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + + return num_in + extra_inputs + 2 * num_out; }; // lambda function to call infer shape @@ -520,17 +544,24 @@ void registerOperators(void *lib, int verbose) { attr_vals.push_back(kv.second.c_str()); } - std::vector inshapes(in_shape->size()); - std::vector indims(in_shape->size()); + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + int num_inputs = in_shape->size() - extra_inputs; + + std::vector inshapes(num_inputs); + std::vector indims(num_inputs); // determine amount of memory needed to store all the input shapes size_t buff_size = 0; - for (const auto& i : *in_shape) buff_size += i.ndim(); + for (size_t i = 0; i < num_inputs; ++i) + buff_size += (*in_shape)[i].ndim(); // copy input shapes from ShapeVector to raw memory layout std::vector inbuff(buff_size); uint32_t *ptr = inbuff.data(); - for 
(size_t i = 0; i < in_shape->size(); ++i) { + for (size_t i = 0; i < num_inputs; ++i) { inshapes[i] = ptr; indims[i] = (*in_shape)[i].ndim(); for (int j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) { @@ -546,22 +577,22 @@ void registerOperators(void *lib, int verbose) { int* outdims = nullptr; CHECK(callInferShape(shape_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - inshapes.data(), indims.data(), in_shape->size(), + inshapes.data(), indims.data(), num_inputs, &mod_inshapes, &mod_indims, &outshapes, &outdims, out_shape->size())) << "Error calling InferShape for custom operator '" << name_str << "'"; - std::vector in_shapes(in_shape->size()); + std::vector in_shapes(num_inputs); // determine amount of memory needed to store all the modified input shapes buff_size = 0; - for (unsigned i = 0; i < in_shape->size(); i++) { + for (unsigned i = 0; i < num_inputs; i++) { buff_size += mod_indims[i]; } // copy modified input shapes from custom op memory to MXNet memory std::vector mod_inbuff(buff_size); ptr = mod_inbuff.data(); - for (unsigned i = 0; i < in_shape->size(); ++i) { + for (unsigned i = 0; i < num_inputs; ++i) { in_shapes[i] = ptr; for (int j = 0; j < mod_indims[i]; ++j, ++ptr) { *ptr = static_cast(mod_inshapes[i][j]); @@ -569,7 +600,7 @@ void registerOperators(void *lib, int verbose) { } // assign modified input shapes to ShapeVector - for (unsigned i = 0; i < in_shape->size(); ++i) { + for (unsigned i = 0; i < num_inputs; ++i) { SHAPE_ASSIGN_CHECK(*in_shape, i, mxnet::TShape(in_shapes[i], in_shapes[i]+mod_indims[i])); } @@ -599,7 +630,7 @@ void registerOperators(void *lib, int verbose) { // free memory used by custom op to allocate shapes/dims callFree(mod_indims); - for (unsigned i = 0; i < in_shape->size(); i++) { + for (unsigned i = 0; i < num_inputs; i++) { callFree(mod_inshapes[i]); } callFree(mod_inshapes); @@ -624,6 +655,12 @@ void registerOperators(void *lib, int verbose) { attr_vals.push_back(kv.second.c_str()); } + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + int num_inputs = in_type->size() - extra_inputs; + // copy input types from in_type std::vector intypes(*in_type); @@ -631,12 +668,12 @@ void registerOperators(void *lib, int verbose) { std::vector outtypes(out_type->size()); CHECK(callInferType(type_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - intypes.data(), in_type->size(), + intypes.data(), num_inputs, outtypes.data(), out_type->size())) << "Error calling InferType for custom operator '" << name_str << "'"; // copy and assign modified input types from custom op to MXNet memory - for (size_t i = 0; i < in_type->size(); i++) { + for (size_t i = 0; i < num_inputs; i++) { TYPE_ASSIGN_CHECK(*in_type, i, intypes[i]); } // copy and assign output types from custom op to MXNet memory @@ -680,7 +717,7 @@ void registerOperators(void *lib, int verbose) { std::vector* in_stypes, std::vector* out_stypes) { if (stype_fp == nullptr) { - // InferSType is not defineid in customized lib. + // InferSType is not defined in customized lib. 
CHECK(mxnet::common::ContainsOnlyStorage(*in_stypes, mxnet::kDefaultStorage)) << "Error input tensors are not dense for custom operator '" << name_str << "'"; // set outputs as dense @@ -694,18 +731,25 @@ void registerOperators(void *lib, int verbose) { attr_keys.push_back(kv.first.c_str()); attr_vals.push_back(kv.second.c_str()); } + + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + int num_inputs = in_stypes->size() - extra_inputs; + // copy input types from in_stype std::vector instypes(*in_stypes); // output types will be populated by inferType function std::vector outstypes(out_stypes->size()); CHECK(callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - instypes.data(), in_stypes->size(), + instypes.data(), num_inputs, outstypes.data(), out_stypes->size())) << "Error calling InferSType for custom operator '" << name_str << "'"; // copy and assign modified input storage types from custom op to MXNet memory. - for (size_t i = 0; i < in_stypes->size(); i++) { + for (size_t i = 0; i < num_inputs; i++) { STORAGE_TYPE_ASSIGN_CHECK(*in_stypes, i, instypes[i]); } // copy and assign output storage types from custom op to MXNet memory. @@ -835,7 +879,7 @@ void registerOperators(void *lib, int verbose) { regOp.set_attr("FMutateInputs", mutate_inputs, plevel); } else { using namespace mxnet::op; - regOp.set_num_inputs(DefaultSubgraphOpNumInputs); + regOp.set_num_inputs(num_subgraph_inputs); regOp.set_num_outputs(DefaultSubgraphOpNumOutputs); regOp.set_attr("FInferType", DefaultSubgraphOpType, plevel); regOp.set_attr("FInferShape", DefaultSubgraphOpShape, plevel); @@ -901,12 +945,12 @@ void registerOperators(void *lib, int verbose) { using namespace mxnet::op; auto grad_inouts = [=](const nnvm::NodeAttrs& attrs) { // for backward passes, inputs + outputs + input gradients (one for each output) - uint32_t cnt = DefaultSubgraphOpNumInputs(attrs); + uint32_t cnt = num_subgraph_inputs(attrs); cnt += 2 * DefaultSubgraphOpNumOutputs(attrs); return cnt; }; gradOp.set_num_inputs(grad_inouts); - gradOp.set_num_outputs(DefaultSubgraphOpNumInputs); + gradOp.set_num_outputs(num_subgraph_inputs); } if (createop_map.size() != 0) { From 62e0048e161021be06ce4e538c9796f270473d19 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 23 Jul 2020 19:31:55 +0000 Subject: [PATCH 02/25] first successful compile --- src/c_api/c_api.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 776926330546..ed66c4b2c16b 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -472,7 +472,7 @@ void registerOperators(void *lib, int verbose) { // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) - extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); return num_in + extra_inputs; }; @@ -480,15 +480,15 @@ void registerOperators(void *lib, int verbose) { // lambda function to call parse attributes and return the number of inputs for subgraph ops auto num_subgraph_inputs = [=](const NodeAttrs& attrs) { // get number of inputs for subgraph - int num_in = DefaultSubgraphOpNumInputs(attrs); + int num_in = mxnet::op::DefaultSubgraphOpNumInputs(attrs); // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) - extra_inputs = 
std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); return num_in + extra_inputs; - } + }; // lambda function to call parse attributes and return the number of outputs auto num_outputs = [=](const NodeAttrs& attrs) { @@ -528,7 +528,7 @@ void registerOperators(void *lib, int verbose) { // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) - extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); return num_in + extra_inputs + 2 * num_out; }; @@ -547,7 +547,7 @@ void registerOperators(void *lib, int verbose) { // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) - extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); int num_inputs = in_shape->size() - extra_inputs; std::vector inshapes(num_inputs); @@ -658,7 +658,7 @@ void registerOperators(void *lib, int verbose) { // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) - extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); int num_inputs = in_type->size() - extra_inputs; // copy input types from in_type @@ -735,7 +735,7 @@ void registerOperators(void *lib, int verbose) { // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) - extra_inputs = std::stoi(attrs.dict[MX_STR_EXTRA_INPUTS]); + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); int num_inputs = in_stypes->size() - extra_inputs; // copy input types from in_stype From c56d526174f916c0e04816769e6c9b5b9afeaaea Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 24 Jul 2020 17:27:17 +0000 Subject: [PATCH 03/25] initial working example --- example/extensions/lib_pass/pass_lib.cc | 5 +- .../extensions/lib_subgraph/subgraph_lib.cc | 53 ++- .../extensions/lib_subgraph/test_subgraph.py | 68 +-- include/mxnet/lib_api.h | 393 +++++++++++++++--- python/mxnet/gluon/block.py | 10 +- src/c_api/c_api.cc | 76 +++- 6 files changed, 475 insertions(+), 130 deletions(-) diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index bbdcd73a7a0b..2e8a4a584821 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -60,8 +60,7 @@ MXReturnValue jsonPass(const std::string& in_graph, const std::string** out_grap MXTensor* aux_ = res.alloc_aux("test_aux",{1},MXContext::CPU(0),kFloat32); // convert json string to json object - JsonParser parser; - JsonVal json_val = parser.parse_to_json(in_graph); + JsonVal json_val = JsonVal::parse(in_graph); // get nodes list JsonVal nodes = json_val.map[JsonVal("nodes")]; @@ -86,7 +85,7 @@ MXReturnValue jsonPass(const std::string& in_graph, const std::string** out_grap } } - *out_graph = new std::string(parser.dump(json_val)); + *out_graph = new std::string(json_val.dump()); return MX_SUCCESS; } diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 28442078ebe6..d53ceaf34d2f 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -57,8 +57,7 @@ MXReturnValue myExecutor(std::vector* inputs, std::cout << subgraph_sym << std::endl; // convert json string to json object - JsonParser parser; - JsonVal json_val = 
parser.parse_to_json(subgraph_sym); + JsonVal json_val = JsonVal::parse(subgraph_sym); // get nodes list JsonVal nodes = json_val.map[JsonVal("nodes")]; //counter for inputs @@ -148,6 +147,9 @@ class MyStatefulOp : public CustomStatefulOp { MXReturnValue Forward(std::vector* inputs, std::vector* outputs, const OpResource& op_res) { + if(attrs_.count(MX_STR_EXTRA_INPUTS) > 0 && std::stoi(attrs_.at(MX_STR_EXTRA_INPUTS)) > 0) + std::cout << "forward::extra_inputs(" << attrs_.at(MX_STR_EXTRA_INPUTS) << ")::inputs [" + << inputs->size() << "]" << std::endl; return myExecutor(inputs, outputs, subgraph_sym); } @@ -183,8 +185,7 @@ MXReturnValue mySupportedOps(const std::string& json, std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; } //convert json string to json object - JsonParser parser; - JsonVal json_val = parser.parse_to_json(json); + JsonVal json_val = JsonVal::parse(json); //get nodes list JsonVal nodes = json_val.map[JsonVal("nodes")]; @@ -249,7 +250,6 @@ MXReturnValue myReviewSubgraph(const std::string& json, int subgraph_id, bool* a } else { *accept = true; std::cout << "accepting subgraph" << std::endl; - attrs->insert(std::pair("myKey","myVal")); } return MX_SUCCESS; } @@ -269,8 +269,7 @@ class MySelector : public CustomOpSelector { << " ==> " << kv.second << std::endl; } //convert json string to json object - JsonParser parser; - JsonVal json_val = parser.parse_to_json(json); + JsonVal json_val = JsonVal::parse(json); //get nodes list nodes = json_val.map[JsonVal("nodes")]; } @@ -331,6 +330,46 @@ REGISTER_PARTITIONER(mySelect) .setCreateSelector("strategy1", createSelector) .setReviewSubgraph("strategy1", myReviewSubgraph); +/* \brief a basic pass that adds a new input for subgraph ops */ +MXReturnValue addInputPass(const std::string& in_graph, const std::string** out_graph, + const std::unordered_map& options, + const std::unordered_map& args, + const std::unordered_map& aux, + const PassResource& res) { + // convert graph from JSON string to Graph/Node data structure + Graph *g = Graph::fromString(in_graph); + //find node with '_custom_subgraph_op' op type + for(Node* n : g->nodes) { + if(n->op.compare("_custom_subgraph_op") == 0) { + //set extra input + n->attrs[MX_STR_EXTRA_INPUTS] = std::to_string(1); + + //create a new input Node + Node* input = new Node(); + std::string input_name = n->name + "_input"; + input->name = input_name; + input->op = "null"; + //add a new node in graph + g->nodes.push_back(input); + g->inputs.push_back(input); + //connect new input to node + input->outputs.push_back({n,n->inputs.size()}); + //connect node to new input + n->inputs.push_back({input,0}); + // add a corresponding tensor for this input + MXTensor* arg_ = res.alloc_arg(input_name,{1},MXContext::CPU(0),kFloat32); + } + } + + //convert back to JSON string from Graph/Node + *out_graph = new std::string(g->toString()); + return MX_SUCCESS; +} + +REGISTER_PASS(addInputPass) +.setBody(addInputPass); + + MXReturnValue initialize(int version) { if (version >= 10700) { std::cout << "MXNet version " << version << " supported" << std::endl; diff --git a/example/extensions/lib_subgraph/test_subgraph.py b/example/extensions/lib_subgraph/test_subgraph.py index fa56b50515e5..b396612d3149 100644 --- a/example/extensions/lib_subgraph/test_subgraph.py +++ b/example/extensions/lib_subgraph/test_subgraph.py @@ -55,36 +55,13 @@ def test(backend): ############################################### #execute in MXNet print('-------------------------------') - print('Testing regular MXNet 
execution') - exe = sym.bind(ctx=mx.cpu(), args=args) - out = exe.forward() + print('Testing regular Gluon execution') + inputs = [a,b] + sym_block = nn.SymbolBlock(sym, inputs) + sym_block.initialize() + out = sym_block(mx.nd.ones((3,2)),mx.nd.ones((3,2))) print(out) - # with propogating shapes/types - print('-------------------------------') - print('Testing %s partitioning with shapes/types' % backend) - mysym2 = sym.optimize_for(backend,args) - print(mysym2.tojson()) - exe2 = mysym2.bind(ctx=mx.cpu(), args=args) - out2 = exe2.forward() - print(out2) - - # with propogating shapes/types, rejecting subgraph - print('-------------------------------') - print('Testing %s partitioning with shapes/types - rejecting subgraph' % backend) - mysym2 = sym.optimize_for(backend, args, reject=True) - exe2 = mysym2.bind(ctx=mx.cpu(), args=args) - out2 = exe2.forward() - print(out2) - - # without propogating shapes/types - print('-------------------------------') - print('Testing %s partitioning without shapes/types' % backend) - mysym3 = sym.optimize_for(backend, myOpt='yello') - exe3 = mysym3.bind(ctx=mx.cpu(), args=args) - out3 = exe3.forward() - print(out3) - # Gluon Hybridize partitioning with shapes/types print('-------------------------------') print('Testing %s Gluon Hybridize partitioning with shapes/types' % backend) @@ -92,8 +69,8 @@ def test(backend): sym_block = nn.SymbolBlock(sym, inputs) sym_block.initialize() sym_block.hybridize(backend=backend) - out4 = sym_block(mx.nd.ones((3,2)),mx.nd.ones((3,2))) - print(out4) + out2 = sym_block(mx.nd.ones((3,2)),mx.nd.ones((3,2))) + print(out2) # Gluon Hybridize partitioning with shapes/types without inference print('-------------------------------') @@ -104,6 +81,14 @@ def test(backend): sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend=backend) sym_block2.export('partitioned') + # Test with additional input to subgraph op + print('-------------------------------') + print('Testing %s Gluon Hybridize partitioning with extra input' % backend) + sym_block2.optimize_for(mx.nd.ones((3,2)), mx.nd.ones((3,2)), backend="addInputPass", clear=False) + out3 = sym_block2(mx.nd.ones((3,2)),mx.nd.ones((3,2))) + print(out3) + + ############################################### # Test with subgraph directly consuming params ############################################### @@ -111,27 +96,12 @@ def test(backend): #execute in MXNet print('-------------------------------') print('Testing regular MXNet execution') - exe5 = sym2.bind(ctx=mx.cpu(), args=args) - out5 = exe5.forward() + inputs = [a] + sym2_block = nn.SymbolBlock(sym2, inputs) + sym2_block.initialize() + out5 = sym2_block(mx.nd.ones((3,2))) print(out5) - # with propogating shapes/types - print('-------------------------------') - print('Testing %s partitioning with shapes/types' % backend) - mysym6 = sym2.optimize_for(backend, args, reqArgs=True) - print(mysym6.tojson()) - exe6 = mysym6.bind(ctx=mx.cpu(), args=args) - out6 = exe6.forward() - print(out6) - - # without propogating shapes/types - print('-------------------------------') - print('Testing %s partitioning without shapes/types' % backend) - mysym7 = sym2.optimize_for(backend, reqArgs=True) - exe7 = mysym7.bind(ctx=mx.cpu(), args=args) - out7 = exe7.forward() - print(out7) - # Gluon Hybridize partitioning with shapes/types print('-------------------------------') print('Testing %s Gluon Hybridize partitioning with shapes/types' % backend) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 
1eb7ccea7f09..600896c84db0 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -37,11 +37,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #if defined(__NVCC__) @@ -639,52 +641,49 @@ struct JsonVal { } return type < o.type; } - JsonType type; - int num; - std::string str; - std::vector list; - std::map map; -}; - -/*! \brief functions used for parsing JSON */ -struct JsonParser { - JsonVal parse_to_json(const std::string& json) { - unsigned int idx = 0; - return parse(json, &idx); - } - void print_json_val(const JsonVal& val) { - std::cout << json_val_string(val) << std::endl; - } - // debug function to dump data structure to string - std::string json_val_string(const JsonVal &val) { + + // convert JSON object back to JSON-compatible string + std::string dump() const { std::string ret; - switch (val.type) { + switch (type) { case ERR: ret = "json(Error)"; break; case STR: - ret = "json(STR:" + val.str + ")"; + ret = "\"" + str + "\""; break; case NUM: - ret = "json(INT:" + val.str + ")"; + ret = str; break; case LIST: - ret = "json(LIST:["; - for (auto &item : val.list) - ret += json_val_string(item) + ","; - ret += "])"; + ret = "["; + for (unsigned i=0; i < list.size(); i++) { + auto &item = list[i]; + ret += item.dump(); + if (i < list.size()-1) + ret += ","; + } + ret += "]"; break; case MAP: - ret = "json(MAP:{"; - for (auto &item : val.map) - ret += json_val_string(item.first) + " : " + json_val_string(item.second) + ","; - ret += "})"; + ret = "{"; + unsigned cnt = 0; + for (auto &item : map) { + ret += item.first.dump() + " : " + item.second.dump(); + if (cnt++ < map.size()-1) + ret += ","; + } + ret += "}"; break; } return ret; } + static JsonVal parse(const std::string& json) { + unsigned int idx = 0; + return JsonVal::parse(json, &idx); + } // parse a string JSON object - JsonVal parse_string(const std::string& json, unsigned int* idx) { + static JsonVal parse_string(const std::string& json, unsigned int* idx) { JsonVal ret(STR); while (*idx < json.size()) { if (json[*idx] == '"') { @@ -699,7 +698,7 @@ struct JsonParser { return JsonVal(); } // parse a number JSON object - JsonVal parse_num(const std::string& json, unsigned int* idx) { + static JsonVal parse_num(const std::string& json, unsigned int* idx) { JsonVal ret(NUM); while (*idx < json.size()) { if (json[*idx] >= '0' && json[*idx] <= '9') { @@ -713,14 +712,14 @@ struct JsonParser { return ret; } // parse a list of JSON objects - JsonVal parse_list(const std::string& json, unsigned int* idx) { + static JsonVal parse_list(const std::string& json, unsigned int* idx) { JsonVal ret(LIST); while (*idx < json.size()) { if (json[*idx] == ']') { ++(*idx); return ret; } else { - JsonVal item = parse(json, idx); + JsonVal item = JsonVal::parse(json, idx); if (item.type != ERR) ret.list.push_back(item); } @@ -729,14 +728,14 @@ struct JsonParser { return JsonVal(); } // parse a map of JSON objects - JsonVal parse_map(const std::string& json, unsigned int* idx) { + static JsonVal parse_map(const std::string& json, unsigned int* idx) { JsonVal ret(MAP), key; while (*idx < json.size()) { if (json[*idx] == '}') { ++(*idx); return ret; } else { - JsonVal item = parse(json, idx); + JsonVal item = JsonVal::parse(json, idx); if (key.type == ERR) { key = item; } else { @@ -749,62 +748,334 @@ struct JsonParser { return JsonVal(); } // generic parse function - JsonVal parse(const std::string& json, unsigned int *idx) { + static JsonVal parse(const std::string& json, 
unsigned int *idx) { JsonVal ret; while (*idx < json.size()) { if (json[*idx] == '"') { ++(*idx); - ret = parse_string(json, idx); + ret = JsonVal::parse_string(json, idx); } else if (json[*idx] >= '0' && json[*idx] <= '9') { - ret = parse_num(json, idx); + ret = JsonVal::parse_num(json, idx); } else if (json[*idx] == '[') { ++(*idx); - ret = parse_list(json, idx); + ret = JsonVal::parse_list(json, idx); } else if (json[*idx] == '{') { ++(*idx); - ret = parse_map(json, idx); + ret = JsonVal::parse_map(json, idx); } else if (json[*idx] == ']' || json[*idx] == '}') {return ret;} if (ret.type != ERR) return ret; ++(*idx); } return ret; } - // convert JSON object back to JSON-compatible string - std::string dump(const JsonVal &val) { + // debug function to convert data structure to a debugstring + std::string toString() const { std::string ret; - switch (val.type) { + switch (type) { case ERR: ret = "json(Error)"; break; case STR: - ret = "\"" + val.str + "\""; + ret = "json(STR:" + str + ")"; break; case NUM: - ret = val.str; + ret = "json(INT:" + str + ")"; break; case LIST: - ret = "["; - for (unsigned i=0; i < val.list.size(); i++) { - auto &item = val.list[i]; - ret += dump(item); - if (i < val.list.size()-1) - ret += ","; - } - ret += "]"; + ret = "json(LIST:["; + for (auto &item : list) + ret += item.toString() + ","; + ret += "])"; break; case MAP: - ret = "{"; - unsigned cnt = 0; - for (auto &item : val.map) { - ret += dump(item.first) + " : " + dump(item.second); - if (cnt++ < val.map.size()-1) - ret += ","; - } - ret += "}"; + ret = "json(MAP:{"; + for (auto &item : map) + ret += item.first.toString() + " : " + item.second.toString() + ","; + ret += "})"; break; } return ret; } + JsonType type; + int num; + std::string str; + std::vector list; + std::map map; +}; + +/*! + * \brief Graph utility to parse serialized subgraph symbol + */ +class Node; +class Graph; + +// Representation of an input/output to a node +struct NodeEntry { + Node* node; // other node thats producing/consuming inputs/outputs + int entry; // entry from other node (ie. which output from producing node) +}; + +// Representation of a node in the graph +class Node { + public: + std::string op; // operator name (ie. Convolution) + std::string name; // unique node name (ie. 
conv_0 or conv_1) + std::vector inputs; // set of inputs to the node + std::vector outputs; // set of outputs from the node + std::vector subgraphs; // set of subgraphs within this node + std::unordered_map attrs; // node attributes +}; + +// Representation of the graph +class Graph { + public: + Graph() {} + /* \brief deleted nodes when deleting the graph */ + ~Graph() { + for(int i=0; i nodeMap; + // loop over nodes + for(int i=0; inodes.push_back(n); + JsonVal node = nodes.list[i]; + + // set the op info + n->op = node.map[JsonVal("op")].str; + n->name = node.map[JsonVal("name")].str; + + // if op is null its an input to the graph + if(n->op.compare("null") == 0) + g->inputs.push_back(n); + + // set attrs + JsonVal attributes = node.map[JsonVal("attrs")]; + for(auto& kv : attributes.map) { + n->attrs[kv.first.str] = kv.second.str; + } + + // set subgraphs, parsing each into a graph + if (node.map.count(JsonVal("subgraphs")) > 0) { + JsonVal subgraphs = node.map[JsonVal("subgraphs")]; + for (auto &subgraph : subgraphs.list) { + n->subgraphs.push_back(fromJson(subgraph)); + } + } + + // set node inputs + JsonVal node_inputs = node.map[JsonVal("inputs")]; + n->inputs.resize(node_inputs.list.size()); + for(int j=0; jinputs[j]; + //get pointer to other node + entry.node = nodeMap[input.list[0].num]; + //get the other node's output index + entry.entry = input.list[1].num; + //set other nodes output as connected to this node + entry.node->outputs.push_back({n,j}); + } + nodeMap[i] = n; + } + + // set graph level outputs + JsonVal& heads = val.map[JsonVal("heads")]; + g->outputs.resize(heads.list.size()); + for(int i=0; ioutputs[i].node = nodeMap[head.list[0].num]; + g->outputs[i].entry = head.list[1].num; + } + + // add all attributes to the graph + for(auto& kv : val.map) { + if(kv.first.str.compare("nodes") != 0 && + kv.first.str.compare("heads") != 0 && + kv.first.str.compare("node_row_ptr") != 0 && + kv.first.str.compare("arg_nodes") != 0) { + g->attrs[kv.first.str] = kv.second; + } + } + return g; + } + + /* \brief convert graph object back to JSON object */ + JsonVal toJson() { + // top level object is a map + JsonVal val(MAP); + + // add attributes + for(auto& kv : attrs) { + val.map[JsonVal(kv.first)] = kv.second; + } + + // sort graph nodes in topological order, create mapping of node to index + std::map nodeMap; + std::vector sorted = topological_sort(); + for(int i=sorted.size()-1; i>=0; i--) { + nodeMap[sorted[i]] = sorted.size()-1-i; + } + + // create node_row_ptr entry + val.map[JsonVal("node_row_ptr")] = JsonVal(LIST); + JsonVal& node_row_ptr = val.map[JsonVal("node_row_ptr")]; + for(int i=0; i=0; i--) { + // each node is a map + nodes_.list.push_back(JsonVal(MAP)); + Node* n = sorted[i]; + JsonVal& n_ = nodes_.list[nodes_.list.size()-1]; + + n_.map[JsonVal("op")] = JsonVal(n->op); + n_.map[JsonVal("name")] = JsonVal(n->name); + n_.map[JsonVal("inputs")] = JsonVal(LIST); + + // add inputs for this node + JsonVal& inputs_ = n_.map[JsonVal("inputs")]; + for(int j=0; jinputs.size(); j++) { + inputs_.list.push_back(JsonVal(LIST)); + NodeEntry& entry = n->inputs[j]; + JsonVal& in = inputs_.list[j]; + in.list.push_back(JsonVal(nodeMap[entry.node])); + in.list.push_back(JsonVal(entry.entry)); + in.list.push_back(JsonVal(0)); + } + + // add subgraphs for this node, convert each back to JSON + if (n->subgraphs.size() > 0) { + n_.map[JsonVal("subgraphs")] = JsonVal(LIST); + JsonVal &subgraphs_ = n_.map[JsonVal("subgraphs")]; + for(Graph *subgraph : n->subgraphs) { + 
subgraphs_.list.push_back(subgraph->toJson()); + } + } + + // add attributes for this node + n_.map[JsonVal("attrs")] = JsonVal(MAP); + JsonVal& attrs_ = n_.map[JsonVal("attrs")]; + for(auto& kv : n->attrs) { + attrs_.map[JsonVal(kv.first)] = JsonVal(kv.second); + } + } + return val; + } + + /* \brief convert graph object to JSON string */ + std::string toString() { + return toJson().dump(); + } + + /* \brief visits a node "n" */ + void _dfs_util(Node* n, std::unordered_set* to_visit, + std::function handler) { + to_visit->erase(n); // remove node now that we're visiting it + for(NodeEntry& e : n->outputs) { + Node* o = e.node; + if(to_visit->count(o) != 0) { + _dfs_util(o,to_visit,handler); // visit neighbor + } + } + handler(n); // post-order visit this node + } + + /* \brief post-order DFS graph traversal */ + void DFS(std::function handler) { + std::unordered_set to_visit; + //put all nodes in set to visit + for(auto& n : nodes) + to_visit.insert(n); + //visit all inputs first + for(auto& i : inputs) + if(to_visit.count(i) != 0) + _dfs_util(i, &to_visit, handler); + //visit any nodes left + while(to_visit.size() > 0) + _dfs_util(*(to_visit.begin()), &to_visit, handler); + } + + /* \brief sort graph nodes in topological order */ + std::vector topological_sort() { + std::vector sorted; + auto handler = [&](Node* n) { + sorted.push_back(n); // when visiting each node, add it in order to the vector + }; + DFS(handler); + return sorted; + } + + /* \brief print out graph details */ + void print(int indent=0) { + std::string space = ""; + for(int i=0; i < indent; i++) space+=" "; + + std::cout << space << "########### Graph #############" << std::endl; + std::cout << space << "attributes: " << std::endl; + for (auto &kv : attrs) + std::cout << space << "\t" << kv.first << " : " << kv.second.str << std::endl; + std::cout << space << "inputs: " << inputs.size() << std::endl; + std::cout << space << "outputs: " << outputs.size() << std::endl; + std::cout << space << "nodes: " << nodes.size() << std::endl; + std::vector sorted = topological_sort(); + // loop over each node and print out its inputs/outputs + for(int i=sorted.size()-1; i>=0; i--) { + std::cout << space << "Node: " << sorted[i]->name << std::endl; + for(int j=0; jinputs.size(); j++) { + std::cout << space << "\tInput: " << sorted[i]->inputs[j].node->name << " " << sorted[i]->inputs[j].entry << std::endl; + } + for(int j=0; joutputs.size(); j++) { + std::cout << space << "\tOutput: " << sorted[i]->outputs[j].node->name << " " << sorted[i]->outputs[j].entry << std::endl; + } + if(sorted[i]->subgraphs.size() > 0) { + for(auto &subgraph : sorted[i]->subgraphs) { + std::cout << space << "\tSubgraph:" << std::endl; + subgraph->print(indent+2); + } + } + } + std::cout << space << "###############################" << std::endl; + } + + std::vector nodes; + std::vector inputs; + std::vector outputs; + std::map attrs; }; /* \brief An abstract class for library authors creating custom diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index d6782ba94224..8d8ae328d075 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -1132,7 +1132,7 @@ def _call_cached_op(self, *args): out = [out] return _regroup(out, self._out_format) - def optimize_for(self, x, *args, backend=None, backend_opts=None, **kwargs): + def optimize_for(self, x, *args, backend=None, backend_opts=None, clear=True, **kwargs): """Partitions the current HybridBlock and optimizes it for a given backend without executing a forward pass. 
Modifies the HybridBlock in-place. @@ -1162,6 +1162,7 @@ def optimize_for(self, x, *args, backend=None, backend_opts=None, **kwargs): The name of backend, as registered in `SubgraphBackendRegistry`, default None backend_opts : dict of user-specified options to pass to the backend for partitioning, optional Passed on to `PrePartition` and `PostPartition` functions of `SubgraphProperty` + clear : clears any previous optimizations static_alloc : bool, default False Statically allocate memory to improve speed. Memory usage may increase. static_shape : bool, default False @@ -1171,7 +1172,7 @@ def optimize_for(self, x, *args, backend=None, backend_opts=None, **kwargs): """ # do hybrize API call - self.hybridize(True, backend, backend_opts, **kwargs) + self.hybridize(True, backend, backend_opts, clear, **kwargs) # do part of forward API call has_symbol, has_ndarray, ctx_set, _ = _gather_type_ctx_info([x] + list(args)) @@ -1213,7 +1214,7 @@ def register_child(self, block, name=None): self._active = False self._clear_cached_op() - def hybridize(self, active=True, backend=None, backend_opts=None, **kwargs): + def hybridize(self, active=True, backend=None, backend_opts=None, clear=True, **kwargs): """Activates or deactivates :py:class:`HybridBlock` s recursively. Has no effect on non-hybrid children. @@ -1225,6 +1226,7 @@ def hybridize(self, active=True, backend=None, backend_opts=None, **kwargs): The name of backend, as registered in `SubgraphBackendRegistry`, default None backend_opts : dict of user-specified options to pass to the backend for partitioning, optional Passed on to `PrePartition` and `PostPartition` functions of `SubgraphProperty` + clear : clears any previous optimizations static_alloc : bool, default False Statically allocate memory to improve speed. Memory usage may increase. static_shape : bool, default False @@ -1241,7 +1243,7 @@ def hybridize(self, active=True, backend=None, backend_opts=None, **kwargs): self._active = active self._flags = list(kwargs.items()) - self._clear_cached_op() + if clear: self._clear_cached_op() if active and self._forward_hooks or self._forward_pre_hooks: warnings.warn('"{block}" is being hybridized while still having forward hook/pre-hook. ' 'If "{block}" is a child of HybridBlock, the hooks will not take effect.' 
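The c_api.cc changes that follow all implement the same extra-input convention: a graph pass records how many tensors it appended in the MX_STR_EXTRA_INPUTS ("__ext_extra_inputs__") node attribute, the registered num-inputs lambdas add that count to whatever the custom operator itself declares, and the infer-shape/type/storage glue subtracts it again so the library callbacks only ever see the operator's original inputs. The fragment below is a minimal standalone sketch of that bookkeeping under those assumptions, not the registered lambdas themselves; the attribute map and the counts are stand-ins for NodeAttrs.dict and the values returned by the library's ParseAttrs callback.

#include <string>
#include <unordered_map>

// MX_STR_EXTRA_INPUTS expands to "__ext_extra_inputs__" in lib_api.h
static int extra_inputs(const std::unordered_map<std::string, std::string>& dict) {
  auto it = dict.find("__ext_extra_inputs__");
  return it == dict.end() ? 0 : std::stoi(it->second);
}

// What MXNet registers as the op's input count: declared inputs plus appended ones.
static int registered_num_inputs(int declared_in,
                                 const std::unordered_map<std::string, std::string>& dict) {
  return declared_in + extra_inputs(dict);
}

// What the library's InferShape/InferType/InferSType callbacks are given:
// the appended tensors are stripped back off before the call.
static int inputs_passed_to_library(int total_in,
                                    const std::unordered_map<std::string, std::string>& dict) {
  return total_in - extra_inputs(dict);
}

For backward nodes the same count folds into the usual inputs-plus-gradients formula, which is why the gradient lambdas in these patches return num_in + extra_inputs + 2 * num_out.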
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index ed66c4b2c16b..b5d1dd89071d 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -473,7 +473,7 @@ void registerOperators(void *lib, int verbose) { int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); - + return num_in + extra_inputs; }; @@ -489,7 +489,7 @@ void registerOperators(void *lib, int verbose) { return num_in + extra_inputs; }; - + // lambda function to call parse attributes and return the number of outputs auto num_outputs = [=](const NodeAttrs& attrs) { // convert attributes to vector of char* @@ -549,7 +549,7 @@ void registerOperators(void *lib, int verbose) { if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); int num_inputs = in_shape->size() - extra_inputs; - + std::vector inshapes(num_inputs); std::vector indims(num_inputs); @@ -644,6 +644,28 @@ void registerOperators(void *lib, int verbose) { return true; }; + // lambda function to call infer shape for subgraph ops + auto infer_subgraph_shape = [=] (const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { + // convert attributes to vector of char* + std::vector attr_keys, attr_vals; + for (auto &kv : attrs.dict) { + attr_keys.push_back(kv.first.c_str()); + attr_vals.push_back(kv.second.c_str()); + } + + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); + + auto in_first = in_shape->begin(); + auto in_last = in_first + in_shape->size() - extra_inputs; + mxnet::ShapeVector *sg_in_shapes = new mxnet::ShapeVector(in_first, in_last); + return mxnet::op::DefaultSubgraphOpShape(attrs, sg_in_shapes, out_shape); + }; + // lambda function to call infer type auto infer_type = [=] (const nnvm::NodeAttrs& attrs, std::vector *in_type, @@ -684,6 +706,29 @@ void registerOperators(void *lib, int verbose) { return true; }; + // lambda function to call infer type for subgraph ops + auto infer_subgraph_type = [=] (const nnvm::NodeAttrs& attrs, + std::vector *in_type, + std::vector *out_type) { + // convert attributes to vector of char* + std::vector attr_keys, attr_vals; + for (auto &kv : attrs.dict) { + attr_keys.push_back(kv.first.c_str()); + attr_vals.push_back(kv.second.c_str()); + } + + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); + + auto in_first = in_type->begin(); + auto in_last = in_first + in_type->size() - extra_inputs; + std::vector *sg_in_types = new std::vector(in_first, in_last); + + return mxnet::op::DefaultSubgraphOpType(attrs, sg_in_types, out_type); + }; + // lambda function to convert from external mutate_inputs to internal MXNet types auto mutate_inputs = [=](const nnvm::NodeAttrs& attrs) { // convert attributes to vector of char* @@ -762,6 +807,25 @@ void registerOperators(void *lib, int verbose) { } }; + // lambda function to set storage types for subgraph ops + auto infer_subgraph_storage_type = [=](const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector* in_stypes, + std::vector* out_stypes) { + // get extra inputs, if exists + int extra_inputs = 0; + if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) + extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); + + auto in_first = in_stypes->begin(); + 
auto in_last = in_first + in_stypes->size() - extra_inputs; + std::vector *sg_in_stypes = new std::vector(in_first, in_last); + + return mxnet::op::DefaultSubgraphOpStorageType(attrs, dev_mask, dispatch_mode, + sg_in_stypes, out_stypes); + }; + // FGradient register lambda auto grad_reg = [=](const nnvm::ObjectPtr& n, const std::vector& ograds) { // create node for gradient @@ -881,10 +945,10 @@ void registerOperators(void *lib, int verbose) { using namespace mxnet::op; regOp.set_num_inputs(num_subgraph_inputs); regOp.set_num_outputs(DefaultSubgraphOpNumOutputs); - regOp.set_attr("FInferType", DefaultSubgraphOpType, plevel); - regOp.set_attr("FInferShape", DefaultSubgraphOpShape, plevel); + regOp.set_attr("FInferType", infer_subgraph_type, plevel); + regOp.set_attr("FInferShape", infer_subgraph_shape, plevel); regOp.set_attr("FInferStorageType", - DefaultSubgraphOpStorageType, plevel); + infer_subgraph_storage_type, plevel); regOp.set_attr("FMutateInputs", DefaultSubgraphOpMutableInputs, plevel); } From ff47ced20469573b7045cf3573cae64c9ddeb27a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 24 Jul 2020 20:29:19 +0000 Subject: [PATCH 04/25] sanity + refactor op registration --- include/mxnet/lib_api.h | 138 ++++++++-------- src/c_api/c_api.cc | 338 +++++++++++++++++++++------------------- 2 files changed, 252 insertions(+), 224 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 600896c84db0..d23bc4227d09 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -641,7 +641,7 @@ struct JsonVal { } return type < o.type; } - + // convert JSON object back to JSON-compatible string std::string dump() const { std::string ret; @@ -811,19 +811,19 @@ class Graph; // Representation of an input/output to a node struct NodeEntry { - Node* node; // other node thats producing/consuming inputs/outputs - int entry; // entry from other node (ie. which output from producing node) + Node* node; // other node thats producing/consuming inputs/outputs + int entry; // entry from other node (ie. which output from producing node) }; // Representation of a node in the graph class Node { public: - std::string op; // operator name (ie. Convolution) - std::string name; // unique node name (ie. conv_0 or conv_1) - std::vector inputs; // set of inputs to the node - std::vector outputs; // set of outputs from the node - std::vector subgraphs; // set of subgraphs within this node - std::unordered_map attrs; // node attributes + std::string op; // operator name (ie. Convolution) + std::string name; // unique node name (ie. 
conv_0 or conv_1) + std::vector inputs; // set of inputs to the node + std::vector outputs; // set of outputs from the node + std::vector subgraphs; // set of subgraphs within this node + std::unordered_map attrs; // node attributes }; // Representation of the graph @@ -832,7 +832,7 @@ class Graph { Graph() {} /* \brief deleted nodes when deleting the graph */ ~Graph() { - for(int i=0; i nodeMap; // loop over nodes - for(int i=0; inodes.push_back(n); JsonVal node = nodes.list[i]; @@ -860,35 +860,35 @@ class Graph { n->name = node.map[JsonVal("name")].str; // if op is null its an input to the graph - if(n->op.compare("null") == 0) + if (n->op.compare("null") == 0) g->inputs.push_back(n); - + // set attrs JsonVal attributes = node.map[JsonVal("attrs")]; - for(auto& kv : attributes.map) { + for (auto& kv : attributes.map) { n->attrs[kv.first.str] = kv.second.str; } // set subgraphs, parsing each into a graph if (node.map.count(JsonVal("subgraphs")) > 0) { - JsonVal subgraphs = node.map[JsonVal("subgraphs")]; - for (auto &subgraph : subgraphs.list) { - n->subgraphs.push_back(fromJson(subgraph)); - } + JsonVal subgraphs = node.map[JsonVal("subgraphs")]; + for (auto &subgraph : subgraphs.list) { + n->subgraphs.push_back(fromJson(subgraph)); + } } // set node inputs JsonVal node_inputs = node.map[JsonVal("inputs")]; n->inputs.resize(node_inputs.list.size()); - for(int j=0; jinputs[j]; - //get pointer to other node + // get pointer to other node entry.node = nodeMap[input.list[0].num]; - //get the other node's output index + // get the other node's output index entry.entry = input.list[1].num; - //set other nodes output as connected to this node - entry.node->outputs.push_back({n,j}); + // set other nodes output as connected to this node + entry.node->outputs.push_back({n, j}); } nodeMap[i] = n; } @@ -896,15 +896,15 @@ class Graph { // set graph level outputs JsonVal& heads = val.map[JsonVal("heads")]; g->outputs.resize(heads.list.size()); - for(int i=0; ioutputs[i].node = nodeMap[head.list[0].num]; g->outputs[i].entry = head.list[1].num; } // add all attributes to the graph - for(auto& kv : val.map) { - if(kv.first.str.compare("nodes") != 0 && + for (auto& kv : val.map) { + if (kv.first.str.compare("nodes") != 0 && kv.first.str.compare("heads") != 0 && kv.first.str.compare("node_row_ptr") != 0 && kv.first.str.compare("arg_nodes") != 0) { @@ -913,40 +913,42 @@ class Graph { } return g; } - + /* \brief convert graph object back to JSON object */ JsonVal toJson() { // top level object is a map JsonVal val(MAP); // add attributes - for(auto& kv : attrs) { + for (auto& kv : attrs) { val.map[JsonVal(kv.first)] = kv.second; } // sort graph nodes in topological order, create mapping of node to index std::map nodeMap; std::vector sorted = topological_sort(); - for(int i=sorted.size()-1; i>=0; i--) { + // nodes are in reverse topological order in the vector (back is first) + // so loop from end to front over the vector 'sorted' + for (int i = sorted.size()-1; i >= 0; i--) { nodeMap[sorted[i]] = sorted.size()-1-i; } // create node_row_ptr entry val.map[JsonVal("node_row_ptr")] = JsonVal(LIST); JsonVal& node_row_ptr = val.map[JsonVal("node_row_ptr")]; - for(int i=0; i=0; i--) { + for (int i = sorted.size()-1; i >= 0; i--) { // each node is a map nodes_.list.push_back(JsonVal(MAP)); Node* n = sorted[i]; JsonVal& n_ = nodes_.list[nodes_.list.size()-1]; - + n_.map[JsonVal("op")] = JsonVal(n->op); n_.map[JsonVal("name")] = JsonVal(n->name); n_.map[JsonVal("inputs")] = JsonVal(LIST); @@ -980,17 +982,17 @@ class 
Graph { // add subgraphs for this node, convert each back to JSON if (n->subgraphs.size() > 0) { - n_.map[JsonVal("subgraphs")] = JsonVal(LIST); - JsonVal &subgraphs_ = n_.map[JsonVal("subgraphs")]; - for(Graph *subgraph : n->subgraphs) { - subgraphs_.list.push_back(subgraph->toJson()); - } + n_.map[JsonVal("subgraphs")] = JsonVal(LIST); + JsonVal &subgraphs_ = n_.map[JsonVal("subgraphs")]; + for(Graph *subgraph : n->subgraphs) { + subgraphs_.list.push_back(subgraph->toJson()); + } } // add attributes for this node n_.map[JsonVal("attrs")] = JsonVal(MAP); JsonVal& attrs_ = n_.map[JsonVal("attrs")]; - for(auto& kv : n->attrs) { + for (auto& kv : n->attrs) { attrs_.map[JsonVal(kv.first)] = JsonVal(kv.second); } } @@ -1005,28 +1007,28 @@ class Graph { /* \brief visits a node "n" */ void _dfs_util(Node* n, std::unordered_set* to_visit, std::function handler) { - to_visit->erase(n); // remove node now that we're visiting it - for(NodeEntry& e : n->outputs) { + to_visit->erase(n); // remove node now that we're visiting it + for (NodeEntry& e : n->outputs) { Node* o = e.node; if(to_visit->count(o) != 0) { - _dfs_util(o,to_visit,handler); // visit neighbor + _dfs_util(o, to_visit, handler); // visit neighbor } } - handler(n); // post-order visit this node + handler(n); // post-order visit this node } /* \brief post-order DFS graph traversal */ void DFS(std::function handler) { std::unordered_set to_visit; - //put all nodes in set to visit - for(auto& n : nodes) + // put all nodes in set to visit + for (auto& n : nodes) to_visit.insert(n); - //visit all inputs first - for(auto& i : inputs) - if(to_visit.count(i) != 0) + // visit all inputs first + for (auto& i : inputs) + if (to_visit.count(i) != 0) _dfs_util(i, &to_visit, handler); - //visit any nodes left - while(to_visit.size() > 0) + // visit any nodes left + while (to_visit.size() > 0) _dfs_util(*(to_visit.begin()), &to_visit, handler); } @@ -1034,7 +1036,7 @@ class Graph { std::vector topological_sort() { std::vector sorted; auto handler = [&](Node* n) { - sorted.push_back(n); // when visiting each node, add it in order to the vector + sorted.push_back(n); // when visiting each node, add it in order to the vector }; DFS(handler); return sorted; @@ -1043,35 +1045,37 @@ class Graph { /* \brief print out graph details */ void print(int indent=0) { std::string space = ""; - for(int i=0; i < indent; i++) space+=" "; - + for (int i = 0; i < indent; i++) space+=" "; + std::cout << space << "########### Graph #############" << std::endl; std::cout << space << "attributes: " << std::endl; - for (auto &kv : attrs) + for (auto &kv : attrs) std::cout << space << "\t" << kv.first << " : " << kv.second.str << std::endl; std::cout << space << "inputs: " << inputs.size() << std::endl; std::cout << space << "outputs: " << outputs.size() << std::endl; std::cout << space << "nodes: " << nodes.size() << std::endl; std::vector sorted = topological_sort(); // loop over each node and print out its inputs/outputs - for(int i=sorted.size()-1; i>=0; i--) { + for (int i = sorted.size()-1; i >= 0; i--) { std::cout << space << "Node: " << sorted[i]->name << std::endl; - for(int j=0; jinputs.size(); j++) { - std::cout << space << "\tInput: " << sorted[i]->inputs[j].node->name << " " << sorted[i]->inputs[j].entry << std::endl; + for (int j = 0; j < sorted[i]->inputs.size(); j++) { + std::cout << space << "\tInput: " << sorted[i]->inputs[j].node->name << " " + << sorted[i]->inputs[j].entry << std::endl; } - for(int j=0; joutputs.size(); j++) { - std::cout << space << 
"\tOutput: " << sorted[i]->outputs[j].node->name << " " << sorted[i]->outputs[j].entry << std::endl; + for (int j = 0; j < sorted[i]->outputs.size(); j++) { + std::cout << space << "\tOutput: " << sorted[i]->outputs[j].node->name << " " + << sorted[i]->outputs[j].entry << std::endl; } - if(sorted[i]->subgraphs.size() > 0) { - for(auto &subgraph : sorted[i]->subgraphs) { - std::cout << space << "\tSubgraph:" << std::endl; - subgraph->print(indent+2); + if (sorted[i]->subgraphs.size() > 0) { + for (auto &subgraph : sorted[i]->subgraphs) { + std::cout << space << "\tSubgraph:" << std::endl; + subgraph->print(indent+2); } } } std::cout << space << "###############################" << std::endl; - } - + } + std::vector nodes; std::vector inputs; std::vector outputs; diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index b5d1dd89071d..47c9672f069c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -321,6 +321,166 @@ void CustomFComputeDispatcher(const std::string op_name, } } +template +void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp, + RescReq resc_req, AttrParser attr_parser, NumInputs num_inputs, + NumOutputs num_outputs, NumInOuts num_inouts, InferType infer_type, + InferShape infer_shape, InferSType infer_storage_type, + MutateInputs mutate_inputs, SubgraphNumInputs num_subgraph_inputs, + SubgraphInferType infer_subgraph_type, SubgraphInferShape infer_subgraph_shape, + SubgraphInferSType infer_subgraph_storage_type, CreateOpState create_opstate, + GradReg grad_reg, mutateInputs_t mutate_fp, + std::unordered_map &createop_map, + std::unordered_map &forward_ctx_map, + std::unordered_map &backward_ctx_map, + opCallFComp_t callFComp, opCallFStatefulComp_t callFStatefulComp) { + // check if operator is already registered + const nnvm::Op *regOpPtr = dmlc::Registry::Get()->Find(name); + nnvm::Op ®Op = dmlc::Registry::Get()->__REGISTER_OR_GET__(name); + int plevel = 10; + if (regOpPtr != nullptr) { + // overwrite registration of existing op with custom op + regOp.arguments.clear(); + // set attribute with higher plevel (11) to allow re-registering once + // TODO(samskalicky): enable constant overwriting of registertion multiple times + plevel++; + } + // define supported resources for both subgraph ops and regular ops + regOp.set_attr("FResourceRequest", resc_req, plevel); + if (!isSubgraphOp) { + regOp.set_attr_parser(attr_parser); + regOp.set_num_inputs(num_inputs); + regOp.set_num_outputs(num_outputs); + regOp.set_attr("FInferType", infer_type, plevel); + regOp.set_attr("FInferStorageType", infer_storage_type, plevel); + regOp.set_attr("FInferShape", infer_shape, plevel); + // optionally add fmutate inputs if user specified a function + if (mutate_fp != nullptr) + regOp.set_attr("FMutateInputs", mutate_inputs, plevel); + } else { + using namespace mxnet::op; + regOp.set_num_inputs(num_subgraph_inputs); + regOp.set_num_outputs(DefaultSubgraphOpNumOutputs); + regOp.set_attr("FInferType", infer_subgraph_type, plevel); + regOp.set_attr("FInferShape", infer_subgraph_shape, plevel); + regOp.set_attr("FInferStorageType", + infer_subgraph_storage_type, plevel); + regOp.set_attr("FMutateInputs", + DefaultSubgraphOpMutableInputs, plevel); + } + // optionally add stateful forward + if (createop_map.size() != 0) { + regOp.set_attr("FCreateOpState", create_opstate, plevel); + auto fstate_forward = [=](const OpStatePtr& state_ptr, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + 
CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, + callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs); + }; + if (createop_map.count("cpu") > 0) + regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); + if (createop_map.count("gpu") > 0) + regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); + } else { + auto forward_lambda = [=](const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + if (ctx.run_ctx.ctx.dev_mask() == Context::kCPU) { + CHECK_GT(forward_ctx_map.count("cpu"), 0); + fcomp_t fcomp = forward_ctx_map.at("cpu"); + CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); + } else if (ctx.run_ctx.ctx.dev_mask() == Context::kGPU) { + CHECK_GT(forward_ctx_map.count("gpu"), 0); + fcomp_t fcomp = forward_ctx_map.at("gpu"); + CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); + } + }; + if (forward_ctx_map.count("cpu") > 0) + regOp.set_attr("FComputeEx", forward_lambda, plevel); + if (forward_ctx_map.count("gpu") > 0) + regOp.set_attr("FComputeEx", forward_lambda, plevel); + } + // optionally add fgradient if user specified a function, or for stateful ops + if (backward_ctx_map.size() != 0 || createop_map.size() != 0) { + std::string grad_name = "_backward_" + name_str; + nnvm::Op &gradOp = dmlc::Registry::Get()->__REGISTER_OR_GET__(grad_name); + regOp.set_attr("FGradient", grad_reg, plevel); + gradOp.set_attr("TIsBackward", true, plevel); + gradOp.set_attr("FInferStorageType", infer_storage_type, plevel); + gradOp.set_attr("FResourceRequest", resc_req, plevel); + + if (!isSubgraphOp) { + // register attr parser and standard functions for non-subgraph ops + gradOp.set_attr_parser(attr_parser); + gradOp.set_num_inputs(num_inouts); + gradOp.set_num_outputs(num_inputs); + } else { + // for subgraph ops use special functions that do not invoke attr_parser + using namespace mxnet::op; + auto grad_inouts = [=](const nnvm::NodeAttrs& attrs) { + // for backward passes, inputs + outputs + input gradients (one for each output) + uint32_t cnt = num_subgraph_inputs(attrs); + cnt += 2 * DefaultSubgraphOpNumOutputs(attrs); + return cnt; + }; + gradOp.set_num_inputs(grad_inouts); + gradOp.set_num_outputs(num_subgraph_inputs); + } + + if (createop_map.size() != 0) { + // for stateful operators + gradOp.set_attr("TIsLayerOpBackward", true, plevel); + auto fstate_backward = [=](const OpStatePtr& state_ptr, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, + callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs); + }; + gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); + gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); + } else { + // for stateless operators + if (backward_ctx_map.count("cpu") > 0) { + fcomp_t fcomp_back_cpu = backward_ctx_map.at("cpu"); + auto backward_cpu_lambda = [=](const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CustomFComputeDispatcher(name_str, callFComp, fcomp_back_cpu, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); + }; + gradOp.set_attr("FComputeEx", backward_cpu_lambda, plevel); + } + if (backward_ctx_map.count("gpu") > 0) { + fcomp_t fcomp_back_gpu = 
backward_ctx_map.at("gpu"); + auto backward_gpu_lambda = [=](const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CustomFComputeDispatcher(name_str, callFComp, fcomp_back_gpu, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); + }; + gradOp.set_attr("FComputeEx", backward_gpu_lambda, plevel); + } + } + } + regOp.add_argument("data", "NDArray[]", "Source inputs"); +} + void registerOperators(void *lib, int verbose) { // get C type interface functions opCallFree_t callFree = get_func(lib, const_cast(MXLIB_OPCALLFREE_STR)); @@ -661,7 +821,7 @@ void registerOperators(void *lib, int verbose) { extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); auto in_first = in_shape->begin(); - auto in_last = in_first + in_shape->size() - extra_inputs; + auto in_last = in_first + in_shape->size() - extra_inputs; mxnet::ShapeVector *sg_in_shapes = new mxnet::ShapeVector(in_first, in_last); return mxnet::op::DefaultSubgraphOpShape(attrs, sg_in_shapes, out_shape); }; @@ -708,8 +868,8 @@ void registerOperators(void *lib, int verbose) { // lambda function to call infer type for subgraph ops auto infer_subgraph_type = [=] (const nnvm::NodeAttrs& attrs, - std::vector *in_type, - std::vector *out_type) { + std::vector *in_type, + std::vector *out_type) { // convert attributes to vector of char* std::vector attr_keys, attr_vals; for (auto &kv : attrs.dict) { @@ -725,10 +885,10 @@ void registerOperators(void *lib, int verbose) { auto in_first = in_type->begin(); auto in_last = in_first + in_type->size() - extra_inputs; std::vector *sg_in_types = new std::vector(in_first, in_last); - + return mxnet::op::DefaultSubgraphOpType(attrs, sg_in_types, out_type); }; - + // lambda function to convert from external mutate_inputs to internal MXNet types auto mutate_inputs = [=](const nnvm::NodeAttrs& attrs) { // convert attributes to vector of char* @@ -809,23 +969,23 @@ void registerOperators(void *lib, int verbose) { // lambda function to set storage types for subgraph ops auto infer_subgraph_storage_type = [=](const nnvm::NodeAttrs& attrs, - const int dev_mask, - DispatchMode* dispatch_mode, - std::vector* in_stypes, - std::vector* out_stypes) { + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector* in_stypes, + std::vector* out_stypes) { // get extra inputs, if exists int extra_inputs = 0; if (attrs.dict.count(MX_STR_EXTRA_INPUTS) > 0) extra_inputs = std::stoi(attrs.dict.at(MX_STR_EXTRA_INPUTS)); - auto in_first = in_stypes->begin(); - auto in_last = in_first + in_stypes->size() - extra_inputs; - std::vector *sg_in_stypes = new std::vector(in_first, in_last); - - return mxnet::op::DefaultSubgraphOpStorageType(attrs, dev_mask, dispatch_mode, - sg_in_stypes, out_stypes); + auto in_first = in_stypes->begin(); + auto in_last = in_first + in_stypes->size() - extra_inputs; + std::vector *sg_in_stypes = new std::vector(in_first, in_last); + + return mxnet::op::DefaultSubgraphOpStorageType(attrs, dev_mask, dispatch_mode, + sg_in_stypes, out_stypes); }; - + // FGradient register lambda auto grad_reg = [=](const nnvm::ObjectPtr& n, const std::vector& ograds) { // create node for gradient @@ -918,147 +1078,11 @@ void registerOperators(void *lib, int verbose) { /* -------------- BELOW IS THE REGISTRATION FOR CUSTOM OPERATORS --------------- */ - // check if operator is already registered - const nnvm::Op *regOpPtr = dmlc::Registry::Get()->Find(name); - nnvm::Op ®Op = dmlc::Registry::Get()->__REGISTER_OR_GET__(name); - 
int plevel = 10; - if (regOpPtr != nullptr) { - // overwrite registration of existing op with custom op - regOp.arguments.clear(); - // set attribute with higher plevel (11) to allow re-registering once - // TODO(samskalicky): enable constant overwriting of registertion multiple times - plevel++; - } - // define supported resources for both subgraph ops and regular ops - regOp.set_attr("FResourceRequest", resc_req, plevel); - if (!isSubgraphOp) { - regOp.set_attr_parser(attr_parser); - regOp.set_num_inputs(num_inputs); - regOp.set_num_outputs(num_outputs); - regOp.set_attr("FInferType", infer_type, plevel); - regOp.set_attr("FInferStorageType", infer_storage_type, plevel); - regOp.set_attr("FInferShape", infer_shape, plevel); - // optionally add fmutate inputs if user specified a function - if (mutate_fp != nullptr) - regOp.set_attr("FMutateInputs", mutate_inputs, plevel); - } else { - using namespace mxnet::op; - regOp.set_num_inputs(num_subgraph_inputs); - regOp.set_num_outputs(DefaultSubgraphOpNumOutputs); - regOp.set_attr("FInferType", infer_subgraph_type, plevel); - regOp.set_attr("FInferShape", infer_subgraph_shape, plevel); - regOp.set_attr("FInferStorageType", - infer_subgraph_storage_type, plevel); - regOp.set_attr("FMutateInputs", - DefaultSubgraphOpMutableInputs, plevel); - } - // optionally add stateful forward - if (createop_map.size() != 0) { - regOp.set_attr("FCreateOpState", create_opstate, plevel); - auto fstate_forward = [=](const OpStatePtr& state_ptr, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs); - }; - if (createop_map.count("cpu") > 0) - regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); - if (createop_map.count("gpu") > 0) - regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); - } else { - auto forward_lambda = [=](const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - if (ctx.run_ctx.ctx.dev_mask() == Context::kCPU) { - CHECK_GT(forward_ctx_map.count("cpu"), 0); - fcomp_t fcomp = forward_ctx_map.at("cpu"); - CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - } else if (ctx.run_ctx.ctx.dev_mask() == Context::kGPU) { - CHECK_GT(forward_ctx_map.count("gpu"), 0); - fcomp_t fcomp = forward_ctx_map.at("gpu"); - CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - } - }; - if (forward_ctx_map.count("cpu") > 0) - regOp.set_attr("FComputeEx", forward_lambda, plevel); - if (forward_ctx_map.count("gpu") > 0) - regOp.set_attr("FComputeEx", forward_lambda, plevel); - } - // optionally add fgradient if user specified a function, or for stateful ops - if (backward_ctx_map.size() != 0 || createop_map.size() != 0) { - std::string grad_name = "_backward_" + name_str; - nnvm::Op &gradOp = dmlc::Registry::Get()->__REGISTER_OR_GET__(grad_name); - regOp.set_attr("FGradient", grad_reg, plevel); - gradOp.set_attr("TIsBackward", true, plevel); - gradOp.set_attr("FInferStorageType", infer_storage_type, plevel); - gradOp.set_attr("FResourceRequest", resc_req, plevel); - - if (!isSubgraphOp) { - // register attr parser and standard functions for non-subgraph ops - gradOp.set_attr_parser(attr_parser); - gradOp.set_num_inputs(num_inouts); - gradOp.set_num_outputs(num_inputs); - 
} else { - // for subgraph ops use special functions that do not invoke attr_parser - using namespace mxnet::op; - auto grad_inouts = [=](const nnvm::NodeAttrs& attrs) { - // for backward passes, inputs + outputs + input gradients (one for each output) - uint32_t cnt = num_subgraph_inputs(attrs); - cnt += 2 * DefaultSubgraphOpNumOutputs(attrs); - return cnt; - }; - gradOp.set_num_inputs(grad_inouts); - gradOp.set_num_outputs(num_subgraph_inputs); - } - - if (createop_map.size() != 0) { - // for stateful operators - gradOp.set_attr("TIsLayerOpBackward", true, plevel); - auto fstate_backward = [=](const OpStatePtr& state_ptr, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs); - }; - gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); - gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); - } else { - // for stateless operators - if (backward_ctx_map.count("cpu") > 0) { - fcomp_t fcomp_back_cpu = backward_ctx_map.at("cpu"); - auto backward_cpu_lambda = [=](const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, callFComp, fcomp_back_cpu, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - }; - gradOp.set_attr("FComputeEx", backward_cpu_lambda, plevel); - } - if (backward_ctx_map.count("gpu") > 0) { - fcomp_t fcomp_back_gpu = backward_ctx_map.at("gpu"); - auto backward_gpu_lambda = [=](const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, callFComp, fcomp_back_gpu, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - }; - gradOp.set_attr("FComputeEx", backward_gpu_lambda, plevel); - } - } - } - regOp.add_argument("data", "NDArray[]", "Source inputs"); + registerOp(name, name_str, isSubgraphOp, resc_req, attr_parser, num_inputs, num_outputs, + num_inouts, infer_type, infer_shape, infer_storage_type, mutate_inputs, + num_subgraph_inputs, infer_subgraph_type, infer_subgraph_shape, + infer_subgraph_storage_type, create_opstate, grad_reg, mutate_fp, + createop_map, forward_ctx_map, backward_ctx_map, callFComp, callFStatefulComp); } } From efc2f335fa686da756f0dbf6b043e7353fdb5ac1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 24 Jul 2020 20:58:17 +0000 Subject: [PATCH 05/25] more sanity fixes --- include/mxnet/lib_api.h | 10 +++++----- src/c_api/c_api.cc | 38 +++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index d23bc4227d09..8d35805ede50 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -971,7 +971,7 @@ class Graph { // add inputs for this node JsonVal& inputs_ = n_.map[JsonVal("inputs")]; - for(int j=0; jinputs.size(); j++) { + for (int j = 0; j < n->inputs.size(); j++) { inputs_.list.push_back(JsonVal(LIST)); NodeEntry& entry = n->inputs[j]; JsonVal& in = inputs_.list[j]; @@ -984,7 +984,7 @@ class Graph { if (n->subgraphs.size() > 0) { n_.map[JsonVal("subgraphs")] = JsonVal(LIST); JsonVal &subgraphs_ = n_.map[JsonVal("subgraphs")]; - for(Graph *subgraph : n->subgraphs) { + for (Graph *subgraph : n->subgraphs) { subgraphs_.list.push_back(subgraph->toJson()); } } @@ -1010,7 +1010,7 @@ class Graph { 
to_visit->erase(n); // remove node now that we're visiting it for (NodeEntry& e : n->outputs) { Node* o = e.node; - if(to_visit->count(o) != 0) { + if (to_visit->count(o) != 0) { _dfs_util(o, to_visit, handler); // visit neighbor } } @@ -1043,7 +1043,7 @@ class Graph { } /* \brief print out graph details */ - void print(int indent=0) { + void print(int indent = 0) { std::string space = ""; for (int i = 0; i < indent; i++) space+=" "; @@ -1070,7 +1070,7 @@ class Graph { for (auto &subgraph : sorted[i]->subgraphs) { std::cout << space << "\tSubgraph:" << std::endl; subgraph->print(indent+2); - } + } } } std::cout << space << "###############################" << std::endl; diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 47c9672f069c..55d1ef02e905 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -322,22 +322,22 @@ void CustomFComputeDispatcher(const std::string op_name, } template + typename NumInOuts, + typename InferType, typename InferShape, typename InferSType, typename MutateInputs, + typename SubgraphNumInputs, typename SubgraphInferType, typename SubgraphInferShape, + typename SubgraphInferSType, typename CreateOpState, typename GradReg> void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp, - RescReq resc_req, AttrParser attr_parser, NumInputs num_inputs, - NumOutputs num_outputs, NumInOuts num_inouts, InferType infer_type, - InferShape infer_shape, InferSType infer_storage_type, - MutateInputs mutate_inputs, SubgraphNumInputs num_subgraph_inputs, - SubgraphInferType infer_subgraph_type, SubgraphInferShape infer_subgraph_shape, - SubgraphInferSType infer_subgraph_storage_type, CreateOpState create_opstate, - GradReg grad_reg, mutateInputs_t mutate_fp, - std::unordered_map &createop_map, - std::unordered_map &forward_ctx_map, - std::unordered_map &backward_ctx_map, - opCallFComp_t callFComp, opCallFStatefulComp_t callFStatefulComp) { + RescReq resc_req, AttrParser attr_parser, NumInputs num_inputs, + NumOutputs num_outputs, NumInOuts num_inouts, InferType infer_type, + InferShape infer_shape, InferSType infer_storage_type, + MutateInputs mutate_inputs, SubgraphNumInputs num_subgraph_inputs, + SubgraphInferType infer_subgraph_type, SubgraphInferShape infer_subgraph_shape, + SubgraphInferSType infer_subgraph_storage_type, CreateOpState create_opstate, + GradReg grad_reg, mutateInputs_t mutate_fp, + const std::unordered_map &createop_map, + const std::unordered_map &forward_ctx_map, + const std::unordered_map &backward_ctx_map, + opCallFComp_t callFComp, opCallFStatefulComp_t callFStatefulComp) { // check if operator is already registered const nnvm::Op *regOpPtr = dmlc::Registry::Get()->Find(name); nnvm::Op ®Op = dmlc::Registry::Get()->__REGISTER_OR_GET__(name); @@ -1079,10 +1079,10 @@ void registerOperators(void *lib, int verbose) { /* -------------- BELOW IS THE REGISTRATION FOR CUSTOM OPERATORS --------------- */ registerOp(name, name_str, isSubgraphOp, resc_req, attr_parser, num_inputs, num_outputs, - num_inouts, infer_type, infer_shape, infer_storage_type, mutate_inputs, - num_subgraph_inputs, infer_subgraph_type, infer_subgraph_shape, - infer_subgraph_storage_type, create_opstate, grad_reg, mutate_fp, - createop_map, forward_ctx_map, backward_ctx_map, callFComp, callFStatefulComp); + num_inouts, infer_type, infer_shape, infer_storage_type, mutate_inputs, + num_subgraph_inputs, infer_subgraph_type, infer_subgraph_shape, + infer_subgraph_storage_type, create_opstate, grad_reg, mutate_fp, + createop_map, forward_ctx_map, 
backward_ctx_map, callFComp, callFStatefulComp); } } From b730a75732bc1bb3dcfa0e44ce17834e5bea8447 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 24 Jul 2020 21:19:54 +0000 Subject: [PATCH 06/25] more sanity fixes --- python/mxnet/gluon/block.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 8d8ae328d075..be98b7a59aaa 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -1214,7 +1214,7 @@ def register_child(self, block, name=None): self._active = False self._clear_cached_op() - def hybridize(self, active=True, backend=None, backend_opts=None, clear=True, **kwargs): + def hybridize(self, active=True, backend=None, backend_opts=None, clear=True, **kwargs): """Activates or deactivates :py:class:`HybridBlock` s recursively. Has no effect on non-hybrid children. @@ -1243,7 +1243,8 @@ def hybridize(self, active=True, backend=None, backend_opts=None, clear=True, * self._active = active self._flags = list(kwargs.items()) - if clear: self._clear_cached_op() + if clear: + self._clear_cached_op() if active and self._forward_hooks or self._forward_pre_hooks: warnings.warn('"{block}" is being hybridized while still having forward hook/pre-hook. ' 'If "{block}" is a child of HybridBlock, the hooks will not take effect.' From be43a7fd5a4d65ff0051383b7c14db323190385a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 24 Jul 2020 22:11:09 +0000 Subject: [PATCH 07/25] added cast --- example/extensions/lib_subgraph/subgraph_lib.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index d53ceaf34d2f..095d8f198e1b 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -353,7 +353,7 @@ MXReturnValue addInputPass(const std::string& in_graph, const std::string** out_ g->nodes.push_back(input); g->inputs.push_back(input); //connect new input to node - input->outputs.push_back({n,n->inputs.size()}); + input->outputs.push_back({n,(int)(n->inputs.size())}); //connect node to new input n->inputs.push_back({input,0}); // add a corresponding tensor for this input From 5d6c153603f487b61c3085d7a370ecc999afe13b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 28 Jul 2020 05:28:44 +0000 Subject: [PATCH 08/25] updated READMEs --- example/extensions/lib_pass/README.md | 63 +++++++--- example/extensions/lib_subgraph/README.md | 141 ++++++++++++++++------ 2 files changed, 151 insertions(+), 53 deletions(-) diff --git a/example/extensions/lib_pass/README.md b/example/extensions/lib_pass/README.md index c2771242440f..a6705afb1a08 100644 --- a/example/extensions/lib_pass/README.md +++ b/example/extensions/lib_pass/README.md @@ -80,7 +80,7 @@ sym_block.optimize_for(x, backend='myPass') APIs in MXNet are available in both Symbol and Gluon APIs. For the Symbol API, the `optimize_for` API can be called on Symbol objects to return a new Symbol post graph pass. -``` +```python optimize_for(backend, args=None, aux=None, ctx=None, **kwargs) ``` @@ -88,7 +88,7 @@ The `optimize_for` API takes at least 1 argument, `backend` which is a string th For the Gluon API, the `hybridize` API can be called on HybridBlocks to execute a graph pass on the internal CachedOp Symbol. 
-``` +```python hybridize(backend=None, backend_opts=None, **kwargs) ``` @@ -96,20 +96,20 @@ The `hybridize` function prepares the HybridBlock to be converted into a backend If you just want to run a graph pass on the HybridBlock but not run a complete forward pass, you can use the `optimize_for` API that combines the work done in the `hybridize` API with part of the work done in the forward pass. -``` +```python optimize_for(x, backend=None, backend_opts=None, **kwargs) ``` When the `optimize_for` API is called on a HybridBlock it runs the graph pass immediately. This lets users export the modified model without running a complete forward pass. -``` +```python block.optimize_for(x, backend='myPass') block.export('optimized') ``` But you can also use `optimize_for` in place of `hybridize` and run inference immediately after too. -``` +```python block.optimize_for(x, backend='myPass') block(x) ``` @@ -120,12 +120,12 @@ There are several essential building blocks for making a custom pass: * [initialize](./pass_lib.cc#44): * This function is the library initialization function necessary for any dynamic libraries. It lets you check if the user is using a compatible version of MXNet. Note that this `version` parameter is passed from MXNet when library is loaded. - +```c++ MXReturnValue initialize(int version) - +``` * [graphPass](./pass_lib.cc#31): * This function provides a copy of the model graph as a JSON string, and provides an interface for returning a modified model JSON string. Also this is where a custom pass can validate the options specified by the user. - +```c++ MXReturnValue graphPass( const std::string& in_graph, const std::string** out_graph, @@ -133,22 +133,54 @@ There are several essential building blocks for making a custom pass: const std::unordered_map& args, const std::unordered_map& aux, const PassResource& res) - +``` * [REGISTER_PASS(my_pass_name)](./pass_lib.cc#L41): * This macro registers the custom pass and its properties to MXNet by its name. The argument to `setBody` is the `graphPass` function. - +```c++ REGISTER_PASS(my_pass_name) .setBody(graphPass); - +``` Let’s take a closer look at those registry functions: * **graphPass**: This function takes six arguments. The 1st argument is a JSON string of the model architecture graph, where nodes are inputs/params/weights and edges are data dependencies. The graph is pre-sorted in topological order. The 2nd argument is a pointer to a pointer of a JSON model string. It is expected users will dereference and assign the address of their output string allocated with `new` and `delete` will be called on it automatically. The third argument is the map of options specified by the user. Users can pass custom options to the pass and they are passed to this function in the `options` map. The fourth and fifth arguments are the named tensor mapping for the args and aux params for the model. They will contain the model params if the user provides them to the `optimize_for` API. The last argument is the `PassResource` object for memory allocation and other utilities. The details of `PassResource` are covered in the section below -### Pass Resource +### Graph representation + +The `Graph` class represents the model's architecture. Each `Node` in the graph represents an operator or weight (ie. args/aux param). Since an operator in MXNet can take multiple inputs and produce multiple outputs, each input/output is represented by a `NodeEntry`. 
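Conceptually, a `NodeEntry` just pairs a pointer to a node with an index into that node's outputs (or inputs). A rough sketch of the information it carries (consult `lib_api.h` for the actual definition):
```c++
// Rough sketch of what a NodeEntry holds; see lib_api.h for the real type.
struct NodeEntry {
  Node* node;   // the node on the other end of the edge
  int entry;    // which output (or input) slot on that node
};
```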
A `Node` contains the following: +- `op` - [string] operator name +- `name` - [string] unique node name +- `inputs` - [vector of NodeEntry] set of inputs to the node +- `outputs` - [vector of NodeEntry] set of outputs from the node +- `subgraph` - [vector of Graph] set of subgraphs in the node +- `attrs` - [map of string to string] set of attributes for the node + +The `inputs` are a set of `NodeEntry` where each contains a pointer to a node that produces the data, and an `entry` that is the index of the output on the other node. Conversely, the `output` are a set of `NodeEntry` where each contains a pointer to a node that consumes the data, and and `entry` that is the index of the input on the other node. This bidirectional dependency will enable you to easily traverse the graph. + +A `Graph` contains the following: +- `nodes` - [vector of Node] set of nodes in the graph +- `inputs` - [vector of Node] set of inputs to the graph +- `outputs` - [vector of NodeEntry] set of outputs from the graph +- `attrs` - [map of string to JSON object] set of attributes for the graph -Some graph passes require allocating new NDArrays to add/replace model params. The `alloc_arg` and `alloc_aux` APIs enabling allocating new NDArrays and integrating them with the user-provide args and aux params. Both APIs have the following signature: +The `nodes` are all the nodes in the graph (superset). The `inputs` are only those nodes that are model inputs (ie. input image) or weights (ie. arg/aux params). The `outputs` are the outputs from the operators in the model that are true outputs of the model (ie. prediction results). +Heres an example creating a new node and adding it to the graph: +```c++ +Node* n = new Node(); +g->nodes.push_back(n); +``` +Heres an example creating an edge between two nodes: +```c++ +n1->outputs.push_back({n2,0}); +n2->inputs.push_back({n1,0}); ``` +Here node `n1` produces an output at index 0 that is consumed by node `n2` on the 0th input. + +### Pass Resource + +Some graph passes require allocating new NDArrays to add/replace model params. The `alloc_arg` and `alloc_aux` APIs enabling allocating new NDArrays and integrate them with the model args and aux params. Both APIs have the following signature: + +```c++ MXTensor* alloc_xxx(const std::string& name, const std::vector& shapes, const MXContext &ctx, @@ -162,8 +194,7 @@ If the `name` provided matches the name of an existing param it replaces the pre To simplify custom libraries, basic JSON parsing utility functions have been implemented in the `lib_api.h` header file. You create a `JsonParser` object and parse the string by calling the `parse_to_json` API like: ```c++ -JsonParser parser; -JsonVal json_val = parser.parse_to_json(json_string); +JsonVal json_val = JsonVal::parse(json); ``` A `JsonVal` is a class that represents the nodes in a JSON structure. You can check the type of a node (num, str, list, or map) by comparing the `JsonVal.type` to `STR`, `NUM`, `LIST`, or `MAP`. Then you can get that value from the node like: @@ -187,4 +218,4 @@ switch(json_val.type) { } ``` -There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. +You call the `dump` function on a `JsonVal` object like `json_val.dump()` to get a JSON-compatible string. 
There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. \ No newline at end of file diff --git a/example/extensions/lib_subgraph/README.md b/example/extensions/lib_subgraph/README.md index 6644a1fdc8ff..c9747a60116f 100644 --- a/example/extensions/lib_subgraph/README.md +++ b/example/extensions/lib_subgraph/README.md @@ -38,11 +38,16 @@ You can start getting familiar with custom partitioners by running an example pr 2. Run `python test_subgraph.py`. It’ll first load the above library, find the components, register them in the MXNet backend, then partition the model and execute the operators like a regular MXNet operator and output the result. Below is the output when running the `python test_subgraph.py` command. Notice that it loads 2 operators: my_gemm and state_gemm. ``` -[10:38:03] src/c_api/c_api.cc:286: Found 1 operators in library -[10:38:03] src/c_api/c_api.cc:350: Op[0] _custom_subgraph_op -[10:38:03] src/c_api/c_api.cc:785: Found 1 partitioners in library -[10:38:03] src/c_api/c_api.cc:801: Partitioner[0] myProp -[10:38:03] src/c_api/c_api.cc:821: Strategy[0] strategy1 subgraphOp: '_custom_subgraph_op' +[02:01:18] src/c_api/c_api.cc:515: Found 1 operators in library +[02:01:18] src/c_api/c_api.cc:580: Op[0] _custom_subgraph_op +[02:01:18] src/c_api/c_api.cc:581: isSubgraphOp +[02:01:18] src/c_api/c_api.cc:1121: Found 2 partitioners in library +[02:01:18] src/c_api/c_api.cc:1137: Partitioner[0] myProp +[02:01:18] src/c_api/c_api.cc:1159: Strategy[0] strategy1 subgraphOp: '_custom_subgraph_op' +[02:01:18] src/c_api/c_api.cc:1137: Partitioner[1] mySelect +[02:01:18] src/c_api/c_api.cc:1159: Strategy[0] strategy1 subgraphOp: '_custom_subgraph_op' +[02:01:18] src/c_api/c_api.cc:1182: Found 1 graph passes in library +[02:01:18] src/c_api/c_api.cc:1197: Graph Pass [0] addInputPass ``` ### Basic Files For Custom Partitioner Library @@ -93,7 +98,7 @@ In the Gluon hybridize flow, the model is actually hybridized during the first i Partitioning APIs in MXNet are available in both Symbol and Gluon APIs. For the Symbol API, the `optimize_for` API can be called on Symbol objects to return a partitioned Symbol. -``` +```python optimize_for(backend, args=None, aux=None, ctx=None, **kwargs) ``` @@ -101,28 +106,28 @@ The `optimize_for` API takes at least 1 argument, `backend` which is a string th For the Gluon API, the `hybridize` API can be called on HybridBlocks to partition the internal CachedOp Symbol. -``` -hybridize(backend=None, backend_opts=None, **kwargs) +```python +hybridize(backend=None, backend_opts=None, clear=True, **kwargs) ``` -The `hybridize` function prepares the HybridBlock to be converted into a backend symbol. The `backend` argument is a string that identifies which backend that will partition the model. The `backend_opts` takes other user-specified options that will be passed to the backend partitioning APIs. The actual partitioning takes place during the forward pass. +The `hybridize` function prepares the HybridBlock to be converted into a backend symbol. The `backend` argument is a string that identifies which backend that will partition the model. The `backend_opts` are other user-specified options (as a Python dictionary of strings mapped to strings) that will be passed to the backend partitioning APIs. The `clear` argument defaults to `True` and clears any previous optimizations done on the block. 
If you want to chain optimizations together, set `clear` to `False`. The actual partitioning takes place during the forward pass. If you want to use `hybridize` to chain multiple optimizations, be sure to execute a forward pass after each call to `hybridize`. If you just want to partition the HybridBlock but not run a complete forward pass, you can use the `optimize_for` API that combines the work done in the `hybridize` API with part of the work done in the forward pass. -``` -optimize_for(x, backend=None, backend_opts=None, **kwargs) +```python +optimize_for(x, backend=None, backend_opts=None, clear=True, **kwargs) ``` -When the `optimize_for` API is called on a HybridBlock it partitions immediately. This lets users export the partitioned model without running a complete forward pass. +When the `optimize_for` API is called on a HybridBlock it partitions immediately. This lets users export the partitioned model without running a complete forward pass. Chaining multiple optimizations is as simple as calling `optimize_for` multiple times, no need to execute a forward pass (as opposed to `hybridize`). -``` +```python block.optimize_for(x, backend='myPart') block.export('partitioned') ``` But you can also use `optimize_for` in place of `hybridize` and run inference immediately after too. -``` +```python block.optimize_for(x, backend='myPart') block(x) ``` @@ -133,52 +138,115 @@ There are several essential building blocks for making a custom partitioner: * [initialize](./subgraph_lib.cc#L261): * This function is the library initialization function necessary for any dynamic libraries. It lets you check if the user is using a compatible version of MXNet. Note that this `version` parameter is passed from MXNet when library is loaded. - +```c++ MXReturnValue initialize(int version) - +``` * [supportedOps](./subgraph_lib.cc#L179): * This function provides a copy of the model graph as a JSON string, and provides an interface for identifying which operators should be partitioned into a subgraph. Also this is where a custom partitioner can validate the options specified by the user. - +```c++ MXReturnValue supportedOps( - std::string json, - std::vector& ids, - std::unordered_map& options) - + const std::string& json, + std::vector* ids, + const std::unordered_map& options) +``` * [REGISTER_PARTITIONER(my_part_name)](./subgraph_lib.cc#L257): - * This macro registers the custom partitioner and its properties to MXNet by its name. Notice that a partitioner can have multiple partitioning strategies. This enables multiple *passes* to be run in a single partitioning call from the user. The first argument to `addStrategy` is a user-specified name. The second argument is the `supportedOps` function. The third argument is the name of the subgraph operator to create for each subgraph created during partitioning (see below for more info about subgraph operators). The `setReviewSubgraph` API registers a callback function that is called for each subgraph created during partitioning (more on this below). Notice that the first argument to this function is the strategy to associate with and the second argument is the `reviewSubgraph` function. - + * This macro registers the custom partitioner and its properties to MXNet by its name. Notice that a partitioner can have multiple partitioning strategies. This enables multiple *passes* to be run in a single partitioning call from the user. The first argument to `addStrategy` is a user-specified name. 
The second argument is the name of the subgraph operator to create for each subgraph created during partitioning (see below for more info about subgraph operators). The `setSupportedOps` API registers the `supportedOps` function. The `setReviewSubgraph` API registers a callback function that is called for each subgraph created during partitioning (more on this below). Notice that the first argument to this function is the strategy to associate with and the second argument is the `reviewSubgraph` function.
+```c++
 REGISTER_PARTITIONER(my_part_name)
-    .addStrategy("strategy1", supportedOps, "_custom_subgraph_op")
+    .addStrategy("strategy1", "_custom_subgraph_op")
+    .setSupportedOps("strategy1", supportedOps)
     .setReviewSubgraph("strategy1", reviewSubgraph);
-
-
+```
Also there are some optional functions you can specify:

* [reviewSubgraph](./subgraph_lib.cc#L219):
    * This function provides an opportunity to accept/reject a subgraph after MXNet partitions it. It also allows specifying custom attributes on the subgraph (ie. user-generated IDs). If you do not register this function, subgraphs will be accepted by default.
-
+```c++
    MXReturnValue reviewSubgraph(
-        std::string json,
+        const std::string& json,
        int subgraph_id,
        bool* accept,
-        std::unordered_map& options,
-        std::unordered_map& attrs,
-        std::map& args,
-        std::map& aux)
-
+        const std::unordered_map& options,
+        std::unordered_map* attrs,
+        const std::map& args,
+        const std::map& aux)
+```
Let’s take a closer look at those registry functions:

* **supportedOps**: This function takes four arguments. The 1st argument is a JSON string of the model architecture graph, where nodes are inputs/params/weights and edges are data dependencies. The graph is pre-sorted in topological order. The 2nd argument is an array of booleans, one for each operator in the model. When traversing the graph, operators to be partitioned into subgraphs are identified and an entry is set to `true` for the index in the `ids` array corresponding to the node ID. The last argument is the map of options specified by the user. Users can pass custom options to the partitioner and they are passed to this function in the `options` map.

* **reviewSubgraph**: This function takes five arguments. The 1st argument is a JSON string of the newly partitioned subgraph. The 2nd argument is the subgraph ID; this is just a number MXNet uses to identify this particular subgraph (it starts at zero and increments, unique for each subgraph in the model). The 3rd argument is an output to be set in this function to tell MXNet whether to accept (value: `true`) or reject (value: `false`) the subgraph. You might want to reject a subgraph if it doesn't include all the operators you want, for example. The 4th argument is the map of options specified by the user; this `options` map is the same one passed to the `supportedOps` API. The 5th argument is a map of attributes that should be set on the created subgraph. These attributes will be available later at runtime, and provide a mechanism to pass info from partition-time to runtime. The last argument is the map of params/weights/args to the model and the associated names. For inputs to the subgraph that come directly from the params/weights of the model, you can look up the name of the input in this map to get the actual tensor values.

### Writing a Custom Selector
Instead of implementing the `supportedOps` API, you can implement a custom selector class for more control over partitioning.
+ +* [createSelector](./subgraph_lib.cc#L321): + * This function provides a copy of the model graph as a JSON string for the first argument. The 2nd argument is a placeholder for CustomOpSelector object. You must define a class that inherits CustomOpSelector and override the required functions. Then you need to create an instance of your class and assign it to the placeholder. The last argument is a map of user-specified options. +```c++ + MXReturnValue createSelector( + const std::string& json, + CustomOpSelector** sel_inst, + const std::unordered_map& options) +``` +Instead of registering a `supportedOps` API, register the `setCreateSelector` API. +```c++ + REGISTER_PARTITIONER(my_part_name) + .addStrategy("strategy1", "_custom_subgraph_op") + .setCreateSelector("strategy1", createSelector) + .setReviewSubgraph("strategy1", reviewSubgraph); +``` +When implementing your own selector class, you must inherit from the `CustomOpSelector` class and define the following APIs: +* [Select](./subgraph_lib.cc#L301): + * This function selects a node to include in a subgraph by the index of the node (`nodeID`) in the graph. Return `true` to include this node or `false` to reject this node. +```c++ + bool Select( + int nodeID) +``` +* [SelectInput](./subgraph_lib.cc#L304): + * This function grows the subgraph from a node (`nodeID`) to a node that produces one of its inputs (`input_nodeID`). Return `true` to include this node (`input_nodeID`) or `false` to reject this node. +```c++ + bool SelectInput( + int nodeID, + int input_nodeID) +``` +* [SelectOutput](./subgraph_lib.cc#L304): + * This function grows the subgraph from a node (`nodeID`) to a node that consumes one of its outputs (`output_nodeID`). Return `true` to include this node (`output_nodeID`) or `false` to reject this node. +```c++ + bool SelectOutput( + int nodeID, + int output_nodeID) +``` +All of these APIs refer to the model's graph that is provided to the `createSelector` API. When you implement your custom `createSelector` function, you can pass the graph and options to the constructor of your class like this: +```c++ +MXReturnValue myCreateSelector(const std::string& json, CustomOpSelector** sel_inst, + const std::unordered_map& options) { + *sel_inst = new MySelector(json, options); + return MX_SUCCESS; +} +``` +In addition to the 3 required APIs shown above, you can also implement the following optional APIs for your `CustomOpSelector` class: +* [Filter](./subgraph_lib.cc#L310): + * This function enables reviewing the candidate nodes to include in subgraph. The `candidates` are the indices of nodes in the graph to be included in the subgraph. The 2nd argument `keep` is an empty vector to be filled with the indices of nodes you wish to keep in the subgraph. Any remaining candidate nodes not added to `keep` will be excluded from the subgraph. The following function body shows the default behavior when not overloaded, to keep all candidates: +```c++ + void Filter( + std::vector& candidates, + std::vector& keep) { + keep.insert(keep.end(), candidates.begin(), candidates.end()); + } +``` +* [Reset](./subgraph_lib.cc#L314): + * This function provides an opportunity to reset any selector state between subgraphs. It is called after growing subgraph, and before `Filter`. There is no default behavior. +```c++ + virtual void Reset() {} +``` + ### Writing A Custom Subgraph Operator A partitioning strategy specifies how to partition a model and isolate operators into subgraphs. 
In MXNet, subgraphs are just a [stateful operator](../lib_custom_op#writing-stateful-custom-operator). Subgraph operators have an extra attribute called `MX_STR_SUBGRAPH_SYM_JSON` that maps to a JSON string of the subgraph. The expectation is that when a subgraph operator executes a forward/backward call, it executes all of the operators in the subgraph. When registering a custom subgraph operator, all thats needed is to register a `createOpState` function and to set that the operator is a subgraph operator by calling the `setIsSubgraphOp` API like: -``` +```c++ REGISTER_OP(my_subgraph_op) .setIsSubgraphOp() .setCreateOpState(createOpState, "cpu"); @@ -189,8 +257,7 @@ REGISTER_OP(my_subgraph_op) To simplify custom partitioner libraries, basic JSON parsing utility functions have been implemented in the `lib_api.h` header file. You create a `JsonParser` object and parse the string by calling the `parse_to_json` API like: ```c++ -JsonParser parser; -JsonVal json_val = parser.parse_to_json(json_string); +JsonVal json_val = JsonVal::parse(json); ``` A `JsonVal` is a class that represents the nodes in a JSON structure. You can check the type of a node (num, str, list, or map) by comparing the `JsonVal.type` to `STR`, `NUM`, `LIST`, or `MAP`. Then you can get that value from the node like: @@ -214,4 +281,4 @@ switch(json_val.type) { } ``` -There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. \ No newline at end of file +You call the `dump` function on a `JsonVal` object like `json_val.dump()` to get a JSON-compatible string. There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. 
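As a quick example, here is a minimal sketch of walking the node list of a graph JSON string (the `subgraph_json` variable is just a stand-in for whatever JSON string your callback receives):
```c++
// Minimal sketch: iterate the "nodes" list of a model/subgraph JSON string.
// `subgraph_json` is a placeholder name for the JSON string passed to your callback.
JsonVal graph = JsonVal::parse(subgraph_json);
JsonVal nodes = graph.map[JsonVal("nodes")];
for (auto &node : nodes.list) {
  // each node is a MAP containing (among others) "op" and "name" string entries
  std::string op   = node.map[JsonVal("op")].str;
  std::string name = node.map[JsonVal("name")].str;
  std::cout << "node '" << name << "' runs op '" << op << "'" << std::endl;
}
```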
\ No newline at end of file From 6efae766610962075bbe30a408ff72aaaa5b07d5 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 28 Jul 2020 16:43:30 +0000 Subject: [PATCH 09/25] added namespace --- example/extensions/lib_api/init_lib.cc | 2 + example/extensions/lib_custom_op/gemm_lib.cc | 2 + example/extensions/lib_custom_op/relu_lib.cu | 2 + .../lib_custom_op/transposecsr_lib.cc | 2 + .../lib_custom_op/transposerowsp_lib.cc | 2 + example/extensions/lib_pass/README.md | 12 +- example/extensions/lib_pass/pass_lib.cc | 2 + .../extensions/lib_subgraph/subgraph_lib.cc | 2 + include/mxnet/lib_api.h | 234 +++++++------- src/c_api/c_api.cc | 286 +++++++++--------- .../partitioner/custom_subgraph_property.h | 80 ++--- 11 files changed, 335 insertions(+), 291 deletions(-) diff --git a/example/extensions/lib_api/init_lib.cc b/example/extensions/lib_api/init_lib.cc index fb3a10457cf5..1531c56e5800 100644 --- a/example/extensions/lib_api/init_lib.cc +++ b/example/extensions/lib_api/init_lib.cc @@ -26,6 +26,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + MXReturnValue initialize(int version) { if (version >= 10700) { std::cout << "MXNet version " << version << " supported" << std::endl; diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index 4f8dabadc6a1..3298794e83a3 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -26,6 +26,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + // main matrix multiplication routine void gemm(const float* A, const float* B, float* C, const unsigned n, const unsigned k, const unsigned m) { diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index a4711cbeab67..a58c74e22877 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -26,6 +26,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + #define NumThreadPerBlock 256 // mxnet recommended cuda thread number per block __global__ void relu_gpu_forward(float *out, float *in, int64_t N) { diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index 224cd6aa81b6..ced26c5ffc95 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -26,6 +26,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { MXSparse* A = src.data(); MXSparse* B = dst.data(); diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index 46d3c4d41a4c..3e1027c01869 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -26,6 +26,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { MXSparse* A = src.data(); MXSparse* B = dst.data(); diff --git a/example/extensions/lib_pass/README.md b/example/extensions/lib_pass/README.md index a6705afb1a08..1d0d972f90d0 100644 --- a/example/extensions/lib_pass/README.md +++ b/example/extensions/lib_pass/README.md @@ -154,7 +154,7 @@ The `Graph` class represents the model's architecture. 
Each `Node` in the graph - `subgraph` - [vector of Graph] set of subgraphs in the node - `attrs` - [map of string to string] set of attributes for the node -The `inputs` are a set of `NodeEntry` where each contains a pointer to a node that produces the data, and an `entry` that is the index of the output on the other node. Conversely, the `output` are a set of `NodeEntry` where each contains a pointer to a node that consumes the data, and and `entry` that is the index of the input on the other node. This bidirectional dependency will enable you to easily traverse the graph. +The `inputs` are a set of `NodeEntry` where each contains a pointer to a `Node` that produces the data, and an `entry` that is the index of the output on the other `Node`. Conversely, the `outputs` are a set of `NodeEntry` where each contains a pointer to a`Node` that consumes the data, and and `entry` that is the index of the input on the other `Node`. This bidirectional dependency will enable you to easily traverse the graph. A `Graph` contains the following: - `nodes` - [vector of Node] set of nodes in the graph @@ -171,14 +171,14 @@ g->nodes.push_back(n); ``` Heres an example creating an edge between two nodes: ```c++ -n1->outputs.push_back({n2,0}); +n1->outputs.push_back({n2,1}); n2->inputs.push_back({n1,0}); ``` -Here node `n1` produces an output at index 0 that is consumed by node `n2` on the 0th input. +Here node `n1` produces an output at index 0 that is consumed by node `n2` on the input at index 1. ### Pass Resource -Some graph passes require allocating new NDArrays to add/replace model params. The `alloc_arg` and `alloc_aux` APIs enabling allocating new NDArrays and integrate them with the model args and aux params. Both APIs have the following signature: +Some graph passes require allocating new NDArrays to add/replace model params. The `alloc_arg` and `alloc_aux` APIs enable allocating new NDArrays and integrate them with the model args and aux params. Both APIs have the following signature: ```c++ MXTensor* alloc_xxx(const std::string& name, @@ -187,7 +187,9 @@ Some graph passes require allocating new NDArrays to add/replace model params. T MXDType dtype) ``` -If the `name` provided matches the name of an existing param it replaces the previous one. Otherwise it adds a new param to the appropriate arg/aux set. +If the `name` provided matches the name of an existing param it replaces the previous one. Otherwise it adds a new param to the appropriate arg/aux set. Be sure that you add a new node in the graph that corresponds to this new param, otherwise it will be useless. + +If you wish to remove an existing param, just remove the node in the graph corresponding to that param. It will be deleted after the pass completes and removed from the dictionary of args or aux (whichever it is a member of). 
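For example, here is a minimal sketch of allocating a new arg param from inside a pass, assuming the `PassResource` object `res` passed to `graphPass` as shown above (the param name and shape are made up for illustration):
```c++
// Sketch: allocate a new 16x16 float32 arg param and zero-fill it.
// "my_new_weight" and the 16x16 shape are illustrative values, not part of the API.
MXTensor* w = res.alloc_arg("my_new_weight", {16, 16}, MXContext("cpu", 0), kFloat32);
float* data = w->data<float>();
for (int i = 0; i < 16 * 16; i++)
  data[i] = 0.0f;
```
Remember to also add a corresponding node to the graph (as described above), otherwise the new param will not be used.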
### Parsing a JSON string diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index 2e8a4a584821..3ba2949f3b4c 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -28,6 +28,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + /* \brief a basic pass that copies the input to the output */ MXReturnValue myPass(const std::string& in_graph, const std::string** out_graph, const std::unordered_map& options, diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 095d8f198e1b..17a7263010bf 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -28,6 +28,8 @@ #include #include "lib_api.h" +using namespace mxnet::ext; + /* function to execute log operator on floats */ void myLog(MXTensor &in, MXTensor &out) { float* inp = in.data(); diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 8d35805ede50..1ab9042bcaf7 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -212,6 +212,9 @@ extern "C" { #endif #endif +namespace mxnet { +namespace ext { + /*! * \brief Tensor data type, consistent with mshadow data type */ @@ -1453,12 +1456,13 @@ typedef int (*opRegSize_t)(void); #define MXLIB_OPREGGET_STR "_opRegGet" typedef int (*opRegGet_t)(int idx, const char** name, int *isSGop, - const char*** forward_ctx, fcomp_t** forward_fp, int* forward_count, - const char*** backward_ctx, fcomp_t** backward_fp, int* backward_count, - const char*** create_op_ctx, createOpState_t** create_op_fp, - int* create_op_count, - parseAttrs_t* parse, inferType_t* type, inferSType_t* stype, - inferShape_t* shape, mutateInputs_t* mutate); + const char*** forward_ctx, mxnet::ext::fcomp_t** forward_fp, + int* forward_count, const char*** backward_ctx, + mxnet::ext::fcomp_t** backward_fp, int* backward_count, + const char*** create_op_ctx, mxnet::ext::createOpState_t** create_op_fp, + int* create_op_count, mxnet::ext::parseAttrs_t* parse, + mxnet::ext::inferType_t* type, mxnet::ext::inferSType_t* stype, + mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate); #define MXLIB_OPCALLFREE_STR "_opCallFree" typedef int (*opCallFree_t)(void* ptr); @@ -1627,6 +1631,9 @@ typedef int (*opVersion_t)(); #define MX_VOID_RET void #endif +} // namespace ext +} // namespace mxnet + extern "C" { /*! \brief returns MXNet library version */ MX_INT_RET _opVersion() { @@ -1635,18 +1642,19 @@ extern "C" { /*! \brief returns number of ops registered in this library */ MX_INT_RET _opRegSize() { - return Registry::get()->size(); + return mxnet::ext::Registry::get()->size(); } /*! 
\brief returns operator registration at specified index */ MX_VOID_RET _opRegGet(int idx, const char** name, int *isSGop, - const char*** forward_ctx, fcomp_t** forward_fp, + const char*** forward_ctx, mxnet::ext::fcomp_t** forward_fp, int* forward_count, const char*** backward_ctx, - fcomp_t** backward_fp, int* backward_count, - const char*** create_op_ctx, createOpState_t** create_op_fp, - int* create_op_count, parseAttrs_t* parse, inferType_t* type, - inferSType_t* stype, inferShape_t* shape, mutateInputs_t* mutate) { - CustomOp &op = Registry::get()->get(idx); + mxnet::ext::fcomp_t** backward_fp, int* backward_count, + const char*** create_op_ctx, mxnet::ext::createOpState_t** create_op_fp, + int* create_op_count, mxnet::ext::parseAttrs_t* parse, + mxnet::ext::inferType_t* type, mxnet::ext::inferSType_t* stype, + mxnet::ext::inferShape_t* shape, mxnet::ext::mutateInputs_t* mutate) { + mxnet::ext::CustomOp &op = mxnet::ext::Registry::get()->get(idx); *name = op.name; *parse = op.parse_attrs; *type = op.infer_type; @@ -1672,7 +1680,7 @@ extern "C" { } /*! \brief returns status of calling parse attributes function for operator from library */ - MX_INT_RET _opCallParseAttrs(parseAttrs_t parseAttrs, const char* const* keys, + MX_INT_RET _opCallParseAttrs(mxnet::ext::parseAttrs_t parseAttrs, const char* const* keys, const char* const* vals, int num, int* num_in, int* num_out) { // create map of attributes from list @@ -1685,7 +1693,7 @@ extern "C" { } /*! \brief returns status of calling inferShape function for operator from library */ - MX_INT_RET _opCallInferShape(inferShape_t inferShape, const char* const* keys, + MX_INT_RET _opCallInferShape(mxnet::ext::inferShape_t inferShape, const char* const* keys, const char* const* vals, int num, unsigned int** inshapes, int* indims, int num_in, unsigned int*** mod_inshapes, int** mod_indims, @@ -1740,7 +1748,7 @@ extern "C" { } /*! \brief returns status of calling inferType function for operator from library */ - MX_INT_RET _opCallInferType(inferType_t inferType, const char* const* keys, + MX_INT_RET _opCallInferType(mxnet::ext::inferType_t inferType, const char* const* keys, const char* const* vals, int num, int* intypes, int num_in, int* outtypes, int num_out) { // create map of attributes from list @@ -1775,7 +1783,7 @@ extern "C" { } /*! \brief returns status of calling inferSType function for operator from library */ - MX_INT_RET _opCallInferSType(inferSType_t inferSType, const char* const* keys, + MX_INT_RET _opCallInferSType(mxnet::ext::inferSType_t inferSType, const char* const* keys, const char* const* vals, int num, int* instypes, int num_in, int* outstypes, int num_out) { // create map of attributes from list @@ -1811,14 +1819,14 @@ extern "C" { } /*! 
\brief returns status of calling Forward/Backward function for operator from library */ - MX_INT_RET _opCallFCompute(fcomp_t fcomp, const char* const* keys, const char* const* vals, + MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, const char* const* keys, const char* const* vals, int num, const int64_t** inshapes, int* indims, void** indata, int* intypes, size_t* inIDs, const char** indev_type, int* indev_id, int num_in, const int64_t** outshapes, int* outdims, void** outdata, int* outtypes, size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, void* cpu_alloc, - xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, - sparse_malloc_t sparse_malloc, void* sparse_alloc, + int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, void* cpu_alloc, + mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, + mxnet::ext::sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, int64_t* out_indices_shapes, @@ -1831,66 +1839,68 @@ extern "C" { } // create a vector of tensors for inputs - std::vector inputs(num_in); + std::vector inputs(num_in); // create a vector for sparse inputs - std::vector in_sparse(num_in); + std::vector in_sparse(num_in); for (int i = 0; i < num_in; i++) { // Dense representation. if (instypes[i] == 0) { - inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], MXContext(indev_type[i], indev_id[i]), kDefaultStorage); + inputs[i].setTensor(indata[i], (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], mxnet::ext::MXContext(indev_type[i], indev_id[i]), + mxnet::ext::kDefaultStorage); } else { // Sparse representation. - MXStorageType type; + mxnet::ext::MXStorageType type; if (instypes[i] == 1) { - type = kRowSparseStorage; + type = mxnet::ext::kRowSparseStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } else { - type = kCSRStorage; + type = mxnet::ext::kCSRStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } - inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (MXDType)intypes[i], + inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], - MXContext(indev_type[i], indev_id[i]), type); + mxnet::ext::MXContext(indev_type[i], indev_id[i]), type); } } // create a vector of tensors for outputs - std::vector outputs(num_out); - std::vector out_sparse(num_out); + std::vector outputs(num_out); + std::vector out_sparse(num_out); for (int i = 0; i < num_out; i++) { // Dense representation. if (outstypes[i] == 0) { - outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], MXContext(outdev_type[i], outdev_id[i]), kDefaultStorage); + outputs[i].setTensor(outdata[i], (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), + mxnet::ext::kDefaultStorage); } else { // Sparse representation. 
- MXStorageType type; + mxnet::ext::MXStorageType type; if (outstypes[i] == 1) { - type = kRowSparseStorage; + type = mxnet::ext::kRowSparseStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); } else { - type = kCSRStorage; + type = mxnet::ext::kCSRStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (MXDType)outtypes[i], + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], - MXContext(outdev_type[i], outdev_id[i]), type); + mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, - cuda_stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); + mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, + cuda_stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); return fcomp(attrs, &inputs, &outputs, res); } /*! \brief returns status of calling mutateInputs function for operator from library */ - MX_INT_RET _opCallMutateInputs(mutateInputs_t mutate, const char* const* keys, + MX_INT_RET _opCallMutateInputs(mxnet::ext::mutateInputs_t mutate, const char* const* keys, const char* const* vals, int num, int** mutate_indices, int* indices_size) { // create map of attributes from list @@ -1917,7 +1927,7 @@ extern "C" { } /*! \brief returns status of calling createStatefulOp function for operator from library */ - MX_INT_RET _opCallCreateOpState(createOpState_t create_op, const char* const* keys, + MX_INT_RET _opCallCreateOpState(mxnet::ext::createOpState_t create_op, const char* const* keys, const char* const* vals, int num, void** state_op) { // create map of attributes from list @@ -1928,7 +1938,7 @@ extern "C" { // void pointer to hold custom state op instance created in custom library // eventually state_op pointer is populated by instance from custom library - CustomStatefulOp** op_ptr = reinterpret_cast(state_op); + mxnet::ext::CustomStatefulOp** op_ptr = reinterpret_cast(state_op); return create_op(attrs, op_ptr); } @@ -1938,9 +1948,9 @@ extern "C" { const char** indev_type, int* indev_id, int num_in, const int64_t** outshapes, int* outdims, void** outdata, int* outtypes, size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, xpu_malloc_t cpu_malloc, - void* cpu_alloc, xpu_malloc_t gpu_malloc, void* gpu_alloc, - void* stream, sparse_malloc_t sparse_malloc, + int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, + void* stream, mxnet::ext::sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, int64_t* in_indices_shapes, @@ -1948,64 +1958,66 @@ extern "C" { int64_t* out_indptr_shapes, void* rng_cpu_states, void* rng_gpu_states) { // create a vector of tensors for inputs - std::vector inputs(num_in); + std::vector inputs(num_in); // create a vector for sparse inputs - std::vector in_sparse(num_in); + std::vector in_sparse(num_in); for (int i = 0; i < num_in; i++) { if (instypes[i] == 0) { // Dense representation. 
- inputs[i].setTensor(indata[i], (MXDType)intypes[i], inshapes[i], indims[i], - inIDs[i], MXContext(indev_type[i], indev_id[i]), kDefaultStorage); + inputs[i].setTensor(indata[i], (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], + inIDs[i], mxnet::ext::MXContext(indev_type[i], indev_id[i]), + mxnet::ext::kDefaultStorage); } else { // Sparse representation. - MXStorageType type; + mxnet::ext::MXStorageType type; if (instypes[i] == 1) { - type = kRowSparseStorage; + type = mxnet::ext::kRowSparseStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i]); } else { - type = kCSRStorage; + type = mxnet::ext::kCSRStorage; in_sparse[i].set(indata[i], inshapes[i], indims[i], in_indices[i], in_indices_shapes[i], in_indptr[i], in_indptr_shapes[i]); } - inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (MXDType)intypes[i], + inputs[i].setTensor(reinterpret_cast(&in_sparse[i]), (mxnet::ext::MXDType)intypes[i], inshapes[i], indims[i], inIDs[i], - MXContext(indev_type[i], indev_id[i]), type); + mxnet::ext::MXContext(indev_type[i], indev_id[i]), type); } } // create a vector of tensors for outputs - std::vector outputs(num_out); + std::vector outputs(num_out); // create a vector for sparse outputs - std::vector out_sparse(num_out); + std::vector out_sparse(num_out); for (int i = 0; i < num_out; i++) { if (outstypes[i] == 0) { // Dense representation. - outputs[i].setTensor(outdata[i], (MXDType)outtypes[i], outshapes[i], outdims[i], - outIDs[i], MXContext(outdev_type[i], outdev_id[i]), kDefaultStorage); + outputs[i].setTensor(outdata[i], (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], + outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), + mxnet::ext::kDefaultStorage); } else { // Sparse representation. - MXStorageType type; + mxnet::ext::MXStorageType type; if (outstypes[i] == 1) { - type = kRowSparseStorage; + type = mxnet::ext::kRowSparseStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i]); } else { - type = kCSRStorage; + type = mxnet::ext::kCSRStorage; out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (MXDType)outtypes[i], + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], - MXContext(outdev_type[i], outdev_id[i]), type); + mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); } } - OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, - stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); + mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, + stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); - CustomStatefulOp* op_ptr = reinterpret_cast(state_op); + mxnet::ext::CustomStatefulOp* op_ptr = reinterpret_cast(state_op); if (is_forward) { return op_ptr->Forward(&inputs, &outputs, res); } @@ -2014,22 +2026,25 @@ extern "C" { /*! 
\brief returns number of partitioners registered in this library */ MX_INT_RET _partRegSize() { - return Registry::get()->size(); + return mxnet::ext::Registry::get()->size(); } /* returns number of strategies registered for partitioner * at specified index */ MX_INT_RET _partRegGetCount(int idx, const char** name) { - CustomPartitioner part = Registry::get()->get(idx); + mxnet::ext::CustomPartitioner part = + mxnet::ext::Registry::get()->get(idx); *name = part.name; return part.strategies.size(); } /*! \brief returns partitioner registration at specified index */ MX_VOID_RET _partRegGet(int part_idx, int stg_idx, const char** strategy, - supportedOps_t* supportedOps, createSelector_t* createSelector, - reviewSubgraph_t* reviewSubgraph, const char** op_name) { - CustomPartitioner part = Registry::get()->get(part_idx); + mxnet::ext::supportedOps_t* supportedOps, + mxnet::ext::createSelector_t* createSelector, + mxnet::ext::reviewSubgraph_t* reviewSubgraph, const char** op_name) { + mxnet::ext::CustomPartitioner part = + mxnet::ext::Registry::get()->get(part_idx); *strategy = part.strategies[stg_idx]; *op_name = part.op_names[stg_idx]; *supportedOps = part.getSupportedOps(stg_idx); @@ -2038,7 +2053,7 @@ extern "C" { } /*! \brief returns status of calling supported ops function from library */ - MX_INT_RET _partCallSupportedOps(supportedOps_t supportedOps, const char *json, + MX_INT_RET _partCallSupportedOps(mxnet::ext::supportedOps_t supportedOps, const char *json, int num_ids, int *ids, const char* const* opt_keys, const char* const* opt_vals, int num_opts) { std::string subgraph_json(json); @@ -2050,7 +2065,7 @@ extern "C" { // create array of subgraph IDs for operator support std::vector _ids(num_ids, -2); // call user's supportedOps function - MXReturnValue retval = supportedOps(subgraph_json, &_ids, opts); + mxnet::ext::MXReturnValue retval = supportedOps(subgraph_json, &_ids, opts); if (!retval) return retval; // copy bools in ids to ints @@ -2061,7 +2076,7 @@ extern "C" { } /*! \brief returns status of calling create selector function from library */ - MX_INT_RET _partCallCreateSelector(createSelector_t createSelector, const char *json, + MX_INT_RET _partCallCreateSelector(mxnet::ext::createSelector_t createSelector, const char *json, void** selector, const char* const* opt_keys, const char* const* opt_vals, int num_opts) { std::string symbol_json(json); @@ -2072,7 +2087,7 @@ extern "C" { // void pointer to hold selector instance created in custom library // eventually pointer is populated by instance from custom library - CustomOpSelector** sel_ptr = reinterpret_cast(selector); + mxnet::ext::CustomOpSelector** sel_ptr = reinterpret_cast(selector); // call user's createSelector function return createSelector(symbol_json, sel_ptr, opts); @@ -2080,28 +2095,28 @@ extern "C" { /*! \brief returns status of calling select function from library */ MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected) { - CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); *selected = sel_ptr->Select(nodeID); } /*! \brief returns status of calling select input function from library */ MX_VOID_RET _partCallSelectInput(void* sel_inst, int nodeID, int input_nodeID, int* selected) { - CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); *selected = sel_ptr->SelectInput(nodeID, input_nodeID); } /*! 
\brief returns status of calling select output function from library */ MX_VOID_RET _partCallSelectOutput(void* sel_inst, int nodeID, int output_nodeID, int* selected) { - CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); *selected = sel_ptr->SelectOutput(nodeID, output_nodeID); } /*! \brief returns status of calling filter function from library */ MX_VOID_RET _partCallFilter(void* sel_inst, int* candidates, int num_candidates, int** keep, int* num_keep) { - CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); std::vector candidates_(num_candidates); for (int i=0; i < num_candidates; i++) { candidates_[i] = candidates[i]; @@ -2118,12 +2133,12 @@ extern "C" { /*! \brief returns status of calling reset selector function from library */ MX_VOID_RET _partCallReset(void* sel_inst) { - CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); sel_ptr->Reset(); } /*! \brief returns status of calling review subgraph function from library */ - MX_INT_RET _partCallReviewSubgraph(reviewSubgraph_t reviewSubgraph, const char *json, + MX_INT_RET _partCallReviewSubgraph(mxnet::ext::reviewSubgraph_t reviewSubgraph, const char *json, int subgraph_id, int *accept, const char* const* opt_keys, const char* const* opt_vals, int num_opts, char*** attr_keys, char*** attr_vals, int *num_attrs, @@ -2145,33 +2160,33 @@ extern "C" { opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); // create a map of named tensors for args - std::unordered_map args; + std::unordered_map args; for (int i = 0; i < num_args; i++) { std::vector shapes; for (int j = 0; j < arg_dims[i]; j++) shapes.push_back(arg_shapes[i][j]); - MXTensor tensor(arg_data[i], shapes, (MXDType)arg_types[i], - arg_IDs[i], MXContext(arg_dev_type[i], arg_dev_id[i])); + mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], + arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], arg_dev_id[i])); args[arg_names[i]] = tensor; } // create a map of named tensors for aux - std::unordered_map aux; + std::unordered_map aux; for (int i = 0; i < num_aux; i++) { std::vector shapes; for (int j = 0; j < aux_dims[i]; j++) shapes.push_back(aux_shapes[i][j]); - MXTensor tensor(aux_data[i], shapes, (MXDType)aux_types[i], - aux_IDs[i], MXContext(aux_dev_type[i], aux_dev_id[i])); + mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], + aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], aux_dev_id[i])); aux[aux_names[i]] = tensor; } // attributes to set on subgraph node std::unordered_map attrs; - MXReturnValue retval = reviewSubgraph(subgraph_json, subgraph_id, &accept_bool, - opts, &attrs, args, aux); + mxnet::ext::MXReturnValue retval = reviewSubgraph(subgraph_json, subgraph_id, &accept_bool, + opts, &attrs, args, aux); if (!retval) return retval; *accept = accept_bool; @@ -2198,19 +2213,20 @@ extern "C" { /*! \brief returns number of graph passes registered in this library */ MX_INT_RET _passRegSize() { - return Registry::get()->size(); + return mxnet::ext::Registry::get()->size(); } /*! 
\brief returns pass registration at specified index */ - MX_VOID_RET _passRegGet(int pass_idx, graphPass_t* graphPass, + MX_VOID_RET _passRegGet(int pass_idx, mxnet::ext::graphPass_t* graphPass, const char** pass_name) { - CustomPass pass = Registry::get()->get(pass_idx); + mxnet::ext::CustomPass pass = + mxnet::ext::Registry::get()->get(pass_idx); *graphPass = pass.pass; *pass_name = pass.name; } /*! \brief returns status of calling graph pass function from library */ - MX_INT_RET _passCallGraphPass(graphPass_t graphPass, const char *json, + MX_INT_RET _passCallGraphPass(mxnet::ext::graphPass_t graphPass, const char *json, char** graph, const char* const* opt_keys, const char* const* opt_vals, int num_opts, const char* pass_name, const char* const* arg_names, int num_args, @@ -2221,7 +2237,7 @@ extern "C" { void* const* aux_data, const int64_t* const* aux_shapes, const int* aux_dims, const int* aux_types, const size_t* aux_IDs, const char* const* aux_dev_type, - const int* aux_dev_id, nd_malloc_t nd_malloc, + const int* aux_dev_id, mxnet::ext::nd_malloc_t nd_malloc, const void* nd_alloc) { std::string graph_json(json); const std::string* out_graph = nullptr; @@ -2231,37 +2247,37 @@ extern "C" { opts[std::string(opt_keys[i])] = std::string(opt_vals[i]); // create a map of named tensors for args - std::unordered_map args; + std::unordered_map args; for (int i = 0; i < num_args; i++) { std::vector shapes; for (int j = 0; j < arg_dims[i]; j++) shapes.push_back(arg_shapes[i][j]); - MXTensor tensor(arg_data[i], shapes, (MXDType)arg_types[i], - arg_IDs[i], MXContext(arg_dev_type[i], arg_dev_id[i])); + mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], + arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], arg_dev_id[i])); args[arg_names[i]] = tensor; } // create a map of named tensors for aux - std::unordered_map aux; + std::unordered_map aux; for (int i = 0; i < num_aux; i++) { std::vector shapes; for (int j = 0; j < aux_dims[i]; j++) shapes.push_back(aux_shapes[i][j]); - MXTensor tensor(aux_data[i], shapes, (MXDType)aux_types[i], - aux_IDs[i], MXContext(aux_dev_type[i], aux_dev_id[i])); + mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], + aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], aux_dev_id[i])); aux[aux_names[i]] = tensor; } - std::unordered_map new_args, new_aux; - PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); - MXReturnValue retval = graphPass(graph_json, &out_graph, opts, args, aux, res); + std::unordered_map new_args, new_aux; + mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); + mxnet::ext::MXReturnValue retval = graphPass(graph_json, &out_graph, opts, args, aux, res); if (!retval) return retval; if (out_graph == nullptr) { std::cout << "Error calling graph pass '" << pass_name << "' returned out_graph string is null" << std::endl; - return MX_FAIL; + return mxnet::ext::MX_FAIL; } *graph = static_cast(malloc((out_graph->length()+1) * sizeof(char))); out_graph->copy(*graph, out_graph->size()+1); @@ -2277,10 +2293,10 @@ extern "C" { * \return Non-zero value on error i.e. 
library incompatible with passed MXNet version */ #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) - __declspec(dllexport) MXReturnValue __cdecl + __declspec(dllexport) mxnet::ext::MXReturnValue __cdecl #else - MXReturnValue + mxnet::ext::MXReturnValue #endif initialize(int version); -} +} // extern "C" #endif // MXNET_LIB_API_H_ diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 55d1ef02e905..cd6de9ec8cf4 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -103,16 +103,18 @@ inline int MXAPIGetFunctionRegInfo(const FunRegType *e, * state_ptr will be nullptr for regular ops; fcomp_fp is nullptr for stateful ops */ void CustomFComputeDispatcher(const std::string op_name, - const opCallFComp_t callFComp, - const fcomp_t fcomp_fp, + const mxnet::ext::opCallFComp_t callFComp, + const mxnet::ext::fcomp_t fcomp_fp, const nnvm::NodeAttrs* attrs, - const opCallFStatefulComp_t callFStatefulComp, + const mxnet::ext::opCallFStatefulComp_t callFStatefulComp, int stateful_forward_flag, const OpStatePtr* state_ptr, const OpContext& ctx, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { + using namespace mxnet::ext; + std::vector in_data, out_data; std::vector in_shapes, out_shapes; std::vector in_dims, out_dims; @@ -333,155 +335,159 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp MutateInputs mutate_inputs, SubgraphNumInputs num_subgraph_inputs, SubgraphInferType infer_subgraph_type, SubgraphInferShape infer_subgraph_shape, SubgraphInferSType infer_subgraph_storage_type, CreateOpState create_opstate, - GradReg grad_reg, mutateInputs_t mutate_fp, - const std::unordered_map &createop_map, - const std::unordered_map &forward_ctx_map, - const std::unordered_map &backward_ctx_map, - opCallFComp_t callFComp, opCallFStatefulComp_t callFStatefulComp) { - // check if operator is already registered - const nnvm::Op *regOpPtr = dmlc::Registry::Get()->Find(name); - nnvm::Op ®Op = dmlc::Registry::Get()->__REGISTER_OR_GET__(name); - int plevel = 10; - if (regOpPtr != nullptr) { - // overwrite registration of existing op with custom op - regOp.arguments.clear(); - // set attribute with higher plevel (11) to allow re-registering once - // TODO(samskalicky): enable constant overwriting of registertion multiple times - plevel++; - } - // define supported resources for both subgraph ops and regular ops - regOp.set_attr("FResourceRequest", resc_req, plevel); + GradReg grad_reg, mxnet::ext::mutateInputs_t mutate_fp, + const std::unordered_map &createop_map, + const std::unordered_map &forward_ctx_map, + const std::unordered_map &backward_ctx_map, + mxnet::ext::opCallFComp_t callFComp, mxnet::ext::opCallFStatefulComp_t callFStatefulComp) { + using namespace mxnet::ext; + + // check if operator is already registered + const nnvm::Op *regOpPtr = dmlc::Registry::Get()->Find(name); + nnvm::Op ®Op = dmlc::Registry::Get()->__REGISTER_OR_GET__(name); + int plevel = 10; + if (regOpPtr != nullptr) { + // overwrite registration of existing op with custom op + regOp.arguments.clear(); + // set attribute with higher plevel (11) to allow re-registering once + // TODO(samskalicky): enable constant overwriting of registertion multiple times + plevel++; + } + // define supported resources for both subgraph ops and regular ops + regOp.set_attr("FResourceRequest", resc_req, plevel); + if (!isSubgraphOp) { + regOp.set_attr_parser(attr_parser); + regOp.set_num_inputs(num_inputs); + regOp.set_num_outputs(num_outputs); + regOp.set_attr("FInferType", 
infer_type, plevel); + regOp.set_attr("FInferStorageType", infer_storage_type, plevel); + regOp.set_attr("FInferShape", infer_shape, plevel); + // optionally add fmutate inputs if user specified a function + if (mutate_fp != nullptr) + regOp.set_attr("FMutateInputs", mutate_inputs, plevel); + } else { + using namespace mxnet::op; + regOp.set_num_inputs(num_subgraph_inputs); + regOp.set_num_outputs(DefaultSubgraphOpNumOutputs); + regOp.set_attr("FInferType", infer_subgraph_type, plevel); + regOp.set_attr("FInferShape", infer_subgraph_shape, plevel); + regOp.set_attr("FInferStorageType", + infer_subgraph_storage_type, plevel); + regOp.set_attr("FMutateInputs", + DefaultSubgraphOpMutableInputs, plevel); + } + // optionally add stateful forward + if (createop_map.size() != 0) { + regOp.set_attr("FCreateOpState", create_opstate, plevel); + auto fstate_forward = [=](const OpStatePtr& state_ptr, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, + callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs); + }; + if (createop_map.count("cpu") > 0) + regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); + if (createop_map.count("gpu") > 0) + regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); + } else { + auto forward_lambda = [=](const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + if (ctx.run_ctx.ctx.dev_mask() == Context::kCPU) { + CHECK_GT(forward_ctx_map.count("cpu"), 0); + fcomp_t fcomp = forward_ctx_map.at("cpu"); + CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); + } else if (ctx.run_ctx.ctx.dev_mask() == Context::kGPU) { + CHECK_GT(forward_ctx_map.count("gpu"), 0); + fcomp_t fcomp = forward_ctx_map.at("gpu"); + CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); + } + }; + if (forward_ctx_map.count("cpu") > 0) + regOp.set_attr("FComputeEx", forward_lambda, plevel); + if (forward_ctx_map.count("gpu") > 0) + regOp.set_attr("FComputeEx", forward_lambda, plevel); + } + // optionally add fgradient if user specified a function, or for stateful ops + if (backward_ctx_map.size() != 0 || createop_map.size() != 0) { + std::string grad_name = "_backward_" + name_str; + nnvm::Op &gradOp = dmlc::Registry::Get()->__REGISTER_OR_GET__(grad_name); + regOp.set_attr("FGradient", grad_reg, plevel); + gradOp.set_attr("TIsBackward", true, plevel); + gradOp.set_attr("FInferStorageType", infer_storage_type, plevel); + gradOp.set_attr("FResourceRequest", resc_req, plevel); + if (!isSubgraphOp) { - regOp.set_attr_parser(attr_parser); - regOp.set_num_inputs(num_inputs); - regOp.set_num_outputs(num_outputs); - regOp.set_attr("FInferType", infer_type, plevel); - regOp.set_attr("FInferStorageType", infer_storage_type, plevel); - regOp.set_attr("FInferShape", infer_shape, plevel); - // optionally add fmutate inputs if user specified a function - if (mutate_fp != nullptr) - regOp.set_attr("FMutateInputs", mutate_inputs, plevel); + // register attr parser and standard functions for non-subgraph ops + gradOp.set_attr_parser(attr_parser); + gradOp.set_num_inputs(num_inouts); + gradOp.set_num_outputs(num_inputs); } else { + // for subgraph ops use special functions that do not invoke attr_parser using namespace mxnet::op; - regOp.set_num_inputs(num_subgraph_inputs); - 
regOp.set_num_outputs(DefaultSubgraphOpNumOutputs); - regOp.set_attr("FInferType", infer_subgraph_type, plevel); - regOp.set_attr("FInferShape", infer_subgraph_shape, plevel); - regOp.set_attr("FInferStorageType", - infer_subgraph_storage_type, plevel); - regOp.set_attr("FMutateInputs", - DefaultSubgraphOpMutableInputs, plevel); + auto grad_inouts = [=](const nnvm::NodeAttrs& attrs) { + // for backward passes, inputs + outputs + input gradients (one for each output) + uint32_t cnt = num_subgraph_inputs(attrs); + cnt += 2 * DefaultSubgraphOpNumOutputs(attrs); + return cnt; + }; + gradOp.set_num_inputs(grad_inouts); + gradOp.set_num_outputs(num_subgraph_inputs); } - // optionally add stateful forward + if (createop_map.size() != 0) { - regOp.set_attr("FCreateOpState", create_opstate, plevel); - auto fstate_forward = [=](const OpStatePtr& state_ptr, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { + // for stateful operators + gradOp.set_attr("TIsLayerOpBackward", true, plevel); + auto fstate_backward = [=](const OpStatePtr& state_ptr, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs); + callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs); }; - if (createop_map.count("cpu") > 0) - regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); - if (createop_map.count("gpu") > 0) - regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); + gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); + gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); } else { - auto forward_lambda = [=](const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - if (ctx.run_ctx.ctx.dev_mask() == Context::kCPU) { - CHECK_GT(forward_ctx_map.count("cpu"), 0); - fcomp_t fcomp = forward_ctx_map.at("cpu"); - CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - } else if (ctx.run_ctx.ctx.dev_mask() == Context::kGPU) { - CHECK_GT(forward_ctx_map.count("gpu"), 0); - fcomp_t fcomp = forward_ctx_map.at("gpu"); - CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, + // for stateless operators + if (backward_ctx_map.count("cpu") > 0) { + fcomp_t fcomp_back_cpu = backward_ctx_map.at("cpu"); + auto backward_cpu_lambda = [=](const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CustomFComputeDispatcher(name_str, callFComp, fcomp_back_cpu, &attrs, nullptr, 0, nullptr, ctx, inputs, req, outputs); - } - }; - if (forward_ctx_map.count("cpu") > 0) - regOp.set_attr("FComputeEx", forward_lambda, plevel); - if (forward_ctx_map.count("gpu") > 0) - regOp.set_attr("FComputeEx", forward_lambda, plevel); - } - // optionally add fgradient if user specified a function, or for stateful ops - if (backward_ctx_map.size() != 0 || createop_map.size() != 0) { - std::string grad_name = "_backward_" + name_str; - nnvm::Op &gradOp = dmlc::Registry::Get()->__REGISTER_OR_GET__(grad_name); - regOp.set_attr("FGradient", grad_reg, plevel); - gradOp.set_attr("TIsBackward", true, plevel); - gradOp.set_attr("FInferStorageType", infer_storage_type, plevel); - gradOp.set_attr("FResourceRequest", resc_req, plevel); - - if (!isSubgraphOp) { - // 
register attr parser and standard functions for non-subgraph ops - gradOp.set_attr_parser(attr_parser); - gradOp.set_num_inputs(num_inouts); - gradOp.set_num_outputs(num_inputs); - } else { - // for subgraph ops use special functions that do not invoke attr_parser - using namespace mxnet::op; - auto grad_inouts = [=](const nnvm::NodeAttrs& attrs) { - // for backward passes, inputs + outputs + input gradients (one for each output) - uint32_t cnt = num_subgraph_inputs(attrs); - cnt += 2 * DefaultSubgraphOpNumOutputs(attrs); - return cnt; }; - gradOp.set_num_inputs(grad_inouts); - gradOp.set_num_outputs(num_subgraph_inputs); + gradOp.set_attr("FComputeEx", backward_cpu_lambda, plevel); } - - if (createop_map.size() != 0) { - // for stateful operators - gradOp.set_attr("TIsLayerOpBackward", true, plevel); - auto fstate_backward = [=](const OpStatePtr& state_ptr, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs); + if (backward_ctx_map.count("gpu") > 0) { + fcomp_t fcomp_back_gpu = backward_ctx_map.at("gpu"); + auto backward_gpu_lambda = [=](const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CustomFComputeDispatcher(name_str, callFComp, fcomp_back_gpu, &attrs, + nullptr, 0, nullptr, ctx, inputs, req, outputs); }; - gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); - gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); - } else { - // for stateless operators - if (backward_ctx_map.count("cpu") > 0) { - fcomp_t fcomp_back_cpu = backward_ctx_map.at("cpu"); - auto backward_cpu_lambda = [=](const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, callFComp, fcomp_back_cpu, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - }; - gradOp.set_attr("FComputeEx", backward_cpu_lambda, plevel); - } - if (backward_ctx_map.count("gpu") > 0) { - fcomp_t fcomp_back_gpu = backward_ctx_map.at("gpu"); - auto backward_gpu_lambda = [=](const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CustomFComputeDispatcher(name_str, callFComp, fcomp_back_gpu, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); - }; - gradOp.set_attr("FComputeEx", backward_gpu_lambda, plevel); - } + gradOp.set_attr("FComputeEx", backward_gpu_lambda, plevel); } } - regOp.add_argument("data", "NDArray[]", "Source inputs"); + } + regOp.add_argument("data", "NDArray[]", "Source inputs"); } void registerOperators(void *lib, int verbose) { + using namespace mxnet::ext; + // get C type interface functions opCallFree_t callFree = get_func(lib, const_cast(MXLIB_OPCALLFREE_STR)); @@ -1087,6 +1093,8 @@ void registerOperators(void *lib, int verbose) { } void registerPartitioners(void *lib, int verbose) { + using namespace mxnet::ext; + // get C type interface functions opCallFree_t callFree = get_func(lib, const_cast(MXLIB_OPCALLFREE_STR)); @@ -1169,6 +1177,8 @@ void registerPartitioners(void *lib, int verbose) { } void registerPasses(void *lib, int verbose) { + using namespace mxnet::ext; + // get C type interface functions opCallFree_t callFree = get_func(lib, const_cast(MXLIB_OPCALLFREE_STR)); @@ -1404,14 +1414,16 @@ int 
MXLoadLib(const char *path, unsigned verbose) { LOG(FATAL) << "Unable to load library"; // check that library and MXNet use same version of library API - opVersion_t opVersion = get_func(lib, const_cast(MXLIB_OPVERSION_STR)); + mxnet::ext::opVersion_t opVersion = + get_func(lib, const_cast(MXLIB_OPVERSION_STR)); int libVersion = opVersion(); if (MX_LIBRARY_VERSION != libVersion) LOG(FATAL) << "Library version (" << libVersion << ") does not match MXNet version (" << MX_LIBRARY_VERSION << ")"; // initialize library by passing MXNet version - initialize_t initialize = get_func(lib, const_cast(MXLIB_INITIALIZE_STR)); + mxnet::ext::initialize_t initialize = + get_func(lib, const_cast(MXLIB_INITIALIZE_STR)); if (!initialize(static_cast(MXNET_VERSION))) LOG(FATAL) << "Library failed to initialize"; diff --git a/src/operator/subgraph/partitioner/custom_subgraph_property.h b/src/operator/subgraph/partitioner/custom_subgraph_property.h index ea721c5aa71a..b936b050cdba 100644 --- a/src/operator/subgraph/partitioner/custom_subgraph_property.h +++ b/src/operator/subgraph/partitioner/custom_subgraph_property.h @@ -49,12 +49,12 @@ namespace op { class CustomContainOpSelector: public SubgraphSelector { public: explicit CustomContainOpSelector(std::unordered_map supported_nodes, - void* sel_inst, partCallSelect_t callSelect, - partCallSelectInput_t callSelectInput, - partCallSelectOutput_t callSelectOutput, - partCallFilter_t callFilter, - partCallReset_t callReset, - opCallFree_t callFree, + void* sel_inst, mxnet::ext::partCallSelect_t callSelect, + mxnet::ext::partCallSelectInput_t callSelectInput, + mxnet::ext::partCallSelectOutput_t callSelectOutput, + mxnet::ext::partCallFilter_t callFilter, + mxnet::ext::partCallReset_t callReset, + mxnet::ext::opCallFree_t callFree, std::unordered_map node2id) : supported_nodes_(supported_nodes), sel_inst_(sel_inst), callSelect_(callSelect), callSelectInput_(callSelectInput), callSelectOutput_(callSelectOutput), @@ -123,12 +123,12 @@ class CustomContainOpSelector: public SubgraphSelector { std::unordered_map supported_nodes_; void* sel_inst_; - partCallSelect_t callSelect_; - partCallSelectInput_t callSelectInput_; - partCallSelectOutput_t callSelectOutput_; - partCallFilter_t callFilter_; - partCallReset_t callReset_; - opCallFree_t callFree_; + mxnet::ext::partCallSelect_t callSelect_; + mxnet::ext::partCallSelectInput_t callSelectInput_; + mxnet::ext::partCallSelectOutput_t callSelectOutput_; + mxnet::ext::partCallFilter_t callFilter_; + mxnet::ext::partCallReset_t callReset_; + mxnet::ext::opCallFree_t callFree_; std::unordered_map node2id_; }; @@ -155,18 +155,18 @@ class CustomSubgraphProperty: public SubgraphProperty { review_subgraph_(nullptr), subgraph_op_name("error") {} CustomSubgraphProperty(std::string subgraph_prop_name, - partCallSupportedOps_t call_supported_ops, - supportedOps_t supported_ops, - partCallCreateSelector_t call_create_selector, - createSelector_t create_selector, - partCallSelect_t callSelect, - partCallSelectInput_t callSelectInput, - partCallSelectOutput_t callSelectOutput, - partCallFilter_t callFilter, - partCallReset_t callReset, - partCallReviewSubgraph_t call_review_subgraph, - reviewSubgraph_t review_subgraph, - opCallFree_t call_free, + mxnet::ext::partCallSupportedOps_t call_supported_ops, + mxnet::ext::supportedOps_t supported_ops, + mxnet::ext::partCallCreateSelector_t call_create_selector, + mxnet::ext::createSelector_t create_selector, + mxnet::ext::partCallSelect_t callSelect, + mxnet::ext::partCallSelectInput_t 
callSelectInput, + mxnet::ext::partCallSelectOutput_t callSelectOutput, + mxnet::ext::partCallFilter_t callFilter, + mxnet::ext::partCallReset_t callReset, + mxnet::ext::partCallReviewSubgraph_t call_review_subgraph, + mxnet::ext::reviewSubgraph_t review_subgraph, + mxnet::ext::opCallFree_t call_free, std::string op_name) : subgraph_prop(subgraph_prop_name), call_supported_ops_(call_supported_ops), @@ -429,7 +429,7 @@ class CustomSubgraphProperty: public SubgraphProperty { if (e.node->attrs.dict.count(MX_STR_SHAPE) > 0) { std::string& shape = e.node->attrs.dict[MX_STR_SHAPE]; // add this shape to the list - ss << getShapeAt(shape, e.index); + ss << mxnet::ext::getShapeAt(shape, e.index); } if (i < sym.outputs.size()-1) ss << ","; @@ -446,7 +446,7 @@ class CustomSubgraphProperty: public SubgraphProperty { if (e.node->attrs.dict.count(MX_STR_DTYPE) > 0) { std::string& dtype = e.node->attrs.dict[MX_STR_DTYPE]; // add this dtype to the list - ss << getDtypeAt(dtype, e.index); + ss << mxnet::ext::getDtypeAt(dtype, e.index); } if (i < sym.outputs.size()-1) ss << ","; @@ -489,7 +489,7 @@ class CustomSubgraphProperty: public SubgraphProperty { // get dtype string from other node std::string& dtype = orig.node->attrs.dict[MX_STR_DTYPE]; std::stringstream ss; - ss << "[" << getDtypeAt(dtype, orig.index) << "]"; + ss << "[" << mxnet::ext::getDtypeAt(dtype, orig.index) << "]"; e->node->attrs.dict[MX_STR_DTYPE] = ss.str(); } @@ -498,7 +498,7 @@ class CustomSubgraphProperty: public SubgraphProperty { std::string& shape = orig.node->attrs.dict[MX_STR_SHAPE]; // create new shape string for this node std::stringstream ss; - ss << "[" << getShapeAt(shape, orig.index) << "]"; + ss << "[" << mxnet::ext::getShapeAt(shape, orig.index) << "]"; e->node->attrs.dict[MX_STR_SHAPE] = ss.str(); } } @@ -512,18 +512,18 @@ class CustomSubgraphProperty: public SubgraphProperty { } std::string subgraph_prop; - partCallSupportedOps_t call_supported_ops_; - supportedOps_t supported_ops_; - partCallCreateSelector_t call_create_selector_; - createSelector_t create_selector_; - partCallSelect_t callSelect_; - partCallSelectInput_t callSelectInput_; - partCallSelectOutput_t callSelectOutput_; - partCallFilter_t callFilter_; - partCallReset_t callReset_; - partCallReviewSubgraph_t call_review_subgraph_; - reviewSubgraph_t review_subgraph_; - opCallFree_t call_free_; + mxnet::ext::partCallSupportedOps_t call_supported_ops_; + mxnet::ext::supportedOps_t supported_ops_; + mxnet::ext::partCallCreateSelector_t call_create_selector_; + mxnet::ext::createSelector_t create_selector_; + mxnet::ext::partCallSelect_t callSelect_; + mxnet::ext::partCallSelectInput_t callSelectInput_; + mxnet::ext::partCallSelectOutput_t callSelectOutput_; + mxnet::ext::partCallFilter_t callFilter_; + mxnet::ext::partCallReset_t callReset_; + mxnet::ext::partCallReviewSubgraph_t call_review_subgraph_; + mxnet::ext::reviewSubgraph_t review_subgraph_; + mxnet::ext::opCallFree_t call_free_; std::unordered_map supported_nodes; std::string subgraph_op_name; std::vector> options_map_; From 5f9e0348159dbe1b73ce57eaae88b8ea3ae96ec0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 28 Jul 2020 16:47:05 +0000 Subject: [PATCH 10/25] removed tab --- example/extensions/lib_custom_op/relu_lib.cu | 2 +- example/extensions/lib_custom_op/transposecsr_lib.cc | 2 +- example/extensions/lib_custom_op/transposerowsp_lib.cc | 2 +- example/extensions/lib_pass/pass_lib.cc | 2 +- example/extensions/lib_subgraph/subgraph_lib.cc | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) 
diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index a58c74e22877..53ab1c0479df 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -26,7 +26,7 @@ #include #include "lib_api.h" -using namespace mxnet::ext; +using namespace mxnet::ext; #define NumThreadPerBlock 256 // mxnet recommended cuda thread number per block diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index ced26c5ffc95..0eae28f0d449 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -26,7 +26,7 @@ #include #include "lib_api.h" -using namespace mxnet::ext; +using namespace mxnet::ext; void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { MXSparse* A = src.data(); diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index 3e1027c01869..2dc1aac8452a 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -26,7 +26,7 @@ #include #include "lib_api.h" -using namespace mxnet::ext; +using namespace mxnet::ext; void transpose(MXTensor& src, MXTensor& dst, const OpResource& res) { MXSparse* A = src.data(); diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index 3ba2949f3b4c..4cb6bc224853 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -28,7 +28,7 @@ #include #include "lib_api.h" -using namespace mxnet::ext; +using namespace mxnet::ext; /* \brief a basic pass that copies the input to the output */ MXReturnValue myPass(const std::string& in_graph, const std::string** out_graph, diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 17a7263010bf..779250228907 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -28,7 +28,7 @@ #include #include "lib_api.h" -using namespace mxnet::ext; +using namespace mxnet::ext; /* function to execute log operator on floats */ void myLog(MXTensor &in, MXTensor &out) { From 9711165f8968e58897fd2ef3c173ca0efe1e05cf Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 5 Aug 2020 06:59:21 +0000 Subject: [PATCH 11/25] changed subgraph/pass APIs to use new Graph class instead of JSON strings --- example/extensions/lib_custom_op/gemm_lib.cc | 2 - example/extensions/lib_pass/pass_lib.cc | 53 +------------ example/extensions/lib_pass/test_pass.py | 26 +------ .../extensions/lib_subgraph/subgraph_lib.cc | 77 +++++++------------ include/mxnet/lib_api.h | 37 ++++----- 5 files changed, 50 insertions(+), 145 deletions(-) diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index f1081fcf75e5..453103533c37 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -198,8 +198,6 @@ class MyStatefulGemm : public CustomStatefulOp { return backward(attrs_, inputs, outputs, op_res); } - ~MyStatefulGemm() = default; - private: int count; const std::unordered_map attrs_; diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index dec5d0debb1c..4acd70356169 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ 
b/example/extensions/lib_pass/pass_lib.cc @@ -30,8 +30,8 @@ using namespace mxnet::ext; -/* \brief a basic pass that copies the input to the output */ -MXReturnValue myPass(const std::string& in_graph, const std::string** out_graph, +/* \brief a basic pass that prints out the options and the graph */ +MXReturnValue myPass(mxnet::ext::Graph *g, const std::unordered_map& options, const std::unordered_map& args, const std::unordered_map& aux, @@ -39,60 +39,13 @@ MXReturnValue myPass(const std::string& in_graph, const std::string** out_graph, for (auto kv : options) { std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; } - - *out_graph = new std::string(in_graph); + g->print(); return MX_SUCCESS; } REGISTER_PASS(myPass) .setBody(myPass); -/* \brief a basic pass that parses the input string to JSON and then dumps it back */ -MXReturnValue jsonPass(const std::string& in_graph, const std::string** out_graph, - const std::unordered_map& options, - const std::unordered_map& args, - const std::unordered_map& aux, - const PassResource& res) { - for (auto kv : options) - std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; - - // add test arg/aux - - MXTensor* arg_ = res.alloc_arg("test_arg",{1},MXContext::CPU(0),kFloat32); - MXTensor* aux_ = res.alloc_aux("test_aux",{1},MXContext::CPU(0),kFloat32); - - // convert json string to json object - JsonVal json_val = JsonVal::parse(in_graph); - - // get nodes list - JsonVal nodes = json_val.map[JsonVal("nodes")]; - - // loop over nodes - for(auto node : nodes.list) { - // get the op name - std::string op = node.map[JsonVal("op")].str; - // get node ID inputs to op - JsonVal node_inputs = node.map[JsonVal("inputs")]; - - //get shape/type if available - std::string shape; - int dtype = -1; - if(node.map.find(JsonVal("attrs")) != node.map.end()) { - JsonVal attrs = node.map[JsonVal("attrs")]; - if(attrs.map.find(JsonVal("shape")) != attrs.map.end()) - shape = attrs.map[JsonVal("shape")].str; - if(attrs.map.find(JsonVal("dtype")) != attrs.map.end()) - dtype = std::stoi(attrs.map[JsonVal("dtype")].str); - } - } - - *out_graph = new std::string(json_val.dump()); - return MX_SUCCESS; -} - -REGISTER_PASS(jsonPass) -.setBody(jsonPass); - MXReturnValue initialize(int version) { if (version >= 10700) { std::cout << "MXNet version " << version << " supported" << std::endl; diff --git a/example/extensions/lib_pass/test_pass.py b/example/extensions/lib_pass/test_pass.py index 5d4578391097..66411a69cac6 100644 --- a/example/extensions/lib_pass/test_pass.py +++ b/example/extensions/lib_pass/test_pass.py @@ -52,29 +52,12 @@ def test_model(pass_name): # execute in MXNet print('-------------------------------') print('Testing regular MXNet execution') - exe = sym.bind(ctx=mx.cpu(), args=args) - out = exe.forward() + inputs = [a,b] + sym_block = nn.SymbolBlock(sym, inputs) + sym_block.initialize() + out = sym_block(mx.nd.ones((3,2)),mx.nd.ones((3,2))) print(out) - # Symbol optimize_for - # with propogating shapes/types - print('-------------------------------') - print('Testing pass "%s" with shapes/types' % pass_name) - aux = {} - mysym2 = sym.optimize_for(pass_name,args,aux) - print(mysym2.tojson()) - exe2 = mysym2.bind(ctx=mx.cpu(), args=args) - out2 = exe2.forward() - print(out2) - - # without propogating shapes/types - print('-------------------------------') - print('Testing pass "%s" without shapes/types' % pass_name) - mysym3 = sym.optimize_for(pass_name, myOpt='yello') - exe3 = mysym3.bind(ctx=mx.cpu(), args=args) - out3 = 
exe3.forward() - print(out3) - # Gluon Hybridize print('-------------------------------') print('Testing pass "%s" Gluon Hybridize with shapes/types' % pass_name) @@ -95,4 +78,3 @@ def test_model(pass_name): sym_block2.export('modified') test_model('myPass') -test_model('jsonPass') diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index e75d665191dc..79de1d059213 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -180,38 +180,30 @@ REGISTER_OP(_custom_subgraph_op) const std::vector op_names({"exp","log"}); -MXReturnValue mySupportedOps(const std::string& json, +MXReturnValue mySupportedOps(const mxnet::ext::Graph* graph, std::vector* ids, const std::unordered_map& options) { for (auto kv : options) { std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; } - //convert json string to json object - JsonVal json_val = JsonVal::parse(json); - //get nodes list - JsonVal nodes = json_val.map[JsonVal("nodes")]; //loop over nodes - for(int i=0; inodes.size(); i++) { + mxnet::ext::Node *node = graph->nodes[i]; //get shape/type if available std::string shape; int dtype = -1; - if(node.map.find(JsonVal("attrs")) != node.map.end()) { - JsonVal attrs = node.map[JsonVal("attrs")]; - if(attrs.map.find(JsonVal("shape")) != attrs.map.end()) - shape = attrs.map[JsonVal("shape")].str; - if(attrs.map.find(JsonVal("dtype")) != attrs.map.end()) - dtype = std::stoi(attrs.map[JsonVal("dtype")].str); - } + if(node->attrs.count("shape") > 0) + shape = node->attrs["shape"]; + if(node->attrs.count("dtype") > 0) + dtype = std::stoi(node->attrs["dtype"]); //check if op dtype is float, and if option was specified to require float types if((dtype == kFloat32 && options.count("reqFloat") > 0) || options.count("reqFloat") == 0) { - //check if op is in whitelist - if(std::find(op_names.begin(),op_names.end(),op.str.c_str()) != op_names.end()) { - // found op in whitelist, set value to -1 to include op in any subgraph + //check if op is in allowlist + if(std::find(op_names.begin(),op_names.end(),node->op.c_str()) != op_names.end()) { + // found op in allowlist, set value to -1 to include op in any subgraph ids->at(i) = -1; } } @@ -219,7 +211,7 @@ MXReturnValue mySupportedOps(const std::string& json, return MX_SUCCESS; } -MXReturnValue myReviewSubgraph(const std::string& json, int subgraph_id, bool* accept, +MXReturnValue myReviewSubgraph(const mxnet::ext::Graph *subgraph, int subgraph_id, bool* accept, const std::unordered_map& options, std::unordered_map* attrs, const std::unordered_map& args, @@ -263,38 +255,30 @@ REGISTER_PARTITIONER(myProp) class MySelector : public CustomOpSelector { public: - MySelector(const std::string& json, + MySelector(const mxnet::ext::Graph *graph, const std::unordered_map& options) : - graph_json(json), options_(options) { + graph_(graph), options_(options) { for (auto kv : options) { std::cout << "selector options: " << kv.first << " ==> " << kv.second << std::endl; } - //convert json string to json object - JsonVal json_val = JsonVal::parse(json); - //get nodes list - nodes = json_val.map[JsonVal("nodes")]; } bool chooseNode(int nodeID) { - JsonVal node = nodes.list[nodeID]; - JsonVal op = node.map[JsonVal("op")]; + mxnet::ext::Node *node = graph_->nodes[nodeID]; //get shape/type if available std::string shape; int dtype = -1; - if(node.map.find(JsonVal("attrs")) != node.map.end()) { - JsonVal attrs = node.map[JsonVal("attrs")]; - 
if(attrs.map.find(JsonVal("shape")) != attrs.map.end()) - shape = attrs.map[JsonVal("shape")].str; - if(attrs.map.find(JsonVal("dtype")) != attrs.map.end()) - dtype = std::stoi(attrs.map[JsonVal("dtype")].str); - } + if(node->attrs.count("shape") > 0) + shape = node->attrs["shape"]; + if(node->attrs.count("dtype") > 0) + dtype = std::stoi(node->attrs["dtype"]); //check if op dtype is float, and if option was specified to require float types if((dtype == kFloat32 && options_.count("reqFloat") > 0) || options_.count("reqFloat") == 0) { - //check if op is in whitelist - if(std::find(op_names.begin(),op_names.end(),op.str.c_str()) != op_names.end()) { - // found op in whitelist, return true to include op subgraph + //check if op is in allowlist + if(std::find(op_names.begin(),op_names.end(),node->op.c_str()) != op_names.end()) { + // found op in allowlist, return true to include op subgraph return true; } } @@ -315,14 +299,13 @@ class MySelector : public CustomOpSelector { } void Reset() override {} private: - std::string graph_json; - JsonVal nodes; + const mxnet::ext::Graph *graph_; const std::unordered_map options_; }; -MXReturnValue createSelector(const std::string& json, CustomOpSelector** sel_inst, +MXReturnValue createSelector(const mxnet::ext::Graph *graph, CustomOpSelector** sel_inst, const std::unordered_map& options) { - *sel_inst = new MySelector(json, options); + *sel_inst = new MySelector(graph, options); std::cout << "Info: selector created" << std::endl; return MX_SUCCESS; } @@ -333,15 +316,13 @@ REGISTER_PARTITIONER(mySelect) .setReviewSubgraph("strategy1", myReviewSubgraph); /* \brief a basic pass that adds a new input for subgraph ops */ -MXReturnValue addInputPass(const std::string& in_graph, const std::string** out_graph, +MXReturnValue addInputPass(mxnet::ext::Graph *graph, const std::unordered_map& options, const std::unordered_map& args, const std::unordered_map& aux, const PassResource& res) { - // convert graph from JSON string to Graph/Node data structure - Graph *g = Graph::fromString(in_graph); //find node with '_custom_subgraph_op' op type - for(Node* n : g->nodes) { + for(Node* n : graph->nodes) { if(n->op.compare("_custom_subgraph_op") == 0) { //set extra input n->attrs[MX_STR_EXTRA_INPUTS] = std::to_string(1); @@ -352,8 +333,8 @@ MXReturnValue addInputPass(const std::string& in_graph, const std::string** out_ input->name = input_name; input->op = "null"; //add a new node in graph - g->nodes.push_back(input); - g->inputs.push_back(input); + graph->nodes.push_back(input); + graph->inputs.push_back(input); //connect new input to node input->outputs.push_back({n,(int)(n->inputs.size())}); //connect node to new input @@ -363,8 +344,6 @@ MXReturnValue addInputPass(const std::string& in_graph, const std::string** out_ } } - //convert back to JSON string from Graph/Node - *out_graph = new std::string(g->toString()); return MX_SUCCESS; } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 1ab9042bcaf7..144fa576bc54 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -1272,7 +1272,7 @@ class CustomOp { }; /*! \brief Custom Pass Create function template */ -typedef MXReturnValue (*graphPass_t)(const std::string& in_graph, const std::string** out_graph, +typedef MXReturnValue (*graphPass_t)(mxnet::ext::Graph* graph, const std::unordered_map& options, const std::unordered_map& args, const std::unordered_map& aux, @@ -1298,13 +1298,13 @@ class CustomPass { }; /*! 
\brief Custom Subgraph Create function template */ -typedef MXReturnValue (*supportedOps_t)(const std::string& json, std::vector* ids, +typedef MXReturnValue (*supportedOps_t)(const mxnet::ext::Graph *graph, std::vector* ids, const std::unordered_map& options); -typedef MXReturnValue (*createSelector_t)(const std::string& json, CustomOpSelector** sel_inst, +typedef MXReturnValue (*createSelector_t)(const mxnet::ext::Graph *graph, CustomOpSelector** sel_inst, const std::unordered_map& options); -typedef MXReturnValue (*reviewSubgraph_t)(const std::string& json, int subgraph_id, bool* accept, +typedef MXReturnValue (*reviewSubgraph_t)(const mxnet::ext::Graph *subgraph, int subgraph_id, bool* accept, const std::unordered_map& options, std::unordered_map* attrs, @@ -2056,7 +2056,7 @@ extern "C" { MX_INT_RET _partCallSupportedOps(mxnet::ext::supportedOps_t supportedOps, const char *json, int num_ids, int *ids, const char* const* opt_keys, const char* const* opt_vals, int num_opts) { - std::string subgraph_json(json); + mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); // create map of options from list std::unordered_map opts; for (int i = 0; i < num_opts; i++) @@ -2065,7 +2065,7 @@ extern "C" { // create array of subgraph IDs for operator support std::vector _ids(num_ids, -2); // call user's supportedOps function - mxnet::ext::MXReturnValue retval = supportedOps(subgraph_json, &_ids, opts); + mxnet::ext::MXReturnValue retval = supportedOps(graph, &_ids, opts); if (!retval) return retval; // copy bools in ids to ints @@ -2079,7 +2079,7 @@ extern "C" { MX_INT_RET _partCallCreateSelector(mxnet::ext::createSelector_t createSelector, const char *json, void** selector, const char* const* opt_keys, const char* const* opt_vals, int num_opts) { - std::string symbol_json(json); + mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); // create map of options from list std::unordered_map opts; for (int i = 0; i < num_opts; i++) @@ -2090,7 +2090,7 @@ extern "C" { mxnet::ext::CustomOpSelector** sel_ptr = reinterpret_cast(selector); // call user's createSelector function - return createSelector(symbol_json, sel_ptr, opts); + return createSelector(graph, sel_ptr, opts); } /*! \brief returns status of calling select function from library */ @@ -2152,7 +2152,7 @@ extern "C" { const int* aux_dims, const int* aux_types, const size_t* aux_IDs, const char* const* aux_dev_type, const int* aux_dev_id) { - std::string subgraph_json(json); + mxnet::ext::Graph *subgraph = mxnet::ext::Graph::fromString(json); bool accept_bool = false; // create map of attributes from list std::unordered_map opts; @@ -2185,7 +2185,7 @@ extern "C" { // attributes to set on subgraph node std::unordered_map attrs; - mxnet::ext::MXReturnValue retval = reviewSubgraph(subgraph_json, subgraph_id, &accept_bool, + mxnet::ext::MXReturnValue retval = reviewSubgraph(subgraph, subgraph_id, &accept_bool, opts, &attrs, args, aux); if (!retval) return retval; @@ -2227,7 +2227,7 @@ extern "C" { /*! 
\brief returns status of calling graph pass function from library */ MX_INT_RET _passCallGraphPass(mxnet::ext::graphPass_t graphPass, const char *json, - char** graph, const char* const* opt_keys, + char** out_graph, const char* const* opt_keys, const char* const* opt_vals, int num_opts, const char* pass_name, const char* const* arg_names, int num_args, void* const* arg_data, const int64_t* const* arg_shapes, @@ -2239,8 +2239,7 @@ extern "C" { const size_t* aux_IDs, const char* const* aux_dev_type, const int* aux_dev_id, mxnet::ext::nd_malloc_t nd_malloc, const void* nd_alloc) { - std::string graph_json(json); - const std::string* out_graph = nullptr; + mxnet::ext::Graph *graph = mxnet::ext::Graph::fromString(json); // create map of attributes from list std::unordered_map opts; for (int i = 0; i < num_opts; i++) @@ -2271,17 +2270,11 @@ extern "C" { std::unordered_map new_args, new_aux; mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); - mxnet::ext::MXReturnValue retval = graphPass(graph_json, &out_graph, opts, args, aux, res); + mxnet::ext::MXReturnValue retval = graphPass(graph, opts, args, aux, res); if (!retval) return retval; - if (out_graph == nullptr) { - std::cout << "Error calling graph pass '" << pass_name - << "' returned out_graph string is null" << std::endl; - return mxnet::ext::MX_FAIL; - } - *graph = static_cast(malloc((out_graph->length()+1) * sizeof(char))); - out_graph->copy(*graph, out_graph->size()+1); - delete out_graph; + std::string *tmp = new std::string(graph->toString()); + *out_graph = const_cast(tmp->c_str()); return retval; } From e27ff0185a5f91691c31306aae4773d9666a471a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 6 Aug 2020 07:41:12 +0000 Subject: [PATCH 12/25] updated subgraph_lib example to use new Graph class --- .../extensions/lib_subgraph/subgraph_lib.cc | 105 +++++++++--------- include/mxnet/lib_api.h | 47 +++++++- 2 files changed, 92 insertions(+), 60 deletions(-) diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 79de1d059213..006f4b68947d 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -32,18 +32,18 @@ using namespace mxnet::ext; /* function to execute log operator on floats */ -void myLog(MXTensor &in, MXTensor &out) { - float* inp = in.data(); - float* outp = out.data(); - for (int64_t i = 0; i < in.size(); i++) { +void myLog(MXTensor *in, MXTensor *out) { + float* inp = in->data(); + float* outp = out->data(); + for (int64_t i = 0; i < in->size(); i++) { outp[i] = logf(inp[i]); } } /* function to execute exp operator on floats */ -void myExp(MXTensor &in, MXTensor &out) { - float* inp = in.data(); - float* outp =out.data(); - for (int64_t i = 0; i < in.size(); i++) { +void myExp(MXTensor *in, MXTensor *out) { + float* inp = in->data(); + float* outp =out->data(); + for (int64_t i = 0; i < in->size(); i++) { outp[i] = expf(inp[i]); } } @@ -55,14 +55,10 @@ void myExp(MXTensor &in, MXTensor &out) { */ MXReturnValue myExecutor(std::vector* inputs, std::vector* outputs, - const std::string& subgraph_sym) { - std::cout << "Info: subgraph symbol is: " << std::endl; - std::cout << subgraph_sym << std::endl; - - // convert json string to json object - JsonVal json_val = JsonVal::parse(subgraph_sym); - // get nodes list - JsonVal nodes = json_val.map[JsonVal("nodes")]; + mxnet::ext::Graph *subgraph) { + std::cout << "Info: subgraph is: " << std::endl; + subgraph->print(); + //counter for inputs int 
input_cnt = 0; // temporary tensor storage @@ -71,40 +67,40 @@ MXReturnValue myExecutor(std::vector* inputs, std::vector to_free; // loop over nodes - for(auto node : nodes.list) { - // get the op name - std::string op = node.map[JsonVal("op")].str; - // get node ID inputs to op - JsonVal node_inputs = node.map[JsonVal("inputs")]; - + for(int i=0; isize(); i++) { + mxnet::ext::Node* node = subgraph->getNode(i); // handle each op type - if (op.compare("null") == 0) { - // null is an input data to the subgraph, add to data storage - data.push_back(inputs->at(input_cnt++)); - } else if (op.compare("log") == 0) { + if (node->op.compare("null") == 0) { + // set tensor for this input to the subgraph + node->tensor = &inputs->at(input_cnt++); + } else if (node->op.compare("log") == 0) { // get input tensor based on node ID inputs from data storage - MXTensor &input = data[node_inputs.list[0].list[0].num]; + MXTensor *input = node->inputs.at(0).node->tensor; // create temporary storage - MXTensor tmp(malloc(input.size()*4), input.shape, input.dtype, 0, MXContext::CPU(0), kDefaultStorage); // NOLINT + MXTensor tmp(malloc(input->size()*4), input->shape, input->dtype, 0, MXContext::CPU(0), kDefaultStorage); // NOLINT // save allocated ptr to free later to_free.push_back(tmp.data_ptr); // execute log operator - myLog(input,tmp); + myLog(input,&tmp); // add output tensor to data storage data.push_back(tmp); - } else if (op.compare("exp") == 0) { + // set tensor for this node so we can read it later + node->tensor = &data.back(); + } else if (node->op.compare("exp") == 0) { // get input tensor based on node ID inputs from data storage - MXTensor &input = data[node_inputs.list[0].list[0].num]; + MXTensor *input = node->inputs.at(0).node->tensor; // create temporary storage - MXTensor tmp(malloc(input.size()*4), input.shape, input.dtype, 0, MXContext::CPU(0), kDefaultStorage); // NOLINT + MXTensor tmp(malloc(input->size()*4), input->shape, input->dtype, 0, MXContext::CPU(0), kDefaultStorage); // NOLINT // save allocated ptr to free later to_free.push_back(tmp.data_ptr); // execute exp operator - myExp(input,tmp); + myExp(input,&tmp); // add output tensor to data storage data.push_back(tmp); + // set tensor for this node so we can read it later + node->tensor = &data.back(); } else { - std::cout << "Error! Unsupported op '" << op << "' found in myExecutor"; + std::cout << "Error! 
Unsupported op '" << node->op << "' found in myExecutor"; // free allocated temporary storage for (void* ptr : to_free) free(ptr); // NOLINT @@ -112,18 +108,16 @@ MXReturnValue myExecutor(std::vector* inputs, } } - // get list of outputs from subgraph - JsonVal heads = json_val.map[JsonVal("heads")]; // copy all operator results to outputs of subgraph - for (int j = 0; j < heads.list.size(); j++) { + for (int j = 0; j < subgraph->outputs.size(); j++) { // get computed result - MXTensor &result = data[heads.list[0].list[0].num]; + MXTensor *result = subgraph->outputs[j].node->tensor; // get output tensor to pass to MX MXTensor &out = outputs->at(j); float *out_data = out.data(); - float *res_data = result.data(); + float *res_data = result->data(); // loop and copy data - for (int64_t i = 0; i < result.size(); i++) { + for (int64_t i = 0; i < result->size(); i++) { out_data[i] = res_data[i]; } } @@ -138,12 +132,13 @@ MXReturnValue myExecutor(std::vector* inputs, class MyStatefulOp : public CustomStatefulOp { public: - explicit MyStatefulOp(std::string sym, + explicit MyStatefulOp(std::string json, const std::unordered_map& attrs) - : subgraph_sym(std::move(sym)), attrs_(attrs) { + : attrs_(attrs) { for (auto kv : attrs) { std::cout << "subgraphOp attributes: " << kv.first << " ==> " << kv.second << std::endl; } + subgraph_ = mxnet::ext::Graph::fromString(json); } MXReturnValue Forward(std::vector* inputs, @@ -152,11 +147,11 @@ class MyStatefulOp : public CustomStatefulOp { if(attrs_.count(MX_STR_EXTRA_INPUTS) > 0 && std::stoi(attrs_.at(MX_STR_EXTRA_INPUTS)) > 0) std::cout << "forward::extra_inputs(" << attrs_.at(MX_STR_EXTRA_INPUTS) << ")::inputs [" << inputs->size() << "]" << std::endl; - return myExecutor(inputs, outputs, subgraph_sym); + return myExecutor(inputs, outputs, subgraph_); } private: - const std::string subgraph_sym; + mxnet::ext::Graph *subgraph_; const std::unordered_map attrs_; }; @@ -188,16 +183,16 @@ MXReturnValue mySupportedOps(const mxnet::ext::Graph* graph, } //loop over nodes - for(int i=0; inodes.size(); i++) { - mxnet::ext::Node *node = graph->nodes[i]; + for(int i=0; isize(); i++) { + const mxnet::ext::Node *node = graph->getNode(i); //get shape/type if available std::string shape; int dtype = -1; if(node->attrs.count("shape") > 0) - shape = node->attrs["shape"]; + shape = node->attrs.at("shape"); if(node->attrs.count("dtype") > 0) - dtype = std::stoi(node->attrs["dtype"]); + dtype = std::stoi(node->attrs.at("dtype")); //check if op dtype is float, and if option was specified to require float types if((dtype == kFloat32 && options.count("reqFloat") > 0) || options.count("reqFloat") == 0) { @@ -264,15 +259,15 @@ class MySelector : public CustomOpSelector { } } bool chooseNode(int nodeID) { - mxnet::ext::Node *node = graph_->nodes[nodeID]; + const mxnet::ext::Node *node = graph_->getNode(nodeID); //get shape/type if available std::string shape; int dtype = -1; if(node->attrs.count("shape") > 0) - shape = node->attrs["shape"]; + shape = node->attrs.at("shape"); if(node->attrs.count("dtype") > 0) - dtype = std::stoi(node->attrs["dtype"]); + dtype = std::stoi(node->attrs.at("dtype")); //check if op dtype is float, and if option was specified to require float types if((dtype == kFloat32 && options_.count("reqFloat") > 0) || options_.count("reqFloat") == 0) { @@ -322,18 +317,18 @@ MXReturnValue addInputPass(mxnet::ext::Graph *graph, const std::unordered_map& aux, const PassResource& res) { //find node with '_custom_subgraph_op' op type - for(Node* n : graph->nodes) { + 
for(int i=0; isize(); i++) { + mxnet::ext::Node* n = graph->getNode(i); if(n->op.compare("_custom_subgraph_op") == 0) { //set extra input n->attrs[MX_STR_EXTRA_INPUTS] = std::to_string(1); //create a new input Node - Node* input = new Node(); + Node* input = graph->addNode(); std::string input_name = n->name + "_input"; input->name = input_name; input->op = "null"; - //add a new node in graph - graph->nodes.push_back(input); + //set this node as an input in the graph graph->inputs.push_back(input); //connect new input to node input->outputs.push_back({n,(int)(n->inputs.size())}); diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 144fa576bc54..dbab1c5f6c8d 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -821,8 +821,10 @@ struct NodeEntry { // Representation of a node in the graph class Node { public: + Node() {tensor = nullptr;} std::string op; // operator name (ie. Convolution) std::string name; // unique node name (ie. conv_0 or conv_1) + MXTensor* tensor; // tensor data for input nodes std::vector inputs; // set of inputs to the node std::vector outputs; // set of outputs from the node std::vector subgraphs; // set of subgraphs within this node @@ -1009,7 +1011,7 @@ class Graph { /* \brief visits a node "n" */ void _dfs_util(Node* n, std::unordered_set* to_visit, - std::function handler) { + std::function handler) const { to_visit->erase(n); // remove node now that we're visiting it for (NodeEntry& e : n->outputs) { Node* o = e.node; @@ -1021,7 +1023,7 @@ class Graph { } /* \brief post-order DFS graph traversal */ - void DFS(std::function handler) { + void DFS(std::function handler) const { std::unordered_set to_visit; // put all nodes in set to visit for (auto& n : nodes) @@ -1036,7 +1038,7 @@ class Graph { } /* \brief sort graph nodes in topological order */ - std::vector topological_sort() { + std::vector topological_sort() const { std::vector sorted; auto handler = [&](Node* n) { sorted.push_back(n); // when visiting each node, add it in order to the vector @@ -1046,7 +1048,7 @@ class Graph { } /* \brief print out graph details */ - void print(int indent = 0) { + void print(int indent = 0) const { std::string space = ""; for (int i = 0; i < indent; i++) space+=" "; @@ -1078,11 +1080,45 @@ class Graph { } std::cout << space << "###############################" << std::endl; } + Node* addNode() { + Node* n = new Node(); + return n; + } - std::vector nodes; + Node* getNode(size_t idx) { + return nodes[idx]; + } + const Node* getNode(size_t idx) const { + return nodes.at(idx); + } + const JsonVal& getAttr(const std::string& key) const { + return attrs.at(key); + } + + size_t size() const { + return nodes.size(); + } + + void _setParams(PassResource* res_, + std::unordered_map& args, + std::unordered_map& aux) { + // set params for each input node + for(Node* node : inputs) { + if(args.count(node->name) > 0) + node->tensor = &args[node->name]; + else if(aux.count(node->name) > 0) + node->tensor = &aux[node->name]; + } + res = res_; + } + std::vector inputs; std::vector outputs; std::map attrs; + + private: + std::vector nodes; + PassResource* res; }; /* \brief An abstract class for library authors creating custom @@ -2270,6 +2306,7 @@ extern "C" { std::unordered_map new_args, new_aux; mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); + graph->_setParams(&res,args,aux); mxnet::ext::MXReturnValue retval = graphPass(graph, opts, args, aux, res); if (!retval) return retval; From f27fea83a19ea797b73400d9ecd99ce5b6ee8a5f Mon Sep 17 
00:00:00 2001 From: Ubuntu Date: Fri, 7 Aug 2020 05:01:05 +0000 Subject: [PATCH 13/25] sanity --- include/mxnet/lib_api.h | 93 +++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index dbab1c5f6c8d..ee9b23753990 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -824,7 +824,7 @@ class Node { Node() {tensor = nullptr;} std::string op; // operator name (ie. Convolution) std::string name; // unique node name (ie. conv_0 or conv_1) - MXTensor* tensor; // tensor data for input nodes + MXTensor* tensor; // tensor data for input nodes std::vector inputs; // set of inputs to the node std::vector outputs; // set of outputs from the node std::vector subgraphs; // set of subgraphs within this node @@ -1094,24 +1094,24 @@ class Graph { const JsonVal& getAttr(const std::string& key) const { return attrs.at(key); } - + size_t size() const { return nodes.size(); } - + void _setParams(PassResource* res_, - std::unordered_map& args, - std::unordered_map& aux) { + std::unordered_map* args, + std::unordered_map* aux) { // set params for each input node - for(Node* node : inputs) { - if(args.count(node->name) > 0) - node->tensor = &args[node->name]; - else if(aux.count(node->name) > 0) - node->tensor = &aux[node->name]; + for (Node* node : inputs) { + if (args->count(node->name) > 0) + node->tensor = &args->at(node->name); + else if (aux->count(node->name) > 0) + node->tensor = &aux->at(node->name); } res = res_; } - + std::vector inputs; std::vector outputs; std::map attrs; @@ -1337,10 +1337,12 @@ class CustomPass { typedef MXReturnValue (*supportedOps_t)(const mxnet::ext::Graph *graph, std::vector* ids, const std::unordered_map& options); -typedef MXReturnValue (*createSelector_t)(const mxnet::ext::Graph *graph, CustomOpSelector** sel_inst, +typedef MXReturnValue (*createSelector_t)(const mxnet::ext::Graph *graph, + CustomOpSelector** sel_inst, const std::unordered_map& options); -typedef MXReturnValue (*reviewSubgraph_t)(const mxnet::ext::Graph *subgraph, int subgraph_id, bool* accept, +typedef MXReturnValue (*reviewSubgraph_t)(const mxnet::ext::Graph *subgraph, int subgraph_id, + bool* accept, const std::unordered_map& options, std::unordered_map* attrs, @@ -1667,8 +1669,8 @@ typedef int (*opVersion_t)(); #define MX_VOID_RET void #endif -} // namespace ext -} // namespace mxnet +} // namespace ext +} // namespace mxnet extern "C" { /*! \brief returns MXNet library version */ @@ -1855,13 +1857,16 @@ extern "C" { } /*! 
\brief returns status of calling Forward/Backward function for operator from library */ - MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, const char* const* keys, const char* const* vals, + MX_INT_RET _opCallFCompute(mxnet::ext::fcomp_t fcomp, const char* const* keys, + const char* const* vals, int num, const int64_t** inshapes, int* indims, void** indata, int* intypes, size_t* inIDs, const char** indev_type, int* indev_id, int num_in, const int64_t** outshapes, int* outdims, void** outdata, int* outtypes, size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, void* cpu_alloc, - mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, void* cuda_stream, + int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, + mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, + void* cuda_stream, mxnet::ext::sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, void** out_indptr, @@ -1924,14 +1929,16 @@ extern "C" { out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (mxnet::ext::MXDType)outtypes[i], + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), + (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); } } mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, - cuda_stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); + cuda_stream, sparse_malloc, sparse_alloc, + rng_cpu_states, rng_gpu_states); return fcomp(attrs, &inputs, &outputs, res); } @@ -1974,7 +1981,8 @@ extern "C" { // void pointer to hold custom state op instance created in custom library // eventually state_op pointer is populated by instance from custom library - mxnet::ext::CustomStatefulOp** op_ptr = reinterpret_cast(state_op); + mxnet::ext::CustomStatefulOp** op_ptr = + reinterpret_cast(state_op); return create_op(attrs, op_ptr); } @@ -1984,8 +1992,10 @@ extern "C" { const char** indev_type, int* indev_id, int num_in, const int64_t** outshapes, int* outdims, void** outdata, int* outtypes, size_t* outIDs, const char** outdev_type, - int* outdev_id, int num_out, mxnet::ext::xpu_malloc_t cpu_malloc, - void* cpu_alloc, mxnet::ext::xpu_malloc_t gpu_malloc, void* gpu_alloc, + int* outdev_id, int num_out, + mxnet::ext::xpu_malloc_t cpu_malloc, + void* cpu_alloc, mxnet::ext::xpu_malloc_t gpu_malloc, + void* gpu_alloc, void* stream, mxnet::ext::sparse_malloc_t sparse_malloc, void* sparse_alloc, int* instypes, int* outstypes, void** in_indices, void** out_indices, void** in_indptr, @@ -2044,7 +2054,8 @@ extern "C" { out_sparse[i].set(outdata[i], outshapes[i], outdims[i], out_indices[i], out_indices_shapes[i], out_indptr[i], out_indptr_shapes[i]); } - outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), (mxnet::ext::MXDType)outtypes[i], + outputs[i].setTensor(reinterpret_cast(&out_sparse[i]), + (mxnet::ext::MXDType)outtypes[i], outshapes[i], outdims[i], outIDs[i], mxnet::ext::MXContext(outdev_type[i], outdev_id[i]), type); } @@ -2053,7 +2064,8 @@ extern "C" { mxnet::ext::OpResource res(cpu_malloc, cpu_alloc, gpu_malloc, gpu_alloc, stream, sparse_malloc, sparse_alloc, rng_cpu_states, rng_gpu_states); - mxnet::ext::CustomStatefulOp* op_ptr = reinterpret_cast(state_op); + mxnet::ext::CustomStatefulOp* op_ptr = + 
reinterpret_cast(state_op); if (is_forward) { return op_ptr->Forward(&inputs, &outputs, res); } @@ -2123,7 +2135,8 @@ extern "C" { // void pointer to hold selector instance created in custom library // eventually pointer is populated by instance from custom library - mxnet::ext::CustomOpSelector** sel_ptr = reinterpret_cast(selector); + mxnet::ext::CustomOpSelector** sel_ptr = + reinterpret_cast(selector); // call user's createSelector function return createSelector(graph, sel_ptr, opts); @@ -2131,28 +2144,32 @@ extern "C" { /*! \brief returns status of calling select function from library */ MX_VOID_RET _partCallSelect(void* sel_inst, int nodeID, int* selected) { - mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); *selected = sel_ptr->Select(nodeID); } /*! \brief returns status of calling select input function from library */ MX_VOID_RET _partCallSelectInput(void* sel_inst, int nodeID, int input_nodeID, int* selected) { - mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); *selected = sel_ptr->SelectInput(nodeID, input_nodeID); } /*! \brief returns status of calling select output function from library */ MX_VOID_RET _partCallSelectOutput(void* sel_inst, int nodeID, int output_nodeID, int* selected) { - mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); *selected = sel_ptr->SelectOutput(nodeID, output_nodeID); } /*! \brief returns status of calling filter function from library */ MX_VOID_RET _partCallFilter(void* sel_inst, int* candidates, int num_candidates, int** keep, int* num_keep) { - mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); std::vector candidates_(num_candidates); for (int i=0; i < num_candidates; i++) { candidates_[i] = candidates[i]; @@ -2169,7 +2186,8 @@ extern "C" { /*! 
\brief returns status of calling reset selector function from library */ MX_VOID_RET _partCallReset(void* sel_inst) { - mxnet::ext::CustomOpSelector* sel_ptr = reinterpret_cast(sel_inst); + mxnet::ext::CustomOpSelector* sel_ptr = + reinterpret_cast(sel_inst); sel_ptr->Reset(); } @@ -2214,7 +2232,8 @@ extern "C" { shapes.push_back(aux_shapes[i][j]); mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], - aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], aux_dev_id[i])); + aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], + aux_dev_id[i])); aux[aux_names[i]] = tensor; } @@ -2289,7 +2308,8 @@ extern "C" { shapes.push_back(arg_shapes[i][j]); mxnet::ext::MXTensor tensor(arg_data[i], shapes, (mxnet::ext::MXDType)arg_types[i], - arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], arg_dev_id[i])); + arg_IDs[i], mxnet::ext::MXContext(arg_dev_type[i], + arg_dev_id[i])); args[arg_names[i]] = tensor; } // create a map of named tensors for aux @@ -2300,13 +2320,14 @@ extern "C" { shapes.push_back(aux_shapes[i][j]); mxnet::ext::MXTensor tensor(aux_data[i], shapes, (mxnet::ext::MXDType)aux_types[i], - aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], aux_dev_id[i])); + aux_IDs[i], mxnet::ext::MXContext(aux_dev_type[i], + aux_dev_id[i])); aux[aux_names[i]] = tensor; } std::unordered_map new_args, new_aux; mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); - graph->_setParams(&res,args,aux); + graph->_setParams(&res, &args, &aux); mxnet::ext::MXReturnValue retval = graphPass(graph, opts, args, aux, res); if (!retval) return retval; @@ -2328,5 +2349,5 @@ extern "C" { mxnet::ext::MXReturnValue #endif initialize(int version); -} // extern "C" +} // extern "C" #endif // MXNET_LIB_API_H_ From c850959a610b017147d17e3dc00ef0edf10d5754 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 7 Aug 2020 05:31:35 +0000 Subject: [PATCH 14/25] updated graph pass API, removed args/aux/passResource --- example/extensions/lib_pass/pass_lib.cc | 5 +--- .../extensions/lib_subgraph/subgraph_lib.cc | 12 ++------ include/mxnet/lib_api.h | 30 ++++++++++++++----- src/c_api/c_api.cc | 9 +++--- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index 4acd70356169..287039332167 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -32,10 +32,7 @@ using namespace mxnet::ext; /* \brief a basic pass that prints out the options and the graph */ MXReturnValue myPass(mxnet::ext::Graph *g, - const std::unordered_map& options, - const std::unordered_map& args, - const std::unordered_map& aux, - const PassResource& res) { + const std::unordered_map& options) { for (auto kv : options) { std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; } diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 006f4b68947d..11ee2757f6f5 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -312,10 +312,7 @@ REGISTER_PARTITIONER(mySelect) /* \brief a basic pass that adds a new input for subgraph ops */ MXReturnValue addInputPass(mxnet::ext::Graph *graph, - const std::unordered_map& options, - const std::unordered_map& args, - const std::unordered_map& aux, - const PassResource& res) { + const std::unordered_map& options) { //find node with '_custom_subgraph_op' op type for(int i=0; isize(); i++) { mxnet::ext::Node* n = 
graph->getNode(i); @@ -324,10 +321,7 @@ MXReturnValue addInputPass(mxnet::ext::Graph *graph, n->attrs[MX_STR_EXTRA_INPUTS] = std::to_string(1); //create a new input Node - Node* input = graph->addNode(); - std::string input_name = n->name + "_input"; - input->name = input_name; - input->op = "null"; + Node* input = graph->addNode(n->name + "_input", "null"); //set this node as an input in the graph graph->inputs.push_back(input); //connect new input to node @@ -335,7 +329,7 @@ MXReturnValue addInputPass(mxnet::ext::Graph *graph, //connect node to new input n->inputs.push_back({input,0}); // add a corresponding tensor for this input - MXTensor* arg_ = res.alloc_arg(input_name,{1},MXContext::CPU(0),kFloat32); + input->alloc_arg({1},MXContext::CPU(0),kFloat32); } } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index ee9b23753990..89eb69003437 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -822,6 +822,15 @@ struct NodeEntry { class Node { public: Node() {tensor = nullptr;} + void _setPassResource(PassResource* res_) {res = res_;} + void alloc_arg(const std::vector& shapes, + const MXContext &ctx, MXDType dtype) { + tensor = res->alloc_arg(name, shapes, ctx, dtype); + } + void alloc_aux(const std::vector& shapes, + const MXContext &ctx, MXDType dtype) { + tensor = res->alloc_aux(name, shapes, ctx, dtype); + } std::string op; // operator name (ie. Convolution) std::string name; // unique node name (ie. conv_0 or conv_1) MXTensor* tensor; // tensor data for input nodes @@ -829,12 +838,14 @@ class Node { std::vector outputs; // set of outputs from the node std::vector subgraphs; // set of subgraphs within this node std::unordered_map attrs; // node attributes + private: + PassResource* res; }; // Representation of the graph class Graph { public: - Graph() {} + Graph() : res(nullptr) {} /* \brief deleted nodes when deleting the graph */ ~Graph() { for (int i = 0; i < nodes.size(); i++) @@ -1080,8 +1091,12 @@ class Graph { } std::cout << space << "###############################" << std::endl; } - Node* addNode() { + Node* addNode(const std::string& name, const std::string& op) { Node* n = new Node(); + n->name = name; + n->op = op; + if (res) + n->_setPassResource(res); return n; } @@ -1110,6 +1125,10 @@ class Graph { node->tensor = &aux->at(node->name); } res = res_; + // set passResource for each node + for (Node* node : nodes) { + node->_setPassResource(res); + } } std::vector inputs; @@ -1309,10 +1328,7 @@ class CustomOp { /*! \brief Custom Pass Create function template */ typedef MXReturnValue (*graphPass_t)(mxnet::ext::Graph* graph, - const std::unordered_map& options, - const std::unordered_map& args, - const std::unordered_map& aux, - const PassResource& res); + const std::unordered_map& options); /*! 
* \brief An abstract class for graph passes @@ -2328,7 +2344,7 @@ extern "C" { std::unordered_map new_args, new_aux; mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); graph->_setParams(&res, &args, &aux); - mxnet::ext::MXReturnValue retval = graphPass(graph, opts, args, aux, res); + mxnet::ext::MXReturnValue retval = graphPass(graph, opts); if (!retval) return retval; std::string *tmp = new std::string(graph->toString()); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index e025caf4015e..48b3c652ff81 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -339,7 +339,8 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::unordered_map &createop_map, const std::unordered_map &forward_ctx_map, const std::unordered_map &backward_ctx_map, - mxnet::ext::opCallFComp_t callFComp, mxnet::ext::opCallFStatefulComp_t callFStatefulComp) { + mxnet::ext::opCallFComp_t callFComp, + mxnet::ext::opCallFStatefulComp_t callFStatefulComp) { using namespace mxnet::ext; // check if operator is already registered @@ -422,7 +423,7 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp gradOp.set_attr("TIsBackward", true, plevel); gradOp.set_attr("FInferStorageType", infer_storage_type, plevel); gradOp.set_attr("FResourceRequest", resc_req, plevel); - + if (!isSubgraphOp) { // register attr parser and standard functions for non-subgraph ops gradOp.set_attr_parser(attr_parser); @@ -440,7 +441,7 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp gradOp.set_num_inputs(grad_inouts); gradOp.set_num_outputs(num_subgraph_inputs); } - + if (createop_map.size() != 0) { // for stateful operators gradOp.set_attr("TIsLayerOpBackward", true, plevel); @@ -1094,7 +1095,7 @@ void registerOperators(void *lib, int verbose) { void registerPartitioners(void *lib, int verbose) { using namespace mxnet::ext; - + // get C type interface functions opCallFree_t callFree = get_func(lib, const_cast(MXLIB_OPCALLFREE_STR)); From 9b7e0a135fa1b8a0192b00ed8e74cd4807ba9472 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 7 Aug 2020 05:41:19 +0000 Subject: [PATCH 15/25] insanity fix --- include/mxnet/lib_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 89eb69003437..e04afa7967d7 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -839,7 +839,7 @@ class Node { std::vector subgraphs; // set of subgraphs within this node std::unordered_map attrs; // node attributes private: - PassResource* res; + PassResource* res; }; // Representation of the graph From be08e2bb2417f5384da788774f7047f1c81d05e0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 7 Aug 2020 06:19:52 +0000 Subject: [PATCH 16/25] modernize --- example/extensions/lib_subgraph/subgraph_lib.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 11ee2757f6f5..1ee0c139bbeb 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -135,7 +135,7 @@ class MyStatefulOp : public CustomStatefulOp { explicit MyStatefulOp(std::string json, const std::unordered_map& attrs) : attrs_(attrs) { - for (auto kv : attrs) { + for (const auto &kv : attrs) { std::cout << "subgraphOp attributes: " << kv.first << " ==> " << kv.second << std::endl; } subgraph_ = mxnet::ext::Graph::fromString(json); @@ -143,7 
+143,7 @@ class MyStatefulOp : public CustomStatefulOp { MXReturnValue Forward(std::vector* inputs, std::vector* outputs, - const OpResource& op_res) { + const OpResource& op_res) override { if(attrs_.count(MX_STR_EXTRA_INPUTS) > 0 && std::stoi(attrs_.at(MX_STR_EXTRA_INPUTS)) > 0) std::cout << "forward::extra_inputs(" << attrs_.at(MX_STR_EXTRA_INPUTS) << ")::inputs [" << inputs->size() << "]" << std::endl; From 474de7226011b9850759b49a3e657385500cbfcb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 8 Aug 2020 04:57:40 +0000 Subject: [PATCH 17/25] updated reviewSubgraph with new Graph class --- .../extensions/lib_subgraph/subgraph_lib.cc | 21 +----- include/mxnet/lib_api.h | 74 ++++++++++++------- 2 files changed, 47 insertions(+), 48 deletions(-) diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 1ee0c139bbeb..8cea58e69821 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -207,29 +207,10 @@ MXReturnValue mySupportedOps(const mxnet::ext::Graph* graph, } MXReturnValue myReviewSubgraph(const mxnet::ext::Graph *subgraph, int subgraph_id, bool* accept, - const std::unordered_map& options, - std::unordered_map* attrs, - const std::unordered_map& args, - const std::unordered_map& aux) { + const std::unordered_map& options) { for (auto kv : options) { std::cout << "option: " << kv.first << " ==> " << kv.second << std::endl; } - for (auto kv : args) { - std::cout << "arg: " << kv.first << " ==> ("; - for (auto s : kv.second.shape) - std::cout << s << ","; - std::cout << ") ["; - for (int i=0; i()[i] << ", "; - std::cout << "]" << std::endl; - } - - // check if option `reqArgs` was specified, and if so check if args were provided - if(options.count("reqArgs") > 0 && args.size() == 0) { - *accept = false; - std::cout << "rejecting subgraph since args were not provided" << std::endl; - return MX_SUCCESS; - } // check if option `reject` was specified, and if so check if value is 'True' if(options.count("reject") > 0 && options.at("reject").compare("True") == 0) { diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index e04afa7967d7..0e485a012ffb 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -467,12 +467,14 @@ typedef std::mt19937 mx_cpu_rand_t; #define MX_NUM_CPU_RANDOM_STATES 1024 #define MX_NUM_GPU_RANDOM_STATES 32768 +/* \brief Class to help allocate new args/aux params in graph passes */ class PassResource { public: PassResource(std::unordered_map* new_args, std::unordered_map* new_aux, nd_malloc_t nd_malloc, const void* nd_alloc) : new_args_(new_args), new_aux_(new_aux), nd_malloc_(nd_malloc), nd_alloc_(nd_alloc) {} + // allocate new arg param, adds to args map, returns newly allocated tensor MXTensor* alloc_arg(const std::string& name, const std::vector& shapes, const MXContext &ctx, MXDType dtype) const { void* data; @@ -482,6 +484,7 @@ class PassResource { (*new_args_)[name] = tensor; return &(new_args_->at(name)); } + // allocate new aux param, adds to aux map, returns newly allocated tensor MXTensor* alloc_aux(const std::string& name, const std::vector& shapes, const MXContext &ctx, MXDType dtype) const { void* data; @@ -562,10 +565,13 @@ class OpResource { void *rand_cpu_states, *rand_gpu_states; }; -/*! \brief Macro to help passing serialized subgraph through attribute dict */ +/*! 
\brief attribute key to help passing serialized subgraph through subgraph op attribute */ #define MX_STR_SUBGRAPH_SYM_JSON "subgraph_sym_json" +/*! \brief dtype attribute key for ops after type propagation */ #define MX_STR_DTYPE "__ext_dtype__" +/*! \brief shape attribute key for ops after shape propagation */ #define MX_STR_SHAPE "__ext_shape__" +/*! \brief extra input attribute key for ops */ #define MX_STR_EXTRA_INPUTS "__ext_extra_inputs__" /* \brief get shape value from list of shapes string @@ -681,6 +687,7 @@ struct JsonVal { } return ret; } + // convert JSON-compatible string to JSON object static JsonVal parse(const std::string& json) { unsigned int idx = 0; return JsonVal::parse(json, &idx); @@ -815,20 +822,27 @@ class Graph; // Representation of an input/output to a node struct NodeEntry { Node* node; // other node thats producing/consuming inputs/outputs - int entry; // entry from other node (ie. which output from producing node) + int entry; // entry index from other node (ie. output index from producing node) }; // Representation of a node in the graph class Node { public: Node() {tensor = nullptr;} + // internally set passResource to enable tensor allocation for graph passes void _setPassResource(PassResource* res_) {res = res_;} + /* \brief allocate an arg tensor for this node */ void alloc_arg(const std::vector& shapes, const MXContext &ctx, MXDType dtype) { + if (!res) + throw std::runtime_error("Node not initialized. Cannot use alloc_arg outside of graph passes."); tensor = res->alloc_arg(name, shapes, ctx, dtype); } + /* \brief allocate an aux tensor for this node */ void alloc_aux(const std::vector& shapes, const MXContext &ctx, MXDType dtype) { + if (!res) + throw std::runtime_error("Node not initialized. Cannot use alloc_aux outside of graph passes."); tensor = res->alloc_aux(name, shapes, ctx, dtype); } std::string op; // operator name (ie. 
Convolution) @@ -1091,6 +1105,8 @@ class Graph { } std::cout << space << "###############################" << std::endl; } + + /* \brief add a new node to this graph */ Node* addNode(const std::string& name, const std::string& op) { Node* n = new Node(); n->name = name; @@ -1099,24 +1115,27 @@ class Graph { n->_setPassResource(res); return n; } - + /* \brief get node at index in graph */ Node* getNode(size_t idx) { return nodes[idx]; } + /* \brief get const node at index in const graph */ const Node* getNode(size_t idx) const { return nodes.at(idx); } + /* \brief get attribute on graph */ const JsonVal& getAttr(const std::string& key) const { return attrs.at(key); } - + /* \brief get number of nodes in the graph */ size_t size() const { return nodes.size(); } - - void _setParams(PassResource* res_, - std::unordered_map* args, - std::unordered_map* aux) { + // internally set passResource to enable tensor allocation for graph passes + void _setPassResource(PassResource* res_) {res = res_;} + // internally set arg/aux params when available + void _setParams(std::unordered_map* args, + std::unordered_map* aux) { // set params for each input node for (Node* node : inputs) { if (args->count(node->name) > 0) @@ -1124,10 +1143,12 @@ class Graph { else if (aux->count(node->name) > 0) node->tensor = &aux->at(node->name); } - res = res_; - // set passResource for each node - for (Node* node : nodes) { - node->_setPassResource(res); + + if (res) { + // set passResource for each node + for (Node* node : nodes) { + node->_setPassResource(res); + } } } @@ -1360,10 +1381,7 @@ typedef MXReturnValue (*createSelector_t)(const mxnet::ext::Graph *graph, typedef MXReturnValue (*reviewSubgraph_t)(const mxnet::ext::Graph *subgraph, int subgraph_id, bool* accept, const std::unordered_map& options, - std::unordered_map* attrs, - const std::unordered_map& args, - const std::unordered_map& aux); + std::string>& options); /*! 
* \brief An abstract class for subgraph property @@ -2253,28 +2271,27 @@ extern "C" { aux[aux_names[i]] = tensor; } - // attributes to set on subgraph node - std::unordered_map attrs; - + subgraph->_setParams(&args, &aux); mxnet::ext::MXReturnValue retval = reviewSubgraph(subgraph, subgraph_id, &accept_bool, - opts, &attrs, args, aux); + opts); if (!retval) return retval; *accept = accept_bool; - if (attrs.size() > 0) { - *num_attrs = attrs.size(); + if (subgraph->attrs.size() > 0) { + *num_attrs = subgraph->attrs.size(); // allocate space for attributes - *attr_keys = static_cast(malloc (attrs.size() * sizeof(char*))); - *attr_vals = static_cast(malloc (attrs.size() * sizeof(char*))); + *attr_keys = static_cast(malloc (*num_attrs * sizeof(char*))); + *attr_vals = static_cast(malloc (*num_attrs * sizeof(char*))); // copy attributes int i = 0; - for (auto kv : attrs) { + for (auto kv : subgraph->attrs) { (*attr_keys)[i] = static_cast(malloc ((kv.first.size()+1) * sizeof(char))); - (*attr_vals)[i] = static_cast(malloc ((kv.second.size()+1) * sizeof(char))); + std::string val = kv.second.dump(); // convert JsonVal back to string + (*attr_vals)[i] = static_cast(malloc ((val.size()+1) * sizeof(char))); snprintf((*attr_keys)[i], kv.first.size()+1, "%s", kv.first.c_str()); - snprintf((*attr_vals)[i], kv.second.size()+1, "%s", kv.second.c_str()); + snprintf((*attr_vals)[i], val.size()+1, "%s", val.c_str()); i++; } } @@ -2343,7 +2360,8 @@ extern "C" { std::unordered_map new_args, new_aux; mxnet::ext::PassResource res(&new_args, &new_aux, nd_malloc, nd_alloc); - graph->_setParams(&res, &args, &aux); + graph->_setParams(&args, &aux); + graph->_setPassResource(&res); mxnet::ext::MXReturnValue retval = graphPass(graph, opts); if (!retval) return retval; From 87dd1e80f930ed0b10eb026680397cd16d1d6895 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 8 Aug 2020 05:06:48 +0000 Subject: [PATCH 18/25] insanity --- include/mxnet/lib_api.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 0e485a012ffb..49b4b138bfe1 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -835,14 +835,16 @@ class Node { void alloc_arg(const std::vector& shapes, const MXContext &ctx, MXDType dtype) { if (!res) - throw std::runtime_error("Node not initialized. Cannot use alloc_arg outside of graph passes."); + throw std::runtime_error( + "Node not initialized. Cannot use alloc_arg outside of graph passes."); tensor = res->alloc_arg(name, shapes, ctx, dtype); } /* \brief allocate an aux tensor for this node */ void alloc_aux(const std::vector& shapes, const MXContext &ctx, MXDType dtype) { if (!res) - throw std::runtime_error("Node not initialized. Cannot use alloc_aux outside of graph passes."); + throw std::runtime_error( + "Node not initialized. Cannot use alloc_aux outside of graph passes."); tensor = res->alloc_aux(name, shapes, ctx, dtype); } std::string op; // operator name (ie. 
Convolution) @@ -852,6 +854,7 @@ class Node { std::vector outputs; // set of outputs from the node std::vector subgraphs; // set of subgraphs within this node std::unordered_map attrs; // node attributes + private: PassResource* res; }; From f080a3950a8fa0ba148f37085d554d3cf760676b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sat, 8 Aug 2020 07:56:28 +0000 Subject: [PATCH 19/25] updated docs --- example/extensions/lib_pass/README.md | 48 ++++++++++------------ example/extensions/lib_subgraph/README.md | 50 ++++++++++++++--------- 2 files changed, 52 insertions(+), 46 deletions(-) diff --git a/example/extensions/lib_pass/README.md b/example/extensions/lib_pass/README.md index 1d0d972f90d0..e7c50d0fc833 100644 --- a/example/extensions/lib_pass/README.md +++ b/example/extensions/lib_pass/README.md @@ -32,22 +32,21 @@ To run the following example, the build type of MXNet doesn’t matter since the ### Run An Example -You can start getting familiar with custom passes by running an example provided in the **example/extensions/lib_pass** directory. The `myPass` example just copies the input graph to the output. Go to the **lib_pass** directory and follow these steps: +You can start getting familiar with custom passes by running an example provided in the **example/extensions/lib_pass** directory. The `myPass` example just prints out the graph. Go to the **lib_pass** directory and follow these steps: 1. Run `make`. The Makefile will generate the dynamic library **libpass_lib.so** which is compiled from the `pass_lib.cc` file. This is the library you are going to load that contains everything for the custom pass. -2. Run `python test_pass.py`. It’ll first load the above library, find the components, register them in the MXNet backend, then execute the pass on the model and execute the operators like a regular MXNet operator and output the result. Below is the output when running the `python test_pass.py` command. Notice that it loads 2 passes: myPass and jsonPass. +2. Run `python test_pass.py`. It’ll first load the above library, find the components, register them in the MXNet backend, then execute the pass on the model and execute the operators like a regular MXNet operator and output the result. Below is the output when running the `python test_pass.py` command. Notice that it loads 1 pass: `myPass`. ``` [10:38:03] src/c_api/c_api.cc:286: Found 0 operators in library [10:38:03] src/c_api/c_api.cc:785: Found 0 partitioners in library -[07:14:00] src/c_api/c_api.cc:887: Found 2 graph passes in library +[07:14:00] src/c_api/c_api.cc:887: Found 1 graph passes in library [07:14:00] src/c_api/c_api.cc:902: Graph Pass [0] myPass -[07:14:00] src/c_api/c_api.cc:902: Graph Pass [1] jsonPass ``` ### Basic Files For Custom Pass Library * **lib_pass/pass_lib.cc**: This file has a source code implementation of all required components to make a custom pass, it also shows registration of them so that they can be loaded by MXNet. -* **lib_pass/Makefile**: This file compiles the source code to a dynamic shared library, with a header file `include/mxnet/lib_api.h` from MXNet source code. Currently the custom pass is compatible with C++11 onwards. +* **lib_pass/Makefile**: This file compiles the source code to a dynamic shared library, with a header file `include/mxnet/lib_api.h` from MXNet source code. Currently the custom pass is compatible with C++11 and above. 
* **lib_pass/test_pass.py**: This file calls `mx.library.load(‘libpass_lib.so’)` to load the library containing the custom components, executes the pass on the model using the `optimize_for` API, and prints outputs of the forward passes. The outputs should be the same as the regular MXNet forward pass without running the pass. * **include/mxnet/lib_api.h**: This file from MXNet source code is the single header file needed to include all necessary data types and function prototypes for writing a custom library. You can either specify the include path in the `Makefile`, or copy the header file over to `example/extensions/lib_pass` folder. Note that apart from this header, the custom library is independent of MXNet source. ## Writing Custom Pass Library @@ -78,18 +77,18 @@ sym_block.optimize_for(x, backend='myPass') ### Using a Custom Pass Library -APIs in MXNet are available in both Symbol and Gluon APIs. For the Symbol API, the `optimize_for` API can be called on Symbol objects to return a new Symbol post graph pass. +APIs in MXNet are available in both Symbol and Gluon APIs. For the Symbol API, `optimize_for` can be called on Symbol objects to run the graph pass and return a new Symbol. ```python -optimize_for(backend, args=None, aux=None, ctx=None, **kwargs) +sym.optimize_for(backend, args=None, aux=None, ctx=None, **kwargs) ``` The `optimize_for` API takes at least 1 argument, `backend` which is a string that identifies which backend to use to optimize the model. The `args` and `aux` arguments are optional and take a list of NDArray or dict of str to NDArray. They are used to infer shapes and types and before executing the graph pass. The `ctx` argument is optional and takes a device context to infer storage types. It also takes any other user-specified options that will be passed to the backend APIs. -For the Gluon API, the `hybridize` API can be called on HybridBlocks to execute a graph pass on the internal CachedOp Symbol. +For the Gluon API, `hybridize` can be called on HybridBlocks to execute a graph pass on the internal CachedOp Symbol. ```python -hybridize(backend=None, backend_opts=None, **kwargs) +block.hybridize(backend=None, backend_opts=None, **kwargs) ``` The `hybridize` function prepares the HybridBlock to be converted into a backend symbol. The `backend` argument is a string that identifies which pass that will be executed on the model. The `backend_opts` takes other user-specified options that will be passed to the backend APIs. The actual pass runs once just before the first the forward pass. @@ -97,7 +96,7 @@ The `hybridize` function prepares the HybridBlock to be converted into a backend If you just want to run a graph pass on the HybridBlock but not run a complete forward pass, you can use the `optimize_for` API that combines the work done in the `hybridize` API with part of the work done in the forward pass. ```python -optimize_for(x, backend=None, backend_opts=None, **kwargs) +block.optimize_for(x, backend=None, backend_opts=None, **kwargs) ``` When the `optimize_for` API is called on a HybridBlock it runs the graph pass immediately. This lets users export the modified model without running a complete forward pass. @@ -124,15 +123,11 @@ There are several essential building blocks for making a custom pass: MXReturnValue initialize(int version) ``` * [graphPass](./pass_lib.cc#31): - * This function provides a copy of the model graph as a JSON string, and provides an interface for returning a modified model JSON string. 
Also this is where a custom pass can validate the options specified by the user. + * This function provides a copy of the model graph, and any specific options from the user. ```c++ MXReturnValue graphPass( - const std::string& in_graph, - const std::string** out_graph, - const std::unordered_map& options, - const std::unordered_map& args, - const std::unordered_map& aux, - const PassResource& res) + mxnet::ext::Graph *g, + const std::unordered_map& options) ``` * [REGISTER_PASS(my_pass_name)](./pass_lib.cc#L41): * This macro registers the custom pass and its properties to MXNet by its name. The argument to `setBody` is the `graphPass` function. @@ -142,7 +137,7 @@ There are several essential building blocks for making a custom pass: ``` Let’s take a closer look at those registry functions: -* **graphPass**: This function takes six arguments. The 1st argument is a JSON string of the model architecture graph, where nodes are inputs/params/weights and edges are data dependencies. The graph is pre-sorted in topological order. The 2nd argument is a pointer to a pointer of a JSON model string. It is expected users will dereference and assign the address of their output string allocated with `new` and `delete` will be called on it automatically. The third argument is the map of options specified by the user. Users can pass custom options to the pass and they are passed to this function in the `options` map. The fourth and fifth arguments are the named tensor mapping for the args and aux params for the model. They will contain the model params if the user provides them to the `optimize_for` API. The last argument is the `PassResource` object for memory allocation and other utilities. The details of `PassResource` are covered in the section below +* **graphPass**: This function takes two arguments. The first argument is the Graph of the model architecture, where nodes are inputs/params/weights and edges are data dependencies. The second argument is the map of options specified by the user. Users can pass custom options to the pass and they are passed to this function in the `options` map. ### Graph representation @@ -166,8 +161,7 @@ The `nodes` are all the nodes in the graph (superset). The `inputs` are only tho Heres an example creating a new node and adding it to the graph: ```c++ -Node* n = new Node(); -g->nodes.push_back(n); +g->addNode("myConv","Convolution"); ``` Heres an example creating an edge between two nodes: ```c++ @@ -176,20 +170,20 @@ n2->inputs.push_back({n1,0}); ``` Here node `n1` produces an output at index 0 that is consumed by node `n2` on the input at index 1. -### Pass Resource - Some graph passes require allocating new NDArrays to add/replace model params. The `alloc_arg` and `alloc_aux` APIs enable allocating new NDArrays and integrate them with the model args and aux params. Both APIs have the following signature: ```c++ - MXTensor* alloc_xxx(const std::string& name, - const std::vector& shapes, + MXTensor* alloc_xxx(const std::vector& shapes, const MXContext &ctx, MXDType dtype) ``` -If the `name` provided matches the name of an existing param it replaces the previous one. Otherwise it adds a new param to the appropriate arg/aux set. Be sure that you add a new node in the graph that corresponds to this new param, otherwise it will be useless. +This function can be called on a node in the graph to allocate a tensor for that node like: -If you wish to remove an existing param, just remove the node in the graph corresponding to that param. 
It will be deleted after the pass completes and removed from the dictionary of args or aux (whichever it is a member of). +```c++ +node->alloc_arg({1},MXContext::CPU(0),kFloat32); +``` +It adds a new param to the appropriate arg/aux set when the graph pass returns. If you wish to remove an existing param, just remove the node in the graph corresponding to that param. It will be deleted after the pass completes and removed from the dictionary of args or aux (whichever it is a member of). ### Parsing a JSON string @@ -220,4 +214,4 @@ switch(json_val.type) { } ``` -You call the `dump` function on a `JsonVal` object like `json_val.dump()` to get a JSON-compatible string. There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. \ No newline at end of file +You call the `dump` function on a `JsonVal` object like `json_val.dump()` to get a JSON-compatible string. There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. diff --git a/example/extensions/lib_subgraph/README.md b/example/extensions/lib_subgraph/README.md index c9747a60116f..2752d27a67f4 100644 --- a/example/extensions/lib_subgraph/README.md +++ b/example/extensions/lib_subgraph/README.md @@ -96,18 +96,18 @@ In the Gluon hybridize flow, the model is actually hybridized during the first i ### Using a Custom Partitioner Library -Partitioning APIs in MXNet are available in both Symbol and Gluon APIs. For the Symbol API, the `optimize_for` API can be called on Symbol objects to return a partitioned Symbol. +Partitioning APIs in MXNet are available in both Symbol and Gluon APIs. For the Symbol API, `optimize_for` can be called on Symbol objects to return a partitioned Symbol. ```python -optimize_for(backend, args=None, aux=None, ctx=None, **kwargs) +sym.optimize_for(backend, args=None, aux=None, ctx=None, **kwargs) ``` The `optimize_for` API takes at least 1 argument, `backend` which is a string that identifies which backend to partition the model for. The `args` and `aux` arguments are optional and take a list of NDArray or dict of str to NDArray. They are used to infer shapes and types and before partitioning, and passed to the backend to use during compilation. The `ctx` argument is optional and takes a device context to infer storage types. It also takes any other user-specified options that will be passed to the backend partitioning APIs. -For the Gluon API, the `hybridize` API can be called on HybridBlocks to partition the internal CachedOp Symbol. +For the Gluon API, `hybridize` can be called on HybridBlocks to partition the internal CachedOp Symbol. ```python -hybridize(backend=None, backend_opts=None, clear=True, **kwargs) +block.hybridize(backend=None, backend_opts=None, clear=True, **kwargs) ``` The `hybridize` function prepares the HybridBlock to be converted into a backend symbol. The `backend` argument is a string that identifies which backend that will partition the model. The `backend_opts` are other user-specified options (as a Python dictionary of strings mapped to strings) that will be passed to the backend partitioning APIs. The `clear` argument defaults to `True` and clears any previous optimizations done on the block. If you want to chain optimizations together, set `clear` to `False`. 
The actual partitioning takes place during the forward pass. If you want to use `hybridize` to chain multiple optimizations, be sure to execute a forward pass after each call to `hybridize`. @@ -115,13 +115,14 @@ The `hybridize` function prepares the HybridBlock to be converted into a backend If you just want to partition the HybridBlock but not run a complete forward pass, you can use the `optimize_for` API that combines the work done in the `hybridize` API with part of the work done in the forward pass. ```python -optimize_for(x, backend=None, backend_opts=None, clear=True, **kwargs) +block.optimize_for(x, backend=None, backend_opts=None, clear=True, **kwargs) ``` When the `optimize_for` API is called on a HybridBlock it partitions immediately. This lets users export the partitioned model without running a complete forward pass. Chaining multiple optimizations is as simple as calling `optimize_for` multiple times, no need to execute a forward pass (as opposed to `hybridize`). ```python block.optimize_for(x, backend='myPart') +block.optimize_for(x, backend='myOtherPart', clear=False) block.export('partitioned') ``` @@ -142,10 +143,10 @@ There are several essential building blocks for making a custom partitioner: MXReturnValue initialize(int version) ``` * [supportedOps](./subgraph_lib.cc#L179): - * This function provides a copy of the model graph as a JSON string, and provides an interface for identifying which operators should be partitioned into a subgraph. Also this is where a custom partitioner can validate the options specified by the user. + * This function provides a copy of the model Graph, and an interface for identifying which operators should be partitioned into a subgraph. Also this is where a custom partitioner can validate the options specified by the user. ```c++ MXReturnValue supportedOps( - const std::string& json, + const mxnet::ext::Graph* graph, std::vector* ids, const std::unordered_map& options) ``` @@ -163,28 +164,25 @@ Also there are some optional functions you can specify: * This function provides an opportunity to accept/reject a subgraph after MXNet partitions it. It also allows specifying custom attributes on the subgraph (ie. user-generated IDs). If you do not register this function, subgraphs will be accepted by default. ```c++ MXReturnValue reviewSubgraph( - const std::string& json, + const mxnet::ext::Graph* subgraph, int subgraph_id, bool* accept, - const std::unordered_map& options, - std::unordered_map* attrs, - const std::map& args, - const std::map& aux) + const std::unordered_map& options) ``` Let’s take a closer look at those registry functions: -* **supportedOps**: This function takes four arguments. The 1st argument is a JSON string of the model architecture graph, where nodes are inputs/params/weights and edges are data dependencies. The graph is pre-sorted in topological order. The 2nd argument is an array of booleans, one for each operator in the model. When traversing the graph, operators to be partitioned into subgraphs are identified and an entry is set to `true` for the index in the `ids` array corresponding to the node ID. The last argument is the map of options specified by the user. Users can pass custom options to the partitioner and they are passed to this function in the `options` map. +* **supportedOps**: This function takes 3 arguments. The 1st argument is the model architecture graph, where nodes are inputs/params/weights and edges are data dependencies. The graph is pre-sorted in topological order. 
The 2nd argument is an array of integers, one for each operator in the model. When traversing the graph, operators to be partitioned into subgraphs are identified and an entry is set to a value for the index in the `ids` array corresponding to the node ID. Setting a non-negative value (ie. [0, MAX_INT]) indicates the operator should be partitioned into that specific subgraph. Setting a value of -1 indicates that the operator can be partitioned into any subgraph. The last argument is the map of options specified by the user. Users can pass custom options to the partitioner and they are passed to this function in the `options` map. -* **reviewSubgraph**: This function takes five arguments. The 1st argument is a JSON string of the newly partitioned subgraph. The 2nd argument is the subgraph ID, this is just a number MXNet uses to identify this particular subgraph (it starts at zero and increments, unique for each subgraph in the model). The 3rd argument is an output to be set in this function to tell MXNet whether to accept (value: `true`) or reject (value: `false`) the subgraph. You might want to reject a subgraph if it doesnt include all the operators you want, for example. The `options` map is the same one passed to the `supportedOps` API. The 4th argument is the map of options specified by the user. The 5th argument is a map of attributes that should be set on the created subgraph. These attributes will be available later at runtime, and provides a mechanisn to pass info from partition-time to runtime. The last argument is the map of params/weights/args to the model and the associated names. For inputs the the subgraph that come directly from the params/weights of the model, you can look up the name of the input in this map to get the actual tensor values. +* **reviewSubgraph**: This function takes four arguments. The 1st argument is the newly partitioned subgraph. The 2nd argument is the subgraph ID, this is just a number MXNet uses to identify this particular subgraph (it starts at zero and increments, unique for each subgraph in the model). The 3rd argument is an output to be set in this function to tell MXNet whether to accept (value: `true`) or reject (value: `false`) the subgraph. You might want to reject a subgraph if it doesnt include all the operators you want, for example. The `options` map is the same one passed to the `supportedOps` API. The 4th argument is the map of options specified by the user. Any custom attributes set on the Graph object will be available later at runtime, and provides a mechanisn to pass info from partition-time to runtime. For inputs to the subgraph that come directly from the params/weights of the model, you can access the raw tensor data directly from that node in the graph. ### Writing a Custom Selector Instead of implementing the `supportedOps` API, you can choose to implement a custom selector class for more control over partitioning instead. * [createSelector](./subgraph_lib.cc#L321): - * This function provides a copy of the model graph as a JSON string for the first argument. The 2nd argument is a placeholder for CustomOpSelector object. You must define a class that inherits CustomOpSelector and override the required functions. Then you need to create an instance of your class and assign it to the placeholder. The last argument is a map of user-specified options. + * This function provides a copy of the model graph as the first argument. The 2nd argument is a placeholder for CustomOpSelector object. 
You must define a class that inherits from the `CustomOpSelector` class and override the required functions. Then you need to create an instance of your class and assign it to the placeholder. The last argument is a map of user-specified options. ```c++ MXReturnValue createSelector( - const std::string& json, + const mxnet::ext::Graph *graph, CustomOpSelector** sel_inst, const std::unordered_map& options) ``` @@ -218,9 +216,10 @@ When implementing your own selector class, you must inherit from the `CustomOpSe ``` All of these APIs refer to the model's graph that is provided to the `createSelector` API. When you implement your custom `createSelector` function, you can pass the graph and options to the constructor of your class like this: ```c++ -MXReturnValue myCreateSelector(const std::string& json, CustomOpSelector** sel_inst, +MXReturnValue myCreateSelector(const mxnet::ext::Graph *graph, + CustomOpSelector** sel_inst, const std::unordered_map& options) { - *sel_inst = new MySelector(json, options); + *sel_inst = new MySelector(graph, options); return MX_SUCCESS; } ``` @@ -252,6 +251,19 @@ REGISTER_OP(my_subgraph_op) .setCreateOpState(createOpState, "cpu"); ``` +### Converting a JSON string encoded graph + +A Graph object can be created from a JSON string containing a graph/subgraph like: + +```c++ +mxnet::ext::Graph* g = mxnet::ext::Graph::fromString(json); +``` + +It can be converted back to a JSON string just as easily: +```c++ +std::string json = g->toString(); +``` + ### Parsing a JSON string To simplify custom partitioner libraries, basic JSON parsing utility functions have been implemented in the `lib_api.h` header file. You create a `JsonParser` object and parse the string by calling the `parse_to_json` API like: @@ -281,4 +293,4 @@ switch(json_val.type) { } ``` -You call the `dump` function on a `JsonVal` object like `json_val.dump()` to get a JSON-compatible string. There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. \ No newline at end of file +You call the `dump` function on a `JsonVal` object like `json_val.dump()` to get a JSON-compatible string. There are also convenience constructors for creating `JsonVal` objects for strings and numbers like `JsonVal("myKey")` or `JsonVal(42)`. This makes it easy to get specific keys from a map like `json_val.map[JsonVal("nodes")]`. 
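
Putting the pieces above together, here is a minimal sketch of a complete graph pass written against the `Graph`/`Node` API described in these docs, adapted from `addInputPass` in `subgraph_lib.cc`. The pass name `addExtraInputPass`, the `target_op` option key, and the 1-element float32 tensor are illustrative assumptions (not part of the library), and the required `initialize(int version)` entry point is omitted for brevity:

```c++
#include "lib_api.h"

using namespace mxnet::ext;

/* Sketch of a pass that appends one extra "null" input to every node whose
 * op type matches the hypothetical user option "target_op". It mirrors the
 * structure of addInputPass shown earlier in this patch series. */
MXReturnValue addExtraInputPass(mxnet::ext::Graph *g,
                                const std::unordered_map<std::string,
                                                         std::string>& options) {
  std::string target = "Convolution";   // default; overridden by the option below
  if (options.count("target_op") > 0)
    target = options.at("target_op");

  for (int i = 0; i < g->size(); i++) {
    Node *n = g->getNode(i);
    if (n->op.compare(target) == 0) {
      // create a new input node and register it as an input of the graph
      Node *input = g->addNode(n->name + "_extra", "null");
      g->inputs.push_back(input);
      // wire the edge in both directions: new input feeds the matched node
      input->outputs.push_back({n, (int)n->inputs.size()});
      n->inputs.push_back({input, 0});
      // allocate a 1-element float32 arg tensor backing the new input
      input->alloc_arg({1}, MXContext::CPU(0), kFloat32);
    }
  }
  return MX_SUCCESS;
}

REGISTER_PASS(addExtraInputPass)
.setBody(addExtraInputPass);
```

As with the examples above, the pass is selected at runtime by name, e.g. `block.optimize_for(x, backend='addExtraInputPass', target_op='Convolution')`, where the option key is the illustrative one assumed here.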
From 91c28c99ea543fe079d0703bd17c00433590c886 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 10 Aug 2020 17:44:05 +0000 Subject: [PATCH 20/25] added extension error messaging --- example/extensions/lib_api/init_lib.cc | 2 +- example/extensions/lib_custom_op/gemm_lib.cc | 12 +- example/extensions/lib_custom_op/relu_lib.cu | 2 +- .../lib_custom_op/transposecsr_lib.cc | 20 +- .../lib_custom_op/transposerowsp_lib.cc | 20 +- example/extensions/lib_pass/pass_lib.cc | 2 +- .../extensions/lib_subgraph/subgraph_lib.cc | 4 +- include/mxnet/lib_api.h | 53 ++++ src/c_api/c_api.cc | 234 +++++++++++------- 9 files changed, 226 insertions(+), 123 deletions(-) diff --git a/example/extensions/lib_api/init_lib.cc b/example/extensions/lib_api/init_lib.cc index 1531c56e5800..0ed43761fe53 100644 --- a/example/extensions/lib_api/init_lib.cc +++ b/example/extensions/lib_api/init_lib.cc @@ -33,7 +33,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported"; return MX_FAIL; } } diff --git a/example/extensions/lib_custom_op/gemm_lib.cc b/example/extensions/lib_custom_op/gemm_lib.cc index 453103533c37..6081713cda67 100644 --- a/example/extensions/lib_custom_op/gemm_lib.cc +++ b/example/extensions/lib_custom_op/gemm_lib.cc @@ -130,12 +130,12 @@ MXReturnValue inferType(const std::unordered_map& attr std::vector *outtypes) { // validate inputs if (intypes->size() != 2) { - std::cout << "Expected 2 inputs to inferType" << std::endl; + MX_ERROR_MSG << "Expected 2 inputs to inferType"; return MX_FAIL; } for (unsigned i = 0; i < intypes->size(); i++) { if (intypes->at(i) != kFloat32) { - std::cout << "Expected input " << i << " to have float32 type" << std::endl; + MX_ERROR_MSG << "Expected input " << i << " to have float32 type"; return MX_FAIL; } } @@ -149,11 +149,11 @@ MXReturnValue inferShape(const std::unordered_map& att std::vector>* outshapes) { // validate inputs if (inshapes->size() != 2) { - std::cout << "Expected 2 inputs to inferShape" << std::endl; + MX_ERROR_MSG << "Expected 2 inputs to inferShape"; return MX_FAIL; } if (inshapes->at(0).size() != 2 || inshapes->at(1).size() != 2) { - std::cout << "Expected 2D matrices for both inputs to inferShape" << std::endl; + MX_ERROR_MSG << "Expected 2D matrices for both inputs to inferShape"; return MX_FAIL; } @@ -162,7 +162,7 @@ MXReturnValue inferShape(const std::unordered_map& att unsigned kk = inshapes->at(1)[0]; unsigned m = inshapes->at(1)[1]; if (k != kk) { - std::cout << "Exected first input axis 1 equals to second input axis 0" << std::endl; + MX_ERROR_MSG << "Exected first input axis 1 equals to second input axis 0"; return MX_FAIL; } @@ -231,7 +231,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported"; return MX_FAIL; } } diff --git a/example/extensions/lib_custom_op/relu_lib.cu b/example/extensions/lib_custom_op/relu_lib.cu index 53ab1c0479df..7022c76e6999 100644 --- a/example/extensions/lib_custom_op/relu_lib.cu +++ b/example/extensions/lib_custom_op/relu_lib.cu @@ -265,7 +265,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return 
MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported"; return MX_FAIL; } } diff --git a/example/extensions/lib_custom_op/transposecsr_lib.cc b/example/extensions/lib_custom_op/transposecsr_lib.cc index bc1e451baebd..0a882f4d2517 100644 --- a/example/extensions/lib_custom_op/transposecsr_lib.cc +++ b/example/extensions/lib_custom_op/transposecsr_lib.cc @@ -73,11 +73,11 @@ MXReturnValue forward(const std::unordered_map& attrs, // The data types and storage types of inputs and outputs should be the same. if(inputs->at(0).dtype != outputs->at(0).dtype || inputs->at(0).stype != outputs->at(0).stype) { - std::cout << "Error! Expected all inputs and outputs to be the same type." - << "Found input storage type:" << inputs->at(0).stype - << " Found output storage type:" << outputs->at(0).stype - << " Found input data type:" << inputs->at(0).dtype - << " Found output data type:" << outputs->at(0).dtype << std::endl; + MX_ERROR_MSG << "Error! Expected all inputs and outputs to be the same type." + << "Found input storage type:" << inputs->at(0).stype + << " Found output storage type:" << outputs->at(0).stype + << " Found input data type:" << inputs->at(0).dtype + << " Found output data type:" << outputs->at(0).dtype; return MX_FAIL; } @@ -104,11 +104,11 @@ MXReturnValue inferType(const std::unordered_map& attr std::vector* outtypes) { // validate inputs if (intypes->size() != 1) { - std::cout << "Expected 1 inputs to inferType" << std::endl; + MX_ERROR_MSG << "Expected 1 inputs to inferType"; return MX_FAIL; } if (intypes->at(0) != kFloat32) { - std::cout << "Expected input to have float32 type" << std::endl; + MX_ERROR_MSG << "Expected input to have float32 type"; return MX_FAIL; } @@ -120,7 +120,7 @@ MXReturnValue inferSType(const std::unordered_map& att std::vector* instypes, std::vector* outstypes) { if (instypes->at(0) != kCSRStorage) { - std::cout << "Expected storage type is kCSRStorage" << std::endl; + MX_ERROR_MSG << "Expected storage type is kCSRStorage"; return MX_FAIL; } outstypes->at(0) = instypes->at(0); @@ -132,7 +132,7 @@ MXReturnValue inferShape(const std::unordered_map& att std::vector>* outshapes) { // validate inputs if (inshapes->size() != 1) { - std::cout << "Expected 1 inputs to inferShape" << std::endl; + MX_ERROR_MSG << "Expected 1 inputs to inferShape"; return MX_FAIL; } @@ -197,7 +197,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported"; return MX_FAIL; } } diff --git a/example/extensions/lib_custom_op/transposerowsp_lib.cc b/example/extensions/lib_custom_op/transposerowsp_lib.cc index 70c65616bdce..cb4592239ef9 100644 --- a/example/extensions/lib_custom_op/transposerowsp_lib.cc +++ b/example/extensions/lib_custom_op/transposerowsp_lib.cc @@ -76,11 +76,11 @@ MXReturnValue forward(const std::unordered_map& attrs, // The data types and storage types of inputs and outputs should be the same. if(inputs->at(0).dtype != outputs->at(0).dtype || inputs->at(0).stype != outputs->at(0).stype) { - std::cout << "Error! Expected all inputs and outputs to be the same type." 
- << "Found input storage type:" << inputs->at(0).stype - << " Found output storage type:" << outputs->at(0).stype - << " Found input data type:" << inputs->at(0).dtype - << " Found output data type:" << outputs->at(0).dtype << std::endl; + MX_ERROR_MSG << "Error! Expected all inputs and outputs to be the same type." + << "Found input storage type:" << inputs->at(0).stype + << " Found output storage type:" << outputs->at(0).stype + << " Found input data type:" << inputs->at(0).dtype + << " Found output data type:" << outputs->at(0).dtype; return MX_FAIL; } transpose(inputs->at(0), outputs->at(0), res); @@ -106,11 +106,11 @@ MXReturnValue inferType(const std::unordered_map& attr std::vector* outtypes) { // validate inputs if (intypes->size() != 1) { - std::cout << "Expected 1 inputs to inferType" << std::endl; + MX_ERROR_MSG << "Expected 1 inputs to inferType"; return MX_FAIL; } if (intypes->at(0) != kFloat32) { - std::cout << "Expected input to have float32 type" << std::endl; + MX_ERROR_MSG << "Expected input to have float32 type"; return MX_FAIL; } @@ -122,7 +122,7 @@ MXReturnValue inferSType(const std::unordered_map& att std::vector* instypes, std::vector* outstypes) { if (instypes->at(0) != kRowSparseStorage) { - std::cout << "Expected storage type is kRowSparseStorage" << std::endl; + MX_ERROR_MSG << "Expected storage type is kRowSparseStorage"; return MX_FAIL; } outstypes->at(0) = instypes->at(0); @@ -134,7 +134,7 @@ MXReturnValue inferShape(const std::unordered_map& att std::vector>* outshapes) { // validate inputs if (inshapes->size() != 1) { - std::cout << "Expected 1 inputs to inferShape" << std::endl; + MX_ERROR_MSG << "Expected 1 inputs to inferShape"; return MX_FAIL; } @@ -199,7 +199,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported"; return MX_FAIL; } } diff --git a/example/extensions/lib_pass/pass_lib.cc b/example/extensions/lib_pass/pass_lib.cc index 287039332167..825d38290936 100644 --- a/example/extensions/lib_pass/pass_lib.cc +++ b/example/extensions/lib_pass/pass_lib.cc @@ -48,7 +48,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported" << std::endl; return MX_FAIL; } } diff --git a/example/extensions/lib_subgraph/subgraph_lib.cc b/example/extensions/lib_subgraph/subgraph_lib.cc index 8cea58e69821..b2b5a74f2d0a 100644 --- a/example/extensions/lib_subgraph/subgraph_lib.cc +++ b/example/extensions/lib_subgraph/subgraph_lib.cc @@ -100,7 +100,7 @@ MXReturnValue myExecutor(std::vector* inputs, // set tensor for this node so we can read it later node->tensor = &data.back(); } else { - std::cout << "Error! Unsupported op '" << node->op << "' found in myExecutor"; + MX_ERROR_MSG << "Error! 
Unsupported op '" << node->op << "' found in myExecutor"; // free allocated temporary storage for (void* ptr : to_free) free(ptr); // NOLINT @@ -326,7 +326,7 @@ MXReturnValue initialize(int version) { std::cout << "MXNet version " << version << " supported" << std::endl; return MX_SUCCESS; } else { - std::cout << "MXNet version " << version << " not supported" << std::endl; + MX_ERROR_MSG << "MXNet version " << version << " not supported by custom library" << std::endl; return MX_FAIL; } } diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index 49b4b138bfe1..cb2b6344e876 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -45,6 +45,7 @@ #include #include #include +#include #if defined(__NVCC__) #include @@ -1519,6 +1520,43 @@ class Registry { MX_STR_CONCAT(MX_REGISTER_PASS_DEF_(Name), __COUNTER__) = \ Registry::get()->add(MX_TOSTRING(Name)) + + +class MXerrorMsgs { + public: + /*! + * \brief get singleton pointer to class + * \returns pointer to class + */ + static MXerrorMsgs* get() { + static MXerrorMsgs inst; + return &inst; + } + /*! + * \brief add a new error message + */ + std::stringstream& add(const char* file, int line) { + messages.push_back(std::stringstream()); + messages.back() << file << "[" << line << "]: "; + return messages.back(); + } + int size() { + return messages.size(); + } + const std::string* get(int idx) { + return new std::string(messages.at(idx).str()); + } + + private: + /*! \brief constructor */ + MXerrorMsgs() {} + /*! \brief destructor */ + ~MXerrorMsgs() {} + /*! \brief map of entries in registry */ + std::vector messages; +}; +#define MX_ERROR_MSG MXerrorMsgs::get()->add(__FILE__,__LINE__) + /* -------------- BELOW ARE CTYPE FUNCTIONS PROTOTYPES --------------- */ /*! @@ -1698,6 +1736,12 @@ typedef int (*initialize_t)(int version); #define MXLIB_OPVERSION_STR "_opVersion" typedef int (*opVersion_t)(); +#define MXLIB_MSGSIZE_STR "_msgSize" +typedef int (*msgSize_t)(void); + +#define MXLIB_MSGGET_STR "_msgGet" +typedef int (*msgGet_t)(int idx, const char** msg); + #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) #define MX_INT_RET __declspec(dllexport) int __cdecl #define MX_VOID_RET __declspec(dllexport) void __cdecl @@ -2386,5 +2430,14 @@ extern "C" { mxnet::ext::MXReturnValue #endif initialize(int version); + + MX_INT_RET _msgSize() { + return mxnet::ext::MXerrorMsgs::get()->size(); + } + + /*! \brief returns operator registration at specified index */ + MX_VOID_RET _msgGet(int idx, const char** msg) { + *msg = mxnet::ext::MXerrorMsgs::get()->get(idx)->c_str(); + } } // extern "C" #endif // MXNET_LIB_API_H_ diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 48b3c652ff81..f98d34f881ad 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -98,6 +98,22 @@ inline int MXAPIGetFunctionRegInfo(const FunRegType *e, // NOTE: return value is added in API_END +std::string getExtensionMsgs(mxnet::ext::msgSize_t msgSize, + mxnet::ext::msgGet_t msgGet) { + std::string str; + if (msgSize() > 0) { + str = "\nExtension Traceback:\n"; + for (int i = 0; i < msgSize(); i++) { + const char* tmp; + msgGet(i,&tmp); + // format: [i] message + str += std::string("\t[") + std::to_string(i) + std::string("] ") + + std::string(tmp) + std::string("\n"); + } + } + return str; +} + /*! 
* \brief Common compute function dispatcher for forward/backward and stateful forward/backward * state_ptr will be nullptr for regular ops; fcomp_fp is nullptr for stateful ops @@ -112,7 +128,9 @@ void CustomFComputeDispatcher(const std::string op_name, const OpContext& ctx, const std::vector& inputs, const std::vector& req, - const std::vector& outputs) { + const std::vector& outputs, + mxnet::ext::msgSize_t msgSize, + mxnet::ext::msgGet_t msgGet) { using namespace mxnet::ext; std::vector in_data, out_data; @@ -283,43 +301,46 @@ void CustomFComputeDispatcher(const std::string op_name, } // call fcompute function - CHECK(callFComp(fcomp_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), - in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), - out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), - out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), - cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), - in_indices_shapes.data(), out_indices_shapes.data(), - in_indptr_shapes.data(), out_indptr_shapes.data(), - rng_cpu_states, rng_gpu_states)) - << "Error calling FCompute for custom operator '" << op_name << "'"; + int retval = callFComp(fcomp_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), + in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), + out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), + out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), + cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, + sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), + in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data(), + rng_cpu_states, rng_gpu_states); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling FCompute for custom operator '" << op_name << "'" << msgs; } if (state_ptr != nullptr) { // retrieve op state object created from CreateOpState CustomStatefulOpWrapper& op = state_ptr->get_state(); CustomStatefulOp* state_op_inst = op.get_instance(); + std::string msgs = getExtensionMsgs(msgSize, msgGet); CHECK(state_op_inst != nullptr) - << "Error custom stateful operator is null for operator '" << op_name << "'"; + << "Error custom stateful operator is null for operator '" << op_name << "'" << msgs; // call fcompute function - CHECK(callFStatefulComp(stateful_forward_flag, state_op_inst, - in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), - in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), - in_data.size(), - out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), - out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), - out_data.size(), - cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), - in_indices.data(), out_indices.data(), - in_indptr.data(), out_indptr.data(), - in_indices_shapes.data(), out_indices_shapes.data(), - in_indptr_shapes.data(), out_indptr_shapes.data(), - rng_cpu_states, rng_gpu_states)) - << "Error calling FStatefulCompute for custom operator 
'" << op_name << "'"; + int retval = callFStatefulComp(stateful_forward_flag, state_op_inst, + in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), + in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), + in_data.size(), + out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), + out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), + out_data.size(), + cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, + sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), + in_indices.data(), out_indices.data(), + in_indptr.data(), out_indptr.data(), + in_indices_shapes.data(), out_indices_shapes.data(), + in_indptr_shapes.data(), out_indptr_shapes.data(), + rng_cpu_states, rng_gpu_states); + msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling FStatefulCompute for custom operator '" << op_name << "'" << msgs; } } @@ -340,7 +361,9 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::unordered_map &forward_ctx_map, const std::unordered_map &backward_ctx_map, mxnet::ext::opCallFComp_t callFComp, - mxnet::ext::opCallFStatefulComp_t callFStatefulComp) { + mxnet::ext::opCallFStatefulComp_t callFStatefulComp, + mxnet::ext::msgSize_t msgSize, + mxnet::ext::msgGet_t msgGet) { using namespace mxnet::ext; // check if operator is already registered @@ -386,7 +409,7 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::vector& req, const std::vector& outputs) { CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs); + callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs, msgSize, msgGet); }; if (createop_map.count("cpu") > 0) regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); @@ -402,12 +425,12 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp CHECK_GT(forward_ctx_map.count("cpu"), 0); fcomp_t fcomp = forward_ctx_map.at("cpu"); CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); + nullptr, 0, nullptr, ctx, inputs, req, outputs, msgSize, msgGet); } else if (ctx.run_ctx.ctx.dev_mask() == Context::kGPU) { CHECK_GT(forward_ctx_map.count("gpu"), 0); fcomp_t fcomp = forward_ctx_map.at("gpu"); CustomFComputeDispatcher(name_str, callFComp, fcomp, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); + nullptr, 0, nullptr, ctx, inputs, req, outputs, msgSize, msgGet); } }; if (forward_ctx_map.count("cpu") > 0) @@ -451,7 +474,7 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::vector& req, const std::vector& outputs) { CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs); + callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs, msgSize, msgGet); }; gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); @@ -465,7 +488,7 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::vector& req, const std::vector& outputs) { CustomFComputeDispatcher(name_str, callFComp, fcomp_back_cpu, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); + nullptr, 0, nullptr, ctx, inputs, req, outputs, msgSize, msgGet); }; gradOp.set_attr("FComputeEx", backward_cpu_lambda, plevel); } @@ -477,7 +500,7 @@ void registerOp(const char* name, const std::string& name_str, 
bool isSubgraphOp const std::vector& req, const std::vector& outputs) { CustomFComputeDispatcher(name_str, callFComp, fcomp_back_gpu, &attrs, - nullptr, 0, nullptr, ctx, inputs, req, outputs); + nullptr, 0, nullptr, ctx, inputs, req, outputs, msgSize, msgGet); }; gradOp.set_attr("FComputeEx", backward_gpu_lambda, plevel); } @@ -486,7 +509,8 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp regOp.add_argument("data", "NDArray[]", "Source inputs"); } -void registerOperators(void *lib, int verbose) { +void registerOperators(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, + mxnet::ext::msgGet_t msgGet) { using namespace mxnet::ext; // get C type interface functions @@ -614,9 +638,10 @@ void registerOperators(void *lib, int verbose) { int num_in = -1; int num_out = -1; - CHECK(callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - &num_in, &num_out)) - << "Error calling ParseAttrs for custom operator '" << name_str << "'"; + int retval = callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + &num_in, &num_out); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling ParseAttrs for custom operator '" << name_str << "'" << msgs; // return type void }; @@ -632,9 +657,11 @@ void registerOperators(void *lib, int verbose) { int num_in = -1; int num_out = -1; - CHECK(callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - &num_in, &num_out)) - << "Error calling ParseAttrs::num_inputs for custom operator '" << name_str << "'"; + int retval = callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + &num_in, &num_out); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling ParseAttrs::num_inputs for custom operator '" << name_str + << "'" << msgs; // get extra inputs, if exists int extra_inputs = 0; @@ -668,9 +695,11 @@ void registerOperators(void *lib, int verbose) { int num_in = -1; int num_out = -1; - CHECK(callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - &num_in, &num_out)) - << "Error calling ParseAttrs::num_outputs for custom operator '" << name_str << "'"; + int retval = callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + &num_in, &num_out); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling ParseAttrs::num_outputs for custom operator '" << name_str + << "'" << msgs; return num_out; }; @@ -687,9 +716,11 @@ void registerOperators(void *lib, int verbose) { int num_in = -1; int num_out = -1; - CHECK(callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - &num_in, &num_out)) - << "Error calling ParseAttrs::num_outputs for custom operator '" << name_str << "'"; + int retval = callParseAttrs(parse_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + &num_in, &num_out); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling ParseAttrs::num_outputs for custom operator '" << name_str + << "'" << msgs; // for backward passes, inputs + outputs + input gradients (one for each output) // get extra inputs, if exists @@ -743,11 +774,12 @@ void registerOperators(void *lib, int verbose) { uint32_t** outshapes = nullptr; int* outdims = nullptr; - CHECK(callInferShape(shape_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - inshapes.data(), indims.data(), num_inputs, - &mod_inshapes, &mod_indims, - &outshapes, &outdims, 
out_shape->size())) - << "Error calling InferShape for custom operator '" << name_str << "'"; + int retval = callInferShape(shape_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + inshapes.data(), indims.data(), num_inputs, + &mod_inshapes, &mod_indims, + &outshapes, &outdims, out_shape->size()); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling InferShape for custom operator '" << name_str << "'" << msgs; std::vector in_shapes(num_inputs); // determine amount of memory needed to store all the modified input shapes @@ -856,10 +888,11 @@ void registerOperators(void *lib, int verbose) { // output types will be populated by inferType function std::vector outtypes(out_type->size()); - CHECK(callInferType(type_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - intypes.data(), num_inputs, - outtypes.data(), out_type->size())) - << "Error calling InferType for custom operator '" << name_str << "'"; + int retval = callInferType(type_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + intypes.data(), num_inputs, + outtypes.data(), out_type->size()); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling InferType for custom operator '" << name_str << "'" << msgs; // copy and assign modified input types from custom op to MXNet memory for (size_t i = 0; i < num_inputs; i++) { @@ -910,9 +943,10 @@ void registerOperators(void *lib, int verbose) { int indices_size = 0; // call mutate inputs function - CHECK(callMutateInputs(mutate_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - &mutate_indices, &indices_size)) - << "Error calling MutateInputs for custom operator '" << name_str << "'"; + int retval = callMutateInputs(mutate_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + &mutate_indices, &indices_size); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling MutateInputs for custom operator '" << name_str << "'" << msgs; std::vector mutate_indices_list(indices_size); for (int i=0; i < indices_size; i++) { @@ -955,10 +989,11 @@ void registerOperators(void *lib, int verbose) { // output types will be populated by inferType function std::vector outstypes(out_stypes->size()); - CHECK(callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), - instypes.data(), num_inputs, - outstypes.data(), out_stypes->size())) - << "Error calling InferSType for custom operator '" << name_str << "'"; + int retval = callInferSType(stype_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), + instypes.data(), num_inputs, + outstypes.data(), out_stypes->size()); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling InferSType for custom operator '" << name_str << "'" << msgs; // copy and assign modified input storage types from custom op to MXNet memory. 
for (size_t i = 0; i < num_inputs; i++) { @@ -1065,19 +1100,22 @@ void registerOperators(void *lib, int verbose) { if (ctx.dev_mask() == Context::kCPU) { CHECK(createop_map.count("cpu") > 0) << "CPU CreateOpState not implemented for '" << name_str << "'"; - CHECK(callCreateOpState(createop_map.at("cpu"), attr_keys.data(), attr_vals.data(), - attr_keys.size(), &state_op_inst)) - << "Error calling CreateOpState CPU for custom operator '" << name_str << "'"; + int retval = callCreateOpState(createop_map.at("cpu"), attr_keys.data(), attr_vals.data(), + attr_keys.size(), &state_op_inst); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling CreateOpState CPU for custom operator '" << name_str << "'" << msgs; } else if (ctx.dev_mask() == Context::kGPU) { CHECK(createop_map.count("gpu") > 0) << "GPU CreateOpState not implemented for '" << name_str << "'"; - CHECK(callCreateOpState(createop_map.at("gpu"), attr_keys.data(), attr_vals.data(), - attr_keys.size(), &state_op_inst)) - << "Error calling CreateOpState GPU for custom operator '" << name_str << "'"; + int retval = callCreateOpState(createop_map.at("gpu"), attr_keys.data(), attr_vals.data(), + attr_keys.size(), &state_op_inst); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling CreateOpState GPU for custom operator '" << name_str << "'" << msgs; } + std::string msgs = getExtensionMsgs(msgSize, msgGet); CHECK(state_op_inst != nullptr) - << "Error custom library failed to create stateful operator '" << name_str << "'"; + << "Error custom library failed to create stateful operator '" << name_str << "'" << msgs; CustomStatefulOp* state_op = reinterpret_cast(state_op_inst); return OpStatePtr::Create(state_op); @@ -1089,11 +1127,13 @@ void registerOperators(void *lib, int verbose) { num_inouts, infer_type, infer_shape, infer_storage_type, mutate_inputs, num_subgraph_inputs, infer_subgraph_type, infer_subgraph_shape, infer_subgraph_storage_type, create_opstate, grad_reg, mutate_fp, - createop_map, forward_ctx_map, backward_ctx_map, callFComp, callFStatefulComp); + createop_map, forward_ctx_map, backward_ctx_map, callFComp, callFStatefulComp, + msgSize, msgGet); } } -void registerPartitioners(void *lib, int verbose) { +void registerPartitioners(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, + mxnet::ext::msgGet_t msgGet) { using namespace mxnet::ext; // get C type interface functions @@ -1177,7 +1217,8 @@ void registerPartitioners(void *lib, int verbose) { } } -void registerPasses(void *lib, int verbose) { +void registerPasses(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, + mxnet::ext::msgGet_t msgGet) { using namespace mxnet::ext; // get C type interface functions @@ -1374,17 +1415,18 @@ void registerPasses(void *lib, int verbose) { }; char* out_json; - CHECK(callGraphPass(pass_fp, in_json.c_str(), &out_json, opt_keys.data(), - opt_vals.data(), opt_keys.size(), pass_name, - arg_names.data(), arg_names.size(), arg_data.data(), - arg_shapes.data(), arg_dims.data(), arg_types.data(), - arg_verIDs.data(), arg_dev_type.data(), - arg_dev_id.data(), aux_names.data(), aux_names.size(), - aux_data.data(), aux_shapes.data(), aux_dims.data(), - aux_types.data(), aux_verIDs.data(), - aux_dev_type.data(), aux_dev_id.data(), - ndarray_malloc, &ndarray_alloc)) - << "Error calling graph pass for '" << pass_name << "'"; + int retval = callGraphPass(pass_fp, in_json.c_str(), &out_json, opt_keys.data(), + opt_vals.data(), opt_keys.size(), pass_name, + arg_names.data(), 
arg_names.size(), arg_data.data(), + arg_shapes.data(), arg_dims.data(), arg_types.data(), + arg_verIDs.data(), arg_dev_type.data(), + arg_dev_id.data(), aux_names.data(), aux_names.size(), + aux_data.data(), aux_shapes.data(), aux_dims.data(), + aux_types.data(), aux_verIDs.data(), + aux_dev_type.data(), aux_dev_id.data(), + ndarray_malloc, &ndarray_alloc); + std::string msgs = getExtensionMsgs(msgSize, msgGet); + CHECK(retval) << "Error calling graph pass for '" << pass_name << "'" << msgs; std::string out_string(out_json); nnvm::Graph out_graph = nnvm::pass::LoadJSON(out_string); @@ -1422,16 +1464,24 @@ int MXLoadLib(const char *path, unsigned verbose) { LOG(FATAL) << "Library version (" << libVersion << ") does not match MXNet version (" << MX_LIBRARY_VERSION << ")"; + // get error messaging APIs + mxnet::ext::msgSize_t msgSize = + get_func(lib, const_cast(MXLIB_MSGSIZE_STR)); + mxnet::ext::msgGet_t msgGet = + get_func(lib, const_cast(MXLIB_MSGGET_STR)); + // initialize library by passing MXNet version mxnet::ext::initialize_t initialize = get_func(lib, const_cast(MXLIB_INITIALIZE_STR)); - if (!initialize(static_cast(MXNET_VERSION))) - LOG(FATAL) << "Library failed to initialize"; - + if (!initialize(static_cast(MXNET_VERSION))) { + std::string msgs = getExtensionMsgs(msgSize, msgGet); + LOG(FATAL) << "Library failed to initialize" << msgs; + } + // find ops, partitioners, and passes in library - registerOperators(lib, verbose); - registerPartitioners(lib, verbose); - registerPasses(lib, verbose); + registerOperators(lib, verbose, msgSize, msgGet); + registerPartitioners(lib, verbose, msgSize, msgGet); + registerPasses(lib, verbose, msgSize, msgGet); API_END(); } From 22ccf41fc536a8928b3b9a14619a309f4c712f99 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 10 Aug 2020 18:02:06 +0000 Subject: [PATCH 21/25] fixed whitespace --- include/mxnet/lib_api.h | 9 +++++---- src/c_api/c_api.cc | 41 ++++++++++++++++++++++++++--------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index cb2b6344e876..ce10bfa82223 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -1520,8 +1520,7 @@ class Registry { MX_STR_CONCAT(MX_REGISTER_PASS_DEF_(Name), __COUNTER__) = \ Registry::get()->add(MX_TOSTRING(Name)) - - +/* \brief Class to store error messages from extensions to pass to MXNet */ class MXerrorMsgs { public: /*! @@ -1555,8 +1554,10 @@ class MXerrorMsgs { /*! \brief map of entries in registry */ std::vector messages; }; -#define MX_ERROR_MSG MXerrorMsgs::get()->add(__FILE__,__LINE__) - + +// Add a new error message, example: MX_ERROR_MSG << "my error msg"; +#define MX_ERROR_MSG MXerrorMsgs::get()->add(__FILE__, __LINE__) + /* -------------- BELOW ARE CTYPE FUNCTIONS PROTOTYPES --------------- */ /*! 
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f98d34f881ad..d858e882c08b 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -105,7 +105,7 @@ std::string getExtensionMsgs(mxnet::ext::msgSize_t msgSize, str = "\nExtension Traceback:\n"; for (int i = 0; i < msgSize(); i++) { const char* tmp; - msgGet(i,&tmp); + msgGet(i, &tmp); // format: [i] message str += std::string("\t[") + std::to_string(i) + std::string("] ") + std::string(tmp) + std::string("\n"); @@ -305,10 +305,12 @@ void CustomFComputeDispatcher(const std::string op_name, in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), - out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), + out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), + out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), - in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), + in_indices.data(), out_indices.data(), in_indptr.data(), + out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data(), rng_cpu_states, rng_gpu_states); @@ -326,21 +328,24 @@ void CustomFComputeDispatcher(const std::string op_name, // call fcompute function int retval = callFStatefulComp(stateful_forward_flag, state_op_inst, - in_shapes.data(), in_dims.data(), in_data.data(), in_types.data(), + in_shapes.data(), in_dims.data(), in_data.data(), + in_types.data(), in_verIDs.data(), in_dev_type.data(), in_dev_id.data(), in_data.size(), - out_shapes.data(), out_dims.data(), out_data.data(), out_types.data(), + out_shapes.data(), out_dims.data(), out_data.data(), + out_types.data(), out_verIDs.data(), out_dev_type.data(), out_dev_id.data(), out_data.size(), cpu_malloc, &cpu_alloc, gpu_malloc, &gpu_alloc, cuda_stream, - sparse_malloc, &sparse_alloc, in_stypes.data(), out_stypes.data(), - in_indices.data(), out_indices.data(), + sparse_malloc, &sparse_alloc, in_stypes.data(), + out_stypes.data(), in_indices.data(), out_indices.data(), in_indptr.data(), out_indptr.data(), in_indices_shapes.data(), out_indices_shapes.data(), in_indptr_shapes.data(), out_indptr_shapes.data(), rng_cpu_states, rng_gpu_states); msgs = getExtensionMsgs(msgSize, msgGet); - CHECK(retval) << "Error calling FStatefulCompute for custom operator '" << op_name << "'" << msgs; + CHECK(retval) << "Error calling FStatefulCompute for custom operator '" << op_name << "'" + << msgs; } } @@ -409,7 +414,8 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::vector& req, const std::vector& outputs) { CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs, msgSize, msgGet); + callFStatefulComp, 1, &state_ptr, ctx, inputs, req, outputs, + msgSize, msgGet); }; if (createop_map.count("cpu") > 0) regOp.set_attr("FStatefulComputeEx", fstate_forward, plevel); @@ -474,7 +480,8 @@ void registerOp(const char* name, const std::string& name_str, bool isSubgraphOp const std::vector& req, const std::vector& outputs) { CustomFComputeDispatcher(name_str, nullptr, nullptr, nullptr, - callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs, msgSize, msgGet); + callFStatefulComp, 0, &state_ptr, ctx, inputs, req, outputs, + msgSize, msgGet); }; 
gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); gradOp.set_attr("FStatefulComputeEx", fstate_backward, plevel); @@ -946,7 +953,8 @@ void registerOperators(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, int retval = callMutateInputs(mutate_fp, attr_keys.data(), attr_vals.data(), attr_keys.size(), &mutate_indices, &indices_size); std::string msgs = getExtensionMsgs(msgSize, msgGet); - CHECK(retval) << "Error calling MutateInputs for custom operator '" << name_str << "'" << msgs; + CHECK(retval) << "Error calling MutateInputs for custom operator '" << name_str << "'" + << msgs; std::vector mutate_indices_list(indices_size); for (int i=0; i < indices_size; i++) { @@ -993,7 +1001,8 @@ void registerOperators(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, instypes.data(), num_inputs, outstypes.data(), out_stypes->size()); std::string msgs = getExtensionMsgs(msgSize, msgGet); - CHECK(retval) << "Error calling InferSType for custom operator '" << name_str << "'" << msgs; + CHECK(retval) << "Error calling InferSType for custom operator '" << name_str << "'" + << msgs; // copy and assign modified input storage types from custom op to MXNet memory. for (size_t i = 0; i < num_inputs; i++) { @@ -1103,14 +1112,16 @@ void registerOperators(void *lib, int verbose, mxnet::ext::msgSize_t msgSize, int retval = callCreateOpState(createop_map.at("cpu"), attr_keys.data(), attr_vals.data(), attr_keys.size(), &state_op_inst); std::string msgs = getExtensionMsgs(msgSize, msgGet); - CHECK(retval) << "Error calling CreateOpState CPU for custom operator '" << name_str << "'" << msgs; + CHECK(retval) << "Error calling CreateOpState CPU for custom operator '" << name_str << "'" + << msgs; } else if (ctx.dev_mask() == Context::kGPU) { CHECK(createop_map.count("gpu") > 0) << "GPU CreateOpState not implemented for '" << name_str << "'"; int retval = callCreateOpState(createop_map.at("gpu"), attr_keys.data(), attr_vals.data(), attr_keys.size(), &state_op_inst); std::string msgs = getExtensionMsgs(msgSize, msgGet); - CHECK(retval) << "Error calling CreateOpState GPU for custom operator '" << name_str << "'" << msgs; + CHECK(retval) << "Error calling CreateOpState GPU for custom operator '" << name_str << "'" + << msgs; } std::string msgs = getExtensionMsgs(msgSize, msgGet); @@ -1477,7 +1488,7 @@ int MXLoadLib(const char *path, unsigned verbose) { std::string msgs = getExtensionMsgs(msgSize, msgGet); LOG(FATAL) << "Library failed to initialize" << msgs; } - + // find ops, partitioners, and passes in library registerOperators(lib, verbose, msgSize, msgGet); registerPartitioners(lib, verbose, msgSize, msgGet); From d2fba1dc43d3ebc488a4358b8dbf2ed10dceaed0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 10 Aug 2020 18:06:36 +0000 Subject: [PATCH 22/25] removed whitespace --- include/mxnet/lib_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h index ce10bfa82223..668783f33f05 100644 --- a/include/mxnet/lib_api.h +++ b/include/mxnet/lib_api.h @@ -1520,7 +1520,7 @@ class Registry { MX_STR_CONCAT(MX_REGISTER_PASS_DEF_(Name), __COUNTER__) = \ Registry::get()->add(MX_TOSTRING(Name)) -/* \brief Class to store error messages from extensions to pass to MXNet */ +/* \brief Class to store error messages from extensions to pass to MXNet */ class MXerrorMsgs { public: /*! 
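As an illustrative aside (not part of the patches above), the error-messaging flow introduced here works in two halves: extension code appends messages with `MX_ERROR_MSG` and returns `MX_FAIL`, and MXNet later drains those messages through the library's `_msgSize`/`_msgGet` entry points. Below is a minimal sketch, assuming the `msgSize_t`/`msgGet_t` typedefs added to `lib_api.h`; the function names `checkNumInputs` and `dumpExtensionTraceback` are hypothetical:

```c++
#include <iostream>
#include "lib_api.h"
using namespace mxnet::ext;

// extension-library side: record why a call failed before returning MX_FAIL
MXReturnValue checkNumInputs(int num_inputs) {
  if (num_inputs != 2) {
    MX_ERROR_MSG << "expected 2 inputs but got " << num_inputs;
    return MX_FAIL;
  }
  return MX_SUCCESS;
}

// MXNet side (simplified): after a failed call, walk the accumulated messages
// using the msgSize_t/msgGet_t function pointers looked up from the library
void dumpExtensionTraceback(msgSize_t msgSize, msgGet_t msgGet) {
  for (int i = 0; i < msgSize(); ++i) {
    const char* msg = nullptr;
    msgGet(i, &msg);
    std::cout << "\t[" << i << "] " << msg << std::endl;
  }
}
```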
From c00e88ada4e72ed1f68789c6688bf7c0e0aa09f7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 11 Aug 2020 17:08:07 +0000 Subject: [PATCH 23/25] added reference image for README --- example/extensions/lib_pass/README.md | 2 ++ example/extensions/lib_pass/example_connection.png | 11 +++++++++++ 2 files changed, 13 insertions(+) create mode 100644 example/extensions/lib_pass/example_connection.png diff --git a/example/extensions/lib_pass/README.md b/example/extensions/lib_pass/README.md index e7c50d0fc833..18272c0be436 100644 --- a/example/extensions/lib_pass/README.md +++ b/example/extensions/lib_pass/README.md @@ -170,6 +170,8 @@ n2->inputs.push_back({n1,0}); ``` Here node `n1` produces an output at index 0 that is consumed by node `n2` on the input at index 1. +![example connection](example_connection.png) + Some graph passes require allocating new NDArrays to add/replace model params. The `alloc_arg` and `alloc_aux` APIs enable allocating new NDArrays and integrate them with the model args and aux params. Both APIs have the following signature: ```c++ diff --git a/example/extensions/lib_pass/example_connection.png b/example/extensions/lib_pass/example_connection.png new file mode 100644 index 000000000000..3a6256d6456e --- /dev/null +++ b/example/extensions/lib_pass/example_connection.png @@ -0,0 +1,11 @@ +--2020-08-11 06:18:48-- https://user-images.githubusercontent.com/1541993/88966633-7aac6300-d261-11ea-80de-ae3490640ff7.png +Resolving user-images.githubusercontent.com (user-images.githubusercontent.com)... 151.101.52.133 +Connecting to user-images.githubusercontent.com (user-images.githubusercontent.com)|151.101.52.133|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: 8443 (8.2K) [image/png] +Saving to: ‘88966633-7aac6300-d261-11ea-80de-ae3490640ff7.png’ + + 0K ........ 
100% 38.9M=0s
+
+2020-08-11 06:18:48 (38.9 MB/s) - ‘88966633-7aac6300-d261-11ea-80de-ae3490640ff7.png’ saved [8443/8443]
+

From b316c5e9dd19c5abf7021d53ccf06c06775a9566 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 11 Aug 2020 17:09:52 +0000
Subject: [PATCH 24/25] updated image

---
 .../extensions/lib_pass/example_connection.png | Bin 674 -> 8443 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/example/extensions/lib_pass/example_connection.png b/example/extensions/lib_pass/example_connection.png
index 3a6256d6456ed58b03e8a6e0ff929862d8797ab4..ef56c6228a6f9035b18461fbb9dd85527be57506 100644
GIT binary patch
[base85-encoded binary data omitted: the 674-byte wget-log placeholder is replaced by the 8443-byte example_connection.png image]

Date: Tue, 11 Aug 2020 17:16:32 +0000
Subject: [PATCH 25/25] fixed its

---
 include/mxnet/lib_api.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/mxnet/lib_api.h b/include/mxnet/lib_api.h
index 668783f33f05..3367bc661c12 100644
--- a/include/mxnet/lib_api.h
+++ b/include/mxnet/lib_api.h
@@ -893,7 +893,7 @@ class Graph {
       n->op = node.map[JsonVal("op")].str;
       n->name = node.map[JsonVal("name")].str;

-      // if op is null its an input to the graph
+      // if op is null it is an input to the graph
       if (n->op.compare("null") == 0)
         g->inputs.push_back(n);