[Relay] Remove memory planning from LowerTEPass #8974

Merged Sep 15, 2021 (18 commits)
Changes shown are from 8 of the 18 commits.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/ci-problem.md
@@ -19,4 +19,4 @@ Provide a link to the specific run that has failed.

### Flakiness

Have you seen this multiple times in this branch or in other branches?
Have you seen this multiple times in this branch or in other branches?
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
# Using this script we can reuse docker/install scripts to configure the reference
# Using this script we can reuse docker/install scripts to configure the reference
# virtual machine similar to CI QEMU setup.
#

2 changes: 1 addition & 1 deletion cmake/modules/contrib/EthosU.cmake
@@ -18,4 +18,4 @@
if(USE_ETHOSU)
file(GLOB ETHOSU_RELAY_CONTRIB_SRC src/relay/backend/contrib/ethosu/*)
list(APPEND COMPILER_SRCS ${ETHOSU_RELAY_CONTRIB_SRC})
endif(USE_ETHOSU)
endif(USE_ETHOSU)
2 changes: 1 addition & 1 deletion docs/langref/relay_pattern.rst
@@ -89,7 +89,7 @@ Or a convolution with a specific kernel size:
x = relay.var('x')
y = relay.var('y')
assert is_conv2d.match(relay.op.nn.conv2d(x, y, kernel_size=[3, 3]))



Matching an Optional Op
20 changes: 18 additions & 2 deletions include/tvm/ir/module.h
@@ -122,6 +122,7 @@ class IRModuleNode : public Object {
v->Visit("global_var_map_", &global_var_map_);
v->Visit("global_type_var_map_", &global_type_var_map_);
v->Visit("source_map", &source_map);
v->Visit("attrs", &attrs);
}

TVM_DLL bool SEqualReduce(const IRModuleNode* other, SEqualReducer equal) const;
@@ -277,6 +278,12 @@
*/
TVM_DLL void Update(const IRModule& other);

/*!
* \brief Create a shallow copy of this IRModule.
* \returns The shallow copy of the IRModule.
*/
TVM_DLL IRModule ShallowCopy();

/*!
* \brief Import Relay code from the file at path.
* \param path The path of the Relay code to import.
@@ -348,12 +355,14 @@ class IRModule : public ObjectRef {
* \brief constructor
* \param functions Functions in the module.
* \param type_definitions Type definitions in the module.
* \param import_set Set of imported files in the module
* \param import_set Set of imported files in the module.
* \param map The module source map.
* \param attrs The module attributes.
*/
TVM_DLL explicit IRModule(Map<GlobalVar, BaseFunc> functions,
Map<GlobalTypeVar, TypeData> type_definitions = {},
std::unordered_set<String> import_set = {}, parser::SourceMap map = {});
std::unordered_set<String> import_set = {}, parser::SourceMap map = {},
DictAttrs attrs = {});

/*! \brief default constructor */
IRModule() : IRModule(Map<GlobalVar, BaseFunc>({})) {}
@@ -415,6 +424,13 @@
*/
TVM_DLL static IRModule FromText(const String& text, const String& source_path);

/*!
* \brief Create a shallow copy of an IRModule.
* \param mod The module to copy.
* \return The copied module.
*/
IRModule ShallowCopyIRModule(IRModule mod);

/*! \brief Declare the container type. */
using ContainerType = IRModuleNode;

11 changes: 10 additions & 1 deletion src/ir/module.cc
@@ -43,7 +43,8 @@ namespace tvm {

IRModule::IRModule(tvm::Map<GlobalVar, BaseFunc> functions,
tvm::Map<GlobalTypeVar, TypeData> type_definitions,
std::unordered_set<String> import_set, parser::SourceMap source_map) {
std::unordered_set<String> import_set, parser::SourceMap source_map,
DictAttrs attrs) {
auto n = make_object<IRModuleNode>();
n->functions = std::move(functions);
n->type_definitions = std::move(type_definitions);
@@ -52,6 +53,7 @@ IRModule::IRModule(tvm::Map<GlobalVar, BaseFunc> functions,
n->constructor_tag_map_ = {};
n->import_set_ = std::move(import_set);
n->source_map = source_map;
n->attrs = std::move(attrs);

for (const auto& kv : n->functions) {
// set global var map
@@ -72,6 +74,7 @@ IRModule::IRModule(tvm::Map<GlobalVar, BaseFunc> functions,

bool IRModuleNode::SEqualReduce(const IRModuleNode* other, SEqualReducer equal) const {
if (functions.size() != other->functions.size()) return false;
if (!equal(this->attrs, other->attrs)) return false;
for (const auto& kv : this->functions) {
if (!other->ContainGlobalVar(kv.first->name_hint)) return false;
if (!equal(kv.second, other->Lookup(kv.first->name_hint))) return false;
@@ -112,6 +115,7 @@ void IRModuleNode::SHashReduce(SHashReducer hash_reduce) const {
temp.emplace_back(kv.first->name_hint, kv.second);
}
reduce_temp();
hash_reduce(this->attrs);
}

bool IRModuleNode::ContainGlobalVar(const String& name) const {
@@ -361,6 +365,11 @@ void IRModuleNode::Update(const IRModule& mod) {
}
}

IRModule IRModuleNode::ShallowCopy() {
return IRModule(this->functions, this->type_definitions, this->Imports(), this->source_map,
this->attrs);
}

std::pair<IRModule, GlobalVar> IRModule::FromExprInContext(
const RelayExpr& expr, const tvm::Map<GlobalVar, BaseFunc>& global_funcs,
const tvm::Map<GlobalTypeVar, TypeData>& type_definitions,
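For orientation, here is a minimal sketch (not part of the diff) of how the new attrs constructor argument and IRModuleNode::ShallowCopy interact; the attribute key "example_key" and the wrapper function ShallowCopyExample are purely illustrative:

#include <tvm/ir/attrs.h>
#include <tvm/ir/module.h>

using namespace tvm;

void ShallowCopyExample() {
  // Module-level attributes now flow through the IRModule constructor.
  Map<String, ObjectRef> dict;
  dict.Set("example_key", String("example_value"));  // illustrative key
  IRModule mod(Map<GlobalVar, BaseFunc>({}), {}, {}, {}, DictAttrs(dict));

  // The shallow copy shares the function map, type definitions, imports,
  // source map, and attrs rather than deep-copying the functions.
  IRModule copy = mod->ShallowCopy();
  ICHECK(copy->attrs.defined());
  ICHECK_EQ(copy->attrs->dict.count("example_key"), 1);
}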
144 changes: 142 additions & 2 deletions src/relay/backend/aot_executor_codegen.cc
@@ -45,6 +45,13 @@ namespace tvm {
namespace relay {
namespace backend {

struct EnumClassHash {
Contributor: Could you move EnumClassHash into "utils.h" and remove this definition of it and the definition in the te compiler?

Contributor Author: done

template <typename T>
std::size_t operator()(T t) const {
return static_cast<std::size_t>(t);
}
};
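Following the review exchange above, a minimal sketch (assumed layout, not taken from this diff) of what the shared definition in src/relay/backend/utils.h could look like once the AOT codegen and the TE compiler both include it:

// src/relay/backend/utils.h (sketch)
#include <cstddef>

namespace tvm {
namespace relay {
namespace backend {

/*! \brief Hash functor so enum classes such as DLDeviceType can key unordered containers. */
struct EnumClassHash {
  template <typename T>
  std::size_t operator()(T t) const {
    return static_cast<std::size_t>(t);
  }
};

}  // namespace backend
}  // namespace relay
}  // namespace tvm

Call sites in both codegens would then spell the maps as, for example, std::unordered_map<DLDeviceType, int, backend::EnumClassHash>.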

using IntegerArray = Array<Integer>;
using StorageMap =
std::unordered_map<Expr, StorageInfo, runtime::ObjectPtrHash, runtime::ObjectPtrEqual>;
@@ -277,6 +284,132 @@ class AOTExecutorCodegen : public MixedModeVisitor {
}
}

/*!
 * \brief Update the "main" control function's metadata.
 *
 * \param mod The module, which must contain only the "main" function.
 * \param targets Map of targets.
 * \param storage_info_map Storage info for each expression in "main".
 * \return The FunctionInfo (workspace, io, and constant sizes) for "main".
 */
backend::FunctionInfo UpdateMainWorkspaceSize(const IRModule& mod, tec::TargetMap targets,
Map<Expr, backend::StorageInfo> storage_info_map) {
CHECK_EQ(mod->functions.size(), 1)
<< "There should only be one function in the module passed to UpdateMainWorkspaceSize";
Function func = Downcast<Function>(mod->Lookup("main"));

// This is a Map<device, Map<storage_id, size>>
std::unordered_map<DLDeviceType, std::unordered_map<int, int>, EnumClassHash> sid_workspace;
// This is a Map<device, size_of_inputs_and_outputs>
std::unordered_map<DLDeviceType, int, EnumClassHash> device_io;
// This is a Map<device, size_of_constants>
std::unordered_map<DLDeviceType, int, EnumClassHash> device_consts;

// Initialize the mapping from all storage identifiers to workspace sizes,
// the amount of device io, and the device constants.
for (const auto& kv : storage_info_map) {
backend::StorageInfo storage_info = kv.second;
std::vector<int64_t> storage_ids = storage_info->storage_ids;
std::vector<DLDeviceType> devices = storage_info->device_types;

CHECK_EQ(storage_ids.size(), devices.size());
for (uint32_t i = 0; i < devices.size(); i++) {
sid_workspace[devices[i]][storage_ids[i]] = 0;
device_io[devices[i]] = 0;
device_consts[devices[i]] = 0;
}
}

// Iterate the storage map to compute all the tensor sizes in the program.
// There are 3 cases in this code:
//
// First we need to compute the sizes of all
// inline constants.
//
// Second we compute the size of any bound variable as these are input and output
// sizes of the program.
//
// Finally for all other expressions we check which storage identifier they have
// been assigned and we compute the maximal size of the storage, as tensors can
// share storage with other tensors which are the same size or larger.
//
// In this final case there is only one allocation for all tensors which share storage,
// and its size is the maximal size of all tensors which were assigned to it.
for (const auto& kv : storage_info_map) {
Expr expr = kv.first;
int64_t size_bytes = backend::CalculateRelayExprSizeBytes(expr->checked_type());
backend::StorageInfo storage_info = kv.second;
std::vector<int64_t> storage_ids = storage_info->storage_ids;
std::vector<DLDeviceType> devices = storage_info->device_types;

if (expr->IsInstance<ConstantNode>()) {
for (const auto& dev : devices) {
device_consts[dev] += size_bytes;
}
continue;
} else if (expr->IsInstance<VarNode>() || expr.same_as(func->body)) {
CHECK_GE(devices.size(), 1) << "must be at least one device";
for (const auto& dev : devices) {
device_io[dev] += size_bytes;
}
continue;
}

// TODO(@electriclilies): This code is never being called which means sid_workspace is not
// updated.. This means that storage info is probably not being created correctly. Or is not
// equivalent to what was here previously
for (uint32_t i = 0; i < storage_ids.size(); i++) {
// Here we record the largest size of the tensors
// that share the same storage id, because a storage_id can
// be shared between multiple tensors that are not live simultaneously.
if (size_bytes > sid_workspace[devices[i]][storage_ids[i]]) {
sid_workspace[devices[i]][storage_ids[i]] = size_bytes;
}
}
}

// This is a Map<device, workspace_size>
std::unordered_map<DLDeviceType, int, EnumClassHash> device_workspace;
// Once we know the sizes of sids, we need to accumulate per device
for (const auto& dev_sid_size : sid_workspace) {
auto dev = dev_sid_size.first;
device_workspace[dev] = 0;
for (const auto& sid_size : dev_sid_size.second) {
device_workspace[dev] += sid_size.second;
}
}

Map<Target, Integer> workspace_sizes;
Map<Target, Integer> io_sizes;
Map<Target, Integer> constant_sizes;
Map<Target, tir::PrimFunc> tir_primfuncs;
Map<Target, Function> relay_primfuncs;

// Initialize all target workspaces to zero
for (const auto& kv : targets) {
auto tgt = kv.second;
workspace_sizes.Set(tgt, 0);
}

for (const auto& dev_and_size : device_workspace) {
auto tgt = tec::GetTargetFromInteger(dev_and_size.first, targets);
workspace_sizes.Set(tgt, dev_and_size.second);
relay_primfuncs.Set(tgt, func);
}
for (const auto& dev_and_size : device_io) {
auto tgt = tec::GetTargetFromInteger(dev_and_size.first, targets);
io_sizes.Set(tgt, dev_and_size.second);
}

for (const auto& dev_and_size : device_consts) {
auto tgt = tec::GetTargetFromInteger(dev_and_size.first, targets);
constant_sizes.Set(tgt, dev_and_size.second);
}

return backend::FunctionInfo(workspace_sizes, io_sizes, constant_sizes, tir_primfuncs,
relay_primfuncs);
}
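To make the folding above concrete, here is a small self-contained sketch (toy sizes, not taken from the PR) showing that tensors sharing a storage id contribute only the largest of their sizes, while distinct ids on the same device are summed:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <unordered_map>

int main() {
  // device -> (storage_id -> largest tensor size seen for that id), in bytes
  std::unordered_map<int, std::unordered_map<int, int64_t>> sid_workspace;

  auto record = [&](int dev, int sid, int64_t size_bytes) {
    int64_t& slot = sid_workspace[dev][sid];
    slot = std::max(slot, size_bytes);
  };

  // Two tensors share storage id 0 on device 1: only the larger one counts.
  record(1, 0, 256);
  record(1, 0, 1024);
  // A third tensor has its own storage id 1.
  record(1, 1, 512);

  // Per-device workspace is the sum over its storage ids.
  std::unordered_map<int, int64_t> device_workspace;
  for (const auto& dev_sid_size : sid_workspace) {
    int64_t total = 0;
    for (const auto& sid_size : dev_sid_size.second) total += sid_size.second;
    device_workspace[dev_sid_size.first] = total;
  }

  std::cout << device_workspace[1] << std::endl;  // 1536 = max(256, 1024) + 512
  return 0;
}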

/*!
 * \brief Call a function with a given name
 */
@@ -583,8 +716,15 @@ class AOTExecutorCodegen : public MixedModeVisitor {
// performing the preexisting AOT executor code generation phase.
IRModule mod = IRModule::FromExpr(func);

backend::FunctionInfo func_info;

if (memory_plan.defined()) {
// TODO(@electriclilies, @jroesch): remove UpdateMainWorkspaceSize
func_info = UpdateMainWorkspaceSize(mod, targets_, memory_plan->expr_to_storage_info);
}
Contributor: Can you just put the func_info on the mod here before passing the module into LowerTE? Then you don't need to re-extract it later, and also the logic surrounding func_info is all in one place. (LowerTEPass should preserve all attributes on modules passed into it)
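A hedged sketch of the direction the reviewer suggests: stash the computed FunctionInfo on the module's new attrs field before lowering and read it back afterwards with GetAttr, as this codegen already does for "main_func_info" and "external_mods". The helper name WithModuleAttr is hypothetical and only illustrates writing into IRModuleNode::attrs:

#include <tvm/ir/attrs.h>
#include <tvm/ir/module.h>

namespace tvm {
namespace relay {
namespace backend {

// Hypothetical helper: attach one ObjectRef-valued attribute to a module by
// rebuilding the DictAttrs dictionary that IRModuleNode now carries.
inline IRModule WithModuleAttr(IRModule mod, String key, ObjectRef value) {
  Map<String, ObjectRef> dict;
  if (mod->attrs.defined()) {
    dict = mod->attrs->dict;
  }
  dict.Set(key, value);
  mod->attrs = DictAttrs(dict);
  return mod;
}

}  // namespace backend
}  // namespace relay
}  // namespace tvm

The codegen could then call mod = WithModuleAttr(mod, "main_func_info", func_info); before LowerTEPass and later recover it with lowered_mod->GetAttr<backend::FunctionInfo>("main_func_info"), keeping the func_info logic in one place.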


IRModule lowered_mod =
LowerTEPass(targets_, device_context_map, memory_plan, mod_name, [this](Function func) {
LowerTEPass(targets_, device_context_map, mod_name, [this](Function func) {
// We need to maintain the constant map for external
// functions so we pass this processing function which
// allows us to process each function as we lower it.
@@ -676,7 +816,7 @@ class AOTExecutorCodegen : public MixedModeVisitor {

Optional<Array<tvm::runtime::Module>> external_modules =
lowered_mod->GetAttr<Array<tvm::runtime::Module>>("external_mods");
ICHECK(external_modules) << "Attribute \"external_modules\" should be set at this point.";
ICHECK(external_modules) << "Attribute \"external_mods\" should be set at this point.";

// This is the point where we separate the functions in the module by target
ret.lowered_funcs = tec::GetPerTargetModules(lowered_mod);
15 changes: 6 additions & 9 deletions src/relay/backend/graph_executor_codegen.cc
@@ -222,7 +222,7 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
}

IRModule lowered_mod =
LowerTEPass(targets_, device_context_map, memory_plan_, mod_name_, [this](Function func) {
LowerTEPass(targets_, device_context_map, mod_name_, [this](Function func) {
// We need to maintain the constant map for external
// functions so we pass this processing function which
// allows us to process each function as we lower it.
@@ -241,26 +241,23 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
ICHECK(main_func_info) << "The attribute \"main_func_info\" should be set at this point.";
function_metadata_.Set(runtime::symbol::tvm_module_main, main_func_info.value());

// Get only the Relay functions out of the lowered module so we can run type inference on them
IRModule main_module = tec::GetMainModule(lowered_mod);
main_module = relay::transform::InferType()(main_module);
relay::Function main_func = Downcast<relay::Function>(main_module->Lookup("main"));
Function lowered_main_func = Downcast<Function>(lowered_mod->Lookup("main"));

// Now that we have lowered all operators to TIR code, we can proceed with compilation.
//
// We need to unfortunately re-plan as the previous results have been invalidated by lowering
// we will fix this in future refactors.
memory_plan_ = GraphPlanMemory(main_func);
memory_plan_ = GraphPlanMemory(lowered_main_func);

// The graph planner also can not handle planning calls to global variables so we must remap

// First we convert all the parameters into input nodes.
for (auto param : main_func->params) {
for (auto param : lowered_main_func->params) {
auto node_ptr = GraphInputNode::make_node_ptr(param->name_hint(), GraphAttrs());
var_map_[param.get()] = AddNode(node_ptr, param);
}

heads_ = VisitExpr(main_func->body);
heads_ = VisitExpr(lowered_main_func->body);
std::ostringstream os;

dmlc::JSONWriter writer(&os);
@@ -277,7 +274,7 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<

Optional<Array<tvm::runtime::Module>> external_modules =
lowered_mod->GetAttr<Array<tvm::runtime::Module>>("external_mods");
ICHECK(external_modules) << "Attribute \"external_modules\" should be set at this point.";
ICHECK(external_modules) << "Attribute \"external_mods\" should be set at this point.";

// This is the point where we separate the functions in the module by target
ret.lowered_funcs = tec::GetPerTargetModules(lowered_mod);