[TIR][USMP] Greedy algorithms for USMP

manupak · manupak · commit 7c93c60c9a38 · 2021-11-30T20:08:51.000Z
This commit removes commented-out lines,
makes a few trivial cleanups, and adds a few BufferInfo-based
tests to check the algorithms.

Change-Id: I1a12b6a424370e9e4c4a55563dde0ad698b07ea3
diff --git a/python/tvm/tir/usmp/utils.py b/python/tvm/tir/usmp/utils.py
@@ -114,6 +114,10 @@ def __init__(
             alignment,
         )
 
+    def set_conflicts(self, conflicts: list):
+        """Sets the conflicting array of buffer info objects"""
+        _ffi_api.BufferInfoSetConflicts(self, conflicts)
+
 
 @register_object("tir.usmp.PoolAllocation")
 class PoolAllocation(Object):
diff --git a/src/tir/usmp/algo/greedy.cc b/src/tir/usmp/algo/greedy.cc
@@ -18,7 +18,7 @@
  */
 
 /*!
- * \file tir/analysis/usmp/algo/greedy_by_size.cc
+ * \file tir/analysis/usmp/algo/greedy.cc
  * \brief This source contains greedy algorithms for planning
  * memory for USMP. There are two algorithms present here :
  * 1) greedy_by_size and 2) greedy_by_conflicts.
@@ -89,17 +89,17 @@ class GreedyBase {
    * \brief Selects a pool for placement in the given set of ordered pool candidates
    */
   PoolInfo SelectPlacementPool(
-      const Array<PoolInfo>& pool_candidates,
+      const BufferInfo& buf_info,
       const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {
     // Here the pool candidates are ordered when it is consumed by the algorithm.
     // This could be from order the user has specified. However, schedulers are
     // welcome to change the order for performance reasons.
-    for (const auto& pool_info : pool_candidates) {
+    for (const auto& pool_info : buf_info->pool_candidates) {
       if (pool_offsets.count(pool_info)) {
         return pool_info;
       }
     }
-    ICHECK(false) << "TVM USMP Internal Error: no candidate have been selected!";
+    CHECK(false) << "TVM USMP Error: no candidate have been selected for " << buf_info;
     return PoolInfo();
   }
 
@@ -141,7 +141,7 @@ class GreedyBase {
           }
         }
       }
-      auto selected_pool = SelectPlacementPool(buf_info->pool_candidates, pool_offset_candidates);
+      auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
       pool_allocations.Set(
           buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));
     }
diff --git a/src/tir/usmp/analysis/extract_buffer_info.cc b/src/tir/usmp/analysis/extract_buffer_info.cc
@@ -454,7 +454,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
 
   // Traverse the liveness events using a open set to track what
   // is live while updating the conflicts through out the linear traversal
-  //  std::unordered_set<BufferInfo, ObjectPtrHash, ObjectPtrEqual> open_set;
   std::unordered_map<BufferInfo, int, ObjectPtrHash, ObjectPtrEqual> open_set;
   for (const auto& le_event : le_events_timeline) {
     if (le_event.le_type == START) {
@@ -465,7 +464,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
           le_event.buffer_info->conflicts.push_back(open_buffer_info);
         }
       }
-      //      open_set.insert(le_event.buffer_info);
       if (open_set.find(le_event.buffer_info) == open_set.end()) {
         open_set[le_event.buffer_info] = 1;
       } else {
@@ -477,7 +475,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
       } else {
         open_set[le_event.buffer_info] -= 1;
       }
-      //      open_set.erase(le_event.buffer_info);
     }
   }
   return this->buffer_info_map_;
diff --git a/tests/python/unittest/test_tir_usmp_algo.py b/tests/python/unittest/test_tir_usmp_algo.py
@@ -51,7 +51,7 @@ def get_allocate(stmt):
     return allocates
 
 
-def assign_poolinfos_to_allocates_in_primfunc(primfunc, pool_infos):
+def _assign_poolinfos_to_allocates_in_primfunc(primfunc, pool_infos):
     """helper to assing poolinfos to allocate nodes in a tir.PrimFunc"""
 
     def set_poolinfos(stmt):
@@ -68,12 +68,12 @@ def set_poolinfos(stmt):
     return primfunc.with_body(stmt_functor.ir_transform(primfunc.body, None, set_poolinfos))
 
 
-def assign_poolinfos_to_allocates_in_irmodule(mod, pool_infos):
+def _assign_poolinfos_to_allocates_in_irmodule(mod, pool_infos):
     """helper to assing poolinfos to allocate nodes in a IRModule"""
     ret = tvm.IRModule()
     for global_var, basefunc in mod.functions.items():
         if isinstance(basefunc, tvm.tir.PrimFunc):
-            ret[global_var] = assign_poolinfos_to_allocates_in_primfunc(basefunc, pool_infos)
+            ret[global_var] = _assign_poolinfos_to_allocates_in_primfunc(basefunc, pool_infos)
     return ret
 
 
@@ -96,9 +96,204 @@ def _check_max_workspace_size(buffer_pool_allocations, pool_info, size):
     assert max_workspace_size == size
 
 
+def test_no_pool_error():
+    target = Target("c")
+    tiny_workspace_pool = usmp_utils.PoolInfo(
+        pool_name="tiny_workspace",
+        target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+        size_hint_bytes=10,
+    )
+    bi_a = usmp_utils.BufferInfo(
+        name_hint="bi_a", size_bytes=10, pool_candidates=[tiny_workspace_pool]
+    )
+    bi_b = usmp_utils.BufferInfo(
+        name_hint="bi_b", size_bytes=10, pool_candidates=[tiny_workspace_pool]
+    )
+    bi_c = usmp_utils.BufferInfo(
+        name_hint="bi_c", size_bytes=10, pool_candidates=[tiny_workspace_pool]
+    )
+    bi_a.set_conflicts([bi_b])
+    bi_b.set_conflicts([bi_c])
+    bi_c.set_conflicts([bi_a])
+    buffer_info_arr = [bi_a, bi_b, bi_c]
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.greedy_by_size")
+    with pytest.raises(
+        tvm.TVMError, match="TVM USMP Error: no candidate have been selected for BufferInfoNode"
+    ):
+        buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+
+
+@pytest.mark.parametrize("algorithm", ["greedy_by_size", "greedy_by_conflicts"])
+def test_name_based_ordering(algorithm):
+    """This checks that a stable result is generated when the sizes and conflicts are the same"""
+
+    def _test():
+        target = Target("c")
+        global_workspace_pool = usmp_utils.PoolInfo(
+            pool_name="global_workspace",
+            target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+        )
+        bi_a = usmp_utils.BufferInfo(
+            name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
+        )
+        bi_b = usmp_utils.BufferInfo(
+            name_hint="bi_b", size_bytes=10, pool_candidates=[global_workspace_pool]
+        )
+        bi_c = usmp_utils.BufferInfo(
+            name_hint="bi_c", size_bytes=10, pool_candidates=[global_workspace_pool]
+        )
+        bi_a.set_conflicts([bi_b])
+        bi_b.set_conflicts([bi_c])
+        bi_c.set_conflicts([bi_a])
+
+        buffer_info_arr = [bi_a, bi_b, bi_c]
+        fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+        buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+        assert buffer_pool_allocations[bi_a].byte_offset == 0
+        assert buffer_pool_allocations[bi_b].byte_offset == 20
+        assert buffer_pool_allocations[bi_c].byte_offset == 10
+
+    # This is tested for several times to check stability
+    for x in range(0, 10):
+        _test()
+
+
+@pytest.mark.parametrize(
+    ["algorithm", "workspace_size"],
+    [("greedy_by_size", 140), ("greedy_by_conflicts", 140)],
+)
+def test_linear(algorithm, workspace_size):
+    """
+    The test case here represent BufferInfo objects
+    that could get generated for a linear sequence
+    such as :
+    (Op A)
+    |
+    bi_a
+    |
+    (Op B)
+    |
+    bi_b
+    |
+    .
+    .
+    .
+    (Op F)
+    |
+    bi_f
+    """
+    target = Target("c")
+    global_workspace_pool = usmp_utils.PoolInfo(
+        pool_name="global_workspace",
+        target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+    )
+    bi_a = usmp_utils.BufferInfo(
+        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
+    )
+    bi_b = usmp_utils.BufferInfo(
+        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
+    )
+    bi_c = usmp_utils.BufferInfo(
+        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
+    )
+    bi_d = usmp_utils.BufferInfo(
+        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
+    )
+    bi_e = usmp_utils.BufferInfo(
+        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
+    )
+    bi_f = usmp_utils.BufferInfo(
+        name_hint="bi_f", size_bytes=50, pool_candidates=[global_workspace_pool]
+    )
+
+    # Creating conflicts for a linear graph
+    bi_a.set_conflicts([bi_b])
+    bi_b.set_conflicts([bi_a, bi_c])
+    bi_c.set_conflicts([bi_b, bi_d])
+    bi_d.set_conflicts([bi_c, bi_e])
+    bi_e.set_conflicts([bi_d, bi_f])
+    bi_f.set_conflicts([bi_e])
+
+    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f]
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
+
+
+@pytest.mark.parametrize(
+    ["algorithm", "workspace_size"],
+    [("greedy_by_size", 190), ("greedy_by_conflicts", 320)],
+)
+def test_fanout(algorithm, workspace_size):
+    """
+    The test case here represent BufferInfo objects
+    that could get generated for a fanout topology
+    such as :
+    (Op A)
+    |
+    bi_a ---------
+    |            |
+    (Op B)     (Op C)
+    |            |
+    bi_b        bi_c
+    |            |
+    (Op D)     (Op E)
+    |            |
+    bi_d        bi_e
+    |            |
+    (Op F) ------
+    |
+    bi_f
+    |
+    (Op G)
+    |
+    bi_g
+    """
+    target = Target("c")
+    global_workspace_pool = usmp_utils.PoolInfo(
+        pool_name="global_workspace",
+        target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+    )
+    bi_a = usmp_utils.BufferInfo(
+        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
+    )
+    bi_b = usmp_utils.BufferInfo(
+        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
+    )
+    bi_c = usmp_utils.BufferInfo(
+        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
+    )
+    bi_d = usmp_utils.BufferInfo(
+        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
+    )
+    bi_e = usmp_utils.BufferInfo(
+        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
+    )
+    bi_f = usmp_utils.BufferInfo(
+        name_hint="bi_f", size_bytes=60, pool_candidates=[global_workspace_pool]
+    )
+    bi_g = usmp_utils.BufferInfo(
+        name_hint="bi_g", size_bytes=70, pool_candidates=[global_workspace_pool]
+    )
+
+    # Creating conflicts for a fanout graph
+    bi_a.set_conflicts([bi_b, bi_c])
+    bi_b.set_conflicts([bi_a, bi_c, bi_e])
+    bi_c.set_conflicts([bi_e, bi_a, bi_b, bi_d])
+    bi_d.set_conflicts([bi_b, bi_f, bi_c, bi_e])
+    bi_e.set_conflicts([bi_c, bi_f, bi_b, bi_d])
+    bi_f.set_conflicts([bi_d, bi_e, bi_f])
+    bi_g.set_conflicts([bi_f])
+
+    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f, bi_g]
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
+
+
 # fmt: off
 @tvm.script.ir_module
-class LinearStructure:
+class MobilenetStructure:
     @T.prim_func
     def tvmgen_default_fused_cast_subtract(placeholder_2: T.handle, placeholder_3: T.handle, T_subtract: T.handle) -> None:
         # function attr dict
@@ -167,22 +362,11 @@ def run_model(input: T.handle, output: T.handle) -> None:
 # fmt: on
 
 
-def print_conflicts(buffer_info_map):
-    """_verify_conflicts("sid_8", ["Conv2dOutput_7", "tensor_2"], buffer_info_map)"""
-
-    for buffer_info_name, buf_info in buffer_info_map.items():
-        conflict_str = "["
-        for conflict in buf_info.conflicts:
-            conflict_str += f'"{conflict.name_hint}", '
-        conflict_str += "]"
-        print(f'_verify_conflicts("{buffer_info_name}", {conflict_str}, buffer_info_map_names)')
-
-
 @pytest.mark.parametrize(
     ["algorithm", "fast_memory_size", "slow_memory_size"],
     [("greedy_by_size", 200704, 1418528), ("greedy_by_conflicts", 200704, 1418528)],
 )
-def test_linear(algorithm, fast_memory_size, slow_memory_size):
+def test_mobilenet_subgraph(algorithm, fast_memory_size, slow_memory_size):
     target = Target("c")
     fast_memory_pool = usmp_utils.PoolInfo(
         pool_name="fast_memory",
@@ -192,18 +376,18 @@ def test_linear(algorithm, fast_memory_size, slow_memory_size):
     slow_memory_pool = usmp_utils.PoolInfo(
         pool_name="slow_memory", target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS}
     )
-    tir_mod = LinearStructure
+    tir_mod = MobilenetStructure
     tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
-    tir_mod = assign_poolinfos_to_allocates_in_irmodule(
+    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(
         tir_mod, [fast_memory_pool, slow_memory_pool]
     )
     main_func = tir_mod["run_model"]
     buffer_info_map = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
 
     fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
     buffer_info_arr = fcreate_array_bi(buffer_info_map)
-    fusmp_algo_greedy_by_size = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
-    buffer_pool_allocations = fusmp_algo_greedy_by_size(buffer_info_arr)
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
 
     buffer_info_map_names = dict()
     for buf_info in buffer_info_arr:
@@ -346,22 +530,22 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place
 @pytest.mark.parametrize(
     ["algorithm", "workspace_size"], [("greedy_by_size", 7920256), ("greedy_by_conflicts", 7200256)]
 )
-def test_fanout(algorithm, workspace_size):
+def test_resnet_subgraph(algorithm, workspace_size):
     target = Target("c")
     global_workspace_pool = usmp_utils.PoolInfo(
         pool_name="global_workspace",
         target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
     )
     tir_mod = ResnetStructure
     tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
-    tir_mod = assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
+    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
     main_func = tir_mod["tvmgen_default_run_model"]
     buffer_info_map = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
 
     fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
     buffer_info_arr = fcreate_array_bi(buffer_info_map)
-    fusmp_algo_greedy_by_size = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
-    buffer_pool_allocations = fusmp_algo_greedy_by_size(buffer_info_arr)
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
 
     buffer_info_map_names = dict()
     for buf_info in buffer_info_arr:

Original file line number	Diff line number	Diff line change
`@@ -114,6 +114,10 @@ def __init__(`
`114`	`114`	`alignment,`
`115`	`115`	`)`
`116`	`116`
	`117`	`+ def set_conflicts(self, conflicts: list):`
	`118`	`+ """Sets the conflicting array of buffer info objects"""`
	`119`	`+ _ffi_api.BufferInfoSetConflicts(self, conflicts)`
	`120`	`+`
`117`	`121`
`118`	`122`	`@register_object("tir.usmp.PoolAllocation")`
`119`	`123`	`class PoolAllocation(Object):`
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@`
`18`	`18`	`*/`
`19`	`19`
`20`	`20`	`/*!`
`21`		`- * \file tir/analysis/usmp/algo/greedy_by_size.cc`
	`21`	`+ * \file tir/analysis/usmp/algo/greedy.cc`
`22`	`22`	`* \brief This source contains greedy algorithms for planning`
`23`	`23`	`* memory for USMP. There are two algorithms present here :`
`24`	`24`	`* 1) greedy_by_size and 2) greedy_by_conflicts.`
`@@ -89,17 +89,17 @@ class GreedyBase {`
`89`	`89`	`* \brief Selects a pool for placement in the given set of ordered pool candidates`
`90`	`90`	`*/`
`91`	`91`	`PoolInfo SelectPlacementPool(`
`92`		`- const Array<PoolInfo>& pool_candidates,`
	`92`	`+ const BufferInfo& buf_info,`
`93`	`93`	`const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {`
`94`	`94`	`// Here the pool candidates are ordered when it is consumed by the algorithm.`
`95`	`95`	`// This could be from order the user has specified. However, schedulers are`
`96`	`96`	`// welcome to change the order for performance reasons.`
`97`		`- for (const auto& pool_info : pool_candidates) {`
	`97`	`+ for (const auto& pool_info : buf_info->pool_candidates) {`
`98`	`98`	`if (pool_offsets.count(pool_info)) {`
`99`	`99`	`return pool_info;`
`100`	`100`	`}`
`101`	`101`	`}`
`102`		`- ICHECK(false) << "TVM USMP Internal Error: no candidate have been selected!";`
	`102`	`+ CHECK(false) << "TVM USMP Error: no candidate have been selected for " << buf_info;`
`103`	`103`	`return PoolInfo();`
`104`	`104`	`}`
`105`	`105`
`@@ -141,7 +141,7 @@ class GreedyBase {`
`141`	`141`	`}`
`142`	`142`	`}`
`143`	`143`	`}`
`144`		`- auto selected_pool = SelectPlacementPool(buf_info->pool_candidates, pool_offset_candidates);`
	`144`	`+ auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);`
`145`	`145`	`pool_allocations.Set(`
`146`	`146`	`buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));`
`147`	`147`	`}`
Original file line number	Diff line number	Diff line change
`@@ -454,7 +454,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_`
`454`	`454`
`455`	`455`	`// Traverse the liveness events using a open set to track what`
`456`	`456`	`// is live while updating the conflicts through out the linear traversal`
`457`		`- // std::unordered_set<BufferInfo, ObjectPtrHash, ObjectPtrEqual> open_set;`
`458`	`457`	`std::unordered_map<BufferInfo, int, ObjectPtrHash, ObjectPtrEqual> open_set;`
`459`	`458`	`for (const auto& le_event : le_events_timeline) {`
`460`	`459`	`if (le_event.le_type == START) {`
`@@ -465,7 +464,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_`
`465`	`464`	`le_event.buffer_info->conflicts.push_back(open_buffer_info);`
`466`	`465`	`}`
`467`	`466`	`}`
`468`		`- // open_set.insert(le_event.buffer_info);`
`469`	`467`	`if (open_set.find(le_event.buffer_info) == open_set.end()) {`
`470`	`468`	`open_set[le_event.buffer_info] = 1;`
`471`	`469`	`} else {`
`@@ -477,7 +475,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_`
`477`	`475`	`} else {`
`478`	`476`	`open_set[le_event.buffer_info] -= 1;`
`479`	`477`	`}`
`480`		`- // open_set.erase(le_event.buffer_info);`
`481`	`478`	`}`
`482`	`479`	`}`
`483`	`480`	`return this->buffer_info_map_;`