Skip to content

Commit 0c1c794

Browse files
jacobbohlin authored and pfk-beta committed
[microNPU] Improve cycles estimates for memory transfers (apache#10508)
Change-Id: Idadc5f354dce42c8dbcdcbe281d324adddb41ba3
1 parent 6759536 commit 0c1c794

File tree

17 files changed

+176
-33
lines changed

17 files changed

+176
-33
lines changed

python/tvm/contrib/ethosu/cascader/block_config.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,21 @@
2828
class BlockConfig(Object):
2929
"""BlockConfig class"""
3030

31-
def __init__(self, output_shape: List[int], compute_cycles: int, output_cycles: int):
31+
def __init__(
32+
self,
33+
input_shape: List[int],
34+
output_shape: List[int],
35+
compute_cycles: int,
36+
output_cycles: int,
37+
):
3238
self.__init_handle_by_constructor__(
33-
_ffi_api.BlockConfig, output_shape, compute_cycles, output_cycles
39+
_ffi_api.BlockConfig, input_shape, output_shape, compute_cycles, output_cycles
3440
)
3541

42+
@property
43+
def input_shape(self) -> List[int]:
44+
return list(self._input_shape)
45+
3646
@property
3747
def output_shape(self) -> List[int]:
3848
return list(self._output_shape)

python/tvm/contrib/ethosu/cascader/device_config.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -551,7 +551,7 @@ def get_elementwise_block_config(
551551
)
552552
output_cycles *= reduce(lambda a, b: a * b, output_block, 1)
553553
output_cycles = int(math.ceil(output_cycles))
554-
block_config.append(BlockConfig(output_block, 0, output_cycles))
554+
block_config.append(BlockConfig(output_block, output_block, 0, output_cycles))
555555
break
556556

557557
if output_block[split_axis] == 1:
@@ -738,9 +738,10 @@ def get_valid_block_configs(
738738
ifm_channels,
739739
is_partkernel,
740740
)
741-
valid_block_configs.append(
742-
BlockConfig(output_block, compute_cycles, output_cycles)
741+
block_config = BlockConfig(
742+
input_block_shape.as_list(), output_block, compute_cycles, output_cycles
743743
)
744+
valid_block_configs.append(block_config)
744745
else:
745746
# Block config does not fit into SHRAM
746747
# Any Block config that is strictly larger than this one will also fail

python/tvm/contrib/ethosu/cascader/graph.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,10 @@ def read_bytes(self):
5757
def write_bytes(self):
5858
return self._write_bytes
5959

60+
@property
61+
def block_config(self):
62+
return self._block_config
63+
6064

6165
@tvm._ffi.register_object("contrib.ethosu.cascader.Tensor")
6266
class Tensor(Object):

python/tvm/contrib/ethosu/cascader/tensor_config.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,25 @@ class MemoryRegion(Object):
5858
5959
"""
6060

61-
def __init__(self, name: str, size: int, read_bandwidth: int, write_bandwidth: int):
61+
def __init__(
62+
self,
63+
name: str,
64+
size: int,
65+
read_bandwidth: int,
66+
write_bandwidth: int,
67+
read_latency: int = 0,
68+
write_latency: int = 0,
69+
burst_length: int = 1,
70+
):
6271
self.__init_handle_by_constructor__(
63-
_ffi_api.MemoryRegion, name, size, read_bandwidth, write_bandwidth
72+
_ffi_api.MemoryRegion,
73+
name,
74+
size,
75+
read_bandwidth,
76+
write_bandwidth,
77+
read_latency,
78+
write_latency,
79+
burst_length,
6480
)
6581

6682

src/contrib/ethosu/cascader/block_config.cc

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -33,23 +33,28 @@ namespace ethosu {
3333
namespace cascader {
3434

3535
void BlockConfigNode::VisitAttrs(AttrVisitor* v) {
36-
Array<Integer> tmp_arr = make_array(output_shape_);
36+
Array<Integer> tmp_arr = make_array(input_shape_);
37+
v->Visit("_input_shape", &tmp_arr);
38+
tmp_arr = make_array(output_shape_);
3739
v->Visit("_output_shape", &tmp_arr);
3840
}
3941

40-
BlockConfig::BlockConfig(const std::vector<int>& output_shape, int compute_cycles,
41-
int output_cycles) {
42+
BlockConfig::BlockConfig(const std::vector<int>& input_shape, const std::vector<int>& output_shape,
43+
int compute_cycles, int output_cycles) {
4244
auto n = make_object<BlockConfigNode>();
45+
n->input_shape_ = std::move(input_shape);
4346
n->output_shape_ = std::move(output_shape);
4447
n->compute_cycles_ = compute_cycles;
4548
n->output_cycles_ = output_cycles;
4649
data_ = std::move(n);
4750
}
4851

4952
TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.BlockConfig")
50-
.set_body_typed([](Array<Integer> output_shape, int compute_cycles, int output_cycles) {
53+
.set_body_typed([](Array<Integer> input_shape, Array<Integer> output_shape, int compute_cycles,
54+
int output_cycles) {
55+
std::vector<int> vinput_shape = make_vector<int, Integer>(input_shape);
5156
std::vector<int> voutput_shape = make_vector<int, Integer>(output_shape);
52-
return BlockConfig(voutput_shape, compute_cycles, output_cycles);
57+
return BlockConfig(vinput_shape, voutput_shape, compute_cycles, output_cycles);
5358
});
5459

5560
TVM_REGISTER_NODE_TYPE(BlockConfigNode);

src/contrib/ethosu/cascader/block_config.h

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,12 @@ class BlockConfigNode : public Object {
4242
public:
4343
void VisitAttrs(AttrVisitor* v);
4444

45+
/*!
46+
* \brief Get the shape of input block.
47+
* \return The input shape of the block config.
48+
*/
49+
inline std::vector<int> GetInputBlockShape() const { return input_shape_; }
50+
4551
/*!
4652
* \brief Get the shape of output block.
4753
* \return The output shape of the block config.
@@ -66,6 +72,8 @@ class BlockConfigNode : public Object {
6672
protected:
6773
friend class BlockConfig;
6874

75+
/*! \brief The shape of the input block */
76+
std::vector<int> input_shape_;
6977
/*! \brief The shape of the output block */
7078
std::vector<int> output_shape_;
7179
/*! \brief Cycles required to compute this block */
@@ -80,7 +88,8 @@ class BlockConfigNode : public Object {
8088
*/
8189
class BlockConfig : public ObjectRef {
8290
public:
83-
BlockConfig(const std::vector<int>& output_shape, int compute_cycles, int output_cycles);
91+
BlockConfig(const std::vector<int>& input_shape, const std::vector<int>& output_shape,
92+
int compute_cycles, int output_cycles);
8493

8594
TVM_DEFINE_OBJECT_REF_METHODS(BlockConfig, ObjectRef, BlockConfigNode);
8695
};

src/contrib/ethosu/cascader/graph.cc

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ void PerformanceInfoNode::VisitAttrs(AttrVisitor* v) {
4242
Array<IntImm> tmp_reads = make_array(read_bytes);
4343
v->Visit("_read_bytes", &tmp_reads);
4444
v->Visit("_write_bytes", &write_bytes);
45+
v->Visit("_block_config", &block_config);
4546
}
4647

4748
TVM_REGISTER_NODE_TYPE(PerformanceInfoNode);

src/contrib/ethosu/cascader/graph.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
#include <utility>
3434
#include <vector>
3535

36+
#include "block_config.h"
3637
#include "propagator.h"
3738

3839
namespace tvm {
@@ -71,6 +72,8 @@ class PerformanceInfoNode : public Object {
7172
std::vector<int64_t> read_bytes;
7273
/*! \brief The number of bytes written to the output tensor */
7374
int64_t write_bytes;
75+
/*! \brief The block config used for this performance point */
76+
BlockConfig block_config;
7477

7578
static constexpr const char* _type_key = "contrib.ethosu.cascader.PerformanceInfo";
7679
TVM_DECLARE_FINAL_OBJECT_INFO(PerformanceInfoNode, Object);
@@ -85,11 +88,13 @@ class PerformanceInfoNode : public Object {
8588
*/
8689
class PerformanceInfo : public ObjectRef {
8790
public:
88-
PerformanceInfo(int64_t compute_cycles, std::vector<int64_t> read_bytes, int64_t write_bytes) {
91+
PerformanceInfo(int64_t compute_cycles, std::vector<int64_t> read_bytes, int64_t write_bytes,
92+
BlockConfig block_config) {
8993
auto n = make_object<PerformanceInfoNode>();
9094
n->compute_cycles = compute_cycles;
9195
n->read_bytes = std::move(read_bytes);
9296
n->write_bytes = write_bytes;
97+
n->block_config = block_config;
9398
data_ = std::move(n);
9499
}
95100

src/contrib/ethosu/cascader/parts/ethosu.cc

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,8 @@ const std::vector<int64_t> EthosuPartNode::GetBytesRead(const std::vector<int>&
5757
for (const auto& input_block_config : input_block_configs) {
5858
std::map<std::vector<int>, int> input_blocks = CountStripes(input_block_config, false);
5959
for (const auto& block : input_blocks) {
60-
bytes_per_input[i] += mul_reduce(block.first) * block.second;
60+
bytes_per_input[i] +=
61+
mul_reduce(block.first) * block.second * input_tensors_[i]->GetDataType().bytes();
6162
}
6263
i++;
6364
}
@@ -136,7 +137,7 @@ const PerformanceInfo EthosuPartNode::GetPerformanceInfo(const StripeConfig& out
136137
total_cycles = (block_compute_cycles * num_blocks) + block_output_cycles;
137138
}
138139

139-
PerformanceInfo info(total_cycles, read_bytes, write_bytes);
140+
PerformanceInfo info(total_cycles, read_bytes, write_bytes, block_config);
140141
return info;
141142
}
142143

src/contrib/ethosu/cascader/parts/inline.cc

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <utility>
2424
#include <vector>
2525

26+
#include "../block_config.h"
2627
#include "../common.h"
2728

2829
namespace tvm {
@@ -33,7 +34,8 @@ namespace cascader {
3334
const PerformanceInfo InlinePartNode::GetPerformanceInfo(const StripeConfig& output_stripe_config,
3435
BufferMode buffer_mode) {
3536
std::vector<int64_t> read_bytes(input_tensors_.size());
36-
PerformanceInfo info(0, read_bytes, 0);
37+
BlockConfig block_config = BlockConfig(std::vector<int>(1, 1), std::vector<int>(1, 1), 0, 0);
38+
PerformanceInfo info(0, read_bytes, 0, block_config);
3739
return info;
3840
}
3941

0 commit comments

Comments
 (0)