Skip to content

Commit f121e5e

Browse files
authored
[Hexagon] [runtime] VTCM Allocator (#12947)
Adds a VTCM memory pool class, which allocates the largest contiguous buffer possible within 1 page upon construction. Allocations and free space are maintained in two lists. Requests whose size is a multiple of 2k are placed in the smallest free segment that satisfies the request. Requests of any other size are allocated from the end of the free space. HexagonBuffer uses this pool to service VTCM scope requests, replacing the individual calls that allocated the memory on separate pages. The pool is created and destroyed in the device API AcquireResources/ReleaseResources. Adds unit tests to exercise edge cases.
1 parent fa17da2 commit f121e5e

File tree

8 files changed

+420
-35
lines changed

8 files changed

+420
-35
lines changed

src/runtime/hexagon/hexagon_buffer.cc

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
#include <string>
2525
#include <utility>
2626

27-
#include "HAP_compute_res.h"
2827
#include "hexagon_common.h"
28+
#include "hexagon_device_api.h"
2929

3030
namespace tvm {
3131
namespace runtime {
@@ -57,35 +57,26 @@ struct DDRAllocation : public Allocation {
5757

5858
// Allocation whose storage is taken from the VTCM pool reached through
// HexagonDeviceAPI::Global()->VtcmPool(). The constructor obtains data_ from the pool and the
// destructor hands the same pointer and size back to it.
// NOTE(review): in this diff view, lines whose marker ends in '-' are the removed
// per-allocation HAP_compute_res implementation; lines whose marker ends in '+' replace it.
struct VTCMAllocation : public Allocation {
5959
VTCMAllocation(size_t nbytes, size_t alignment) : Allocation(nbytes, alignment) {
60-
compute_res_attr_t res_info;
61-
HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));
62-
63-
// allocate nbytes of vtcm on a single page
64-
HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param(&res_info, /*vtcm_size = */ nbytes,
65-
/*b_single_page = */ 0));
66-
67-
// TODO(HWE): Investigate why a non-zero timeout results in
68-
// hanging, both in the simulator and on hardware.
69-
context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 0);
70-
71-
if (context_id_) {
72-
data_ = HAP_compute_res_attr_get_vtcm_ptr(&res_info);
73-
if (!data_) {
74-
LOG(ERROR) << "ERROR: HAP_compute_res_acquire returned nullptr when allocating VTCM.";
75-
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
76-
return;
77-
}
78-
} else {
79-
LOG(FATAL) << "FATAL: HAP_compute_res_acquire failed to acquire requested VTCM resource.";
80-
throw std::runtime_error(
81-
"HAP_compute_res_acquire failed to acquire requested VTCM resource.");
60+
// TODO(HWE): Handle alignments greater than 2k
61+
CHECK(alignment <= 0x800) << "VTCMAllocation called for invalid alignment";
62+
// Taken when nbytes is not a multiple of 0x800 and alignment has none of its low 11 bits
// set; given the CHECK above, that means alignment is either 0x800 or 0.
if ((nbytes & 0x7FF) && ((alignment & 0x7FF) == 0)) {
63+
// Caller has requested 2k alignment, but the size is not a multiple of 2k
64+
// Adjust size to be a multiple of 2k so that we will allocate from the front of the pool
65+
// Clear the low 11 bits, then add 0x800: rounds nbytes up to the next multiple of 2k.
nbytes = nbytes >> 11;
66+
nbytes = nbytes << 11;
67+
nbytes += 0x800;
68+
// allocation_nbytes_ has not been overwritten yet, so this logs the old size then the new.
DLOG(INFO) << "VTCMAllocation size adjusted for alignment " << allocation_nbytes_ << " to "
69+
<< nbytes;
70+
allocation_nbytes_ = nbytes;
8271
}
72+
// The pool is asked for allocation_nbytes_, i.e. the adjusted size when the branch above ran.
// NOTE(review): presumably allocation_nbytes_ was otherwise set by the Allocation base
// constructor; that constructor is not visible here - confirm.
data_ = HexagonDeviceAPI::Global()->VtcmPool()->Allocate(allocation_nbytes_);
73+
DLOG(INFO) << "VTCMAllocation " << data_ << " " << allocation_nbytes_ << " " << alignment;
8374
}
8475
~VTCMAllocation() {
85-
HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
76+
DLOG(INFO) << "~VTCMAllocation " << data_ << " " << allocation_nbytes_;
77+
// Return the block with the same size that was passed to Allocate; the pool's Free CHECKs
// that both the pointer and the size match its record.
HexagonDeviceAPI::Global()->VtcmPool()->Free(data_, allocation_nbytes_);
8678
data_ = nullptr;
8779
}
88-
unsigned int context_id_{0};
8980
};
9081

9182
template <HexagonBuffer::StorageScope S>

src/runtime/hexagon/hexagon_device_api.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333

3434
#include "../workspace_pool.h"
3535
#include "hexagon_common.h"
36-
#include "hexagon_user_dma.h"
3736

3837
namespace tvm {
3938
namespace runtime {

src/runtime/hexagon/hexagon_device_api.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "hexagon_buffer_manager.h"
3434
#include "hexagon_thread_manager.h"
3535
#include "hexagon_user_dma.h"
36+
#include "hexagon_vtcm_pool.h"
3637

3738
namespace tvm {
3839
namespace runtime {
@@ -54,37 +55,37 @@ class HexagonDeviceAPI final : public DeviceAPI {
5455

5556
//! \brief Ensures resource managers are in a good state for the runtime
5657
// Creates the per-session resource managers. Each CHECK_EQ requires that the corresponding
// manager does not already exist before it is created.
void AcquireResources() {
58+
// The VTCM pool is created ahead of the buffer manager: VTCMAllocation obtains its storage
// through VtcmPool(), which CHECKs that runtime_vtcm exists.
CHECK_EQ(runtime_vtcm, nullptr);
59+
runtime_vtcm = std::make_unique<HexagonVtcmPool>();
60+
5761
CHECK_EQ(runtime_hexbuffs, nullptr);
5862
runtime_hexbuffs = std::make_unique<HexagonBufferManager>();
59-
DLOG(INFO) << "runtime_hexbuffs created";
6063
// From here on mgr refers to the freshly created runtime buffer manager.
mgr = runtime_hexbuffs.get();
6164

6265
CHECK_EQ(runtime_threads, nullptr);
6366
runtime_threads = std::make_unique<HexagonThreadManager>(threads, stack_size, pipe_size);
64-
DLOG(INFO) << "runtime_threads created";
6567

6668
CHECK_EQ(runtime_dma, nullptr);
6769
runtime_dma = std::make_unique<HexagonUserDMA>();
68-
DLOG(INFO) << "runtime_dma created";
6970
}
7071

7172
//! \brief Ensures all runtime resources are freed
7273
// Destroys the managers created in AcquireResources, in the reverse of their creation order
// (DMA, threads, buffers, VTCM pool). Each CHECK requires that the manager exists.
void ReleaseResources() {
7374
CHECK(runtime_dma) << "runtime_dma was not created in AcquireResources";
7475
runtime_dma.reset();
75-
DLOG(INFO) << "runtime_dma reset";
7676

7777
CHECK(runtime_threads) << "runtime_threads was not created in AcquireResources";
7878
runtime_threads.reset();
79-
DLOG(INFO) << "runtime_threads reset";
8079

8180
CHECK(runtime_hexbuffs) << "runtime_hexbuffs was not created in AcquireResources";
8281
// Leftover buffers are only reported, not treated as an error.
if (runtime_hexbuffs && !runtime_hexbuffs->empty()) {
83-
DLOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources";
82+
LOG(INFO) << "runtime_hexbuffs was not empty in ReleaseResources";
8483
}
8584
// Point mgr back at the hexbuffs member before the runtime buffer manager is destroyed.
mgr = &hexbuffs;
86-
DLOG(INFO) << "runtime_hexbuffs reset";
8785
runtime_hexbuffs.reset();
86+
87+
// The VTCM pool is released last.
// NOTE(review): presumably so that buffers dropped by runtime_hexbuffs.reset() can still
// return their VTCM through VtcmPool()->Free - confirm against HexagonBufferManager.
CHECK(runtime_vtcm) << "runtime_vtcm was not created in AcquireResources";
88+
runtime_vtcm.reset();
8889
}
8990

9091
/*! \brief Currently unimplemented interface to specify the active
@@ -168,6 +169,11 @@ class HexagonDeviceAPI final : public DeviceAPI {
168169
return runtime_dma.get();
169170
}
170171

172+
HexagonVtcmPool* VtcmPool() {
  // Non-owning access to the VTCM pool; the pool must already have been created.
  HexagonVtcmPool* const pool = runtime_vtcm.get();
  CHECK(pool) << "runtime_vtcm has not been created";
  return pool;
}
176+
171177
protected:
172178
//! Standard Device API interface to copy data from one storage to another.
173179
void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size,
@@ -202,6 +208,9 @@ class HexagonDeviceAPI final : public DeviceAPI {
202208

203209
//! \brief User DMA manager
204210
std::unique_ptr<HexagonUserDMA> runtime_dma;
211+
212+
//! \brief VTCM memory manager
213+
std::unique_ptr<HexagonVtcmPool> runtime_vtcm;
205214
};
206215
} // namespace hexagon
207216
} // namespace runtime
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
#include "hexagon_vtcm_pool.h"
20+
21+
#include "HAP_compute_res.h"
22+
#include "hexagon_common.h"
23+
24+
namespace tvm {
25+
namespace runtime {
26+
namespace hexagon {
27+
28+
/*!
 * \brief Acquire one VTCM region through HAP_compute_res_* and record all of it as free.
 *
 * Every failure is reported through CHECK / HEXAGON_SAFE_CALL. If a failure occurs after the
 * compute resource has been acquired, the resource is released again before the error
 * propagates, because the destructor does not run for an object whose constructor did not
 * complete.
 */
HexagonVtcmPool::HexagonVtcmPool() {
  compute_res_attr_t res_info;
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_init(&res_info));

  // TODO(HWE): get the max and min size programmatically
  const unsigned int max_size = 4 * 1024 * 1024;
  const unsigned int min_size = 1024 * 1024;

  // Ask for up to max_size bytes of VTCM while accepting no less than min_size.
  // NOTE(review): min_page_size = 1 is presumably what keeps the region on a single page -
  // confirm against the HAP_compute_res documentation.
  HEXAGON_SAFE_CALL(HAP_compute_res_attr_set_vtcm_param_v2(&res_info,
                                                           /*vtcm_size = */ max_size,
                                                           /*min_page_size = */ 1,
                                                           /*min_vtcm_size = */ min_size));

  // TODO(HWE): Investigate why a non-zero timeout results in
  // hanging, both in the simulator and on hardware.
  context_id_ = HAP_compute_res_acquire(&res_info, /*timeout = */ 0);
  CHECK(context_id_) << "HAP_compute_res_acquire failed to acquire requested VTCM resource.";

  try {
    HEXAGON_SAFE_CALL(HAP_compute_res_attr_get_vtcm_ptr_v2(&res_info, &vtcm_data_, &vtcm_size_));
    CHECK(vtcm_data_ != nullptr)
        << "HAP_compute_res_acquire returned nullptr when allocating VTCM.";
    CHECK(vtcm_size_ >= min_size)
        << "HAP_compute_res_acquire failed to allocate minimum amount of VTCM";
    // The whole region starts out as a single free segment.
    free_.emplace_back(std::pair<char*, size_t>(static_cast<char*>(vtcm_data_), vtcm_size_));
  } catch (...) {
    // Best-effort release so the acquired context is not leaked; the return code is ignored
    // so that the original error is the one that reaches the caller.
    HAP_compute_res_release(context_id_);
    context_id_ = 0;
    throw;
  }
}
53+
54+
HexagonVtcmPool::~HexagonVtcmPool() {
  // Give the compute resource identified by context_id_ back to HAP.
  HEXAGON_SAFE_CALL(HAP_compute_res_release(context_id_));
}
55+
56+
void* HexagonVtcmPool::Allocate(size_t nbytes) {
57+
std::lock_guard<std::mutex> lock(mutex_);
58+
59+
CHECK(!free_.empty()) << "No free VTCM";
60+
61+
// If this is not aligned on a 2k block, allocate from the end to avoid fragmentation
62+
if (nbytes & size_t(0x7FF)) {
63+
DLOG(INFO) << "VTCM nbytes requested: " << nbytes << " allocate from the end";
64+
auto last_free_entry = free_.rbegin();
65+
CHECK(last_free_entry->second >= nbytes)
66+
<< "Not enough contiguous VTCM space at the end to allocate";
67+
char* ptr = last_free_entry->first + (last_free_entry->second - nbytes);
68+
allocations_.emplace_back(std::pair<char*, size_t>(ptr, nbytes));
69+
last_free_entry->second -= nbytes;
70+
// DebugDump();
71+
return ptr;
72+
}
73+
74+
auto entry_to_allocate = free_.begin();
75+
for (auto it = free_.begin(); it != free_.end(); it++) {
76+
if ((it->second < entry_to_allocate->second) && (it->second >= nbytes)) {
77+
entry_to_allocate = it;
78+
if (entry_to_allocate->second == nbytes) {
79+
break;
80+
}
81+
}
82+
}
83+
CHECK(entry_to_allocate->second >= nbytes) << "Not enough contiguous VTCM space to allocate";
84+
char* ptr = entry_to_allocate->first;
85+
allocations_.emplace(allocations_.end(), std::pair<char*, size_t>(ptr, nbytes));
86+
87+
if (entry_to_allocate->second == nbytes) {
88+
free_.erase(entry_to_allocate);
89+
} else {
90+
entry_to_allocate->first = entry_to_allocate->first + nbytes;
91+
entry_to_allocate->second = entry_to_allocate->second - nbytes;
92+
}
93+
// DebugDump();
94+
return ptr;
95+
}
96+
97+
void HexagonVtcmPool::Free(void* ptr, size_t nbytes) {
98+
char* ptr_to_free = static_cast<char*>(ptr);
99+
std::lock_guard<std::mutex> lock(mutex_);
100+
101+
auto it = std::find_if(allocations_.begin(), allocations_.end(),
102+
[&](auto entry) { return entry.first == ptr_to_free; });
103+
CHECK(it != allocations_.end()) << "Attempted to free a pointer that had not been allocated";
104+
CHECK(it->second == nbytes) << "Attempted to free a different size than was allocated";
105+
allocations_.erase(it);
106+
107+
it = std::lower_bound(free_.begin(), free_.end(), std::pair<char*, size_t>(ptr_to_free, nbytes),
108+
[](auto p, auto q) { return p.first <= q.first; });
109+
if (it == free_.end()) {
110+
// Insert an entry at the end
111+
it = free_.emplace(it, std::pair<char*, size_t>(ptr_to_free, nbytes));
112+
} else {
113+
CHECK(ptr_to_free != it->first) << "Attempting to free a pointer that was already free";
114+
CHECK(ptr_to_free + nbytes <= it->first)
115+
<< "free_ is in an inconsistent state, freed block overlaps with next";
116+
if (ptr_to_free + nbytes == it->first) {
117+
// Make this entry bigger
118+
it->first = ptr_to_free;
119+
it->second += nbytes;
120+
} else {
121+
// Insert an entry before this
122+
it = free_.emplace(it, std::pair<char*, size_t>(ptr_to_free, nbytes));
123+
}
124+
}
125+
126+
// Check for overlap with the previous entry
127+
if (it != free_.begin()) {
128+
auto it_prev = it;
129+
it_prev--;
130+
CHECK(it_prev->first + it_prev->second <= ptr_to_free)
131+
<< "free_ is in an inconsistent state, freed block overlaps with previous";
132+
if (it_prev->first + it_prev->second == ptr_to_free) {
133+
it_prev->second += it->second;
134+
free_.erase(it);
135+
}
136+
}
137+
// DebugDump();
138+
}
139+
140+
void HexagonVtcmPool::DebugDump() {
141+
LOG(INFO) << "VTCM list state";
142+
for (auto entry : allocations_) {
143+
LOG(INFO) << "VTCM alloc: " << static_cast<void*>(entry.first) << " " << entry.second;
144+
}
145+
for (auto entry : free_) {
146+
LOG(INFO) << "VTCM free: " << static_cast<void*>(entry.first) << " " << entry.second;
147+
}
148+
}
149+
150+
} // namespace hexagon
151+
} // namespace runtime
152+
} // namespace tvm
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_
21+
#define TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_
22+
23+
#include <tvm/runtime/c_runtime_api.h>
24+
#include <tvm/runtime/device_api.h>
25+
#include <tvm/runtime/logging.h>
26+
#include <tvm/runtime/ndarray.h>
27+
#include <tvm/runtime/packed_func.h>
28+
29+
#include <utility>
30+
#include <vector>
31+
32+
namespace tvm {
33+
namespace runtime {
34+
namespace hexagon {
35+
36+
/*!
 * \brief Manages one VTCM region acquired through HAP_compute_res_* and hands out
 *        sub-ranges of it.
 *
 * Allocated ranges and free segments are tracked in two separate lists. Allocate and Free
 * serialize access to those lists with an internal mutex.
 */
class HexagonVtcmPool {
 public:
  //! \brief Acquires the VTCM region and records all of it as free.
  HexagonVtcmPool();

  //! \brief Destruction releases the underlying VTCM compute resource.
  ~HexagonVtcmPool();

  //! \brief Prevent copy construction of HexagonVtcmPool.
  HexagonVtcmPool(const HexagonVtcmPool&) = delete;

  //! \brief Prevent copy assignment with HexagonVtcmPool.
  HexagonVtcmPool& operator=(const HexagonVtcmPool&) = delete;

  //! \brief Prevent move construction.
  HexagonVtcmPool(HexagonVtcmPool&&) = delete;

  //! \brief Prevent move assignment.
  HexagonVtcmPool& operator=(HexagonVtcmPool&&) = delete;

  /*!
   * \brief Allocate memory from the VTCM pool.
   *
   * Requests whose size is a multiple of 2k are served from the front of a free segment;
   * requests of any other size are carved from the tail of the last free segment.
   *
   * \param nbytes The number of bytes to allocate.
   *
   * \return Pointer to the start of the allocated range.
   */
  void* Allocate(size_t nbytes);

  /*!
   * \brief Return memory obtained from Allocate to the pool.
   *
   * \param ptr The pointer to the buffer to be freed.
   *
   * \param nbytes The number of bytes to be freed; must equal the size that was allocated.
   */
  void Free(void* ptr, size_t nbytes);

  //! \brief Returns the total number of bytes in this pool
  size_t TotalBytes() const { return static_cast<size_t>(vtcm_size_); }

 private:
  //! \brief Size in bytes of the VTCM region, as reported by HAP_compute_res_*
  unsigned int vtcm_size_{0};

  //! \brief Base address of the VTCM region, as reported by HAP_compute_res_*
  void* vtcm_data_{nullptr};

  //! \brief Context for HAP_compute_res_*
  unsigned int context_id_{0};

  //! \brief List of allocations, as (start address, size in bytes)
  std::vector<std::pair<char*, size_t>> allocations_;

  //! \brief List of free segments, as (start address, size in bytes)
  std::vector<std::pair<char*, size_t>> free_;

  //! \brief Mutex to protect access to the lists
  std::mutex mutex_;

  //! \brief Debug only dump of the state of the lists
  void DebugDump();
};
95+
96+
} // namespace hexagon
97+
} // namespace runtime
98+
} // namespace tvm
99+
100+
#endif // TVM_RUNTIME_HEXAGON_HEXAGON_VTCM_POOL_H_

0 commit comments

Comments
 (0)