Skip to content

Commit aafc5e7

Browse files
committed
[FFI] Optimize atomic decref in Object (apache#18077)
1 parent 5738bde commit aafc5e7

File tree

2 files changed

+30
-31
lines changed

2 files changed

+30
-31
lines changed

include/tvm/ffi/base_details.h

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -139,34 +139,6 @@ namespace tvm {
139139
namespace ffi {
140140
namespace details {
141141

142-
/********** Atomic Operations *********/
143-
144-
TVM_FFI_INLINE int32_t AtomicIncrementRelaxed(int32_t* ptr) {
145-
#ifdef _MSC_VER
146-
return _InterlockedIncrement(reinterpret_cast<volatile long*>(ptr)) - 1; // NOLINT(*)
147-
#else
148-
return __atomic_fetch_add(ptr, 1, __ATOMIC_RELAXED);
149-
#endif
150-
}
151-
152-
TVM_FFI_INLINE int32_t AtomicDecrementRelAcq(int32_t* ptr) {
153-
#ifdef _MSC_VER
154-
return _InterlockedDecrement(reinterpret_cast<volatile long*>(ptr)) + 1; // NOLINT(*)
155-
#else
156-
return __atomic_fetch_sub(ptr, 1, __ATOMIC_ACQ_REL);
157-
#endif
158-
}
159-
160-
TVM_FFI_INLINE int32_t AtomicLoadRelaxed(const int32_t* ptr) {
161-
int32_t* raw_ptr = const_cast<int32_t*>(ptr);
162-
#ifdef _MSC_VER
163-
// simply load the variable ptr out
164-
return (reinterpret_cast<const volatile long*>(raw_ptr))[0]; // NOLINT(*)
165-
#else
166-
return __atomic_load_n(raw_ptr, __ATOMIC_RELAXED);
167-
#endif
168-
}
169-
170142
// for each iterator
171143
template <bool stop, std::size_t I, typename F>
172144
struct for_each_dispatcher {

include/tvm/ffi/object.h

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,14 @@ class Object {
187187
* \return The usage count of the cell.
188188
* \note We use stl style naming to be consistent with known API in shared_ptr.
189189
*/
190-
int32_t use_count() const { return details::AtomicLoadRelaxed(&(header_.ref_counter)); }
190+
int32_t use_count() const {
191+
// only need relaxed load of counters
192+
#ifdef _MSC_VER
193+
return (reinterpret_cast<const volatile long*>(&header_.ref_counter))[0]; // NOLINT(*)
194+
#else
195+
return __atomic_load_n(&(header_.ref_counter), __ATOMIC_RELAXED);
196+
#endif
197+
}
191198

192199
// Information about the object
193200
static constexpr const char* _type_key = "object.Object";
@@ -220,15 +227,35 @@ class Object {
220227

221228
private:
222229
/*! \brief increase reference count */
223-
void IncRef() { details::AtomicIncrementRelaxed(&(header_.ref_counter)); }
230+
void IncRef() {
231+
#ifdef _MSC_VER
232+
_InterlockedIncrement(reinterpret_cast<volatile long*>(&header_.ref_counter)); // NOLINT(*)
233+
#else
234+
__atomic_fetch_add(&(header_.ref_counter), 1, __ATOMIC_RELAXED);
235+
#endif
236+
}
224237

225238
/*! \brief decrease reference count and delete the object */
226239
void DecRef() {
227-
if (details::AtomicDecrementRelAcq(&(header_.ref_counter)) == 1) {
240+
#ifdef _MSC_VER
241+
if (_InterlockedDecrement( //
242+
reinterpret_cast<volatile long*>(&header_.ref_counter)) == 0) { // NOLINT(*)
243+
// full barrier is implicit in InterlockedDecrement
244+
if (header_.deleter != nullptr) {
245+
header_.deleter(&(this->header_));
246+
}
247+
}
248+
#else
249+
// first do a release, note we only need to acquire for deleter
250+
if (__atomic_fetch_sub(&(header_.ref_counter), 1, __ATOMIC_RELEASE) == 1) {
251+
// only acquire when we need to call deleter
252+
// in this case we need to ensure all previous writes are visible
253+
__atomic_thread_fence(__ATOMIC_ACQUIRE);
228254
if (header_.deleter != nullptr) {
229255
header_.deleter(&(this->header_));
230256
}
231257
}
258+
#endif
232259
}
233260

234261
// friend classes

0 commit comments

Comments
 (0)