From 749d0a55a4265562da110a3d0cac398337440c39 Mon Sep 17 00:00:00 2001 From: Lukas Tenbrink Date: Thu, 19 Dec 2024 22:58:54 +0100 Subject: [PATCH] Optimize / refactor `CowData`, combining resize and fork to avoid unnecessary reallocations. --- core/templates/cowdata.h | 238 +++++++++++++++++++-------------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/core/templates/cowdata.h b/core/templates/cowdata.h index ce32a9033866..ea0438548f4b 100644 --- a/core/templates/cowdata.h +++ b/core/templates/cowdata.h @@ -104,14 +104,6 @@ class CowData { // internal helpers - static _FORCE_INLINE_ SafeNumeric *_get_refcount_ptr(uint8_t *p_ptr) { - return (SafeNumeric *)(p_ptr + REF_COUNT_OFFSET); - } - - static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) { - return (USize *)(p_ptr + SIZE_OFFSET); - } - static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) { return (T *)(p_ptr + DATA_OFFSET); } @@ -132,11 +124,11 @@ class CowData { return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET); } - _FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const { + _FORCE_INLINE_ static USize _get_alloc_size(USize p_elements) { return next_po2(p_elements * sizeof(T)); } - _FORCE_INLINE_ bool _get_alloc_size_checked(USize p_elements, USize *out) const { + _FORCE_INLINE_ static bool _get_alloc_size_checked(USize p_elements, USize *out) { if (unlikely(p_elements == 0)) { *out = 0; return true; @@ -160,11 +152,31 @@ class CowData { return *out; } + // Decrements the reference count. Deallocates the backing buffer if needed. + // After this function, _ptr is guaranteed to be NULL. void _unref(); void _ref(const CowData *p_from); void _ref(const CowData &p_from); - USize _copy_on_write(); - Error _realloc(Size p_alloc_size); + + // Ensures that the backing buffer is at least p_size wide, and that + // this CowData instance is the only reference to it. The buffer is + // populated with as many element copies from the old array as possible. 
+ // It is the responsibility of the caller to populate newly allocated + // space up to p_size. + Error _fork_allocate(USize p_size); + Error _copy_on_write() { return _fork_allocate(size()); } + + // Allocates a backing array of the given capacity. The reference count is + // initialized to 1. + // It is the responsibility of the caller to populate the array and the new + // size property. + Error _alloc(USize p_alloc_size); + // Re-allocates the backing array to the given capacity. The reference count is + // initialized to 1. + // It is the responsibility of the caller to populate the array and the new + // size property. + // The caller must also make sure that we are the only reference to the data. + Error _realloc(USize p_alloc_size); public: void operator=(const CowData &p_from) { _ref(p_from); } @@ -196,7 +208,7 @@ class CowData { } } - _FORCE_INLINE_ void clear() { resize(0); } + _FORCE_INLINE_ void clear() { _unref(); } _FORCE_INLINE_ bool is_empty() const { return _ptr == nullptr; } _FORCE_INLINE_ void set(Size p_index, const T &p_elem) { @@ -250,7 +262,7 @@ class CowData { Size count(const T &p_val) const; _FORCE_INLINE_ CowData() {} - _FORCE_INLINE_ ~CowData(); + _FORCE_INLINE_ ~CowData() { _unref(); } _FORCE_INLINE_ CowData(const CowData &p_from) { _ref(p_from); } _FORCE_INLINE_ CowData(CowData &&p_from) { _ptr = p_from._ptr; @@ -266,62 +278,99 @@ void CowData::_unref() { SafeNumeric *refc = _get_refcount(); if (refc->decrement() > 0) { - return; // still in use + // Data is still in use. + _ptr = nullptr; + return; } - // clean up + + // We had the only reference; destroy the data. if constexpr (!std::is_trivially_destructible_v) { + // Call Destructors. USize current_size = *_get_size(); - for (USize i = 0; i < current_size; ++i) { - // call destructors - T *t = &_ptr[i]; - t->~T(); + _ptr[i].~T(); } } - // free mem - Memory::free_static(((uint8_t *)_ptr) - DATA_OFFSET, false); + // Free Memory. 
+ Memory::free_static((uint8_t *)_ptr - DATA_OFFSET, false); + _ptr = nullptr; } template -typename CowData::USize CowData::_copy_on_write() { - if (!_ptr) { - return 0; +Error CowData::_fork_allocate(USize p_size) { + if (p_size == 0) { + // Wants to clean up. + _unref(); + return OK; } - SafeNumeric *refc = _get_refcount(); - - USize rc = refc->get(); - if (unlikely(rc > 1)) { - /* in use by more than me */ - USize current_size = *_get_size(); + USize alloc_size; + ERR_FAIL_COND_V(!_get_alloc_size_checked(p_size, &alloc_size), ERR_OUT_OF_MEMORY); - uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false); - ERR_FAIL_NULL_V(mem_new, 0); + const USize prev_size = size(); - SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); - USize *_size_ptr = _get_size_ptr(mem_new); - T *_data_ptr = _get_data_ptr(mem_new); + if (!_ptr) { + // We had no data before; just allocate a new array. + const Error error = _alloc(alloc_size); + if (error) { + _ptr = nullptr; + return error; + } + } else if (_get_refcount()->get() == 1) { + // Resize in-place. + if (p_size == prev_size) { + // We can shortcut here; we don't need to do anything. + return OK; + } - new (_refc_ptr) SafeNumeric(1); //refcount - *(_size_ptr) = current_size; //size + // Destroy extraneous elements. + if constexpr (!std::is_trivially_destructible_v) { + for (USize i = prev_size; i > p_size; i--) { + _ptr[i - 1].~T(); + } + } - // initialize new elements - if constexpr (std::is_trivially_copyable_v) { - memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T)); - } else { - for (USize i = 0; i < current_size; i++) { - memnew_placement(&_data_ptr[i], T(_ptr[i])); + if (alloc_size != _get_alloc_size(prev_size)) { + const Error error = _realloc(alloc_size); + if (error) { + _ptr = nullptr; + return error; } } + } else { + // Resize by forking. + + // Create a temporary CowData to hold ownership over our _ptr. 
+ // It will be used to copy elements from the old buffer over to our new buffer. + // At the end of the block, it will be automatically destructed by going out of scope. + const CowData prev_data; + prev_data._ptr = _ptr; + _ptr = nullptr; - _unref(); - _ptr = _data_ptr; + const Error error = _alloc(alloc_size); + if (error) { + return error; + } - rc = 1; + // Copy over elements. + const USize copied_element_count = MIN(prev_size, p_size); + if (copied_element_count > 0) { + if constexpr (std::is_trivially_copyable_v) { + memcpy((uint8_t *)_ptr, (uint8_t *)prev_data._ptr, copied_element_count * sizeof(T)); + } else { + for (USize i = 0; i < copied_element_count; i++) { + memnew_placement(&_ptr[i], T(prev_data._ptr[i])); + } + } + } } - return rc; + + // Set our new size. + *_get_size() = p_size; + + return OK; } template @@ -329,95 +378,52 @@ template Error CowData::resize(Size p_size) { ERR_FAIL_COND_V(p_size < 0, ERR_INVALID_PARAMETER); - Size current_size = size(); - - if (p_size == current_size) { + const Size prev_size = size(); + if (p_size == prev_size) { return OK; } - if (p_size == 0) { - // wants to clean up - _unref(); - _ptr = nullptr; - return OK; + const Error error = _fork_allocate(p_size); + if (error) { + return error; } - // possibly changing size, copy on write - _copy_on_write(); - - USize current_alloc_size = _get_alloc_size(current_size); - USize alloc_size; - ERR_FAIL_COND_V(!_get_alloc_size_checked(p_size, &alloc_size), ERR_OUT_OF_MEMORY); - - if (p_size > current_size) { - if (alloc_size != current_alloc_size) { - if (current_size == 0) { - // alloc from scratch - uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false); - ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - - SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); - USize *_size_ptr = _get_size_ptr(mem_new); - T *_data_ptr = _get_data_ptr(mem_new); - - new (_refc_ptr) SafeNumeric(1); //refcount - *(_size_ptr) = 0; //size, currently none - - _ptr = 
_data_ptr; - - } else { - const Error error = _realloc(alloc_size); - if (error) { - return error; - } - } - } - - // construct the newly created elements - + if (p_size > prev_size) { + // Construct missing elements. if constexpr (!std::is_trivially_constructible_v) { - for (Size i = *_get_size(); i < p_size; i++) { + for (Size i = prev_size; i < p_size; i++) { memnew_placement(&_ptr[i], T); } } else if (p_ensure_zero) { - memset((void *)(_ptr + current_size), 0, (p_size - current_size) * sizeof(T)); + memset((void *)&_ptr[prev_size], 0, (p_size - prev_size) * sizeof(T)); } + } - *_get_size() = p_size; + return OK; +} - } else if (p_size < current_size) { - if constexpr (!std::is_trivially_destructible_v) { - // deinitialize no longer needed elements - for (USize i = p_size; i < *_get_size(); i++) { - T *t = &_ptr[i]; - t->~T(); - } - } +template +Error CowData::_alloc(USize p_alloc_size) { + uint8_t *mem_new = (uint8_t *)Memory::alloc_static(p_alloc_size + DATA_OFFSET, false); + ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - if (alloc_size != current_alloc_size) { - const Error error = _realloc(alloc_size); - if (error) { - return error; - } - } + _ptr = _get_data_ptr(mem_new); - *_get_size() = p_size; - } + // If we alloc, we're guaranteed to be the only reference. + new (_get_refcount()) SafeNumeric(1); return OK; } template -Error CowData::_realloc(Size p_alloc_size) { +Error CowData::_realloc(USize p_alloc_size) { uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, p_alloc_size + DATA_OFFSET, false); ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY); - SafeNumeric *_refc_ptr = _get_refcount_ptr(mem_new); - T *_data_ptr = _get_data_ptr(mem_new); + _ptr = _get_data_ptr(mem_new); // If we realloc, we're guaranteed to be the only reference. 
- new (_refc_ptr) SafeNumeric(1); - _ptr = _data_ptr; + new (_get_refcount()) SafeNumeric(1); return OK; } @@ -482,7 +488,6 @@ void CowData::_ref(const CowData &p_from) { } _unref(); - _ptr = nullptr; if (!p_from._ptr) { return; //nothing to do @@ -493,11 +498,6 @@ void CowData::_ref(const CowData &p_from) { } } -template -CowData::~CowData() { - _unref(); -} - #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop #endif