Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize / refactor CowData, combining resize and fork to avoid unnecessary reallocations. #100619

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 110 additions & 105 deletions core/templates/cowdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,6 @@ class CowData {

// internal helpers

static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) {
return (SafeNumeric<USize> *)(p_ptr + REF_COUNT_OFFSET);
}

static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) {
return (USize *)(p_ptr + SIZE_OFFSET);
}

static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) {
return (T *)(p_ptr + DATA_OFFSET);
}
Expand All @@ -134,11 +126,11 @@ class CowData {
return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
}

_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
_FORCE_INLINE_ static USize _get_alloc_size(USize p_elements) {
return next_po2(p_elements * sizeof(T));
}

_FORCE_INLINE_ bool _get_alloc_size_checked(USize p_elements, USize *out) const {
_FORCE_INLINE_ static bool _get_alloc_size_checked(USize p_elements, USize *out) {
if (unlikely(p_elements == 0)) {
*out = 0;
return true;
Expand Down Expand Up @@ -167,8 +159,26 @@ class CowData {
void _unref();
void _ref(const CowData *p_from);
void _ref(const CowData &p_from);
USize _copy_on_write();
Error _realloc(Size p_alloc_size);

// Ensures that the backing buffer is at least p_size wide, and that
// this CowData instance is the only reference to it. The buffer is
// populated with as many element copies from the old array as possible.
// It is the responsibility of the caller to populate newly allocated
// space up to p_size.
Error _fork_allocate(USize p_size);
Error _copy_on_write() { return _fork_allocate(size()); }

// Allocates a backing array of the given capacity. The reference count is
// initialized to 1.
// It is the responsibility of the caller to populate the array and the new
// size property.
Error _alloc(USize p_alloc_size);
// Re-allocates the backing array to the given capacity. The reference count is
// initialized to 1.
// It is the responsibility of the caller to populate the array and the new
// size property. The caller must also make sure there are no other references to
// the data as pointers may be invalidated.
Error _realloc(USize p_alloc_size);

public:
void operator=(const CowData<T> &p_from) { _ref(p_from); }
Expand Down Expand Up @@ -200,7 +210,7 @@ class CowData {
}
}

_FORCE_INLINE_ void clear() { resize(0); }
_FORCE_INLINE_ void clear() { _unref(); }
_FORCE_INLINE_ bool is_empty() const { return _ptr == nullptr; }

_FORCE_INLINE_ void set(Size p_index, const T &p_elem) {
Expand Down Expand Up @@ -275,7 +285,8 @@ void CowData<T>::_unref() {
_ptr = nullptr;
return;
}
// Clean up.
// We had the only reference; destroy the data.

// First, invalidate our own reference.
// NOTE: It is required to do so immediately because it must not be observable outside of this
// function after refcount has already been reduced to 0.
Expand All @@ -293,143 +304,137 @@ void CowData<T>::_unref() {
}
}

// free mem
// Free Memory.
Memory::free_static((uint8_t *)prev_ptr - DATA_OFFSET, false);
}

template <typename T>
typename CowData<T>::USize CowData<T>::_copy_on_write() {
if (!_ptr) {
return 0;
Error CowData<T>::_fork_allocate(USize p_size) {
if (p_size == 0) {
// Wants to clean up.
_unref();
return OK;
}

SafeNumeric<USize> *refc = _get_refcount();

USize rc = refc->get();
if (unlikely(rc > 1)) {
/* in use by more than me */
USize current_size = *_get_size();
USize alloc_size;
ERR_FAIL_COND_V(!_get_alloc_size_checked(p_size, &alloc_size), ERR_OUT_OF_MEMORY);

uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, 0);
const USize prev_size = size();

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
USize *_size_ptr = _get_size_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);
if (!_ptr) {
// We had no data before; just allocate a new array.
const Error error = _alloc(alloc_size);
if (error) {
return error;
}
} else if (_get_refcount()->get() == 1) {
// Resize in-place.
// Mind that this case is not just an optimization, but required,
// as some callers depend on _copy_on_write() calls not changing
// the pointer after the first fork (e.g. mutable iterators).
if (p_size == prev_size) {
// We can shortcut here; we don't need to do anything.
return OK;
}

new (_refc_ptr) SafeNumeric<USize>(1); //refcount
*(_size_ptr) = current_size; //size
// Destroy extraneous elements.
if constexpr (!std::is_trivially_destructible_v<T>) {
for (USize i = prev_size; i > p_size; i--) {
_ptr[i - 1].~T();
}
}

// initialize new elements
if constexpr (std::is_trivially_copyable_v<T>) {
memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
} else {
for (USize i = 0; i < current_size; i++) {
memnew_placement(&_data_ptr[i], T(_ptr[i]));
if (alloc_size != _get_alloc_size(prev_size)) {
const Error error = _realloc(alloc_size);
if (error) {
// Out of memory; the current array is still valid though.
return error;
}
}
} else {
// Resize by forking.

// Create a temporary CowData to hold ownership over our _ptr.
// It will be used to copy elements from the old buffer over to our new buffer.
// At the end of the block, it will be automatically destructed by going out of scope.
const CowData prev_data;
prev_data._ptr = _ptr;
_ptr = nullptr;

_unref();
_ptr = _data_ptr;
const Error error = _alloc(alloc_size);
if (error) {
return error;
}

rc = 1;
// Copy over elements.
const USize copied_element_count = MIN(prev_size, p_size);
if (copied_element_count > 0) {
if constexpr (std::is_trivially_copyable_v<T>) {
memcpy((uint8_t *)_ptr, (uint8_t *)prev_data._ptr, copied_element_count * sizeof(T));
} else {
for (USize i = 0; i < copied_element_count; i++) {
memnew_placement(&_ptr[i], T(prev_data._ptr[i]));
}
}
}
}
return rc;

// Set our new size.
*_get_size() = p_size;

return OK;
}

template <typename T>
template <bool p_ensure_zero>
Error CowData<T>::resize(Size p_size) {
ERR_FAIL_COND_V(p_size < 0, ERR_INVALID_PARAMETER);

Size current_size = size();

if (p_size == current_size) {
const Size prev_size = size();
if (p_size == prev_size) {
return OK;
}

if (p_size == 0) {
// Wants to clean up.
_unref(); // Resets _ptr to nullptr.
return OK;
const Error error = _fork_allocate(p_size);
if (error) {
return error;
}

// possibly changing size, copy on write
_copy_on_write();

USize current_alloc_size = _get_alloc_size(current_size);
USize alloc_size;
ERR_FAIL_COND_V(!_get_alloc_size_checked(p_size, &alloc_size), ERR_OUT_OF_MEMORY);

if (p_size > current_size) {
if (alloc_size != current_alloc_size) {
if (current_size == 0) {
// alloc from scratch
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
USize *_size_ptr = _get_size_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

new (_refc_ptr) SafeNumeric<USize>(1); //refcount
*(_size_ptr) = 0; //size, currently none

_ptr = _data_ptr;

} else {
const Error error = _realloc(alloc_size);
if (error) {
return error;
}
}
}

// construct the newly created elements

if (p_size > prev_size) {
if constexpr (!std::is_trivially_constructible_v<T>) {
for (Size i = *_get_size(); i < p_size; i++) {
for (Size i = prev_size; i < p_size; i++) {
memnew_placement(&_ptr[i], T);
}
} else if (p_ensure_zero) {
memset((void *)(_ptr + current_size), 0, (p_size - current_size) * sizeof(T));
memset((void *)&_ptr[prev_size], 0, (p_size - prev_size) * sizeof(T));
}
}

*_get_size() = p_size;
return OK;
}

} else if (p_size < current_size) {
if constexpr (!std::is_trivially_destructible_v<T>) {
// deinitialize no longer needed elements
for (USize i = p_size; i < *_get_size(); i++) {
T *t = &_ptr[i];
t->~T();
}
}
template <typename T>
Error CowData<T>::_alloc(USize p_alloc_size) {
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(p_alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

if (alloc_size != current_alloc_size) {
const Error error = _realloc(alloc_size);
if (error) {
return error;
}
}
_ptr = _get_data_ptr(mem_new);

*_get_size() = p_size;
}
// If we alloc, we're guaranteed to be the only reference.
new (_get_refcount()) SafeNumeric<USize>(1);

return OK;
}

template <typename T>
Error CowData<T>::_realloc(Size p_alloc_size) {
Error CowData<T>::_realloc(USize p_alloc_size) {
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, p_alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);
_ptr = _get_data_ptr(mem_new);

// If we realloc, we're guaranteed to be the only reference.
new (_refc_ptr) SafeNumeric<USize>(1);
_ptr = _data_ptr;
// So the reference was 1 and was copied to be 1 again.

return OK;
}
Expand Down