Skip to content

Commit

Permalink
Containers: preserve the Global bit in String::nullTerminated*View().
Browse files Browse the repository at this point in the history
So a StringView converted back from such a String has
StringViewFlag::Global set as well. This makes it possible to avoid
unnecessary copies when a String is used as a storage but is then passed
further to other APIs as a StringView again, and those APIs need to
preserve the view lifetime.

It's also useful for testing that a StringView was correctly copied or
not when storing it.
  • Loading branch information
mosra committed Dec 29, 2022
1 parent 0c41e92 commit f65c652
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 55 deletions.
62 changes: 43 additions & 19 deletions src/Corrade/Containers/String.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,28 +45,42 @@ namespace {

static_assert(std::size_t(LargeSizeMask) == Implementation::StringViewSizeMask,
"reserved bits should be the same in String and StringView");
static_assert(std::size_t(LargeSizeMask) == (std::size_t(StringViewFlag::Global)|(std::size_t(Implementation::SmallStringBit) << (sizeof(std::size_t) - 1)*8)),
"small string and global view bits should cover both reserved bits");

String String::nullTerminatedView(StringView view) {
if(view.flags() & StringViewFlag::NullTerminated)
return String{view.data(), view.size(), [](char*, std::size_t){}};
if(view.flags() & StringViewFlag::NullTerminated) {
String out{view.data(), view.size(), [](char*, std::size_t){}};
out._large.size |= std::size_t(view.flags() & StringViewFlag::Global);
return out;
}
return String{view};
}

String String::nullTerminatedView(AllocatedInitT, StringView view) {
if(view.flags() & StringViewFlag::NullTerminated)
return String{view.data(), view.size(), [](char*, std::size_t){}};
if(view.flags() & StringViewFlag::NullTerminated) {
String out{view.data(), view.size(), [](char*, std::size_t){}};
out._large.size |= std::size_t(view.flags() & StringViewFlag::Global);
return out;
}
return String{AllocatedInit, view};
}

String String::nullTerminatedGlobalView(StringView view) {
if(view.flags() >= (StringViewFlag::NullTerminated|StringViewFlag::Global))
return String{view.data(), view.size(), [](char*, std::size_t){}};
if(view.flags() >= (StringViewFlag::NullTerminated|StringViewFlag::Global)) {
String out{view.data(), view.size(), [](char*, std::size_t){}};
out._large.size |= std::size_t(StringViewFlag::Global);
return out;
}
return String{view};
}

String String::nullTerminatedGlobalView(AllocatedInitT, StringView view) {
if(view.flags() >= (StringViewFlag::NullTerminated|StringViewFlag::Global))
return String{view.data(), view.size(), [](char*, std::size_t){}};
if(view.flags() >= (StringViewFlag::NullTerminated|StringViewFlag::Global)) {
String out{view.data(), view.size(), [](char*, std::size_t){}};
out._large.size |= std::size_t(StringViewFlag::Global);
return out;
}
return String{AllocatedInit, view};
}

Expand Down Expand Up @@ -102,7 +116,13 @@ inline void String::construct(const char* const data, const std::size_t size) {
inline void String::destruct() {
/* If not SSO, delete the data */
if(_small.size & Implementation::SmallStringBit) return;
if(_large.deleter) _large.deleter(_large.data, _large.size);
/* Instances created with a custom deleter either don't the Global bit set
at all, or have it set but the deleter is a no-op passed from
nullTerminatedView() / nullTerminatedGlobalView(). Thus *technically*
it's not needed to clear the LargeSizeMask (which implies there's also
no way to test that it got cleared), but do it for consistency. */
if(_large.deleter)
_large.deleter(_large.data, _large.size & ~LargeSizeMask);
else delete[] _large.data;
}

Expand Down Expand Up @@ -192,7 +212,7 @@ String::String(AllocatedInitT, String&& other) {
/* Otherwise take over the data */
} else {
_large.data = other._large.data;
_large.size = other._large.size;
_large.size = other._large.size; /* including the potential Global bit */
_large.deleter = other._large.deleter;
}

Expand Down Expand Up @@ -297,7 +317,7 @@ String::String(String&& other) noexcept {
SSO, as for small string we would be doing a copy of _small.data and
then also a copy of _small.size *including* the two highest bits */
_large.data = other._large.data;
_large.size = other._large.size;
_large.size = other._large.size; /* including the potential Global bit */
_large.deleter = other._large.deleter;
other._large.data = nullptr;
other._large.size = 0;
Expand Down Expand Up @@ -327,7 +347,7 @@ String& String::operator=(String&& other) noexcept {
deleting anything. */
using std::swap;
swap(other._large.data, _large.data);
swap(other._large.size, _large.size);
swap(other._large.size, _large.size); /* including the potential Global bit */
swap(other._large.deleter, _large.deleter);
return *this;
}
Expand Down Expand Up @@ -366,7 +386,7 @@ String::operator Array<char>() && {
out = Array<char>{out.release(), size};
std::memcpy(out.data(), _small.data, size);
} else {
out = Array<char>{_large.data, _large.size, deleter()};
out = Array<char>{_large.data, _large.size & ~LargeSizeMask, deleter()};
}

/* Same as in release(). Create a zero-size small string to fullfil the
Expand All @@ -383,7 +403,11 @@ String::operator bool() const {
/* The data pointer is guaranteed to be non-null, so no need to check it */
if(_small.size & Implementation::SmallStringBit)
return _small.size & ~SmallSizeMask;
return _large.size;
return _large.size & ~LargeSizeMask;
}

StringViewFlags String::viewFlags() const {
return StringViewFlag(_large.size & std::size_t(StringViewFlag::Global))|StringViewFlag::NullTerminated;
}

const char* String::data() const {
Expand All @@ -401,7 +425,7 @@ char* String::data() {
bool String::isEmpty() const {
if(_small.size & Implementation::SmallStringBit)
return !(_small.size & ~SmallSizeMask);
return !_large.size;
return !(_large.size & ~LargeSizeMask);
}

auto String::deleter() const -> Deleter {
Expand All @@ -414,7 +438,7 @@ auto String::deleter() const -> Deleter {
std::size_t String::size() const {
if(_small.size & Implementation::SmallStringBit)
return _small.size & ~SmallSizeMask;
return _large.size;
return _large.size & ~LargeSizeMask;
}

char* String::begin() {
Expand All @@ -438,19 +462,19 @@ const char* String::cbegin() const {
char* String::end() {
if(_small.size & Implementation::SmallStringBit)
return _small.data + (_small.size & ~SmallSizeMask);
return _large.data + _large.size;
return _large.data + (_large.size & ~LargeSizeMask);
}

const char* String::end() const {
if(_small.size & Implementation::SmallStringBit)
return _small.data + (_small.size & ~SmallSizeMask);
return _large.data + _large.size;
return _large.data + (_large.size & ~LargeSizeMask);
}

const char* String::cend() const {
if(_small.size & Implementation::SmallStringBit)
return _small.data + (_small.size & ~SmallSizeMask);
return _large.data + _large.size;
return _large.data + (_large.size & ~LargeSizeMask);
}

/** @todo does it make a practical sense (debug perf) to rewrite these two
Expand Down
41 changes: 27 additions & 14 deletions src/Corrade/Containers/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ namespace Implementation {
template<class> struct StringConverter;

enum: std::size_t {
SmallStringBit = 0x80,
SmallStringBit = 0x40,
SmallStringSize = sizeof(std::size_t)*3 - 1
};
}
Expand Down Expand Up @@ -261,8 +261,9 @@ class CORRADE_UTILITY_EXPORT String {
*
* If the view is @ref StringViewFlag::NullTerminated, returns a
* non-owning reference to it without any extra allocations or copies
* involved. Otherwise creates a null-terminated owning copy using
* @ref String(StringView).
* involved, propagating also @ref StringViewFlag::Global to
* @ref viewFlags() if present. Otherwise creates a null-terminated
* owning copy using @ref String(StringView).
*
* This function is primarily meant for efficiently passing
* @ref BasicStringView "StringView" instances to APIs that expect
Expand Down Expand Up @@ -290,7 +291,8 @@ class CORRADE_UTILITY_EXPORT String {
*
* If the view is both @ref StringViewFlag::NullTerminated and
* @ref StringViewFlag::Global, returns a non-owning reference to it
* without any extra allocations or copies involved. Otherwise creates
* without any extra allocations or copies involved, propagating also
* @ref StringViewFlag::Global to @ref viewFlags(). Otherwise creates
* a null-terminated owning copy using @ref String(StringView).
*
* This function is primarily meant for efficiently storing
Expand Down Expand Up @@ -636,6 +638,17 @@ class CORRADE_UTILITY_EXPORT String {
return _small.size & Implementation::SmallStringBit;
}

/**
* @brief View flags
*
* A @ref BasicStringView "StringView" constructed from this instance
* will have these flags. @ref StringViewFlag::NullTerminated is
* present always, @ref StringViewFlag::Global if the string was
* originally created from a global null-terminated view with
* @ref nullTerminatedView() or @ref nullTerminatedGlobalView().
*/
StringViewFlags viewFlags() const;

/**
* @brief String data
*
Expand Down Expand Up @@ -1216,12 +1229,12 @@ class CORRADE_UTILITY_EXPORT String {

/* Small string optimization. Following size restrictions from
StringView (which uses the top two bits for marking global and
null-terminated views), we can use the highest bit to denote a small
string. The second highest bit is currently unused, as it wouldn't
make sense to allow a String to be larger than StringView, since
these are two mutually convertible and interchangeable in many
cases. In case of a large string, there's size, data pointer and
deleter pointer, either 24 (or 12) bytes in total. In case of a
null-terminated views), we can use the second highest bit of the
size to denote a small string. The highest bit, marked as G in the
below diagram, is used to preserve StringViewFlag::Global in case of
a nullTerminatedGlobalView() and a subsequent conversion back to a
StringView. In case of a large string, there's size, data pointer
and deleter pointer, either 24 (or 12) bytes in total. In case of a
small string, we can store the size only in one byte out of 8 (or
4), which then gives us 23 (or 11) bytes for storing the actual
data, excluding the null terminator that's at most 22 / 10 ASCII
Expand All @@ -1232,27 +1245,27 @@ class CORRADE_UTILITY_EXPORT String {
clarity as well):
+-------------------------------+---------+
| string | si | 01 |
| string | si | 1G |
| data | ze | |
| 23B/11B | 6b | 2b |
+-------------------+-----+++++++---------+
| LSB ||||||| MSB |
+---------+---------+-----+++++++---------+
| data | data | size | 00 |
| data | data | size | 0G |
| pointer | deleter | | |
| 8B/4B | 8B/4B | 56b/24b | 6b | 2b |
+---------+---------+-----------+---------+
On BE it's like this:
+---------+-------------------------------+
| 10 | si | string |
| G1 | si | string |
| | ze | data |
| 2b | 6b | 23B/11B |
+---------+++++++-----+-------------------+
| MSB ||||||| LSB |
+---------+++++++-----+---------+---------+
| 00 | size | data | data |
| G0 | size | data | data |
| | | pointer | deleter |
| 2b | 6b | 56b/24b | 8B/4B | 8B/4B |
+---------+-----------+---------+---------+
Expand Down
4 changes: 2 additions & 2 deletions src/Corrade/Containers/StringView.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ template<class T> BasicStringView<T>::BasicStringView(T* const data, const Strin
data ? std::strlen(data) : 0,
flags|(data ? StringViewFlag::NullTerminated : StringViewFlag::Global)} {}

template<class T> BasicStringView<T>::BasicStringView(String& string) noexcept: BasicStringView{string.data(), string.size(), StringViewFlag::NullTerminated} {}
template<class T> BasicStringView<T>::BasicStringView(String& string) noexcept: BasicStringView{string.data(), string.size(), string.viewFlags()} {}

/* Yes, I'm also surprised this works. On Windows (MSVC, clang-cl and MinGw) it
needs an explicit export otherwise the symbol doesn't get exported. */
template<> template<> CORRADE_UTILITY_EXPORT BasicStringView<const char>::BasicStringView(const String& string) noexcept: BasicStringView{string.data(), string.size(), StringViewFlag::NullTerminated} {}
template<> template<> CORRADE_UTILITY_EXPORT BasicStringView<const char>::BasicStringView(const String& string) noexcept: BasicStringView{string.data(), string.size(), string.viewFlags()} {}

template<class T> Array<BasicStringView<T>> BasicStringView<T>::split(const char delimiter) const {
Array<BasicStringView<T>> parts;
Expand Down
11 changes: 9 additions & 2 deletions src/Corrade/Containers/StringView.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,15 +350,22 @@ BasicStringView {
/**
* @brief Construct from a @ref String
*
* The resulting view has @ref StringViewFlag::NullTerminated set.
* The resulting view has @ref StringViewFlag::NullTerminated set
* always, and @ref StringViewFlag::Global if the string was originally
* created from a global null-terminated view with
* @ref String::nullTerminatedView() or
* @ref String::nullTerminatedGlobalView().
*/
/*implicit*/ BasicStringView(String& data) noexcept;

/**
* @brief Construct from a const @ref String
*
* Enabled only if the view is not mutable. The resulting view has
* @ref StringViewFlag::NullTerminated set.
* @ref StringViewFlag::NullTerminated set always, and
* @ref StringViewFlag::Global if the string was created from a global
* null-terminated view with @ref String::nullTerminatedView() or
* @ref String::nullTerminatedGlobalView().
*/
template<class U = T, class = typename std::enable_if<std::is_const<U>::value>::type> /*implicit*/ BasicStringView(const String& data) noexcept;

Expand Down
Loading

0 comments on commit f65c652

Please sign in to comment.