From 2a493fdd79bb8fa0704088abd6600136eacc33d9 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 23 Aug 2023 09:15:51 -0400 Subject: [PATCH] chore: store dom::String as a single pointer closes #413 --- include/mrdox/Dom/String.hpp | 195 ++++++++++++++++----------- src/lib/Dom/String.cpp | 246 +++++++++++------------------------ 2 files changed, 196 insertions(+), 245 deletions(-) diff --git a/include/mrdox/Dom/String.hpp b/include/mrdox/Dom/String.hpp index 7ea72e3e4..87a73ed83 100644 --- a/include/mrdox/Dom/String.hpp +++ b/include/mrdox/Dom/String.hpp @@ -28,45 +28,40 @@ concept StringLikeTy = ! std::is_same_v && std::convertible_to; -/** An immutable string with shared ownership. -*/ class MRDOX_DECL String final { - struct Impl; + const char* ptr_ = nullptr; - union - { - Impl* impl_; - // len is stored with the low bit moved to - // the hi bit, and the low bit always set. - std::size_t len_; - }; - char const* psz_; - - static void allocate(std::string_view s,Impl*&, char const*&); - static void deallocate(Impl*) noexcept; - static consteval std::size_t len(std::size_t n) + bool + is_literal() const noexcept { - return (n << (sizeof(std::size_t)*8 - 1)) | n | 1UL; + MRDOX_ASSERT(! empty()); + // for string literals, ptr_ stores a pointer + // to the first character of the string. + // for ref-counted strings, ptr_ stores a pointer + // to the null-terminator of the string. + return *ptr_; } - constexpr bool is_literal() const noexcept - { - return (len_ & 1) != 0; - } + class impl_view; -public: - /** Destructor. + impl_view impl() const noexcept; + + /** Construct a ref-counted string. */ - ~String(); + void + construct( + const char* s, + std::size_t n); +public: /** Constructor. Default constructed strings have a zero size, and include a null terminator. */ - String() noexcept; + constexpr String() noexcept = default; /** Constructor. @@ -74,24 +69,73 @@ class MRDOX_DECL the newly constructed string. 
The moved-from string behaves as if default constructed. */ - String(String&& other) noexcept; + constexpr String(String&& other) noexcept + { + swap(other); + } /** Constructor. The newly constructed string acquries shared ownership of the string referenced by other. */ - String(String const& other) noexcept; + String(const String& other) noexcept; + + /** Destructor. + */ + ~String() noexcept; + + /** Constructor. + + This function constructs a string literal + which references the buffer pointed to by + `str`. Ownership is not transferred; the lifetime + of the buffer must extend until the string is + destroyed, otherwise the behavior is undefined. + + @param str A null-terminated string. If the + string is not null-terminated, the result is + undefined. + */ + template + constexpr + String(const char(&str)[N]) + { + // empty strings are stored as nullptr + if constexpr(N > 1) + ptr_ = str; + } /** Constructor. This function constructs a new string from - the buffer pointed to by `s`. + the string pointed to by `str` of length `len`. - @param s The string to construct with. + @param str The string to construct with. + A copy of this string is made. + @param len The length of the string. + */ + String( + const char* str, + std::size_t len) + { + // empty strings are stored as nullptr + if(len) + construct(str, len); + } + + /** Constructor. + + This function constructs a new string from + the buffer pointed to by `sv`. + + @param sv The string to construct with. A copy of this string is made. */ - String(std::string_view s); + String(std::string_view sv) + : String(sv.data(), sv.size()) + { + } /** Constructor. @@ -107,25 +151,19 @@ class MRDOX_DECL { } - /** Constructor. - - This function constructs a string literal - which references the buffer pointed to by - sz. Ownership is not transferred; the lifetime - of the buffer must extend until the string is - destroyed, otherwise the behavior is undefined. + /** Assignment. 
- @param psz A null-terminated string. If the - string is not null-terminated, the result is - undefined. + This acquires shared ownership of the + string referenced by other. Ownership of + the previously referenced string is released. */ - template - constexpr String(char const(&psz)[N]) noexcept - : len_(len(N-1)) - , psz_(psz) + String& + operator=( + const String& other) noexcept { - static_assert(N > 0); - static_assert(N <= std::size_t(-1)>>1); + String temp(other); + swap(temp); + return *this; } /** Assignment. @@ -136,33 +174,29 @@ class MRDOX_DECL After the assignment, the moved-from string behaves as if default constructed. */ - String& operator=(String&& other) noexcept; - - /** Assignment. - - This acquires shared ownership of the - string referenced by other. Ownership of - the previously referenced string is released. - */ - String& operator=(String const& other) noexcept; - - /** Return true if the string is empty. - */ - constexpr bool empty() const noexcept + String& + operator=( + String&& other) noexcept { - return psz_[0] == '\0'; + String temp(std::move(other)); + swap(temp); + return *this; } /** Return the string. */ - std::string_view - get() const noexcept; + operator std::string_view() const noexcept + { + return get(); + } /** Return the string. */ - operator std::string_view() const noexcept + std::string_view + get() const noexcept { - return get(); + return std::string_view( + data(), size()); } /** Return the string. @@ -172,19 +206,24 @@ class MRDOX_DECL return std::string(get()); } - /** Return the size. + /** Return true if the string is empty. */ - std::size_t size() const noexcept + bool + empty() const noexcept { - return get().size(); + return ! ptr_; } /** Return the size. */ - char const* data() const noexcept - { - return get().data(); - } + std::size_t size() const noexcept; + + /** Return the string. + + The pointed-to character buffer returned + by this function is always null-terminated. 
+ */ + const char* data() const noexcept; /** Return the string. @@ -193,20 +232,26 @@ class MRDOX_DECL */ char const* c_str() const noexcept { - return psz_; + return data(); } /** Swap two strings. */ - void swap(String& other) noexcept + constexpr + void + swap(String& other) noexcept { - std::swap(impl_, other.impl_); - std::swap(psz_, other.psz_); + std::swap(ptr_, other.ptr_); } /** Swap two strings. */ - friend void swap(String& lhs, String& rhs) noexcept + friend + constexpr + void + swap( + String& lhs, + String& rhs) noexcept { lhs.swap(rhs); } diff --git a/src/lib/Dom/String.cpp b/src/lib/Dom/String.cpp index 726757b8f..f23b78787 100644 --- a/src/lib/Dom/String.cpp +++ b/src/lib/Dom/String.cpp @@ -15,219 +15,125 @@ namespace clang { namespace mrdox { namespace dom { -namespace { - -// Variable-length integer -template -class varint +class String::impl_view { - using Digit = std::uint8_t; - static_assert(CHAR_BIT == 8); - static_assert(sizeof(Digit) == 1); - static_assert(std::unsigned_integral); - static constexpr auto N = (sizeof(U) * 8 + 6) / 7; - static constexpr Digit Bits = 8 * sizeof(Digit) - 1; - static constexpr Digit EndBit = 1 << Bits; - static constexpr Digit DigMask = (1 << Bits) - 1; - char buf_[N]; - std::size_t n_ = 0; + char* impl_; public: - explicit varint(U u) noexcept + constexpr + impl_view(const char* ptr) + : impl_(const_cast(ptr)) { - auto p = buf_; - for(;;) - { - ++n_; - auto dig = u & DigMask; - u >>= Bits; - if(u == 0) - { - // hi bit set means stop - dig |= EndBit; - *p = static_cast(dig); - break; - } - *p++ = static_cast(dig); - } } - std::string_view get() const noexcept + std::size_t size() { - return { buf_, n_ }; + std::size_t n; + std::memcpy(&n, impl_ + 1, sizeof(std::size_t)); + return n; } - static U read(char const*& p) noexcept + char* data() { - Digit dig = *p++; - if(dig & EndBit) - return dig & DigMask; - U u = dig; - unsigned Shift = Bits; - for(;;) - { - auto dig = *p++; - if(dig & EndBit) - { - dig &= 
DigMask; - u |= dig << Shift; - return u; - } - u |= dig << Shift; - Shift += Bits; - } + return impl_ - size(); } -}; - -static constinit char sz_empty = { '\0' }; - -} // (anon) - -// An allocated string is stored in one buffer -// laid out thusly: -// -// Impl -// size varint -// chars char[] -// -struct String::Impl -{ - std::atomic refs; - explicit Impl( - std::string_view s, - varint const& uv) noexcept - : refs(1) + char* base() { - auto p = reinterpret_cast(this+1); - auto vs = uv.get(); - std::memcpy(p, vs.data(), vs.size()); - p += vs.size(); - std::memcpy(p, s.data(), s.size()); - p[s.size()] = '\0'; + return data() - sizeof(std::atomic); } - std::string_view get() const noexcept + std::atomic& refs() { - auto p = reinterpret_cast(this+1); - auto const size = varint::read(p); - return { p, size }; + return *reinterpret_cast< + std::atomic*>(base()); } }; -void +String::impl_view String:: -allocate( - std::string_view s, - Impl*& impl, - char const*& psz) +impl() const noexcept { - std::allocator alloc; - varint uv(s.size()); - auto const varlen = uv.get().size(); - auto n = - sizeof(Impl) + // header - varlen + // size (varint) - s.size() + // string data - 1 + // null term '\0' - (sizeof(Impl) - 1); // round up to nearest sizeof(Impl) - impl = new(alloc.allocate(n / sizeof(Impl))) Impl(s, uv); - psz = reinterpret_cast(impl + 1) + varlen; + MRDOX_ASSERT(! empty() && ! 
is_literal()); + return impl_view(ptr_); } void String:: -deallocate( - Impl* impl) noexcept +construct( + const char* s, + std::size_t n) { - std::allocator alloc; - auto const s = impl->get(); - varint uv(s.size()); - auto const varlen = uv.get().size(); - auto n = - sizeof(Impl) + // header - varlen + // size (varint) - s.size() + // string data - 1 + // null term '\0' - (sizeof(Impl) - 1); // round up to nearest sizeof(Impl) - std::destroy_at(impl); - alloc.deallocate(impl, n / sizeof(Impl)); + char* ptr = static_cast(::operator new( + sizeof(std::atomic) + // ref count + n + // string + 1 + // null terminator + sizeof(std::size_t) // string length (unaligned) + )); + // initialize ref count + ::new(ptr) std::atomic(1); + ptr += sizeof(std::atomic); + // copy in the string + std::memcpy(ptr, s, n); + ptr += n; + // write the null terminator + *ptr = '\0'; + // store the address of the null terminator into ptr_. + // it indicates that the string is ref-counted + ptr_ = ptr++; + // KRYSTIAN NOTE: the size is currently stored unaligned. + // in the future, we should investigate whether the cost + // of the cacheline split load is worth allocating + // additional bytes to properly align this. + // write the strings size + std::memcpy(ptr, &n, sizeof(std::size_t)); } -//------------------------------------------------ - String:: -~String() +String(const String& other) noexcept + : ptr_(other.ptr_) { - if(is_literal()) - return; - if(--impl_->refs > 0) - return; - deallocate(impl_); + if(! empty() && ! is_literal()) + ++impl().refs(); } String:: -String() noexcept - : len_(len(0)) - , psz_(&sz_empty) +~String() noexcept { + // this better be true, since we don't call + // any destructors when deallocating + static_assert( + std::is_trivially_destructible_v< + std::atomic>); + if(empty() || is_literal()) + return; + if(! 
--impl().refs()) + ::operator delete(impl().base()); } +std::size_t String:: -String( - String&& other) noexcept - : String() -{ - swap(other); -} - -String:: -String( - String const& other) noexcept - : impl_(other.impl_) - , psz_(other.psz_) -{ - if(! is_literal()) - ++impl_->refs; -} - -String:: -String( - std::string_view s) -{ - allocate(s, impl_, psz_); - MRDOX_ASSERT(! is_literal()); -} - -String& -String:: -operator=( - String&& other) noexcept -{ - String temp(std::move(other)); - swap(temp); - return *this; -} - -String& -String:: -operator=( - String const& other) noexcept +size() const noexcept { - String temp(other); - swap(temp); - return *this; + if(empty()) + return 0; + if(is_literal()) + return std::strlen(ptr_); + return impl().size(); } -std::string_view +const char* String:: -get() const noexcept +data() const noexcept { + // if the string is empty, the storage + // normally used for the pointer serves + // as an empty null-terminated string + if(empty()) + return reinterpret_cast(&ptr_); if(is_literal()) - return std::string_view(psz_, - (len_ & ((std::size_t(-1) >> 1) & ~std::size_t(1))) | - (len_ >> (sizeof(std::size_t)*8 - 1))); - return impl_->get(); + return ptr_; + return impl().data(); } } // dom