diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index fd2108537e7..a91c104015b 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -824,6 +824,12 @@ DebugTraceStacker::DebugTraceStacker(EvalState & evalState, DebugTrace t) void Value::mkString(std::string_view s) { + if constexpr (ValueStorage::maxSmallStringSize > 0) { + if (s.size() <= ValueStorage::maxSmallStringSize) { + ValueStorage::setSmallString(s); + return; + } + } mkStringNoCopy(makeImmutableString(s)); } @@ -843,7 +849,12 @@ static const char ** encodeContext(const NixStringContext & context) void Value::mkString(std::string_view s, const NixStringContext & context) { - mkStringNoCopy(makeImmutableString(s), encodeContext(context)); + auto encodedContext = encodeContext(context); + if (encodedContext == nullptr) { + mkString(s); + return; + } + mkStringNoCopy(makeImmutableString(s), encodedContext); } void Value::mkStringMove(const char * s, const NixStringContext & context) @@ -1958,8 +1969,7 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v) /* c_str() is not str().c_str() because we want to create a string Value. allocating a GC'd string directly and moving it into a Value lets us avoid an allocation and copy. */ - const auto c_str = [&] { - char * result = allocString(sSize + 1); + const auto c_str = [&](char * result) { char * tmp = result; for (const auto & part : s) { memcpy(tmp, part->data(), part->size()); @@ -2041,8 +2051,23 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v) .withFrame(env, *this) .debugThrow(); v.mkPath(state.rootPath(CanonPath(str()))); - } else - v.mkStringMove(c_str(), context); + } else { + if (sSize == 0) { + v.mkStringMove("", context); + return; + } + + if (sSize <= Value::maxSmallStringSize) { + /* +1 is required for the NUL terminator. 
*/ + std::array result; + v.mkString(c_str(result.data()), context); + return; + } + + char * result = allocString(sSize + 1); + v.mkStringMove(c_str(result), context); + result[sSize] = 0; + } } void ExprPos::eval(EvalState & state, Env & env, Value & v) diff --git a/src/libexpr/include/nix/expr/symbol-table.hh b/src/libexpr/include/nix/expr/symbol-table.hh index 5f2b47dd6bf..b0ccb22ff33 100644 --- a/src/libexpr/include/nix/expr/symbol-table.hh +++ b/src/libexpr/include/nix/expr/symbol-table.hh @@ -123,6 +123,8 @@ public: const auto & [v, idx] = key.store.add(SymbolValue{}); if (size == 0) { v.mkStringNoCopy("", nullptr); + } else if (size <= Value::maxSmallStringSize) { + v.mkString(key.s); } else { auto s = key.alloc.allocate(size + 1); memcpy(s, key.s.data(), size); diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index 9d0cf1e54b3..86cc25b52e1 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -5,6 +5,7 @@ #include #include #include +#include #include "nix/expr/eval-gc.hh" #include "nix/expr/value/context.hh" @@ -47,6 +48,7 @@ typedef enum { /* layout: Single untaggable field */ tListN, tString, + tSmallString, tPath, } InternalType; @@ -323,14 +325,23 @@ inline constexpr InternalType payloadTypeToInternalType = PayloadTypeToInternalT template class ValueStorage : public detail::ValueBase { + static constexpr std::size_t smallStringStorageSize = std::max({ +#define NIX_VALUE_STORAGE_FIELD_SIZE(T, FIELD_NAME, DISCRIMINATOR) sizeof(T), + NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_FIELD_SIZE) +#undef NIX_VALUE_STORAGE_FIELD_SIZE + }); + protected: using Payload = union { #define NIX_VALUE_STORAGE_DEFINE_FIELD(T, FIELD_NAME, DISCRIMINATOR) T FIELD_NAME; NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_DEFINE_FIELD) #undef NIX_VALUE_STORAGE_DEFINE_FIELD + std::array smallString; }; + static constexpr std::size_t maxSmallStringSize = smallStringStorageSize - 1; + 
private: InternalType internalType = tUninitialized; Payload payload; @@ -357,6 +368,30 @@ protected: #undef NIX_VALUE_STORAGE_GET_IMPL #undef NIX_VALUE_STORAGE_FOR_EACH_FIELD + void setSmallString(std::string_view s) + { + assert(s.size() <= maxSmallStringSize); + internalType = tSmallString; + payload.smallString = {}; + /* Trick is the same as in Facebook's Folly string. Use the last byte + of the string to store the remaining capacity. This way it naturally + becomes the null terminator when string has the size (smallStringStorageSize - 1). */ + payload.smallString.back() = maxSmallStringSize - s.size(); + std::memcpy(payload.smallString.data(), s.data(), s.size()); + } + + std::size_t getSmallStringSize() const + { + std::size_t remainingCapacity = payload.smallString.back(); + return maxSmallStringSize - remainingCapacity; + } + + const char * getSmallStringData() const + { + /* This string is null terminated. See setSmallString. */ + return payload.smallString.data(); + } + /** Get internal type currently occupying the storage. */ InternalType getInternalType() const noexcept { @@ -434,6 +469,7 @@ class ValueStorage(tListN + (pd - pdListN)); case pdPairOfPointers: @@ -643,6 +680,56 @@ protected: { setUntaggablePayload(path.accessor, path.path); } + + /** + * Pointer tagging doesn't play well with big endian systems (because the tag will be in the middle + * of the array), so we don't do this optimization on big endian systems. + * + * 14 = 8 + 8 - 1 (the type tag) - 1 (string size + null terminator) + */ + static constexpr std::size_t maxSmallStringSize = std::endian::native == std::endian::little ? 14 : 0; + + void setSmallString(std::string_view s) + { + assert(s.size() <= maxSmallStringSize); + + std::size_t remainingCapacity = maxSmallStringSize - s.size(); + payload = {pdSmallString, remainingCapacity << 56}; + + /* 7 - we are skipping the first tag byte (it's stored in the 3 least significant bits). 
*/ + { + auto firstDWord = s.substr(0, 7); + std::size_t bitPos = 8; + for (auto c : firstDWord) { + payload[0] |= (PackedPointer{static_cast(c)} << bitPos); + bitPos += 8; + } + + s.remove_prefix(firstDWord.size()); + } + + { + auto secondDWord = s; + assert(secondDWord.size() <= 7); + std::size_t bitPos = 0; + for (auto c : secondDWord) { + payload[1] |= (PackedPointer{static_cast(c)} << bitPos); + bitPos += 8; + } + } + } + + std::size_t getSmallStringSize() const + { + std::size_t remainingCapacity = payload[1] >> 56; + return maxSmallStringSize - remainingCapacity; + } + + const char * getSmallStringData() const + { + /* Skip the type tag byte. */ + return reinterpret_cast(payload.data()) + 1; + } }; /** @@ -849,6 +936,10 @@ struct Value : public ValueStorage } public: + /** + * Maximum size of a string that can be stored inline without allocations. + */ + using ValueStorage::maxSmallStringSize; /** * Never modify the backing `Value` object! @@ -907,6 +998,7 @@ public: case tBool: return nBool; case tString: + case tSmallString: return nString; case tPath: return nPath; @@ -1071,16 +1163,28 @@ public: std::string_view string_view() const noexcept { + if constexpr (maxSmallStringSize > 0) { + if (isa()) + return std::string_view{getSmallStringData(), getSmallStringSize()}; + } return std::string_view(getStorage().c_str); } const char * c_str() const noexcept { + if constexpr (maxSmallStringSize > 0) { + if (isa()) + return getSmallStringData(); + } return getStorage().c_str; } const char ** context() const noexcept { + if constexpr (maxSmallStringSize > 0) { + if (isa()) + return nullptr; + } return getStorage().context; }