Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions src/libexpr/eval.cc
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,12 @@ DebugTraceStacker::DebugTraceStacker(EvalState & evalState, DebugTrace t)

/**
 * Initialise this value as a string without context holding the characters of `s`.
 *
 * When small-string storage is enabled (`maxSmallStringSize > 0`) and `s`
 * is short enough, the characters are stored through `setSmallString`.
 * Otherwise the string is copied with `makeImmutableString`.
 */
void Value::mkString(std::string_view s)
{
    if constexpr (ValueStorage::maxSmallStringSize > 0) {
        const bool fitsInline = s.size() <= ValueStorage::maxSmallStringSize;
        if (fitsInline) {
            ValueStorage::setSmallString(s);
            return;
        }
    }

    /* Too long for the small-string representation, or it is disabled. */
    mkStringNoCopy(makeImmutableString(s));
}

Expand All @@ -843,7 +849,12 @@ static const char ** encodeContext(const NixStringContext & context)

void Value::mkString(std::string_view s, const NixStringContext & context)
{
mkStringNoCopy(makeImmutableString(s), encodeContext(context));
auto encodedContext = encodeContext(context);
if (encodedContext == nullptr) {
mkString(s);
return;
}
mkStringNoCopy(makeImmutableString(s), encodedContext);
}

void Value::mkStringMove(const char * s, const NixStringContext & context)
Expand Down Expand Up @@ -1958,8 +1969,7 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
/* c_str() is not str().c_str() because we want to create a string
Value. allocating a GC'd string directly and moving it into a
Value lets us avoid an allocation and copy. */
const auto c_str = [&] {
char * result = allocString(sSize + 1);
const auto c_str = [&](char * result) {
char * tmp = result;
for (const auto & part : s) {
memcpy(tmp, part->data(), part->size());
Expand Down Expand Up @@ -2041,8 +2051,23 @@ void ExprConcatStrings::eval(EvalState & state, Env & env, Value & v)
.withFrame(env, *this)
.debugThrow();
v.mkPath(state.rootPath(CanonPath(str())));
} else
v.mkStringMove(c_str(), context);
} else {
if (sSize == 0) {
v.mkStringMove("", context);
return;
}

if (sSize <= Value::maxSmallStringSize) {
/* +1 is required for the NUL terminator. */
std::array<char, Value::maxSmallStringSize + 1> result;
v.mkString(c_str(result.data()), context);
return;
}

char * result = allocString(sSize + 1);
v.mkStringMove(c_str(result), context);
result[sSize] = 0;
}
}

void ExprPos::eval(EvalState & state, Env & env, Value & v)
Expand Down
2 changes: 2 additions & 0 deletions src/libexpr/include/nix/expr/symbol-table.hh
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ public:
const auto & [v, idx] = key.store.add(SymbolValue{});
if (size == 0) {
v.mkStringNoCopy("", nullptr);
} else if (size <= Value::maxSmallStringSize) {
v.mkString(key.s);
} else {
auto s = key.alloc.allocate(size + 1);
memcpy(s, key.s.data(), size);
Expand Down
104 changes: 104 additions & 0 deletions src/libexpr/include/nix/expr/value.hh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <span>
#include <type_traits>
#include <concepts>
#include <bit>

#include "nix/expr/eval-gc.hh"
#include "nix/expr/value/context.hh"
Expand Down Expand Up @@ -47,6 +48,7 @@ typedef enum {
/* layout: Single untaggable field */
tListN,
tString,
tSmallString,
tPath,
} InternalType;

Expand Down Expand Up @@ -323,14 +325,23 @@ inline constexpr InternalType payloadTypeToInternalType = PayloadTypeToInternalT
template<std::size_t ptrSize, typename Enable = void>
class ValueStorage : public detail::ValueBase
{
/* Size in bytes of the inline small-string buffer: the largest `sizeof(T)`
   among the fields enumerated by NIX_VALUE_STORAGE_FOR_EACH_FIELD. The helper
   macro below expands each field to `sizeof(T),` and is undefined right after use. */
static constexpr std::size_t smallStringStorageSize = std::max({
#define NIX_VALUE_STORAGE_FIELD_SIZE(T, FIELD_NAME, DISCRIMINATOR) sizeof(T),
NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_FIELD_SIZE)
#undef NIX_VALUE_STORAGE_FIELD_SIZE
});

protected:
/* Union of every payload representation. One member is generated per field
   enumerated by NIX_VALUE_STORAGE_FOR_EACH_FIELD, plus a raw character buffer
   used for strings stored inline. */
using Payload = union
{
#define NIX_VALUE_STORAGE_DEFINE_FIELD(T, FIELD_NAME, DISCRIMINATOR) T FIELD_NAME;
NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_DEFINE_FIELD)
#undef NIX_VALUE_STORAGE_DEFINE_FIELD
/* Inline string buffer of smallStringStorageSize bytes. */
std::array<char, smallStringStorageSize> smallString;
};

/* Longest string that can be stored inline: one byte of the buffer is
   reserved for the remaining-capacity counter (see setSmallString). */
static constexpr std::size_t maxSmallStringSize = smallStringStorageSize - 1;

private:
InternalType internalType = tUninitialized;
Payload payload;
Expand All @@ -357,6 +368,30 @@ protected:
#undef NIX_VALUE_STORAGE_GET_IMPL
#undef NIX_VALUE_STORAGE_FOR_EACH_FIELD

/**
 * Store the characters of `s` inline in the payload and mark this storage
 * as holding a tSmallString.
 *
 * `s` must not be longer than maxSmallStringSize (asserted).
 */
void setSmallString(std::string_view s)
{
    const std::size_t length = s.size();
    assert(length <= maxSmallStringSize);

    internalType = tSmallString;

    /* Start from an all-zero buffer so that every byte following the copied
       characters is a NUL. */
    payload.smallString = {};
    std::memcpy(payload.smallString.data(), s.data(), length);

    /* Trick is the same as in Facebook's Folly string. Use the last byte
       of the buffer to store the remaining capacity. This way it naturally
       becomes the null terminator when the string has the maximum size
       (smallStringStorageSize - 1). */
    payload.smallString.back() = maxSmallStringSize - length;
}

/**
 * Length of the inline string. The last byte of the buffer holds the
 * remaining capacity (see setSmallString), so the length is the maximum
 * size minus that value.
 */
std::size_t getSmallStringSize() const
{
    const std::size_t unusedBytes = payload.smallString.back();
    const std::size_t length = maxSmallStringSize - unusedBytes;
    return length;
}

/**
 * Pointer to the first character of the inline string.
 */
const char * getSmallStringData() const
{
    /* The buffer is null terminated; see setSmallString. */
    const char * firstChar = payload.smallString.data();
    return firstChar;
}

/** Get internal type currently occupying the storage. */
InternalType getInternalType() const noexcept
{
Expand Down Expand Up @@ -434,6 +469,7 @@ class ValueStorage<ptrSize, std::enable_if_t<detail::useBitPackedValueStorage<pt
/* The order of these enumerators must be the same as in InternalType. */
pdListN, //< layout: Single untaggable field.
pdString,
pdSmallString,
pdPath,
pdPairOfPointers, //< layout: Pair of pointers payload
};
Expand Down Expand Up @@ -513,6 +549,7 @@ protected:
/* The order must match that of the enumerations defined in InternalType. */
case pdListN:
case pdString:
case pdSmallString:
case pdPath:
return static_cast<InternalType>(tListN + (pd - pdListN));
case pdPairOfPointers:
Expand Down Expand Up @@ -643,6 +680,56 @@ protected:
{
setUntaggablePayload<pdPath>(path.accessor, path.path);
}

/**
 * Maximum number of characters of a string stored inline in this
 * bit-packed storage.
 *
 * Pointer tagging doesn't play well with big endian systems (because the tag will be in the middle
 * of the array), so we don't do this optimization on big endian systems; there the limit is 0.
 *
 * 14 = 8 + 8 - 1 (the type tag) - 1 (string size + null terminator)
 */
static constexpr std::size_t maxSmallStringSize = std::endian::native == std::endian::little ? 14 : 0;

/**
 * Pack the characters of `s` into the two payload words and tag the
 * payload as pdSmallString.
 *
 * Layout written here: the tag occupies the lowest byte of payload[0], up
 * to 7 characters follow in the remaining bytes of payload[0], up to 7 more
 * fill the low bytes of payload[1], and the top byte of payload[1] holds the
 * remaining capacity.
 *
 * `s` must not be longer than maxSmallStringSize (asserted).
 */
void setSmallString(std::string_view s)
{
    assert(s.size() <= maxSmallStringSize);

    /* The remaining capacity goes into the most significant byte of the second word. */
    std::size_t unusedBytes = maxSmallStringSize - s.size();
    payload = {pdSmallString, unusedBytes << 56};

    /* 7 - the first word loses one byte to the tag (it's stored in the 3 least significant bits). */
    const auto head = s.substr(0, 7);
    const auto tail = s.substr(head.size());
    assert(tail.size() <= 7);

    /* Characters of the first word start one byte up, just above the tag byte. */
    for (std::size_t i = 0; i < head.size(); ++i) {
        const std::size_t shift = 8 * (i + 1);
        payload[0] |= (PackedPointer{static_cast<unsigned char>(head[i])} << shift);
    }

    /* Characters of the second word start at its least significant byte. */
    for (std::size_t i = 0; i < tail.size(); ++i) {
        const std::size_t shift = 8 * i;
        payload[1] |= (PackedPointer{static_cast<unsigned char>(tail[i])} << shift);
    }
}

/**
 * Length of the inline string. The most significant byte of payload[1]
 * holds the remaining capacity (see setSmallString), so the length is the
 * maximum size minus that value.
 */
std::size_t getSmallStringSize() const
{
    const std::size_t unusedBytes = payload[1] >> 56;
    const std::size_t length = maxSmallStringSize - unusedBytes;
    return length;
}

/**
 * Pointer to the first character of the inline string, viewed as raw bytes
 * of the payload.
 */
const char * getSmallStringData() const
{
    const char * rawBytes = reinterpret_cast<const char *>(payload.data());
    /* Skip the type tag byte. */
    return rawBytes + 1;
}
};

/**
Expand Down Expand Up @@ -849,6 +936,10 @@ struct Value : public ValueStorage<sizeof(void *)>
}

public:
/**
* Maximum size of a string that can be stored inline without allocations.
*/
using ValueStorage::maxSmallStringSize;

/**
* Never modify the backing `Value` object!
Expand Down Expand Up @@ -907,6 +998,7 @@ public:
case tBool:
return nBool;
case tString:
case tSmallString:
return nString;
case tPath:
return nPath;
Expand Down Expand Up @@ -1071,16 +1163,28 @@ public:

/**
 * View over the characters of this string value.
 *
 * For a tSmallString the view is built from the inline data and its stored
 * size; otherwise it is built from the `c_str` pointer of the
 * StringWithContext payload.
 */
std::string_view string_view() const noexcept
{
    if constexpr (maxSmallStringSize > 0) {
        if (isa<tSmallString>()) {
            const char * inlineChars = getSmallStringData();
            const std::size_t inlineLength = getSmallStringSize();
            return std::string_view{inlineChars, inlineLength};
        }
    }

    const auto heapChars = getStorage<StringWithContext>().c_str;
    return std::string_view(heapChars);
}

/**
 * Pointer to the characters of this string value: the inline data for a
 * tSmallString, otherwise the `c_str` pointer of the StringWithContext payload.
 */
const char * c_str() const noexcept
{
    if constexpr (maxSmallStringSize > 0) {
        const bool storedInline = isa<tSmallString>();
        if (storedInline)
            return getSmallStringData();
    }

    return getStorage<StringWithContext>().c_str;
}

/**
 * Encoded string context of this string value. A tSmallString yields
 * `nullptr`; otherwise the `context` pointer of the StringWithContext
 * payload is returned.
 */
const char ** context() const noexcept
{
    if constexpr (maxSmallStringSize > 0) {
        const bool storedInline = isa<tSmallString>();
        if (storedInline)
            return nullptr;
    }

    return getStorage<StringWithContext>().context;
}

Expand Down
Loading