Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve cstring cache internals #4780

Merged
merged 4 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ cc_library(
":config_h",
"@boost//:format",
"@boost//:multiprecision",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/container:node_hash_set",
"@com_google_absl//absl/numeric:bits",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
Expand Down
60 changes: 40 additions & 20 deletions lib/cstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.

#include "cstring.h"

#include "absl/container/node_hash_set.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"

Expand Down Expand Up @@ -135,45 +136,64 @@ class table_entry {
return length() == other.length() && std::memcmp(string(), other.string(), length()) == 0;
}

bool operator==(std::string_view other) const {
return length() == other.length() && std::memcmp(string(), other.data(), length()) == 0;
}

private:
bool is_inplace() const {
return (m_flags & table_entry_flags::inplace) == table_entry_flags::inplace;
}
};
} // namespace

namespace std {
template <>
struct hash<table_entry> {
std::size_t operator()(const table_entry &entry) const {
// We'd make Util::Hash to be transparent instead. However, this would enable
// transparent hashing globally and in some cases in very undesired manner. So
// for now aim for more fine-grained approach.
struct TableEntryHash {
using is_transparent = void;

// IMPORTANT: These two hashes MUST match in order for heterogenous
// lookup to work properly
size_t operator()(const table_entry &entry) const {
return Util::hash(entry.string(), entry.length());
}

size_t operator()(std::string_view entry) const {
return Util::hash(entry.data(), entry.length());
}
};
} // namespace std

namespace {
std::unordered_set<table_entry> &cache() {
static std::unordered_set<table_entry> g_cache;
auto &cache() {
// We need node_hash_set due to SSO: we return address of embedded string
// that should be stable
static absl::node_hash_set<table_entry, TableEntryHash, std::equal_to<>> g_cache;

return g_cache;
}

const char *save_to_cache(const char *string, std::size_t length, table_entry_flags flags) {
if ((flags & table_entry_flags::no_need_copy) == table_entry_flags::no_need_copy) {
return cache().emplace(string, length, flags).first->string();
}
// Checks if string is already cached and if not, calls ctor to construct in
// place. As a result, only a single lookup is performed regardless whether
// entry is in cache or not.
return cache()
.lazy_emplace(table_entry(string, length, table_entry_flags::no_need_copy),
[string, length, flags](const auto &ctor) { ctor(string, length, flags); })
->string();
}

// temporary table_entry, used for searching only. no need to copy string
auto found = cache().find(table_entry(string, length, table_entry_flags::no_need_copy));
} // namespace

if (found == cache().end()) {
return cache().emplace(string, length, flags).first->string();
}
bool cstring::is_cached(std::string_view s) { return cache().contains(s); }

return found->string();
}
cstring cstring::get_cached(std::string_view s) {
auto entry = cache().find(s);
if (entry == cache().end()) return nullptr;

} // namespace
cstring res;
res.str = entry->string();

return res;
}

void cstring::construct_from_shared(const char *string, std::size_t length) {
str = save_to_cache(string, length, table_entry_flags::none);
Expand Down
5 changes: 5 additions & 0 deletions lib/cstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,11 @@ class cstring {
return result;
}

/// @return true if a given string is interned (contained in cstring cache)
static bool is_cached(std::string_view s);
/// @return corresponding cstring if it was interned, null cstring otherwise
static cstring get_cached(std::string_view s);

private:
// passed string is shared, we not unique owners
void construct_from_shared(const char *string, std::size_t length);
Expand Down
15 changes: 15 additions & 0 deletions test/gtest/cstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,19 @@ TEST(cstring, literalSuffix) {
EXPECT_TRUE((std::is_same_v<cstring, decltype(""_cs)>));
}

TEST(cstring, is_cached) {
[[maybe_unused]] cstring test = "test"_cs;
EXPECT_FALSE(
cstring::is_cached("we really do not expect that this string is already in cstring cache"));
EXPECT_TRUE(cstring::is_cached("test"));
}

TEST(cstring, get_cached) {
[[maybe_unused]] cstring test = "test"_cs;
EXPECT_TRUE(
cstring::get_cached("we really do not expect that this string is already in cstring cache")
.isNull());
EXPECT_FALSE(cstring::get_cached("test").isNullOrEmpty());
}

} // namespace Test
Loading