-
Notifications
You must be signed in to change notification settings - Fork 5.5k
stats: add symbol table for future stat name encoding #3927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 29 commits
8de2463
1c45948
a3df93b
a78ea8d
59e8129
475dcd3
9b3594b
e7b619a
b3f1b7f
bdc714c
0ded3e5
b8b3f60
ae650c3
14e8bb3
86dc207
cb6c754
ff30317
6e42c86
f848464
3687311
2772592
edb35e3
c872073
2f83e68
85c5d8b
7f93df9
190da3d
f2da353
9e68dfd
8e794d5
bd47676
19c6e65
da2b7d7
9466aa3
86dab2c
208512d
27aa402
aaba51b
278f92f
ce87e7e
efd3c6f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "envoy/common/pure.h" | ||
|
|
||
| #include "absl/strings/string_view.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Stats { | ||
|
|
||
| /** | ||
| * Interface for storing a stat name. | ||
| */ | ||
| class StatName { | ||
| public: | ||
| virtual ~StatName(){}; | ||
| virtual std::string toString() const PURE; | ||
| }; | ||
|
|
||
| using StatNamePtr = std::unique_ptr<StatName>; | ||
|
|
||
| /** | ||
| * Interface for shortening and retrieving stat names. | ||
| * | ||
| * Guarantees that x = encode(x).toString() for any x. | ||
| */ | ||
| class SymbolTable { | ||
| public: | ||
| virtual ~SymbolTable() {} | ||
|
|
||
| /** | ||
| * Encodes a stat name into a StatNamePtr. Expects the name to be period-delimited. | ||
| * | ||
| * @param name the stat name to encode. | ||
| * @return StatNamePtr a unique_ptr to the StatName class encapsulating the symbol vector. | ||
| */ | ||
| virtual StatNamePtr encode(absl::string_view name) PURE; | ||
|
|
||
| /** | ||
| * Returns the size of a SymbolTable, as measured in number of symbols stored. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
| * @return size_t the size of the table. | ||
| */ | ||
| virtual size_t size() const PURE; | ||
| }; | ||
|
|
||
| } // namespace Stats | ||
| } // namespace Envoy | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| #include "common/stats/symbol_table_impl.h" | ||
|
|
||
| #include <memory> | ||
| #include <unordered_map> | ||
| #include <vector> | ||
|
|
||
| #include "common/common/assert.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Stats { | ||
|
|
||
| // TODO(ambuc): There is a possible performance optimization here for avoiding the encoding of IPs, | ||
| // if they appear in stat names. We don't want to waste time symbolizing an integer as an integer, | ||
| // if we can help it. | ||
| StatNamePtr SymbolTableImpl::encode(const absl::string_view name) { | ||
| SymbolVec symbol_vec; | ||
| std::vector<absl::string_view> name_vec = absl::StrSplit(name, '.'); | ||
| symbol_vec.reserve(name_vec.size()); | ||
| std::transform(name_vec.begin(), name_vec.end(), std::back_inserter(symbol_vec), | ||
| [this](absl::string_view x) { return toSymbol(x); }); | ||
|
|
||
| return std::make_unique<StatNameImpl>(symbol_vec, *this); | ||
| } | ||
|
|
||
| std::string SymbolTableImpl::decode(const SymbolVec& symbol_vec) const { | ||
| std::vector<absl::string_view> name; | ||
| name.reserve(symbol_vec.size()); | ||
| std::transform(symbol_vec.begin(), symbol_vec.end(), std::back_inserter(name), | ||
| [this](Symbol x) { return fromSymbol(x); }); | ||
| return absl::StrJoin(name, "."); | ||
| } | ||
|
|
||
| void SymbolTableImpl::free(const SymbolVec& symbol_vec) { | ||
| for (const Symbol symbol : symbol_vec) { | ||
| auto decode_search = decode_map_.find(symbol); | ||
| ASSERT(decode_search != decode_map_.end()); | ||
|
|
||
| auto encode_search = encode_map_.find(decode_search->second); | ||
| ASSERT(encode_search != encode_map_.end()); | ||
|
|
||
| encode_search->second.ref_count_--; | ||
| // If that was the last remaining client usage of the symbol, erase the the current | ||
| // mappings and add the now-unused symbol to the reuse pool. | ||
| if (encode_search->second.ref_count_ == 0) { | ||
| decode_map_.erase(decode_search); | ||
| encode_map_.erase(encode_search); | ||
| pool_.push(symbol); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { | ||
| Symbol result; | ||
| auto encode_find = encode_map_.find(sv); | ||
| // If the string segment doesn't already exist, | ||
| if (encode_find == encode_map_.end()) { | ||
| // We create the actual string, place it in the decode_map_, and then insert a string_view | ||
| // pointing to it in the encode_map_. This allows us to only store the string once. | ||
| std::string str = std::string(sv); | ||
|
|
||
| auto decode_insert = decode_map_.insert({next_symbol_, std::move(str)}); | ||
| ASSERT(decode_insert.second); | ||
|
|
||
| auto encode_insert = encode_map_.insert( | ||
| {decode_insert.first->second, {.symbol_ = next_symbol_, .ref_count_ = 1}}); | ||
| ASSERT(encode_insert.second); | ||
|
|
||
| result = next_symbol_; | ||
| newSymbol(); | ||
| } else { | ||
| // If the insertion didn't take place, return the actual value at that location and up the | ||
| // refcount at that location | ||
| result = encode_find->second.symbol_; | ||
| ++(encode_find->second.ref_count_); | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const { | ||
| auto search = decode_map_.find(symbol); | ||
| ASSERT(search != decode_map_.end()); | ||
| return search->second; | ||
| } | ||
|
|
||
| } // namespace Stats | ||
| } // namespace Envoy |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,139 @@ | ||
| #pragma once | ||
|
|
||
| #include <algorithm> | ||
| #include <memory> | ||
| #include <stack> | ||
| #include <string> | ||
| #include <unordered_map> | ||
| #include <vector> | ||
|
|
||
| #include "envoy/common/exception.h" | ||
| #include "envoy/stats/symbol_table.h" | ||
|
|
||
| #include "common/common/assert.h" | ||
| #include "common/common/utility.h" | ||
|
|
||
| #include "absl/strings/str_join.h" | ||
| #include "absl/strings/str_split.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Stats { | ||
|
|
||
| using Symbol = uint32_t; | ||
| using SymbolVec = std::vector<Symbol>; | ||
|
|
||
| /** | ||
| * Underlying SymbolTableImpl implementation which manages per-symbol reference counting. | ||
| * | ||
| * The underlying Symbol / SymbolVec data structures are private to the impl. One side | ||
| * effect of the non-monotonically-increasing symbol counter is that if a string is encoded, the | ||
| * resulting stat is destroyed, and then that same string is re-encoded, it may or may not encode to | ||
| * the same underlying symbol. | ||
| */ | ||
| class SymbolTableImpl : public SymbolTable { | ||
| public: | ||
| StatNamePtr encode(absl::string_view name) override; | ||
|
|
||
| // For testing purposes only. | ||
| size_t size() const override { | ||
| ASSERT(encode_map_.size() == decode_map_.size()); | ||
| return encode_map_.size(); | ||
| } | ||
|
|
||
| private: | ||
| friend class StatNameImpl; | ||
| friend class StatNameTest; | ||
|
|
||
| struct SharedSymbol { | ||
| Symbol symbol_; | ||
| uint32_t ref_count_; | ||
| }; | ||
|
|
||
| /** | ||
| * Decodes a vector of symbols back into its period-delimited stat name. | ||
| * If decoding fails on any part of the symbol_vec, we release_assert and crash hard, since this | ||
| * should never happen, and we don't want to continue running with a corrupt stats set. | ||
| * | ||
| * @param symbol_vec the vector of symbols to decode. | ||
| * @return std::string the retrieved stat name. | ||
| */ | ||
| std::string decode(const SymbolVec& symbol_vec) const; | ||
|
|
||
| /** | ||
| * Since SymbolTableImpl does manual reference counting, a client of SymbolTable (such as | ||
| * StatName) must manually call free(symbol_vec) when it is freeing the stat it represents. This | ||
| * way, the symbol table will grow and shrink dynamically, instead of being write-only. | ||
| * | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
| * @param symbol_vec the vector of symbols to be freed. | ||
| */ | ||
| void free(const SymbolVec& symbol_vec); | ||
|
|
||
| /** | ||
| * Convenience function for encode(), symbolizing one string segment at a time. | ||
| * | ||
| * @param sv the individual string to be encoded as a symbol. | ||
| * @return Symbol the encoded string. | ||
| */ | ||
| Symbol toSymbol(absl::string_view sv); | ||
|
|
||
| /** | ||
| * Convenience function for decode(), decoding one symbol at a time. | ||
| * | ||
| * @param symbol the individual symbol to be decoded. | ||
| * @return absl::string_view the decoded string. | ||
| */ | ||
| absl::string_view fromSymbol(Symbol symbol) const; | ||
|
|
||
| // Stages a new symbol for use. To be called after a successful insertion. | ||
| void newSymbol() { | ||
| if (pool_.empty()) { | ||
| next_symbol_ = ++monotonic_counter_; | ||
| } else { | ||
| next_symbol_ = pool_.top(); | ||
| pool_.pop(); | ||
| } | ||
| // This should catch integer overflow for the new symbol. | ||
| ASSERT(monotonic_counter_ != 0); | ||
| } | ||
|
|
||
| Symbol monotonicCounter() { return monotonic_counter_; } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What value does this have given that the tests are friends?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was my suggestion; the origin of this practice for me was -- I think -- a former team member's C++ readability review; and possibly an earlier version of the style guide. I can't find it now. But it basically suggested that even friend classes should access via methods rather than directly manipulating member vars. |
||
|
|
||
| // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so | ||
| // that if insertion succeeds, the value written is the correct one. | ||
| Symbol next_symbol_ = 0; | ||
|
|
||
| // If the free pool is exhausted, we monotonically increase this counter. | ||
| Symbol monotonic_counter_ = 0; | ||
|
|
||
| // Bimap implementation. | ||
| // The encode map stores both the symbol and the ref count of that symbol. | ||
| // Using absl::string_view lets us only store the complete string once, in the decode map. | ||
| std::unordered_map<absl::string_view, SharedSymbol, StringViewHash> encode_map_; | ||
| std::unordered_map<Symbol, std::string> decode_map_; | ||
|
|
||
| // Free pool of symbols for re-use. | ||
| // TODO(ambuc): There might be an optimization here relating to storing ranges of freed symbols | ||
| // using an Envoy::IntervalSet. | ||
| std::stack<Symbol> pool_; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You might be able to optimize the pool_ -- depending on usage pattern, so that if you free all the highest-numbered symbols, you remove them from the pool_ and adjust monotonic_counter_ (thus making it non-monotonic I suppose). But this is probably fairly minor. I don't actually expect you are going to wind up freeing huge numbers of symbols in practice, based on how the stats are constructed.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the issue with decrementing the monotonic counter is that increasing it again could cause a collision -- I would have to find a clever way to avoid that. I'll put a TODO for myself here to think about this. |
||
| }; | ||
|
|
||
| /** | ||
| * Implements RAII for Symbols, since the StatName destructor does the work of freeing its component | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
| * symbols. | ||
| */ | ||
| class StatNameImpl : public StatName { | ||
| public: | ||
| StatNameImpl(SymbolVec symbol_vec, SymbolTableImpl& symbol_table) | ||
| : symbol_vec_(symbol_vec), symbol_table_(symbol_table) {} | ||
| ~StatNameImpl() override { symbol_table_.free(symbol_vec_); } | ||
| std::string toString() const override { return symbol_table_.decode(symbol_vec_); } | ||
|
|
||
| private: | ||
| friend class StatNameTest; | ||
| SymbolVec symbolVec() { return symbol_vec_; } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this method for? |
||
| SymbolVec symbol_vec_; | ||
| SymbolTableImpl& symbol_table_; | ||
| }; | ||
|
|
||
| } // namespace Stats | ||
| } // namespace Envoy | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be
SharedPtrto support ref counted sharing?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think so -- StatNames should be totally unique, AFAIK.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I had thought of this too and almost wrote the same comment. I don't think this needs to be shared, but it's possible, based on usage there might emerge a need.