diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index dd308bc036f3c..fbf7789072a2f 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -159,6 +159,7 @@ envoy_cc_library( "//include/envoy/stats:symbol_table_interface", "//source/common/common:assert_lib", "//source/common/common:logger_lib", + "//source/common/common:mem_block_builder_lib", "//source/common/common:stack_array", "//source/common/common:thread_lib", "//source/common/common:utility_lib", diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index c446449df9a77..a64325cf1177f 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -64,36 +64,32 @@ class FakeSymbolTableImpl : public SymbolTable { // than 256 of them. RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); - // First encode all the names. The '1' here represents the number of - // names. The num_names * StatNameSizeEncodingBytes reserves space for the - // lengths of each name. - size_t total_size_bytes = 1 + num_names * StatNameSizeEncodingBytes; - + size_t total_size_bytes = 1; /* one byte for holding the number of names */ for (uint32_t i = 0; i < num_names; ++i) { - total_size_bytes += names[i].size(); + size_t name_size = names[i].size(); + total_size_bytes += SymbolTableImpl::Encoding::totalSizeBytes(name_size); } // Now allocate the exact number of bytes required and move the encodings // into storage. - auto storage = std::make_unique(total_size_bytes); - uint8_t* p = &storage[0]; - *p++ = num_names; + MemBlockBuilder mem_block(total_size_bytes); + mem_block.appendOne(num_names); for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); - p = SymbolTableImpl::writeLengthReturningNext(sz, p); + SymbolTableImpl::Encoding::appendEncoding(sz, mem_block); if (!name.empty()) { - memcpy(p, name.data(), sz * sizeof(uint8_t)); - p += sz; + mem_block.appendData( + absl::MakeConstSpan(reinterpret_cast(name.data()), sz)); } } // This assertion double-checks the arithmetic where we computed // total_size_bytes. After appending all the encoded data into the - // allocated byte array, we should wind up with a pointer difference of - // total_size_bytes from the beginning of the allocation. - ASSERT(p == &storage[0] + total_size_bytes); - list.moveStorageIntoList(std::move(storage)); + // allocated byte array, we should have exhausted all the memory + // we though we needed. + ASSERT(mem_block.capacityRemaining() == 0); + list.moveStorageIntoList(mem_block.release()); } std::string toString(const StatName& stat_name) const override { @@ -143,10 +139,11 @@ class FakeSymbolTableImpl : public SymbolTable { StoragePtr encodeHelper(absl::string_view name) const { name = StringUtil::removeTrailingCharacters(name, '.'); - auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); - uint8_t* buffer = SymbolTableImpl::writeLengthReturningNext(name.size(), bytes.get()); - memcpy(buffer, name.data(), name.size()); - return bytes; + MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); + SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); + mem_block.appendData( + absl::MakeConstSpan(reinterpret_cast(name.data()), name.size())); + return mem_block.release(); } }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 284fffc980ad7..9ce19a85bedb6 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -17,6 +17,13 @@ namespace Stats { static const uint32_t SpilloverMask = 0x80; static const uint32_t Low7Bits = 0x7f; +uint64_t StatName::dataSize() const { + if (size_and_data_ == nullptr) { + return 0; + } + return SymbolTableImpl::Encoding::decodeNumber(size_and_data_).first; +} + #ifndef ENVOY_CONFIG_COVERAGE void StatName::debugPrint() { if (size_and_data_ == nullptr) { @@ -38,12 +45,22 @@ void StatName::debugPrint() { #endif SymbolTableImpl::Encoding::~Encoding() { - // Verifies that moveToStorage() was called on this encoding. Failure - // to call moveToStorage() will result in leaks symbols. - ASSERT(vec_.empty()); + // Verifies that moveToMemBlock() was called on this encoding. Failure + // to call moveToMemBlock() will result in leaks symbols. + ASSERT(mem_block_.capacity() == 0); } -void SymbolTableImpl::Encoding::addSymbol(Symbol symbol) { +uint64_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { + uint64_t num_bytes = 0; + do { + ++num_bytes; + number >>= 7; + } while (number != 0); + return num_bytes; +} + +void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, + MemBlockBuilder& mem_block) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in // the high-order bit. Then we right-shift 7 bits and keep adding more bytes @@ -52,45 +69,54 @@ void SymbolTableImpl::Encoding::addSymbol(Symbol symbol) { // When decoding, we stop consuming uint8_t when we see a uint8_t with // high-order bit 0. do { - if (symbol < (1 << 7)) { - vec_.push_back(symbol); // symbols <= 127 get encoded in one byte. + if (number < (1 << 7)) { + mem_block.appendOne(number); // number <= 127 gets encoded in one byte. } else { - vec_.push_back((symbol & Low7Bits) | SpilloverMask); // symbols >= 128 need spillover bytes. + mem_block.appendOne((number & Low7Bits) | SpilloverMask); // >= 128 need spillover bytes. } - symbol >>= 7; - } while (symbol != 0); + number >>= 7; + } while (number != 0); +} + +void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { + ASSERT(data_bytes_required_ == 0); + for (Symbol symbol : symbols) { + data_bytes_required_ += encodingSizeBytes(symbol); + } + mem_block_.setCapacity(data_bytes_required_); + for (Symbol symbol : symbols) { + appendEncoding(symbol, mem_block_); + } +} + +std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { + uint64_t number = 0; + uint64_t uc = SpilloverMask; + const uint8_t* start = encoding; + for (uint32_t shift = 0; (uc & SpilloverMask) != 0; ++encoding, shift += 7) { + uc = static_cast(*encoding); + number |= (uc & Low7Bits) << shift; + } + return std::make_pair(number, encoding - start); } SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage array, uint64_t size) { SymbolVec symbol_vec; - Symbol symbol = 0; - for (uint32_t shift = 0; size > 0; --size, ++array) { - uint32_t uc = static_cast(*array); - - // Inverse addSymbol encoding, walking down the bytes, shifting them into - // symbol, until a byte with a zero high order bit indicates this symbol is - // complete and we can move to the next one. - symbol |= (uc & Low7Bits) << shift; - if ((uc & SpilloverMask) == 0) { - symbol_vec.push_back(symbol); - shift = 0; - symbol = 0; - } else { - shift += 7; - } + symbol_vec.reserve(size); + while (size > 0) { + std::pair symbol_consumed = decodeNumber(array); + symbol_vec.push_back(symbol_consumed.first); + size -= symbol_consumed.second; + array += symbol_consumed.second; } return symbol_vec; } -uint64_t SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { - const uint64_t sz = dataBytesRequired(); - symbol_array = writeLengthReturningNext(sz, symbol_array); - if (sz != 0) { - memcpy(symbol_array, vec_.data(), sz * sizeof(uint8_t)); - } - vec_.clear(); // Logically transfer ownership, enabling empty assert on destruct. - return sz + StatNameSizeEncodingBytes; +void SymbolTableImpl::Encoding::moveToMemBlock(MemBlockBuilder& mem_block) { + appendEncoding(data_bytes_required_, mem_block); + mem_block.appendBlock(mem_block_); + mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. } SymbolTableImpl::SymbolTableImpl() @@ -130,9 +156,7 @@ void SymbolTableImpl::addTokensToEncoding(const absl::string_view name, Encoding } // Now efficiently encode the array of 32-bit symbols into a uint8_t array. - for (Symbol symbol : symbols) { - encoding.addSymbol(symbol); - } + encoding.addSymbols(symbols); } uint64_t SymbolTableImpl::numSymbols() const { @@ -380,9 +404,9 @@ SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { name = StringUtil::removeTrailingCharacters(name, '.'); Encoding encoding; addTokensToEncoding(name, encoding); - auto bytes = std::make_unique(encoding.bytesRequired()); - encoding.moveToStorage(bytes.get()); - return bytes; + MemBlockBuilder mem_block(Encoding::totalSizeBytes(encoding.bytesRequired())); + encoding.moveToMemBlock(mem_block); + return mem_block.release(); } StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) @@ -390,8 +414,9 @@ StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { const uint64_t size = src.size(); - bytes_ = std::make_unique(size); - src.copyToStorage(bytes_.get()); + MemBlockBuilder storage(size); + src.copyToMemBlock(storage); + bytes_ = storage.release(); table.incRefCount(statName()); } @@ -457,14 +482,12 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); - uint8_t* p = writeLengthReturningNext(num_bytes, bytes.get()); + MemBlockBuilder mem_block(Encoding::totalSizeBytes(num_bytes)); + Encoding::appendEncoding(num_bytes, mem_block); for (StatName stat_name : stat_names) { - const uint64_t stat_name_bytes = stat_name.dataSize(); - memcpy(p, stat_name.data(), stat_name_bytes); - p += stat_name_bytes; + stat_name.appendDataToMemBlock(mem_block); } - return bytes; + return mem_block.release(); } void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_names, @@ -483,19 +506,18 @@ void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_ // Now allocate the exact number of bytes required and move the encodings // into storage. - auto storage = std::make_unique(total_size_bytes); - uint8_t* p = &storage[0]; - *p++ = num_names; + MemBlockBuilder mem_block(total_size_bytes); + mem_block.appendOne(num_names); for (auto& encoding : encodings) { - p += encoding.moveToStorage(p); + encoding.moveToMemBlock(mem_block); } // This assertion double-checks the arithmetic where we computed // total_size_bytes. After appending all the encoded data into the - // allocated byte array, we should wind up with a pointer difference of - // total_size_bytes from the beginning of the allocation. - ASSERT(p == &storage[0] + total_size_bytes); - list.moveStorageIntoList(std::move(storage)); + // allocated byte array, we should have exhausted all the memory + // we though we needed. + ASSERT(mem_block.capacityRemaining() == 0); + list.moveStorageIntoList(mem_block.release()); } StatNameList::~StatNameList() { ASSERT(!populated()); } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 5f8078f0ebd4e..7cd2856ba2e9b 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -14,6 +14,7 @@ #include "common/common/assert.h" #include "common/common/hash.h" #include "common/common/lock_guard.h" +#include "common/common/mem_block_builder.h" #include "common/common/non_copyable.h" #include "common/common/stack_array.h" #include "common/common/thread.h" @@ -30,12 +31,6 @@ namespace Stats { /** A Symbol represents a string-token with a small index. */ using Symbol = uint32_t; -/** - * We encode the byte-size of a StatName as its first two bytes. - */ -constexpr uint64_t StatNameSizeEncodingBytes = 2; -constexpr uint64_t StatNameMaxSize = 1 << (8 * StatNameSizeEncodingBytes); // 65536 - /** Transient representations of a vector of 32-bit symbols */ using SymbolVec = std::vector; @@ -80,11 +75,11 @@ class SymbolTableImpl : public SymbolTable { class Encoding { public: /** - * Before destructing SymbolEncoding, you must call moveToStorage. This + * Before destructing SymbolEncoding, you must call moveToMemBlock. This * transfers ownership, and in particular, the responsibility to call - * SymbolTable::clear() on all referenced symbols. If we ever wanted - * to be able to destruct a SymbolEncoding without transferring it - * we could add a clear(SymbolTable&) method. + * SymbolTable::clear() on all referenced symbols. If we ever wanted to be + * able to destruct a SymbolEncoding without transferring it we could add a + * clear(SymbolTable&) method. */ ~Encoding(); @@ -93,7 +88,7 @@ class SymbolTableImpl : public SymbolTable { * * @param symbol the symbol to encode. */ - void addSymbol(Symbol symbol); + void addSymbols(const std::vector& symbols); /** * Decodes a uint8_t array into a SymbolVec. @@ -104,24 +99,62 @@ class SymbolTableImpl : public SymbolTable { * Returns the number of bytes required to represent StatName as a uint8_t * array, including the encoded size. */ - uint64_t bytesRequired() const { return dataBytesRequired() + StatNameSizeEncodingBytes; } + uint64_t bytesRequired() const { + return data_bytes_required_ + encodingSizeBytes(data_bytes_required_); + } /** * @return the number of uint8_t entries we collected while adding symbols. */ - uint64_t dataBytesRequired() const { return vec_.size(); } + uint64_t dataBytesRequired() const { return data_bytes_required_; } /** * Moves the contents of the vector into an allocated array. The array * must have been allocated with bytesRequired() bytes. * - * @param array destination memory to receive the encoded bytes. - * @return uint64_t the number of bytes transferred. + * @param mem_block_builder memory block to receive the encoded bytes. + */ + void moveToMemBlock(MemBlockBuilder& array); + + /** + * @param number A number to encode in a variable length byte-array. + * @return The number of bytes it would take to encode the number. */ - uint64_t moveToStorage(SymbolTable::Storage array); + static uint64_t encodingSizeBytes(uint64_t number); + + /** + * @param num_data_bytes The number of bytes in a data-block. + * @return The total number of bytes required for the data-block and its encoded size. + */ + static uint64_t totalSizeBytes(uint64_t num_data_bytes) { + return encodingSizeBytes(num_data_bytes) + num_data_bytes; + } + + /** + * Saves the specified number into the byte array, returning the next byte. + * There is no guarantee that bytes will be aligned, so we can't cast to a + * uint16_t* and assign, but must individually copy the bytes. + * + * Requires that the buffer be sized to accommodate encodingSizeBytes(number). + * + * @param number the number to write. + * @param mem_block the memory into which to append the number. + */ + static void appendEncoding(uint64_t number, MemBlockBuilder& mem_block); + + /** + * Decodes a byte-array containing a variable-length number. + * + * @param The encoded byte array, written previously by appendEncoding. + * @return A pair containing the decoded number, and the number of bytes consumed from encoding. + */ + static std::pair decodeNumber(const uint8_t* encoding); + + StoragePtr release() { return mem_block_.release(); } private: - std::vector vec_; + uint64_t data_bytes_required_{0}; + MemBlockBuilder mem_block_; }; SymbolTableImpl(); @@ -144,22 +177,6 @@ class SymbolTableImpl : public SymbolTable { void debugPrint() const override; #endif - /** - * Saves the specified length into the byte array, returning the next byte. - * There is no guarantee that bytes will be aligned, so we can't cast to a - * uint16_t* and assign, but must individually copy the bytes. - * - * @param length the length in bytes to write. Must be < StatNameMaxSize. - * @param bytes the pointer into which to write the length. - * @return the pointer to the next byte for writing the data. - */ - static inline uint8_t* writeLengthReturningNext(uint64_t length, uint8_t* bytes) { - ASSERT(length < StatNameMaxSize); - *bytes++ = length & 0xff; - *bytes++ = length >> 8; - return bytes; - } - StatNameSetPtr makeSet(absl::string_view name) override; void forgetSet(StatNameSet& stat_name_set) override; uint64_t getRecentLookups(const RecentLookupsFn&) const override; @@ -362,20 +379,37 @@ class StatName { * @return uint64_t the number of bytes in the symbol array, excluding the two-byte * overhead for the size itself. */ - uint64_t dataSize() const { - if (size_and_data_ == nullptr) { - return 0; - } - return size_and_data_[0] | (static_cast(size_and_data_[1]) << 8); - } + uint64_t dataSize() const; /** * @return uint64_t the number of bytes in the symbol array, including the two-byte * overhead for the size itself. */ - uint64_t size() const { return dataSize() + StatNameSizeEncodingBytes; } + uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } + + /** + * Copies the entire StatName representation into a MemBlockBuilder, including + * the length metadata at the beginning. The MemBlockBuilder must not have + * any other data in it. + * + * @param mem_block_builder the builder to receive the storage. + */ + void copyToMemBlock(MemBlockBuilder& mem_block_builder) { + ASSERT(mem_block_builder.size() == 0); + mem_block_builder.appendData(absl::MakeConstSpan(size_and_data_, size())); + } - void copyToStorage(SymbolTable::Storage storage) { memcpy(storage, size_and_data_, size()); } + /** + * Appends the data portion of the StatName representation into a + * MemBlockBuilder, excluding the length metadata. This is appropriate for + * join(), where several stat-names are combined, and we only need the + * aggregated length metadata. + * + * @param mem_block_builder the builder to receive the storage. + */ + void appendDataToMemBlock(MemBlockBuilder& storage) { + storage.appendData(absl::MakeConstSpan(data(), dataSize())); + } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); @@ -384,7 +418,9 @@ class StatName { /** * @return A pointer to the first byte of data (skipping over size bytes). */ - const uint8_t* data() const { return size_and_data_ + StatNameSizeEncodingBytes; } + const uint8_t* data() const { + return size_and_data_ + SymbolTableImpl::Encoding::encodingSizeBytes(dataSize()); + } /** * @return whether this is empty. diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD index 3bb028802e23a..0c8e4d868b1cc 100644 --- a/test/common/stats/BUILD +++ b/test/common/stats/BUILD @@ -129,6 +129,7 @@ envoy_cc_fuzz_test( srcs = ["symbol_table_fuzz_test.cc"], corpus = "symbol_table_corpus", deps = [ + ":stat_test_utility_lib", "//source/common/buffer:buffer_lib", "//source/common/common:assert_lib", "//source/common/decompressor:decompressor_lib", diff --git a/test/common/stats/stat_test_utility.cc b/test/common/stats/stat_test_utility.cc index 8520266e433fc..adcd6d7a3fa5e 100644 --- a/test/common/stats/stat_test_utility.cc +++ b/test/common/stats/stat_test_utility.cc @@ -135,6 +135,37 @@ MemoryTest::Mode MemoryTest::mode() { #endif } +// TODO(jmarantz): this utility is intended to be costs both for unit tests +// and fuzz tests. But those have different checking macros, e.g. EXPECT_EQ vs +// FUZZ_ASSERT. +std::vector serializeDeserializeNumber(uint64_t number) { + uint64_t num_bytes = SymbolTableImpl::Encoding::encodingSizeBytes(number); + const uint64_t block_size = 10; + MemBlockBuilder mem_block(block_size); + SymbolTableImpl::Encoding::appendEncoding(number, mem_block); + num_bytes += mem_block.capacityRemaining(); + RELEASE_ASSERT(block_size == num_bytes, absl::StrCat("Encoding size issue: block_size=", + block_size, " num_bytes=", num_bytes)); + absl::Span span = mem_block.span(); + RELEASE_ASSERT(number == SymbolTableImpl::Encoding::decodeNumber(span.data()).first, ""); + return std::vector(span.data(), span.data() + span.size()); +} + +void serializeDeserializeString(absl::string_view in) { + MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(in.size())); + SymbolTableImpl::Encoding::appendEncoding(in.size(), mem_block); + const uint8_t* data = reinterpret_cast(in.data()); + mem_block.appendData(absl::MakeSpan(data, data + in.size())); + RELEASE_ASSERT(mem_block.capacityRemaining() == 0, ""); + absl::Span span = mem_block.span(); + const std::pair number_consumed = + SymbolTableImpl::Encoding::decodeNumber(span.data()); + RELEASE_ASSERT(number_consumed.first == in.size(), absl::StrCat("size matches: ", in)); + span.remove_prefix(number_consumed.second); + const absl::string_view out(reinterpret_cast(span.data()), span.size()); + RELEASE_ASSERT(in == out, absl::StrCat("'", in, "' != '", out, "'")); +} + } // namespace TestUtil } // namespace Stats } // namespace Envoy diff --git a/test/common/stats/stat_test_utility.h b/test/common/stats/stat_test_utility.h index c10fe40320650..ae0857674867a 100644 --- a/test/common/stats/stat_test_utility.h +++ b/test/common/stats/stat_test_utility.h @@ -112,6 +112,14 @@ class SymbolTableCreatorTestPeer { } }; +// Serializes a number into a uint8_t array, and check that it de-serializes to +// the same number. The serialized number is also returned, which can be +// checked in unit tests, but ignored in fuzz tests. +std::vector serializeDeserializeNumber(uint64_t number); + +// Serializes a string into a MemBlock and then decodes it. +void serializeDeserializeString(absl::string_view in); + } // namespace TestUtil } // namespace Stats } // namespace Envoy diff --git a/test/common/stats/symbol_table_fuzz_test.cc b/test/common/stats/symbol_table_fuzz_test.cc index 715d8ce829cae..884a16512e1d6 100644 --- a/test/common/stats/symbol_table_fuzz_test.cc +++ b/test/common/stats/symbol_table_fuzz_test.cc @@ -2,6 +2,7 @@ #include "common/common/base64.h" #include "common/stats/symbol_table_impl.h" +#include "test/common/stats/stat_test_utility.h" #include "test/fuzz/fuzz_runner.h" #include "test/fuzz/utility.h" @@ -24,6 +25,19 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { // string before comparing. absl::string_view trimmed_fuzz_data = StringUtil::removeTrailingCharacters(next_data, '.'); FUZZ_ASSERT(trimmed_fuzz_data == symbol_table.toString(stat_name)); + + // Also encode the string directly, without symbolizing it. + TestUtil::serializeDeserializeString(next_data); + + // Grab the first few bytes from next_data to synthesize together a random uint64_t. + if (next_data.size() > 1) { + uint32_t num_bytes = (next_data[0] % 8) + 1; // random number between 1 and 8 inclusive. + uint64_t number = 0; + for (uint32_t i = 0; i < num_bytes; ++i) { + number = 256 * number + next_data[i + 1]; + } + TestUtil::serializeDeserializeNumber(number); + } } } diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index a267fcdf022e8..d185898cd36f0 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -70,6 +70,10 @@ class StatNameTest : public testing::TestWithParam { StatName makeStat(absl::string_view name) { return pool_->add(name); } + std::vector serializeDeserialize(uint64_t number) { + return TestUtil::serializeDeserializeNumber(number); + } + FakeSymbolTableImpl* fake_symbol_table_{nullptr}; SymbolTableImpl* real_symbol_table_{nullptr}; std::unique_ptr table_; @@ -79,6 +83,48 @@ class StatNameTest : public testing::TestWithParam { INSTANTIATE_TEST_SUITE_P(StatNameTest, StatNameTest, testing::ValuesIn({SymbolTableType::Real, SymbolTableType::Fake})); +TEST_P(StatNameTest, SerializeBytes) { + EXPECT_EQ(std::vector{1}, serializeDeserialize(1)); + EXPECT_EQ(std::vector{127}, serializeDeserialize(127)); + EXPECT_EQ((std::vector{128, 1}), serializeDeserialize(128)); + EXPECT_EQ((std::vector{129, 1}), serializeDeserialize(129)); + EXPECT_EQ((std::vector{255, 1}), serializeDeserialize(255)); + EXPECT_EQ((std::vector{255, 127}), serializeDeserialize(16383)); + EXPECT_EQ((std::vector{128, 128, 1}), serializeDeserialize(16384)); + EXPECT_EQ((std::vector{129, 128, 1}), serializeDeserialize(16385)); + + auto power2 = [](uint32_t exp) -> uint64_t { + uint64_t one = 1; + return one << exp; + }; + EXPECT_EQ((std::vector{255, 255, 127}), serializeDeserialize(power2(21) - 1)); + EXPECT_EQ((std::vector{128, 128, 128, 1}), serializeDeserialize(power2(21))); + EXPECT_EQ((std::vector{129, 128, 128, 1}), serializeDeserialize(power2(21) + 1)); + EXPECT_EQ((std::vector{255, 255, 255, 127}), serializeDeserialize(power2(28) - 1)); + EXPECT_EQ((std::vector{128, 128, 128, 128, 1}), serializeDeserialize(power2(28))); + EXPECT_EQ((std::vector{129, 128, 128, 128, 1}), serializeDeserialize(power2(28) + 1)); + EXPECT_EQ((std::vector{255, 255, 255, 255, 127}), serializeDeserialize(power2(35) - 1)); + EXPECT_EQ((std::vector{128, 128, 128, 128, 128, 1}), serializeDeserialize(power2(35))); + EXPECT_EQ((std::vector{129, 128, 128, 128, 128, 1}), + serializeDeserialize(power2(35) + 1)); + + for (uint32_t i = 0; i < 17000; ++i) { + serializeDeserialize(i); + } +} + +TEST_P(StatNameTest, SerializeStrings) { + TestUtil::serializeDeserializeString(""); + TestUtil::serializeDeserializeString("Hello, world!"); + TestUtil::serializeDeserializeString("embedded\0\nul"); + TestUtil::serializeDeserializeString(std::string(200, 'a')); + TestUtil::serializeDeserializeString(std::string(2000, 'a')); + TestUtil::serializeDeserializeString(std::string(20000, 'a')); + TestUtil::serializeDeserializeString(std::string(200000, 'a')); + TestUtil::serializeDeserializeString(std::string(2000000, 'a')); + TestUtil::serializeDeserializeString(std::string(20000000, 'a')); +} + TEST_P(StatNameTest, AllocFree) { encodeDecode("hello.world"); } TEST_P(StatNameTest, TestArbitrarySymbolRoundtrip) { @@ -101,6 +147,28 @@ TEST_P(StatNameTest, Test100KSymbolsRoundtrip) { } } +TEST_P(StatNameTest, TwoHundredTwoLevel) { + for (int i = 0; i < 200; ++i) { + const std::string stat_name = absl::StrCat("symbol_", i); + EXPECT_EQ(stat_name, encodeDecode(stat_name)); + } + EXPECT_EQ("http.foo", encodeDecode("http.foo")); +} + +TEST_P(StatNameTest, TestLongSymbolName) { + std::string long_name(100000, 'a'); + EXPECT_EQ(long_name, encodeDecode(long_name)); +} + +TEST_P(StatNameTest, TestLongSequence) { + std::string long_name("a"); + for (int i = 0; i < 100000; ++i) { + absl::StrAppend(&long_name, ".a"); + } + + EXPECT_EQ(long_name, encodeDecode(long_name)); +} + TEST_P(StatNameTest, TestUnusualDelimitersRoundtrip) { const std::vector stat_names = {".x", "..x", "...x", "foo", "foo.x", ".foo", ".foo.x", ".foo..x", "..foo.x", "..foo..x"}; diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index 63cd6ed5577db..68b5e48efac6d 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -911,8 +911,8 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithoutTlsFakeSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 15268336); // June 30, 2019 - EXPECT_MEMORY_LE(memory_test.consumedBytes(), 16 * million_); + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 14892880); // Oct 28, 2019 + EXPECT_MEMORY_LE(memory_test.consumedBytes(), 15 * million_); } TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsFakeSymbolTable) { @@ -921,7 +921,7 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsFakeSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 17496848); // June 30, 2019 + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 17121392); // Oct 28, 2019 EXPECT_MEMORY_LE(memory_test.consumedBytes(), 18 * million_); } @@ -931,8 +931,8 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithoutTlsRealSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 9139120); // Aug 9, 2019 - EXPECT_MEMORY_LE(memory_test.consumedBytes(), 10 * million_); + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 8017968); // Oct 28, 2019 + EXPECT_MEMORY_LE(memory_test.consumedBytes(), 9 * million_); } TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsRealSymbolTable) { @@ -941,8 +941,8 @@ TEST_F(StatsThreadLocalStoreTestNoFixture, MemoryWithTlsRealSymbolTable) { TestUtil::MemoryTest memory_test; TestUtil::forEachSampleStat( 1000, [this](absl::string_view name) { store_->counter(std::string(name)); }); - EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 11367632); // Aug 9, 2019 - EXPECT_MEMORY_LE(memory_test.consumedBytes(), 12 * million_); + EXPECT_MEMORY_EQ(memory_test.consumedBytes(), 10246480); // Oct 28, 2019 + EXPECT_MEMORY_LE(memory_test.consumedBytes(), 11 * million_); } TEST_F(StatsThreadLocalStoreTest, ShuttingDown) { diff --git a/test/integration/stats_integration_test.cc b/test/integration/stats_integration_test.cc index 131067c897f7f..7342a14adb74e 100644 --- a/test/integration/stats_integration_test.cc +++ b/test/integration/stats_integration_test.cc @@ -259,7 +259,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // 2019/10/17 8484 43340 44000 stats: add unit support to histogram // 2019/11/01 8859 43563 44000 build: switch to libc++ by default // 2019/11/15 9040 43371 44000 build: update protobuf to 3.10.1 - // 2019/11/15 9040 43403 35500 upstream: track whether cluster is local + // 2019/11/15 9040 43403 44000 upstream: track whether cluster is local + // 2019/12/10 8779 42919 43500 use var-length coding for name length // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -273,8 +274,8 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithFakeSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 43403); // 104 bytes higher than a debug build. - EXPECT_MEMORY_LE(m_per_cluster, 44000); + EXPECT_MEMORY_EQ(m_per_cluster, 42919); // 104 bytes higher than a debug build. + EXPECT_MEMORY_LE(m_per_cluster, 43500); } TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { @@ -307,6 +308,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // 2019/11/01 8859 35221 36000 build: switch to libc++ by default // 2019/11/15 9040 35029 35500 build: update protobuf to 3.10.1 // 2019/11/15 9040 35061 35500 upstream: track whether cluster is local + // 2019/12/10 8779 35053 35000 use var-length coding for name lengths // Note: when adjusting this value: EXPECT_MEMORY_EQ is active only in CI // 'release' builds, where we control the platform and tool-chain. So you @@ -320,7 +322,7 @@ TEST_P(ClusterMemoryTestRunner, MemoryLargeClusterSizeWithRealSymbolTable) { // If you encounter a failure here, please see // https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md#stats-memory-tests // for details on how to fix. - EXPECT_MEMORY_EQ(m_per_cluster, 35061); // 104 bytes higher than a debug build. + EXPECT_MEMORY_EQ(m_per_cluster, 35053); // 104 bytes higher than a debug build. EXPECT_MEMORY_LE(m_per_cluster, 35500); }