diff --git a/source/common/stats/stat_merger.cc b/source/common/stats/stat_merger.cc index 55dbb1822dc0c..b111fed68d0ba 100644 --- a/source/common/stats/stat_merger.cc +++ b/source/common/stats/stat_merger.cc @@ -5,7 +5,7 @@ namespace Envoy { namespace Stats { -StatMerger::StatMerger(Stats::Store& target_store) : target_store_(target_store) {} +StatMerger::StatMerger(Stats::Store& target_store) : temp_scope_(target_store.createScope("")) {} bool StatMerger::shouldImport(Gauge& gauge, const std::string& gauge_name) { absl::optional should_import = gauge.cachedShouldImport(); @@ -56,13 +56,13 @@ bool StatMerger::shouldImport(Gauge& gauge, const std::string& gauge_name) { void StatMerger::mergeCounters(const Protobuf::Map& counter_deltas) { for (const auto& counter : counter_deltas) { - target_store_.counter(counter.first).add(counter.second); + temp_scope_->counter(counter.first).add(counter.second); } } void StatMerger::mergeGauges(const Protobuf::Map& gauges) { for (const auto& gauge : gauges) { - auto& gauge_ref = target_store_.gauge(gauge.first); + auto& gauge_ref = temp_scope_->gauge(gauge.first); uint64_t& parent_value_ref = parent_gauge_values_[gauge_ref.statName()]; uint64_t old_parent_value = parent_value_ref; uint64_t new_parent_value = gauge.second; @@ -79,12 +79,6 @@ void StatMerger::mergeGauges(const Protobuf::Map& gauges) } } -// TODO(fredlas) the current implementation can "leak" obsolete parent stats into the child. -// That is, the parent had stat "foo", the child doesn't care about "foo" and back in the -// shared memory implementation would have dropped it, but the import causes it to be made into -// a real stat that stays around forever. The initial mini-consensus approach will be to -// track which stats are actually getting used by the child, and drop those that aren't when -// the hot restart completes. void StatMerger::mergeStats(const Protobuf::Map& counter_deltas, const Protobuf::Map& gauges) { mergeCounters(counter_deltas); diff --git a/source/common/stats/stat_merger.h b/source/common/stats/stat_merger.h index d0d25874b4626..3558e312fd5ac 100644 --- a/source/common/stats/stat_merger.h +++ b/source/common/stats/stat_merger.h @@ -36,7 +36,25 @@ class StatMerger { void mergeCounters(const Protobuf::Map& counter_deltas); void mergeGauges(const Protobuf::Map& gauges); StatNameHashMap parent_gauge_values_; - Stats::Store& target_store_; + // A stats Scope for our in-the-merging-process counters to live in. Scopes conceptually hold + // shared_ptrs to the stats that live in them, with the question of which stats are living in a + // given scope determined by which stat names have been accessed via that scope. E.g., if you + // access a stat named "some.shared" directly through the ordinary store, and then access a + // stat named "shared" in a scope configured with the prefix "some.", there is now a single + // stat named some.shared pointed to by both. As another example, if you access the stat + // "single" in the "some" scope, there will be a stat named "some.single" pointed to by just + // that scope. Now, if you delete the scope, some.shared will stick around, but some.single + // will be destroyed. + // + // All of that is relevant here because it is used to get a certain desired behavior. + // Specifically, stats must be kept up to date with values from the parent throughout hot + // restart, but once the restart completes, they must be dropped without a trace if the child has + // not taken action (independent of the hot restart stat merging) that would lead to them getting + // created in the store. By storing these stats in a scope (with an empty prefix), we can + // preserve all stats throughout the hot restart. Then, when the restart completes, dropping + // the scope will drop exactly those stats whose names have not already been accessed through + // another store/scope. + ScopePtr temp_scope_; }; } // namespace Stats diff --git a/source/common/stats/thread_local_store.cc b/source/common/stats/thread_local_store.cc index e9d1892bd2c2a..03d799509c756 100644 --- a/source/common/stats/thread_local_store.cc +++ b/source/common/stats/thread_local_store.cc @@ -29,7 +29,7 @@ ThreadLocalStoreImpl::ThreadLocalStoreImpl(StatDataAllocator& alloc) null_histogram_(alloc.symbolTable()) {} ThreadLocalStoreImpl::~ThreadLocalStoreImpl() { - ASSERT(shutting_down_); + ASSERT(shutting_down_ || !threading_ever_initialized_); default_scope_.reset(); ASSERT(scopes_.empty()); for (StatNameStorageSet* rejected_stats : rejected_stats_purgatory_) { @@ -148,6 +148,7 @@ std::vector ThreadLocalStoreImpl::histograms() const { void ThreadLocalStoreImpl::initializeThreading(Event::Dispatcher& main_thread_dispatcher, ThreadLocal::Instance& tls) { + threading_ever_initialized_ = true; main_thread_dispatcher_ = &main_thread_dispatcher; tls_ = tls.allocateSlot(); tls_->set([](Event::Dispatcher&) -> ThreadLocal::ThreadLocalObjectSharedPtr { diff --git a/source/common/stats/thread_local_store.h b/source/common/stats/thread_local_store.h index 489b5b9604f07..4e318bb949fb0 100644 --- a/source/common/stats/thread_local_store.h +++ b/source/common/stats/thread_local_store.h @@ -305,6 +305,7 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo std::list> timer_sinks_; TagProducerPtr tag_producer_; StatsMatcherPtr stats_matcher_; + std::atomic threading_ever_initialized_{}; std::atomic shutting_down_{}; std::atomic merge_in_progress_{}; HeapStatDataAllocator heap_allocator_; diff --git a/source/server/hot_restarting_child.cc b/source/server/hot_restarting_child.cc index 3ff34ef0a844f..1807f53903c5c 100644 --- a/source/server/hot_restarting_child.cc +++ b/source/server/hot_restarting_child.cc @@ -82,6 +82,10 @@ void HotRestartingChild::sendParentTerminateRequest() { parent_terminated_ = true; // Once setting parent_terminated_ == true, we can send no more hot restart RPCs, and therefore // receive no more responses, including stats. So, now safe to forget our stat transferral state. + // + // This destruction is actually important far beyond memory efficiency. The scope-based temporary + // counter logic relies on the StatMerger getting destroyed once hot restart's stat merging is + // all done. (See stat_merger.h for details). stat_merger_.reset(); } diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD index 1e1e7626c863f..c8a45d7100f61 100644 --- a/test/common/stats/BUILD +++ b/test/common/stats/BUILD @@ -51,8 +51,11 @@ envoy_cc_test( name = "stat_merger_test", srcs = ["stat_merger_test.cc"], deps = [ + "//source/common/stats:fake_symbol_table_lib", "//source/common/stats:isolated_store_lib", "//source/common/stats:stat_merger_lib", + "//source/common/stats:thread_local_store_lib", + "//test/test_common:utility_lib", ], ) diff --git a/test/common/stats/stat_merger_test.cc b/test/common/stats/stat_merger_test.cc index 5d6d70b5073df..0e02c0f910bba 100644 --- a/test/common/stats/stat_merger_test.cc +++ b/test/common/stats/stat_merger_test.cc @@ -1,7 +1,11 @@ #include +#include "common/stats/fake_symbol_table_impl.h" #include "common/stats/isolated_store_impl.h" #include "common/stats/stat_merger.h" +#include "common/stats/thread_local_store.h" + +#include "test/test_common/utility.h" #include "gtest/gtest.h" @@ -52,23 +56,6 @@ TEST_F(StatMergerTest, counterMerge) { EXPECT_EQ(4, store_.counter("draculaer").latch()); } -// It should be fine for the parent to send us stats we haven't ourselves instantiated. -// TODO(6756) This is how things currently work, but this is actually what 6756 is looking to avoid. -TEST_F(StatMergerTest, newStatFromParent) { - Protobuf::Map counter_values; - Protobuf::Map counter_deltas; - Protobuf::Map gauges; - counter_deltas["newcounter0"] = 0; - counter_deltas["newcounter1"] = 1; - gauges["newgauge"] = 5; - stat_merger_.mergeStats(counter_deltas, gauges); - EXPECT_EQ(0, store_.counter("newcounter0").value()); - EXPECT_EQ(0, store_.counter("newcounter0").latch()); - EXPECT_EQ(1, store_.counter("newcounter1").value()); - EXPECT_EQ(1, store_.counter("newcounter1").latch()); - EXPECT_EQ(5, store_.gauge("newgauge").value()); -} - TEST_F(StatMergerTest, basicDefaultAccumulationImport) { Protobuf::Map gauges; gauges["whywassixafraidofseven"] = 111; @@ -164,6 +151,39 @@ TEST_F(StatMergerTest, exclusionsNotImported) { EXPECT_FALSE(store_.gauge("overload.something.pressure").used()); } +// When the parent sends us counters we haven't ourselves instantiated, they should be stored +// temporarily, but then uninstantiated if hot restart ends without the child accessing them. +TEST(StatMergerNonFixtureTest, newStatFromParent) { + FakeSymbolTableImpl symbol_table; + HeapStatDataAllocator alloc(symbol_table); + ThreadLocalStoreImpl store(alloc); + { + StatMerger stat_merger(store); + + Protobuf::Map counter_values; + Protobuf::Map counter_deltas; + Protobuf::Map gauges; + counter_deltas["newcounter0"] = 0; + counter_deltas["newcounter1"] = 1; + counter_deltas["newcounter2"] = 2; + gauges["newgauge1"] = 1; + gauges["newgauge2"] = 2; + stat_merger.mergeStats(counter_deltas, gauges); + EXPECT_EQ(0, store.counter("newcounter0").value()); + EXPECT_EQ(0, store.counter("newcounter0").latch()); + EXPECT_EQ(1, store.counter("newcounter1").value()); + EXPECT_EQ(1, store.counter("newcounter1").latch()); + EXPECT_EQ(1, store.gauge("newgauge1").value()); + } + // We accessed 0 and 1 above, but not 2. Now that StatMerger has been destroyed, + // 2 should be gone. + EXPECT_TRUE(TestUtility::findCounter(store, "newcounter0")); + EXPECT_TRUE(TestUtility::findCounter(store, "newcounter1")); + EXPECT_FALSE(TestUtility::findCounter(store, "newcounter2")); + EXPECT_TRUE(TestUtility::findGauge(store, "newgauge1")); + EXPECT_FALSE(TestUtility::findGauge(store, "newgauge2")); +} + } // namespace } // namespace Stats } // namespace Envoy diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index f41c2c7f5d446..e00ac79745cb8 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -788,13 +788,6 @@ TEST(StatsThreadLocalStoreTestNoFixture, MemoryWithoutTls) { EXPECT_LT(start_mem, end_mem); const size_t million = 1000 * 1000; EXPECT_LT(end_mem - start_mem, 20 * million); // actual value: 19601552 as of March 14, 2019 - - // HACK: doesn't like shutting down without threading having started. - NiceMock main_thread_dispatcher; - NiceMock tls; - store->initializeThreading(main_thread_dispatcher, tls); - store->shutdownThreading(); - tls.shutdownThread(); } TEST(StatsThreadLocalStoreTestNoFixture, MemoryWithTls) {