diff --git a/include/envoy/server/guarddog_config.h b/include/envoy/server/guarddog_config.h index 907a9186f759e..e89a3f5308c77 100644 --- a/include/envoy/server/guarddog_config.h +++ b/include/envoy/server/guarddog_config.h @@ -9,6 +9,7 @@ #include "envoy/event/dispatcher.h" #include "envoy/protobuf/message_validator.h" #include "envoy/server/guarddog.h" +#include "envoy/stats/scope.h" #include "common/protobuf/protobuf.h" @@ -19,6 +20,8 @@ namespace Configuration { struct GuardDogActionFactoryContext { Api::Api& api_; Event::Dispatcher& dispatcher_; // not owned (this is the guard dog's dispatcher) + Stats::Scope& stats_; // not owned (this is the server's stats scope) + absl::string_view guarddog_name_; }; class GuardDogAction { diff --git a/source/extensions/watchdog/profile_action/BUILD b/source/extensions/watchdog/profile_action/BUILD index 6add86fd8479c..afe779924b43e 100644 --- a/source/extensions/watchdog/profile_action/BUILD +++ b/source/extensions/watchdog/profile_action/BUILD @@ -24,6 +24,7 @@ envoy_cc_library( "//include/envoy/thread:thread_interface", "//source/common/profiler:profiler_lib", "//source/common/protobuf:utility_lib", + "//source/common/stats:symbol_table_lib", "@envoy_api//envoy/extensions/watchdog/profile_action/v3alpha:pkg_cc_proto", ], ) diff --git a/source/extensions/watchdog/profile_action/profile_action.cc b/source/extensions/watchdog/profile_action/profile_action.cc index f4f669c55b572..2002414faa8cd 100644 --- a/source/extensions/watchdog/profile_action/profile_action.cc +++ b/source/extensions/watchdog/profile_action/profile_action.cc @@ -6,6 +6,7 @@ #include "common/profiler/profiler.h" #include "common/protobuf/utility.h" +#include "common/stats/symbol_table_impl.h" #include "absl/strings/str_format.h" @@ -32,18 +33,30 @@ ProfileAction::ProfileAction( duration_( std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(config, profile_duration, 5000))), max_profiles_(config.max_profiles() == 0 ? DefaultMaxProfiles : config.max_profiles()), - running_profile_(false), profiles_started_(0), context_(context), - timer_cb_(context_.dispatcher_.createTimer([this] { + profiles_attempted_(context.stats_.counterFromStatName( + Stats::StatNameManagedStorage( + absl::StrCat(context.guarddog_name_, ".profile_action.attempted"), + context.stats_.symbolTable()) + .statName())), + profiles_successfully_captured_(context.stats_.counterFromStatName( + Stats::StatNameManagedStorage( + absl::StrCat(context.guarddog_name_, ".profile_action.successfully_captured"), + context.stats_.symbolTable()) + .statName())), + context_(context), timer_cb_(context_.dispatcher_.createTimer([this] { if (Profiler::Cpu::profilerEnabled()) { Profiler::Cpu::stopProfiler(); running_profile_ = false; } else { ENVOY_LOG_MISC(error, "Profile Action's stop() was scheduled, but profiler isn't running!"); + return; } if (!context_.api_.fileSystem().fileExists(profile_filename_)) { ENVOY_LOG_MISC(error, "Profile file {} wasn't created!", profile_filename_); + } else { + profiles_successfully_captured_.inc(); } })) {} @@ -54,6 +67,7 @@ void ProfileAction::run( if (running_profile_) { return; } + profiles_attempted_.inc(); // Check if there's a tid that justifies profiling if (thread_last_checkin_pairs.empty()) { diff --git a/source/extensions/watchdog/profile_action/profile_action.h b/source/extensions/watchdog/profile_action/profile_action.h index f57f75ba00ab4..144f6b9861ff3 100644 --- a/source/extensions/watchdog/profile_action/profile_action.h +++ b/source/extensions/watchdog/profile_action/profile_action.h @@ -29,9 +29,11 @@ class ProfileAction : public Server::Configuration::GuardDogAction { const std::string path_; const std::chrono::milliseconds duration_; const uint64_t max_profiles_; - bool running_profile_; + bool running_profile_ = false; std::string profile_filename_; - uint64_t profiles_started_; + Stats::Counter& profiles_attempted_; + Stats::Counter& profiles_successfully_captured_; + uint64_t profiles_started_ = 0; Server::Configuration::GuardDogActionFactoryContext& context_; Event::TimerPtr timer_cb_; }; diff --git a/source/server/guarddog_impl.cc b/source/server/guarddog_impl.cc index daebd2948cc9d..7c512d20310e4 100644 --- a/source/server/guarddog_impl.cc +++ b/source/server/guarddog_impl.cc @@ -61,7 +61,8 @@ GuardDogImpl::GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuratio // We should be able to share the dispatcher since guard dog's lifetime // should eclipse those of actions. - Configuration::GuardDogActionFactoryContext context = {api, *dispatcher_}; + Configuration::GuardDogActionFactoryContext context = {api, *dispatcher_, stats_scope, + name}; const auto& actions = config.actions(); for (const auto& action : actions) { diff --git a/test/extensions/watchdog/abort_action/BUILD b/test/extensions/watchdog/abort_action/BUILD index 30d2c13b2999f..b79b1205e86e3 100644 --- a/test/extensions/watchdog/abort_action/BUILD +++ b/test/extensions/watchdog/abort_action/BUILD @@ -25,6 +25,7 @@ envoy_extension_cc_test( "//include/envoy/server:guarddog_config_interface", "//source/extensions/watchdog/abort_action:abort_action_lib", "//source/extensions/watchdog/abort_action:config", + "//test/common/stats:stat_test_utility_lib", "//test/test_common:utility_lib", "@envoy_api//envoy/config/bootstrap/v3:pkg_cc_proto", "@envoy_api//envoy/extensions/watchdog/abort_action/v3alpha:pkg_cc_proto", @@ -41,6 +42,7 @@ envoy_extension_cc_test( "//include/envoy/server:guarddog_config_interface", "//source/extensions/watchdog/abort_action:abort_action_lib", "//source/extensions/watchdog/abort_action:config", + "//test/common/stats:stat_test_utility_lib", "//test/mocks/event:event_mocks", "//test/test_common:utility_lib", "@envoy_api//envoy/extensions/watchdog/abort_action/v3alpha:pkg_cc_proto", diff --git a/test/extensions/watchdog/abort_action/abort_action_test.cc b/test/extensions/watchdog/abort_action/abort_action_test.cc index 0d3157ffec128..e648fbe66c603 100644 --- a/test/extensions/watchdog/abort_action/abort_action_test.cc +++ b/test/extensions/watchdog/abort_action/abort_action_test.cc @@ -11,6 +11,7 @@ #include "extensions/watchdog/abort_action/abort_action.h" #include "extensions/watchdog/abort_action/config.h" +#include "test/common/stats/stat_test_utility.h" #include "test/test_common/utility.h" #include "absl/synchronization/notification.h" @@ -28,8 +29,9 @@ class AbortActionTest : public testing::Test { protected: AbortActionTest() : api_(Api::createApiForTest()), dispatcher_(api_->allocateDispatcher("test")), - context_({*api_, *dispatcher_}) {} + context_({*api_, *dispatcher_, stats_, "test"}) {} + Stats::TestUtil::TestStore stats_; Api::ApiPtr api_; Event::DispatcherPtr dispatcher_; Server::Configuration::GuardDogActionFactoryContext context_; diff --git a/test/extensions/watchdog/abort_action/config_test.cc b/test/extensions/watchdog/abort_action/config_test.cc index b5a490118f6e8..7e2b716ac8f33 100644 --- a/test/extensions/watchdog/abort_action/config_test.cc +++ b/test/extensions/watchdog/abort_action/config_test.cc @@ -4,6 +4,7 @@ #include "extensions/watchdog/abort_action/config.h" +#include "test/common/stats/stat_test_utility.h" #include "test/mocks/event/mocks.h" #include "test/test_common/utility.h" @@ -40,9 +41,10 @@ TEST(AbortActionFactoryTest, CanCreateAction) { )EOF", config); + Stats::TestUtil::TestStore stats_; Event::MockDispatcher dispatcher; Api::ApiPtr api = Api::createApiForTest(); - Server::Configuration::GuardDogActionFactoryContext context{*api, dispatcher}; + Server::Configuration::GuardDogActionFactoryContext context{*api, dispatcher, stats_, "test"}; EXPECT_NE(factory->createGuardDogActionFromProto(config, context), nullptr); } diff --git a/test/extensions/watchdog/profile_action/BUILD b/test/extensions/watchdog/profile_action/BUILD index 7af95504deaed..fb8c662ea7bc7 100644 --- a/test/extensions/watchdog/profile_action/BUILD +++ b/test/extensions/watchdog/profile_action/BUILD @@ -26,6 +26,7 @@ envoy_extension_cc_test( "//source/common/profiler:profiler_lib", "//source/extensions/watchdog/profile_action:config", "//source/extensions/watchdog/profile_action:profile_action_lib", + "//test/common/stats:stat_test_utility_lib", "//test/mocks/event:event_mocks", "//test/test_common:environment_lib", "//test/test_common:simulated_time_system_lib", @@ -44,6 +45,7 @@ envoy_extension_cc_test( "//include/envoy/server:guarddog_config_interface", "//source/extensions/watchdog/profile_action:config", "//source/extensions/watchdog/profile_action:profile_action_lib", + "//test/common/stats:stat_test_utility_lib", "//test/mocks/event:event_mocks", "//test/test_common:utility_lib", "@envoy_api//envoy/extensions/watchdog/profile_action/v3alpha:pkg_cc_proto", diff --git a/test/extensions/watchdog/profile_action/config_test.cc b/test/extensions/watchdog/profile_action/config_test.cc index 2b40dfa68f1f5..c5ab318c1d3d6 100644 --- a/test/extensions/watchdog/profile_action/config_test.cc +++ b/test/extensions/watchdog/profile_action/config_test.cc @@ -4,6 +4,7 @@ #include "extensions/watchdog/profile_action/config.h" +#include "test/common/stats/stat_test_utility.h" #include "test/mocks/event/mocks.h" #include "test/test_common/utility.h" @@ -42,9 +43,10 @@ TEST(ProfileActionFactoryTest, CanCreateAction) { )EOF", config); + Stats::TestUtil::TestStore stats; Event::MockDispatcher dispatcher; - Api::ApiPtr api = Api::createApiForTest(); - Server::Configuration::GuardDogActionFactoryContext context{*api, dispatcher}; + Api::ApiPtr api = Api::createApiForTest(stats); + Server::Configuration::GuardDogActionFactoryContext context{*api, dispatcher, stats, "test"}; EXPECT_CALL(dispatcher, createTimer_(_)); EXPECT_NE(factory->createGuardDogActionFromProto(config, context), nullptr); diff --git a/test/extensions/watchdog/profile_action/profile_action_test.cc b/test/extensions/watchdog/profile_action/profile_action_test.cc index 01503c07efbb0..40f9085eff04b 100644 --- a/test/extensions/watchdog/profile_action/profile_action_test.cc +++ b/test/extensions/watchdog/profile_action/profile_action_test.cc @@ -15,6 +15,7 @@ #include "extensions/watchdog/profile_action/config.h" #include "extensions/watchdog/profile_action/profile_action.h" +#include "test/common/stats/stat_test_utility.h" #include "test/mocks/event/mocks.h" #include "test/test_common/environment.h" #include "test/test_common/simulated_time_system.h" @@ -35,8 +36,9 @@ class ProfileActionTest : public testing::Test { protected: ProfileActionTest() : time_system_(std::make_unique()), - api_(Api::createApiForTest(*time_system_)), dispatcher_(api_->allocateDispatcher("test")), - context_({*api_, *dispatcher_}), test_path_(generateTestPath()) {} + api_(Api::createApiForTest(stats_, *time_system_)), + dispatcher_(api_->allocateDispatcher("test")), + context_({*api_, *dispatcher_, stats_, "test"}), test_path_(generateTestPath()) {} // Generates a unique path for a testcase. static std::string generateTestPath() { @@ -76,6 +78,7 @@ class ProfileActionTest : public testing::Test { outstanding_notifies_ -= 1; } + Stats::TestUtil::TestStore stats_; std::unique_ptr time_system_; Api::ApiPtr api_; Event::DispatcherPtr dispatcher_; @@ -352,6 +355,72 @@ TEST_F(ProfileActionTest, ShouldSaturatedMaxProfiles) { #endif } +// The attempted counter should be updated on profile attempts that don't +// interfere with an existing profile the action is running. +// The successfully captured profile should be updated only if we captured the profile. +TEST_F(ProfileActionTest, ShouldUpdateCountersCorrectly) { + envoy::extensions::watchdog::profile_action::v3alpha::ProfileActionConfig config; + config.set_profile_path(test_path_); + config.mutable_profile_duration()->set_seconds(1); + + // Create the ProfileAction before we start running the dispatcher + // otherwise the timer created will in ProfileActions ctor will + // not be thread safe. + action_ = std::make_unique(config, context_); + Thread::ThreadPtr thread = api_->threadFactory().createThread( + [this]() -> void { dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); }); + std::vector> tid_ltt_pairs; + + // This will fail since no TIDs are provided. + dispatcher_->post([this, &tid_ltt_pairs]() -> void { + action_->run(envoy::config::bootstrap::v3::Watchdog::WatchdogAction::MISS, tid_ltt_pairs, + api_->timeSource().monotonicTime()); + absl::MutexLock lock(&mutex_); + outstanding_notifies_ += 1; + }); + + { + absl::MutexLock lock(&mutex_); + waitForOutstandingNotify(); + time_system_->advanceTimeWait(std::chrono::seconds(2)); + } + + // Check the counters are correct on a fail + EXPECT_EQ(TestUtility::findCounter(stats_, "test.profile_action.attempted")->value(), 1); + EXPECT_EQ(TestUtility::findCounter(stats_, "test.profile_action.successfully_captured")->value(), + 0); + + // Run a profile that will succeed. + const auto now = api_->timeSource().monotonicTime(); + tid_ltt_pairs.emplace_back(Thread::ThreadId(10), now); + + dispatcher_->post([this, &tid_ltt_pairs, &now]() -> void { + action_->run(envoy::config::bootstrap::v3::Watchdog::WatchdogAction::MISS, tid_ltt_pairs, now); + absl::MutexLock lock(&mutex_); + outstanding_notifies_ += 1; + }); + + { + absl::MutexLock lock(&mutex_); + waitForOutstandingNotify(); + time_system_->advanceTimeWait(std::chrono::seconds(2)); + } + +#ifdef PROFILER_AVAILABLE + // Check the counters are correct on success + EXPECT_EQ(TestUtility::findCounter(stats_, "test.profile_action.attempted")->value(), 2); + EXPECT_EQ(TestUtility::findCounter(stats_, "test.profile_action.successfully_captured")->value(), + 1); +#else + EXPECT_EQ(TestUtility::findCounter(stats_, "test.profile_action.attempted")->value(), 2); + EXPECT_EQ(TestUtility::findCounter(stats_, "test.profile_action.successfully_captured")->value(), + 0); +#endif + + dispatcher_->exit(); + thread->join(); +} + } // namespace } // namespace ProfileAction } // namespace Watchdog