diff --git a/REPO_LAYOUT.md b/REPO_LAYOUT.md index e4f2452a1417b..3eae104c8c35c 100644 --- a/REPO_LAYOUT.md +++ b/REPO_LAYOUT.md @@ -63,9 +63,8 @@ Not every directory within test is described below, but a few highlights: ## [source/extensions](source/extensions/) layout We maintain a very specific code and namespace layout for extensions. This aids in discovering -code/extensions, and also will allow us in the future to more easily scale out our extension -maintainers by having OWNERS files specific to certain extensions. (As of this writing, this is not -currently implemented but that is the plan moving forward.) +code/extensions, and allows us specify extension owners in [CODEOWNERS](CODEOWNERS). + * All extensions are either registered in [all_extensions.bzl](source/extensions/all_extensions.bzl) or [extensions_build_config.bzl](source/extensions/extensions_build_config.bzl). The former is @@ -76,6 +75,14 @@ currently implemented but that is the plan moving forward.) * These are the top level extension directories and associated namespaces: * [access_loggers/](/source/extensions/access_loggers): Access log implementations which use the `Envoy::Extensions::AccessLoggers` namespace. + * [bootstrap](/source/extensions/bootstrap): Bootstrap extensions which use + the `Envoy::Extensions::Bootstrap` namespace. + * [clusters](/source/extensions/clusters): Cluster extensions which use the + `Envoy::Extensions::Clusters` namespace. + * [compression](/source/extensions/compression): Compression extensions + which use `Envoy::Extensions::Compression` namespace. + * [fatal_actions](/source/extensions/fatal_actions): Fatal Action extensions + which use the `Envoy::Extensions::FatalActions` namespace. * [filters/http/](/source/extensions/filters/http): HTTP L7 filters which use the `Envoy::Extensions::HttpFilters` namespace. * [filters/listener/](/source/extensions/filters/listener): Listener filters which use the @@ -86,14 +93,24 @@ currently implemented but that is the plan moving forward.) `Envoy::Extensions::GrpcCredentials` namespace. * [health_checker/](/source/extensions/health_checker): Custom health checkers which use the `Envoy::Extensions::HealthCheckers` namespace. - * [resolvers/](/source/extensions/resolvers): Network address resolvers which use the - `Envoy::Extensions::Resolvers` namespace. + * [internal_redirect](/source/extensions/internal_redirect): Internal Redirect + extensions which use the `Envoy::Extensions::InternalRedirect` namespace. + * [quic_listeners](/source/extensions/quic_listeners): QUIC extensions which + use the `Envoy::Quic` namespace. + * [resource_monitors](/source/extensions/resource_monitors): Resource monitor + extensions which use the `Envoy::Extensions::ResourceMonitors` namespace. + * [retry](/source/extensions/retry): Retry extensions which use the + `Envoy::Extensions::Retry` namespace. * [stat_sinks/](/source/extensions/stat_sinks): Stat sink implementations which use the `Envoy::Extensions::StatSinks` namespace. * [tracers/](/source/extensions/tracers): Tracers which use the `Envoy::Extensions::Tracers` namespace. * [transport_sockets/](/source/extensions/transport_sockets): Transport socket implementations which use the `Envoy::Extensions::TransportSockets` namespace. + * [upstreams](/source/extensions/upstreams): Upstream extensions use the + `Envoy::Extensions::Upstreams` namespace. + * [watchdog](/source/extensions/watchdog): Watchdog extensions use the + `Envoy::Extensions::Watchdog` namespace. * Each extension is contained wholly in its own namespace. E.g., `Envoy::Extensions::NetworkFilters::Echo`. * Common code that is used by multiple extensions should be in a `common/` directory as close to diff --git a/api/envoy/config/bootstrap/v3/bootstrap.proto b/api/envoy/config/bootstrap/v3/bootstrap.proto index a9a0290b297cc..9581fd6ca7721 100644 --- a/api/envoy/config/bootstrap/v3/bootstrap.proto +++ b/api/envoy/config/bootstrap/v3/bootstrap.proto @@ -40,7 +40,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; // ` for more detail. // Bootstrap :ref:`configuration overview `. -// [#next-free-field: 28] +// [#next-free-field: 29] message Bootstrap { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v2.Bootstrap"; @@ -243,6 +243,10 @@ message Bootstrap { // Each item contains extension specific configuration. repeated core.v3.TypedExtensionConfig bootstrap_extensions = 21; + // Specifies optional extensions instantiated at startup time and + // invoked during crash time on the request that caused the crash. + repeated FatalAction fatal_actions = 28; + // Configuration sources that will participate in // *udpa.core.v1.ResourceLocator* authority resolution. The algorithm is as // follows: @@ -420,6 +424,20 @@ message Watchdog { type.v3.Percent multikill_threshold = 5; } +// Fatal actions to run while crashing. Actions can be safe (meaning they are +// async-signal safe) or unsafe. We run all safe actions before we run unsafe actions. +// If using an unsafe action that could get stuck or deadlock, it important to +// have an out of band system to terminate the process. +// +// The interface for the extension is ``Envoy::Server::Configuration::FatalAction``. +// *FatalAction* extensions live in the ``envoy.extensions.fatal_actions`` API +// namespace. +message FatalAction { + // Extension specific configuration for the action. It's expected to conform + // to the ``Envoy::Server::Configuration::FatalAction`` interface. + core.v3.TypedExtensionConfig config = 1; +} + // Runtime :ref:`configuration overview ` (deprecated). message Runtime { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v2.Runtime"; diff --git a/api/envoy/config/bootstrap/v4alpha/bootstrap.proto b/api/envoy/config/bootstrap/v4alpha/bootstrap.proto index ef10dead97069..1fe40d6b9607a 100644 --- a/api/envoy/config/bootstrap/v4alpha/bootstrap.proto +++ b/api/envoy/config/bootstrap/v4alpha/bootstrap.proto @@ -38,7 +38,7 @@ option (udpa.annotations.file_status).package_version_status = NEXT_MAJOR_VERSIO // ` for more detail. // Bootstrap :ref:`configuration overview `. -// [#next-free-field: 28] +// [#next-free-field: 29] message Bootstrap { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v3.Bootstrap"; @@ -229,6 +229,10 @@ message Bootstrap { // Each item contains extension specific configuration. repeated core.v4alpha.TypedExtensionConfig bootstrap_extensions = 21; + // Specifies optional extensions instantiated at startup time and + // invoked during crash time on the request that caused the crash. + repeated FatalAction fatal_actions = 28; + // Configuration sources that will participate in // *udpa.core.v1.ResourceLocator* authority resolution. The algorithm is as // follows: @@ -412,6 +416,23 @@ message Watchdog { type.v3.Percent multikill_threshold = 5; } +// Fatal actions to run while crashing. Actions can be safe (meaning they are +// async-signal safe) or unsafe. We run all safe actions before we run unsafe actions. +// If using an unsafe action that could get stuck or deadlock, it important to +// have an out of band system to terminate the process. +// +// The interface for the extension is ``Envoy::Server::Configuration::FatalAction``. +// *FatalAction* extensions live in the ``envoy.extensions.fatal_actions`` API +// namespace. +message FatalAction { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.bootstrap.v3.FatalAction"; + + // Extension specific configuration for the action. It's expected to conform + // to the ``Envoy::Server::Configuration::FatalAction`` interface. + core.v4alpha.TypedExtensionConfig config = 1; +} + // Runtime :ref:`configuration overview ` (deprecated). message Runtime { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v3.Runtime"; diff --git a/docs/root/extending/extending.rst b/docs/root/extending/extending.rst index cd36ddb2188db..7f2e676e20eb1 100644 --- a/docs/root/extending/extending.rst +++ b/docs/root/extending/extending.rst @@ -24,6 +24,8 @@ types including: * :ref:`Watchdog action ` * :ref:`Internal redirect policy ` * :ref:`Compression libraries ` +* :ref:`Bootstrap extensions ` +* :ref:`Fatal actions ` As of this writing there is no high level extension developer documentation. The :repo:`existing extensions ` are a good way to learn what is possible. diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index f72b7f9842d0d..db0901f0640b6 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -66,6 +66,7 @@ New Features * sds: improved support for atomic :ref:`key rotations ` and added configurable rotation triggers for :ref:`TlsCertificate ` and :ref:`CertificateValidationContext `. +* signal: added an extension point for custom actions to run on the thread that has encountered a fatal error. Actions are configurable via :ref:`fatal_actions `. * tcp: added a new :ref:`envoy.overload_actions.reject_incoming_connections ` action to reject incoming TCP connections. * tls: added support for RSA certificates with 4096-bit keys in FIPS mode. * tracing: added SkyWalking tracer. diff --git a/generated_api_shadow/envoy/config/bootstrap/v3/bootstrap.proto b/generated_api_shadow/envoy/config/bootstrap/v3/bootstrap.proto index cce2dceb72d89..52b9a88365d0e 100644 --- a/generated_api_shadow/envoy/config/bootstrap/v3/bootstrap.proto +++ b/generated_api_shadow/envoy/config/bootstrap/v3/bootstrap.proto @@ -40,7 +40,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; // ` for more detail. // Bootstrap :ref:`configuration overview `. -// [#next-free-field: 28] +// [#next-free-field: 29] message Bootstrap { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v2.Bootstrap"; @@ -241,6 +241,10 @@ message Bootstrap { // Each item contains extension specific configuration. repeated core.v3.TypedExtensionConfig bootstrap_extensions = 21; + // Specifies optional extensions instantiated at startup time and + // invoked during crash time on the request that caused the crash. + repeated FatalAction fatal_actions = 28; + // Configuration sources that will participate in // *udpa.core.v1.ResourceLocator* authority resolution. The algorithm is as // follows: @@ -421,6 +425,20 @@ message Watchdog { type.v3.Percent multikill_threshold = 5; } +// Fatal actions to run while crashing. Actions can be safe (meaning they are +// async-signal safe) or unsafe. We run all safe actions before we run unsafe actions. +// If using an unsafe action that could get stuck or deadlock, it important to +// have an out of band system to terminate the process. +// +// The interface for the extension is ``Envoy::Server::Configuration::FatalAction``. +// *FatalAction* extensions live in the ``envoy.extensions.fatal_actions`` API +// namespace. +message FatalAction { + // Extension specific configuration for the action. It's expected to conform + // to the ``Envoy::Server::Configuration::FatalAction`` interface. + core.v3.TypedExtensionConfig config = 1; +} + // Runtime :ref:`configuration overview ` (deprecated). message Runtime { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v2.Runtime"; diff --git a/generated_api_shadow/envoy/config/bootstrap/v4alpha/bootstrap.proto b/generated_api_shadow/envoy/config/bootstrap/v4alpha/bootstrap.proto index 57f37e5ff6ee1..074936c7226e7 100644 --- a/generated_api_shadow/envoy/config/bootstrap/v4alpha/bootstrap.proto +++ b/generated_api_shadow/envoy/config/bootstrap/v4alpha/bootstrap.proto @@ -39,7 +39,7 @@ option (udpa.annotations.file_status).package_version_status = NEXT_MAJOR_VERSIO // ` for more detail. // Bootstrap :ref:`configuration overview `. -// [#next-free-field: 28] +// [#next-free-field: 29] message Bootstrap { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v3.Bootstrap"; @@ -242,6 +242,10 @@ message Bootstrap { // Each item contains extension specific configuration. repeated core.v4alpha.TypedExtensionConfig bootstrap_extensions = 21; + // Specifies optional extensions instantiated at startup time and + // invoked during crash time on the request that caused the crash. + repeated FatalAction fatal_actions = 28; + // Configuration sources that will participate in // *udpa.core.v1.ResourceLocator* authority resolution. The algorithm is as // follows: @@ -425,6 +429,23 @@ message Watchdog { type.v3.Percent multikill_threshold = 5; } +// Fatal actions to run while crashing. Actions can be safe (meaning they are +// async-signal safe) or unsafe. We run all safe actions before we run unsafe actions. +// If using an unsafe action that could get stuck or deadlock, it important to +// have an out of band system to terminate the process. +// +// The interface for the extension is ``Envoy::Server::Configuration::FatalAction``. +// *FatalAction* extensions live in the ``envoy.extensions.fatal_actions`` API +// namespace. +message FatalAction { + option (udpa.annotations.versioning).previous_message_type = + "envoy.config.bootstrap.v3.FatalAction"; + + // Extension specific configuration for the action. It's expected to conform + // to the ``Envoy::Server::Configuration::FatalAction`` interface. + core.v4alpha.TypedExtensionConfig config = 1; +} + // Runtime :ref:`configuration overview ` (deprecated). message Runtime { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v3.Runtime"; diff --git a/include/envoy/server/BUILD b/include/envoy/server/BUILD index a9f7854ba1c2a..0fbd876f0a948 100644 --- a/include/envoy/server/BUILD +++ b/include/envoy/server/BUILD @@ -329,3 +329,15 @@ envoy_cc_library( "//include/envoy/config:typed_config_interface", ], ) + +envoy_cc_library( + name = "fatal_action_interface", + hdrs = ["fatal_action_config.h"], + deps = [ + "//include/envoy/config:typed_config_interface", + "//include/envoy/event:dispatcher_interface", + "//include/envoy/protobuf:message_validator_interface", + "//include/envoy/server:instance_interface", + "@envoy_api//envoy/config/bootstrap/v3:pkg_cc_proto", + ], +) diff --git a/include/envoy/server/fatal_action_config.h b/include/envoy/server/fatal_action_config.h new file mode 100644 index 0000000000000..c8768dced40a9 --- /dev/null +++ b/include/envoy/server/fatal_action_config.h @@ -0,0 +1,59 @@ +#pragma once + +#include + +#include "envoy/common/pure.h" +#include "envoy/config/bootstrap/v3/bootstrap.pb.h" +#include "envoy/config/typed_config.h" +#include "envoy/event/dispatcher.h" +#include "envoy/protobuf/message_validator.h" +#include "envoy/server/instance.h" + +namespace Envoy { +namespace Server { +namespace Configuration { + +class FatalAction { +public: + virtual ~FatalAction() = default; + /** + * Callback function to run when we are crashing. + * @param current_object the object we were working on when we started + * crashing. + */ + virtual void run(const ScopeTrackedObject* current_object) PURE; + + /** + * @return whether the action is async-signal-safe. + * See man 7 signal-safety for the definition of async-signal-safe. + */ + virtual bool isAsyncSignalSafe() const PURE; +}; + +using FatalActionPtr = std::unique_ptr; + +/** + * Implemented by each custom FatalAction and registered via Registry::registerFactory() + * or the convenience class RegisterFactory. + */ +class FatalActionFactory : public Config::TypedFactory { +public: + ~FatalActionFactory() override = default; + + /** + * Creates a particular FatalAction implementation. + * + * @param config supplies the configuration for the action. + * @param context supplies the GuardDog Action's context. + * @return FatalActionsPtr the FatalActions object. + */ + virtual FatalActionPtr + createFatalActionFromProto(const envoy::config::bootstrap::v3::FatalAction& config, + Instance* server) PURE; + + std::string category() const override { return "envoy.fatal_action"; } +}; + +} // namespace Configuration +} // namespace Server +} // namespace Envoy diff --git a/source/common/event/dispatcher_impl.cc b/source/common/event/dispatcher_impl.cc index d615e5986f5d6..382ea30f1d665 100644 --- a/source/common/event/dispatcher_impl.cc +++ b/source/common/event/dispatcher_impl.cc @@ -278,6 +278,19 @@ void DispatcherImpl::runPostCallbacks() { } } +void DispatcherImpl::runFatalActionsOnTrackedObject( + const FatalAction::FatalActionPtrList& actions) const { + // Only run if this is the dispatcher of the current thread and + // DispatcherImpl::Run has been called. + if (run_tid_.isEmpty() || (run_tid_ != api_.threadFactory().currentThreadId())) { + return; + } + + for (const auto& action : actions) { + action->run(current_object_); + } +} + void DispatcherImpl::touchWatchdog() { if (watchdog_registration_) { watchdog_registration_->touchWatchdog(); diff --git a/source/common/event/dispatcher_impl.h b/source/common/event/dispatcher_impl.h index 62c10920d7fea..46de3a8b50f7d 100644 --- a/source/common/event/dispatcher_impl.h +++ b/source/common/event/dispatcher_impl.h @@ -94,6 +94,9 @@ class DispatcherImpl : Logger::Loggable, } } + void + runFatalActionsOnTrackedObject(const FatalAction::FatalActionPtrList& actions) const override; + private: // Holds a reference to the watchdog registered with this dispatcher and the timer used to ensure // that the dog is touched periodically. diff --git a/source/common/signal/BUILD b/source/common/signal/BUILD index 2a18144c87dbf..893f2fa78454b 100644 --- a/source/common/signal/BUILD +++ b/source/common/signal/BUILD @@ -13,6 +13,8 @@ envoy_cc_library( srcs = ["fatal_error_handler.cc"], hdrs = ["fatal_error_handler.h"], deps = [ + ":fatal_action_lib", + "//include/envoy/event:dispatcher_interface", "//source/common/common:macros", ], ) @@ -30,3 +32,12 @@ envoy_cc_library( "//source/server:backtrace_lib", ], ) + +envoy_cc_library( + name = "fatal_action_lib", + hdrs = ["fatal_action.h"], + deps = [ + "//include/envoy/server:fatal_action_interface", + "//include/envoy/thread:thread_interface", + ], +) diff --git a/source/common/signal/fatal_action.h b/source/common/signal/fatal_action.h new file mode 100644 index 0000000000000..4ecaf1461635f --- /dev/null +++ b/source/common/signal/fatal_action.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include "envoy/common/pure.h" +#include "envoy/server/fatal_action_config.h" +#include "envoy/thread/thread.h" + +namespace Envoy { +namespace FatalAction { + +using FatalActionPtrList = std::list; + +// Status when trying to run the Fatal Actions. +enum class Status { + Success, + + // We either haven't set up the Fatal Action manager, or we unregistered it + // as the server terminated. + ActionManangerUnset, + + // Another thread beat us to running the Fatal Actions. + RunningOnAnotherThread, + + // We have already ran those actions on this thread. + AlreadyRanOnThisThread, +}; + +// A simple class which manages the Fatal Actions registered via the +// extension point. +class FatalActionManager { +public: + FatalActionManager(FatalActionPtrList safe_actions, FatalActionPtrList unsafe_actions, + Thread::ThreadFactory& thread_factory) + : safe_actions_(std::move(safe_actions)), unsafe_actions_(std::move(unsafe_actions)), + thread_factory_(thread_factory) {} + + const FatalActionPtrList& getSafeActions() const { return safe_actions_; } + const FatalActionPtrList& getUnsafeActions() const { return unsafe_actions_; } + Thread::ThreadFactory& getThreadFactory() const { return thread_factory_; } + +private: + FatalActionPtrList safe_actions_; + FatalActionPtrList unsafe_actions_; + Thread::ThreadFactory& thread_factory_; +}; + +} // namespace FatalAction +} // namespace Envoy diff --git a/source/common/signal/fatal_error_handler.cc b/source/common/signal/fatal_error_handler.cc index 125093e3c589a..e7099e6f02544 100644 --- a/source/common/signal/fatal_error_handler.cc +++ b/source/common/signal/fatal_error_handler.cc @@ -1,8 +1,13 @@ #include "common/signal/fatal_error_handler.h" +#include #include +#include "envoy/event/dispatcher.h" + +#include "common/common/assert.h" #include "common/common/macros.h" +#include "common/signal/fatal_action.h" #include "absl/base/attributes.h" #include "absl/synchronization/mutex.h" @@ -12,6 +17,12 @@ namespace FatalErrorHandler { namespace { +// The type of Fatal Actions. +enum class FatalActionType { + Safe, + Unsafe, +}; + ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit); // Since we can't grab the failure mutex on fatal error (snagging locks under // fatal crash causing potential deadlocks) access the handler list as an atomic @@ -22,17 +33,84 @@ ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit); using FailureFunctionList = std::list; ABSL_CONST_INIT std::atomic fatal_error_handlers{nullptr}; +// Use an atomic operation since on fatal error we'll consume the +// fatal_action_manager and don't want to have any locks as they aren't +// async-signal-safe. +ABSL_CONST_INIT std::atomic fatal_action_manager{nullptr}; +ABSL_CONST_INIT std::atomic failure_tid{-1}; + +// Executes the Fatal Actions provided. +void runFatalActionsInternal(const FatalAction::FatalActionPtrList& actions) { + // Exchange the fatal_error_handlers pointer so other functions cannot + // concurrently access the list. + FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); + if (list == nullptr) { + return; + } + + // Get the dispatcher and its tracked object. + for (auto* handler : *list) { + handler->runFatalActionsOnTrackedObject(actions); + } + + // Restore the fatal_error_handlers pointer so subsequent calls using the list + // can succeed. + fatal_error_handlers.store(list); +} + +// Helper function to run exclusively either safe or unsafe actions depending on +// the provided action_type. +// Returns a FatalAction status corresponding to our attempt to run the +// action_type. +FatalAction::Status runFatalActions(FatalActionType action_type) { + // Check that registerFatalActions has already been called. + FatalAction::FatalActionManager* action_manager = fatal_action_manager.load(); + + if (action_manager == nullptr) { + return FatalAction::Status::ActionManangerUnset; + } + + int64_t my_tid = action_manager->getThreadFactory().currentThreadId().getId(); + + if (action_type == FatalActionType::Safe) { + // Try to run safe actions + int64_t expected_tid = -1; + + if (failure_tid.compare_exchange_strong(expected_tid, my_tid)) { + // Run the actions + runFatalActionsInternal(action_manager->getSafeActions()); + return FatalAction::Status::Success; + } else if (expected_tid == my_tid) { + return FatalAction::Status::AlreadyRanOnThisThread; + } + + } else { + // Try to run unsafe actions + int64_t failing_tid = failure_tid.load(); + + ASSERT(failing_tid != -1); + + if (my_tid == failing_tid) { + runFatalActionsInternal(action_manager->getUnsafeActions()); + return FatalAction::Status::Success; + } + } + + return FatalAction::Status::RunningOnAnotherThread; +} + } // namespace void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) { #ifdef ENVOY_OBJECT_TRACE_ON_DUMP absl::MutexLock l(&failure_mutex); - FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed); + FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); if (list == nullptr) { list = new FailureFunctionList; } list->push_back(&handler); - fatal_error_handlers.store(list, std::memory_order_release); + // Store the fatal_error_handlers pointer now that the list is updated. + fatal_error_handlers.store(list); #else UNREFERENCED_PARAMETER(handler); #endif @@ -41,7 +119,7 @@ void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) { void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) { #ifdef ENVOY_OBJECT_TRACE_ON_DUMP absl::MutexLock l(&failure_mutex); - FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed); + FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); if (list == nullptr) { // removeFatalErrorHandler() may see an empty list of fatal error handlers // if it's called at the same time as callFatalErrorHandlers(). In that case @@ -53,7 +131,7 @@ void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) { if (list->empty()) { delete list; } else { - fatal_error_handlers.store(list, std::memory_order_release); + fatal_error_handlers.store(list); } #else UNREFERENCED_PARAMETER(handler); @@ -61,13 +139,50 @@ void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) { } void callFatalErrorHandlers(std::ostream& os) { - FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed); + FailureFunctionList* list = fatal_error_handlers.exchange(nullptr); if (list != nullptr) { for (const auto* handler : *list) { handler->onFatalError(os); } - delete list; + + fatal_error_handlers.store(list); + } +} + +void registerFatalActions(FatalAction::FatalActionPtrList safe_actions, + FatalAction::FatalActionPtrList unsafe_actions, + Thread::ThreadFactory& thread_factory) { + // Create a FatalActionManager and store it. + FatalAction::FatalActionManager* previous_manager = + fatal_action_manager.exchange(new FatalAction::FatalActionManager( + std::move(safe_actions), std::move(unsafe_actions), thread_factory)); + + // Previous manager should be NULL. + ASSERT(!previous_manager); +} + +FatalAction::Status runSafeActions() { return runFatalActions(FatalActionType::Safe); } + +FatalAction::Status runUnsafeActions() { return runFatalActions(FatalActionType::Unsafe); } + +void clearFatalActionsOnTerminate() { + auto* raw_ptr = fatal_action_manager.exchange(nullptr); + if (raw_ptr != nullptr) { + delete raw_ptr; + } +} + +// This resets the internal state of Fatal Action for the module. +// This is necessary as it allows us to have multiple test cases invoke the +// fatal actions without state from other tests leaking in. +void resetFatalActionStateForTest() { + // Free the memory of the Fatal Action, since it's not managed by a smart + // pointer. This prevents memory leaks in tests. + auto* raw_ptr = fatal_action_manager.exchange(nullptr); + if (raw_ptr != nullptr) { + delete raw_ptr; } + failure_tid.store(-1); } } // namespace FatalErrorHandler diff --git a/source/common/signal/fatal_error_handler.h b/source/common/signal/fatal_error_handler.h index b06997af7e815..d7d25013432d0 100644 --- a/source/common/signal/fatal_error_handler.h +++ b/source/common/signal/fatal_error_handler.h @@ -4,6 +4,8 @@ #include "envoy/common/pure.h" +#include "common/signal/fatal_action.h" + namespace Envoy { // A simple class which allows registering functions to be called when Envoy @@ -14,6 +16,9 @@ class FatalErrorHandlerInterface { // Called when Envoy receives a fatal signal. Must be async-signal-safe: in // particular, it can't allocate memory. virtual void onFatalError(std::ostream& os) const PURE; + + virtual void + runFatalActionsOnTrackedObject(const FatalAction::FatalActionPtrList& actions) const PURE; }; namespace FatalErrorHandler { @@ -35,5 +40,37 @@ void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler); * called from a fatal signal handler. */ void callFatalErrorHandlers(std::ostream& os); + +/** + * Creates the singleton FatalActionManager if not already created and + * registers the specified actions to run on failure. + */ +void registerFatalActions(FatalAction::FatalActionPtrList safe_actions, + FatalAction::FatalActionPtrList unsafe_actions, + Thread::ThreadFactory& thread_factory); + +/** + * Tries to run all of the safe fatal actions. Only one thread will + * be allowed to run the fatal functions. + * + * Returns a FatalAction::Status which the caller should use to + * determine how to proceed. + */ +FatalAction::Status runSafeActions(); + +/** + * Tries to run all of the unsafe fatal actions. Call this only + * after calling runSafeActions. + * + * Returns a FatalAction::Status which the caller should use to determine + * how to proceed. + */ +FatalAction::Status runUnsafeActions(); + +/** + * Clear the Fatal Actions at the end of the server's call to terminate(). + * We should clean up the Fatal Action to prevent the memory from leaking. + */ +void clearFatalActionsOnTerminate(); } // namespace FatalErrorHandler } // namespace Envoy diff --git a/source/common/signal/signal_action.cc b/source/common/signal/signal_action.cc index c3a53c19da701..143b95a81f6c0 100644 --- a/source/common/signal/signal_action.cc +++ b/source/common/signal/signal_action.cc @@ -5,6 +5,7 @@ #include #include "common/common/assert.h" +#include "common/signal/fatal_action.h" #include "common/version/version.h" namespace Envoy { @@ -22,8 +23,31 @@ void SignalAction::sigHandler(int sig, siginfo_t* info, void* context) { } tracer.logTrace(); - // Finally after logging the stack trace, call any registered crash handlers. - FatalErrorHandler::callFatalErrorHandlers(std::cerr); + // Finally after logging the stack trace, call the crash handlers + // in order from safe to unsafe. + auto status = FatalErrorHandler::runSafeActions(); + + switch (status) { + case FatalAction::Status::Success: + FatalErrorHandler::callFatalErrorHandlers(std::cerr); + FatalErrorHandler::runUnsafeActions(); + break; + case FatalAction::Status::ActionManangerUnset: + FatalErrorHandler::callFatalErrorHandlers(std::cerr); + break; + case FatalAction::Status::RunningOnAnotherThread: { + // We should wait for some duration for the other thread to finish + // running. We should add support for this scenario, even though the + // probability of it occurring is low. + // TODO(kbaichoo): Implement a configurable call to sleep + NOT_IMPLEMENTED_GCOVR_EXCL_LINE; + break; + } + case FatalAction::Status::AlreadyRanOnThisThread: + // We caused another fatal signal to be raised. + std::cerr << "Our FatalActions triggered a fatal signal.\n"; + break; + } signal(sig, SIG_DFL); raise(sig); diff --git a/source/extensions/fatal_actions/README.md b/source/extensions/fatal_actions/README.md new file mode 100644 index 0000000000000..d4ab098a4071a --- /dev/null +++ b/source/extensions/fatal_actions/README.md @@ -0,0 +1 @@ +Currently there are no fatal action extensions. diff --git a/source/server/BUILD b/source/server/BUILD index c3404818a2172..cc21ce681f280 100644 --- a/source/server/BUILD +++ b/source/server/BUILD @@ -418,6 +418,7 @@ envoy_cc_library( "//include/envoy/network:dns_interface", "//include/envoy/server:bootstrap_extension_config_interface", "//include/envoy/server:drain_manager_interface", + "//include/envoy/server:fatal_action_interface", "//include/envoy/server:instance_interface", "//include/envoy/server:listener_manager_interface", "//include/envoy/server:options_interface", @@ -444,6 +445,7 @@ envoy_cc_library( "//source/common/router:rds_lib", "//source/common/runtime:runtime_lib", "//source/common/secret:secret_manager_impl_lib", + "//source/common/signal:fatal_error_handler_lib", "//source/common/singleton:manager_impl_lib", "//source/common/stats:thread_local_store_lib", "//source/common/upstream:cluster_manager_lib", diff --git a/source/server/server.cc b/source/server/server.cc index ee7792c719c72..214f9d0459388 100644 --- a/source/server/server.cc +++ b/source/server/server.cc @@ -20,6 +20,7 @@ #include "envoy/network/dns.h" #include "envoy/registry/registry.h" #include "envoy/server/bootstrap_extension_config.h" +#include "envoy/server/instance.h" #include "envoy/server/options.h" #include "envoy/upstream/cluster_manager.h" @@ -40,6 +41,7 @@ #include "common/protobuf/utility.h" #include "common/router/rds_impl.h" #include "common/runtime/runtime_impl.h" +#include "common/signal/fatal_error_handler.h" #include "common/singleton/manager_impl.h" #include "common/stats/thread_local_store.h" #include "common/stats/timespan_impl.h" @@ -420,6 +422,26 @@ void InstanceImpl::initialize(const Options& options, factory.createBootstrapExtension(*config, serverFactoryContext())); } + // Register the fatal actions. + { + FatalAction::FatalActionPtrList safe_actions; + FatalAction::FatalActionPtrList unsafe_actions; + for (const auto& action_config : bootstrap_.fatal_actions()) { + auto& factory = + Config::Utility::getAndCheckFactory( + action_config.config()); + auto action = factory.createFatalActionFromProto(action_config, this); + + if (action->isAsyncSignalSafe()) { + safe_actions.push_back(std::move(action)); + } else { + unsafe_actions.push_back(std::move(action)); + } + } + Envoy::FatalErrorHandler::registerFatalActions( + std::move(safe_actions), std::move(unsafe_actions), api_->threadFactory()); + } + if (!bootstrap_.default_socket_interface().empty()) { auto& sock_name = bootstrap_.default_socket_interface(); auto sock = const_cast(Network::socketInterface(sock_name)); @@ -733,6 +755,7 @@ void InstanceImpl::terminate() { restarter_.shutdown(); ENVOY_LOG(info, "exiting"); ENVOY_FLUSH_LOG(); + FatalErrorHandler::clearFatalActionsOnTerminate(); } Runtime::Loader& InstanceImpl::runtime() { return Runtime::LoaderSingleton::get(); } diff --git a/test/common/event/dispatcher_impl_test.cc b/test/common/event/dispatcher_impl_test.cc index cbb0119f0ff27..092075137a4f6 100644 --- a/test/common/event/dispatcher_impl_test.cc +++ b/test/common/event/dispatcher_impl_test.cc @@ -458,6 +458,51 @@ TEST_F(DispatcherImplTest, IsThreadSafe) { EXPECT_FALSE(dispatcher_->isThreadSafe()); } +class TestFatalAction : public Server::Configuration::FatalAction { +public: + void run(const ScopeTrackedObject* /*current_object*/) override { ++times_ran_; } + bool isAsyncSignalSafe() const override { return true; } + int getNumTimesRan() { return times_ran_; } + +private: + int times_ran_ = 0; +}; + +TEST_F(DispatcherImplTest, OnlyRunsFatalActionsIfRunningOnSameThread) { + FatalAction::FatalActionPtrList actions; + actions.emplace_back(std::make_unique()); + auto* action = dynamic_cast(actions.front().get()); + + ASSERT_EQ(action->getNumTimesRan(), 0); + + // Should not run as dispatcher isn't running yet + auto non_running_dispatcher = api_->allocateDispatcher("non_running_thread"); + static_cast(non_running_dispatcher.get()) + ->runFatalActionsOnTrackedObject(actions); + ASSERT_EQ(action->getNumTimesRan(), 0); + + // Should not run when not on same thread + static_cast(dispatcher_.get())->runFatalActionsOnTrackedObject(actions); + ASSERT_EQ(action->getNumTimesRan(), 0); + + // Should run since on same thread as dispatcher + dispatcher_->post([this, &actions]() { + { + Thread::LockGuard lock(mu_); + static_cast(dispatcher_.get())->runFatalActionsOnTrackedObject(actions); + work_finished_ = true; + } + cv_.notifyOne(); + }); + + Thread::LockGuard lock(mu_); + while (!work_finished_) { + cv_.wait(mu_); + } + + EXPECT_EQ(action->getNumTimesRan(), 1); +} + class NotStartedDispatcherImplTest : public testing::Test { protected: NotStartedDispatcherImplTest() diff --git a/test/common/signal/BUILD b/test/common/signal/BUILD index c3f9cf5df8434..00b5d183480fc 100644 --- a/test/common/signal/BUILD +++ b/test/common/signal/BUILD @@ -23,3 +23,13 @@ envoy_cc_test( "//test/test_common:utility_lib", ], ) + +envoy_cc_test( + name = "fatal_action_test", + srcs = ["fatal_action_test.cc"], + deps = [ + "//source/common/signal:fatal_error_handler_lib", + "//test/mocks/server:instance_mocks", + "//test/test_common:utility_lib", + ], +) diff --git a/test/common/signal/fatal_action_test.cc b/test/common/signal/fatal_action_test.cc new file mode 100644 index 0000000000000..5105e8f49ffd6 --- /dev/null +++ b/test/common/signal/fatal_action_test.cc @@ -0,0 +1,126 @@ +#include "envoy/server/fatal_action_config.h" + +#include "common/signal/fatal_action.h" +#include "common/signal/fatal_error_handler.h" + +#include "test/mocks/server/instance.h" +#include "test/test_common/utility.h" + +#include "absl/synchronization/notification.h" +#include "gtest/gtest.h" + +namespace Envoy { +namespace FatalErrorHandler { + +extern void resetFatalActionStateForTest(); + +} // namespace FatalErrorHandler +namespace FatalAction { + +// Use this test handler instead of a mock, because fatal error handlers must be +// signal-safe and a mock might allocate memory. +class TestFatalErrorHandler : public FatalErrorHandlerInterface { + void onFatalError(std::ostream& /*os*/) const override {} + void + runFatalActionsOnTrackedObject(const FatalAction::FatalActionPtrList& actions) const override { + // Call the Fatal Actions with nullptr + for (const Server::Configuration::FatalActionPtr& action : actions) { + action->run(nullptr); + } + } +}; + +class TestFatalAction : public Server::Configuration::FatalAction { +public: + TestFatalAction(bool is_safe, int* const counter) : is_safe_(is_safe), counter_(counter) {} + void run(const ScopeTrackedObject* /*current_object*/) override { ++(*counter_); } + bool isAsyncSignalSafe() const override { return is_safe_; } + +private: + const bool is_safe_; + int* const counter_; +}; + +class FatalActionTest : public ::testing::Test { +public: + FatalActionTest() : handler_(std::make_unique()) { + FatalErrorHandler::registerFatalErrorHandler(*handler_); + } + +protected: + void TearDown() override { + // Reset module state + FatalErrorHandler::resetFatalActionStateForTest(); + FatalErrorHandler::removeFatalErrorHandler(*handler_); + } + + std::unique_ptr handler_; + FatalAction::FatalActionPtrList safe_actions_; + FatalAction::FatalActionPtrList unsafe_actions_; + int counter_ = 0; +}; + +TEST_F(FatalActionTest, ShouldNotBeAbleToRunActionsBeforeRegistration) { + // Call the actions + EXPECT_EQ(FatalErrorHandler::runSafeActions(), Status::ActionManangerUnset); + EXPECT_EQ(FatalErrorHandler::runUnsafeActions(), Status::ActionManangerUnset); +} + +TEST_F(FatalActionTest, ShouldOnlyBeAbleToRegisterFatalActionsOnce) { + // Register empty list of actions + FatalErrorHandler::registerFatalActions({}, {}, Thread::threadFactoryForTest()); + EXPECT_DEBUG_DEATH( + { FatalErrorHandler::registerFatalActions({}, {}, Thread::threadFactoryForTest()); }, ""); +} + +TEST_F(FatalActionTest, CanCallRegisteredActions) { + // Set up Fatal Actions + safe_actions_.emplace_back(std::make_unique(true, &counter_)); + unsafe_actions_.emplace_back(std::make_unique(false, &counter_)); + FatalErrorHandler::registerFatalActions(std::move(safe_actions_), std::move(unsafe_actions_), + Thread::threadFactoryForTest()); + + // Call the actions and check they increment the counter. + EXPECT_EQ(FatalErrorHandler::runSafeActions(), Status::Success); + EXPECT_EQ(counter_, 1); + + EXPECT_EQ(FatalErrorHandler::runUnsafeActions(), Status::Success); + EXPECT_EQ(counter_, 2); +} + +TEST_F(FatalActionTest, CanOnlyRunSafeActionsOnce) { + FatalErrorHandler::registerFatalActions(std::move(safe_actions_), std::move(unsafe_actions_), + Thread::threadFactoryForTest()); + ASSERT_EQ(FatalErrorHandler::runSafeActions(), Status::Success); + + EXPECT_EQ(FatalErrorHandler::runSafeActions(), Status::AlreadyRanOnThisThread); +} + +TEST_F(FatalActionTest, ShouldOnlyBeAbleToRunUnsafeActionsFromThreadThatRanSafeActions) { + FatalErrorHandler::registerFatalActions(std::move(safe_actions_), std::move(unsafe_actions_), + Thread::threadFactoryForTest()); + + absl::Notification run_unsafe_actions; + absl::Notification ran_safe_actions; + auto fatal_action_thread = + Thread::threadFactoryForTest().createThread([&run_unsafe_actions, &ran_safe_actions]() { + // Run Safe Actions and notify + EXPECT_EQ(FatalErrorHandler::runSafeActions(), Status::Success); + ran_safe_actions.Notify(); + + run_unsafe_actions.WaitForNotification(); + EXPECT_EQ(FatalErrorHandler::runUnsafeActions(), Status::Success); + }); + + // Wait for other thread to run safe actions, then try to run safe and + // unsafe actions, they should both not run for this thread. + ran_safe_actions.WaitForNotification(); + ASSERT_EQ(FatalErrorHandler::runSafeActions(), Status::RunningOnAnotherThread); + ASSERT_EQ(FatalErrorHandler::runUnsafeActions(), Status::RunningOnAnotherThread); + run_unsafe_actions.Notify(); + + fatal_action_thread->join(); +} + +} // namespace FatalAction +} // namespace Envoy diff --git a/test/common/signal/signals_test.cc b/test/common/signal/signals_test.cc index cc66d32d81d0e..3ecf49f6695ba 100644 --- a/test/common/signal/signals_test.cc +++ b/test/common/signal/signals_test.cc @@ -8,8 +8,6 @@ #include "test/common/stats/stat_test_utility.h" #include "test/test_common/utility.h" -#include "gtest/gtest.h" - namespace Envoy { #if defined(__has_feature) #if __has_feature(address_sanitizer) @@ -21,10 +19,41 @@ namespace Envoy { #define ASANITIZED /* Sanitized by GCC */ #endif +namespace FatalErrorHandler { + +extern void resetFatalActionStateForTest(); + +} // namespace FatalErrorHandler + // Use this test handler instead of a mock, because fatal error handlers must be // signal-safe and a mock might allocate memory. class TestFatalErrorHandler : public FatalErrorHandlerInterface { void onFatalError(std::ostream& os) const override { os << "HERE!"; } + void + runFatalActionsOnTrackedObject(const FatalAction::FatalActionPtrList& actions) const override { + // Run the actions + for (const auto& action : actions) { + action->run(nullptr); + } + } +}; + +// Use this to test fatal actions get called, as well as the order they run. +class EchoFatalAction : public Server::Configuration::FatalAction { +public: + EchoFatalAction(absl::string_view echo_msg) : echo_msg_(echo_msg) {} + void run(const ScopeTrackedObject* /*current_object*/) override { std::cerr << echo_msg_; } + bool isAsyncSignalSafe() const override { return true; } + +private: + const std::string echo_msg_; +}; + +// Use this to test failing while in a signal handler. +class SegfaultFatalAction : public Server::Configuration::FatalAction { +public: + void run(const ScopeTrackedObject* /*current_object*/) override { raise(SIGSEGV); } + bool isAsyncSignalSafe() const override { return false; } }; // Death tests that expect a particular output are disabled under address sanitizer. @@ -46,6 +75,7 @@ TEST(SignalsDeathTest, InvalidAddressDeathTest) { TEST(SignalsDeathTest, RegisteredHandlerTest) { TestFatalErrorHandler handler; FatalErrorHandler::registerFatalErrorHandler(handler); + SignalAction actions; // Make sure the fatal error log "HERE" registered above is logged on fatal error. EXPECT_DEATH( @@ -115,6 +145,39 @@ TEST(SignalsDeathTest, RestoredPreviousHandlerDeathTest) { EXPECT_DEATH([]() -> void { abort(); }(), "backtrace.*Abort(ed)?"); } +TEST(SignalsDeathTest, CanRunAllFatalActions) { + SignalAction actions; + TestFatalErrorHandler handler; + FatalErrorHandler::registerFatalErrorHandler(handler); + + FatalAction::FatalActionPtrList safe_actions; + FatalAction::FatalActionPtrList unsafe_actions; + + safe_actions.emplace_back(std::make_unique("Safe Action!")); + unsafe_actions.emplace_back(std::make_unique("Unsafe Action!")); + FatalErrorHandler::registerFatalActions(std::move(safe_actions), std::move(unsafe_actions), + Thread::threadFactoryForTest()); + EXPECT_DEATH([]() -> void { raise(SIGSEGV); }(), "Safe Action!.*HERE.*Unsafe Action!"); + FatalErrorHandler::removeFatalErrorHandler(handler); + FatalErrorHandler::resetFatalActionStateForTest(); +} + +TEST(SignalsDeathTest, ShouldJustExitIfFatalActionsRaiseAnotherSignal) { + SignalAction actions; + TestFatalErrorHandler handler; + FatalErrorHandler::registerFatalErrorHandler(handler); + + FatalAction::FatalActionPtrList safe_actions; + FatalAction::FatalActionPtrList unsafe_actions; + + unsafe_actions.emplace_back(std::make_unique()); + FatalErrorHandler::registerFatalActions(std::move(safe_actions), std::move(unsafe_actions), + Thread::threadFactoryForTest()); + + EXPECT_DEATH([]() -> void { raise(SIGABRT); }(), "Our FatalActions triggered a fatal signal."); + FatalErrorHandler::removeFatalErrorHandler(handler); + FatalErrorHandler::resetFatalActionStateForTest(); +} #endif TEST(SignalsDeathTest, IllegalStackAccessDeathTest) { @@ -179,6 +242,9 @@ class MemoryCheckingFatalErrorHandler : public FatalErrorHandlerInterface { allocated_after_call_ = memory_test_.consumedBytes(); } + void runFatalActionsOnTrackedObject(const FatalAction::FatalActionPtrList& + /*actions*/) const override {} + private: const Stats::TestUtil::MemoryTest& memory_test_; uint64_t& allocated_after_call_; diff --git a/test/mocks/server/BUILD b/test/mocks/server/BUILD index 7214ca8203715..84fe48578be9a 100644 --- a/test/mocks/server/BUILD +++ b/test/mocks/server/BUILD @@ -37,6 +37,14 @@ envoy_cc_mock( ], ) +envoy_cc_mock( + name = "fatal_action_factory_mocks", + hdrs = ["fatal_action_factory.h"], + deps = [ + "//include/envoy/server:fatal_action_interface", + ], +) + envoy_cc_mock( name = "options_mocks", srcs = ["options.cc"], @@ -306,6 +314,7 @@ envoy_cc_mock( "//test/mocks/server:config_tracker_mocks", "//test/mocks/server:drain_manager_mocks", "//test/mocks/server:factory_context_mocks", + "//test/mocks/server:fatal_action_factory_mocks", "//test/mocks/server:filter_chain_factory_context_mocks", "//test/mocks/server:guard_dog_mocks", "//test/mocks/server:health_checker_factory_context_mocks", diff --git a/test/mocks/server/fatal_action_factory.h b/test/mocks/server/fatal_action_factory.h new file mode 100644 index 0000000000000..751b3f6e1528f --- /dev/null +++ b/test/mocks/server/fatal_action_factory.h @@ -0,0 +1,20 @@ +#pragma once + +#include "envoy/server/fatal_action_config.h" + +#include "gmock/gmock.h" + +namespace Envoy { +namespace Server { +namespace Configuration { +class MockFatalActionFactory : public FatalActionFactory { +public: + MOCK_METHOD(FatalActionPtr, createFatalActionFromProto, + (const envoy::config::bootstrap::v3::FatalAction& config, Instance* server), + (override)); + MOCK_METHOD(ProtobufTypes::MessagePtr, createEmptyConfigProto, (), (override)); + MOCK_METHOD(std::string, name, (), (const, override)); +}; +} // namespace Configuration +} // namespace Server +} // namespace Envoy diff --git a/test/mocks/server/mocks.h b/test/mocks/server/mocks.h index d041412d01d17..731d4c22778ee 100644 --- a/test/mocks/server/mocks.h +++ b/test/mocks/server/mocks.h @@ -8,6 +8,7 @@ #include "config_tracker.h" #include "drain_manager.h" #include "factory_context.h" +#include "fatal_action_factory.h" #include "filter_chain_factory_context.h" #include "guard_dog.h" #include "health_checker_factory_context.h" diff --git a/test/per_file_coverage.sh b/test/per_file_coverage.sh index 13526f0eebb2c..e0d615c048965 100755 --- a/test/per_file_coverage.sh +++ b/test/per_file_coverage.sh @@ -20,7 +20,7 @@ declare -a KNOWN_LOW_COVERAGE=( "source/common/crypto:0.0" "source/common/common:96.1" "source/common/common/posix:94.1" -"source/common/signal:90.4" +"source/common/signal:83.1" # Death tests don't report LCOV "source/common/watchdog:42.9" # Death tests don't report LCOV "source/exe:93.7" "source/extensions:96.3" diff --git a/test/server/BUILD b/test/server/BUILD index c26a2ebcc87ac..82766be214b2c 100644 --- a/test/server/BUILD +++ b/test/server/BUILD @@ -371,6 +371,7 @@ envoy_cc_test( "//test/common/stats:stat_test_utility_lib", "//test/integration:integration_lib", "//test/mocks/server:bootstrap_extension_factory_mocks", + "//test/mocks/server:fatal_action_factory_mocks", "//test/mocks/server:hot_restart_mocks", "//test/mocks/server:instance_mocks", "//test/mocks/server:options_mocks", diff --git a/test/server/server_test.cc b/test/server/server_test.cc index d7abaf788c1d1..aa097fc787cf3 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -1,8 +1,10 @@ #include +#include "envoy/common/scope_tracker.h" #include "envoy/config/core/v3/base.pb.h" #include "envoy/network/exception.h" #include "envoy/server/bootstrap_extension_config.h" +#include "envoy/server/fatal_action_config.h" #include "common/common/assert.h" #include "common/network/address_impl.h" @@ -19,6 +21,7 @@ #include "test/common/stats/stat_test_utility.h" #include "test/integration/server.h" #include "test/mocks/server/bootstrap_extension_factory.h" +#include "test/mocks/server/fatal_action_factory.h" #include "test/mocks/server/hot_restart.h" #include "test/mocks/server/instance.h" #include "test/mocks/server/options.h" @@ -1212,6 +1215,72 @@ TEST_P(ServerInstanceImplTest, WithUnknownBootstrapExtensions) { "Didn't find a registered implementation for name: 'envoy_test.bootstrap.foo'"); } +// Insufficient support on Windows. +#ifndef WIN32 +class SafeFatalAction : public Configuration::FatalAction { +public: + void run(const ScopeTrackedObject* /*current_object*/) override { + std::cerr << "Called SafeFatalAction" << std::endl; + } + + bool isAsyncSignalSafe() const override { return true; } +}; + +class UnsafeFatalAction : public Configuration::FatalAction { +public: + void run(const ScopeTrackedObject* /*current_object*/) override { + std::cerr << "Called UnsafeFatalAction" << std::endl; + } + + bool isAsyncSignalSafe() const override { return false; } +}; + +TEST_P(ServerInstanceImplTest, WithFatalActions) { + // Inject Safe Factory. + NiceMock mock_safe_factory; + EXPECT_CALL(mock_safe_factory, createEmptyConfigProto()).WillRepeatedly(Invoke([]() { + return std::make_unique(); + })); + EXPECT_CALL(mock_safe_factory, name()).WillRepeatedly(Return("envoy_test.fatal_action.safe")); + + Registry::InjectFactory registered_safe_factory( + mock_safe_factory); + + // Inject Unsafe Factory + NiceMock mock_unsafe_factory; + EXPECT_CALL(mock_unsafe_factory, createEmptyConfigProto()).WillRepeatedly(Invoke([]() { + return std::make_unique(); + })); + EXPECT_CALL(mock_unsafe_factory, name()).WillRepeatedly(Return("envoy_test.fatal_action.unsafe")); + + Registry::InjectFactory registered_unsafe_factory( + mock_unsafe_factory); + + EXPECT_DEATH( + { + EXPECT_CALL(mock_safe_factory, createFatalActionFromProto(_, _)) + .WillOnce( + Invoke([](const envoy::config::bootstrap::v3::FatalAction& /*config*/, + Instance* /*server*/) { return std::make_unique(); })); + EXPECT_CALL(mock_unsafe_factory, createFatalActionFromProto(_, _)) + .WillOnce( + Invoke([](const envoy::config::bootstrap::v3::FatalAction& /*config*/, + Instance* /*server*/) { return std::make_unique(); })); + absl::Notification abort_called; + auto server_thread = + startTestServer("test/server/test_data/server/fatal_actions.yaml", false); + // Trigger SIGABRT, wait for the ABORT + server_->dispatcher().post([&] { + abort(); + abort_called.Notify(); + }); + + abort_called.WaitForNotification(); + }, + ""); +} +#endif + // Static configuration validation. We test with both allow/reject settings various aspects of // configuration from YAML. class StaticValidationTest diff --git a/test/server/test_data/server/fatal_actions.yaml b/test/server/test_data/server/fatal_actions.yaml new file mode 100644 index 0000000000000..cbebfaa188ebb --- /dev/null +++ b/test/server/test_data/server/fatal_actions.yaml @@ -0,0 +1,9 @@ +fatal_actions: + - config: + name: envoy_test.fatal_action.safe + typed_config: + '@type': type.googleapis.com/google.protobuf.Empty + - config: + name: envoy_test.fatal_action.unsafe + typed_config: + '@type': type.googleapis.com/google.protobuf.Empty