Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ extensions/filters/common/original_src @snowp @klarose
/*/extensions/filters/http/decompressor @rojkov @dio
# Watchdog Extensions
/*/extensions/watchdog/profile_action @kbaichoo @antoniovicente
/*/extensions/watchdog/abort_action @kbaichoo @antoniovicente
# Core upstream code
extensions/upstreams/http @alyssawilk @snowp @mattklein123
extensions/upstreams/http/http @alyssawilk @snowp @mattklein123
Expand Down
2 changes: 1 addition & 1 deletion api/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ proto_library(
"//envoy/extensions/upstreams/http/http/v3:pkg",
"//envoy/extensions/upstreams/http/tcp/v3:pkg",
"//envoy/extensions/wasm/v3:pkg",
"//envoy/extensions/watchdog/abort_action/v3alpha:pkg",
"//envoy/extensions/watchdog/profile_action/v3alpha:pkg",
"//envoy/service/accesslog/v3:pkg",
"//envoy/service/auth/v3:pkg",
Expand All @@ -272,6 +271,7 @@ proto_library(
"//envoy/type/metadata/v3:pkg",
"//envoy/type/tracing/v3:pkg",
"//envoy/type/v3:pkg",
"//envoy/watchdog/v3alpha:pkg",
],
)

Expand Down

This file was deleted.

2 changes: 2 additions & 0 deletions api/envoy/watchdog/v3alpha/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This contains watchdog actions that are part of core Envoy, and therefore cannot
be in the extensions directory.
29 changes: 29 additions & 0 deletions api/envoy/watchdog/v3alpha/abort_action.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
syntax = "proto3";

package envoy.watchdog.v3alpha;

import "google/protobuf/duration.proto";

import "udpa/annotations/status.proto";
import "udpa/annotations/versioning.proto";
import "validate/validate.proto";

option java_package = "io.envoyproxy.envoy.watchdog.v3alpha";
option java_outer_classname = "AbortActionProto";
option java_multiple_files = true;
option (udpa.annotations.file_status).work_in_progress = true;
option (udpa.annotations.file_status).package_version_status = ACTIVE;

// [#protodoc-title: Watchdog Action that kills a stuck thread to kill the process.]
Comment thread
htuch marked this conversation as resolved.

// A GuardDogAction that will terminate the process by killing the
// stuck thread. This would allow easier access to the call stack of the stuck
// thread since we would run signal handlers on that thread. By default
// this will be registered to run as the last watchdog action on KILL and
// MULTIKILL events if those are enabled.
message AbortActionConfig {
// How long to wait for the thread to respond to the thread kill function
// before killing the process from this action. This is a blocking action.
// By default this is 5 seconds.
google.protobuf.Duration wait_duration = 1;
}
2 changes: 1 addition & 1 deletion api/versioning/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ proto_library(
"//envoy/extensions/upstreams/http/http/v3:pkg",
"//envoy/extensions/upstreams/http/tcp/v3:pkg",
"//envoy/extensions/wasm/v3:pkg",
"//envoy/extensions/watchdog/abort_action/v3alpha:pkg",
"//envoy/extensions/watchdog/profile_action/v3alpha:pkg",
"//envoy/service/accesslog/v3:pkg",
"//envoy/service/auth/v3:pkg",
Expand All @@ -155,6 +154,7 @@ proto_library(
"//envoy/type/metadata/v3:pkg",
"//envoy/type/tracing/v3:pkg",
"//envoy/type/v3:pkg",
"//envoy/watchdog/v3alpha:pkg",
],
)

Expand Down
1 change: 0 additions & 1 deletion bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ WINDOWS_SKIP_TARGETS = [
"envoy.tracers.lightstep",
"envoy.tracers.datadog",
"envoy.tracers.opencensus",
"envoy.watchdog.abort_action",
]

# Make all contents of an external repository accessible under a filegroup. Used for external HTTP
Expand Down
2 changes: 1 addition & 1 deletion docs/root/api-v3/config/watchdog/watchdog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Watchdog
:maxdepth: 2

../../extensions/watchdog/profile_action/v3alpha/*
../../extensions/watchdog/abort_action/v3alpha/*
../../watchdog/v3alpha/*
1 change: 1 addition & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Minor Behavior Changes
*Changes that may cause incompatibilities for some users, but should not for most*

* ext_authz filter: the deprecated field :ref:`use_alpha <envoy_api_field_config.filter.http.ext_authz.v2.ExtAuthz.use_alpha>` is no longer supported and cannot be set anymore.
* watchdog: the watchdog action :ref:`abort_action <envoy_v3_api_msg_watchdog.v3alpha.AbortActionConfig>` is now the default action to terminate the process if watchdog kill / multikill is enabled.

Bug Fixes
---------
Expand Down
2 changes: 1 addition & 1 deletion docs/root/version_history/v1.16.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ New Features
* watchdog: support randomizing the watchdog's kill timeout to prevent synchronized kills via a maximium jitter parameter :ref:`max_kill_timeout_jitter<envoy_v3_api_field_config.bootstrap.v3.Watchdog.max_kill_timeout_jitter>`.
* watchdog: supports an extension point where actions can be registered to fire on watchdog events such as miss, megamiss, kill and multikill. See :ref:`watchdog actions<envoy_v3_api_field_config.bootstrap.v3.Watchdog.actions>`.
* watchdog: watchdog action extension that does cpu profiling. See :ref:`Profile Action <envoy_v3_api_file_envoy/extensions/watchdog/profile_action/v3alpha/profile_action.proto>`.
* watchdog: watchdog action extension that sends SIGABRT to the stuck thread to terminate the process. See :ref:`Abort Action <envoy_v3_api_file_envoy/extensions/watchdog/abort_action/v3alpha/abort_action.proto>`.
* watchdog: watchdog action extension that sends SIGABRT to the stuck thread to terminate the process. See :ref:`Abort Action <envoy_v3_api_msg_watchdog.v3alpha.AbortActionConfig>`.
Comment thread
antoniovicente marked this conversation as resolved.
* xds: added :ref:`extension config discovery<envoy_v3_api_msg_config.core.v3.ExtensionConfigSource>` support for HTTP filters.
* xds: added support for mixed v2/v3 discovery response, which enable type url downgrade and upgrade. This feature is disabled by default and is controlled by runtime guard `envoy.reloadable_features.enable_type_url_downgrade_and_upgrade`.
* zlib: added option to use `zlib-ng <https://github.com/zlib-ng/zlib-ng>`_ as zlib library.
Expand Down

This file was deleted.

29 changes: 29 additions & 0 deletions generated_api_shadow/envoy/watchdog/v3alpha/abort_action.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions source/common/thread/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

licenses(["notice"]) # Apache 2

envoy_package()

envoy_cc_library(
name = "terminate_thread_lib",
srcs = ["terminate_thread.cc"],
hdrs = ["terminate_thread.h"],
deps = [
"//include/envoy/thread:thread_interface",
"//source/common/common:minimal_logger_lib",
],
)
31 changes: 31 additions & 0 deletions source/common/thread/terminate_thread.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "common/thread/terminate_thread.h"

#include <sys/types.h>

#include <csignal>

#include "common/common/logger.h"

namespace Envoy {
namespace Thread {
namespace {
#ifdef __linux__
pid_t toPlatformTid(int64_t tid) { return static_cast<pid_t>(tid); }
#elif defined(__APPLE__)
uint64_t toPlatformTid(int64_t tid) { return static_cast<uint64_t>(tid); }
#endif
} // namespace

bool terminateThread(const ThreadId& tid) {
#ifndef WIN32
// Assume POSIX-compatible system and signal to the thread.
return kill(toPlatformTid(tid.getId()), SIGABRT) == 0;
#else
// Windows, currently unsupported termination of thread.
ENVOY_LOG_MISC(error, "Windows is currently unsupported for terminateThread.");
return false;
#endif
}

} // namespace Thread
} // namespace Envoy
19 changes: 19 additions & 0 deletions source/common/thread/terminate_thread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#pragma once

#include "envoy/thread/thread.h"

namespace Envoy {
namespace Thread {
/**
* Tries to terminates the process by killing the thread specified by
* the ThreadId. The implementation is platform dependent and currently
* only works on platforms that support SIGABRT.
*
* Returns true if the platform specific function to terminate the thread
* succeeded (i.e. kill() == 0). If the platform is currently unsupported, this
* will return false.
*/
bool terminateThread(const ThreadId& tid);
Comment thread
mattklein123 marked this conversation as resolved.

} // namespace Thread
} // namespace Envoy
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_extension",
"envoy_cc_library",
"envoy_extension_package",
"envoy_package",
)

licenses(["notice"]) # Apache 2

envoy_extension_package()
envoy_package()

envoy_cc_library(
name = "abort_action_lib",
Expand All @@ -19,22 +18,21 @@ envoy_cc_library(
"//include/envoy/thread:thread_interface",
"//source/common/common:assert_lib",
"//source/common/protobuf:utility_lib",
"@envoy_api//envoy/extensions/watchdog/abort_action/v3alpha:pkg_cc_proto",
"//source/common/thread:terminate_thread_lib",
"@envoy_api//envoy/watchdog/v3alpha:pkg_cc_proto",
],
)

envoy_cc_extension(
name = "config",
srcs = ["config.cc"],
hdrs = ["config.h"],
security_posture = "data_plane_agnostic",
status = "alpha",
envoy_cc_library(
name = "abort_action_config",
srcs = ["abort_action_config.cc"],
hdrs = ["abort_action_config.h"],
deps = [
":abort_action_lib",
"//include/envoy/registry",
"//source/common/config:utility_lib",
"//source/common/protobuf",
"//source/common/protobuf:message_validator_lib",
"@envoy_api//envoy/extensions/watchdog/abort_action/v3alpha:pkg_cc_proto",
"@envoy_api//envoy/watchdog/v3alpha:pkg_cc_proto",
],
)
2 changes: 2 additions & 0 deletions source/common/watchdog/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This contains watchdog actions that are part of core Envoy, and therefore cannot
be in the extensions directory.
54 changes: 54 additions & 0 deletions source/common/watchdog/abort_action.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include "common/watchdog/abort_action.h"

#include "envoy/thread/thread.h"

#include "common/common/assert.h"
#include "common/common/fmt.h"
#include "common/common/logger.h"
#include "common/protobuf/utility.h"
#include "common/thread/terminate_thread.h"

namespace Envoy {
namespace Watchdog {
namespace {
constexpr uint64_t DefaultWaitDurationMs = 5000;
} // end namespace

AbortAction::AbortAction(envoy::watchdog::v3alpha::AbortActionConfig& config,
Server::Configuration::GuardDogActionFactoryContext& /*context*/)
: wait_duration_(absl::Milliseconds(
PROTOBUF_GET_MS_OR_DEFAULT(config, wait_duration, DefaultWaitDurationMs))) {}

void AbortAction::run(
envoy::config::bootstrap::v3::Watchdog::WatchdogAction::WatchdogEvent /*event*/,
const std::vector<std::pair<Thread::ThreadId, MonotonicTime>>& thread_last_checkin_pairs,
MonotonicTime /*now*/) {

if (thread_last_checkin_pairs.empty()) {
ENVOY_LOG_MISC(warn, "Watchdog AbortAction called without any thread.");
return;
}

// The following lines of code won't be considered covered by code coverage
// tools since they would run in DEATH tests.
const auto& thread_id = thread_last_checkin_pairs[0].first;
const std::string tid_string = thread_id.debugString();
ENVOY_LOG_MISC(error, "Watchdog AbortAction terminating thread with tid {}.", tid_string);

if (Thread::terminateThread(thread_id)) {
// Successfully signaled to thread to terminate, sleep for wait_duration.
absl::SleepFor(wait_duration_);
} else {
ENVOY_LOG_MISC(error, "Failed to terminate tid {}", tid_string);
}

// Abort from the action since the signaled thread hasn't yet crashed the process.
// Panicing in the action gives flexibility since it doesn't depend on
// external code to kill the process if the signal fails.
PANIC(fmt::format(
"Failed to terminate thread with id {}, aborting from Watchdog AbortAction instead.",
tid_string));
}

} // namespace Watchdog
} // namespace Envoy
Loading