Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ extensions/filters/common/original_src @snowp @klarose
/*/extensions/filters/http/decompressor @rojkov @dio
# Watchdog Extensions
/*/extensions/watchdog/profile_action @kbaichoo @antoniovicente
/*/extensions/watchdog/abort_action @kbaichoo @antoniovicente
# Core upstream code
extensions/upstreams/http @alyssawilk @snowp @mattklein123
extensions/upstreams/http/http @alyssawilk @snowp @mattklein123
Expand Down
2 changes: 1 addition & 1 deletion api/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,6 @@ proto_library(
"//envoy/extensions/upstreams/http/http/v3:pkg",
"//envoy/extensions/upstreams/http/tcp/v3:pkg",
"//envoy/extensions/wasm/v3:pkg",
"//envoy/extensions/watchdog/abort_action/v3alpha:pkg",
"//envoy/extensions/watchdog/profile_action/v3alpha:pkg",
"//envoy/service/accesslog/v3:pkg",
"//envoy/service/auth/v3:pkg",
Expand All @@ -272,6 +271,7 @@ proto_library(
"//envoy/type/metadata/v3:pkg",
"//envoy/type/tracing/v3:pkg",
"//envoy/type/v3:pkg",
"//envoy/watchdog/v3alpha:pkg",
],
)

Expand Down

This file was deleted.

2 changes: 2 additions & 0 deletions api/envoy/watchdog/v3alpha/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This contains watchdog actions that are part of core Envoy, and therefore cannot
be in the extensions directory.
29 changes: 29 additions & 0 deletions api/envoy/watchdog/v3alpha/abort_action.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
syntax = "proto3";

package envoy.watchdog.v3alpha;

import "google/protobuf/duration.proto";

import "udpa/annotations/status.proto";
import "udpa/annotations/versioning.proto";
import "validate/validate.proto";

option java_package = "io.envoyproxy.envoy.watchdog.v3alpha";
option java_outer_classname = "AbortActionProto";
option java_multiple_files = true;
option (udpa.annotations.file_status).work_in_progress = true;
option (udpa.annotations.file_status).package_version_status = ACTIVE;

// [#protodoc-title: Watchdog Action that kills a stuck thread to kill the process.]
Comment thread
htuch marked this conversation as resolved.

// A GuardDogAction that will terminate the process by killing the
// stuck thread. This would allow easier access to the call stack of the stuck
// thread since we would run signal handlers on that thread. By default
// this will be registered to run as the last watchdog action on KILL and
// MULTIKILL events if those are enabled.
message AbortActionConfig {
// How long to wait for the thread to respond to the thread kill function
// before killing the process from this action. This is a blocking action.
// By default this is 5 seconds.
google.protobuf.Duration wait_duration = 1;
}
2 changes: 1 addition & 1 deletion api/versioning/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ proto_library(
"//envoy/extensions/upstreams/http/http/v3:pkg",
"//envoy/extensions/upstreams/http/tcp/v3:pkg",
"//envoy/extensions/wasm/v3:pkg",
"//envoy/extensions/watchdog/abort_action/v3alpha:pkg",
"//envoy/extensions/watchdog/profile_action/v3alpha:pkg",
"//envoy/service/accesslog/v3:pkg",
"//envoy/service/auth/v3:pkg",
Expand All @@ -155,6 +154,7 @@ proto_library(
"//envoy/type/metadata/v3:pkg",
"//envoy/type/tracing/v3:pkg",
"//envoy/type/v3:pkg",
"//envoy/watchdog/v3alpha:pkg",
],
)

Expand Down
1 change: 0 additions & 1 deletion bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ WINDOWS_SKIP_TARGETS = [
"envoy.tracers.lightstep",
"envoy.tracers.datadog",
"envoy.tracers.opencensus",
"envoy.watchdog.abort_action",
]

# Make all contents of an external repository accessible under a filegroup. Used for external HTTP
Expand Down
2 changes: 1 addition & 1 deletion docs/root/api-v3/config/watchdog/watchdog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Watchdog
:maxdepth: 2

../../extensions/watchdog/profile_action/v3alpha/*
../../extensions/watchdog/abort_action/v3alpha/*
../../watchdog/abort_action/v3alpha/*
1 change: 1 addition & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Minor Behavior Changes
*Changes that may cause incompatibilities for some users, but should not for most*

* ext_authz filter: the deprecated field :ref:`use_alpha <envoy_api_field_config.filter.http.ext_authz.v2.ExtAuthz.use_alpha>` is no longer supported and cannot be set anymore.
* watchdog: the watchdog action :ref:`abort_action <envoy_api_msg_watchdog.abort_action.v3alpha.AbortActionConfig>` is now the default action to terminate the process if watchdog kill / multikill is enabled.

Bug Fixes
---------
Expand Down

This file was deleted.

29 changes: 29 additions & 0 deletions generated_api_shadow/envoy/watchdog/v3alpha/abort_action.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions source/common/thread/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_library",
"envoy_package",
)

licenses(["notice"]) # Apache 2

envoy_package()

envoy_cc_library(
name = "terminate_thread_lib",
srcs = ["terminate_thread.cc"],
hdrs = ["terminate_thread.h"],
deps = [
"//include/envoy/thread:thread_interface",
"//source/common/common:minimal_logger_lib",
],
)
31 changes: 31 additions & 0 deletions source/common/thread/terminate_thread.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "common/thread/terminate_thread.h"

#include <sys/types.h>

#include <csignal>

#include "common/common/logger.h"

namespace Envoy {
namespace Thread {
namespace {
#ifdef __linux__
pid_t toPlatformTid(int64_t tid) { return static_cast<pid_t>(tid); }
#elif defined(__APPLE__)
uint64_t toPlatformTid(int64_t tid) { return static_cast<uint64_t>(tid); }
#endif
} // namespace

bool terminateThread(const ThreadId& tid) {
#ifndef WIN32
// Assume POSIX-compatible system and signal to the thread.
return kill(toPlatformTid(tid.getId()), SIGABRT) == 0;
#else
// Windows, currently unsupported termination of thread.
ENVOY_LOG_MISC(error, "Windows is currently unsupported for terminateThread.");
return false;
#endif
}

} // namespace Thread
} // namespace Envoy
18 changes: 18 additions & 0 deletions source/common/thread/terminate_thread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include "envoy/thread/thread.h"

namespace Envoy {
namespace Thread {
/**
* Tries to terminates the process by killing the thread specified by
* the ThreadId. The implementation is platform dependent and currently
* only works on platforms that support SIGABRT.
*
* Returns the result from the platform specific function (i.e. kill) to terminate
* the thread. If the platform is currently unsupported, this will return false.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does not return the result of the platform specific function, it returns true if the platform specific function succeeded. See implementation, return is: kill() == 0;

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch I've made it:

Returns true if the platform specific function to terminate the thread succeeded (i.e. kill() == 0). If the platform is currently unsupported, this will return false.

*/
bool terminateThread(const ThreadId& tid);
Comment thread
mattklein123 marked this conversation as resolved.

} // namespace Thread
} // namespace Envoy
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_extension",
"envoy_cc_library",
"envoy_extension_package",
"envoy_package",
)

licenses(["notice"]) # Apache 2

envoy_extension_package()
envoy_package()

envoy_cc_library(
name = "abort_action_lib",
Expand All @@ -19,22 +18,21 @@ envoy_cc_library(
"//include/envoy/thread:thread_interface",
"//source/common/common:assert_lib",
"//source/common/protobuf:utility_lib",
"@envoy_api//envoy/extensions/watchdog/abort_action/v3alpha:pkg_cc_proto",
"//source/common/thread:terminate_thread_lib",
"@envoy_api//envoy/watchdog/v3alpha:pkg_cc_proto",
],
)

envoy_cc_extension(
name = "config",
srcs = ["config.cc"],
hdrs = ["config.h"],
security_posture = "data_plane_agnostic",
status = "alpha",
envoy_cc_library(
name = "abort_action_config",
srcs = ["abort_action_config.cc"],
hdrs = ["abort_action_config.h"],
deps = [
":abort_action_lib",
"//include/envoy/registry",
"//source/common/config:utility_lib",
"//source/common/protobuf",
"//source/common/protobuf:message_validator_lib",
"@envoy_api//envoy/extensions/watchdog/abort_action/v3alpha:pkg_cc_proto",
"@envoy_api//envoy/watchdog/v3alpha:pkg_cc_proto",
],
)
2 changes: 2 additions & 0 deletions source/common/watchdog/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
This contains watchdog actions that are part of core Envoy, and therefore cannot
be in the extensions directory.
54 changes: 54 additions & 0 deletions source/common/watchdog/abort_action.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#include "common/watchdog/abort_action.h"

#include "envoy/thread/thread.h"

#include "common/common/assert.h"
#include "common/common/fmt.h"
#include "common/common/logger.h"
#include "common/protobuf/utility.h"
#include "common/thread/terminate_thread.h"

namespace Envoy {
namespace Watchdog {
namespace {
constexpr uint64_t DefaultWaitDurationMs = 5000;
} // end namespace

AbortAction::AbortAction(envoy::watchdog::v3alpha::AbortActionConfig& config,
Server::Configuration::GuardDogActionFactoryContext& /*context*/)
: wait_duration_(absl::Milliseconds(
PROTOBUF_GET_MS_OR_DEFAULT(config, wait_duration, DefaultWaitDurationMs))) {}

void AbortAction::run(
envoy::config::bootstrap::v3::Watchdog::WatchdogAction::WatchdogEvent /*event*/,
const std::vector<std::pair<Thread::ThreadId, MonotonicTime>>& thread_last_checkin_pairs,
MonotonicTime /*now*/) {

if (thread_last_checkin_pairs.empty()) {
ENVOY_LOG_MISC(warn, "Watchdog AbortAction called without any thread.");
return;
}

// The following lines of code won't be considered covered by code coverage
// tools since they would run in DEATH tests.
const auto& thread_id = thread_last_checkin_pairs[0].first;
const std::string tid_string = thread_id.debugString();
ENVOY_LOG_MISC(error, "Watchdog AbortAction terminating thread with tid {}.", tid_string);

if (Thread::terminateThread(thread_id)) {
// Successfully signaled to thread to terminate, sleep for wait_duration.
absl::SleepFor(wait_duration_);
} else {
ENVOY_LOG_MISC(error, "Failed to terminate tid {}", tid_string);
}

// Abort from the action since the signaled thread hasn't yet crashed the process.
// Panicing in the action gives flexibility since it doesn't depend on
// external code to kill the process if the signal fails.
PANIC(fmt::format(
"Failed to terminate thread with id {}, aborting from Watchdog AbortAction instead.",
tid_string));
}

} // namespace Watchdog
} // namespace Envoy
Loading