Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Minor Behavior Changes
----------------------
*Changes that may cause incompatibilities for some users, but should not for most*

* config: the log message for "gRPC config stream closed" now uses the most recent error message, and reports seconds instead of milliseconds for how long the most recent status has been received.

Bug Fixes
---------
*Changes expected to improve the state of the world and are unlikely to have negative effects*
Expand Down
21 changes: 14 additions & 7 deletions source/common/config/grpc_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,24 +169,31 @@ class GrpcStream : public Grpc::AsyncStreamCallbacks<ResponseProto>,
return;
}

const uint64_t ms_since_first_close = std::chrono::duration_cast<std::chrono::milliseconds>(
time_source_.monotonicTime() - last_close_time_)
.count();
const auto duration_since_first_close = time_source_.monotonicTime() - last_close_time_;
const uint64_t seconds_since_first_close =
std::chrono::duration_cast<std::chrono::seconds>(duration_since_first_close).count();
const Grpc::Status::GrpcStatus close_status = last_close_status_.value();

if (status != close_status) {
// This is a different failure. Warn on both statuses and remember the new one.
ENVOY_LOG(warn, "{} gRPC config stream closed: {}, {} (previously {}, {} since {}ms ago)",
ENVOY_LOG(warn, "{} gRPC config stream closed: {}, {} (previously {}, {} since {}s ago)",
service_method_.name(), status, message, close_status, last_close_message_,
ms_since_first_close);
seconds_since_first_close);
setCloseStatus(status, message);
return;
}

// #18508: The error message may have changed.
// To reduce noise, do not update the last close time, or use the message to distinguish the
// error in the previous condition.
last_close_message_ = message;

const uint64_t ms_since_first_close =
std::chrono::duration_cast<std::chrono::milliseconds>(duration_since_first_close).count();
if (ms_since_first_close > RetryMaxDelayMs) {
// Warn if we are over the time limit.
ENVOY_LOG(warn, "{} gRPC config stream closed since {}ms ago: {}, {}", service_method_.name(),
ms_since_first_close, close_status, last_close_message_);
ENVOY_LOG(warn, "{} gRPC config stream closed since {}s ago: {}, {}", service_method_.name(),
seconds_since_first_close, close_status, message);
return;
}

Expand Down
32 changes: 21 additions & 11 deletions test/common/config/grpc_stream_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,18 @@ TEST_F(GrpcStreamTest, LogClose) {
Grpc::Status::WellKnownGrpcStatus::Unavailable);

// Different retriable failure: warn.
time_system_.advanceTimeWait(std::chrono::milliseconds(1000));
time_system_.advanceTimeWait(std::chrono::seconds(1));
EXPECT_CALL(callbacks_, onEstablishmentFailure());
EXPECT_LOG_CONTAINS(
"warn", "stream closed: 4, Deadline Exceeded (previously 14, Unavailable since 1000ms ago)",
{
"warn", "stream closed: 4, Deadline Exceeded (previously 14, Unavailable since 1s ago)", {
grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded,
"Deadline Exceeded");
});
EXPECT_EQ(grpc_stream_.getCloseStatus().value(),
Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded);

// Same retriable failure after a short amount of time: debug.
time_system_.advanceTimeWait(std::chrono::milliseconds(1000));
time_system_.advanceTimeWait(std::chrono::seconds(1));
EXPECT_CALL(callbacks_, onEstablishmentFailure());
EXPECT_LOG_CONTAINS("debug", "gRPC config stream closed", {
grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded,
Expand All @@ -130,24 +129,35 @@ TEST_F(GrpcStreamTest, LogClose) {
Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded);

// Same retriable failure after a long time: warn.
time_system_.advanceTimeWait(std::chrono::milliseconds(100000));
time_system_.advanceTimeWait(std::chrono::seconds(100));
EXPECT_CALL(callbacks_, onEstablishmentFailure());
EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 101000ms ago", {
EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 101s ago: 4, Deadline Exceeded", {
grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded,
"Deadline Exceeded");
});
EXPECT_EQ(grpc_stream_.getCloseStatus().value(),
Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded);

// Warn again.
time_system_.advanceTimeWait(std::chrono::milliseconds(1000));
// Warn again, using the newest message.
time_system_.advanceTimeWait(std::chrono::seconds(1));
EXPECT_CALL(callbacks_, onEstablishmentFailure());
EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 102000ms ago", {
EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 102s ago: 4, new message", {
grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded,
"Deadline Exceeded");
"new message");
});
EXPECT_EQ(grpc_stream_.getCloseStatus().value(),
Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded);

// Different retriable failure, using the most recent error message from the previous one.
time_system_.advanceTimeWait(std::chrono::seconds(1));
EXPECT_CALL(callbacks_, onEstablishmentFailure());
EXPECT_LOG_CONTAINS(
"warn",
"gRPC config stream closed: 14, Unavailable (previously 4, new message since 103s ago)", {
grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::Unavailable, "Unavailable");
});
EXPECT_EQ(grpc_stream_.getCloseStatus().value(),
Grpc::Status::WellKnownGrpcStatus::Unavailable);
}

// Successfully receiving a message clears close status.
Expand All @@ -158,7 +168,7 @@ TEST_F(GrpcStreamTest, LogClose) {
EXPECT_TRUE(grpc_stream_.grpcStreamAvailable());
// Status isn't cleared yet.
EXPECT_EQ(grpc_stream_.getCloseStatus().value(),
Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded);
Grpc::Status::WellKnownGrpcStatus::Unavailable);

auto response = std::make_unique<envoy::service::discovery::v3::DiscoveryResponse>();
grpc_stream_.onReceiveMessage(std::move(response));
Expand Down