From e74aaeda8634259b25898dea3b07160dca98cd42 Mon Sep 17 00:00:00 2001 From: Taylor Barrella Date: Tue, 12 Oct 2021 06:43:23 -0700 Subject: [PATCH] config: adjust "gRPC config stream closed" log message (#18533) backport of c35ff419e3b4fe3f7e6deae08e6e329cf91ba5b5 Signed-off-by: Taylor Barrella --- docs/root/version_history/current.rst | 2 ++ source/common/config/grpc_stream.h | 21 +++++++++++------ test/common/config/grpc_stream_test.cc | 32 +++++++++++++++++--------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst index 682d16a62a9c1..8c552bcaa4797 100644 --- a/docs/root/version_history/current.rst +++ b/docs/root/version_history/current.rst @@ -9,6 +9,8 @@ Minor Behavior Changes ---------------------- *Changes that may cause incompatibilities for some users, but should not for most* +* config: the log message for "gRPC config stream closed" now uses the most recent error message, and reports seconds instead of milliseconds for how long the most recent status has been received. + Bug Fixes --------- *Changes expected to improve the state of the world and are unlikely to have negative effects* diff --git a/source/common/config/grpc_stream.h b/source/common/config/grpc_stream.h index 23ee9a0d287b2..d7469f7860457 100644 --- a/source/common/config/grpc_stream.h +++ b/source/common/config/grpc_stream.h @@ -169,24 +169,31 @@ class GrpcStream : public Grpc::AsyncStreamCallbacks, return; } - const uint64_t ms_since_first_close = std::chrono::duration_cast( - time_source_.monotonicTime() - last_close_time_) - .count(); + const auto duration_since_first_close = time_source_.monotonicTime() - last_close_time_; + const uint64_t seconds_since_first_close = + std::chrono::duration_cast(duration_since_first_close).count(); const Grpc::Status::GrpcStatus close_status = last_close_status_.value(); if (status != close_status) { // This is a different failure. Warn on both statuses and remember the new one. - ENVOY_LOG(warn, "{} gRPC config stream closed: {}, {} (previously {}, {} since {}ms ago)", + ENVOY_LOG(warn, "{} gRPC config stream closed: {}, {} (previously {}, {} since {}s ago)", service_method_.name(), status, message, close_status, last_close_message_, - ms_since_first_close); + seconds_since_first_close); setCloseStatus(status, message); return; } + // #18508: The error message may have changed. + // To reduce noise, do not update the last close time, or use the message to distinguish the + // error in the previous condition. + last_close_message_ = message; + + const uint64_t ms_since_first_close = + std::chrono::duration_cast(duration_since_first_close).count(); if (ms_since_first_close > RetryMaxDelayMs) { // Warn if we are over the time limit. - ENVOY_LOG(warn, "{} gRPC config stream closed since {}ms ago: {}, {}", service_method_.name(), - ms_since_first_close, close_status, last_close_message_); + ENVOY_LOG(warn, "{} gRPC config stream closed since {}s ago: {}, {}", service_method_.name(), + seconds_since_first_close, close_status, message); return; } diff --git a/test/common/config/grpc_stream_test.cc b/test/common/config/grpc_stream_test.cc index 760b24a305bbf..fcc438a14d4aa 100644 --- a/test/common/config/grpc_stream_test.cc +++ b/test/common/config/grpc_stream_test.cc @@ -108,11 +108,10 @@ TEST_F(GrpcStreamTest, LogClose) { Grpc::Status::WellKnownGrpcStatus::Unavailable); // Different retriable failure: warn. - time_system_.advanceTimeWait(std::chrono::milliseconds(1000)); + time_system_.advanceTimeWait(std::chrono::seconds(1)); EXPECT_CALL(callbacks_, onEstablishmentFailure()); EXPECT_LOG_CONTAINS( - "warn", "stream closed: 4, Deadline Exceeded (previously 14, Unavailable since 1000ms ago)", - { + "warn", "stream closed: 4, Deadline Exceeded (previously 14, Unavailable since 1s ago)", { grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded, "Deadline Exceeded"); }); @@ -120,7 +119,7 @@ TEST_F(GrpcStreamTest, LogClose) { Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded); // Same retriable failure after a short amount of time: debug. - time_system_.advanceTimeWait(std::chrono::milliseconds(1000)); + time_system_.advanceTimeWait(std::chrono::seconds(1)); EXPECT_CALL(callbacks_, onEstablishmentFailure()); EXPECT_LOG_CONTAINS("debug", "gRPC config stream closed", { grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded, @@ -130,24 +129,35 @@ TEST_F(GrpcStreamTest, LogClose) { Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded); // Same retriable failure after a long time: warn. - time_system_.advanceTimeWait(std::chrono::milliseconds(100000)); + time_system_.advanceTimeWait(std::chrono::seconds(100)); EXPECT_CALL(callbacks_, onEstablishmentFailure()); - EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 101000ms ago", { + EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 101s ago: 4, Deadline Exceeded", { grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded, "Deadline Exceeded"); }); EXPECT_EQ(grpc_stream_.getCloseStatus().value(), Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded); - // Warn again. - time_system_.advanceTimeWait(std::chrono::milliseconds(1000)); + // Warn again, using the newest message. + time_system_.advanceTimeWait(std::chrono::seconds(1)); EXPECT_CALL(callbacks_, onEstablishmentFailure()); - EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 102000ms ago", { + EXPECT_LOG_CONTAINS("warn", "gRPC config stream closed since 102s ago: 4, new message", { grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded, - "Deadline Exceeded"); + "new message"); }); EXPECT_EQ(grpc_stream_.getCloseStatus().value(), Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded); + + // Different retriable failure, using the most recent error message from the previous one. + time_system_.advanceTimeWait(std::chrono::seconds(1)); + EXPECT_CALL(callbacks_, onEstablishmentFailure()); + EXPECT_LOG_CONTAINS( + "warn", + "gRPC config stream closed: 14, Unavailable (previously 4, new message since 103s ago)", { + grpc_stream_.onRemoteClose(Grpc::Status::WellKnownGrpcStatus::Unavailable, "Unavailable"); + }); + EXPECT_EQ(grpc_stream_.getCloseStatus().value(), + Grpc::Status::WellKnownGrpcStatus::Unavailable); } // Successfully receiving a message clears close status. @@ -158,7 +168,7 @@ TEST_F(GrpcStreamTest, LogClose) { EXPECT_TRUE(grpc_stream_.grpcStreamAvailable()); // Status isn't cleared yet. EXPECT_EQ(grpc_stream_.getCloseStatus().value(), - Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded); + Grpc::Status::WellKnownGrpcStatus::Unavailable); auto response = std::make_unique(); grpc_stream_.onReceiveMessage(std::move(response));