From a06b3658dc283207e59ee7bfbea15d5c77eb7144 Mon Sep 17 00:00:00 2001 From: Michael Puncel Date: Wed, 10 Jul 2019 12:32:23 -0400 Subject: [PATCH 1/2] Fix crash in request hedging handling. This commit fixes a crash in which an upstream request would not be properly reset after the router was done with it and the callbacks destroyed which would cause a segfault when data was received on the upstream request. This happened in particular when "bad" headers are received for an upstream request but there are still requests in flight. When hedging is disabled (i.e. default behavior) this is not an issue because the filter will be destroyed synchronously which will reset the stream, however with hedging it needs to be done more proactively. Signed-off-by: Michael Puncel --- source/common/router/router.cc | 2 +- test/common/router/router_test.cc | 83 +++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/source/common/router/router.cc b/source/common/router/router.cc index ca379197b838f..598f532106ea9 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -1052,7 +1052,7 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head // chance to return before returning a response downstream. if (could_not_retry && (numRequestsAwaitingHeaders() > 0 || pending_retries_ > 0)) { upstream_request.upstream_host_->stats().rq_error_.inc(); - upstream_request.removeFromList(upstream_requests_); + upstream_request.removeFromList(upstream_requests_)->resetStream(); return; } diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc index b9e2f9dc8172f..23aa2817d88f9 100644 --- a/test/common/router/router_test.cc +++ b/test/common/router/router_test.cc @@ -1592,6 +1592,89 @@ TEST_F(RouterTest, HedgedPerTryTimeoutFirstRequestSucceeds) { // TODO: Verify hedge stats here once they are implemented. } +// Tests that an upstream request is reset even if it can't be retried as long as there is +// another in-flight request we're waiting on. +// Sequence: +// 1) first upstream request per try timeout +// 2) second upstream request sent +// 3) second upstream request gets 5xx, retries exhausted, assert it's reset +// 4) first upstream request gets 2xx +TEST_F(RouterTest, HedgedPerTryTimeoutResetsOnBadHeaders) { + enableHedgeOnPerTryTimeout(); + + NiceMock encoder1; + Http::StreamDecoder* response_decoder1 = nullptr; + EXPECT_CALL(cm_.conn_pool_, newStream(_, _)) + .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks) + -> Http::ConnectionPool::Cancellable* { + response_decoder1 = &decoder; + EXPECT_CALL(*router_.retry_state_, onHostAttempted(_)); + callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_); + return nullptr; + })); + EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, + putResult(Upstream::Outlier::Result::LOCAL_ORIGIN_CONNECT_SUCCESS, + absl::optional(absl::nullopt))) + .Times(2); + expectPerTryTimerCreate(); + expectResponseTimerCreate(); + + Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}}; + HttpTestUtility::addDefaultHeaders(headers); + router_.decodeHeaders(headers, true); + + EXPECT_CALL( + cm_.conn_pool_.host_->outlier_detector_, + putResult(Upstream::Outlier::Result::LOCAL_ORIGIN_TIMEOUT, absl::optional(504))); + EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0); + NiceMock encoder2; + Http::StreamDecoder* response_decoder2 = nullptr; + router_.retry_state_->expectHedgedPerTryTimeoutRetry(); + per_try_timeout_->callback_(); + + EXPECT_CALL(cm_.conn_pool_, newStream(_, _)) + .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks) + -> Http::ConnectionPool::Cancellable* { + response_decoder2 = &decoder; + EXPECT_CALL(*router_.retry_state_, onHostAttempted(_)); + callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_); + return nullptr; + })); + expectPerTryTimerCreate(); + router_.retry_state_->callback_(); + + // We should not have updated any stats yet because no requests have been + // canceled + EXPECT_TRUE(verifyHostUpstreamStats(0, 0)); + + // Now write a 5xx back on the 2nd request with no retries remaining. The 2nd request + // should be reset immediately. + Http::HeaderMapPtr bad_response_headers(new Http::TestHeaderMapImpl{{":status", "500"}}); + EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(500)); + EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0); + EXPECT_CALL(encoder2.stream_, resetStream(_)); + EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)) + .WillOnce(Return(RetryStatus::NoOverflow)); + // Not end_stream, otherwise we wouldn't need to reset. + response_decoder2->decodeHeaders(std::move(bad_response_headers), false); + + // Now write a 200 back. We expect the 2nd stream to be reset and stats to be + // incremented properly. + Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}}); + EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200)); + EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0); + + EXPECT_CALL(callbacks_, encodeHeaders_(_, _)) + .WillOnce(Invoke([&](Http::HeaderMap& headers, bool end_stream) -> void { + EXPECT_EQ(headers.Status()->value(), "200"); + EXPECT_TRUE(end_stream); + })); + response_decoder1->decodeHeaders(std::move(response_headers), true); + EXPECT_TRUE(verifyHostUpstreamStats(1, 1)); + + // TODO: Verify hedge stats here once they are implemented. +} + // Three requests sent: 1) 5xx error, 2) per try timeout, 3) gets good response // headers. TEST_F(RouterTest, HedgedPerTryTimeoutThirdRequestSucceeds) { From 317d6e2878920f0243eab426dc0efa93a857caf9 Mon Sep 17 00:00:00 2001 From: Michael Puncel Date: Thu, 11 Jul 2019 13:47:08 -0400 Subject: [PATCH 2/2] add comment explaining resetStream Signed-off-by: Michael Puncel --- source/common/router/router.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/common/router/router.cc b/source/common/router/router.cc index 598f532106ea9..1a7dcd831cd45 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -1052,6 +1052,9 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head // chance to return before returning a response downstream. if (could_not_retry && (numRequestsAwaitingHeaders() > 0 || pending_retries_ > 0)) { upstream_request.upstream_host_->stats().rq_error_.inc(); + + // Reset the stream because there are other in-flight requests that we'll + // wait around for and we're not interested in consuming any body/trailers. upstream_request.removeFromList(upstream_requests_)->resetStream(); return; }