From c4366f9497b8ce4506d2978162d8dc6ef680a16a Mon Sep 17 00:00:00 2001 From: Andrew Jenkins Date: Thu, 28 Mar 2019 23:44:52 +0000 Subject: [PATCH] upstream: Null-deref if conn closed and TCP health timeout pending Fixes no-longer-embargoed ossfuzz 11100: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=11100&q=envoy&colspec=ID%20Type%20Component%20Status%20Proj%20Reported%20Owner%20Summary If a TCP health checker connection is closed but the timeout_timer fires anyway (for instance, the timeout timer is pending), then the TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onTimeout() method attempts to dereference client_, which is nullptr. We need guards in onTimeout, similar to the guards in HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onTimeout(). I added a unit test following the pattern of HttpHealthCheckerImplTest.TimeoutAfterDisconnect that reproduces the backtrace on ossfuzz without my proposed fix, and does not backtrace with my fix. Signed-off-by: Andrew Jenkins --- source/common/upstream/health_checker_impl.cc | 6 ++-- .../upstream/health_checker_impl_test.cc | 36 +++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/source/common/upstream/health_checker_impl.cc b/source/common/upstream/health_checker_impl.cc index 719975959a3e6..f96b0eff766bd 100644 --- a/source/common/upstream/health_checker_impl.cc +++ b/source/common/upstream/health_checker_impl.cc @@ -417,8 +417,10 @@ void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onInterval() { } void TcpHealthCheckerImpl::TcpActiveHealthCheckSession::onTimeout() { - host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::TIMEOUT); - client_->close(Network::ConnectionCloseType::NoFlush); + if (client_) { + host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::TIMEOUT); + client_->close(Network::ConnectionCloseType::NoFlush); + } } GrpcHealthCheckerImpl::GrpcHealthCheckerImpl(const Cluster& cluster, diff --git a/test/common/upstream/health_checker_impl_test.cc b/test/common/upstream/health_checker_impl_test.cc index 0e8d56ca695bc..c93dfadb91b4c 100644 --- a/test/common/upstream/health_checker_impl_test.cc +++ b/test/common/upstream/health_checker_impl_test.cc @@ -2417,6 +2417,42 @@ TEST_F(TcpHealthCheckerImplTest, TimeoutWithoutReusingConnection) { EXPECT_EQ(2UL, cluster_->info_->stats_store_.counter("health_check.failure").value()); } +TEST_F(TcpHealthCheckerImplTest, TimeoutAfterDisconnect) { + InSequence s; + + setupData(); + health_checker_->start(); + expectSessionCreate(); + + expectClientCreate(); + cluster_->prioritySet().getMockHostSet(0)->hosts_ = { + makeTestHost(cluster_->info_, "tcp://127.0.0.1:80")}; + EXPECT_CALL(*connection_, write(_, _)); + EXPECT_CALL(*timeout_timer_, enableTimer(_)); + cluster_->prioritySet().getMockHostSet(0)->runCallbacks( + {cluster_->prioritySet().getMockHostSet(0)->hosts_.back()}, {}); + connection_->raiseEvent(Network::ConnectionEvent::Connected); + connection_->close(Network::ConnectionCloseType::NoFlush); + EXPECT_CALL(*event_logger_, logUnhealthy(_, _, _, true)); + EXPECT_CALL(*timeout_timer_, disableTimer()); + EXPECT_CALL(*interval_timer_, enableTimer(_)); + timeout_timer_->callback_(); + EXPECT_EQ(Host::Health::Healthy, cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); + + expectClientCreate(); + EXPECT_CALL(*connection_, write(_, _)); + EXPECT_CALL(*timeout_timer_, enableTimer(_)); + interval_timer_->callback_(); + connection_->raiseEvent(Network::ConnectionEvent::Connected); + connection_->close(Network::ConnectionCloseType::NoFlush); + EXPECT_CALL(*event_logger_, logEjectUnhealthy(_, _, _)); + EXPECT_CALL(*timeout_timer_, disableTimer()); + EXPECT_CALL(*interval_timer_, enableTimer(_)); + timeout_timer_->callback_(); + EXPECT_EQ(Host::Health::Unhealthy, + cluster_->prioritySet().getMockHostSet(0)->hosts_[0]->health()); +} + TEST_F(TcpHealthCheckerImplTest, NoData) { InSequence s;