diff --git a/docs/root/configuration/cluster_manager/cluster_stats.rst b/docs/root/configuration/cluster_manager/cluster_stats.rst index 370a9e1402d17..f881e8963ccdd 100644 --- a/docs/root/configuration/cluster_manager/cluster_stats.rst +++ b/docs/root/configuration/cluster_manager/cluster_stats.rst @@ -56,6 +56,7 @@ Every cluster has a statistics tree rooted at *cluster..* with the followi upstream_cx_rx_bytes_buffered, Gauge, Received connection bytes currently buffered upstream_cx_tx_bytes_total, Counter, Total sent connection bytes upstream_cx_tx_bytes_buffered, Gauge, Send connection bytes currently buffered + upstream_cx_pool_overflow, Counter, Total times that the cluster's connection pool circuit breaker overflowed upstream_cx_protocol_error, Counter, Total connection protocol errors upstream_cx_max_requests, Counter, Total connections closed due to maximum requests upstream_cx_none_healthy, Counter, Total times connection not established due to no healthy hosts diff --git a/docs/root/intro/arch_overview/circuit_breaking.rst b/docs/root/intro/arch_overview/circuit_breaking.rst index 152284363fb51..57dc097dba90d 100644 --- a/docs/root/intro/arch_overview/circuit_breaking.rst +++ b/docs/root/intro/arch_overview/circuit_breaking.rst @@ -49,6 +49,10 @@ configure and code each application independently. Envoy supports various types clean up; connection pools do not. Note that in order for a connection pool to function it needs at least one upstream connection, so this value should likely be no greater than :ref:`Cluster maximum connections `. + If this circuit breaker overflows, the + :ref:`upstream_cx_pool_overflow ` counter for the cluster + will increment. + + Each circuit breaking limit is :ref:`configurable ` and tracked on a per upstream cluster and per priority basis. 
This allows different components of diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst index c0498d313c85e..5c6a806f296f0 100644 --- a/docs/root/intro/version_history.rst +++ b/docs/root/intro/version_history.rst @@ -3,6 +3,7 @@ Version history 1.11.0 (Pending) ================ +* upstream: added :ref:`upstream_cx_pool_overflow ` for the connection pool circuit breaker. 1.10.0 (Apr 5, 2019) ==================== diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 3fdb15cdeee17..214096d79390e 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -480,6 +480,7 @@ class PrioritySet { COUNTER (upstream_cx_protocol_error) \ COUNTER (upstream_cx_max_requests) \ COUNTER (upstream_cx_none_healthy) \ + COUNTER (upstream_cx_pool_overflow) \ COUNTER (upstream_rq_total) \ GAUGE (upstream_rq_active) \ COUNTER (upstream_rq_completed) \ diff --git a/source/common/upstream/conn_pool_map_impl.h b/source/common/upstream/conn_pool_map_impl.h index 156751942535f..19fa80ce4baa0 100644 --- a/source/common/upstream/conn_pool_map_impl.h +++ b/source/common/upstream/conn_pool_map_impl.h @@ -35,8 +35,7 @@ ConnPoolMap::getPool(KEY_TYPE key, const PoolFactory& facto if (!connPoolResource.canCreate()) { // We're full. Try to free up a pool. If we can't, bail out. if (!freeOnePool()) { - // TODO(klarose): Add some explicit counters for failure cases here, similar to the other - // circuit breakers. 
+ host_->cluster().stats().upstream_cx_pool_overflow_.inc(); return absl::nullopt; } diff --git a/test/common/upstream/conn_pool_map_impl_test.cc b/test/common/upstream/conn_pool_map_impl_test.cc index b8183d2eae29b..83839f4e76b66 100644 --- a/test/common/upstream/conn_pool_map_impl_test.cc +++ b/test/common/upstream/conn_pool_map_impl_test.cc @@ -225,6 +225,28 @@ TEST_F(ConnPoolMapImplTest, GetPoolHittingLimitFails) { EXPECT_EQ(test_map->size(), 1); } +TEST_F(ConnPoolMapImplTest, GetPoolHittingLimitIncrementsFailureCounter) { + TestMapPtr test_map = makeTestMapWithLimit(1); + + test_map->getPool(1, getBasicFactory()); + ON_CALL(*mock_pools_[0], hasActiveConnections()).WillByDefault(Return(true)); + test_map->getPool(2, getNeverCalledFactory()); + + EXPECT_EQ(host_->cluster_.stats_.upstream_cx_pool_overflow_.value(), 1); +} + +TEST_F(ConnPoolMapImplTest, GetPoolHittingLimitIncrementsFailureMultiple) { + TestMapPtr test_map = makeTestMapWithLimit(1); + + test_map->getPool(1, getBasicFactory()); + ON_CALL(*mock_pools_[0], hasActiveConnections()).WillByDefault(Return(true)); + test_map->getPool(2, getNeverCalledFactory()); + test_map->getPool(2, getNeverCalledFactory()); + test_map->getPool(2, getNeverCalledFactory()); + + EXPECT_EQ(host_->cluster_.stats_.upstream_cx_pool_overflow_.value(), 3); +} + TEST_F(ConnPoolMapImplTest, GetPoolHittingLimitGreaterThan1Fails) { TestMapPtr test_map = makeTestMapWithLimit(2); @@ -250,6 +272,18 @@ TEST_F(ConnPoolMapImplTest, GetPoolLimitHitThenOneFreesUpNextCallSucceeds) { EXPECT_EQ(test_map->size(), 1); } +TEST_F(ConnPoolMapImplTest, GetPoolLimitHitFollowedBySuccessDoesNotClearFailure) { + TestMapPtr test_map = makeTestMapWithLimit(1); + + test_map->getPool(1, getActivePoolFactory()); + test_map->getPool(2, getNeverCalledFactory()); + + ON_CALL(*mock_pools_[0], hasActiveConnections()).WillByDefault(Return(false)); + + test_map->getPool(2, getBasicFactory()); + EXPECT_EQ(host_->cluster_.stats_.upstream_cx_pool_overflow_.value(), 1); 
+} + // Test that only the pool which are idle are actually cleared TEST_F(ConnPoolMapImplTest, GetOnePoolIdleOnlyClearsThatOne) { TestMapPtr test_map = makeTestMapWithLimit(2); diff --git a/test/integration/integration_test.cc b/test/integration/integration_test.cc index c6bfa5b5fe7f0..ce4f4c8186ecf 100644 --- a/test/integration/integration_test.cc +++ b/test/integration/integration_test.cc @@ -775,6 +775,8 @@ TEST_P(IntegrationTest, NoConnectionPoolsFree) { EXPECT_STREQ("503", response->headers().Status()->value().c_str()); test_server_->waitForCounterGe("cluster.cluster_0.upstream_rq_503", 1); + + EXPECT_EQ(test_server_->counter("cluster.cluster_0.upstream_cx_pool_overflow")->value(), 1); } INSTANTIATE_TEST_SUITE_P(IpVersions, UpstreamEndpointIntegrationTest,