envoyproxy · htuch · Jul 23, 2018 · Jul 17, 2018 · Jul 18, 2018 · Jul 18, 2018
diff --git a/api/envoy/api/v2/route/route.proto b/api/envoy/api/v2/route/route.proto
@@ -439,12 +439,11 @@ message RouteAction {
     google.protobuf.Duration per_try_timeout = 3 [(gogoproto.stdduration) = true];
   }
 
-  // Specifies the idle timeout for the route. If not specified, this defaults
-  // to 5 minutes. The default value was select so as not to interfere with any
-  // smaller configured timeouts that may have existed in configurations prior
-  // to the introduction of this feature, while introducing robustness to TCP
-  // connections that terminate without FIN. A value of 0 will completely
-  // disable the idle timeout.
+  // Specifies the idle timeout for the route. If not specified, no per-route idle timeout applies,
+  // although the connection manager wide :ref:`stream_idle_timeout
+  // <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.stream_idle_timeout>`
+  // will still apply. A value of 0 will completely disable the route's idle timeout, even if a
+  // connection manager stream idle timeout is configured.
   //
   // The idle timeout is distinct to :ref:`timeout
   // <envoy_api_field_route.RouteAction.timeout>`, which provides an upper bound

diff --git a/api/envoy/config/filter/network/http_connection_manager/v2/http_connection_manager.proto b/api/envoy/config/filter/network/http_connection_manager/v2/http_connection_manager.proto
@@ -19,7 +19,7 @@ import "gogoproto/gogo.proto";
 // [#protodoc-title: HTTP connection manager]
 // HTTP connection manager :ref:`configuration overview <config_http_conn_man>`.
 
-// [#comment:next free field: 24]
+// [#comment:next free field: 25]
 message HttpConnectionManager {
   enum CodecType {
     option (gogoproto.goproto_enum_prefix) = false;
@@ -137,6 +137,33 @@ message HttpConnectionManager {
   // <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.drain_timeout>`.
   google.protobuf.Duration idle_timeout = 11 [(gogoproto.stdduration) = true];
 
+  // The stream idle timeout for connections managed by the connection manager.
+  // If not specified, this defaults to 5 minutes. The default value was selected
+  // so as not to interfere with any smaller configured timeouts that may have
+  // existed in configurations prior to the introduction of this feature, while
+  // introducing robustness to TCP connections that terminate without a FIN.
+  //
+  // This idle timeout applies to new streams and is overridable by the
+  // :ref:`route-level idle_timeout
+  // <envoy_api_field_route.RouteAction.idle_timeout>`. Even on a stream in
+  // which the override applies, prior to receipt of the initial request
+  // headers, the :ref:`stream_idle_timeout
+  // <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.stream_idle_timeout>`
+  // applies. Each time an encode/decode event for headers or data is processed
+  // for the stream, the timer will be reset. If the timeout fires, the stream
+  // is terminated with a 408 Request Timeout error code if no upstream response
+  // header has been received, otherwise a stream reset occurs.
+  //
+  // Note that it is possible to idle timeout even if the wire traffic for a stream is non-idle, due
+  // to the granularity of events presented to the connection manager. For example, while receiving
+  // very large request headers, it may be the case that there is traffic regularly arriving on the
+  // wire while the connection manager is only able to observe the end-of-headers event, hence the
+  // stream may still idle timeout.
+  //
+  // A value of 0 will completely disable the connection manager stream idle
+  // timeout, although per-route idle timeout overrides will continue to apply.
+  google.protobuf.Duration stream_idle_timeout = 24 [(gogoproto.stdduration) = true];
+
   // The time that Envoy will wait between sending an HTTP/2 “shutdown
   // notification” (GOAWAY frame with max stream ID) and a final GOAWAY frame.
   // This is used so that Envoy provides a grace period for new streams that

diff --git a/docs/root/intro/arch_overview/http_connection_management.rst b/docs/root/intro/arch_overview/http_connection_management.rst
@@ -42,3 +42,27 @@ table <arch_overview_http_routing>`. The route table can be specified in one of
 
 * Statically.
 * Dynamically via the :ref:`RDS API <config_http_conn_man_rds>`.
+
+Timeouts
+--------
+
+Various configurable timeouts apply to an HTTP connection and its constituent streams:
+
+* Connection-level :ref:`idle timeout
+  <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.idle_timeout>`:
+  this applies to the idle period where no streams are active.
+* Connection-level :ref:`drain timeout
+  <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.drain_timeout>`:
+  this spans between an Envoy originated GOAWAY and connection termination.
+* Stream-level idle timeout: this applies to each individual stream. It may be configured at both
+  the :ref:`connection manager
+  <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.stream_idle_timeout>`
+  and :ref:`per-route <envoy_api_field_route.RouteAction.idle_timeout>` granularity.
+  Header/data/trailer events on the stream reset the idle timeout.
+* Stream-level :ref:`per-route upstream timeout <envoy_api_field_route.RouteAction.timeout>`: this
+  applies to the upstream response, i.e. a maximum bound on the time from the end of the downstream
+  request until the end of the upstream response. This may also be specified at the :ref:`per-retry
+  <envoy_api_field_route.RouteAction.RetryPolicy.per_try_timeout>` granularity.
+* Stream-level :ref:`per-route gRPC max timeout
+  <envoy_api_field_route.RouteAction.max_grpc_timeout>`: this bounds the upstream timeout and allows
+  the timeout to be overridden via the *grpc-timeout* request header.
diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
@@ -14,6 +14,12 @@ Version history
 * health check: added support for :ref:`custom health check <envoy_api_field_core.HealthCheck.custom_health_check>`.
 * health check: added support for :ref:`specifying jitter as a percentage <envoy_api_field_core.HealthCheck.interval_jitter_percent>`.
 * health_check: added support for :ref:`health check event logging <arch_overview_health_check_logging>`.
+* http: added support for a per-stream idle timeout. This applies at both :ref:`connection manager
+  <envoy_api_field_config.filter.network.http_connection_manager.v2.HttpConnectionManager.stream_idle_timeout>`
+  and :ref:`per-route granularity <envoy_api_field_route.RouteAction.idle_timeout>`. The timeout
+  defaults to 5 minutes; if you have other timeouts (e.g. connection idle timeout, upstream
+  response per-retry) that are longer than this in duration, you may want to consider setting a
+  non-default per-stream idle timeout.
 * http: added support for a :ref:`per-stream idle timeout
   <envoy_api_field_route.RouteAction.idle_timeout>`. This defaults to 5 minutes; if you have
   other timeouts (e.g. connection idle timeout, upstream response per-retry) that are longer than

diff --git a/include/envoy/router/router.h b/include/envoy/router/router.h
@@ -473,7 +473,8 @@ class RouteEntry : public ResponseEntry {
   virtual std::chrono::milliseconds timeout() const PURE;
 
   /**
-   * @return absl::optional<std::chrono::milliseconds> the route's idle timeout.
+   * @return optional<std::chrono::milliseconds> the route's idle timeout. Zero indicates a
+   *         disabled idle timeout, while nullopt indicates deference to the global timeout.
    */
   virtual absl::optional<std::chrono::milliseconds> idleTimeout() const PURE;
 

diff --git a/source/common/http/conn_manager_config.h b/source/common/http/conn_manager_config.h
@@ -192,7 +192,13 @@ class ConnectionManagerConfig {
   /**
    * @return optional idle timeout for incoming connection manager connections.
    */
-  virtual const absl::optional<std::chrono::milliseconds>& idleTimeout() PURE;
+  virtual absl::optional<std::chrono::milliseconds> idleTimeout() const PURE;
+
+  /**
+   * @return per-stream idle timeout for incoming connection manager connections. Zero indicates a
+   *         disabled idle timeout.
+   */
+  virtual std::chrono::milliseconds streamIdleTimeout() const PURE;
 
   /**
    * @return Router::RouteConfigProvider& the configuration provider used to acquire a route

diff --git a/source/common/http/conn_manager_impl.cc b/source/common/http/conn_manager_impl.cc
@@ -369,6 +369,13 @@ ConnectionManagerImpl::ActiveStream::ActiveStream(ConnectionManagerImpl& connect
   // prevents surprises for logging code in edge cases.
   request_info_.setDownstreamRemoteAddress(
       connection_manager_.read_callbacks_->connection().remoteAddress());
+
+  if (connection_manager_.config_.streamIdleTimeout().count()) {
+    idle_timeout_ms_ = connection_manager_.config_.streamIdleTimeout();
+    idle_timer_ = connection_manager_.read_callbacks_->connection().dispatcher().createTimer(
+        [this]() -> void { onIdleTimeout(); });
+    resetIdleTimer();
+  }
 }
 
 ConnectionManagerImpl::ActiveStream::~ActiveStream() {
@@ -605,9 +612,18 @@ void ConnectionManagerImpl::ActiveStream::decodeHeaders(HeaderMapPtr&& headers,
     const Router::RouteEntry* route_entry = cached_route_.value()->routeEntry();
     if (route_entry != nullptr && route_entry->idleTimeout()) {
       idle_timeout_ms_ = route_entry->idleTimeout().value();
-      idle_timer_ = connection_manager_.read_callbacks_->connection().dispatcher().createTimer(
-          [this]() -> void { onIdleTimeout(); });
-      resetIdleTimer();
+      if (idle_timeout_ms_.count()) {
+        // If we have a route-level idle timeout but no global stream idle timeout, create a timer.
+        if (idle_timer_ == nullptr) {
+          idle_timer_ = connection_manager_.read_callbacks_->connection().dispatcher().createTimer(
+              [this]() -> void { onIdleTimeout(); });
+        }
+      } else if (idle_timer_ != nullptr) {
+        // If we had a global stream idle timeout but the route-level idle timeout is set to zero
+        // (to override), we disable the idle timer.
+        idle_timer_->disableTimer();
+        idle_timer_ = nullptr;
+      }
     }
   }
 
@@ -617,6 +633,9 @@ void ConnectionManagerImpl::ActiveStream::decodeHeaders(HeaderMapPtr&& headers,
   }
 
   decodeHeaders(nullptr, *request_headers_, end_stream);
+
+  // Reset it here for both global and overridden cases.
+  resetIdleTimer();
 }
 
 void ConnectionManagerImpl::ActiveStream::traceRequest() {

diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc
@@ -258,8 +258,7 @@ RouteEntryImplBase::RouteEntryImplBase(const VirtualHostImpl& vhost,
       cluster_not_found_response_code_(ConfigUtility::parseClusterNotFoundResponseCode(
           route.route().cluster_not_found_response_code())),
       timeout_(PROTOBUF_GET_MS_OR_DEFAULT(route.route(), timeout, DEFAULT_ROUTE_TIMEOUT_MS)),
-      idle_timeout_(
-          PROTOBUF_GET_MS_OR_DEFAULT(route.route(), idle_timeout, DEFAULT_ROUTE_IDLE_TIMEOUT_MS)),
+      idle_timeout_(PROTOBUF_GET_OPTIONAL_MS(route.route(), idle_timeout)),
       max_grpc_timeout_(PROTOBUF_GET_OPTIONAL_MS(route.route(), max_grpc_timeout)),
       runtime_(loadRuntimeData(route.match())), loader_(factory_context.runtime()),
       host_redirect_(route.redirect().host_redirect()),

diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h
@@ -311,10 +311,7 @@ class RouteEntryImplBase : public RouteEntry,
     return vhost_.virtualClusterFromEntries(headers);
   }
   std::chrono::milliseconds timeout() const override { return timeout_; }
-  absl::optional<std::chrono::milliseconds> idleTimeout() const override {
-    return idle_timeout_.count() == 0 ? absl::nullopt
-                                      : absl::optional<std::chrono::milliseconds>(idle_timeout_);
-  }
+  absl::optional<std::chrono::milliseconds> idleTimeout() const override { return idle_timeout_; }
   absl::optional<std::chrono::milliseconds> maxGrpcTimeout() const override {
     return max_grpc_timeout_;
   }
@@ -510,9 +507,6 @@ class RouteEntryImplBase : public RouteEntry,
   // Default timeout is 15s if nothing is specified in the route config.
   static const uint64_t DEFAULT_ROUTE_TIMEOUT_MS = 15000;
 
-  // Default idle timeout is 5 minutes if nothing is specified in the route config.
-  static const uint64_t DEFAULT_ROUTE_IDLE_TIMEOUT_MS = 5 * 60 * 1000;
-
   std::unique_ptr<const CorsPolicyImpl> cors_policy_;
   const VirtualHostImpl& vhost_; // See note in RouteEntryImplBase::clusterEntry() on why raw ref
                                  // to virtual host is currently safe.
@@ -522,7 +516,7 @@ class RouteEntryImplBase : public RouteEntry,
   const Http::LowerCaseString cluster_header_name_;
   const Http::Code cluster_not_found_response_code_;
   const std::chrono::milliseconds timeout_;
-  const std::chrono::milliseconds idle_timeout_;
+  const absl::optional<std::chrono::milliseconds> idle_timeout_;
   const absl::optional<std::chrono::milliseconds> max_grpc_timeout_;
   const absl::optional<RuntimeData> runtime_;
   Runtime::Loader& loader_;

diff --git a/source/extensions/filters/network/http_connection_manager/config.cc b/source/extensions/filters/network/http_connection_manager/config.cc
@@ -129,6 +129,9 @@ HttpConnectionManagerConfig::HttpConnectionManagerConfig(
       route_config_provider_manager_(route_config_provider_manager),
       http2_settings_(Http::Utility::parseHttp2Settings(config.http2_protocol_options())),
       http1_settings_(Http::Utility::parseHttp1Settings(config.http_protocol_options())),
+      idle_timeout_(PROTOBUF_GET_OPTIONAL_MS(config, idle_timeout)),
+      stream_idle_timeout_(
+          PROTOBUF_GET_MS_OR_DEFAULT(config, stream_idle_timeout, StreamIdleTimeoutMs)),
       drain_timeout_(PROTOBUF_GET_MS_OR_DEFAULT(config, drain_timeout, 5000)),
       generate_request_id_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, generate_request_id, true)),
       date_provider_(date_provider),
@@ -216,10 +219,6 @@ HttpConnectionManagerConfig::HttpConnectionManagerConfig(
          overall_sampling}));
   }
 
-  if (config.has_idle_timeout()) {
-    idle_timeout_ = std::chrono::milliseconds(PROTOBUF_GET_MS_REQUIRED(config, idle_timeout));
-  }
-
   for (const auto& access_log : config.access_log()) {
     AccessLog::InstanceSharedPtr current_access_log =
         AccessLog::AccessLogFactory::fromProto(access_log, context_);

diff --git a/source/extensions/filters/network/http_connection_manager/config.h b/source/extensions/filters/network/http_connection_manager/config.h
@@ -87,7 +87,8 @@ class HttpConnectionManagerConfig : Logger::Loggable<Logger::Id::config>,
   std::chrono::milliseconds drainTimeout() override { return drain_timeout_; }
   FilterChainFactory& filterFactory() override { return *this; }
   bool generateRequestId() override { return generate_request_id_; }
-  const absl::optional<std::chrono::milliseconds>& idleTimeout() override { return idle_timeout_; }
+  absl::optional<std::chrono::milliseconds> idleTimeout() const override { return idle_timeout_; }
+  std::chrono::milliseconds streamIdleTimeout() const override { return stream_idle_timeout_; }
   Router::RouteConfigProvider& routeConfigProvider() override { return *route_config_provider_; }
   const std::string& serverName() override { return server_name_; }
   Http::ConnectionManagerStats& stats() override { return stats_; }
@@ -137,12 +138,16 @@ class HttpConnectionManagerConfig : Logger::Loggable<Logger::Id::config>,
   Http::TracingConnectionManagerConfigPtr tracing_config_;
   absl::optional<std::string> user_agent_;
   absl::optional<std::chrono::milliseconds> idle_timeout_;
+  std::chrono::milliseconds stream_idle_timeout_;
   Router::RouteConfigProviderSharedPtr route_config_provider_;
   std::chrono::milliseconds drain_timeout_;
   bool generate_request_id_;
   Http::DateProvider& date_provider_;
   Http::ConnectionManagerListenerStats listener_stats_;
   const bool proxy_100_continue_;
+
+  // Default stream idle timeout is 5 minutes if nothing is specified in the HCM config.
+  static const uint64_t StreamIdleTimeoutMs = 5 * 60 * 1000;
 };
 
 } // namespace HttpConnectionManager

diff --git a/source/server/http/admin.h b/source/server/http/admin.h
@@ -89,7 +89,8 @@ class AdminImpl : public Admin,
   std::chrono::milliseconds drainTimeout() override { return std::chrono::milliseconds(100); }
   Http::FilterChainFactory& filterFactory() override { return *this; }
   bool generateRequestId() override { return false; }
-  const absl::optional<std::chrono::milliseconds>& idleTimeout() override { return idle_timeout_; }
+  absl::optional<std::chrono::milliseconds> idleTimeout() const override { return idle_timeout_; }
+  std::chrono::milliseconds streamIdleTimeout() const override { return {}; }
   Router::RouteConfigProvider& routeConfigProvider() override { return route_config_provider_; }
   const std::string& serverName() override { return Http::DefaultServerString::get(); }
   Http::ConnectionManagerStats& stats() override { return stats_; }