Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions api/envoy/api/v2/route/route.proto
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ message CorsPolicy {
google.protobuf.BoolValue enabled = 7;
}

// [#comment:next free field: 24]
// [#comment:next free field: 25]
message RouteAction {
oneof cluster_specifier {
option (validate.required) = true;
Expand Down Expand Up @@ -401,7 +401,9 @@ message RouteAction {
google.protobuf.BoolValue auto_host_rewrite = 7;
}

// Specifies the timeout for the route. If not specified, the default is 15s.
// Specifies the upstream timeout for the route. If not specified, the default is 15s. This
// spans between the point at which the entire downstream request (i.e. end-of-stream) has been
// processed and when the upstream response has been completely processed.
//
// .. note::
//
Expand All @@ -423,8 +425,8 @@ message RouteAction {
// :ref:`config_http_filters_router_x-envoy-max-retries`.
google.protobuf.UInt32Value num_retries = 2;

// Specifies a non-zero timeout per retry attempt. This parameter is optional.
// The same conditions documented for
// Specifies a non-zero upstream timeout per retry attempt. This parameter is optional. The
// same conditions documented for
// :ref:`config_http_filters_router_x-envoy-upstream-rq-per-try-timeout-ms` apply.
//
// .. note::
Expand All @@ -437,6 +439,28 @@ message RouteAction {
google.protobuf.Duration per_try_timeout = 3 [(gogoproto.stdduration) = true];
}

// Specifies the idle timeout for the route. If not specified, this defaults
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does one fully disable it if they want? Do we tell them to set a really big value? Or should we support 0 to mean no timeout? Thoughts?

// to 5 minutes. The default value was select so as not to interfere with any
// smaller configured timeouts that may have existed in configurations prior
// to the introduction of this feature, while introducing robustness to TCP
// connections that terminate without FIN. A value of 0 will completely
// disable the idle timeout.
//
// The idle timeout is distinct to :ref:`timeout
// <envoy_api_field_route.RouteAction.timeout>`, which provides an upper bound
// on the upstream response time; :ref:`idle_timeout
// <envoy_api_field_route.RouteAction.idle_timeout>` instead bounds the amount
// of time the request's stream may be idle.
//
// After header decoding, the idle timeout will apply on downstream and
// upstream request events. Each time an encode/decode event for headers or
// data is processed for the stream, the timer will be reset. If the timeout
// fires, the stream is terminated with a 408 Request Timeout error code if no
// upstream response header has been received, otherwise a stream reset
// occurs.
google.protobuf.Duration idle_timeout = 24
[(validate.rules).duration.gt = {}, (gogoproto.stdduration) = true];

// Indicates that the route has a retry policy.
RetryPolicy retry_policy = 9;

Expand Down
1 change: 1 addition & 0 deletions docs/root/configuration/http_conn_man/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ statistics:
downstream_rq_5xx, Counter, Total 5xx responses
downstream_rq_ws_on_non_ws_route, Counter, Total WebSocket upgrade requests rejected by non WebSocket routes
downstream_rq_time, Histogram, Request time milliseconds
downstream_rq_idle_timeout, Counter, Total requests closed due to idle timeout
rs_too_large, Counter, Total response errors due to buffering an overly large body

Per user agent statistics
Expand Down
4 changes: 4 additions & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Version history
* health check: added support for :ref:`custom health check <envoy_api_field_core.HealthCheck.custom_health_check>`.
* health check: added support for :ref:`specifying jitter as a percentage <envoy_api_field_core.HealthCheck.interval_jitter_percent>`.
* health_check: added support for :ref:`health check event logging <arch_overview_health_check_logging>`.
* http: added support for a :ref:`per-stream idle timeout
<envoy_api_field_route.RouteAction.idle_timeout>`. This defaults to 5 minutes; if you have
other timeouts (e.g. connection idle timeout, upstream response per-retry) that are longer than
this in duration, you may want to consider setting a non-default per-stream idle timeout.
* http: better handling of HEAD requests. Now sending transfer-encoding: chunked rather than content-length: 0.
* http: response filters not applied to early error paths such as http_parser generated 400s.
* proxy_protocol: added support for HAProxy Proxy Protocol v2 (AF_INET/AF_INET6 only).
Expand Down
5 changes: 5 additions & 0 deletions include/envoy/router/router.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,11 @@ class RouteEntry : public ResponseEntry {
*/
virtual std::chrono::milliseconds timeout() const PURE;

/**
* @return absl::optional<std::chrono::milliseconds> the route's idle timeout.
*/
virtual absl::optional<std::chrono::milliseconds> idleTimeout() const PURE;

/**
* @return absl::optional<std::chrono::milliseconds> the maximum allowed timeout value derived
* from 'grpc-timeout' header of a gRPC request. Non-present value disables use of 'grpc-timeout'
Expand Down
1 change: 1 addition & 0 deletions source/common/http/async_client_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ class AsyncStreamImpl : public AsyncClient::Stream,
return std::chrono::milliseconds(0);
}
}
absl::optional<std::chrono::milliseconds> idleTimeout() const override { return absl::nullopt; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Random orthogonal thought that might eventually want to plumb this through for real so that we could have idle timeouts for local origin requests/streams. No action now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack.

absl::optional<std::chrono::milliseconds> maxGrpcTimeout() const override {
return absl::nullopt;
}
Expand Down
1 change: 1 addition & 0 deletions source/common/http/conn_manager_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ namespace Http {
COUNTER (downstream_rq_4xx) \
COUNTER (downstream_rq_5xx) \
HISTOGRAM(downstream_rq_time) \
COUNTER (downstream_rq_idle_timeout) \
COUNTER (rs_too_large)
// clang-format on

Expand Down
40 changes: 40 additions & 0 deletions source/common/http/conn_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,28 @@ ConnectionManagerImpl::ActiveStream::~ActiveStream() {
ASSERT(state_.filter_call_state_ == 0);
}

void ConnectionManagerImpl::ActiveStream::resetIdleTimer() {
if (idle_timer_ != nullptr) {
// TODO(htuch): If this shows up in performance profiles, optimize by only
// updating a timestamp here and doing periodic checks for idle timeouts
// instead, or reducing the accuracy of timers.
idle_timer_->enableTimer(idle_timeout_ms_);
}
}

void ConnectionManagerImpl::ActiveStream::onIdleTimeout() {
connection_manager_.stats_.named_.downstream_rq_idle_timeout_.inc();
// If headers have not been sent to the user, send a 408.
if (response_headers_ != nullptr) {
// TODO(htuch): We could send trailers here with an x-envoy timeout header
// or gRPC status code, and/or set H2 RST_STREAM error.
connection_manager_.doEndStream(*this);
} else {
sendLocalReply(Grpc::Common::hasGrpcContentType(*request_headers_), Http::Code::RequestTimeout,
"stream timeout", nullptr);
}
}

void ConnectionManagerImpl::ActiveStream::addStreamDecoderFilterWorker(
StreamDecoderFilterSharedPtr filter, bool dual_filter) {
ActiveStreamDecoderFilterPtr wrapper(new ActiveStreamDecoderFilter(*this, filter, dual_filter));
Expand Down Expand Up @@ -579,6 +601,16 @@ void ConnectionManagerImpl::ActiveStream::decodeHeaders(HeaderMapPtr&& headers,
// Allow non websocket requests to go through websocket enabled routes.
}

if (cached_route_.value()) {
const Router::RouteEntry* route_entry = cached_route_.value()->routeEntry();
if (route_entry != nullptr && route_entry->idleTimeout()) {
idle_timeout_ms_ = route_entry->idleTimeout().value();
idle_timer_ = connection_manager_.read_callbacks_->connection().dispatcher().createTimer(
[this]() -> void { onIdleTimeout(); });
resetIdleTimer();
}
}

// Check if tracing is enabled at all.
if (connection_manager_.config_.tracingConfig()) {
traceRequest();
Expand Down Expand Up @@ -702,6 +734,8 @@ void ConnectionManagerImpl::ActiveStream::decodeData(Buffer::Instance& data, boo

void ConnectionManagerImpl::ActiveStream::decodeData(ActiveStreamDecoderFilter* filter,
Buffer::Instance& data, bool end_stream) {
resetIdleTimer();

// If a response is complete or a reset has been sent, filters do not care about further body
// data. Just drop it.
if (state_.local_complete_) {
Expand Down Expand Up @@ -750,6 +784,7 @@ void ConnectionManagerImpl::ActiveStream::addDecodedData(ActiveStreamDecoderFilt
}

void ConnectionManagerImpl::ActiveStream::decodeTrailers(HeaderMapPtr&& trailers) {
resetIdleTimer();
maybeEndDecode(true);
request_trailers_ = std::move(trailers);
decodeTrailers(nullptr, *request_trailers_);
Expand Down Expand Up @@ -846,6 +881,7 @@ void ConnectionManagerImpl::ActiveStream::sendLocalReply(

void ConnectionManagerImpl::ActiveStream::encode100ContinueHeaders(
ActiveStreamEncoderFilter* filter, HeaderMap& headers) {
resetIdleTimer();
ASSERT(connection_manager_.config_.proxy100Continue());
// Make sure commonContinue continues encode100ContinueHeaders.
has_continue_headers_ = true;
Expand Down Expand Up @@ -882,6 +918,8 @@ void ConnectionManagerImpl::ActiveStream::encode100ContinueHeaders(

void ConnectionManagerImpl::ActiveStream::encodeHeaders(ActiveStreamEncoderFilter* filter,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resetIdleTimer on encode100ContinueHeaders?

HeaderMap& headers, bool end_stream) {
resetIdleTimer();

std::list<ActiveStreamEncoderFilterPtr>::iterator entry = commonEncodePrefix(filter, end_stream);
std::list<ActiveStreamEncoderFilterPtr>::iterator continue_data_entry = encoder_filters_.end();

Expand Down Expand Up @@ -1019,6 +1057,7 @@ void ConnectionManagerImpl::ActiveStream::addEncodedData(ActiveStreamEncoderFilt

void ConnectionManagerImpl::ActiveStream::encodeData(ActiveStreamEncoderFilter* filter,
Buffer::Instance& data, bool end_stream) {
resetIdleTimer();
std::list<ActiveStreamEncoderFilterPtr>::iterator entry = commonEncodePrefix(filter, end_stream);
for (; entry != encoder_filters_.end(); entry++) {
ASSERT(!(state_.filter_call_state_ & FilterCallState::EncodeData));
Expand All @@ -1042,6 +1081,7 @@ void ConnectionManagerImpl::ActiveStream::encodeData(ActiveStreamEncoderFilter*

void ConnectionManagerImpl::ActiveStream::encodeTrailers(ActiveStreamEncoderFilter* filter,
HeaderMap& trailers) {
resetIdleTimer();
std::list<ActiveStreamEncoderFilterPtr>::iterator entry = commonEncodePrefix(filter, true);
for (; entry != encoder_filters_.end(); entry++) {
ASSERT(!(state_.filter_call_state_ & FilterCallState::EncodeTrailers));
Expand Down
7 changes: 7 additions & 0 deletions source/common/http/conn_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,10 @@ class ConnectionManagerImpl : Logger::Loggable<Logger::Id::http>,
void setBufferLimit(uint32_t limit);
// Set up the Encoder/Decoder filter chain.
bool createFilterChain();
// Per-stream idle timeout callback.
void onIdleTimeout();
// Reset per-stream idle timer.
void resetIdleTimer();

ConnectionManagerImpl& connection_manager_;
Router::ConfigConstSharedPtr snapped_route_config_;
Expand All @@ -379,6 +383,9 @@ class ConnectionManagerImpl : Logger::Loggable<Logger::Id::http>,
std::list<ActiveStreamEncoderFilterPtr> encoder_filters_;
std::list<AccessLog::InstanceSharedPtr> access_log_handlers_;
Stats::TimespanPtr request_timer_;
// Per-stream idle timeout.
Event::TimerPtr idle_timer_;
std::chrono::milliseconds idle_timeout_ms_{};
State state_;
RequestInfo::RequestInfoImpl request_info_;
absl::optional<Router::RouteConstSharedPtr> cached_route_;
Expand Down
2 changes: 2 additions & 0 deletions source/common/router/config_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ RouteEntryImplBase::RouteEntryImplBase(const VirtualHostImpl& vhost,
cluster_not_found_response_code_(ConfigUtility::parseClusterNotFoundResponseCode(
route.route().cluster_not_found_response_code())),
timeout_(PROTOBUF_GET_MS_OR_DEFAULT(route.route(), timeout, DEFAULT_ROUTE_TIMEOUT_MS)),
idle_timeout_(
PROTOBUF_GET_MS_OR_DEFAULT(route.route(), idle_timeout, DEFAULT_ROUTE_IDLE_TIMEOUT_MS)),
max_grpc_timeout_(PROTOBUF_GET_OPTIONAL_MS(route.route(), max_grpc_timeout)),
runtime_(loadRuntimeData(route.match())), loader_(factory_context.runtime()),
host_redirect_(route.redirect().host_redirect()),
Expand Down
11 changes: 11 additions & 0 deletions source/common/router/config_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,10 @@ class RouteEntryImplBase : public RouteEntry,
return vhost_.virtualClusterFromEntries(headers);
}
std::chrono::milliseconds timeout() const override { return timeout_; }
absl::optional<std::chrono::milliseconds> idleTimeout() const override {
return idle_timeout_.count() == 0 ? absl::nullopt
: absl::optional<std::chrono::milliseconds>(idle_timeout_);
}
absl::optional<std::chrono::milliseconds> maxGrpcTimeout() const override {
return max_grpc_timeout_;
}
Expand Down Expand Up @@ -395,6 +399,9 @@ class RouteEntryImplBase : public RouteEntry,
const RetryPolicy& retryPolicy() const override { return parent_->retryPolicy(); }
const ShadowPolicy& shadowPolicy() const override { return parent_->shadowPolicy(); }
std::chrono::milliseconds timeout() const override { return parent_->timeout(); }
absl::optional<std::chrono::milliseconds> idleTimeout() const override {
return parent_->idleTimeout();
}
absl::optional<std::chrono::milliseconds> maxGrpcTimeout() const override {
return parent_->maxGrpcTimeout();
}
Expand Down Expand Up @@ -503,6 +510,9 @@ class RouteEntryImplBase : public RouteEntry,
// Default timeout is 15s if nothing is specified in the route config.
static const uint64_t DEFAULT_ROUTE_TIMEOUT_MS = 15000;

// Default idle timeout is 5 minutes if nothing is specified in the route config.
static const uint64_t DEFAULT_ROUTE_IDLE_TIMEOUT_MS = 5 * 60 * 1000;

std::unique_ptr<const CorsPolicyImpl> cors_policy_;
const VirtualHostImpl& vhost_; // See note in RouteEntryImplBase::clusterEntry() on why raw ref
// to virtual host is currently safe.
Expand All @@ -512,6 +522,7 @@ class RouteEntryImplBase : public RouteEntry,
const Http::LowerCaseString cluster_header_name_;
const Http::Code cluster_not_found_response_code_;
const std::chrono::milliseconds timeout_;
const std::chrono::milliseconds idle_timeout_;
const absl::optional<std::chrono::milliseconds> max_grpc_timeout_;
const absl::optional<RuntimeData> runtime_;
Runtime::Loader& loader_;
Expand Down
Loading