envoyproxy · yuval-k · Nov 11, 2020 · Nov 12, 2020 · Nov 19, 2020
diff --git a/docs/root/configuration/http/http_filters/ext_authz_filter.rst b/docs/root/configuration/http/http_filters/ext_authz_filter.rst
@@ -165,8 +165,7 @@ The HTTP filter outputs statistics in the *cluster.<route target cluster>.ext_au
   :widths: 1, 1, 2
 
   ok, Counter, Total responses from the filter.
-  error, Counter, Total errors (including timeouts) contacting the external service.
-  timeout, Counter, Total timeouts contacting the external service (only counted when timeout is measured when check request is created).
+  error, Counter, Total errors contacting the external service.
   denied, Counter, Total responses from the authorizations service that were to deny the traffic.
   disabled, Counter, Total requests that are allowed without calling external services due to the filter is disabled.
   failure_mode_allowed, Counter, "Total requests that were error(s) but were allowed through because

diff --git a/include/envoy/router/router.h b/include/envoy/router/router.h
@@ -789,6 +789,13 @@ class RouteEntry : public ResponseEntry {
    */
   virtual std::chrono::milliseconds timeout() const PURE;
 
+  /**
+   * @return bool measure timeout() from when the request is started, before a connection is
+   * received from the connection pool. By default, timeout is measured when the request is
+   * completely sent upstream.
+   */
+  virtual bool measureTimeoutOnRequestStart() const PURE;
+
   /**
    * @return optional<std::chrono::milliseconds> the route's idle timeout. Zero indicates a
    *         disabled idle timeout, while nullopt indicates deference to the global timeout.

diff --git a/source/common/http/async_client_impl.h b/source/common/http/async_client_impl.h
@@ -254,6 +254,7 @@ class AsyncStreamImpl : public AsyncClient::Stream,
         return std::chrono::milliseconds(0);
       }
     }
+    bool measureTimeoutOnRequestStart() const override { return false; }
     absl::optional<std::chrono::milliseconds> idleTimeout() const override { return absl::nullopt; }
     absl::optional<std::chrono::milliseconds> maxStreamDuration() const override {
       return absl::nullopt;

diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h
@@ -497,6 +497,7 @@ class RouteEntryImplBase : public RouteEntry,
     return vhost_.virtualClusterFromEntries(headers);
   }
   std::chrono::milliseconds timeout() const override { return timeout_; }
+  bool measureTimeoutOnRequestStart() const override { return false; }
   absl::optional<std::chrono::milliseconds> idleTimeout() const override { return idle_timeout_; }
   absl::optional<std::chrono::milliseconds> maxStreamDuration() const override {
     return max_stream_duration_;
@@ -610,6 +611,9 @@ class RouteEntryImplBase : public RouteEntry,
       return parent_->shadowPolicies();
     }
     std::chrono::milliseconds timeout() const override { return parent_->timeout(); }
+    bool measureTimeoutOnRequestStart() const override {
+      return parent_->measureTimeoutOnRequestStart();
+    }
     absl::optional<std::chrono::milliseconds> idleTimeout() const override {
       return parent_->idleTimeout();
     }

diff --git a/source/common/router/router.cc b/source/common/router/router.cc
@@ -189,6 +189,16 @@ FilterUtility::finalTimeout(const RouteEntry& route, Http::RequestHeaderMap& req
     expected_timeout = timeout.global_timeout_.count();
   }
 
+  setTimeoutHeaders(route, expected_timeout, request_headers,
+                    insert_envoy_expected_request_timeout_ms, grpc_request);
+
+  return timeout;
+}
+
+void FilterUtility::setTimeoutHeaders(const RouteEntry& route, uint64_t expected_timeout,
+                                      Http::RequestHeaderMap& request_headers,
+                                      bool insert_envoy_expected_request_timeout_ms,
+                                      bool grpc_request) {
   if (insert_envoy_expected_request_timeout_ms && expected_timeout > 0) {
     request_headers.setEnvoyExpectedRequestTimeoutMs(expected_timeout);
   }
@@ -200,8 +210,6 @@ FilterUtility::finalTimeout(const RouteEntry& route, Http::RequestHeaderMap& req
   if (grpc_request && route.maxGrpcTimeout() && expected_timeout != 0) {
     Grpc::Common::toGrpcTimeout(std::chrono::milliseconds(expected_timeout), request_headers);
   }
-
-  return timeout;
 }
 
 bool FilterUtility::trySetGlobalTimeout(const Http::HeaderEntry* header_timeout_entry,
@@ -328,6 +336,30 @@ void Filter::chargeUpstreamCode(Http::Code code,
   chargeUpstreamCode(response_status_code, *fake_response_headers, upstream_host, dropped);
 }
 
+void Filter::finalizeTimeoutHeaders() {
+  // If we have a deadline, update the timeout headers to match how much time we have left.
+  if (deadline_) {
+    // Calculate how much time we have left
+    std::chrono::milliseconds expected_timeout =
+        std::chrono::duration_cast<std::chrono::milliseconds>(deadline_.value() -
+                                                              config_.timeSource().monotonicTime());
+    // This _should_ be positive, but might not be, as there are no guarantees on scheduling.
+    if (expected_timeout.count() <= 0) {
+      // If we don't have a positive timeout, do nothing. The timer should fire shortly and cancel
+      // this request.
+      return;
+    }
+
+    // If the expected timeout is less than the current per_try_timeout, or there is no
+    // per_try_timeout, update timeout headers to the expected_timeout.
+    if (timeout_.per_try_timeout_.count() == 0 || expected_timeout < timeout_.per_try_timeout_) {
+      FilterUtility::setTimeoutHeaders(*route_entry_, expected_timeout.count(),
+                                       *downstream_headers_, !config_.suppress_envoy_headers_,
+                                       grpc_request_);
+    }
+  }
+}
+
 Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers, bool end_stream) {
   downstream_headers_ = &headers;
 
@@ -530,6 +562,16 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers,
                                          grpc_request_, hedging_params_.hedge_on_per_try_timeout_,
                                          config_.respect_expected_rq_timeout_);
 
+  if (route_entry_->measureTimeoutOnRequestStart() && (timeout_.global_timeout_.count() > 0)) {
+    // If we measure the timeout from the request start, calculate the deadline for the request
+    // so we can update timeout headers just before the request is sent upstream.
+    // TODO(yuval-k): We might want to set the deadline in startResponseTimer(), so it is set
+    // when the response timer is started via onRequestComplete as well.
+    deadline_ = callbacks_->dispatcher().timeSource().monotonicTime() + timeout_.global_timeout_;
+    // start measuring timeout.
+    startResponseTimer();
+  }
+
   // If this header is set with any value, use an alternate response code on timeout
   if (headers.EnvoyUpstreamRequestTimeoutAltResponse()) {
     timeout_response_code_ = Http::Code::NoContent;
@@ -742,6 +784,7 @@ void Filter::maybeDoShadowing() {
       request->trailers(Http::createHeaderMap<Http::RequestTrailerMapImpl>(*downstream_trailers_));
     }
 
+    // TODO: add measure on start here?
     auto options = Http::AsyncClient::RequestOptions()
                        .setTimeout(timeout_.global_timeout_)
                        .setParentSpan(callbacks_->activeSpan())
@@ -751,6 +794,12 @@ void Filter::maybeDoShadowing() {
   }
 }
 
+void Filter::startResponseTimer() {
+  response_timeout_ =
+      callbacks_->dispatcher().createTimer([this]() -> void { onResponseTimeout(); });
+  response_timeout_->enableTimer(timeout_.global_timeout_);
+}
+
 void Filter::onRequestComplete() {
   // This should be called exactly once, when the downstream request has been received in full.
   ASSERT(!downstream_end_stream_);
@@ -764,9 +813,10 @@ void Filter::onRequestComplete() {
     // seems unnecessary right now.
     maybeDoShadowing();
 
-    if (timeout_.global_timeout_.count() > 0) {
-      response_timeout_ = dispatcher.createTimer([this]() -> void { onResponseTimeout(); });
-      response_timeout_->enableTimer(timeout_.global_timeout_);
+    // if response_timeout_ != nullptr then timeout was already started in decodeHeaders, no need to
+    // start it again.
+    if (timeout_.global_timeout_.count() > 0 && response_timeout_ == nullptr) {
+      startResponseTimer();
     }
 
     for (auto& upstream_request : upstream_requests_) {

diff --git a/source/common/router/router.h b/source/common/router/router.h
@@ -160,6 +160,9 @@ class FilterUtility {
                                   bool insert_envoy_expected_request_timeout_ms, bool grpc_request,
                                   bool per_try_timeout_hedging_enabled,
                                   bool respect_expected_rq_timeout);
+  static void setTimeoutHeaders(const RouteEntry& route, uint64_t expected_timeout,
+                                Http::RequestHeaderMap& request_headers,
+                                bool insert_envoy_expected_request_timeout_ms, bool grpc_request);
 
   static bool trySetGlobalTimeout(const Http::HeaderEntry* header_timeout_entry,
                                   TimeoutData& timeout);
@@ -277,6 +280,7 @@ class RouterFilterInterface {
   virtual Upstream::ClusterInfoConstSharedPtr cluster() PURE;
   virtual FilterConfig& config() PURE;
   virtual FilterUtility::TimeoutData timeout() PURE;
+  virtual void finalizeTimeoutHeaders() PURE;
   virtual Http::RequestHeaderMap* downstreamHeaders() PURE;
   virtual Http::RequestTrailerMap* downstreamTrailers() PURE;
   virtual bool downstreamResponseStarted() const PURE;
@@ -447,6 +451,7 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   Upstream::ClusterInfoConstSharedPtr cluster() override { return cluster_; }
   FilterConfig& config() override { return config_; }
   FilterUtility::TimeoutData timeout() override { return timeout_; }
+  void finalizeTimeoutHeaders() override;
   Http::RequestHeaderMap* downstreamHeaders() override { return downstream_headers_; }
   Http::RequestTrailerMap* downstreamTrailers() override { return downstream_trailers_; }
   bool downstreamResponseStarted() const override { return downstream_response_started_; }
@@ -488,6 +493,7 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   bool maybeRetryReset(Http::StreamResetReason reset_reason, UpstreamRequest& upstream_request);
   uint32_t numRequestsAwaitingHeaders();
   void onGlobalTimeout();
+  void startResponseTimer();
   void onRequestComplete();
   void onResponseTimeout();
   // Handle an upstream request aborted due to a local timeout.
@@ -529,6 +535,7 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   const VirtualCluster* request_vcluster_;
   Event::TimerPtr response_timeout_;
   FilterUtility::TimeoutData timeout_;
+  absl::optional<MonotonicTime> deadline_;
   FilterUtility::HedgingParams hedging_params_;
   Http::Code timeout_response_code_ = Http::Code::GatewayTimeout;
   std::list<UpstreamRequestPtr> upstream_requests_;

diff --git a/source/common/router/upstream_request.cc b/source/common/router/upstream_request.cc
@@ -416,6 +416,8 @@ void UpstreamRequest::onPoolReady(
     }
   }
 
+  parent_.finalizeTimeoutHeaders();
+
   const Http::Status status =
       upstream_->encodeHeaders(*parent_.downstreamHeaders(), shouldSendEndStream());
   calling_encode_headers_ = false;

diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc
@@ -110,9 +110,6 @@ constexpr const char* disabled_runtime_features[] = {
     "envoy.reloadable_features.upstream_http2_flood_checks",
     // Sentinel and test flag.
     "envoy.reloadable_features.test_feature_false",
-    // gRPC Timeout header is missing (#13580)
-    "envoy.reloadable_features.ext_authz_measure_timeout_on_check_created",
-
 };
 
 RuntimeFeatures::RuntimeFeatures() {

diff --git a/source/extensions/filters/common/ext_authz/ext_authz.h b/source/extensions/filters/common/ext_authz/ext_authz.h
@@ -6,14 +6,12 @@
 #include <vector>
 
 #include "envoy/common/pure.h"
-#include "envoy/event/dispatcher.h"
 #include "envoy/http/codes.h"
 #include "envoy/service/auth/v3/external_auth.pb.h"
 #include "envoy/stream_info/stream_info.h"
 #include "envoy/tracing/http_tracer.h"
 
 #include "common/http/headers.h"
-#include "common/runtime/runtime_features.h"
 #include "common/singleton/const_singleton.h"
 
 namespace Envoy {
@@ -61,27 +59,12 @@ enum class CheckStatus {
   Denied
 };
 
-/**
- * Possible error kind for Error status..
- */
-enum class ErrorKind {
-  // Other error.
-  Other,
-  // The request timed out. This will only be set if the timeout is measure when the check request
-  // was created.
-  Timedout,
-};
-
 /**
  * Authorization response object for a RequestCallback.
  */
 struct Response {
   // Call status.
   CheckStatus status;
-
-  // In case status is Error, this will contain the kind of error that occurred.
-  ErrorKind error_kind{ErrorKind::Other};
-
   // A set of HTTP headers returned by the authorization server, that will be optionally appended
   // to the request to the upstream server.
   Http::HeaderVector headers_to_append;
@@ -134,23 +117,13 @@ class Client {
    * passed request parameters to make a permit/deny decision.
    * @param callback supplies the completion callbacks.
    *        NOTE: The callback may happen within the calling stack.
-   * @param dispatcher is the dispatcher of the current thread.
    * @param request is the proto message with the attributes of the specific payload.
    * @param parent_span source for generating an egress child span as part of the trace.
    * @param stream_info supplies the client's stream info.
    */
-  virtual void check(RequestCallbacks& callback, Event::Dispatcher& dispatcher,
+  virtual void check(RequestCallbacks& callback,
                      const envoy::service::auth::v3::CheckRequest& request,
                      Tracing::Span& parent_span, const StreamInfo::StreamInfo& stream_info) PURE;
-
-protected:
-  /**
-   * @return should we start the request time out when the check request is created.
-   */
-  static bool timeoutStartsAtCheckCreation() {
-    return Runtime::runtimeFeatureEnabled(
-        "envoy.reloadable_features.ext_authz_measure_timeout_on_check_created");
-  }
 };
 
 using ClientPtr = std::unique_ptr<Client>;

diff --git a/source/extensions/filters/common/ext_authz/ext_authz_grpc_impl.cc b/source/extensions/filters/common/ext_authz/ext_authz_grpc_impl.cc
@@ -31,29 +31,15 @@ void GrpcClientImpl::cancel() {
   ASSERT(callbacks_ != nullptr);
   request_->cancel();
   callbacks_ = nullptr;
-  timeout_timer_.reset();
 }
 
-void GrpcClientImpl::check(RequestCallbacks& callbacks, Event::Dispatcher& dispatcher,
+void GrpcClientImpl::check(RequestCallbacks& callbacks,
                            const envoy::service::auth::v3::CheckRequest& request,
                            Tracing::Span& parent_span, const StreamInfo::StreamInfo& stream_info) {
   ASSERT(callbacks_ == nullptr);
   callbacks_ = &callbacks;
-
   Http::AsyncClient::RequestOptions options;
-  if (timeout_.has_value()) {
-    if (timeoutStartsAtCheckCreation()) {
-      // TODO(yuval-k): We currently use dispatcher based timeout even if the underlying client is
-      // Google gRPC client, which has its own timeout mechanism. We may want to change that in the
-      // future if the implementations converge.
-      timeout_timer_ = dispatcher.createTimer([this]() -> void { onTimeout(); });
-      timeout_timer_->enableTimer(timeout_.value());
-    } else {
-      // not starting timer on check creation, set the timeout on the request.
-      options.setTimeout(timeout_);
-    }
-  }
-
+  options.setTimeout(timeout_);
   options.setParentContext(Http::AsyncClient::ParentContext{&stream_info});
 
   ENVOY_LOG(trace, "Sending CheckRequest: {}", request.DebugString());
@@ -99,31 +85,16 @@ void GrpcClientImpl::onSuccess(std::unique_ptr<envoy::service::auth::v3::CheckRe
 
   callbacks_->onComplete(std::move(authz_response));
   callbacks_ = nullptr;
-  timeout_timer_.reset();
 }
 
 void GrpcClientImpl::onFailure(Grpc::Status::GrpcStatus status, const std::string&,
                                Tracing::Span&) {
   ENVOY_LOG(trace, "CheckRequest call failed with status: {}",
             Grpc::Utility::grpcStatusToString(status));
   ASSERT(status != Grpc::Status::WellKnownGrpcStatus::Ok);
-  timeout_timer_.reset();
-  respondFailure(ErrorKind::Other);
-}
-
-void GrpcClientImpl::onTimeout() {
-  ENVOY_LOG(trace, "CheckRequest timed-out");
-  ASSERT(request_ != nullptr);
-  request_->cancel();
-  // let the client know of failure:
-  respondFailure(ErrorKind::Timedout);
-}
-
-void GrpcClientImpl::respondFailure(ErrorKind kind) {
   Response response{};
   response.status = CheckStatus::Error;
   response.status_code = Http::Code::Forbidden;
-  response.error_kind = kind;
   callbacks_->onComplete(std::make_unique<Response>(response));
   callbacks_ = nullptr;
 }

diff --git a/source/extensions/filters/common/ext_authz/ext_authz_grpc_impl.h b/source/extensions/filters/common/ext_authz/ext_authz_grpc_impl.h
@@ -51,9 +51,8 @@ class GrpcClientImpl : public Client,
 
   // ExtAuthz::Client
   void cancel() override;
-  void check(RequestCallbacks& callbacks, Event::Dispatcher& dispatcher,
-             const envoy::service::auth::v3::CheckRequest& request, Tracing::Span& parent_span,
-             const StreamInfo::StreamInfo& stream_info) override;
+  void check(RequestCallbacks& callbacks, const envoy::service::auth::v3::CheckRequest& request,
+             Tracing::Span& parent_span, const StreamInfo::StreamInfo& stream_info) override;
 
   // Grpc::AsyncRequestCallbacks
   void onCreateInitialMetadata(Http::RequestHeaderMap&) override {}
@@ -63,20 +62,16 @@ class GrpcClientImpl : public Client,
                  Tracing::Span& span) override;
 
 private:
-  void onTimeout();
-  void respondFailure(Filters::Common::ExtAuthz::ErrorKind kind);
   void toAuthzResponseHeader(
       ResponsePtr& response,
       const Protobuf::RepeatedPtrField<envoy::config::core::v3::HeaderValueOption>& headers);
-
   Grpc::AsyncClient<envoy::service::auth::v3::CheckRequest, envoy::service::auth::v3::CheckResponse>
       async_client_;
   Grpc::AsyncRequest* request_{};
   absl::optional<std::chrono::milliseconds> timeout_;
   RequestCallbacks* callbacks_{};
   const Protobuf::MethodDescriptor& service_method_;
   const envoy::config::core::v3::ApiVersion transport_api_version_;
-  Event::TimerPtr timeout_timer_;
 };
 
 using GrpcClientImplPtr = std::unique_ptr<GrpcClientImpl>;