diff --git a/api/envoy/api/v2/route/route.proto b/api/envoy/api/v2/route/route.proto
index 23dc44fc2624d..2f1337c9c58eb 100644
--- a/api/envoy/api/v2/route/route.proto
+++ b/api/envoy/api/v2/route/route.proto
@@ -148,7 +148,6 @@ message VirtualHost {
   // Indicates the hedge policy for all routes in this virtual host. Note that setting a
   // route level entry will take precedence over this config and it'll be treated
   // independently (e.g.: values are not inherited).
-  // [#not-implemented-hide:]
   HedgePolicy hedge_policy = 17;
 }
 
@@ -803,7 +802,6 @@ message RouteAction {
   // Indicates that the route has a hedge policy. Note that if this is set,
   // it'll take precedence over the virtual host level hedge policy entirely
   // (e.g.: policies are not merged, most internal one becomes the enforced policy).
-  // [#not-implemented-hide:]
   HedgePolicy hedge_policy = 27;
 }
 
@@ -899,22 +897,28 @@ message RetryPolicy {
   RetryBackOff retry_back_off = 8;
 }
 
-// HTTP request hedging TODO(mpuncel) docs
-// [#not-implemented-hide:]
+// HTTP request hedging :ref:`architecture overview <arch_overview_http_routing_hedging>`.
 message HedgePolicy {
   // Specifies the number of initial requests that should be sent upstream.
   // Must be at least 1.
   // Defaults to 1.
+  // [#not-implemented-hide:]
   google.protobuf.UInt32Value initial_requests = 1 [(validate.rules).uint32.gte = 1];
 
   // Specifies a probability that an additional upstream request should be sent
   // on top of what is specified by initial_requests.
   // Defaults to 0.
+  // [#not-implemented-hide:]
   envoy.type.FractionalPercent additional_request_chance = 2;
 
   // Indicates that a hedged request should be sent when the per-try timeout
   // is hit. This will only occur if the retry policy also indicates that a
-  // timed out request should be retried. Defaults to false.
+  // timed out request should be retried.
+  // Once a timed out request is retried due to per try timeout, the router
+  // filter will ensure that it is not retried again even if the returned
+  // response headers would otherwise be retried according the specified
+  // :ref:`RetryPolicy <envoy_api_msg_route.RetryPolicy>`.
+  // Defaults to false.
   bool hedge_on_per_try_timeout = 3;
 }
 
diff --git a/docs/root/configuration/http_filters/router_filter.rst b/docs/root/configuration/http_filters/router_filter.rst
index d33a974eaad51..0d416ac865026 100644
--- a/docs/root/configuration/http_filters/router_filter.rst
+++ b/docs/root/configuration/http_filters/router_filter.rst
@@ -217,6 +217,18 @@ requests. This timeout must be <= the global route timeout (see
 caller to set a tight per try timeout to allow for retries while maintaining a reasonable overall
 timeout.
 
+x-envoy-hedge-on-per-try-timeout
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Setting this header on egress requests will cause Envoy to use a request
+hedging strategy in the case of a per try timeout. This overrides the value set
+in the :ref:`route configuration
+<envoy_api_field_route.HedgePolicy.hedge_on_per_try_timeout>`. This means that a retry
+will be issued without resetting the original request, leaving multiple upstream requests
+in flight.
+
+The value of the header should be "true" or "false", and is ignored if invalid.
+
 .. _config_http_filters_router_x-envoy-immediate-health-check-fail:
 
 x-envoy-immediate-health-check-fail
diff --git a/docs/root/intro/arch_overview/http_routing.rst b/docs/root/intro/arch_overview/http_routing.rst
index 8d0ed1baea453..6a191be268214 100644
--- a/docs/root/intro/arch_overview/http_routing.rst
+++ b/docs/root/intro/arch_overview/http_routing.rst
@@ -35,6 +35,7 @@ request. The router filter supports the following features:
 * Request timeout specified either via :ref:`HTTP
   header <config_http_filters_router_headers_consumed>` or via :ref:`route configuration
   <envoy_api_field_route.RouteAction.timeout>`.
+* :ref:`Request hedging <arch_overview_http_routing_hedging>` for retries in response to a request (per try) timeout.
 * Traffic shifting from one upstream cluster to another via :ref:`runtime values
   <envoy_api_field_route.RouteMatch.runtime_fraction>` (see :ref:`traffic shifting/splitting
   <config_http_conn_man_route_table_traffic_splitting>`).
@@ -87,6 +88,27 @@ headers <config_http_filters_router_headers_consumed>`. The following configurat
 Note that retries may be disabled depending on the contents of the :ref:`x-envoy-overloaded
 <config_http_filters_router_x-envoy-overloaded_consumed>`.
 
+.. _arch_overview_http_routing_hedging:
+
+Request Hedging
+---------------
+
+Envoy supports request hedging which can be enabled by specifying a :ref:`hedge
+policy <envoy_api_msg_route.HedgePolicy>`. This means that Envoy will race
+multiple simultaneous upstream requests and return the response associated with
+the first acceptable response headers to the downstream. The retry policy is
+used to determine whether a response should be returned or whether more
+responses should be awaited.
+
+Currently hedging can only be performed in response to a request timeout. This
+means that a retry request will be issued without canceling the initial
+timed-out request and a late response will be awaited. The first "good"
+response according to retry policy will be returned downstream.
+
+The implementation ensures that the same upstream request is not retried twice.
+This might otherwise occur if a request times out and then results in a 5xx
+response, creating two retriable events.
+
 .. _arch_overview_http_routing_priority:
 
 Priority routing
diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
index 4c72e0bd481fe..0ba3ca7a27c1a 100644
--- a/docs/root/intro/version_history.rst
+++ b/docs/root/intro/version_history.rst
@@ -33,6 +33,7 @@ Version history
   :ref:`buffer_flush_timeout <envoy_api_field_config.filter.network.redis_proxy.v2.RedisProxy.ConnPoolSettings.buffer_flush_timeout>` to control how quickly the buffer is flushed if it is not full.
 * router: add support for configuring a :ref:`grpc timeout offset <envoy_api_field_route.RouteAction.grpc_timeout_offset>` on incoming requests.
 * router: added ability to control retry back-off intervals via :ref:`retry policy <envoy_api_msg_route.RetryPolicy.RetryBackOff>`.
+* router: added ability to issue a hedged retry in response to a per try timeout via a :ref:`hedge policy <envoy_api_msg_route.HedgePolicy>`.
 * router: added a route name field to each http route in route.Route list 
 * router: per try timeouts will no longer start before the downstream request has been received
   in full by the router. This ensures that the per try timeout does not account for slow
diff --git a/include/envoy/http/header_map.h b/include/envoy/http/header_map.h
index daf921ad19676..2b92fa573ba24 100644
--- a/include/envoy/http/header_map.h
+++ b/include/envoy/http/header_map.h
@@ -289,6 +289,7 @@ class HeaderEntry {
   HEADER_FUNC(EnvoyExpectedRequestTimeoutMs)                                                       \
   HEADER_FUNC(EnvoyExternalAddress)                                                                \
   HEADER_FUNC(EnvoyForceTrace)                                                                     \
+  HEADER_FUNC(EnvoyHedgeOnPerTryTimeout)                                                           \
   HEADER_FUNC(EnvoyImmediateHealthCheckFail)                                                       \
   HEADER_FUNC(EnvoyInternalRequest)                                                                \
   HEADER_FUNC(EnvoyIpTags)                                                                         \
diff --git a/include/envoy/router/router.h b/include/envoy/router/router.h
index ad2eeab6bff18..c0bf0e81456b3 100644
--- a/include/envoy/router/router.h
+++ b/include/envoy/router/router.h
@@ -255,6 +255,16 @@ class RetryState {
   virtual RetryStatus shouldRetryHeaders(const Http::HeaderMap& response_headers,
                                          DoRetryCallback callback) PURE;
 
+  /**
+   * Determines whether given response headers would be retried by the retry policy, assuming
+   * sufficient retry budget and circuit breaker headroom. This is useful in cases where
+   * the information about whether a response is "good" or not is useful, but a retry should
+   * not be attempted for other reasons.
+   * @param response_headers supplies the response headers.
+   * @return bool true if a retry would be warranted based on the retry policy.
+   */
+  virtual bool wouldRetryFromHeaders(const Http::HeaderMap& response_headers) PURE;
+
   /**
    * Determine whether a request should be retried after a reset based on the reason for the reset.
    * @param reset_reason supplies the reset reason.
@@ -268,6 +278,19 @@ class RetryState {
   virtual RetryStatus shouldRetryReset(const Http::StreamResetReason reset_reason,
                                        DoRetryCallback callback) PURE;
 
+  /**
+   * Determine whether a "hedged" retry should be sent after the per try
+   * timeout expires. This means the original request is not canceled, but a
+   * new one is sent to hedge against the original request taking even longer.
+   * @param callback supplies the callback that will be invoked when the retry should take place.
+   *                 This is used to add timed backoff, etc. The callback will never be called
+   *                 inline.
+   * @return RetryStatus if a retry should take place. @param callback will be called at some point
+   *         in the future. Otherwise a retry should not take place and the callback will never be
+   *         called. Calling code should proceed with error handling.
+   */
+  virtual RetryStatus shouldHedgeRetryPerTryTimeout(DoRetryCallback callback) PURE;
+
   /**
    * Called when a host was attempted but the request failed and is eligible for another retry.
    * Should be used to update whatever internal state depends on previously attempted hosts.
diff --git a/source/common/http/conn_manager_utility.cc b/source/common/http/conn_manager_utility.cc
index f061d33c99d23..c6d1fbf7e8948 100644
--- a/source/common/http/conn_manager_utility.cc
+++ b/source/common/http/conn_manager_utility.cc
@@ -170,6 +170,7 @@ Network::Address::InstanceConstSharedPtr ConnectionManagerUtility::mutateRequest
     request_headers.removeEnvoyForceTrace();
     request_headers.removeEnvoyIpTags();
     request_headers.removeEnvoyOriginalUrl();
+    request_headers.removeEnvoyHedgeOnPerTryTimeout();
 
     for (const LowerCaseString& header : route_config.internalOnlyHeaders()) {
       request_headers.remove(header);
diff --git a/source/common/http/headers.h b/source/common/http/headers.h
index 5aa15e718bf83..796671ea4f3b0 100644
--- a/source/common/http/headers.h
+++ b/source/common/http/headers.h
@@ -42,6 +42,7 @@ class HeaderValues {
   const LowerCaseString EnvoyDownstreamServiceNode{"x-envoy-downstream-service-node"};
   const LowerCaseString EnvoyExternalAddress{"x-envoy-external-address"};
   const LowerCaseString EnvoyForceTrace{"x-envoy-force-trace"};
+  const LowerCaseString EnvoyHedgeOnPerTryTimeout{"x-envoy-hedge-on-per-try-timeout"};
   const LowerCaseString EnvoyImmediateHealthCheckFail{"x-envoy-immediate-health-check-fail"};
   const LowerCaseString EnvoyOriginalUrl{"x-envoy-original-url"};
   const LowerCaseString EnvoyInternalRequest{"x-envoy-internal"};
diff --git a/source/common/router/retry_state_impl.cc b/source/common/router/retry_state_impl.cc
index a3fa408e80c04..8badaa382cfa6 100644
--- a/source/common/router/retry_state_impl.cc
+++ b/source/common/router/retry_state_impl.cc
@@ -207,6 +207,17 @@ RetryStatus RetryStateImpl::shouldRetryReset(Http::StreamResetReason reset_reaso
   return shouldRetry(wouldRetryFromReset(reset_reason), callback);
 }
 
+RetryStatus RetryStateImpl::shouldHedgeRetryPerTryTimeout(DoRetryCallback callback) {
+  // A hedged retry on per try timeout is always retried if there are retries
+  // left. NOTE: this is a bit different than non-hedged per try timeouts which
+  // are only retried if the applicable retry policy specifies either
+  // RETRY_ON_5XX or RETRY_ON_GATEWAY_ERROR. This is because these types of
+  // retries are associated with a stream reset which is analogous to a gateway
+  // error. When hedging on per try timeout is enabled, however, there is no
+  // stream reset.
+  return shouldRetry([]() -> bool { return true; }, callback);
+}
+
 bool RetryStateImpl::wouldRetryFromHeaders(const Http::HeaderMap& response_headers) {
   if (response_headers.EnvoyOverloaded() != nullptr) {
     return false;
diff --git a/source/common/router/retry_state_impl.h b/source/common/router/retry_state_impl.h
index 78d017cf8db9c..647680da4f2f8 100644
--- a/source/common/router/retry_state_impl.h
+++ b/source/common/router/retry_state_impl.h
@@ -38,8 +38,12 @@ class RetryStateImpl : public RetryState {
   bool enabled() override { return retry_on_ != 0; }
   RetryStatus shouldRetryHeaders(const Http::HeaderMap& response_headers,
                                  DoRetryCallback callback) override;
+  // Returns true if the retry policy would retry the passed headers. Does not
+  // take into account circuit breaking or remaining tries.
+  bool wouldRetryFromHeaders(const Http::HeaderMap& response_headers) override;
   RetryStatus shouldRetryReset(const Http::StreamResetReason reset_reason,
                                DoRetryCallback callback) override;
+  RetryStatus shouldHedgeRetryPerTryTimeout(DoRetryCallback callback) override;
 
   void onHostAttempted(Upstream::HostDescriptionConstSharedPtr host) override {
     std::for_each(retry_host_predicates_.begin(), retry_host_predicates_.end(),
@@ -75,7 +79,6 @@ class RetryStateImpl : public RetryState {
   void enableBackoffTimer();
   void resetRetry();
   bool wouldRetryFromReset(const Http::StreamResetReason reset_reason);
-  bool wouldRetryFromHeaders(const Http::HeaderMap& response_headers);
   RetryStatus shouldRetry(bool would_retry, DoRetryCallback callback);
 
   const Upstream::ClusterInfo& cluster_;
diff --git a/source/common/router/router.cc b/source/common/router/router.cc
index 8113a5c5a4a17..e3b0da51f971a 100644
--- a/source/common/router/router.cc
+++ b/source/common/router/router.cc
@@ -115,7 +115,8 @@ bool FilterUtility::shouldShadow(const ShadowPolicy& policy, Runtime::Loader& ru
 
 FilterUtility::TimeoutData
 FilterUtility::finalTimeout(const RouteEntry& route, Http::HeaderMap& request_headers,
-                            bool insert_envoy_expected_request_timeout_ms, bool grpc_request) {
+                            bool insert_envoy_expected_request_timeout_ms, bool grpc_request,
+                            bool per_try_timeout_hedging_enabled) {
   // See if there is a user supplied timeout in a request header. If there is we take that.
   // Otherwise if the request is gRPC and a maximum gRPC timeout is configured we use the timeout
   // in the gRPC headers (or infinity when gRPC headers have no timeout), but cap that timeout to
@@ -169,7 +170,10 @@ FilterUtility::finalTimeout(const RouteEntry& route, Http::HeaderMap& request_he
 
   // See if there is any timeout to write in the expected timeout header.
   uint64_t expected_timeout = timeout.per_try_timeout_.count();
-  if (expected_timeout == 0) {
+  // Use the global timeout if no per try timeout was specified or if we're
+  // doing hedging when there are per try timeouts. Either of these scenarios
+  // mean that the upstream server can use the full global timeout.
+  if (per_try_timeout_hedging_enabled || expected_timeout == 0) {
     expected_timeout = timeout.global_timeout_.count();
   }
 
@@ -189,6 +193,26 @@ FilterUtility::finalTimeout(const RouteEntry& route, Http::HeaderMap& request_he
   return timeout;
 }
 
+FilterUtility::HedgingParams FilterUtility::finalHedgingParams(const RouteEntry& route,
+                                                               Http::HeaderMap& request_headers) {
+  HedgingParams hedging_params;
+  hedging_params.hedge_on_per_try_timeout_ = route.hedgePolicy().hedgeOnPerTryTimeout();
+
+  Http::HeaderEntry* hedge_on_per_try_timeout_entry = request_headers.EnvoyHedgeOnPerTryTimeout();
+  if (hedge_on_per_try_timeout_entry) {
+    if (hedge_on_per_try_timeout_entry->value() == "true") {
+      hedging_params.hedge_on_per_try_timeout_ = true;
+    }
+    if (hedge_on_per_try_timeout_entry->value() == "false") {
+      hedging_params.hedge_on_per_try_timeout_ = false;
+    }
+
+    request_headers.removeEnvoyHedgeOnPerTryTimeout();
+  }
+
+  return hedging_params;
+}
+
 Filter::~Filter() {
   // Upstream resources should already have been cleaned.
   ASSERT(upstream_requests_.empty());
@@ -368,8 +392,10 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::HeaderMap& headers, bool e
     return Http::FilterHeadersStatus::StopIteration;
   }
 
+  hedging_params_ = FilterUtility::finalHedgingParams(*route_entry_, headers);
+
   timeout_ = FilterUtility::finalTimeout(*route_entry_, headers, !config_.suppress_envoy_headers_,
-                                         grpc_request_);
+                                         grpc_request_, hedging_params_.hedge_on_per_try_timeout_);
 
   // If this header is set with any value, use an alternate response code on timeout
   if (headers.EnvoyUpstreamRequestTimeoutAltResponse()) {
@@ -435,7 +461,12 @@ void Filter::sendNoHealthyUpstreamResponse() {
 }
 
 Http::FilterDataStatus Filter::decodeData(Buffer::Instance& data, bool end_stream) {
+  // upstream_requests_.size() cannot be 0 because we add to it unconditionally
+  // in decodeHeaders(). It cannot be > 1 because that only happens when a per
+  // try timeout occurs with hedge_on_per_try_timeout enabled but the the per
+  // try timeout timer is not started until onUpstreamComplete().
   ASSERT(upstream_requests_.size() == 1);
+
   bool buffering = (retry_state_ && retry_state_->enabled()) || do_shadowing_;
   if (buffering && buffer_limit_ > 0 &&
       getLength(callbacks_->decodingBuffer()) + data.length() > buffer_limit_) {
@@ -471,9 +502,16 @@ Http::FilterDataStatus Filter::decodeData(Buffer::Instance& data, bool end_strea
 
 Http::FilterTrailersStatus Filter::decodeTrailers(Http::HeaderMap& trailers) {
   ENVOY_STREAM_LOG(debug, "router decoding trailers:\n{}", *callbacks_, trailers);
-  downstream_trailers_ = &trailers;
+
+  // upstream_requests_.size() cannot be 0 because we add to it unconditionally
+  // in decodeHeaders(). It cannot be > 1 because that only happens when a per
+  // try timeout occurs with hedge_on_per_try_timeout enabled but the the per
+  // try timeout timer is not started until onUpstreamComplete().
   ASSERT(upstream_requests_.size() == 1);
-  upstream_requests_.front()->encodeTrailers(trailers);
+  downstream_trailers_ = &trailers;
+  for (auto& upstream_request : upstream_requests_) {
+    upstream_request->encodeTrailers(trailers);
+  }
   onRequestComplete();
   return Http::FilterTrailersStatus::StopIteration;
 }
@@ -487,15 +525,10 @@ void Filter::setDecoderFilterCallbacks(Http::StreamDecoderFilterCallbacks& callb
 }
 
 void Filter::cleanup() {
-  ASSERT(upstream_requests_.size() <= 1);
-  // UpstreamRequests are only destroyed in this method (cleanup()) or when we
-  // do a retry (setupRetry()). In the latter case we don't want to save the
-  // upstream timings to the downstream info.
-  if (upstream_requests_.size() == 1) {
-    UpstreamRequestPtr upstream_request =
-        upstream_requests_.back()->removeFromList(upstream_requests_);
-    callbacks_->streamInfo().setUpstreamTiming(upstream_request->upstream_timing_);
-  }
+  // All callers of cleanup() should have cleaned out the upstream_requests_
+  // list as appropriate.
+  ASSERT(upstream_requests_.empty());
+
   retry_state_.reset();
   if (response_timeout_) {
     response_timeout_->disableTimer();
@@ -530,7 +563,7 @@ void Filter::onRequestComplete() {
   downstream_request_complete_time_ = dispatcher.timeSource().monotonicTime();
 
   // Possible that we got an immediate reset.
-  if (upstream_requests_.size() == 1) {
+  if (!upstream_requests_.empty()) {
     // Even if we got an immediate reset, we could still shadow, but that is a riskier change and
     // seems unnecessary right now.
     maybeDoShadowing();
@@ -549,37 +582,100 @@ void Filter::onRequestComplete() {
 }
 
 void Filter::onDestroy() {
-  if (upstream_requests_.size() == 1 && !attempting_internal_redirect_with_complete_stream_) {
-    upstream_requests_.front()->resetStream();
-  }
+  // Reset any in-flight upstream requests.
+  resetAll();
   cleanup();
 }
 
 void Filter::onResponseTimeout() {
   ENVOY_STREAM_LOG(debug, "upstream timeout", *callbacks_);
-  cluster_->stats().upstream_rq_timeout_.inc();
 
-  ASSERT(upstream_requests_.size() <= 1);
-  if (upstream_requests_.size() == 1) {
-    if (upstream_requests_.front()->upstream_host_) {
-      upstream_requests_.front()->upstream_host_->stats().rq_timeout_.inc();
-    }
+  // If we had an upstream request that got a "good" response, save its
+  // upstream timing information into the downstream stream info.
+  if (final_upstream_request_) {
+    callbacks_->streamInfo().setUpstreamTiming(final_upstream_request_->upstream_timing_);
+  }
+
+  // Reset any upstream requests that are still in flight.
+  while (!upstream_requests_.empty()) {
+    UpstreamRequestPtr upstream_request =
+        upstream_requests_.back()->removeFromList(upstream_requests_);
+
+    // Don't record a timeout for upstream requests we've already seen headers
+    // for.
+    if (upstream_request->awaiting_headers_) {
+      cluster_->stats().upstream_rq_timeout_.inc();
+      if (upstream_request->upstream_host_) {
+        upstream_request->upstream_host_->stats().rq_timeout_.inc();
+      }
 
-    updateOutlierDetection(timeout_response_code_, *upstream_requests_.front().get());
-    upstream_requests_.front()->resetStream();
+      // If this upstream request already hit a "soft" timeout, then it
+      // already recorded a timeout into outlier detection. Don't do it again.
+      if (!upstream_request->outlier_detection_timeout_recorded_) {
+        updateOutlierDetection(timeout_response_code_, *upstream_request);
+      }
+      upstream_request->resetStream();
+
+      chargeUpstreamAbort(timeout_response_code_, false, *upstream_request);
+    }
   }
 
   onUpstreamTimeoutAbort(StreamInfo::ResponseFlag::UpstreamRequestTimeout,
                          StreamInfo::ResponseCodeDetails::get().UpstreamTimeout);
 }
 
+// Called when the per try timeout is hit but we didn't reset the request
+// (hedge_on_per_try_timeout enabled).
+void Filter::onSoftPerTryTimeout(UpstreamRequest& upstream_request) {
+  // Track this as a timeout for outlier detection purposes even though we didn't
+  // cancel the request yet and might get a 2xx later.
+  updateOutlierDetection(timeout_response_code_, upstream_request);
+  upstream_request.outlier_detection_timeout_recorded_ = true;
+
+  if (!downstream_response_started_ && retry_state_) {
+    RetryStatus retry_status =
+        retry_state_->shouldHedgeRetryPerTryTimeout([this]() -> void { doRetry(); });
+
+    if (retry_status == RetryStatus::Yes && setupRetry()) {
+      setupRetry();
+      // Don't increment upstream_host->stats().rq_error_ here, we'll do that
+      // later if 1) we hit global timeout or 2) we get bad response headers
+      // back.
+      upstream_request.retried_ = true;
+
+      // TODO: cluster stat for hedge attempted.
+    } else if (retry_status == RetryStatus::NoOverflow) {
+      callbacks_->streamInfo().setResponseFlag(StreamInfo::ResponseFlag::UpstreamOverflow);
+    } else if (retry_status == RetryStatus::NoRetryLimitExceeded) {
+      callbacks_->streamInfo().setResponseFlag(
+          StreamInfo::ResponseFlag::UpstreamRetryLimitExceeded);
+    }
+  }
+}
+
 void Filter::onPerTryTimeout(UpstreamRequest& upstream_request) {
+  if (hedging_params_.hedge_on_per_try_timeout_) {
+    onSoftPerTryTimeout(upstream_request);
+    return;
+  }
+
+  cluster_->stats().upstream_rq_per_try_timeout_.inc();
+  if (upstream_request.upstream_host_) {
+    upstream_request.upstream_host_->stats().rq_timeout_.inc();
+  }
+
+  upstream_request.resetStream();
+
   updateOutlierDetection(timeout_response_code_, upstream_request);
 
   if (maybeRetryReset(Http::StreamResetReason::LocalReset, upstream_request)) {
     return;
   }
 
+  chargeUpstreamAbort(timeout_response_code_, false, upstream_request);
+
+  // Remove this upstream request from the list now that we're done with it.
+  upstream_request.removeFromList(upstream_requests_);
   onUpstreamTimeoutAbort(StreamInfo::ResponseFlag::UpstreamRequestTimeout,
                          StreamInfo::ResponseCodeDetails::get().UpstreamPerTryTimeout);
 }
@@ -590,6 +686,26 @@ void Filter::updateOutlierDetection(Http::Code code, UpstreamRequest& upstream_r
   }
 }
 
+void Filter::chargeUpstreamAbort(Http::Code code, bool dropped, UpstreamRequest& upstream_request) {
+  if (downstream_response_started_) {
+    if (upstream_request.grpc_rq_success_deferred_) {
+      upstream_request.upstream_host_->stats().rq_error_.inc();
+      config_.stats_.rq_reset_after_downstream_response_started_.inc();
+    }
+  } else {
+    Upstream::HostDescriptionConstSharedPtr upstream_host = upstream_request.upstream_host_;
+
+    chargeUpstreamCode(code, upstream_host, dropped);
+    // If we had non-5xx but still have been reset by backend or timeout before
+    // starting response, we treat this as an error. We only get non-5xx when
+    // timeout_response_code_ is used for code above, where this member can
+    // assume values such as 204 (NoContent).
+    if (upstream_host != nullptr && !Http::CodeUtility::is5xx(enumToInt(code))) {
+      upstream_host->stats().rq_error_.inc();
+    }
+  }
+}
+
 void Filter::onUpstreamTimeoutAbort(StreamInfo::ResponseFlag response_flags,
                                     absl::string_view details) {
   const absl::string_view body =
@@ -599,37 +715,19 @@ void Filter::onUpstreamTimeoutAbort(StreamInfo::ResponseFlag response_flags,
 
 void Filter::onUpstreamAbort(Http::Code code, StreamInfo::ResponseFlag response_flags,
                              absl::string_view body, bool dropped, absl::string_view details) {
-  ASSERT(upstream_requests_.size() <= 1);
   // If we have not yet sent anything downstream, send a response with an appropriate status code.
   // Otherwise just reset the ongoing response.
   if (downstream_response_started_) {
-    if (upstream_requests_.size() == 1 && upstream_requests_.front()->grpc_rq_success_deferred_) {
-      upstream_requests_.front()->upstream_host_->stats().rq_error_.inc();
-      config_.stats_.rq_reset_after_downstream_response_started_.inc();
-    }
     // This will destroy any created retry timers.
     callbacks_->streamInfo().setResponseCodeDetails(details);
     cleanup();
     callbacks_->resetStream();
   } else {
-    Upstream::HostDescriptionConstSharedPtr upstream_host;
-    if (upstream_requests_.size() == 1) {
-      upstream_host = upstream_requests_.front()->upstream_host_;
-    }
-
     // This will destroy any created retry timers.
     cleanup();
 
     callbacks_->streamInfo().setResponseFlag(response_flags);
 
-    chargeUpstreamCode(code, upstream_host, dropped);
-    // If we had non-5xx but still have been reset by backend or timeout before
-    // starting response, we treat this as an error. We only get non-5xx when
-    // timeout_response_code_ is used for code above, where this member can
-    // assume values such as 204 (NoContent).
-    if (upstream_host != nullptr && !Http::CodeUtility::is5xx(enumToInt(code))) {
-      upstream_host->stats().rq_error_.inc();
-    }
     callbacks_->sendLocalReply(
         code, body,
         [dropped, this](Http::HeaderMap& headers) {
@@ -643,25 +741,20 @@ void Filter::onUpstreamAbort(Http::Code code, StreamInfo::ResponseFlag response_
 
 bool Filter::maybeRetryReset(Http::StreamResetReason reset_reason,
                              UpstreamRequest& upstream_request) {
-  // We don't retry if we already started the response.
-  if (downstream_response_started_ || !retry_state_) {
+  // We don't retry if we already started the response, don't have a retry policy defined,
+  // or if we've already retried this upstream request (currently only possible if a per
+  // try timeout occurred and hedge_on_per_try_timeout is enabled).
+  if (downstream_response_started_ || !retry_state_ || upstream_request.retried_) {
     return false;
   }
 
-  Upstream::HostDescriptionConstSharedPtr upstream_host;
-  upstream_host = upstream_request.upstream_host_;
-
-  // Notify retry modifiers about the attempted host.
-  if (upstream_host != nullptr) {
-    retry_state_->onHostAttempted(upstream_host);
-  }
-
   const RetryStatus retry_status =
       retry_state_->shouldRetryReset(reset_reason, [this]() -> void { doRetry(); });
-  if (retry_status == RetryStatus::Yes && setupRetry(true)) {
-    if (upstream_host) {
-      upstream_host->stats().rq_error_.inc();
+  if (retry_status == RetryStatus::Yes && setupRetry()) {
+    if (upstream_request.upstream_host_) {
+      upstream_request.upstream_host_->stats().rq_error_.inc();
     }
+    upstream_request.removeFromList(upstream_requests_);
     return true;
   } else if (retry_status == RetryStatus::NoOverflow) {
     callbacks_->streamInfo().setResponseFlag(StreamInfo::ResponseFlag::UpstreamOverflow);
@@ -684,12 +777,21 @@ void Filter::onUpstreamReset(Http::StreamResetReason reset_reason,
     return;
   }
 
+  const bool dropped = reset_reason == Http::StreamResetReason::Overflow;
+  chargeUpstreamAbort(Http::Code::ServiceUnavailable, dropped, upstream_request);
+  upstream_request.removeFromList(upstream_requests_);
+
+  // If there are other in-flight requests that might see an upstream response,
+  // don't return anything downstream.
+  if (numRequestsAwaitingHeaders() > 0 || pending_retries_ > 0) {
+    return;
+  }
+
   const StreamInfo::ResponseFlag response_flags = streamResetReasonToResponseFlag(reset_reason);
   const std::string body =
       absl::StrCat("upstream connect error or disconnect/reset before headers. reset reason: ",
                    Http::Utility::resetReasonToString(reset_reason));
 
-  const bool dropped = reset_reason == Http::StreamResetReason::Overflow;
   callbacks_->streamInfo().setUpstreamTransportFailureReason(transport_failure_reason);
   const std::string& basic_details =
       downstream_response_started_ ? StreamInfo::ResponseCodeDetails::get().LateUpstreamReset
@@ -747,6 +849,9 @@ void Filter::onUpstream100ContinueHeaders(Http::HeaderMapPtr&& headers,
   ENVOY_STREAM_LOG(debug, "upstream 100 continue", *callbacks_);
 
   downstream_response_started_ = true;
+  final_upstream_request_ = &upstream_request;
+  resetOtherUpstreams(upstream_request);
+
   // Don't send retries after 100-Continue has been sent on. Arguably we could attempt to do a
   // retry, assume the next upstream would also send an 100-Continue and swallow the second one
   // but it's sketchy (as the subsequent upstream might not send a 100-Continue) and not worth
@@ -756,9 +861,35 @@ void Filter::onUpstream100ContinueHeaders(Http::HeaderMapPtr&& headers,
   callbacks_->encode100ContinueHeaders(std::move(headers));
 }
 
+void Filter::resetAll() {
+  while (!upstream_requests_.empty()) {
+    upstream_requests_.back()->removeFromList(upstream_requests_)->resetStream();
+  }
+}
+
+void Filter::resetOtherUpstreams(UpstreamRequest& upstream_request) {
+  // Pop each upstream request on the list and reset it if it's not the one
+  // provided. At the end we'll move it back into the list.
+  UpstreamRequestPtr final_upstream_request;
+  while (!upstream_requests_.empty()) {
+    UpstreamRequestPtr upstream_request_tmp =
+        upstream_requests_.back()->removeFromList(upstream_requests_);
+    if (upstream_request_tmp.get() != &upstream_request) {
+      upstream_request_tmp->resetStream();
+      // TODO: per-host stat for hedge abandoned.
+      // TODO: cluster stat for hedge abandoned.
+    } else {
+      final_upstream_request = std::move(upstream_request_tmp);
+    }
+  }
+
+  ASSERT(final_upstream_request);
+  // Now put the final request back on this list.
+  final_upstream_request->moveIntoList(std::move(final_upstream_request), upstream_requests_);
+}
+
 void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& headers,
                                UpstreamRequest& upstream_request, bool end_stream) {
-  ASSERT(upstream_requests_.size() == 1);
   ENVOY_STREAM_LOG(debug, "upstream headers complete: end_stream={}", *callbacks_, end_stream);
 
   upstream_request.upstream_host_->outlierDetector().putHttpResponseCode(response_code);
@@ -767,31 +898,42 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head
     upstream_request.upstream_host_->healthChecker().setUnhealthy();
   }
 
+  bool could_not_retry = false;
+
+  // Check if this upstream request was already retried, for instance after
+  // hitting a per try timeout. Don't retry it if we already have.
   if (retry_state_) {
-    // Notify retry modifiers about the attempted host.
-    retry_state_->onHostAttempted(upstream_request.upstream_host_);
-
-    // Capture upstream_host since setupRetry() in the following line will clear
-    // upstream_request.
-    const auto upstream_host = upstream_request.upstream_host_;
-    const RetryStatus retry_status =
-        retry_state_->shouldRetryHeaders(*headers, [this]() -> void { doRetry(); });
-    if (retry_status == RetryStatus::Yes && setupRetry(end_stream)) {
-      Http::CodeStats& code_stats = httpContext().codeStats();
-      code_stats.chargeBasicResponseStat(cluster_->statsScope(), config_.retry_,
-                                         static_cast<Http::Code>(response_code));
-      upstream_host->stats().rq_error_.inc();
-      return;
-    } else if (retry_status == RetryStatus::NoOverflow) {
-      callbacks_->streamInfo().setResponseFlag(StreamInfo::ResponseFlag::UpstreamOverflow);
-    } else if (retry_status == RetryStatus::NoRetryLimitExceeded) {
-      callbacks_->streamInfo().setResponseFlag(
-          StreamInfo::ResponseFlag::UpstreamRetryLimitExceeded);
+    if (upstream_request.retried_) {
+      // We already retried this request (presumably for a per try timeout) so
+      // we definitely won't retry it again. Check if we would have retried it
+      // if we could.
+      could_not_retry = retry_state_->wouldRetryFromHeaders(*headers);
+    } else {
+      const RetryStatus retry_status =
+          retry_state_->shouldRetryHeaders(*headers, [this]() -> void { doRetry(); });
+      // Capture upstream_host since setupRetry() in the following line will clear
+      // upstream_request.
+      const auto upstream_host = upstream_request.upstream_host_;
+      if (retry_status == RetryStatus::Yes && setupRetry()) {
+        if (!end_stream) {
+          upstream_request.resetStream();
+        }
+        upstream_request.removeFromList(upstream_requests_);
+
+        Http::CodeStats& code_stats = httpContext().codeStats();
+        code_stats.chargeBasicResponseStat(cluster_->statsScope(), config_.retry_,
+                                           static_cast<Http::Code>(response_code));
+        upstream_host->stats().rq_error_.inc();
+        return;
+      } else if (retry_status == RetryStatus::NoOverflow) {
+        callbacks_->streamInfo().setResponseFlag(StreamInfo::ResponseFlag::UpstreamOverflow);
+        could_not_retry = true;
+      } else if (retry_status == RetryStatus::NoRetryLimitExceeded) {
+        callbacks_->streamInfo().setResponseFlag(
+            StreamInfo::ResponseFlag::UpstreamRetryLimitExceeded);
+        could_not_retry = true;
+      }
     }
-
-    // Make sure any retry timers are destroyed since we may not call cleanup() if end_stream is
-    // false.
-    retry_state_.reset();
   }
 
   if (static_cast<Http::Code>(response_code) == Http::Code::Found &&
@@ -802,6 +944,21 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head
     // next downstream.
   }
 
+  // Check if we got a "bad" response, but there are still upstream requests in
+  // flight awaiting headers or scheduled retries. If so, exit to give them a
+  // chance to return before returning a response downstream.
+  if (could_not_retry && (numRequestsAwaitingHeaders() > 0 || pending_retries_ > 0)) {
+    upstream_request.upstream_host_->stats().rq_error_.inc();
+    upstream_request.removeFromList(upstream_requests_);
+    return;
+  }
+
+  // Make sure any retry timers are destroyed since we may not call cleanup() if end_stream is
+  // false.
+  if (retry_state_) {
+    retry_state_.reset();
+  }
+
   // Only send upstream service time if we received the complete request and this is not a
   // premature response.
   if (DateUtil::timePointValid(downstream_request_complete_time_)) {
@@ -833,6 +990,8 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head
   route_entry_->finalizeResponseHeaders(*headers, callbacks_->streamInfo());
 
   downstream_response_started_ = true;
+  final_upstream_request_ = &upstream_request;
+  resetOtherUpstreams(upstream_request);
   if (end_stream) {
     onUpstreamComplete(upstream_request);
   }
@@ -844,6 +1003,9 @@ void Filter::onUpstreamHeaders(uint64_t response_code, Http::HeaderMapPtr&& head
 
 void Filter::onUpstreamData(Buffer::Instance& data, UpstreamRequest& upstream_request,
                             bool end_stream) {
+  // This should be true because when we saw headers we either reset the stream
+  // (hence wouldn't have made it to onUpstreamData) or all other in-flight
+  // streams.
   ASSERT(upstream_requests_.size() == 1);
   if (end_stream) {
     // gRPC request termination without trailers is an error.
@@ -857,7 +1019,11 @@ void Filter::onUpstreamData(Buffer::Instance& data, UpstreamRequest& upstream_re
 }
 
 void Filter::onUpstreamTrailers(Http::HeaderMapPtr&& trailers, UpstreamRequest& upstream_request) {
+  // This should be true because when we saw headers we either reset the stream
+  // (hence wouldn't have made it to onUpstreamTrailers) or all other in-flight
+  // streams.
   ASSERT(upstream_requests_.size() == 1);
+
   if (upstream_request.grpc_rq_success_deferred_) {
     absl::optional<Grpc::Status::GrpcStatus> grpc_status = Grpc::Common::getGrpcStatus(*trailers);
     if (grpc_status &&
@@ -867,7 +1033,9 @@ void Filter::onUpstreamTrailers(Http::HeaderMapPtr&& trailers, UpstreamRequest&
       upstream_request.upstream_host_->stats().rq_error_.inc();
     }
   }
+
   onUpstreamComplete(upstream_request);
+
   callbacks_->encodeTrailers(std::move(trailers));
 }
 
@@ -879,6 +1047,7 @@ void Filter::onUpstreamComplete(UpstreamRequest& upstream_request) {
   if (!downstream_end_stream_) {
     upstream_request.resetStream();
   }
+  callbacks_->streamInfo().setUpstreamTiming(final_upstream_request_->upstream_timing_);
 
   if (config_.emit_dynamic_stats_ && !callbacks_->streamInfo().healthCheck() &&
       DateUtil::timePointValid(downstream_request_complete_time_)) {
@@ -923,10 +1092,11 @@ void Filter::onUpstreamComplete(UpstreamRequest& upstream_request) {
     }
   }
 
+  upstream_request.removeFromList(upstream_requests_);
   cleanup();
 }
 
-bool Filter::setupRetry(bool end_stream) {
+bool Filter::setupRetry() {
   // If we responded before the request was complete we don't bother doing a retry. This may not
   // catch certain cases where we are in full streaming mode and we have a connect timeout or an
   // overflow of some kind. However, in many cases deployments will use the buffer filter before
@@ -935,14 +1105,9 @@ bool Filter::setupRetry(bool end_stream) {
   if (!downstream_end_stream_) {
     return false;
   }
+  pending_retries_++;
 
-  ASSERT(upstream_requests_.size() == 1);
   ENVOY_STREAM_LOG(debug, "performing retry", *callbacks_);
-  if (!end_stream) {
-    upstream_requests_.front()->resetStream();
-  }
-
-  upstream_requests_.front()->removeFromList(upstream_requests_);
 
   return true;
 }
@@ -984,6 +1149,8 @@ bool Filter::setupRedirect(const Http::HeaderMap& headers, UpstreamRequest& upst
 void Filter::doRetry() {
   is_retry_ = true;
   attempt_count_++;
+  ASSERT(pending_retries_ > 0);
+  pending_retries_--;
   Http::ConnectionPool::Instance* conn_pool = getConnPool();
   if (!conn_pool) {
     sendNoHealthyUpstreamResponse();
@@ -997,10 +1164,13 @@ void Filter::doRetry() {
 
   ASSERT(response_timeout_ || timeout_.global_timeout_.count() == 0);
   UpstreamRequestPtr upstream_request = std::make_unique<UpstreamRequest>(*this, *conn_pool);
+  UpstreamRequest* upstream_request_tmp = upstream_request.get();
   upstream_request->moveIntoList(std::move(upstream_request), upstream_requests_);
   upstream_requests_.front()->encodeHeaders(!callbacks_->decodingBuffer() && !downstream_trailers_);
-  // It's possible we got immediately reset.
-  if (upstream_requests_.size() == 1) {
+  // It's possible we got immediately reset which means the upstream request we just
+  // added to the front of the list might have been removed, so we need to check to make
+  // sure we don't encodeData on the wrong request.
+  if (!upstream_requests_.empty() && (upstream_requests_.front().get() == upstream_request_tmp)) {
     if (callbacks_->decodingBuffer()) {
       // If we are doing a retry we need to make a copy.
       Buffer::OwnedImpl copy(*callbacks_->decodingBuffer());
@@ -1013,11 +1183,18 @@ void Filter::doRetry() {
   }
 }
 
+uint32_t Filter::numRequestsAwaitingHeaders() {
+  return std::count_if(upstream_requests_.begin(), upstream_requests_.end(),
+                       [](const auto& req) -> bool { return req->awaiting_headers_; });
+}
+
 Filter::UpstreamRequest::UpstreamRequest(Filter& parent, Http::ConnectionPool::Instance& pool)
     : parent_(parent), conn_pool_(pool), grpc_rq_success_deferred_(false),
       stream_info_(pool.protocol(), parent_.callbacks_->dispatcher().timeSource()),
-      calling_encode_headers_(false), upstream_canary_(false), encode_complete_(false),
-      encode_trailers_(false), create_per_try_timeout_on_request_complete_(false) {
+      calling_encode_headers_(false), upstream_canary_(false), decode_complete_(false),
+      encode_complete_(false), encode_trailers_(false), retried_(false), awaiting_headers_(true),
+      outlier_detection_timeout_recorded_(false),
+      create_per_try_timeout_on_request_complete_(false) {
 
   if (parent_.config_.start_child_span_) {
     span_ = parent_.callbacks_->activeSpan().spawnChild(
@@ -1058,6 +1235,7 @@ void Filter::UpstreamRequest::decodeHeaders(Http::HeaderMapPtr&& headers, bool e
   upstream_timing_.onFirstUpstreamRxByteReceived(parent_.callbacks_->dispatcher().timeSource());
   maybeEndDecode(end_stream);
 
+  awaiting_headers_ = false;
   if (!parent_.config_.upstream_logs_.empty()) {
     upstream_headers_ = std::make_unique<Http::HeaderMapImpl>(*headers);
   }
@@ -1087,6 +1265,7 @@ void Filter::UpstreamRequest::decodeMetadata(Http::MetadataMapPtr&& metadata_map
 void Filter::UpstreamRequest::maybeEndDecode(bool end_stream) {
   if (end_stream) {
     upstream_timing_.onLastUpstreamRxByteReceived(parent_.callbacks_->dispatcher().timeSource());
+    decode_complete_ = true;
   }
 }
 
@@ -1144,6 +1323,7 @@ void Filter::UpstreamRequest::encodeTrailers(const Http::HeaderMap& trailers) {
 void Filter::UpstreamRequest::onResetStream(Http::StreamResetReason reason,
                                             absl::string_view transport_failure_reason) {
   clearRequestEncoder();
+  awaiting_headers_ = false;
   if (!calling_encode_headers_) {
     stream_info_.setResponseFlag(parent_.streamResetReasonToResponseFlag(reason));
     parent_.onUpstreamReset(reason, transport_failure_reason, *this);
@@ -1153,6 +1333,11 @@ void Filter::UpstreamRequest::onResetStream(Http::StreamResetReason reason,
 }
 
 void Filter::UpstreamRequest::resetStream() {
+  // Don't reset the stream if we're already done with it.
+  if (encode_complete_ && decode_complete_) {
+    return;
+  }
+
   if (conn_pool_stream_handle_) {
     ENVOY_STREAM_LOG(debug, "cancelling pool request", *parent_.callbacks_);
     ASSERT(!request_encoder_);
@@ -1164,6 +1349,7 @@ void Filter::UpstreamRequest::resetStream() {
     ENVOY_STREAM_LOG(debug, "resetting pool request", *parent_.callbacks_);
     request_encoder_->getStream().removeCallbacks(*this);
     request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset);
+    clearRequestEncoder();
   }
 }
 
@@ -1181,11 +1367,7 @@ void Filter::UpstreamRequest::onPerTryTimeout() {
   // to the global timeout
   if (!parent_.downstream_response_started_) {
     ENVOY_STREAM_LOG(debug, "upstream per try timeout", *parent_.callbacks_);
-    parent_.cluster_->stats().upstream_rq_per_try_timeout_.inc();
-    if (upstream_host_) {
-      upstream_host_->stats().rq_timeout_.inc();
-    }
-    resetStream();
+
     stream_info_.setResponseFlag(StreamInfo::ResponseFlag::UpstreamRequestTimeout);
     parent_.onPerTryTimeout(*this);
   } else {
@@ -1293,7 +1475,15 @@ void Filter::UpstreamRequest::clearRequestEncoder() {
 
 void Filter::UpstreamRequest::DownstreamWatermarkManager::onAboveWriteBufferHighWatermark() {
   ASSERT(parent_.request_encoder_);
-  ASSERT(parent_.parent_.upstream_requests_.size() == 1);
+
+  // There are two states we should get this callback in: 1) the watermark was
+  // hit due to writes from a different filter instance over a shared
+  // downstream connection, or 2) the watermark was hit due to THIS filter
+  // instance writing back the "winning" upstream request. In either case we
+  // can disable reads from upstream.
+  ASSERT(!parent_.parent_.final_upstream_request_ ||
+         &parent_ == parent_.parent_.final_upstream_request_);
+
   // The downstream connection is overrun. Pause reads from upstream.
   // If there are multiple calls to readDisable either the codec (H2) or the underlying
   // Network::Connection (H1) will handle reference counting.
@@ -1303,7 +1493,7 @@ void Filter::UpstreamRequest::DownstreamWatermarkManager::onAboveWriteBufferHigh
 
 void Filter::UpstreamRequest::DownstreamWatermarkManager::onBelowWriteBufferLowWatermark() {
   ASSERT(parent_.request_encoder_);
-  ASSERT(parent_.parent_.upstream_requests_.size() == 1);
+
   // One source of connection blockage has buffer available. Pass this on to the stream, which
   // will resume reads if this was the last remaining high watermark.
   parent_.parent_.cluster_->stats().upstream_flow_control_resumed_reading_total_.inc();
diff --git a/source/common/router/router.h b/source/common/router/router.h
index c8c786baec458..cf3e181428eff 100644
--- a/source/common/router/router.h
+++ b/source/common/router/router.h
@@ -63,6 +63,10 @@ class FilterUtility {
     std::chrono::milliseconds per_try_timeout_{0};
   };
 
+  struct HedgingParams {
+    bool hedge_on_per_try_timeout_;
+  };
+
   /**
    * Set the :scheme header based on the properties of the upstream cluster.
    */
@@ -89,7 +93,17 @@ class FilterUtility {
    * @return TimeoutData for both the global and per try timeouts.
    */
   static TimeoutData finalTimeout(const RouteEntry& route, Http::HeaderMap& request_headers,
-                                  bool insert_envoy_expected_request_timeout_ms, bool grpc_request);
+                                  bool insert_envoy_expected_request_timeout_ms, bool grpc_request,
+                                  bool per_try_timeout_hedging_enabled);
+
+  /**
+   * Determine the final hedging settings after applying randomized behavior.
+   * @param route supplies the request route.
+   * @param request_headers supplies the request headers.
+   * @return HedgingParams the final parameters to use for request hedging.
+   */
+  static HedgingParams finalHedgingParams(const RouteEntry& route,
+                                          Http::HeaderMap& request_headers);
 };
 
 /**
@@ -158,8 +172,8 @@ class Filter : Logger::Loggable<Logger::Id::router>,
                public Upstream::LoadBalancerContextBase {
 public:
   Filter(FilterConfig& config)
-      : config_(config), downstream_response_started_(false), downstream_end_stream_(false),
-        do_shadowing_(false), is_retry_(false),
+      : config_(config), final_upstream_request_(nullptr), downstream_response_started_(false),
+        downstream_end_stream_(false), do_shadowing_(false), is_retry_(false),
         attempting_internal_redirect_with_complete_stream_(false) {}
 
   ~Filter();
@@ -299,6 +313,9 @@ class Filter : Logger::Loggable<Logger::Id::router>,
       stream_info_.onUpstreamHostSelected(host);
       upstream_host_ = host;
       parent_.callbacks_->streamInfo().onUpstreamHostSelected(host);
+      if (parent_.retry_state_ && host) {
+        parent_.retry_state_->onHostAttempted(host);
+      }
     }
 
     // Http::StreamDecoder
@@ -315,12 +332,31 @@ class Filter : Logger::Loggable<Logger::Id::router>,
     void onBelowWriteBufferLowWatermark() override { enableDataFromDownstream(); }
 
     void disableDataFromDownstream() {
-      ASSERT(parent_.upstream_requests_.size() == 1);
+      // If there is only one upstream request, we can be assured that
+      // disabling reads will not slow down other upstream requests. If we've
+      // already seen the full downstream request (downstream_end_stream_) then
+      // disabling reads is a no-op.
+      // This assert condition must be true because
+      // parent_.upstream_requests_.size() can only be greater than 1 in the
+      // case of a per-try-timeout with hedge_on_per_try_timeout enabled, and
+      // the per try timeout timer is started only after downstream_end_stream_
+      // is true.
+      ASSERT(parent_.upstream_requests_.size() == 1 || parent_.downstream_end_stream_);
       parent_.cluster_->stats().upstream_flow_control_backed_up_total_.inc();
       parent_.callbacks_->onDecoderFilterAboveWriteBufferHighWatermark();
     }
+
     void enableDataFromDownstream() {
-      ASSERT(parent_.upstream_requests_.size() == 1);
+      // If there is only one upstream request, we can be assured that
+      // disabling reads will not overflow any write buffers in other upstream
+      // requests. If we've already seen the full downstream request
+      // (downstream_end_stream_) then enabling reads is a no-op.
+      // This assert condition must be true because
+      // parent_.upstream_requests_.size() can only be greater than 1 in the
+      // case of a per-try-timeout with hedge_on_per_try_timeout enabled, and
+      // the per try timeout timer is started only after downstream_end_stream_
+      // is true.
+      ASSERT(parent_.upstream_requests_.size() == 1 || parent_.downstream_end_stream_);
       parent_.cluster_->stats().upstream_flow_control_drained_total_.inc();
       parent_.callbacks_->onDecoderFilterBelowWriteBufferLowWatermark();
     }
@@ -367,8 +403,12 @@ class Filter : Logger::Loggable<Logger::Id::router>,
 
     bool calling_encode_headers_ : 1;
     bool upstream_canary_ : 1;
+    bool decode_complete_ : 1;
     bool encode_complete_ : 1;
     bool encode_trailers_ : 1;
+    bool retried_ : 1;
+    bool awaiting_headers_ : 1;
+    bool outlier_detection_timeout_recorded_ : 1;
     // Tracks whether we deferred a per try timeout because the downstream request
     // had not been completed yet.
     bool create_per_try_timeout_on_request_complete_ : 1;
@@ -383,6 +423,7 @@ class Filter : Logger::Loggable<Logger::Id::router>,
                           Upstream::HostDescriptionConstSharedPtr upstream_host, bool dropped);
   void chargeUpstreamCode(Http::Code code, Upstream::HostDescriptionConstSharedPtr upstream_host,
                           bool dropped);
+  void chargeUpstreamAbort(Http::Code code, bool dropped, UpstreamRequest& upstream_request);
   void cleanup();
   virtual RetryStatePtr createRetryState(const RetryPolicy& policy,
                                          Http::HeaderMap& request_headers,
@@ -393,12 +434,16 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   Http::ConnectionPool::Instance* getConnPool();
   void maybeDoShadowing();
   bool maybeRetryReset(Http::StreamResetReason reset_reason, UpstreamRequest& upstream_request);
+  uint32_t numRequestsAwaitingHeaders();
+  void onGlobalTimeout();
   void onPerTryTimeout(UpstreamRequest& upstream_request);
   void onRequestComplete();
   void onResponseTimeout();
   void onUpstream100ContinueHeaders(Http::HeaderMapPtr&& headers,
                                     UpstreamRequest& upstream_request);
   // Handle an upstream request aborted due to a local timeout.
+  void onSoftPerTryTimeout();
+  void onSoftPerTryTimeout(UpstreamRequest& upstream_request);
   void onUpstreamTimeoutAbort(StreamInfo::ResponseFlag response_flag, absl::string_view details);
   // Handle an "aborted" upstream request, meaning we didn't see response
   // headers (e.g. due to a reset). Handles recording stats and responding
@@ -413,8 +458,14 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   void onUpstreamComplete(UpstreamRequest& upstream_request);
   void onUpstreamReset(Http::StreamResetReason reset_reason, absl::string_view transport_failure,
                        UpstreamRequest& upstream_request);
+  // Reset all in-flight upstream requests.
+  void resetAll();
+  // Reset all in-flight upstream requests that do NOT match the passed argument. This is used
+  // if a "good" response comes back and we return downstream, so there is no point in waiting
+  // for the remaining upstream requests to return.
+  void resetOtherUpstreams(UpstreamRequest& upstream_request);
   void sendNoHealthyUpstreamResponse();
-  bool setupRetry(bool end_stream);
+  bool setupRetry();
   bool setupRedirect(const Http::HeaderMap& headers, UpstreamRequest& upstream_request);
   void updateOutlierDetection(Http::Code code, UpstreamRequest& upstream_request);
   void doRetry();
@@ -434,8 +485,12 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   const VirtualCluster* request_vcluster_;
   Event::TimerPtr response_timeout_;
   FilterUtility::TimeoutData timeout_;
+  FilterUtility::HedgingParams hedging_params_;
   Http::Code timeout_response_code_ = Http::Code::GatewayTimeout;
   std::list<UpstreamRequestPtr> upstream_requests_;
+  // Tracks which upstream request "wins" and will have the corresponding
+  // response forwarded downstream
+  UpstreamRequest* final_upstream_request_;
   bool grpc_request_{};
   Http::HeaderMap* downstream_headers_{};
   Http::HeaderMap* downstream_trailers_{};
@@ -453,6 +508,7 @@ class Filter : Logger::Loggable<Logger::Id::router>,
   bool include_attempt_count_ : 1;
   bool attempting_internal_redirect_with_complete_stream_ : 1;
   uint32_t attempt_count_{1};
+  uint32_t pending_retries_{0};
 };
 
 class ProdFilter : public Filter {
diff --git a/test/common/http/conn_manager_impl_test.cc b/test/common/http/conn_manager_impl_test.cc
index eba68e39a4333..d0463bde38d09 100644
--- a/test/common/http/conn_manager_impl_test.cc
+++ b/test/common/http/conn_manager_impl_test.cc
@@ -3145,10 +3145,13 @@ TEST_F(HttpConnectionManagerImplTest, HitFilterWatermarkLimits) {
       .WillOnce(Return(FilterDataStatus::StopIterationAndWatermark));
   decoder_filters_[0]->callbacks_->encodeData(fake_response, false);
 
+  // unregister callbacks2
+  decoder_filters_[0]->callbacks_->removeDownstreamWatermarkCallbacks(callbacks2);
+
   // Change the limit so the buffered data is below the new watermark.
   buffer_len = encoder_filters_[1]->callbacks_->encodingBuffer()->length();
   EXPECT_CALL(callbacks, onBelowWriteBufferLowWatermark());
-  EXPECT_CALL(callbacks2, onBelowWriteBufferLowWatermark());
+  EXPECT_CALL(callbacks2, onBelowWriteBufferLowWatermark()).Times(0);
   encoder_filters_[1]->callbacks_->setEncoderBufferLimit((buffer_len + 1) * 2);
 }
 
diff --git a/test/common/http/conn_manager_utility_test.cc b/test/common/http/conn_manager_utility_test.cc
index 44068b01899ab..806e6cf6ebb7e 100644
--- a/test/common/http/conn_manager_utility_test.cc
+++ b/test/common/http/conn_manager_utility_test.cc
@@ -520,6 +520,7 @@ TEST_F(ConnectionManagerUtilityTest, ExternalAddressExternalRequestUseRemote) {
   route_config_.internal_only_headers_.push_back(LowerCaseString("custom_header"));
   TestHeaderMapImpl headers{{"x-envoy-decorator-operation", "foo"},
                             {"x-envoy-downstream-service-cluster", "foo"},
+                            {"x-envoy-hedge-on-per-try-timeout", "foo"},
                             {"x-envoy-retriable-status-codes", "123,456"},
                             {"x-envoy-retry-on", "foo"},
                             {"x-envoy-retry-grpc-on", "foo"},
@@ -537,6 +538,7 @@ TEST_F(ConnectionManagerUtilityTest, ExternalAddressExternalRequestUseRemote) {
   EXPECT_EQ("50.0.0.1", headers.get_("x-envoy-external-address"));
   EXPECT_FALSE(headers.has("x-envoy-decorator-operation"));
   EXPECT_FALSE(headers.has("x-envoy-downstream-service-cluster"));
+  EXPECT_FALSE(headers.has("x-envoy-hedge-on-per-try-timeout"));
   EXPECT_FALSE(headers.has("x-envoy-retriable-status-codes"));
   EXPECT_FALSE(headers.has("x-envoy-retry-on"));
   EXPECT_FALSE(headers.has("x-envoy-retry-grpc-on"));
diff --git a/test/common/router/router_test.cc b/test/common/router/router_test.cc
index 6909728926f5a..d38ae33d2c711 100644
--- a/test/common/router/router_test.cc
+++ b/test/common/router/router_test.cc
@@ -211,6 +211,15 @@ class RouterTestBase : public testing::Test {
     ON_CALL(callbacks_, connection()).WillByDefault(Return(&connection_));
   }
 
+  void enableHedgeOnPerTryTimeout() {
+    callbacks_.route_->route_entry_.hedge_policy_.hedge_on_per_try_timeout_ = true;
+    callbacks_.route_->route_entry_.hedge_policy_.additional_request_chance_ =
+        envoy::type::FractionalPercent{};
+    callbacks_.route_->route_entry_.hedge_policy_.additional_request_chance_.set_numerator(0);
+    callbacks_.route_->route_entry_.hedge_policy_.additional_request_chance_.set_denominator(
+        envoy::type::FractionalPercent::HUNDRED);
+  }
+
   Event::SimulatedTimeSystem test_time_;
   std::string upstream_zone_{"to_az"};
   envoy::api::v2::core::Locality upstream_locality_;
@@ -1311,6 +1320,275 @@ TEST_F(RouterTest, UpstreamPerTryTimeoutExcludesNewStream) {
   EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
 }
 
+// Tests that a retry is sent after the first request hits the per try timeout, but then
+// headers received in response to the first request are still used (and the 2nd request
+// canceled).
+TEST_F(RouterTest, HedgedPerTryTimeoutFirstRequestSucceeds) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  per_try_timeout_->callback_();
+
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+
+  // We should not have updated any stats yet because no requests have been
+  // canceled
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 0));
+
+  // Now write a 200 back. We expect the 2nd stream to be reset and stats to be
+  // incremented properly.
+  Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(encoder2.stream_, resetStream(_));
+
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool end_stream) -> void {
+        EXPECT_EQ(headers.Status()->value(), "200");
+        EXPECT_TRUE(end_stream);
+      }));
+  response_decoder1->decodeHeaders(std::move(response_headers), true);
+  EXPECT_TRUE(verifyHostUpstreamStats(1, 0));
+
+  // TODO: Verify hedge stats here once they are implemented.
+}
+
+// Three requests sent: 1) 5xx error, 2) per try timeout, 3) gets good response
+// headers.
+TEST_F(RouterTest, HedgedPerTryTimeoutThirdRequestSucceeds) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectResponseTimerCreate();
+  expectPerTryTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+
+  Http::HeaderMapPtr response_headers1(new Http::TestHeaderMapImpl{{":status", "500"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(500));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  router_.retry_state_->expectHeadersRetry();
+  response_decoder1->decodeHeaders(std::move(response_headers1), true);
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
+
+  // Now trigger a per try timeout on the 2nd request, expect a 3rd
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  NiceMock<Http::MockStreamEncoder> encoder3;
+  Http::StreamDecoder* response_decoder3 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder3 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder3, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  per_try_timeout_->callback_();
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
+
+  // Now write a 200 back. We expect the 2nd stream to be reset and stats to be
+  // incremented properly.
+  Http::HeaderMapPtr response_headers2(new Http::TestHeaderMapImpl{{":status", "200"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(encoder2.stream_, resetStream(_));
+  EXPECT_CALL(encoder3.stream_, resetStream(_)).Times(0);
+
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool end_stream) -> void {
+        EXPECT_EQ(headers.Status()->value(), "200");
+        EXPECT_TRUE(end_stream);
+      }));
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No));
+  response_decoder3->decodeHeaders(std::move(response_headers2), true);
+  EXPECT_TRUE(verifyHostUpstreamStats(1, 1));
+
+  // TODO: Verify hedge stats here once they are implemented.
+}
+
+// First request times out and is retried, and then a response is received.
+// Make sure we don't attempt to retry because we already retried for timeout.
+TEST_F(RouterTest, RetryOnlyOnceForSameUpstreamRequest) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  per_try_timeout_->callback_();
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+
+  // Now send a 5xx back and make sure we don't ask whether we should retry it.
+  Http::HeaderMapPtr response_headers1(new Http::TestHeaderMapImpl{{":status", "500"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(500));
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).Times(0);
+  EXPECT_CALL(*router_.retry_state_, wouldRetryFromHeaders(_)).WillOnce(Return(true));
+  response_decoder1->decodeHeaders(std::move(response_headers1), true);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+
+  response_timeout_->callback_();
+}
+
+// Sequence: upstream request hits soft per try timeout and is retried, and
+// then "bad" response headers come back before the retry has been scheduled.
+// Ensures that the "bad" headers are not sent downstream because there is
+// still an attempt pending.
+TEST_F(RouterTest, BadHeadersDroppedIfPreviousRetryScheduled) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  per_try_timeout_->callback_();
+
+  expectPerTryTimerCreate();
+
+  // Now send a 5xx back and make sure we don't ask whether we should retry it
+  // and also that we don't respond downstream with it.
+  Http::HeaderMapPtr response_headers1(new Http::TestHeaderMapImpl{{":status", "500"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(500));
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).Times(0);
+  EXPECT_CALL(*router_.retry_state_, wouldRetryFromHeaders(_)).WillOnce(Return(true));
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  response_decoder1->decodeHeaders(std::move(response_headers1), true);
+
+  // Now trigger the retry for the per try timeout earlier.
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  router_.retry_state_->callback_();
+
+  Http::HeaderMapPtr response_headers2(new Http::TestHeaderMapImpl{{":status", "200"}});
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No));
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool end_stream) -> void {
+        EXPECT_EQ(headers.Status()->value(), "200");
+        EXPECT_TRUE(end_stream);
+      }));
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
+  response_decoder2->decodeHeaders(std::move(response_headers2), true);
+}
+
 TEST_F(RouterTest, RetryRequestNotComplete) {
   NiceMock<Http::MockStreamEncoder> encoder1;
   Http::StreamDecoder* response_decoder = nullptr;
@@ -1338,6 +1616,270 @@ TEST_F(RouterTest, RetryRequestNotComplete) {
   EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
 }
 
+// Two requests are sent (slow request + hedged retry) and then global timeout
+// is hit. Verify everything gets cleaned up.
+TEST_F(RouterTest, HedgedPerTryTimeoutGlobalTimeout) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  per_try_timeout_->callback_();
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 0));
+
+  // Now trigger global timeout, expect everything to be reset
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(1);
+  EXPECT_CALL(encoder2.stream_, resetStream(_)).Times(1);
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool) -> void {
+        EXPECT_EQ(headers.Status()->value(), "504");
+      }));
+  response_timeout_->callback_();
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 2));
+  EXPECT_EQ(2, cm_.conn_pool_.host_->stats_store_.counter("rq_timeout").value());
+  // TODO: Verify hedge stats here once they are implemented.
+}
+
+// Sequence: 1) per try timeout w/ hedge retry, 2) second request gets a 5xx
+// response, no retries remaining 3) first request gets a 5xx response.
+TEST_F(RouterTest, HedgingRetriesExhaustedBadResponse) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  per_try_timeout_->callback_();
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 0));
+
+  // Now trigger a 503 in response to the second request.
+  Http::HeaderMapPtr bad_response_headers1(new Http::TestHeaderMapImpl{{":status", "503"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(503));
+
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _))
+      .WillOnce(Return(RetryStatus::NoRetryLimitExceeded));
+  response_decoder2->decodeHeaders(std::move(bad_response_headers1), true);
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
+
+  // Now trigger a 502 in response to the first request.
+  Http::HeaderMapPtr bad_response_headers2(new Http::TestHeaderMapImpl{{":status", "502"}});
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(502));
+
+  // We should not call shouldRetryHeaders() because you never retry the same
+  // request twice.
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).Times(0);
+
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool) -> void {
+        EXPECT_EQ(headers.Status()->value(), "502");
+      }));
+  response_decoder1->decodeHeaders(std::move(bad_response_headers2), true);
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 2));
+}
+
+// Sequence: 1) per try timeout w/ hedge retry, 2) first request gets reset by upstream,
+// 3) 2nd request gets a 200 which should be sent downstream.
+TEST_F(RouterTest, HedgingRetriesProceedAfterReset) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder1;
+  Http::StreamDecoder* response_decoder1 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder1 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, true);
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  EXPECT_CALL(encoder1.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  per_try_timeout_->callback_();
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  Http::StreamDecoder* response_decoder2 = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder2 = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  expectPerTryTimerCreate();
+  router_.retry_state_->callback_();
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 0));
+
+  // Now trigger an upstream reset in response to the first request.
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(503));
+  EXPECT_CALL(encoder1.stream_, resetStream(_));
+  encoder1.stream_.resetStream(Http::StreamResetReason::RemoteReset);
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
+
+  // We should not call shouldRetryReset() because you never retry the same
+  // request twice.
+  EXPECT_CALL(*router_.retry_state_, shouldRetryReset(_, _)).Times(0);
+
+  // Now trigger a 200 in response to the second request.
+  Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}});
+
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No));
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool) -> void {
+        EXPECT_EQ(headers.Status()->value(), "200");
+      }));
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
+  response_decoder2->decodeHeaders(std::move(response_headers), true);
+
+  EXPECT_TRUE(verifyHostUpstreamStats(1, 1));
+}
+
+// Sequence: 1) request with data hits per try timeout w/ hedge retry, 2)
+// second request is immediately reset 3) 1st request gets a 200.
+// The goal of this test is to ensure that the router can properly detect that an immediate
+// reset happens and that we don't accidentally write data twice on the first request.
+TEST_F(RouterTest, HedgingRetryImmediatelyReset) {
+  enableHedgeOnPerTryTimeout();
+
+  NiceMock<Http::MockStreamEncoder> encoder;
+  Http::StreamDecoder* response_decoder = nullptr;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        response_decoder = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        callbacks.onPoolReady(encoder, cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+
+  Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+  HttpTestUtility::addDefaultHeaders(headers);
+  router_.decodeHeaders(headers, false);
+
+  expectPerTryTimerCreate();
+  expectResponseTimerCreate();
+  Buffer::OwnedImpl body("test body");
+  EXPECT_CALL(encoder, encodeData(_, _)).Times(1);
+  Buffer::InstancePtr body_data(new Buffer::OwnedImpl("hello"));
+  router_.retry_state_->expectHedgedPerTryTimeoutRetry();
+  EXPECT_EQ(Http::FilterDataStatus::StopIterationNoBuffer, router_.decodeData(*body_data, true));
+
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
+  EXPECT_CALL(encoder.stream_, resetStream(_)).Times(0);
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _)).Times(0);
+  per_try_timeout_->callback_();
+
+  NiceMock<Http::MockStreamEncoder> encoder2;
+  EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
+      .WillOnce(Invoke([&](Http::StreamDecoder&, Http::ConnectionPool::Callbacks& callbacks)
+                           -> Http::ConnectionPool::Cancellable* {
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
+        EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(503));
+        callbacks.onPoolFailure(Http::ConnectionPool::PoolFailureReason::ConnectionFailure,
+                                absl::string_view(), cm_.conn_pool_.host_);
+        return nullptr;
+      }));
+  EXPECT_CALL(*router_.retry_state_, shouldRetryReset(_, _))
+      .WillOnce(Return(RetryStatus::NoRetryLimitExceeded));
+  ON_CALL(callbacks_, decodingBuffer()).WillByDefault(Return(body_data.get()));
+  router_.retry_state_->callback_();
+
+  EXPECT_TRUE(verifyHostUpstreamStats(0, 1));
+
+  // Now trigger a 200 in response to the first request.
+  Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}});
+
+  // The request was already retried when the per try timeout occurred so it
+  // should't even consult the retry state.
+  EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).Times(0);
+  EXPECT_CALL(callbacks_, encodeHeaders_(_, _))
+      .WillOnce(Invoke([&](Http::HeaderMap& headers, bool) -> void {
+        EXPECT_EQ(headers.Status()->value(), "200");
+      }));
+  EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(200));
+  response_decoder->decodeHeaders(std::move(response_headers), true);
+
+  EXPECT_TRUE(verifyHostUpstreamStats(1, 1));
+}
+
 TEST_F(RouterTest, RetryNoneHealthy) {
   NiceMock<Http::MockStreamEncoder> encoder1;
   Http::StreamDecoder* response_decoder = nullptr;
@@ -1424,6 +1966,7 @@ TEST_F(RouterTest, RetryUpstreamPerTryTimeout) {
       .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
                            -> Http::ConnectionPool::Cancellable* {
         response_decoder = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
         callbacks.onPoolReady(encoder1, cm_.conn_pool_.host_);
         return nullptr;
       }));
@@ -1436,7 +1979,6 @@ TEST_F(RouterTest, RetryUpstreamPerTryTimeout) {
   HttpTestUtility::addDefaultHeaders(headers);
   router_.decodeHeaders(headers, true);
 
-  EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
   router_.retry_state_->expectResetRetry();
   EXPECT_CALL(cm_.conn_pool_.host_->outlier_detector_, putHttpResponseCode(504));
   per_try_timeout_->callback_();
@@ -1444,6 +1986,7 @@ TEST_F(RouterTest, RetryUpstreamPerTryTimeout) {
 
   // We expect this reset to kick off a new request.
   NiceMock<Http::MockStreamEncoder> encoder2;
+  EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
   EXPECT_CALL(cm_.conn_pool_, newStream(_, _))
       .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
                            -> Http::ConnectionPool::Cancellable* {
@@ -1454,7 +1997,6 @@ TEST_F(RouterTest, RetryUpstreamPerTryTimeout) {
   expectPerTryTimerCreate();
   router_.retry_state_->callback_();
 
-  EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
   // Normal response.
   EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No));
   Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}});
@@ -1493,13 +2035,12 @@ TEST_F(RouterTest, RetryUpstreamConnectionFailure) {
       .WillOnce(Invoke([&](Http::StreamDecoder& decoder, Http::ConnectionPool::Callbacks& callbacks)
                            -> Http::ConnectionPool::Cancellable* {
         response_decoder = &decoder;
+        EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
         callbacks.onPoolReady(encoder2, cm_.conn_pool_.host_);
         return nullptr;
       }));
   router_.retry_state_->callback_();
 
-  EXPECT_CALL(*router_.retry_state_, onHostAttempted(_));
-
   // Normal response.
   EXPECT_CALL(*router_.retry_state_, shouldRetryHeaders(_, _)).WillOnce(Return(RetryStatus::No));
   Http::HeaderMapPtr response_headers(new Http::TestHeaderMapImpl{{":status", "200"}});
@@ -2519,12 +3060,81 @@ TEST_F(RouterTest, UpstreamTimingTimeout) {
   EXPECT_EQ(stream_info.firstUpstreamRxByteReceived().value(), std::chrono::milliseconds(56));
 }
 
+TEST(RouterFilterUtilityTest, FinalHedgingParamsHedgeOnPerTryTimeout) {
+  Http::TestHeaderMapImpl empty_headers;
+  { // route says true, header not present, expect true.
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = true;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams =
+        FilterUtility::finalHedgingParams(route, empty_headers);
+    EXPECT_TRUE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says false, header not present, expect false.
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = false;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams =
+        FilterUtility::finalHedgingParams(route, empty_headers);
+    EXPECT_FALSE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says false, header says true, expect true.
+    Http::TestHeaderMapImpl headers{{"x-envoy-hedge-on-per-try-timeout", "true"}};
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = false;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams = FilterUtility::finalHedgingParams(route, headers);
+    EXPECT_TRUE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says false, header says false, expect false.
+    Http::TestHeaderMapImpl headers{{"x-envoy-hedge-on-per-try-timeout", "false"}};
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = false;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams = FilterUtility::finalHedgingParams(route, headers);
+    EXPECT_FALSE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says true, header says false, expect false.
+    Http::TestHeaderMapImpl headers{{"x-envoy-hedge-on-per-try-timeout", "false"}};
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = true;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams = FilterUtility::finalHedgingParams(route, headers);
+    EXPECT_FALSE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says true, header says true, expect true.
+    Http::TestHeaderMapImpl headers{{"x-envoy-hedge-on-per-try-timeout", "true"}};
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = true;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams = FilterUtility::finalHedgingParams(route, headers);
+    EXPECT_TRUE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says true, header is invalid, expect true.
+    Http::TestHeaderMapImpl headers{{"x-envoy-hedge-on-per-try-timeout", "bad"}};
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = true;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams = FilterUtility::finalHedgingParams(route, headers);
+    EXPECT_TRUE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+  { // route says false, header is invalid, expect false.
+    Http::TestHeaderMapImpl headers{{"x-envoy-hedge-on-per-try-timeout", "bad"}};
+    NiceMock<MockRouteEntry> route;
+    route.hedge_policy_.hedge_on_per_try_timeout_ = false;
+    EXPECT_CALL(route, hedgePolicy).WillRepeatedly(ReturnRef(route.hedge_policy_));
+    FilterUtility::HedgingParams hedgingParams = FilterUtility::finalHedgingParams(route, headers);
+    EXPECT_FALSE(hedgingParams.hedge_on_per_try_timeout_);
+  }
+}
+
 TEST(RouterFilterUtilityTest, FinalTimeout) {
   {
     NiceMock<MockRouteEntry> route;
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers;
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(10), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
   }
@@ -2532,7 +3142,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     NiceMock<MockRouteEntry> route;
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2543,7 +3154,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     NiceMock<MockRouteEntry> route;
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "bad"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(10), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2555,7 +3167,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"},
                                     {"x-envoy-upstream-rq-per-try-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2568,7 +3181,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"},
                                     {"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(5), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2576,12 +3190,42 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_EQ("5", headers.get_("x-envoy-expected-rq-timeout-ms"));
     EXPECT_FALSE(headers.has("grpc-timeout"));
   }
+  {
+    NiceMock<MockRouteEntry> route;
+    EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
+    Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"},
+                                    {"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, true);
+    EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
+    EXPECT_EQ(std::chrono::milliseconds(5), timeout.per_try_timeout_);
+    EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
+    EXPECT_FALSE(headers.has("x-envoy-upstream-rq-per-try-timeout-ms"));
+    EXPECT_EQ("15", headers.get_("x-envoy-expected-rq-timeout-ms"));
+    EXPECT_FALSE(headers.has("grpc-timeout"));
+  }
+  {
+    NiceMock<MockRouteEntry> route;
+    EXPECT_CALL(route, maxGrpcTimeout())
+        .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(10)));
+    Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"},
+                                    {"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, true);
+    EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
+    EXPECT_EQ(std::chrono::milliseconds(5), timeout.per_try_timeout_);
+    EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
+    EXPECT_FALSE(headers.has("x-envoy-upstream-rq-per-try-timeout-ms"));
+    EXPECT_EQ("15", headers.get_("x-envoy-expected-rq-timeout-ms"));
+    EXPECT_EQ("15m", headers.get_("grpc-timeout"));
+  }
   {
     NiceMock<MockRouteEntry> route;
     route.retry_policy_.per_try_timeout_ = std::chrono::milliseconds(7);
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(7), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2595,7 +3239,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"},
                                     {"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(5), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2608,7 +3253,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, maxGrpcTimeout())
         .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(0)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("grpc-timeout"));
@@ -2618,7 +3264,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, maxGrpcTimeout()).WillRepeatedly(Return(absl::nullopt));
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(10), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("grpc-timeout"));
@@ -2629,7 +3276,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
         .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(0)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"},
                                     {"grpc-timeout", "1000m"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(1000), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_EQ("1000m", headers.get_("grpc-timeout"));
@@ -2640,7 +3288,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
         .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(999)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"},
                                     {"grpc-timeout", "1000m"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(999), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_EQ("999m", headers.get_("grpc-timeout"));
@@ -2650,7 +3299,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, maxGrpcTimeout())
         .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(999)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"}, {"grpc-timeout", "0m"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(999), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_EQ("999m", headers.get_("grpc-timeout"));
@@ -2662,7 +3312,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, grpcTimeoutOffset())
         .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(10)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"}, {"grpc-timeout", "100m"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(90), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
   }
@@ -2673,7 +3324,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     EXPECT_CALL(route, grpcTimeoutOffset())
         .WillRepeatedly(Return(absl::optional<std::chrono::milliseconds>(10)));
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"}, {"grpc-timeout", "1m"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(1), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
   }
@@ -2684,7 +3336,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"},
                                     {"grpc-timeout", "1000m"},
                                     {"x-envoy-upstream-rq-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2698,7 +3351,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"},
                                     {"grpc-timeout", "1000m"},
                                     {"x-envoy-upstream-rq-timeout-ms", "bad"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(1000), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2713,7 +3367,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
                                     {"grpc-timeout", "1000m"},
                                     {"x-envoy-upstream-rq-timeout-ms", "15"},
                                     {"x-envoy-upstream-rq-per-try-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2729,7 +3384,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
                                     {"grpc-timeout", "1000m"},
                                     {"x-envoy-upstream-rq-timeout-ms", "15"},
                                     {"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(5), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2745,7 +3401,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
     Http::TestHeaderMapImpl headers{{"content-type", "application/grpc"},
                                     {"grpc-timeout", "1000m"},
                                     {"x-envoy-upstream-rq-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(7), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2762,7 +3419,8 @@ TEST(RouterFilterUtilityTest, FinalTimeout) {
                                     {"grpc-timeout", "1000m"},
                                     {"x-envoy-upstream-rq-timeout-ms", "15"},
                                     {"x-envoy-upstream-rq-per-try-timeout-ms", "5"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, true);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, true, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(5), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -2777,7 +3435,8 @@ TEST(RouterFilterUtilityTest, FinalTimeoutSupressEnvoyHeaders) {
     NiceMock<MockRouteEntry> route;
     EXPECT_CALL(route, timeout()).WillOnce(Return(std::chrono::milliseconds(10)));
     Http::TestHeaderMapImpl headers{{"x-envoy-upstream-rq-timeout-ms", "15"}};
-    FilterUtility::TimeoutData timeout = FilterUtility::finalTimeout(route, headers, true, false);
+    FilterUtility::TimeoutData timeout =
+        FilterUtility::finalTimeout(route, headers, true, false, false);
     EXPECT_EQ(std::chrono::milliseconds(15), timeout.global_timeout_);
     EXPECT_EQ(std::chrono::milliseconds(0), timeout.per_try_timeout_);
     EXPECT_FALSE(headers.has("x-envoy-upstream-rq-timeout-ms"));
@@ -3064,7 +3723,10 @@ TEST_F(WatermarkTest, DownstreamWatermarks) {
 }
 
 TEST_F(WatermarkTest, UpstreamWatermarks) {
-  sendRequest();
+  sendRequest(false);
+
+  response_decoder_->decodeHeaders(
+      Http::HeaderMapPtr{new Http::TestHeaderMapImpl{{":status", "200"}}}, false);
 
   ASSERT(callbacks_.callbacks_.begin() != callbacks_.callbacks_.end());
   Envoy::Http::DownstreamWatermarkCallbacks* watermark_callbacks = *callbacks_.callbacks_.begin();
@@ -3083,7 +3745,9 @@ TEST_F(WatermarkTest, UpstreamWatermarks) {
                     .counter("upstream_flow_control_resumed_reading_total")
                     .value());
 
-  sendResponse();
+  Buffer::OwnedImpl data;
+  EXPECT_CALL(encoder_, getStream()).Times(2).WillRepeatedly(ReturnRef(stream_));
+  response_decoder_->decodeData(data, true);
 }
 
 TEST_F(WatermarkTest, FilterWatermarks) {
diff --git a/test/integration/http_timeout_integration_test.cc b/test/integration/http_timeout_integration_test.cc
index 11539865ffc6a..70ecd9036ea15 100644
--- a/test/integration/http_timeout_integration_test.cc
+++ b/test/integration/http_timeout_integration_test.cc
@@ -95,4 +95,185 @@ TEST_P(HttpTimeoutIntegrationTest, PerTryTimeout) {
   EXPECT_EQ("504", response->headers().Status()->value().getStringView());
 }
 
+// With hedge_on_per_try_timeout enabled via config, sends a request with a
+// global timeout and per try timeout specified, sleeps for longer than the per
+// try but slightly less than the global timeout. We then have the first
+// upstream request return headers and expect those to be returned downstream
+// (which proves the request was not canceled when the timeout was hit).
+TEST_P(HttpTimeoutIntegrationTest, HedgedPerTryTimeout) {
+  initialize();
+
+  codec_client_ = makeHttpConnection(makeClientConnection(lookupPort("http")));
+  auto encoder_decoder = codec_client_->startRequest(
+      Http::TestHeaderMapImpl{{":method", "POST"},
+                              {":path", "/test/long/url"},
+                              {":scheme", "http"},
+                              {":authority", "host"},
+                              {"x-forwarded-for", "10.0.0.1"},
+                              {"x-envoy-retry-on", "5xx"},
+                              {"x-envoy-hedge-on-per-try-timeout", "true"},
+                              {"x-envoy-upstream-rq-timeout-ms", "500"},
+                              {"x-envoy-upstream-rq-per-try-timeout-ms", "400"}});
+  auto response = std::move(encoder_decoder.second);
+  request_encoder_ = &encoder_decoder.first;
+
+  ASSERT_TRUE(fake_upstreams_[0]->waitForHttpConnection(*dispatcher_, fake_upstream_connection_));
+  ASSERT_TRUE(fake_upstream_connection_->waitForNewStream(*dispatcher_, upstream_request_));
+  ASSERT_TRUE(upstream_request_->waitForHeadersComplete());
+  codec_client_->sendData(*request_encoder_, 0, true);
+
+  ASSERT_TRUE(upstream_request_->waitForEndStream(*dispatcher_));
+
+  // Trigger per try timeout (but not global timeout).
+  timeSystem().sleep(std::chrono::milliseconds(400));
+
+  // Trigger retry (there's a 25ms backoff before it's issued).
+  timeSystem().sleep(std::chrono::milliseconds(26));
+
+  // Wait for a second request to be sent upstream
+  FakeStreamPtr upstream_request2;
+  ASSERT_TRUE(fake_upstream_connection_->waitForNewStream(*dispatcher_, upstream_request2));
+  ASSERT_TRUE(upstream_request2->waitForHeadersComplete());
+  ASSERT_TRUE(upstream_request2->waitForEndStream(*dispatcher_));
+
+  // Encode 200 response headers for the first (timed out) request.
+  Http::TestHeaderMapImpl response_headers{{":status", "200"}};
+  upstream_request_->encodeHeaders(response_headers, true);
+
+  response->waitForHeaders();
+
+  // The second request should be reset since we used the response from the first request.
+  ASSERT_TRUE(upstream_request2->waitForReset(std::chrono::seconds(15)));
+
+  codec_client_->close();
+
+  EXPECT_TRUE(upstream_request_->complete());
+  EXPECT_EQ(0U, upstream_request_->bodyLength());
+
+  EXPECT_TRUE(response->complete());
+  EXPECT_EQ("200", response->headers().Status()->value().getStringView());
+}
+
+TEST_P(HttpTimeoutIntegrationTest, HedgedPerTryTimeoutWithBodyNoBufferFirstRequestWins) {
+  testRouterRequestAndResponseWithHedgedPerTryTimeout(1024, 512, true);
+}
+
+TEST_P(HttpTimeoutIntegrationTest, HedgedPerTryTimeoutWithBodyNoBufferSecondRequestWins) {
+  testRouterRequestAndResponseWithHedgedPerTryTimeout(1024, 512, false);
+}
+
+TEST_P(HttpTimeoutIntegrationTest,
+       HedgedPerTryTimeoutLowUpstreamBufferLimitLargeRequestFirstRequestWins) {
+  config_helper_.setBufferLimits(1024, 1024 * 1024); // Set buffer limits upstream and downstream.
+  testRouterRequestAndResponseWithHedgedPerTryTimeout(1024 * 1024, 1024, true);
+}
+
+TEST_P(HttpTimeoutIntegrationTest,
+       HedgedPerTryTimeoutLowUpstreamBufferLimitLargeRequestSecondRequestWins) {
+  config_helper_.setBufferLimits(1024, 1024 * 1024); // Set buffer limits upstream and downstream.
+  testRouterRequestAndResponseWithHedgedPerTryTimeout(1024 * 1024, 1024, false);
+}
+
+TEST_P(HttpTimeoutIntegrationTest,
+       HedgedPerTryTimeoutLowDownstreamBufferLimitLargeResponseFirstRequestWins) {
+  config_helper_.setBufferLimits(1024 * 1024, 1024); // Set buffer limits upstream and downstream.
+  testRouterRequestAndResponseWithHedgedPerTryTimeout(1024, 1024 * 1024, true);
+}
+
+TEST_P(HttpTimeoutIntegrationTest,
+       HedgedPerTryTimeoutLowDownstreamBufferLimitLargeResponseSecondRequestWins) {
+  config_helper_.setBufferLimits(1024 * 1024, 1024); // Set buffer limits upstream and downstream.
+  testRouterRequestAndResponseWithHedgedPerTryTimeout(1024, 1024 * 1024, false);
+}
+
+// Sends a request with x-envoy-hedge-on-per-try-timeout, sleeps (with
+// simulated time) for longer than the per try timeout but shorter than the
+// global timeout, asserts that a retry is sent, and then responds with a 200
+// response on the original request and ensures the downstream sees it.
+// Request/response/header size are configurable to test flow control. If
+// first_request_wins is true, then the "winning" response will be sent in
+// response to the first (timed out) request. If false, the second request will
+// get the good response.
+void HttpTimeoutIntegrationTest::testRouterRequestAndResponseWithHedgedPerTryTimeout(
+    uint64_t request_size, uint64_t response_size, bool first_request_wins) {
+  initialize();
+
+  codec_client_ = makeHttpConnection(makeClientConnection(lookupPort("http")));
+  Http::TestHeaderMapImpl request_headers{{":method", "POST"},
+                                          {":path", "/test/long/url"},
+                                          {":scheme", "http"},
+                                          {":authority", "host"},
+                                          {"x-forwarded-for", "10.0.0.1"},
+                                          {"x-envoy-retry-on", "5xx"},
+                                          {"x-envoy-hedge-on-per-try-timeout", "true"},
+                                          {"x-envoy-upstream-rq-timeout-ms", "5000"},
+                                          {"x-envoy-upstream-rq-per-try-timeout-ms", "400"}};
+  auto encoder_decoder = codec_client_->startRequest(request_headers);
+
+  auto response = std::move(encoder_decoder.second);
+  request_encoder_ = &encoder_decoder.first;
+
+  ASSERT_TRUE(fake_upstreams_[0]->waitForHttpConnection(*dispatcher_, fake_upstream_connection_));
+  ASSERT_TRUE(fake_upstream_connection_->waitForNewStream(*dispatcher_, upstream_request_));
+  ASSERT_TRUE(upstream_request_->waitForHeadersComplete());
+
+  codec_client_->sendData(*request_encoder_, request_size, true);
+
+  ASSERT_TRUE(upstream_request_->waitForEndStream(*dispatcher_));
+
+  // Trigger per try timeout (but not global timeout).
+  timeSystem().sleep(std::chrono::milliseconds(400));
+
+  FakeStreamPtr upstream_request2;
+  // Trigger retry (there's a 25ms backoff before it's issued).
+  timeSystem().sleep(std::chrono::milliseconds(26));
+
+  // Wait for a second request to be sent upstream
+  ASSERT_TRUE(fake_upstream_connection_->waitForNewStream(*dispatcher_, upstream_request2));
+  ASSERT_TRUE(upstream_request2->waitForHeadersComplete());
+  ASSERT_TRUE(upstream_request2->waitForEndStream(*dispatcher_));
+
+  Http::TestHeaderMapImpl response_headers{{":status", "200"}};
+  if (first_request_wins) {
+    // Encode 200 response headers for the first (timed out) request.
+    upstream_request_->encodeHeaders(response_headers, response_size == 0);
+  } else {
+    // Encode 200 response headers for the second request.
+    upstream_request2->encodeHeaders(response_headers, response_size == 0);
+  }
+
+  response->waitForHeaders();
+
+  if (first_request_wins) {
+    // The second request should be reset since we used the response from the first request.
+    ASSERT_TRUE(upstream_request2->waitForReset(std::chrono::seconds(15)));
+  } else {
+    // The first request should be reset since we used the response from the second request.
+    ASSERT_TRUE(upstream_request_->waitForReset(std::chrono::seconds(15)));
+  }
+
+  if (response_size) {
+    if (first_request_wins) {
+      upstream_request_->encodeData(response_size, true);
+    } else {
+      upstream_request2->encodeData(response_size, true);
+    }
+  }
+
+  response->waitForEndStream();
+
+  codec_client_->close();
+
+  EXPECT_TRUE(upstream_request_->complete());
+  EXPECT_TRUE(upstream_request2->complete());
+  if (first_request_wins) {
+    EXPECT_EQ(request_size, upstream_request_->bodyLength());
+  } else {
+    EXPECT_EQ(request_size, upstream_request2->bodyLength());
+  }
+
+  EXPECT_TRUE(response->complete());
+  EXPECT_EQ("200", response->headers().Status()->value().getStringView());
+}
+
 } // namespace Envoy
diff --git a/test/integration/http_timeout_integration_test.h b/test/integration/http_timeout_integration_test.h
index fd378f4ce7f57..230a82d2577ae 100644
--- a/test/integration/http_timeout_integration_test.h
+++ b/test/integration/http_timeout_integration_test.h
@@ -17,6 +17,10 @@ class HttpTimeoutIntegrationTest : public testing::TestWithParam<Network::Addres
     setDownstreamProtocol(Http::CodecClient::Type::HTTP2);
     setUpstreamProtocol(FakeHttpConnection::Type::HTTP2);
   }
+
+  void testRouterRequestAndResponseWithHedgedPerTryTimeout(uint64_t request_size,
+                                                           uint64_t response_size,
+                                                           bool first_request_wins);
 };
 
 } // namespace Envoy
diff --git a/test/mocks/router/mocks.cc b/test/mocks/router/mocks.cc
index 9a2d2e2f96fda..f67ea18e5c758 100644
--- a/test/mocks/router/mocks.cc
+++ b/test/mocks/router/mocks.cc
@@ -25,6 +25,11 @@ void MockRetryState::expectHeadersRetry() {
       .WillOnce(DoAll(SaveArg<1>(&callback_), Return(RetryStatus::Yes)));
 }
 
+void MockRetryState::expectHedgedPerTryTimeoutRetry() {
+  EXPECT_CALL(*this, shouldHedgeRetryPerTryTimeout(_))
+      .WillOnce(DoAll(SaveArg<0>(&callback_), Return(RetryStatus::Yes)));
+}
+
 void MockRetryState::expectResetRetry() {
   EXPECT_CALL(*this, shouldRetryReset(_, _))
       .WillOnce(DoAll(SaveArg<1>(&callback_), Return(RetryStatus::Yes)));
diff --git a/test/mocks/router/mocks.h b/test/mocks/router/mocks.h
index 45979da33e5d1..b8ae9835840d1 100644
--- a/test/mocks/router/mocks.h
+++ b/test/mocks/router/mocks.h
@@ -78,11 +78,11 @@ class TestHedgePolicy : public HedgePolicy {
   const envoy::type::FractionalPercent& additionalRequestChance() const override {
     return additional_request_chance_;
   }
-  bool hedgeOnPerTryTimeout() const override { return hedge_on_per_try_timeout; }
+  bool hedgeOnPerTryTimeout() const override { return hedge_on_per_try_timeout_; }
 
   uint32_t initial_requests_{};
   envoy::type::FractionalPercent additional_request_chance_{};
-  bool hedge_on_per_try_timeout{};
+  bool hedge_on_per_try_timeout_{};
 };
 
 class TestRetryPolicy : public RetryPolicy {
@@ -115,13 +115,16 @@ class MockRetryState : public RetryState {
   ~MockRetryState();
 
   void expectHeadersRetry();
+  void expectHedgedPerTryTimeoutRetry();
   void expectResetRetry();
 
   MOCK_METHOD0(enabled, bool());
   MOCK_METHOD2(shouldRetryHeaders,
                RetryStatus(const Http::HeaderMap& response_headers, DoRetryCallback callback));
+  MOCK_METHOD1(wouldRetryFromHeaders, bool(const Http::HeaderMap& response_headers));
   MOCK_METHOD2(shouldRetryReset,
                RetryStatus(const Http::StreamResetReason reset_reason, DoRetryCallback callback));
+  MOCK_METHOD1(shouldHedgeRetryPerTryTimeout, RetryStatus(DoRetryCallback callback));
   MOCK_METHOD1(onHostAttempted, void(Upstream::HostDescriptionConstSharedPtr));
   MOCK_METHOD1(shouldSelectAnotherHost, bool(const Upstream::Host& host));
   MOCK_METHOD2(priorityLoadForRetry,
diff --git a/test/tools/router_check/test/config/ClusterHeader.golden.proto.json b/test/tools/router_check/test/config/ClusterHeader.golden.proto.json
index c5d9f402d58e7..4ab14950d8341 100644
--- a/test/tools/router_check/test/config/ClusterHeader.golden.proto.json
+++ b/test/tools/router_check/test/config/ClusterHeader.golden.proto.json
@@ -1,4 +1,4 @@
-{ 
+{
   "tests": [
     {
       "test_name": "Test1",
diff --git a/test/tools/router_check/test/config/HeaderMatchedRouting.golden.proto.json b/test/tools/router_check/test/config/HeaderMatchedRouting.golden.proto.json
index eacfd0ccb7cf3..dceffa439012d 100644
--- a/test/tools/router_check/test/config/HeaderMatchedRouting.golden.proto.json
+++ b/test/tools/router_check/test/config/HeaderMatchedRouting.golden.proto.json
@@ -136,5 +136,5 @@
       },
       "validate": {"cluster_name": "local_service_with_grpc_and_other_header"}
     }
-  ]  
+  ]
 }
diff --git a/test/tools/router_check/test/config/Redirect.golden.proto.json b/test/tools/router_check/test/config/Redirect.golden.proto.json
index 02a93977419b9..fb7c3e7128b86 100644
--- a/test/tools/router_check/test/config/Redirect.golden.proto.json
+++ b/test/tools/router_check/test/config/Redirect.golden.proto.json
@@ -84,5 +84,5 @@
       },
       "validate": {"path_redirect": "https://new.lyft.com/new_baz"}
     }
-  ]  
+  ]
 }
diff --git a/test/tools/router_check/test/config/Redirect2.golden.proto.json b/test/tools/router_check/test/config/Redirect2.golden.proto.json
index a1cbaa7b6e81d..773386289992e 100644
--- a/test/tools/router_check/test/config/Redirect2.golden.proto.json
+++ b/test/tools/router_check/test/config/Redirect2.golden.proto.json
@@ -66,5 +66,5 @@
         "cluster_name": ""
       }
     }
-  ]  
+  ]
 }
diff --git a/test/tools/router_check/test/config/Redirect3.golden.proto.json b/test/tools/router_check/test/config/Redirect3.golden.proto.json
index 5288fd36c157c..58aaac9f47e2b 100644
--- a/test/tools/router_check/test/config/Redirect3.golden.proto.json
+++ b/test/tools/router_check/test/config/Redirect3.golden.proto.json
@@ -26,5 +26,5 @@
         "path_redirect": "http://new.lyft.com/foo"
       }
     }
-  ]  
+  ]
 }
diff --git a/test/tools/router_check/test/config/TestRoutes.golden.proto.json b/test/tools/router_check/test/config/TestRoutes.golden.proto.json
index c7c0390447c72..788e8af713a50 100644
--- a/test/tools/router_check/test/config/TestRoutes.golden.proto.json
+++ b/test/tools/router_check/test/config/TestRoutes.golden.proto.json
@@ -325,5 +325,5 @@
         ]
       }
     }
-  ]  
+  ]
 }
diff --git a/test/tools/router_check/test/config/Weighted.golden.proto.json b/test/tools/router_check/test/config/Weighted.golden.proto.json
index 542a2b91a6291..63622d1b4f8de 100644
--- a/test/tools/router_check/test/config/Weighted.golden.proto.json
+++ b/test/tools/router_check/test/config/Weighted.golden.proto.json
@@ -41,5 +41,5 @@
       },
       "validate": {"cluster_name": "cluster3"}
     }
-  ]  
+  ]
 }