diff --git a/api/envoy/extensions/filters/http/cache/v3/cache.proto b/api/envoy/extensions/filters/http/cache/v3/cache.proto
index 70687b7150842..0613f499c86c8 100644
--- a/api/envoy/extensions/filters/http/cache/v3/cache.proto
+++ b/api/envoy/extensions/filters/http/cache/v3/cache.proto
@@ -20,7 +20,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
// [#protodoc-title: HTTP Cache Filter]
// [#extension: envoy.filters.http.cache]
-// [#next-free-field: 7]
+// [#next-free-field: 8]
message CacheConfig {
option (udpa.annotations.versioning).previous_message_type =
"envoy.config.filter.http.cache.v2alpha.CacheConfig";
@@ -93,4 +93,16 @@ message CacheConfig {
// causes the cache to validate with its upstream even if the lookup is a hit. Setting this
// to true will ignore these headers.
bool ignore_request_cache_control_header = 6;
+
+ // If this is set, requests sent upstream to populate the cache will go to the
+ // specified cluster rather than the cluster selected by the vhost and route.
+ //
+  // If you have actions to be taken by the router filter - either
+  // ``upstream_http_filters`` or one of the ``RouteConfiguration`` actions such as
+  // ``response_headers_to_add`` - then the cache's side-channel going directly to the
+  // routed cluster will bypass these actions. You can set ``override_upstream_cluster``
+  // to a cluster that points at an internal listener which duplicates the relevant
+  // ``RouteConfiguration``, to replicate the desired behavior on the side-channel
+  // upstream request issued by the cache.
+ string override_upstream_cluster = 7;
}
diff --git a/changelogs/current.yaml b/changelogs/current.yaml
index 9ecf0d6e48ce5..92200502aafca 100644
--- a/changelogs/current.yaml
+++ b/changelogs/current.yaml
@@ -2,6 +2,23 @@ date: Pending
behavior_changes:
# *Changes that are expected to cause an incompatibility if applicable; deployment changes are likely required*
+- area: cache_filter
+ change: |
+ CacheFilter (WIP) has been completely reworked. Any existing cache implementations
+ will need to be modified to fit the new API. The new cache API is much simpler, as
+ individual cache implementations no longer need to comprehend various http headers,
+ only read and write at keys. Cache filter now handles "thundering herds" - if
+ multiple requests for the same resource arrive before the cache is populated,
+ now only one request goes upstream, and there is only one insert to the cache.
+ Range requests now convert to an upstream request for the entire resource, to
+ populate the cache.
+    A surprising behavior change may result if there are any active filters upstream
+    of a CacheFilter, including if the RouteConfiguration performs any actions
+    (e.g. adding headers) - it is recommended that for anything other than the most
+    simplistic configurations (for which the CacheFilter should be the furthest
+    upstream filter), a CacheFilter should be configured to make its requests
+    to an InternalListener which duplicates the RouteConfiguration and any filter
+    chain upstream of the CacheFilter. This is recognized as far from ideal.
minor_behavior_changes:
# *Changes that may cause incompatibilities for some users, but should not for most*
diff --git a/docs/root/_static/cache-filter-internal-listener.svg b/docs/root/_static/cache-filter-internal-listener.svg
new file mode 100644
index 0000000000000..be569c60adbaf
--- /dev/null
+++ b/docs/root/_static/cache-filter-internal-listener.svg
@@ -0,0 +1 @@
+
diff --git a/docs/root/configuration/http/http_filters/_include/http-cache-configuration-internal-listener.yaml b/docs/root/configuration/http/http_filters/_include/http-cache-configuration-internal-listener.yaml
new file mode 100644
index 0000000000000..84a8442d51149
--- /dev/null
+++ b/docs/root/configuration/http/http_filters/_include/http-cache-configuration-internal-listener.yaml
@@ -0,0 +1,113 @@
+bootstrap_extensions:
+- name: envoy.bootstrap.internal_listener
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.bootstrap.internal_listener.v3.InternalListener
+static_resources:
+ listeners:
+ - address:
+ socket_address:
+ address: 0.0.0.0
+ port_value: 8000
+ filter_chains:
+ - filters:
+ - name: envoy.filters.network.http_connection_manager
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+ codec_type: AUTO
+ stat_prefix: ingress_http
+ route_config:
+ name: local_route
+ virtual_hosts:
+ - name: backend
+ response_headers_to_add:
+ - header:
+ key: x-something
+ value: something
+ domains:
+ - "*"
+ routes:
+ - match:
+ prefix: "/service/1"
+ route:
+ cluster: service1
+ - match:
+ prefix: "/service/2"
+ route:
+ cluster: service2
+ http_filters:
+ - name: "envoy.filters.http.cache"
+ typed_config:
+ "@type": "type.googleapis.com/envoy.extensions.filters.http.cache.v3.CacheConfig"
+ override_upstream_cluster: cache_internal_listener_cluster
+ typed_config:
+ "@type": "type.googleapis.com/envoy.extensions.http.cache.simple_http_cache.v3.SimpleHttpCacheConfig"
+ - name: envoy.filters.http.router
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+ - name: cache_internal_listener
+ internal_listener: {}
+ filter_chains:
+ - filters:
+ - name: envoy.filters.network.http_connection_manager
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+ codec_type: AUTO
+ stat_prefix: cache_internal_listener
+ route_config:
+ name: local_route
+ virtual_hosts:
+ - name: backend
+ response_headers_to_add:
+ - header:
+ key: x-something
+ value: something
+ domains:
+ - "*"
+ routes:
+ - match:
+ prefix: "/service/1"
+ route:
+ cluster: service1
+ - match:
+ prefix: "/service/2"
+ route:
+ cluster: service2
+ http_filters:
+ - name: envoy.filters.http.router
+ typed_config:
+ "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+
+ clusters:
+ - name: service1
+ type: STRICT_DNS
+ lb_policy: ROUND_ROBIN
+ load_assignment:
+ cluster_name: service1
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ address:
+ socket_address:
+ address: service1
+ port_value: 8000
+ - name: service2
+ type: STRICT_DNS
+ lb_policy: ROUND_ROBIN
+ load_assignment:
+ cluster_name: service2
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ address:
+ socket_address:
+ address: service2
+ port_value: 8000
+ - name: cache_internal_listener_cluster
+ load_assignment:
+ cluster_name: cache_internal_listener_cluster
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ address:
+ envoy_internal_address:
+ server_listener_name: cache_internal_listener
diff --git a/docs/root/configuration/http/http_filters/cache_filter.rst b/docs/root/configuration/http/http_filters/cache_filter.rst
index 68deed12bb0a3..e804b7dc1d8a3 100644
--- a/docs/root/configuration/http/http_filters/cache_filter.rst
+++ b/docs/root/configuration/http/http_filters/cache_filter.rst
@@ -13,12 +13,25 @@ Cache filter
upstream than the cache filter, while non-cacheable requests still go through the
listener filter chain. It is therefore recommended for consistency that only the
router filter should be further upstream in the listener filter chain than the
- cache filter.
+  cache filter, and even then only if the router filter does not perform any
+  mutations, such as setting ``request_headers_to_add``.
.. image:: /_static/cache-filter-chain.svg
:width: 80%
:align: center
+* For more complex filter chains where some filters must be upstream of the cache
+  filter for correct behavior, or if the router filter is configured to perform
+  mutations via
+  :ref:`RouteConfiguration <envoy_v3_api_msg_config.route.v3.RouteConfiguration>`,
+  the recommended approach is to configure
+  an internal listener which duplicates the part of the filter chain that is
+  upstream of the cache filter, and the ``RouteConfiguration``.
+
+.. image:: /_static/cache-filter-internal-listener.svg
+ :width: 80%
+ :align: center
+
The HTTP Cache filter implements most of the complexity of HTTP caching semantics.
For HTTP Requests:
@@ -50,6 +63,16 @@ Example filter configuration with a ``SimpleHttpCache`` cache implementation:
:lineno-start: 29
:caption: :download:`http-cache-configuration.yaml <_include/http-cache-configuration.yaml>`
+If mutations occur upstream of the cache filter, a more complicated filter chain configuration
+is required, duplicating the full route config into an internal listener (unfortunately this is currently unavoidable):
+
+.. literalinclude:: _include/http-cache-configuration-internal-listener.yaml
+ :language: yaml
+ :lines: 38-113
+ :linenos:
+ :lineno-start: 38
+ :caption: :download:`http-cache-configuration-internal-listener.yaml <_include/http-cache-configuration-internal-listener.yaml>`
+
.. seealso::
:ref:`Envoy Cache Sandbox `
diff --git a/source/extensions/filters/http/cache/BUILD b/source/extensions/filters/http/cache/BUILD
index c8bb391d51cc0..c6bb70fb55305 100644
--- a/source/extensions/filters/http/cache/BUILD
+++ b/source/extensions/filters/http/cache/BUILD
@@ -12,25 +12,87 @@ licenses(["notice"]) # Apache 2
envoy_extension_package()
+envoy_cc_library(
+ name = "http_source_interface",
+ hdrs = ["http_source.h"],
+ deps = [
+ "//envoy/buffer:buffer_interface",
+ "//envoy/http:header_map_interface",
+ "//source/extensions/filters/http/cache:range_utils_lib",
+ "@com_google_absl//absl/functional:any_invocable",
+ ],
+)
+
+envoy_cc_library(
+ name = "upstream_request_lib",
+ srcs = ["upstream_request_impl.cc"],
+ hdrs = [
+ "upstream_request.h",
+ "upstream_request_impl.h",
+ ],
+ deps = [
+ ":http_source_interface",
+ ":range_utils_lib",
+ ":stats",
+ "//source/common/buffer:watermark_buffer_lib",
+ "//source/common/common:cancel_wrapper_lib",
+ "//source/common/common:logger_lib",
+ "@com_google_absl//absl/types:variant",
+ ],
+)
+
+envoy_cc_library(
+ name = "cache_sessions_lib",
+ srcs = [
+ "cache_sessions.cc",
+ ],
+ hdrs = [
+ "cache_sessions.h",
+ ],
+ deps = [
+ ":http_cache_lib",
+ ":stats",
+ ":upstream_request_lib",
+ "//source/common/http:utility_lib",
+ ],
+)
+
+envoy_cc_library(
+ name = "cache_sessions_impl_lib",
+ srcs = [
+ "cache_sessions_impl.cc",
+ ],
+ hdrs = [
+ "cache_sessions_impl.h",
+ ],
+ deps = [
+ ":cache_sessions_lib",
+ ":cacheability_utils_lib",
+ ":upstream_request_lib",
+ "//source/common/common:cancel_wrapper_lib",
+ ],
+)
+
envoy_cc_library(
name = "cache_filter_lib",
srcs = [
"cache_filter.cc",
- "upstream_request.cc",
],
hdrs = [
"cache_filter.h",
- "filter_state.h",
- "upstream_request.h",
],
deps = [
":cache_custom_headers",
":cache_entry_utils_lib",
- ":cache_filter_logging_info_lib",
":cache_headers_utils_lib",
- ":cache_insert_queue_lib",
+ ":cache_sessions_impl_lib",
+ ":cache_sessions_lib",
":cacheability_utils_lib",
":http_cache_lib",
+ ":stats",
+ ":upstream_request_lib",
+ "//source/common/buffer:buffer_lib",
+ "//source/common/common:cancel_wrapper_lib",
"//source/common/common:enum_to_int",
"//source/common/common:logger_lib",
"//source/common/common:macros",
@@ -65,16 +127,6 @@ envoy_cc_library(
],
)
-envoy_cc_library(
- name = "cache_insert_queue_lib",
- srcs = ["cache_insert_queue.cc"],
- hdrs = ["cache_insert_queue.h"],
- deps = [
- ":http_cache_lib",
- "//source/common/buffer:buffer_lib",
- ],
-)
-
envoy_cc_library(
name = "cache_policy_lib",
hdrs = ["cache_policy.h"],
@@ -82,7 +134,6 @@ envoy_cc_library(
":cache_headers_utils_lib",
":http_cache_lib",
"//source/common/http:header_map_lib",
- "//source/common/stream_info:filter_state_lib",
],
)
@@ -91,6 +142,15 @@ envoy_proto_library(
srcs = ["key.proto"],
)
+envoy_cc_library(
+ name = "cache_progress_receiver_interface",
+ hdrs = ["cache_progress_receiver.h"],
+ deps = [
+ "//envoy/http:header_map_interface",
+ "//source/extensions/filters/http/cache:range_utils_lib",
+ ],
+)
+
envoy_cc_library(
name = "http_cache_lib",
srcs = ["http_cache.cc"],
@@ -99,6 +159,8 @@ envoy_cc_library(
":cache_custom_headers",
":cache_entry_utils_lib",
":cache_headers_utils_lib",
+ ":cache_progress_receiver_interface",
+ ":http_source_interface",
":key_cc_proto",
":range_utils_lib",
"//envoy/buffer:buffer_interface",
@@ -137,6 +199,7 @@ envoy_cc_library(
hdrs = ["cache_headers_utils.h"],
deps = [
":cache_custom_headers",
+ ":key_cc_proto",
"//envoy/common:time_interface",
"//envoy/http:header_map_interface",
"//source/common/common:matchers_lib",
@@ -159,12 +222,12 @@ envoy_cc_library(
],
)
-envoy_cc_library(
- name = "cache_filter_logging_info_lib",
- srcs = ["cache_filter_logging_info.cc"],
- hdrs = ["cache_filter_logging_info.h"],
+envoy_cc_extension(
+ name = "stats",
+ srcs = ["stats.cc"],
+ hdrs = ["stats.h"],
deps = [
- "//source/common/stream_info:filter_state_lib",
+ ":cache_entry_utils_lib",
],
)
@@ -174,6 +237,8 @@ envoy_cc_extension(
hdrs = ["config.h"],
deps = [
":cache_filter_lib",
+ ":cache_sessions_lib",
+ ":stats",
"//source/extensions/filters/http/common:factory_base_lib",
"@envoy_api//envoy/extensions/filters/http/cache/v3:pkg_cc_proto",
],
diff --git a/source/extensions/filters/http/cache/cache_entry_utils.cc b/source/extensions/filters/http/cache/cache_entry_utils.cc
index dd5eb27120bd5..948e37ea58fe9 100644
--- a/source/extensions/filters/http/cache/cache_entry_utils.cc
+++ b/source/extensions/filters/http/cache/cache_entry_utils.cc
@@ -9,16 +9,26 @@ namespace Cache {
absl::string_view cacheEntryStatusString(CacheEntryStatus s) {
switch (s) {
- case CacheEntryStatus::Ok:
- return "Ok";
- case CacheEntryStatus::Unusable:
- return "Unusable";
- case CacheEntryStatus::RequiresValidation:
- return "RequiresValidation";
+ case CacheEntryStatus::Hit:
+ return "Hit";
+ case CacheEntryStatus::Miss:
+ return "Miss";
+ case CacheEntryStatus::Follower:
+ return "Follower";
+ case CacheEntryStatus::Uncacheable:
+ return "Uncacheable";
+ case CacheEntryStatus::Validated:
+ return "Validated";
+ case CacheEntryStatus::ValidatedFree:
+ return "ValidatedFree";
+ case CacheEntryStatus::FailedValidation:
+ return "FailedValidation";
case CacheEntryStatus::FoundNotModified:
return "FoundNotModified";
case CacheEntryStatus::LookupError:
return "LookupError";
+ case CacheEntryStatus::UpstreamReset:
+ return "UpstreamReset";
}
IS_ENVOY_BUG(absl::StrCat("Unexpected CacheEntryStatus: ", s));
return "UnexpectedCacheEntryStatus";
diff --git a/source/extensions/filters/http/cache/cache_entry_utils.h b/source/extensions/filters/http/cache/cache_entry_utils.h
index d43c4f6a4198d..42f0e283f0618 100644
--- a/source/extensions/filters/http/cache/cache_entry_utils.h
+++ b/source/extensions/filters/http/cache/cache_entry_utils.h
@@ -22,23 +22,37 @@ struct ResponseMetadata {
// calculations at: https://httpwg.org/specs/rfc7234.html#age.calculations
Envoy::SystemTime response_time_;
};
-using ResponseMetadataPtr = std::unique_ptr<ResponseMetadata>;
// Whether a given cache entry is good for the current request.
enum class CacheEntryStatus {
// This entry is fresh, and an appropriate response to the request.
- Ok,
- // No usable entry was found. If this was generated for a cache entry, the
- // cache should delete that entry.
- Unusable,
- // This entry is stale, but appropriate for validating
- RequiresValidation,
+ Hit,
+ // The request was cacheable and was not already in the cache. This also means
+ // the cache was populated by this request.
+ Miss,
+ // The entry was being inserted when this request was made - it's like a
+ // hit, but streamed from the same request as the original "Miss", so still
+ // potentially subject to upstream reset because the cache entry isn't fully
+ // populated yet.
+ Follower,
+ // The request was not cacheable. All matching requests will go to the
+ // upstream.
+ Uncacheable,
+ // This entry required validation, and validated successfully.
+ Validated,
+ // This entry required validation while another entry was already validating,
+ // so it validated successfully without its own lookup.
+ ValidatedFree,
+ // This entry required validation, and did not validate.
+ FailedValidation,
// This entry is fresh, and an appropriate basis for a 304 Not Modified
// response.
FoundNotModified,
// The cache lookup failed, e.g. because the cache was unreachable or an RPC
- // timed out. The caller shouldn't use this lookup's context for an insert.
+ // timed out. Mostly behaves the same as Uncacheable but may retry each time.
LookupError,
+ // The cache attempted to read from upstream for insert, but upstream reset.
+ UpstreamReset,
};
absl::string_view cacheEntryStatusString(CacheEntryStatus s);
diff --git a/source/extensions/filters/http/cache/cache_filter.cc b/source/extensions/filters/http/cache/cache_filter.cc
index 0ca04ed2131d0..8aaece62955d0 100644
--- a/source/extensions/filters/http/cache/cache_filter.cc
+++ b/source/extensions/filters/http/cache/cache_filter.cc
@@ -2,14 +2,13 @@
#include "envoy/http/header_map.h"
+#include "source/common/buffer/buffer_impl.h"
#include "source/common/common/enum_to_int.h"
#include "source/common/http/headers.h"
#include "source/common/http/utility.h"
-#include "source/extensions/filters/http/cache/cache_custom_headers.h"
#include "source/extensions/filters/http/cache/cache_entry_utils.h"
-#include "source/extensions/filters/http/cache/cache_filter_logging_info.h"
#include "source/extensions/filters/http/cache/cacheability_utils.h"
-#include "source/extensions/filters/http/cache/upstream_request.h"
+#include "source/extensions/filters/http/cache/upstream_request_impl.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
@@ -20,6 +19,8 @@ namespace Extensions {
namespace HttpFilters {
namespace Cache {
+using CancelWrapper::cancelWrapped;
+
namespace {
// This value is only used if there is no encoderBufferLimit on the stream;
// without *some* constraint here, a very large chunk can be requested and
@@ -29,52 +30,62 @@ namespace {
// behavioral change when a constraint is added.
//
// And everyone knows 64MB should be enough for anyone.
-static const size_t MAX_BYTES_TO_FETCH_FROM_CACHE_PER_REQUEST = 64 * 1024 * 1024;
+static constexpr size_t MaxBytesToFetchFromCachePerRead = 64 * 1024 * 1024;
} // namespace
-struct CacheResponseCodeDetailValues {
- const absl::string_view ResponseFromCacheFilter = "cache.response_from_cache_filter";
-};
-
-using CacheResponseCodeDetails = ConstSingleton<CacheResponseCodeDetailValues>;
+namespace CacheResponseCodeDetails {
+static constexpr absl::string_view ResponseFromCacheFilter = "cache.response_from_cache_filter";
+static constexpr absl::string_view CacheFilterInsert = "cache.insert_via_upstream";
+static constexpr absl::string_view CacheFilterAbortedDuringLookup = "cache.aborted_lookup";
+static constexpr absl::string_view CacheFilterAbortedDuringHeaders = "cache.aborted_headers";
+static constexpr absl::string_view CacheFilterAbortedDuringBody = "cache.aborted_body";
+static constexpr absl::string_view CacheFilterAbortedDuringTrailers = "cache.aborted_trailers";
+} // namespace CacheResponseCodeDetails
CacheFilterConfig::CacheFilterConfig(
const envoy::extensions::filters::http::cache::v3::CacheConfig& config,
+    std::shared_ptr<CacheSessions> cache_sessions,
Server::Configuration::CommonFactoryContext& context)
: vary_allow_list_(config.allowed_vary_headers(), context), time_source_(context.timeSource()),
ignore_request_cache_control_header_(config.ignore_request_cache_control_header()),
- cluster_manager_(context.clusterManager()) {}
+ cluster_manager_(context.clusterManager()), cache_sessions_(std::move(cache_sessions)),
+ override_upstream_cluster_(config.override_upstream_cluster()) {}
+
+bool CacheFilterConfig::isCacheableResponse(const Http::ResponseHeaderMap& headers) const {
+ return CacheabilityUtils::isCacheableResponse(headers, vary_allow_list_);
+}
-CacheFilter::CacheFilter(std::shared_ptr<const CacheFilterConfig> config,
-                         std::shared_ptr<HttpCache> http_cache)
-    : cache_(http_cache), config_(config) {}
+CacheFilter::CacheFilter(std::shared_ptr<const CacheFilterConfig> config) : config_(config) {}
+
+void CacheFilter::setDecoderFilterCallbacks(Http::StreamDecoderFilterCallbacks& callbacks) {
+ callbacks.addDownstreamWatermarkCallbacks(*this);
+ PassThroughFilter::setDecoderFilterCallbacks(callbacks);
+}
void CacheFilter::onDestroy() {
- filter_state_ = FilterState::Destroyed;
- if (lookup_ != nullptr) {
- lookup_->onDestroy();
- }
- if (upstream_request_ != nullptr) {
- upstream_request_->disconnectFilter();
- upstream_request_ = nullptr;
+ is_destroyed_ = true;
+ if (cancel_in_flight_callback_) {
+ cancel_in_flight_callback_();
}
+ lookup_result_.reset();
}
-void CacheFilter::sendUpstreamRequest(Http::RequestHeaderMap& request_headers) {
+absl::optional<absl::string_view> CacheFilter::clusterName() {
Router::RouteConstSharedPtr route = decoder_callbacks_->route();
const Router::RouteEntry* route_entry = (route == nullptr) ? nullptr : route->routeEntry();
if (route_entry == nullptr) {
- return sendNoRouteResponse();
+ return absl::nullopt;
}
+ return route_entry->clusterName();
+}
+
+OptRef<Http::AsyncClient> CacheFilter::asyncClient(absl::string_view cluster_name) {
Upstream::ThreadLocalCluster* thread_local_cluster =
- config_->clusterManager().getThreadLocalCluster(route_entry->clusterName());
+ config_->clusterManager().getThreadLocalCluster(cluster_name);
if (thread_local_cluster == nullptr) {
- return sendNoClusterResponse(route_entry->clusterName());
+ return absl::nullopt;
}
- upstream_request_ =
- UpstreamRequest::create(this, std::move(lookup_), std::move(lookup_result_), cache_,
- thread_local_cluster->httpAsyncClient(), config_->upstreamOptions());
- upstream_request_->sendHeaders(request_headers);
+ return thread_local_cluster->httpAsyncClient();
}
void CacheFilter::sendNoRouteResponse() {
@@ -89,275 +100,308 @@ void CacheFilter::sendNoClusterResponse(absl::string_view cluster_name) {
"cache_no_cluster");
}
-void CacheFilter::onStreamComplete() {
- LookupStatus lookup_status = lookupStatus();
- InsertStatus insert_status = insertStatus();
- decoder_callbacks_->streamInfo().filterState()->setData(
- CacheFilterLoggingInfo::FilterStateKey,
- std::make_shared(lookup_status, insert_status),
- StreamInfo::FilterState::StateType::ReadOnly);
-}
-
Http::FilterHeadersStatus CacheFilter::decodeHeaders(Http::RequestHeaderMap& headers,
bool end_stream) {
- if (!cache_) {
- filter_state_ = FilterState::NotServingFromCache;
+ ASSERT(decoder_callbacks_);
+ if (!config_->hasCache()) {
return Http::FilterHeadersStatus::Continue;
}
- ENVOY_STREAM_LOG(debug, "CacheFilter::decodeHeaders: {}", *decoder_callbacks_, headers);
if (!end_stream) {
- ENVOY_STREAM_LOG(
- debug,
- "CacheFilter::decodeHeaders ignoring request because it has body and/or trailers: {}",
- *decoder_callbacks_, headers);
- filter_state_ = FilterState::NotServingFromCache;
+ ENVOY_STREAM_LOG(debug,
+ "CacheFilter::decodeHeaders ignoring request because it has body and/or "
+ "trailers: headers={}",
+ *decoder_callbacks_, headers);
+ stats().incForStatus(CacheEntryStatus::Uncacheable);
return Http::FilterHeadersStatus::Continue;
}
- if (!CacheabilityUtils::canServeRequestFromCache(headers)) {
- ENVOY_STREAM_LOG(debug, "CacheFilter::decodeHeaders ignoring uncacheable request: {}",
- *decoder_callbacks_, headers);
- filter_state_ = FilterState::NotServingFromCache;
- insert_status_ = InsertStatus::NoInsertRequestNotCacheable;
+ absl::Status can_serve = CacheabilityUtils::canServeRequestFromCache(headers);
+ if (!can_serve.ok()) {
+ ENVOY_STREAM_LOG(debug,
+ "CacheFilter::decodeHeaders ignoring uncacheable request: {}\nheaders={}",
+ *decoder_callbacks_, can_serve, headers);
+ stats().incForStatus(CacheEntryStatus::Uncacheable);
return Http::FilterHeadersStatus::Continue;
}
- ASSERT(decoder_callbacks_);
+ ENVOY_STREAM_LOG(debug, "CacheFilter::decodeHeaders: {}", *decoder_callbacks_, headers);
- LookupRequest lookup_request(headers, config_->timeSource().systemTime(),
- config_->varyAllowList(),
- config_->ignoreRequestCacheControlHeader());
- request_allows_inserts_ = !lookup_request.requestCacheControl().no_store_;
+  absl::optional<absl::string_view> original_cluster_name = clusterName();
+ absl::string_view cluster_name;
+ if (config_->overrideUpstreamCluster().empty()) {
+ if (!original_cluster_name) {
+ sendNoRouteResponse();
+ return Http::FilterHeadersStatus::StopIteration;
+ }
+ cluster_name = *original_cluster_name;
+ } else {
+ cluster_name = config_->overrideUpstreamCluster();
+ if (!original_cluster_name) {
+ // It's possible the destination cluster will only be determined further upstream in
+ // the cache filter's side-channel, in which case we can't use it in the key;
+ // in this case use "unknown" instead.
+ original_cluster_name = "unknown";
+ }
+ }
+  OptRef<Http::AsyncClient> async_client = asyncClient(cluster_name);
+ if (!async_client) {
+ sendNoClusterResponse(cluster_name);
+ return Http::FilterHeadersStatus::StopIteration;
+ }
+  auto upstream_request_factory = std::make_unique<UpstreamRequestFactoryImpl>(
+      decoder_callbacks_->dispatcher(), *async_client, config_->upstreamOptions());
+  auto lookup_request = std::make_unique<ActiveLookupRequest>(
+      headers, std::move(upstream_request_factory), *original_cluster_name,
+      decoder_callbacks_->dispatcher(), config_->timeSource().systemTime(), config_, config_,
+      config_->ignoreRequestCacheControlHeader());
is_head_request_ = headers.getMethodValue() == Http::Headers::get().MethodValues.Head;
- lookup_ = cache_->makeLookupContext(std::move(lookup_request), *decoder_callbacks_);
-
- ASSERT(lookup_);
- getHeaders(headers);
ENVOY_STREAM_LOG(debug, "CacheFilter::decodeHeaders starting lookup", *decoder_callbacks_);
+ config_->cacheSessions().lookup(
+ std::move(lookup_request),
+ cancelWrapped(
+ [this](ActiveLookupResultPtr lookup_result) { onLookupResult(std::move(lookup_result)); },
+ &cancel_in_flight_callback_));
+
+ // Stop the decoding stream.
+ return Http::FilterHeadersStatus::StopIteration;
+}
- // Stop the decoding stream until the cache lookup result is ready.
- return Http::FilterHeadersStatus::StopAllIterationAndWatermark;
+static absl::string_view responseCodeDetailsFromStatus(CacheEntryStatus status) {
+ switch (status) {
+ case CacheEntryStatus::Miss:
+ case CacheEntryStatus::FailedValidation:
+ return CacheResponseCodeDetails::CacheFilterInsert;
+ case CacheEntryStatus::Hit:
+ case CacheEntryStatus::FoundNotModified:
+ case CacheEntryStatus::Follower:
+ case CacheEntryStatus::Validated:
+ case CacheEntryStatus::ValidatedFree:
+ case CacheEntryStatus::UpstreamReset:
+ return CacheResponseCodeDetails::ResponseFromCacheFilter;
+ case CacheEntryStatus::Uncacheable:
+ case CacheEntryStatus::LookupError:
+ break;
+ }
+ return StreamInfo::ResponseCodeDetails::get().ViaUpstream;
}
-void CacheFilter::onUpstreamRequestComplete() { upstream_request_ = nullptr; }
+void CacheFilter::onLookupResult(ActiveLookupResultPtr lookup_result) {
+ ASSERT(lookup_result != nullptr, "lookup result should always be non-null");
+ lookup_result_ = std::move(lookup_result);
+ if (!lookup_result_->http_source_) {
+ // Lookup failed, typically implying upstream request was reset.
+ decoder_callbacks_->streamInfo().setResponseCodeDetails(
+ CacheResponseCodeDetails::CacheFilterAbortedDuringLookup);
+ decoder_callbacks_->resetStream();
+ return;
+ }
-void CacheFilter::onUpstreamRequestReset() {
- upstream_request_ = nullptr;
- decoder_callbacks_->sendLocalReply(Http::Code::ServiceUnavailable, "", nullptr, absl::nullopt,
- "cache_upstream_reset");
+ stats().incForStatus(lookup_result_->status_);
+ if (lookup_result_->status_ != CacheEntryStatus::Uncacheable) {
+ decoder_callbacks_->streamInfo().setResponseFlag(
+ StreamInfo::CoreResponseFlag::ResponseFromCacheFilter);
+ }
+
+ ENVOY_STREAM_LOG(debug, "CacheFilter calling getHeaders", *decoder_callbacks_);
+ lookup_result_->http_source_->getHeaders(cancelWrapped(
+ [this](Http::ResponseHeaderMapPtr response_headers, EndStream end_stream_enum) {
+ onHeaders(std::move(response_headers), end_stream_enum);
+ },
+ &cancel_in_flight_callback_));
}
Http::FilterHeadersStatus CacheFilter::encodeHeaders(Http::ResponseHeaderMap& headers, bool) {
- if (filter_state_ == FilterState::ServingFromCache) {
- // This call was invoked during decoding by decoder_callbacks_->encodeHeaders because a fresh
- // cached response was found and is being added to the encoding stream -- ignore it.
- return Http::FilterHeadersStatus::Continue;
- }
-
- // If lookup_ is null, the request wasn't cacheable, so the response isn't either.
- if (!lookup_) {
+ if (lookup_result_) {
+ // This call was invoked during decoding by decoder_callbacks_->encodeHeaders with data
+ // either read from the upstream via the cache filter, or from the cache.
return Http::FilterHeadersStatus::Continue;
}
-
- if (lookup_result_ == nullptr) {
- // Filter chain iteration is paused while a lookup is outstanding, but the filter chain manager
- // can still generate a local reply. One case where this can happen is when a downstream idle
- // timeout fires, which may mean that the HttpCache isn't correctly setting deadlines on its
- // asynchronous operations or is otherwise getting stuck.
- ENVOY_BUG(Http::Utility::getResponseStatus(headers) !=
- Envoy::enumToInt(Http::Code::RequestTimeout),
- "Request timed out while cache lookup was outstanding.");
- // Cancel the lookup since it's now not useful.
- lookup_->onDestroy();
- lookup_ = nullptr;
+ if (!cancel_in_flight_callback_) {
+ // If there was no lookup result and there's no request in flight, this implies
+ // no request was sent, so we must be in a pass-through configuration (either no
+ // cache or the request had a body).
return Http::FilterHeadersStatus::Continue;
}
- IS_ENVOY_BUG("encodeHeaders should not be called except under the conditions handled above");
+ // Filter chain iteration is paused while a lookup is outstanding, but the filter chain manager
+ // can still generate a local reply. One case where this can happen is when a downstream idle
+ // timeout fires, which may mean that the HttpCache isn't correctly setting deadlines on its
+ // asynchronous operations or is otherwise getting stuck.
+ ENVOY_BUG(Http::Utility::getResponseStatus(headers) !=
+ Envoy::enumToInt(Http::Code::RequestTimeout),
+ "Request timed out while cache lookup was outstanding.");
+ // Cancel the lookup since it's now not useful.
+ ASSERT(cancel_in_flight_callback_);
+ cancel_in_flight_callback_();
return Http::FilterHeadersStatus::Continue;
}
-/*static*/ LookupStatus
-CacheFilter::resolveLookupStatus(absl::optional<CacheEntryStatus> cache_entry_status,
-                                 FilterState filter_state) {
- if (cache_entry_status.has_value()) {
- switch (cache_entry_status.value()) {
- case CacheEntryStatus::Ok:
- return LookupStatus::CacheHit;
- case CacheEntryStatus::Unusable:
- return LookupStatus::CacheMiss;
- case CacheEntryStatus::RequiresValidation: {
- // The CacheFilter sent the response upstream for validation; check the
- // filter state to see whether and how the upstream responded. The
- // filter currently won't send the stale entry if it can't reach the
- // upstream or if the upstream responds with a 5xx, so don't include
- // special handling for those cases.
- switch (filter_state) {
- case FilterState::ValidatingCachedResponse:
- return LookupStatus::RequestIncomplete;
- case FilterState::ServingFromCache:
- ABSL_FALLTHROUGH_INTENDED;
- case FilterState::ResponseServedFromCache:
- // Functionally a cache hit, this is differentiated for metrics reporting.
- return LookupStatus::StaleHitWithSuccessfulValidation;
- case FilterState::NotServingFromCache:
- return LookupStatus::StaleHitWithFailedValidation;
- case FilterState::Initial:
- ABSL_FALLTHROUGH_INTENDED;
- case FilterState::Destroyed:
- IS_ENVOY_BUG(absl::StrCat("Unexpected filter state in requestCacheStatus: cache lookup "
- "response required validation, but filter state is ",
- filter_state));
- }
- return LookupStatus::Unknown;
- }
- case CacheEntryStatus::FoundNotModified:
- // TODO(capoferro): Report this as a FoundNotModified when we handle
- // those.
- return LookupStatus::CacheHit;
- case CacheEntryStatus::LookupError:
- return LookupStatus::LookupError;
+void CacheFilter::getBody() {
+ ASSERT(lookup_result_, "CacheFilter is trying to call getBody with no LookupResult");
+ get_body_loop_ = GetBodyLoop::Again;
+ while (get_body_loop_ == GetBodyLoop::Again) {
+ ASSERT(!remaining_ranges_.empty(), "No reason to call getBody when there's no body to get.");
+
+ // We don't want to request more than a buffer-size at a time from the cache.
+ uint64_t fetch_size_limit = encoder_callbacks_->encoderBufferLimit();
+ // If there is no buffer size limit, we still want *some* constraint.
+ if (fetch_size_limit == 0) {
+ fetch_size_limit = MaxBytesToFetchFromCachePerRead;
}
- IS_ENVOY_BUG(absl::StrCat(
- "Unhandled CacheEntryStatus encountered when retrieving request cache status: " +
- std::to_string(static_cast(filter_state))));
- return LookupStatus::Unknown;
- }
- // Either decodeHeaders decided not to do a cache lookup (because the
- // request isn't cacheable), or decodeHeaders hasn't been called yet.
- switch (filter_state) {
- case FilterState::Initial:
- return LookupStatus::RequestIncomplete;
- case FilterState::NotServingFromCache:
- return LookupStatus::RequestNotCacheable;
- // Ignore the following lines. This code should not be executed.
- // GCOV_EXCL_START
- case FilterState::ValidatingCachedResponse:
- ABSL_FALLTHROUGH_INTENDED;
- case FilterState::ServingFromCache:
- ABSL_FALLTHROUGH_INTENDED;
- case FilterState::ResponseServedFromCache:
- ABSL_FALLTHROUGH_INTENDED;
- case FilterState::Destroyed:
- ENVOY_LOG(error, absl::StrCat("Unexpected filter state in requestCacheStatus: "
- "lookup_result_ is empty but filter state is ",
- filter_state));
- }
- return LookupStatus::Unknown;
+ AdjustedByteRange fetch_range = {remaining_ranges_[0].begin(),
+ (remaining_ranges_[0].length() > fetch_size_limit)
+ ? (remaining_ranges_[0].begin() + fetch_size_limit)
+ : remaining_ranges_[0].end()};
+
+ ENVOY_STREAM_LOG(debug, "CacheFilter calling getBody", *decoder_callbacks_);
+ get_body_loop_ = GetBodyLoop::InCallback;
+ lookup_result_->http_source_->getBody(
+ fetch_range, cancelWrapped(
+ [this, &dispatcher = decoder_callbacks_->dispatcher()](
+ Buffer::InstancePtr&& body, EndStream end_stream_enum) {
+ if (onBody(std::move(body), end_stream_enum)) {
+ if (get_body_loop_ == GetBodyLoop::InCallback) {
+ // If the callback was called inline, loop it.
+ get_body_loop_ = GetBodyLoop::Again;
+ } else {
+                            // If the callback was posted, we're no longer in
+                            // the loop, so call getBody to re-enter it.
+ getBody();
+ }
+ }
+ },
+ &cancel_in_flight_callback_));
+ }
+ get_body_loop_ = GetBodyLoop::Idle;
}
-void CacheFilter::getHeaders(Http::RequestHeaderMap& request_headers) {
- ASSERT(lookup_, "CacheFilter is trying to call getHeaders with no LookupContext");
- callback_called_directly_ = true;
- lookup_->getHeaders([this, &request_headers, &dispatcher = decoder_callbacks_->dispatcher()](
- LookupResult&& result, bool end_stream) {
- ASSERT(!callback_called_directly_ && dispatcher.isThreadSafe(),
- "caches must post the callback to the filter's dispatcher");
- onHeaders(std::move(result), request_headers, end_stream);
- });
- callback_called_directly_ = false;
+void CacheFilter::getTrailers() {
+ ASSERT(lookup_result_, "CacheFilter is trying to call getTrailers with no LookupResult");
+
+ lookup_result_->http_source_->getTrailers(cancelWrapped(
+ [this, &dispatcher = decoder_callbacks_->dispatcher()](Http::ResponseTrailerMapPtr&& trailers,
+ EndStream end_stream_enum) {
+ ASSERT(
+ dispatcher.isThreadSafe(),
+ "caches must ensure the callback is called from the original thread, either by posting "
+ "to dispatcher or by calling directly");
+ onTrailers(std::move(trailers), end_stream_enum);
+ },
+ &cancel_in_flight_callback_));
}
-void CacheFilter::getBody() {
- ASSERT(lookup_, "CacheFilter is trying to call getBody with no LookupContext");
- ASSERT(!remaining_ranges_.empty(), "No reason to call getBody when there's no body to get.");
-
- // We don't want to request more than a buffer-size at a time from the cache.
- uint64_t fetch_size_limit = encoder_callbacks_->encoderBufferLimit();
- // If there is no buffer size limit, we still want *some* constraint.
- if (fetch_size_limit == 0) {
- fetch_size_limit = MAX_BYTES_TO_FETCH_FROM_CACHE_PER_REQUEST;
- }
- AdjustedByteRange fetch_range = {remaining_ranges_[0].begin(),
- (remaining_ranges_[0].length() > fetch_size_limit)
- ? (remaining_ranges_[0].begin() + fetch_size_limit)
- : remaining_ranges_[0].end()};
-
- callback_called_directly_ = true;
- lookup_->getBody(fetch_range, [this, &dispatcher = decoder_callbacks_->dispatcher()](
- Buffer::InstancePtr&& body, bool end_stream) {
- ASSERT(!callback_called_directly_ && dispatcher.isThreadSafe(),
- "caches must post the callback to the filter's dispatcher");
- onBody(std::move(body), end_stream);
- });
- callback_called_directly_ = false;
+static AdjustedByteRange rangeFromHeaders(Http::ResponseHeaderMap& response_headers) {
+ if (Http::Utility::getResponseStatus(response_headers) !=
+ static_cast(Envoy::Http::Code::PartialContent)) {
+    // Don't use content-length; we can just request *all of the body* from
+    // the source, and it will tell us when it reaches the end.
+ return {0, std::numeric_limits::max()};
+ }
+ Http::HeaderMap::GetResult content_range_result =
+ response_headers.get(Envoy::Http::Headers::get().ContentRange);
+ if (content_range_result.empty()) {
+ return {0, std::numeric_limits::max()};
+ }
+ absl::string_view content_range = content_range_result[0]->value().getStringView();
+ if (!absl::ConsumePrefix(&content_range, "bytes ")) {
+ return {0, std::numeric_limits::max()};
+ }
+ if (absl::ConsumePrefix(&content_range, "*/")) {
+ uint64_t len;
+ if (absl::SimpleAtoi(content_range, &len)) {
+ return {0, len};
+ }
+ return {0, std::numeric_limits::max()};
+ }
+ std::pair range_of = absl::StrSplit(content_range, '/');
+ std::pair range = absl::StrSplit(range_of.first, '-');
+ uint64_t begin, end;
+ if (!absl::SimpleAtoi(range.first, &begin)) {
+ begin = 0;
+ }
+ if (!absl::SimpleAtoi(range.second, &end)) {
+ end = std::numeric_limits::max();
+ } else {
+ end++;
+ }
+ return {begin, end};
}
-void CacheFilter::getTrailers() {
- ASSERT(lookup_, "CacheFilter is trying to call getTrailers with no LookupContext");
-
- callback_called_directly_ = true;
- lookup_->getTrailers([this, &dispatcher = decoder_callbacks_->dispatcher()](
- Http::ResponseTrailerMapPtr&& trailers) {
- ASSERT(!callback_called_directly_ && dispatcher.isThreadSafe(),
- "caches must post the callback to the filter's dispatcher");
- onTrailers(std::move(trailers));
- });
- callback_called_directly_ = false;
-}
+void CacheFilter::onHeaders(Http::ResponseHeaderMapPtr response_headers,
+ EndStream end_stream_enum) {
+ ASSERT(lookup_result_, "onHeaders should not be called with no LookupResult");
-void CacheFilter::onHeaders(LookupResult&& result, Http::RequestHeaderMap& request_headers,
- bool end_stream) {
- if (filter_state_ == FilterState::Destroyed) {
- // The filter is being destroyed, any callbacks should be ignored.
+ if (end_stream_enum == EndStream::Reset) {
+ decoder_callbacks_->streamInfo().setResponseCodeDetails(
+ CacheResponseCodeDetails::CacheFilterAbortedDuringHeaders);
+ decoder_callbacks_->resetStream();
return;
}
- if (filter_state_ == FilterState::NotServingFromCache) {
- // A response was injected into the filter chain before the cache lookup finished, e.g. because
- // the request stream timed out.
- return;
+ ASSERT(response_headers != nullptr);
+
+ if (lookup_result_->status_ == CacheEntryStatus::Miss ||
+ lookup_result_->status_ == CacheEntryStatus::Validated ||
+ lookup_result_->status_ == CacheEntryStatus::ValidatedFree) {
+    // CacheSessions adds an age header indiscriminately because, once the
+    // insert has been handed off, it doesn't remember which request it was
+    // associated with. So here we remove that header for the non-cached
+    // response and the validated response.
+ response_headers->remove(Envoy::Http::CustomHeaders::get().Age);
}
- // TODO(yosrym93): Handle request only-if-cached directive
- lookup_result_ = std::make_unique(std::move(result));
- cache_entry_status_ = lookup_result_->cache_entry_status_;
- switch (cache_entry_status_.value()) {
- case CacheEntryStatus::FoundNotModified:
- PANIC("unsupported code");
- case CacheEntryStatus::RequiresValidation:
- // If a cache entry requires validation, inject validation headers in the
- // request and let it pass through as if no cache entry was found. If the
- // cache entry was valid, the response status should be 304 (unmodified)
- // and the cache entry will be injected in the response body.
- handleCacheHitWithValidation(request_headers);
- return;
- case CacheEntryStatus::Ok:
- if (lookup_result_->range_details_.has_value()) {
- handleCacheHitWithRangeRequest();
- return;
- }
- handleCacheHit(/* end_stream_after_headers = */ end_stream);
- return;
- case CacheEntryStatus::Unusable:
- sendUpstreamRequest(request_headers);
+ static const std::string partial_content = std::to_string(enumToInt(Http::Code::PartialContent));
+ if (response_headers->getStatusValue() == partial_content) {
+ is_partial_response_ = true;
+ }
+
+ bool end_stream = ((end_stream_enum == EndStream::End) || is_head_request_);
+
+ if (!end_stream) {
+ remaining_ranges_ = {rangeFromHeaders(*response_headers)};
+ ENVOY_STREAM_LOG(debug, "CacheFilter requesting range {}-{} {}", *decoder_callbacks_,
+ remaining_ranges_[0].begin(), remaining_ranges_[0].end(), *response_headers);
+ }
+
+ decoder_callbacks_->encodeHeaders(std::move(response_headers), end_stream,
+ responseCodeDetailsFromStatus(lookup_result_->status_));
+ // onDestroy can potentially be called during encodeHeaders.
+ if (is_destroyed_) {
return;
- case CacheEntryStatus::LookupError:
- filter_state_ = FilterState::NotServingFromCache;
- insert_status_ = InsertStatus::NoInsertLookupError;
- decoder_callbacks_->continueDecoding();
+ }
+ if (end_stream) {
return;
}
- ENVOY_LOG(error, "Unhandled CacheEntryStatus in CacheFilter::onHeaders: {}",
- cacheEntryStatusString(cache_entry_status_.value()));
- // Treat unhandled status as a cache miss.
- sendUpstreamRequest(request_headers);
+ return getBody();
}
-// TODO(toddmgreer): Handle downstream backpressure.
-void CacheFilter::onBody(Buffer::InstancePtr&& body, bool end_stream) {
- // Can be called during decoding if a valid cache hit is found,
- // or during encoding if a cache entry was being validated.
- if (filter_state_ == FilterState::Destroyed) {
- // The filter is being destroyed, any callbacks should be ignored.
- return;
- }
+bool CacheFilter::onBody(Buffer::InstancePtr&& body, EndStream end_stream_enum) {
ASSERT(!remaining_ranges_.empty(),
"CacheFilter doesn't call getBody unless there's more body to get, so this is a "
"bogus callback.");
- if (remaining_ranges_[0].end() == std::numeric_limits::max() && body == nullptr) {
- ASSERT(!end_stream);
- getTrailers();
- return;
+ if (end_stream_enum == EndStream::Reset) {
+ decoder_callbacks_->streamInfo().setResponseCodeDetails(
+ CacheResponseCodeDetails::CacheFilterAbortedDuringBody);
+ decoder_callbacks_->resetStream();
+ return false;
+ }
+ bool end_stream = end_stream_enum == EndStream::End;
+
+ if (body == nullptr) {
+    // If we called getBody and got a nullptr, that implies there was less body
+    // than expected, or we didn't have complete expectations.
+    // Incorrect expectations should not be treated as a bug here, as an
+    // untrusted upstream could send a mismatched content-length and
+    // body-stream.
+    // If there is no body but there are trailers, this is how we know to
+    // move on to trailers.
+ if (end_stream) {
+ Buffer::OwnedImpl empty_buffer;
+ decoder_callbacks_->encodeData(empty_buffer, true);
+ finalizeEncodingCachedResponse();
+ return false;
+ } else {
+ getTrailers();
+ return false;
+ }
}
- ASSERT(body, "Cache said it had a body, but isn't giving it to us.");
const uint64_t bytes_from_cache = body->length();
if (bytes_from_cache < remaining_ranges_[0].length()) {
@@ -365,194 +409,79 @@ void CacheFilter::onBody(Buffer::InstancePtr&& body, bool end_stream) {
} else if (bytes_from_cache == remaining_ranges_[0].length()) {
remaining_ranges_.erase(remaining_ranges_.begin());
} else {
- ASSERT(false, "Received oversized body from cache.");
decoder_callbacks_->resetStream();
- return;
+ IS_ENVOY_BUG("Received oversized body from http source.");
+ return false;
+ }
+
+ // For a range request the upstream may not have thought it was end_stream
+ // but it still could be for the downstream.
+ if (is_partial_response_ && remaining_ranges_.empty()) {
+ end_stream = true;
}
decoder_callbacks_->encodeData(*body, end_stream);
+ // Filter can potentially be destroyed during encodeData (e.g. if
+ // encodeData provokes a reset)
+ if (is_destroyed_) {
+ return false;
+ }
if (end_stream) {
finalizeEncodingCachedResponse();
+ return false;
} else if (!remaining_ranges_.empty()) {
- getBody();
- } else if (lookup_result_->range_details_.has_value()) {
+ if (downstream_watermarked_) {
+ get_body_on_unblocked_ = true;
+ return false;
+ } else {
+ return true;
+ }
+ } else if (is_partial_response_) {
// If a range was requested we don't send trailers.
// (It is unclear from the spec whether we should, but pragmatically we
// don't have any indication of whether trailers are present or not, and
// range requests in general are for filling in missing chunks so including
// trailers with every chunk would be wasteful.)
finalizeEncodingCachedResponse();
+ return false;
} else {
getTrailers();
+ return false;
}
}
-void CacheFilter::onTrailers(Http::ResponseTrailerMapPtr&& trailers) {
- // Can be called during decoding if a valid cache hit is found,
- // or during encoding if a cache entry was being validated.
- if (filter_state_ == FilterState::Destroyed) {
- // The filter is being destroyed, any callbacks should be ignored.
- return;
- }
- decoder_callbacks_->encodeTrailers(std::move(trailers));
- // Filter can potentially be destroyed during encodeTrailers.
- if (filter_state_ == FilterState::Destroyed) {
- return;
- }
- finalizeEncodingCachedResponse();
-}
-
-void CacheFilter::handleCacheHit(bool end_stream_after_headers) {
- filter_state_ = FilterState::ServingFromCache;
- insert_status_ = InsertStatus::NoInsertCacheHit;
- encodeCachedResponse(end_stream_after_headers);
-}
+void CacheFilter::onAboveWriteBufferHighWatermark() { downstream_watermarked_++; }
-void CacheFilter::handleCacheHitWithRangeRequest() {
- if (!lookup_result_->range_details_.has_value()) {
- ENVOY_LOG(error, "handleCacheHitWithRangeRequest() should not be called without "
- "range_details_ being populated in lookup_result_");
- return;
- }
- if (!lookup_result_->range_details_->satisfiable_) {
- filter_state_ = FilterState::ServingFromCache;
- insert_status_ = InsertStatus::NoInsertCacheHit;
- lookup_result_->headers_->setStatus(
- static_cast(Envoy::Http::Code::RangeNotSatisfiable));
- if (lookup_result_->content_length_.has_value()) {
- lookup_result_->headers_->addCopy(
- Envoy::Http::Headers::get().ContentRange,
- absl::StrCat("bytes */", lookup_result_->content_length_.value()));
- } else {
- IS_ENVOY_BUG(
- "handleCacheHitWithRangeRequest() should not be called with satisfiable_=false "
- "without content_length_ being populated in lookup_result_. Cache implementation "
- "should wait to respond to getHeaders in this case until content_length_ is known, "
- "declaring a miss, or should strip range_details_ from the lookup result.");
- }
- // We shouldn't serve any of the body, so the response content length
- // is 0.
- lookup_result_->setContentLength(0);
- encodeCachedResponse(/* end_stream_after_headers = */ true);
- return;
- }
-
- std::vector ranges = lookup_result_->range_details_->ranges_;
- if (ranges.size() != 1) {
- // Multi-part responses are not supported, and they will be treated as
- // a usual 200 response. A possible way to achieve that would be to move
- // all ranges to remaining_ranges_, and add logic inside '::onBody' to
- // interleave the body bytes with sub-headers and separator string for
- // each part. Would need to keep track if the current range is over or
- // not to know when to insert the separator, and calculate the length
- // based on length of ranges + extra headers and separators.
- handleCacheHit(/* end_stream_after_headers = */ false);
- return;
- }
-
- filter_state_ = FilterState::ServingFromCache;
- insert_status_ = InsertStatus::NoInsertCacheHit;
-
- lookup_result_->headers_->setStatus(static_cast(Envoy::Http::Code::PartialContent));
- lookup_result_->headers_->addCopy(
- Envoy::Http::Headers::get().ContentRange,
- absl::StrCat("bytes ", ranges[0].begin(), "-", ranges[0].end() - 1, "/",
- lookup_result_->content_length_.has_value()
- ? absl::StrCat(lookup_result_->content_length_.value())
- : "*"));
- // We serve only the desired range, so adjust the length
- // accordingly.
- lookup_result_->setContentLength(ranges[0].length());
- remaining_ranges_ = std::move(ranges);
- encodeCachedResponse(/* end_stream_after_headers = */ false);
-}
-
-void CacheFilter::handleCacheHitWithValidation(Envoy::Http::RequestHeaderMap& request_headers) {
- filter_state_ = FilterState::ValidatingCachedResponse;
- injectValidationHeaders(request_headers);
- sendUpstreamRequest(request_headers);
-}
-
-void CacheFilter::injectValidationHeaders(Http::RequestHeaderMap& request_headers) {
- ASSERT(lookup_result_, "injectValidationHeaders precondition unsatisfied: lookup_result_ "
- "does not point to a cache lookup result");
- ASSERT(filter_state_ == FilterState::ValidatingCachedResponse,
- "injectValidationHeaders precondition unsatisfied: the "
- "CacheFilter is not validating a cache lookup result");
-
- const Http::HeaderEntry* etag_header =
- lookup_result_->headers_->getInline(CacheCustomHeaders::etag());
- const Http::HeaderEntry* last_modified_header =
- lookup_result_->headers_->getInline(CacheCustomHeaders::lastModified());
-
- if (etag_header) {
- absl::string_view etag = etag_header->value().getStringView();
- request_headers.setInline(CacheCustomHeaders::ifNoneMatch(), etag);
- }
- if (DateUtil::timePointValid(CacheHeadersUtils::httpTime(last_modified_header))) {
- // Valid Last-Modified header exists.
- absl::string_view last_modified = last_modified_header->value().getStringView();
- request_headers.setInline(CacheCustomHeaders::ifModifiedSince(), last_modified);
+void CacheFilter::onBelowWriteBufferLowWatermark() {
+ if (downstream_watermarked_ == 0) {
+ IS_ENVOY_BUG("low watermark not preceded by high watermark should not happen");
} else {
- // Either Last-Modified is missing or invalid, fallback to Date.
- // A correct behaviour according to:
- // https://httpwg.org/specs/rfc7232.html#header.if-modified-since
- absl::string_view date = lookup_result_->headers_->getDateValue();
- request_headers.setInline(CacheCustomHeaders::ifModifiedSince(), date);
+ downstream_watermarked_--;
+ }
+ if (downstream_watermarked_ == 0 && get_body_on_unblocked_) {
+ get_body_on_unblocked_ = false;
+ getBody();
}
}
-void CacheFilter::encodeCachedResponse(bool end_stream_after_headers) {
- ASSERT(lookup_result_, "encodeCachedResponse precondition unsatisfied: lookup_result_ "
- "does not point to a cache lookup result");
-
- // Set appropriate response flags and codes.
- decoder_callbacks_->streamInfo().setResponseFlag(
- StreamInfo::CoreResponseFlag::ResponseFromCacheFilter);
- decoder_callbacks_->streamInfo().setResponseCodeDetails(
- CacheResponseCodeDetails::get().ResponseFromCacheFilter);
-
- decoder_callbacks_->encodeHeaders(std::move(lookup_result_->headers_),
- is_head_request_ || end_stream_after_headers,
- CacheResponseCodeDetails::get().ResponseFromCacheFilter);
- // Filter can potentially be destroyed during encodeHeaders.
- if (filter_state_ == FilterState::Destroyed) {
+void CacheFilter::onTrailers(Http::ResponseTrailerMapPtr&& trailers, EndStream end_stream_enum) {
+ ASSERT(!is_destroyed_, "callback should be cancelled when filter is destroyed");
+ if (end_stream_enum == EndStream::Reset) {
+ decoder_callbacks_->streamInfo().setResponseCodeDetails(
+ CacheResponseCodeDetails::CacheFilterAbortedDuringTrailers);
+ decoder_callbacks_->resetStream();
return;
}
- if (is_head_request_ || end_stream_after_headers) {
- filter_state_ = FilterState::ResponseServedFromCache;
+ decoder_callbacks_->encodeTrailers(std::move(trailers));
+ // Filter can potentially be destroyed during encodeTrailers.
+ if (is_destroyed_) {
return;
}
- if (remaining_ranges_.empty() && lookup_result_->content_length_.value_or(1) > 0) {
- // No range has been added, so we add entire body to the response.
- remaining_ranges_.emplace_back(
- 0, lookup_result_->content_length_.value_or(std::numeric_limits::max()));
- }
- if (!remaining_ranges_.empty()) {
- getBody();
- } else {
- getTrailers();
- }
-}
-
-void CacheFilter::finalizeEncodingCachedResponse() {
- filter_state_ = FilterState::ResponseServedFromCache;
-}
-
-LookupStatus CacheFilter::lookupStatus() const {
- if (lookup_result_ == nullptr && lookup_ != nullptr) {
- return LookupStatus::RequestIncomplete;
- }
-
- return resolveLookupStatus(cache_entry_status_, filter_state_);
+ finalizeEncodingCachedResponse();
}
-InsertStatus CacheFilter::insertStatus() const {
- return insert_status_.value_or((upstream_request_ == nullptr)
- ? InsertStatus::NoInsertRequestIncomplete
- : InsertStatus::FilterAbortedBeforeInsertComplete);
-}
+void CacheFilter::finalizeEncodingCachedResponse() {}
} // namespace Cache
} // namespace HttpFilters
diff --git a/source/extensions/filters/http/cache/cache_filter.h b/source/extensions/filters/http/cache/cache_filter.h
index 3669bbf824c30..2be8ec73f512c 100644
--- a/source/extensions/filters/http/cache/cache_filter.h
+++ b/source/extensions/filters/http/cache/cache_filter.h
@@ -5,11 +5,11 @@
#include "envoy/extensions/filters/http/cache/v3/cache.pb.h"
+#include "source/common/common/cancel_wrapper.h"
#include "source/common/common/logger.h"
-#include "source/extensions/filters/http/cache/cache_filter_logging_info.h"
#include "source/extensions/filters/http/cache/cache_headers_utils.h"
-#include "source/extensions/filters/http/cache/filter_state.h"
-#include "source/extensions/filters/http/cache/http_cache.h"
+#include "source/extensions/filters/http/cache/cache_sessions.h"
+#include "source/extensions/filters/http/cache/stats.h"
#include "source/extensions/filters/http/common/pass_through_filter.h"
namespace Envoy {
@@ -17,19 +17,26 @@ namespace Extensions {
namespace HttpFilters {
namespace Cache {
-class UpstreamRequest;
-
-class CacheFilterConfig {
+// CacheFilterConfig contains everything which is shared by all CacheFilter
+// objects created from a given CacheConfig.
+class CacheFilterConfig : public CacheableResponseChecker, public CacheFilterStatsProvider {
public:
CacheFilterConfig(const envoy::extensions::filters::http::cache::v3::CacheConfig& config,
+ std::shared_ptr cache_sessions,
Server::Configuration::CommonFactoryContext& context);
+ // Implements CacheableResponseChecker::isCacheableResponse.
+ bool isCacheableResponse(const Http::ResponseHeaderMap& headers) const override;
// The allow list rules that decide if a header can be varied upon.
const VaryAllowList& varyAllowList() const { return vary_allow_list_; }
TimeSource& timeSource() const { return time_source_; }
const Http::AsyncClient::StreamOptions& upstreamOptions() const { return upstream_options_; }
Upstream::ClusterManager& clusterManager() const { return cluster_manager_; }
+ const std::string& overrideUpstreamCluster() const { return override_upstream_cluster_; }
bool ignoreRequestCacheControlHeader() const { return ignore_request_cache_control_header_; }
+ CacheSessions& cacheSessions() const { return *cache_sessions_; }
+ bool hasCache() const { return cache_sessions_ != nullptr; }
+ CacheFilterStats& stats() const override { return cache_sessions_->stats(); }
private:
const VaryAllowList vary_allow_list_;
@@ -37,111 +44,65 @@ class CacheFilterConfig {
const bool ignore_request_cache_control_header_;
Upstream::ClusterManager& cluster_manager_;
Http::AsyncClient::StreamOptions upstream_options_;
+ std::shared_ptr cache_sessions_;
+ CacheFilterStatsPtr stats_;
+ std::string override_upstream_cluster_;
};
/**
* A filter that caches responses and attempts to satisfy requests from cache.
*/
class CacheFilter : public Http::PassThroughFilter,
- public Logger::Loggable,
- public std::enable_shared_from_this {
+ public Http::DownstreamWatermarkCallbacks,
+ public Logger::Loggable {
public:
- CacheFilter(std::shared_ptr config,
- std::shared_ptr http_cache);
+ CacheFilter(std::shared_ptr config);
// Http::StreamFilterBase
void onDestroy() override;
- void onStreamComplete() override;
// Http::StreamDecoderFilter
+ void setDecoderFilterCallbacks(Http::StreamDecoderFilterCallbacks& callbacks) override;
Http::FilterHeadersStatus decodeHeaders(Http::RequestHeaderMap& headers,
bool end_stream) override;
// Http::StreamEncoderFilter
Http::FilterHeadersStatus encodeHeaders(Http::ResponseHeaderMap& headers,
bool end_stream) override;
- static LookupStatus resolveLookupStatus(absl::optional cache_entry_status,
- FilterState filter_state);
+ // Http::DownstreamWatermarkCallbacks
+ void onAboveWriteBufferHighWatermark() override;
+ void onBelowWriteBufferLowWatermark() override;
private:
- // For a cache miss that may be cacheable, the upstream request is sent outside of the usual
- // filter chain so that the request can continue even if the downstream client disconnects.
- void sendUpstreamRequest(Http::RequestHeaderMap& request_headers);
-
- // In the event that there is no matching route when attempting to sendUpstreamRequest,
- // send a 404 locally.
+ using CancelFunction = CancelWrapper::CancelFunction;
+ // Gets the cluster name for the current route, if there is one.
+ absl::optional clusterName();
+ // Gets an AsyncClient for the given cluster, or nullopt if there is no upstream.
+ OptRef asyncClient(absl::string_view cluster_name);
+
+ // In the event that there is no matching route when attempting to fetch asyncClient,
+ // send a 404 local response.
void sendNoRouteResponse();
- // In the event that there is no available cluster when attempting to sendUpstreamRequest,
- // send a 503 locally.
+ // In the event that there is no available cluster when attempting to fetch asyncClient,
+ // send a 503 local response.
void sendNoClusterResponse(absl::string_view cluster_name);
- // Called by UpstreamRequest if it is reset before CacheFilter is destroyed.
- // CacheFilter must make no more calls to upstream_request_ once this has been called.
- void onUpstreamRequestReset();
-
- // Called by UpstreamRequest if it finishes without reset before CacheFilter is destroyed.
- // CacheFilter must make no more calls to upstream_request_ once this has been called.
- void onUpstreamRequestComplete();
-
// Utility functions; make any necessary checks and call the corresponding lookup_ functions
void getHeaders(Http::RequestHeaderMap& request_headers);
void getBody();
void getTrailers();
- // Callbacks for HttpCache to call when headers/body/trailers are ready.
- void onHeaders(LookupResult&& result, Http::RequestHeaderMap& request_headers, bool end_stream);
- void onBody(Buffer::InstancePtr&& body, bool end_stream);
- void onTrailers(Http::ResponseTrailerMapPtr&& trailers);
-
- // Set required state in the CacheFilter for handling a cache hit.
- void handleCacheHit(bool end_stream_after_headers);
-
- // Set up the required state in the CacheFilter for handling a range
- // request.
- void handleCacheHitWithRangeRequest();
-
- // Set required state in the CacheFilter for handling a cache hit when
- // validation is required.
- void handleCacheHitWithValidation(Envoy::Http::RequestHeaderMap& request_headers);
-
- // Precondition: lookup_result_ points to a cache lookup result that requires validation.
- // Should only be called during onHeaders as it modifies RequestHeaderMap.
- // Adds required conditional headers for cache validation to the request headers
- // according to the present cache lookup result headers.
- void injectValidationHeaders(Http::RequestHeaderMap& request_headers);
-
- // Precondition: lookup_result_ points to a fresh or validated cache look up result.
- // Adds a cache lookup result to the response encoding stream.
- // Can be called during decoding if a valid cache hit is found,
- // or during encoding if a cache entry was validated successfully.
- //
- // When validating, headers should be set to the merged values from the validation
- // response and the lookup_result_; if unset, the headers from the lookup_result_ are used.
- void encodeCachedResponse(bool end_stream_after_headers);
-
- // Precondition: finished adding a response from cache to the response encoding stream.
- // Updates filter_state_ and continues the encoding stream if necessary.
+ void onLookupResult(ActiveLookupResultPtr lookup_result);
+ void onHeaders(Http::ResponseHeaderMapPtr headers, EndStream end_stream);
+ // Returns true if getBody should be called again.
+ bool onBody(Buffer::InstancePtr&& body, EndStream end_stream);
+ void onTrailers(Http::ResponseTrailerMapPtr&& trailers, EndStream end_stream);
+ CacheFilterStats& stats() const { return config_->stats(); }
+
void finalizeEncodingCachedResponse();
- // The result of this request's cache lookup.
- LookupStatus lookupStatus() const;
-
- // The final status of the insert operation or header update, or decision not
- // to insert or update. If the request or insert is ongoing, assumes it's
- // being cancelled.
- InsertStatus insertStatus() const;
-
- // upstream_request_ belongs to the object itself, so that it can be disconnected
- // from the filter and still complete the cache-write in the event that the
- // downstream disconnects. The filter and the UpstreamRequest must communicate to
- // each other their separate destruction-triggers.
- // When CacheFilter is destroyed first it should call
- // upstream_request_->disconnectFilter()
- // and if upstream_request_ is destroyed first, it will call onUpstreamRequestReset.
- UpstreamRequest* upstream_request_ = nullptr;
std::shared_ptr cache_;
- LookupContextPtr lookup_;
- LookupResultPtr lookup_result_;
- absl::optional cache_entry_status_;
+ ActiveLookupResultPtr lookup_result_;
+ bool is_partial_response_ = false;
// Tracks what body bytes still need to be read from the cache. This is
// currently only one Range, but will expand when full range support is added. Initialized by
@@ -154,16 +115,19 @@ class CacheFilter : public Http::PassThroughFilter,
// https://httpwg.org/specs/rfc7234.html#response.cacheability
bool request_allows_inserts_ = false;
- FilterState filter_state_ = FilterState::Initial;
+ bool is_destroyed_ = false;
bool is_head_request_ = false;
- // This toggle is used to detect callbacks being called directly and not posted.
- bool callback_called_directly_ = false;
- // The status of the insert operation or header update, or decision not to insert or update.
- // If it's too early to determine the final status, this is empty.
- absl::optional insert_status_;
-
- friend class UpstreamRequest;
+  // If this cancel function is populated, onDestroy must invoke it.
+ CancelFunction cancel_in_flight_callback_;
+
+ int downstream_watermarked_ = 0;
+ // To avoid a potential recursion stack-overflow, the onBody function
+ // does not call getBody again directly but instead returns true if
+ // we *should* call getBody again, allowing it to be a loop rather
+ // than recursion.
+ enum class GetBodyLoop { InCallback, Again, Idle } get_body_loop_;
+ bool get_body_on_unblocked_ = false;
};
using CacheFilterSharedPtr = std::shared_ptr;
diff --git a/source/extensions/filters/http/cache/cache_filter_logging_info.cc b/source/extensions/filters/http/cache/cache_filter_logging_info.cc
deleted file mode 100644
index 94a5b94eb279f..0000000000000
--- a/source/extensions/filters/http/cache/cache_filter_logging_info.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-#include "source/extensions/filters/http/cache/cache_filter_logging_info.h"
-
-#include "absl/strings/str_format.h"
-
-namespace Envoy {
-namespace Extensions {
-namespace HttpFilters {
-namespace Cache {
-
-absl::string_view lookupStatusToString(LookupStatus status) {
- switch (status) {
- case LookupStatus::Unknown:
- return "Unknown";
- case LookupStatus::CacheHit:
- return "CacheHit";
- case LookupStatus::CacheMiss:
- return "CacheMiss";
- case LookupStatus::StaleHitWithSuccessfulValidation:
- return "StaleHitWithSuccessfulValidation";
- case LookupStatus::StaleHitWithFailedValidation:
- return "StaleHitWithFailedValidation";
- case LookupStatus::NotModifiedHit:
- return "NotModifiedHit";
- case LookupStatus::RequestNotCacheable:
- return "RequestNotCacheable";
- case LookupStatus::RequestIncomplete:
- return "RequestIncomplete";
- case LookupStatus::LookupError:
- return "LookupError";
- }
- IS_ENVOY_BUG(absl::StrCat("Unexpected LookupStatus: ", status));
- return "UnexpectedLookupStatus";
-}
-
-std::ostream& operator<<(std::ostream& os, const LookupStatus& request_cache_status) {
- return os << lookupStatusToString(request_cache_status);
-}
-
-absl::string_view insertStatusToString(InsertStatus status) {
- switch (status) {
- case InsertStatus::InsertSucceeded:
- return "InsertSucceeded";
- case InsertStatus::InsertAbortedByCache:
- return "InsertAbortedByCache";
- case InsertStatus::InsertAbortedCacheCongested:
- return "InsertAbortedCacheCongested";
- case InsertStatus::FilterAbortedBeforeInsertComplete:
- return "FilterAbortedBeforeInsertComplete";
- case InsertStatus::HeaderUpdate:
- return "HeaderUpdate";
- case InsertStatus::NoInsertCacheHit:
- return "NoInsertCacheHit";
- case InsertStatus::NoInsertRequestNotCacheable:
- return "NoInsertRequestNotCacheable";
- case InsertStatus::NoInsertResponseNotCacheable:
- return "NoInsertResponseNotCacheable";
- case InsertStatus::NoInsertRequestIncomplete:
- return "NoInsertRequestIncomplete";
- case InsertStatus::NoInsertResponseValidatorsMismatch:
- return "NoInsertResponseValidatorsMismatch";
- case InsertStatus::NoInsertResponseVaryMismatch:
- return "NoInsertResponseVaryMismatch";
- case InsertStatus::NoInsertResponseVaryDisallowed:
- return "NoInsertResponseVaryDisallowed";
- case InsertStatus::NoInsertLookupError:
- return "NoInsertLookupError";
- }
- IS_ENVOY_BUG(absl::StrCat("Unexpected InsertStatus: ", status));
- return "UnexpectedInsertStatus";
-}
-
-} // namespace Cache
-} // namespace HttpFilters
-} // namespace Extensions
-} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_filter_logging_info.h b/source/extensions/filters/http/cache/cache_filter_logging_info.h
deleted file mode 100644
index 296fcf5d45f4f..0000000000000
--- a/source/extensions/filters/http/cache/cache_filter_logging_info.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#pragma once
-
-#include "envoy/stream_info/filter_state.h"
-
-#include "absl/strings/str_format.h"
-
-namespace Envoy {
-namespace Extensions {
-namespace HttpFilters {
-namespace Cache {
-
-enum class LookupStatus {
- // The CacheFilter couldn't determine the status of the request, probably
- // because of an internal error.
- Unknown,
- // The CacheFilter found a response in cache to serve.
- CacheHit,
- // The CacheFilter didn't find a response in cache.
- CacheMiss,
- // The CacheFilter found a stale response, and sent a validation request to
- // the upstream; the upstream responded with a 304 Not Modified. This is
- // functionally a cache hit. It is differentiated for metrics reporting.
- StaleHitWithSuccessfulValidation,
- // The CacheFilter found a stale response, and sent a validation request to
- // the upstream; the upstream responded with anything other than a 304 Not
- // Modified. The CacheFilter forwards 5xx responses from the
- // upstream in this case, instead of sending the stale cache entry.
- StaleHitWithFailedValidation,
- // The CacheFilter found a response in cache and served a 304 Not Modified.
- NotModifiedHit,
- // The request wasn't cacheable, and the CacheFilter didn't try to look it up
- // in cache.
- RequestNotCacheable,
- // The request was cancelled before the CacheFilter could determine a cache
- // status.
- RequestIncomplete,
- // The CacheFilter couldn't determine whether there was a response in cache,
- // e.g. because the cache was unreachable or the lookup RPC timed out.
- LookupError,
-};
-
-absl::string_view lookupStatusToString(LookupStatus status);
-
-std::ostream& operator<<(std::ostream& os, const LookupStatus& request_cache_status);
-
-enum class InsertStatus {
- // The CacheFilter attempted to insert a cache entry, and succeeded as far as
- // it knows. The filter doesn't wait for a final confirmation from the cache,
- // so the filter may still show this status for an insert that failed at e.g.
- // the last body chunk.
- InsertSucceeded,
- // The CacheFilter started an insert, but the HttpCache aborted it.
- InsertAbortedByCache,
- // The CacheFilter started an insert, but aborted it because the cache wasn't
- // ready as a body chunk came in.
- InsertAbortedCacheCongested,
- // The CacheFilter started an insert, but the filter was reset before the insert
- // completed. The insert may or may not have gone on to completion independently.
- FilterAbortedBeforeInsertComplete,
- // The CacheFilter attempted to update the headers of an existing cache entry.
- // This doesn't indicate whether or not the update succeeded.
- HeaderUpdate,
- // The CacheFilter found a cache entry and didn't attempt to insert or update its
- // headers.
- NoInsertCacheHit,
- // The CacheFilter got an uncacheable request and didn't try to cache the
- // response.
- NoInsertRequestNotCacheable,
- // The CacheFilter got an uncacheable response and didn't cache it.
- NoInsertResponseNotCacheable,
- // The request was cancelled before the CacheFilter decided whether or not to
- // insert the response.
- NoInsertRequestIncomplete,
- // The CacheFilter got a 304 validation response not matching the etag strong
- // validator of our cached entry. The cached entry should be replaced or removed.
- NoInsertResponseValidatorsMismatch,
- // The CacheFilter got a 304 validation response not matching the vary header
- // fields. The cached variant set needs to be removed.
- NoInsertResponseVaryMismatch,
- // The CacheFilter got a 304 validation response, but the vary header was disallowed by the vary
- // allow list
- NoInsertResponseVaryDisallowed,
- // The CacheFilter couldn't determine whether the request was in cache and
- // didn't try to insert it.
- NoInsertLookupError,
-};
-
-absl::string_view insertStatusToString(InsertStatus status);
-
-// Cache-related information about a request, to be used for logging and stats.
-class CacheFilterLoggingInfo : public Envoy::StreamInfo::FilterState::Object {
-public:
- // FilterStateKey is used to store the FilterState::Object in the FilterState.
- static constexpr absl::string_view FilterStateKey =
- "io.envoyproxy.extensions.filters.http.cache.CacheFilterLoggingInfo";
-
- CacheFilterLoggingInfo(LookupStatus cache_lookup_status, InsertStatus cache_insert_status)
- : cache_lookup_status_(cache_lookup_status), cache_insert_status_(cache_insert_status) {}
-
- LookupStatus lookupStatus() const { return cache_lookup_status_; }
-
- InsertStatus insertStatus() const { return cache_insert_status_; }
-
-private:
- const LookupStatus cache_lookup_status_;
- const InsertStatus cache_insert_status_;
-};
-
-} // namespace Cache
-} // namespace HttpFilters
-} // namespace Extensions
-} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_headers_utils.cc b/source/extensions/filters/http/cache/cache_headers_utils.cc
index 06fd13bf898d2..d3bf53fc62d50 100644
--- a/source/extensions/filters/http/cache/cache_headers_utils.cc
+++ b/source/extensions/filters/http/cache/cache_headers_utils.cc
@@ -7,8 +7,10 @@
#include "envoy/http/header_map.h"
+#include "source/common/common/enum_to_int.h"
#include "source/common/http/header_map_impl.h"
#include "source/common/http/header_utility.h"
+#include "source/common/http/utility.h"
#include "source/extensions/filters/http/cache/cache_custom_headers.h"
#include "absl/algorithm/container.h"
@@ -232,6 +234,67 @@ Seconds CacheHeadersUtils::calculateAge(const Http::ResponseHeaderMap& response_
return std::chrono::duration_cast<Seconds>(current_age);
}
+void CacheHeadersUtils::injectValidationHeaders(
+ Http::RequestHeaderMap& request_headers, const Http::ResponseHeaderMap& old_response_headers) {
+ const Http::HeaderEntry* etag_header = old_response_headers.getInline(CacheCustomHeaders::etag());
+ const Http::HeaderEntry* last_modified_header =
+ old_response_headers.getInline(CacheCustomHeaders::lastModified());
+
+ if (etag_header) {
+ absl::string_view etag = etag_header->value().getStringView();
+ request_headers.setInline(CacheCustomHeaders::ifNoneMatch(), etag);
+ }
+ if (DateUtil::timePointValid(CacheHeadersUtils::httpTime(last_modified_header))) {
+ // Valid Last-Modified header exists.
+ absl::string_view last_modified = last_modified_header->value().getStringView();
+ request_headers.setInline(CacheCustomHeaders::ifModifiedSince(), last_modified);
+ } else {
+ // Either Last-Modified is missing or invalid, fallback to Date.
+ // A correct behaviour according to:
+ // https://httpwg.org/specs/rfc7232.html#header.if-modified-since
+ absl::string_view date = old_response_headers.getDateValue();
+ request_headers.setInline(CacheCustomHeaders::ifModifiedSince(), date);
+ }
+}
+
+// TODO(yosrym93): Write a test that exercises this when SimpleHttpCache implements updateHeaders
+bool CacheHeadersUtils::shouldUpdateCachedEntry(const Http::ResponseHeaderMap& new_headers,
+ const Http::ResponseHeaderMap& old_headers) {
+ ASSERT(Http::Utility::getResponseStatus(new_headers) == enumToInt(Http::Code::NotModified),
+ "shouldUpdateCachedEntry must only be called with 304 responses");
+
+ // According to: https://httpwg.org/specs/rfc7234.html#freshening.responses,
+ // and assuming a single cached response per key:
+ // If the 304 response contains a strong validator (etag) that does not match the cached response,
+ // the cached response should not be updated.
+ const Http::HeaderEntry* response_etag = new_headers.getInline(CacheCustomHeaders::etag());
+ const Http::HeaderEntry* cached_etag = old_headers.getInline(CacheCustomHeaders::etag());
+ return !response_etag || (cached_etag && cached_etag->value().getStringView() ==
+ response_etag->value().getStringView());
+}
+
+Key CacheHeadersUtils::makeKey(const Http::RequestHeaderMap& request_headers,
+ absl::string_view cluster_name) {
+ ASSERT(request_headers.Path(), "Can't form cache lookup key for malformed Http::RequestHeaderMap "
+ "with null Path.");
+ ASSERT(request_headers.Host(), "Can't form cache lookup key for malformed Http::RequestHeaderMap "
+ "with null Host.");
+ Key key;
+ absl::string_view scheme = request_headers.getSchemeValue();
+ ASSERT(Http::Utility::schemeIsValid(scheme));
+ // TODO(toddmgreer): Let config determine whether to include scheme, host, and
+ // query params.
+ key.set_cluster_name(cluster_name);
+ key.set_host(std::string(request_headers.getHostValue()));
+ key.set_path(std::string(request_headers.getPathValue()));
+ if (Http::Utility::schemeIsHttp(scheme)) {
+ key.set_scheme(Key::HTTP);
+ } else if (Http::Utility::schemeIsHttps(scheme)) {
+ key.set_scheme(Key::HTTPS);
+ }
+ return key;
+}
+
absl::optional<uint64_t> CacheHeadersUtils::readAndRemoveLeadingDigits(absl::string_view& str) {
uint64_t val = 0;
uint32_t bytes_consumed = 0;
diff --git a/source/extensions/filters/http/cache/cache_headers_utils.h b/source/extensions/filters/http/cache/cache_headers_utils.h
index 5f96d24c54e22..b439cb9d9c636 100644
--- a/source/extensions/filters/http/cache/cache_headers_utils.h
+++ b/source/extensions/filters/http/cache/cache_headers_utils.h
@@ -11,6 +11,7 @@
#include "source/common/http/header_utility.h"
#include "source/common/http/headers.h"
#include "source/common/protobuf/protobuf.h"
+#include "source/extensions/filters/http/cache/key.pb.h"
#include "absl/container/btree_set.h"
#include "absl/strings/str_join.h"
@@ -107,6 +108,18 @@ SystemTime httpTime(const Http::HeaderEntry* header_entry);
Seconds calculateAge(const Http::ResponseHeaderMap& response_headers, SystemTime response_time,
SystemTime now);
+// Create a resource key from headers and cluster name.
+Key makeKey(const Http::RequestHeaderMap& request_headers, absl::string_view cluster_name);
+
+// Adds required conditional headers for cache validation to the request headers
+// according to the previous response headers.
+void injectValidationHeaders(Http::RequestHeaderMap& request_headers,
+ const Http::ResponseHeaderMap& old_response_headers);
+
+// Checks if a cached entry should be updated with a 304 response.
+bool shouldUpdateCachedEntry(const Http::ResponseHeaderMap& new_headers,
+ const Http::ResponseHeaderMap& old_headers);
+
/**
* Read a leading positive decimal integer value and advance "*str" past the
* digits read. If overflow occurs, or no digits exist, return
@@ -124,6 +137,14 @@ void getAllMatchingHeaderNames(const Http::HeaderMap& headers,
std::vector<absl::string_view> parseCommaDelimitedHeader(const Http::HeaderMap::GetResult& entry);
} // namespace CacheHeadersUtils
+// Helper abstraction for a container that contains a VaryAllowList.
+class CacheableResponseChecker {
+public:
+ // Calls CacheabilityUtils::isCacheableResponse with the contained VaryAllowList.
+ virtual bool isCacheableResponse(const Http::ResponseHeaderMap& headers) const PURE;
+ virtual ~CacheableResponseChecker() = default;
+};
+
class VaryAllowList {
public:
// Parses the allow list from the Cache Config into the object's private allow_list_.
diff --git a/source/extensions/filters/http/cache/cache_insert_queue.cc b/source/extensions/filters/http/cache/cache_insert_queue.cc
deleted file mode 100644
index 47ed1b1ab92e4..0000000000000
--- a/source/extensions/filters/http/cache/cache_insert_queue.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-#include "source/extensions/filters/http/cache/cache_insert_queue.h"
-
-#include "source/common/buffer/buffer_impl.h"
-
-namespace Envoy {
-namespace Extensions {
-namespace HttpFilters {
-namespace Cache {
-
-// Representation of a piece of data to be sent to a cache for writing.
-class CacheInsertFragment {
-public:
- // Sends a fragment to the cache.
- // on_complete is called when the cache completes the operation.
- virtual void
- send(InsertContext& context,
- absl::AnyInvocable<void(bool, bool, size_t)> on_complete) PURE;
-
- virtual ~CacheInsertFragment() = default;
-};
-
-// A CacheInsertFragment containing some amount of http response body data.
-// The size of a fragment is equal to the size of the buffer arriving at
-// CacheFilter::encodeData.
-class CacheInsertFragmentBody : public CacheInsertFragment {
-public:
- CacheInsertFragmentBody(const Buffer::Instance& buffer, bool end_stream)
- : buffer_(buffer), end_stream_(end_stream) {}
-
- void send(InsertContext& context,
- absl::AnyInvocable<void(bool, bool, size_t)> on_complete)
- override {
- size_t sz = buffer_.length();
- context.insertBody(
- std::move(buffer_),
- [cb = std::move(on_complete), end_stream = end_stream_, sz](bool cache_success) mutable {
- std::move(cb)(cache_success, end_stream, sz);
- },
- end_stream_);
- }
-
-private:
- Buffer::OwnedImpl buffer_;
- const bool end_stream_;
-};
-
-// A CacheInsertFragment containing the full trailers of the response.
-class CacheInsertFragmentTrailers : public CacheInsertFragment {
-public:
- explicit CacheInsertFragmentTrailers(const Http::ResponseTrailerMap& trailers)
- : trailers_(Http::ResponseTrailerMapImpl::create()) {
- Http::ResponseTrailerMapImpl::copyFrom(*trailers_, trailers);
- }
-
- void send(InsertContext& context,
- absl::AnyInvocable<void(bool, bool, size_t)> on_complete)
- override {
- // While zero isn't technically true for the size of trailers, it doesn't
- // matter at this point because watermarks after the stream is complete
- // aren't useful.
- context.insertTrailers(*trailers_, [cb = std::move(on_complete)](bool cache_success) mutable {
- std::move(cb)(cache_success, true, 0);
- });
- }
-
-private:
- std::unique_ptr<Http::ResponseTrailerMap> trailers_;
-};
-
-CacheInsertQueue::CacheInsertQueue(std::shared_ptr<HttpCache> cache,
- Http::StreamEncoderFilterCallbacks& encoder_callbacks,
- InsertContextPtr insert_context, InsertQueueCallbacks& callbacks)
- : dispatcher_(encoder_callbacks.dispatcher()), insert_context_(std::move(insert_context)),
- low_watermark_bytes_(encoder_callbacks.encoderBufferLimit() / 2),
- high_watermark_bytes_(encoder_callbacks.encoderBufferLimit()), callbacks_(callbacks),
- cache_(cache) {}
-
-void CacheInsertQueue::insertHeaders(const Http::ResponseHeaderMap& response_headers,
- const ResponseMetadata& metadata, bool end_stream) {
- end_stream_queued_ = end_stream;
- // While zero isn't technically true for the size of headers, headers are
- // typically excluded from the stream buffer limit.
- fragment_in_flight_ = true;
- insert_context_->insertHeaders(
- response_headers, metadata,
- [this, end_stream](bool cache_success) { onFragmentComplete(cache_success, end_stream, 0); },
- end_stream);
- // This requirement simplifies the cache implementation; most caches will have to
- // do asynchronous operations, and so will post anyway. It is an error to call continueDecoding
- // during decodeHeaders, and calling a callback inline *may* do that, therefore we
- // require the cache to post. A previous version performed a post here to guarantee
- // correct behavior, but that meant for async caches it would double-post - it makes
- // more sense to single-post when it may not be necessary (in the rarer case of a cache
- // not needing async action) than to double-post in the common async case.
- // This requirement may become unnecessary after some more iterations result in
- // continueDecoding no longer being a thing in this filter.
- ASSERT(fragment_in_flight_,
- "insertHeaders must post the callback to dispatcher, not just call it");
-}
-
-void CacheInsertQueue::insertBody(const Buffer::Instance& fragment, bool end_stream) {
- if (end_stream) {
- end_stream_queued_ = true;
- }
- if (fragment_in_flight_) {
- size_t sz = fragment.length();
- queue_size_bytes_ += sz;
- fragments_.push_back(std::make_unique(fragment, end_stream));
- if (!watermarked_ && queue_size_bytes_ > high_watermark_bytes_) {
- if (callbacks_.has_value()) {
- callbacks_->insertQueueOverHighWatermark();
- }
- watermarked_ = true;
- }
- } else {
- fragment_in_flight_ = true;
- insert_context_->insertBody(
- Buffer::OwnedImpl(fragment),
- [this, end_stream](bool cache_success) {
- onFragmentComplete(cache_success, end_stream, 0);
- },
- end_stream);
- ASSERT(fragment_in_flight_,
- "insertBody must post the callback to dispatcher, not just call it");
- }
-}
-
-void CacheInsertQueue::insertTrailers(const Http::ResponseTrailerMap& trailers) {
- end_stream_queued_ = true;
- if (fragment_in_flight_) {
- fragments_.push_back(std::make_unique(trailers));
- } else {
- fragment_in_flight_ = true;
- insert_context_->insertTrailers(
- trailers, [this](bool cache_success) { onFragmentComplete(cache_success, true, 0); });
- ASSERT(fragment_in_flight_,
- "insertTrailers must post the callback to dispatcher, not just call it");
- }
-}
-
-void CacheInsertQueue::onFragmentComplete(bool cache_success, bool end_stream, size_t sz) {
- ASSERT(dispatcher_.isThreadSafe());
- fragment_in_flight_ = false;
- if (aborting_) {
- // Parent filter was destroyed, so we can quit this operation.
- fragments_.clear();
- self_ownership_.reset();
- return;
- }
- ASSERT(queue_size_bytes_ >= sz, "queue can't be emptied by more than its size");
- queue_size_bytes_ -= sz;
- if (watermarked_ && queue_size_bytes_ <= low_watermark_bytes_) {
- if (callbacks_.has_value()) {
- callbacks_->insertQueueUnderLowWatermark();
- }
- watermarked_ = false;
- }
- if (!cache_success) {
- // canceled by cache; unwatermark if necessary, inform the filter if
- // it's still around, and delete the queue.
- if (watermarked_) {
- if (callbacks_.has_value()) {
- callbacks_->insertQueueUnderLowWatermark();
- }
- watermarked_ = false;
- }
- fragments_.clear();
- // Clearing self-ownership might provoke the destructor, so take a copy of the
- // abort callback to avoid reading from 'this' after it may be deleted.
- //
- // This complexity is necessary because if the queue *is not* currently
- // self-owned, it will be deleted during insertQueueAborted, so
- // clearing self_ownership_ second would be a write-after-destroy error.
- // If it *is* currently self-owned, then we must still call the callback if
- // any, but clearing self_ownership_ *first* would mean we got destroyed
- // so we would no longer have access to the callback.
- // Since destroying first *or* second can be an error, rearrange things
- // so that destroying first *is not* an error. :)
- auto callbacks = std::move(callbacks_);
- self_ownership_.reset();
- if (callbacks.has_value()) {
- callbacks->insertQueueAborted();
- }
- return;
- }
- if (end_stream) {
- ASSERT(fragments_.empty(), "ending a stream with the queue not empty is a bug");
- ASSERT(!watermarked_, "being over the high watermark when the queue is empty makes no sense");
- self_ownership_.reset();
- return;
- }
- if (!fragments_.empty()) {
- // If there's more in the queue, push the next fragment to the cache.
- auto fragment = std::move(fragments_.front());
- fragments_.pop_front();
- fragment_in_flight_ = true;
- fragment->send(*insert_context_, [this](bool cache_success, bool end_stream, size_t sz) {
- onFragmentComplete(cache_success, end_stream, sz);
- });
- }
-}
-
-void CacheInsertQueue::setSelfOwned(std::unique_ptr<CacheInsertQueue> self) {
- // If we sent a high watermark event, this is our last chance to unset it on the
- // stream, so we'd better do so.
- if (watermarked_) {
- if (callbacks_.has_value()) {
- callbacks_->insertQueueUnderLowWatermark();
- }
- watermarked_ = false;
- }
- // Disable all the callbacks, they're going to have nowhere to go.
- callbacks_.reset();
- if (fragments_.empty() && !fragment_in_flight_) {
- // If the queue is already empty we can just let it be destroyed immediately.
- return;
- }
- if (!end_stream_queued_) {
- // If the queue can't be completed we can abort early but we need to wait for
- // any callback-in-flight to complete before destroying the queue.
- aborting_ = true;
- }
- self_ownership_ = std::move(self);
-}
-
-CacheInsertQueue::~CacheInsertQueue() {
- ASSERT(!watermarked_, "should not have a watermarked status when the queue is destroyed");
- ASSERT(fragments_.empty(), "queue should be empty by the time the destructor is run");
- insert_context_->onDestroy();
-}
-
-} // namespace Cache
-} // namespace HttpFilters
-} // namespace Extensions
-} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_insert_queue.h b/source/extensions/filters/http/cache/cache_insert_queue.h
deleted file mode 100644
index 52537ef82f003..0000000000000
--- a/source/extensions/filters/http/cache/cache_insert_queue.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#pragma once
-
-#include <deque>
-#include <memory>
-
-#include "source/extensions/filters/http/cache/http_cache.h"
-
-namespace Envoy {
-namespace Extensions {
-namespace HttpFilters {
-namespace Cache {
-
-class InsertQueueCallbacks {
-public:
- virtual void insertQueueOverHighWatermark() PURE;
- virtual void insertQueueUnderLowWatermark() PURE;
- virtual void insertQueueAborted() PURE;
- virtual ~InsertQueueCallbacks() = default;
-};
-class CacheInsertFragment;
-
-// This queue acts as an intermediary between CacheFilter and the cache
-// implementation extension. Having a queue allows CacheFilter to stream at its
-// normal rate, while allowing a cache implementation to run asynchronously and
-// potentially at a slower rate, without having to implement its own buffer.
-//
-// If the queue contains more than the "high watermark" for the buffer
-// (encoder_callbacks.encoderBufferLimit()), then a high watermark event is
-// sent to the encoder, which may cause the filter to slow down, to allow the
-// cache implementation time to catch up and avoid buffering significantly
-// more data in memory than the configuration intends to allow. When this happens,
-// the queue must drain to half the encoderBufferLimit before a low watermark
-// event is sent to resume normal flow.
-//
-// From the cache implementation's perspective, the queue ensures that the cache
-// receives data one piece at a time - no more data will be delivered until the
-// cache implementation calls the provided callback indicating that it is ready
-// to receive more data.
-class CacheInsertQueue {
-public:
- CacheInsertQueue(std::shared_ptr<HttpCache> cache,
- Http::StreamEncoderFilterCallbacks& encoder_callbacks,
- InsertContextPtr insert_context, InsertQueueCallbacks& callbacks);
- void insertHeaders(const Http::ResponseHeaderMap& response_headers,
- const ResponseMetadata& metadata, bool end_stream);
- void insertBody(const Buffer::Instance& fragment, bool end_stream);
- void insertTrailers(const Http::ResponseTrailerMap& trailers);
- void setSelfOwned(std::unique_ptr<CacheInsertQueue> self);
- ~CacheInsertQueue();
-
-private:
- void onFragmentComplete(bool cache_success, bool end_stream, size_t sz);
-
- Event::Dispatcher& dispatcher_;
- const InsertContextPtr insert_context_;
- const size_t low_watermark_bytes_, high_watermark_bytes_;
- OptRef<InsertQueueCallbacks> callbacks_;
- std::deque<std::unique_ptr<CacheInsertFragment>> fragments_;
- // Size of the data currently in the queue (including any fragment in flight).
- size_t queue_size_bytes_ = 0;
- // True when the high watermark has been exceeded and the low watermark
- // threshold has not been crossed since.
- bool watermarked_ = false;
- // True when the queue has sent a fragment to the cache implementation and has
- // not yet received a response.
- bool fragment_in_flight_ = false;
- // True if end_stream has been queued. If the queue gets handed ownership
- // of itself before the end is in sight then it might as well abort since
- // it's not going to get a complete entry.
- bool end_stream_queued_ = false;
- // If the filter was deleted while !end_stream_queued_, aborting_ is set to
- // true; when the next fragment completes (or cancels), the queue is destroyed.
- bool aborting_ = false;
- // When the filter is destroyed, it passes ownership of CacheInsertQueue
- // to itself, because CacheInsertQueue can outlive the filter. The queue
- // will remove its self-ownership (thereby deleting itself) upon
- // completion of its work.
- std::unique_ptr<CacheInsertQueue> self_ownership_;
- // The queue needs to keep a copy of the cache alive; if only the filter
- // keeps the cache alive then it's possible for the filter config to be deleted
- // while a cache action is still in flight, which can cause the cache to be
- // deleted prematurely.
- std::shared_ptr<HttpCache> cache_;
-};
-
-} // namespace Cache
-} // namespace HttpFilters
-} // namespace Extensions
-} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_progress_receiver.h b/source/extensions/filters/http/cache/cache_progress_receiver.h
new file mode 100644
index 0000000000000..a724f084b247d
--- /dev/null
+++ b/source/extensions/filters/http/cache/cache_progress_receiver.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "envoy/http/header_map.h"
+
+#include "source/extensions/filters/http/cache/range_utils.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+class CacheReader;
+
+class CacheProgressReceiver {
+public:
+ virtual void onHeadersInserted(std::unique_ptr<CacheReader> cache_entry,
+ Http::ResponseHeaderMapPtr headers, bool end_stream) PURE;
+ virtual void onBodyInserted(AdjustedByteRange range, bool end_stream) PURE;
+ virtual void onTrailersInserted(Http::ResponseTrailerMapPtr trailers) PURE;
+ virtual void onInsertFailed(absl::Status status) PURE;
+ virtual ~CacheProgressReceiver() = default;
+};
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_sessions.cc b/source/extensions/filters/http/cache/cache_sessions.cc
new file mode 100644
index 0000000000000..a9243b43ef085
--- /dev/null
+++ b/source/extensions/filters/http/cache/cache_sessions.cc
@@ -0,0 +1,111 @@
+#include "source/extensions/filters/http/cache/cache_sessions.h"
+
+#include <memory>
+
+#include "source/common/http/utility.h"
+#include "source/extensions/filters/http/cache/cache_custom_headers.h"
+#include "source/extensions/filters/http/cache/cache_headers_utils.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+ActiveLookupRequest::ActiveLookupRequest(
+ const Http::RequestHeaderMap& request_headers,
+ UpstreamRequestFactoryPtr upstream_request_factory, absl::string_view cluster_name,
+ Event::Dispatcher& dispatcher, SystemTime timestamp,
+ const std::shared_ptr<CacheableResponseChecker> cacheable_response_checker,
+ const std::shared_ptr<CacheFilterStatsProvider> stats_provider,
+ bool ignore_request_cache_control_header)
+ : upstream_request_factory_(std::move(upstream_request_factory)), dispatcher_(dispatcher),
+ key_(CacheHeadersUtils::makeKey(request_headers, cluster_name)),
+ request_headers_(Http::createHeaderMap(request_headers)),
+ cacheable_response_checker_(std::move(cacheable_response_checker)),
+ stats_provider_(std::move(stats_provider)), timestamp_(timestamp) {
+ if (!ignore_request_cache_control_header) {
+ initializeRequestCacheControl(request_headers);
+ }
+}
+
+absl::optional<std::vector<RawByteRange>> ActiveLookupRequest::parseRange() const {
+ auto range_header = RangeUtils::getRangeHeader(*request_headers_);
+ if (!range_header) {
+ return absl::nullopt;
+ }
+ return RangeUtils::parseRangeHeader(range_header.value(), 1);
+}
+
+bool ActiveLookupRequest::isRangeRequest() const {
+ return RangeUtils::getRangeHeader(*request_headers_).has_value();
+}
+
+void ActiveLookupRequest::initializeRequestCacheControl(
+ const Http::RequestHeaderMap& request_headers) {
+ const absl::string_view cache_control =
+ request_headers.getInlineValue(CacheCustomHeaders::requestCacheControl());
+
+ if (!cache_control.empty()) {
+ request_cache_control_ = RequestCacheControl(cache_control);
+ } else {
+ const absl::string_view pragma = request_headers.getInlineValue(CacheCustomHeaders::pragma());
+ // According to: https://httpwg.org/specs/rfc7234.html#header.pragma,
+ // when Cache-Control header is missing, "Pragma:no-cache" is equivalent to
+ // "Cache-Control:no-cache". Any other directives are ignored.
+ request_cache_control_.must_validate_ = RequestCacheControl(pragma).must_validate_;
+ }
+}
+
+bool ActiveLookupRequest::requiresValidation(const Http::ResponseHeaderMap& response_headers,
+ SystemTime::duration response_age) const {
+ // TODO(yosrym93): Store parsed response cache-control in cache instead of parsing it on every
+ // lookup.
+ const absl::string_view cache_control =
+ response_headers.getInlineValue(CacheCustomHeaders::responseCacheControl());
+ const ResponseCacheControl response_cache_control(cache_control);
+
+ const bool request_max_age_exceeded = request_cache_control_.max_age_.has_value() &&
+ request_cache_control_.max_age_.value() < response_age;
+ if (response_cache_control.must_validate_ || request_cache_control_.must_validate_ ||
+ request_max_age_exceeded) {
+ // Either the request or response explicitly require validation, or a request max-age
+ // requirement is not satisfied.
+ return true;
+ }
+
+ // CacheabilityUtils::isCacheableResponse(..) guarantees that any cached response satisfies this.
+ ASSERT(response_cache_control.max_age_.has_value() ||
+ (response_headers.getInline(CacheCustomHeaders::expires()) && response_headers.Date()),
+ "Cache entry does not have valid expiration data.");
+
+ SystemTime::duration freshness_lifetime;
+ if (response_cache_control.max_age_.has_value()) {
+ freshness_lifetime = response_cache_control.max_age_.value();
+ } else {
+ const SystemTime expires_value =
+ CacheHeadersUtils::httpTime(response_headers.getInline(CacheCustomHeaders::expires()));
+ const SystemTime date_value = CacheHeadersUtils::httpTime(response_headers.Date());
+ freshness_lifetime = expires_value - date_value;
+ }
+
+ if (response_age > freshness_lifetime) {
+ // Response is stale, requires validation if
+ // the response does not allow being served stale,
+ // or the request max-stale directive does not allow it.
+ const bool allowed_by_max_stale =
+ request_cache_control_.max_stale_.has_value() &&
+ request_cache_control_.max_stale_.value() > response_age - freshness_lifetime;
+ return response_cache_control.no_stale_ || !allowed_by_max_stale;
+ } else {
+ // Response is fresh, requires validation only if there is an unsatisfied min-fresh requirement.
+ const bool min_fresh_unsatisfied =
+ request_cache_control_.min_fresh_.has_value() &&
+ request_cache_control_.min_fresh_.value() > freshness_lifetime - response_age;
+ return min_fresh_unsatisfied;
+ }
+}
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_sessions.h b/source/extensions/filters/http/cache/cache_sessions.h
new file mode 100644
index 0000000000000..573e9efa023db
--- /dev/null
+++ b/source/extensions/filters/http/cache/cache_sessions.h
@@ -0,0 +1,105 @@
+#pragma once
+
+#include <memory>
+
+#include "envoy/buffer/buffer.h"
+
+#include "source/extensions/filters/http/cache/http_cache.h"
+#include "source/extensions/filters/http/cache/key.pb.h"
+#include "source/extensions/filters/http/cache/stats.h"
+#include "source/extensions/filters/http/cache/upstream_request.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+// A single in-flight cache lookup, capturing everything needed to either serve
+// from cache or fall back to an upstream request.
+//
+// NOTE(review): angle-bracket template arguments in this block were stripped by
+// extraction; they are reconstructed here from usage — verify the type names
+// (CacheableResponseChecker, CacheFilterStatsProvider, RawByteRange) against the
+// project headers.
+class ActiveLookupRequest {
+public:
+  // Prereq: request_headers's Path(), Scheme(), and Host() are non-null.
+  ActiveLookupRequest(
+      const Http::RequestHeaderMap& request_headers,
+      UpstreamRequestFactoryPtr upstream_request_factory, absl::string_view cluster_name,
+      Event::Dispatcher& dispatcher, SystemTime timestamp,
+      const std::shared_ptr<CacheableResponseChecker> cacheable_response_checker_,
+      const std::shared_ptr<CacheFilterStatsProvider> stats_provider_,
+      bool ignore_request_cache_control_header);
+
+  // Caches may modify the key according to local needs, though care must be
+  // taken to ensure that meaningfully distinct responses have distinct keys.
+  const Key& key() const { return key_; }
+
+  Http::RequestHeaderMap& requestHeaders() const { return *request_headers_; }
+  // True if `headers` describe a response this filter is allowed to cache.
+  bool isCacheableResponse(const Http::ResponseHeaderMap& headers) const {
+    return cacheable_response_checker_->isCacheableResponse(headers);
+  }
+  const std::shared_ptr<CacheableResponseChecker>& cacheableResponseChecker() const {
+    return cacheable_response_checker_;
+  }
+  const std::shared_ptr<CacheFilterStatsProvider>& statsProvider() const {
+    return stats_provider_;
+  }
+  CacheFilterStats& stats() const { return statsProvider()->stats(); }
+  // Creates a fresh upstream request bound to this lookup's stats provider.
+  UpstreamRequestPtr createUpstreamRequest() const {
+    return upstream_request_factory_->create(statsProvider());
+  }
+  Event::Dispatcher& dispatcher() const { return dispatcher_; }
+  SystemTime timestamp() const { return timestamp_; }
+  // True if the cached response (of the given age) must be revalidated upstream
+  // before being served for this request.
+  bool requiresValidation(const Http::ResponseHeaderMap& response_headers,
+                          SystemTime::duration age) const;
+  // Parsed Range header, or nullopt if absent/unusable.
+  absl::optional<std::vector<RawByteRange>> parseRange() const;
+  bool isRangeRequest() const;
+
+private:
+  void initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers);
+
+  UpstreamRequestFactoryPtr upstream_request_factory_;
+  Event::Dispatcher& dispatcher_;
+  Key key_;
+  std::vector<RawByteRange> request_range_spec_;
+  Http::RequestHeaderMapPtr request_headers_;
+  const std::shared_ptr<CacheableResponseChecker> cacheable_response_checker_;
+  const std::shared_ptr<CacheFilterStatsProvider> stats_provider_;
+  // Time when this LookupRequest was created (in response to an HTTP request).
+  SystemTime timestamp_;
+  RequestCacheControl request_cache_control_;
+};
+using ActiveLookupRequestPtr = std::unique_ptr<ActiveLookupRequest>;
+
+// NOTE(review): template arguments below were stripped by extraction and are
+// reconstructed from usage (http_source_ receives HttpSource subclasses;
+// callbacks are invoked with an ActiveLookupResultPtr).
+struct ActiveLookupResult {
+  // The source from which headers, body and trailers can be retrieved. May be
+  // a cache-reader CacheSession, or may be an UpstreamRequest if the request
+  // was uncacheable. The filter doesn't need to know which.
+  std::unique_ptr<HttpSource> http_source_;
+
+  // How the lookup was resolved (Hit, Miss, Validated, Uncacheable, ...).
+  CacheEntryStatus status_;
+};
+
+using ActiveLookupResultPtr = std::unique_ptr<ActiveLookupResult>;
+using ActiveLookupResultCallback = absl::AnyInvocable<void(ActiveLookupResultPtr)>;
+
+// CacheSessions is a wrapper around an HttpCache which provides a shorter-lived in-memory
+// cache of headers and already open cache entries. All the http-specific aspects of the
+// cache (range requests, validation, etc.) are performed by the CacheSession
+// so the HttpCache only needs to support simple read/write operations.
+//
+// May or may not be a singleton, depending on the specific cache extension; must include
+// the Singleton::Instance interface to support cases when it is.
+//
+// NOTE(review): template arguments on create() were stripped by extraction and
+// are reconstructed from the definition in cache_sessions_impl.cc.
+class CacheSessions : public Singleton::Instance, public CacheFilterStatsProvider {
+public:
+  // This is implemented in CacheSessionsImpl so that tests which only use a mock don't
+  // need to build the real thing, but declared here so that the actual use-site can
+  // create an instance without including the larger header.
+  static std::shared_ptr<CacheSessions> create(Server::Configuration::FactoryContext& context,
+                                               std::unique_ptr<HttpCache> cache);
+
+  // Resolves the lookup and delivers the result (cache hit, validation, or
+  // upstream pass-through) via `cb`.
+  virtual void lookup(ActiveLookupRequestPtr request, ActiveLookupResultCallback&& cb) PURE;
+  // The underlying storage implementation.
+  virtual HttpCache& cache() const PURE;
+  CacheInfo cacheInfo() const { return cache().cacheInfo(); }
+  ~CacheSessions() override = default;
+};
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_sessions_impl.cc b/source/extensions/filters/http/cache/cache_sessions_impl.cc
new file mode 100644
index 0000000000000..6720184d64e29
--- /dev/null
+++ b/source/extensions/filters/http/cache/cache_sessions_impl.cc
@@ -0,0 +1,917 @@
+#include "source/extensions/filters/http/cache/cache_sessions_impl.h"
+
+#include "source/common/buffer/buffer_impl.h"
+#include "source/common/common/enum_to_int.h"
+#include "source/common/http/utility.h"
+#include "source/extensions/filters/http/cache/cache_custom_headers.h"
+#include "source/extensions/filters/http/cache/cache_entry_utils.h"
+#include "source/extensions/filters/http/cache/cache_headers_utils.h"
+#include "source/extensions/filters/http/cache/cacheability_utils.h"
+#include "source/extensions/filters/http/cache/range_utils.h"
+#include "source/extensions/filters/http/cache/upstream_request.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+using CancelWrapper::cancelWrapped;
+
+// Wraps an upstream pass-through source for an entry previously marked
+// uncacheable: if the new response turns out to be cacheable after all, the
+// session's uncacheable state is cleared so a later request can repopulate the
+// cache.
+//
+// NOTE(review): stripped template arguments reconstructed from usage — verify
+// CacheableResponseChecker / CacheSession type names against project headers.
+class UpstreamRequestWithCacheabilityReset : public HttpSource {
+public:
+  UpstreamRequestWithCacheabilityReset(
+      std::shared_ptr<CacheableResponseChecker> cacheable_response_checker,
+      std::unique_ptr<HttpSource> original_source, std::shared_ptr<CacheSession> entry)
+      : cacheable_response_checker_(cacheable_response_checker),
+        original_source_(std::move(original_source)), entry_(std::move(entry)) {}
+  void getHeaders(GetHeadersCallback&& cb) override {
+    original_source_->getHeaders(
+        [entry = std::move(entry_), cb = std::move(cb),
+         cacheable_response_checker = std::move(cacheable_response_checker_)](
+            Http::ResponseHeaderMapPtr headers, EndStream end_stream) mutable {
+          // A cacheable response means the earlier uncacheable verdict is stale.
+          if (cacheable_response_checker->isCacheableResponse(*headers)) {
+            entry->clearUncacheableState();
+          }
+          cb(std::move(headers), end_stream);
+        });
+  }
+  // Body and trailers are passed through unmodified.
+  void getBody(AdjustedByteRange range, GetBodyCallback&& cb) override {
+    original_source_->getBody(std::move(range), std::move(cb));
+  }
+  void getTrailers(GetTrailersCallback&& cb) override {
+    original_source_->getTrailers(std::move(cb));
+  }
+
+private:
+  std::shared_ptr<CacheableResponseChecker> cacheable_response_checker_;
+  std::unique_ptr<HttpSource> original_source_;
+  std::shared_ptr<CacheSession> entry_;
+};
+
+// HttpSource adaptor for the case where response headers have already been
+// consumed from the original source: getHeaders replays the stored headers,
+// while body and trailers are still pulled from the original source.
+//
+// NOTE(review): stripped template argument on original_source_ reconstructed
+// from usage (it is used only through the HttpSource interface).
+class UpstreamRequestWithHeadersPrepopulated : public HttpSource {
+public:
+  UpstreamRequestWithHeadersPrepopulated(std::unique_ptr<HttpSource> original_source,
+                                         Http::ResponseHeaderMapPtr headers, EndStream end_stream)
+      : original_source_(std::move(original_source)), headers_(std::move(headers)),
+        end_stream_after_headers_(end_stream) {}
+  // Replays the captured headers; valid to call once (headers_ is moved out).
+  void getHeaders(GetHeadersCallback&& cb) override {
+    cb(std::move(headers_), end_stream_after_headers_);
+  }
+  void getBody(AdjustedByteRange range, GetBodyCallback&& cb) override {
+    original_source_->getBody(std::move(range), std::move(cb));
+  }
+  void getTrailers(GetTrailersCallback&& cb) override {
+    original_source_->getTrailers(std::move(cb));
+  }
+
+private:
+  std::unique_ptr<HttpSource> original_source_;
+  Http::ResponseHeaderMapPtr headers_;
+  EndStream end_stream_after_headers_;
+};
+
+// Copies the request headers minus the Range header, so the upstream fill
+// request fetches the full resource for caching.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy request header map implementation.
+static Http::RequestHeaderMapPtr
+requestHeadersWithRangeRemoved(const Http::RequestHeaderMap& original_headers) {
+  Http::RequestHeaderMapPtr headers =
+      Http::createHeaderMap<Http::RequestHeaderMapImpl>(original_headers);
+  headers->remove(Envoy::Http::Headers::get().Range);
+  return headers;
+}
+
+// Builds a fresh 416 (Range Not Satisfiable) response header map with an empty
+// body, returned when a requested range lies outside the entry's length.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy response header map implementation.
+static Http::ResponseHeaderMapPtr notSatisfiableHeaders() {
+  static const std::string not_satisfiable =
+      std::to_string(enumToInt(Http::Code::RangeNotSatisfiable));
+  return Http::createHeaderMap<Http::ResponseHeaderMapImpl>({
+      {Http::Headers::get().Status, not_satisfiable},
+      {Http::Headers::get().ContentLength, "0"},
+  });
+}
+
+// Delivers the cached response headers to `cb`. For range requests the status,
+// content-length and content-range headers are rewritten to describe the
+// requested slice; unsatisfiable ranges produce a 416 instead.
+// NOTE(review): the stripped template argument on the `ranges` declaration was
+// restored to match parseRange()'s reconstructed return type.
+void ActiveLookupContext::getHeaders(GetHeadersCallback&& cb) {
+  absl::optional<std::vector<RawByteRange>> ranges = lookup().parseRange();
+  if (ranges) {
+    // If it's a range request, inject the appropriate modified content-range and
+    // content-length headers into the response once we have the response headers.
+    entry_->wantHeaders(
+        dispatcher(), lookup().timestamp(),
+        [ranges = std::move(ranges.value()), cl = content_length_,
+         cb = std::move(cb)](Http::ResponseHeaderMapPtr headers, EndStream end_stream) mutable {
+          ASSERT(headers != nullptr, "it should be impossible for headers to be null");
+          if (cl == 0 && headers->ContentLength()) {
+            // Fall back to the content-length header; if it fails to parse,
+            // treat the length as unknown (0). (Replaces the original
+            // `SimpleAtoi(...) || (cl = 0)` short-circuit idiom.)
+            if (!absl::SimpleAtoi(headers->getContentLengthValue(), &cl)) {
+              cl = 0;
+            }
+          }
+          RangeDetails range_details = RangeUtils::createAdjustedRangeDetails(ranges, cl);
+          if (!range_details.satisfiable_) {
+            return cb(notSatisfiableHeaders(), EndStream::End);
+          }
+          if (range_details.ranges_.empty()) {
+            // Range header was present but effectively a no-op; serve as-is.
+            return cb(std::move(headers), end_stream);
+          }
+          auto& range = range_details.ranges_[0];
+          headers->setReferenceKey(
+              Envoy::Http::Headers::get().ContentRange,
+              fmt::format("bytes {}-{}/{}", range.begin(), range.end() - 1, cl));
+          headers->setContentLength(range.length());
+          static const std::string partial_content =
+              std::to_string(enumToInt(Http::Code::PartialContent));
+          headers->setStatus(partial_content);
+          cb(std::move(headers), end_stream);
+        });
+  } else {
+    entry_->wantHeaders(dispatcher(), lookup().timestamp(), std::move(cb));
+  }
+}
+
+// Forwards the body-range request to the shared cache session, which serves it
+// on this context's dispatcher.
+void ActiveLookupContext::getBody(AdjustedByteRange range, GetBodyCallback&& cb) {
+  entry_->wantBodyRange(range, dispatcher(), std::move(cb));
+}
+
+// Forwards the trailers request to the shared cache session.
+void ActiveLookupContext::getTrailers(GetTrailersCallback&& cb) {
+  entry_->wantTrailers(dispatcher(), std::move(cb));
+}
+
+// Factory for the concrete CacheSessions implementation; declared on the
+// interface so callers don't need the larger impl header.
+// NOTE(review): stripped template arguments reconstructed — verify the
+// CacheSessionsImpl class name against cache_sessions_impl.h.
+std::shared_ptr<CacheSessions>
+CacheSessions::create(Server::Configuration::FactoryContext& context,
+                      std::unique_ptr<HttpCache> cache) {
+  return std::make_shared<CacheSessionsImpl>(context, std::move(cache));
+}
+
+// A session holds only a weak reference to its owner so the owner's lifetime is
+// not extended by in-flight sessions; each use-site re-locks and handles the
+// owner having gone away.
+// NOTE(review): the weak_ptr's stripped template argument is reconstructed as
+// CacheSessionsImpl (the session calls makeMetadata()/time_source_, which are
+// not on the CacheSessions interface) — verify against the impl header.
+CacheSession::CacheSession(std::weak_ptr<CacheSessionsImpl> cache_sessions, const Key& key)
+    : cache_sessions_(std::move(cache_sessions)), key_(key) {}
+
+// Resets a NotCacheable session back to New so the next lookup can retry
+// populating the cache (called when a pass-through response turned out to be
+// cacheable after all). A no-op in any other state.
+void CacheSession::clearUncacheableState() {
+  absl::MutexLock lock(&mu_);
+  if (state_ != State::NotCacheable) {
+    return;
+  }
+  state_ = State::New;
+}
+
+// Delivers a copy of the cached response headers with a freshly computed Age
+// header. Headers are always available by the time this is called (see ASSERT),
+// so the callback is invoked synchronously; the dispatcher parameter is unused.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy response header map implementation.
+void CacheSession::wantHeaders(Event::Dispatcher&, SystemTime lookup_timestamp,
+                               GetHeadersCallback&& cb) {
+  Http::ResponseHeaderMapPtr headers;
+  EndStream end_stream_after_headers;
+  {
+    absl::MutexLock lock(&mu_);
+    ASSERT(entry_.response_headers_ != nullptr,
+           "headers should have been initialized during lookup");
+    headers = Http::createHeaderMap<Http::ResponseHeaderMapImpl>(*entry_.response_headers_);
+    Seconds age = CacheHeadersUtils::calculateAge(
+        *headers, entry_.response_metadata_.response_time_, lookup_timestamp);
+    headers->setReferenceKey(Envoy::Http::CustomHeaders::get().Age, std::to_string(age.count()));
+    end_stream_after_headers = endStreamAfterHeaders();
+  }
+  // Invoke outside the lock so a re-entrant callback can't deadlock.
+  cb(std::move(headers), end_stream_after_headers);
+}
+
+// Registers a subscriber for a body range; the read is satisfied asynchronously
+// on the subscriber's dispatcher as data becomes available from the cache.
+void CacheSession::wantBodyRange(AdjustedByteRange range, Event::Dispatcher& dispatcher,
+                                 GetBodyCallback&& cb) {
+  absl::MutexLock lock(&mu_);
+  ASSERT(entry_.response_headers_ != nullptr,
+         "body should not be requested when headers haven't been sent");
+  if (auto cache_sessions = cache_sessions_.lock()) {
+    cache_sessions->stats().incCacheSessionsSubscribers();
+  }
+  body_subscribers_.emplace_back(dispatcher, std::move(range), std::move(cb));
+  // if there's not already a body read operation in flight, start one.
+  maybeTriggerBodyReadForWaitingSubscriber();
+}
+
+// Delivers trailers: immediately (posted, with a copy) if already present,
+// otherwise registers a subscriber to be notified when insertion completes.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy response trailer map implementation.
+void CacheSession::wantTrailers(Event::Dispatcher& dispatcher, GetTrailersCallback&& cb) {
+  absl::MutexLock lock(&mu_);
+  if (entry_.response_trailers_ != nullptr) {
+    auto trailers = Http::createHeaderMap<Http::ResponseTrailerMapImpl>(*entry_.response_trailers_);
+    dispatcher.post([cb = std::move(cb), trailers = std::move(trailers)]() mutable {
+      cb(std::move(trailers), EndStream::End);
+    });
+    return;
+  }
+  // A known body length means insertion finished; finishing without trailers
+  // means there are none, so nobody should be asking for them.
+  ASSERT(!entry_.body_length_.has_value(),
+         "wantTrailers should not be called when there are no trailers");
+  if (auto cache_sessions = cache_sessions_.lock()) {
+    cache_sessions->stats().incCacheSessionsSubscribers();
+  }
+  trailer_subscribers_.emplace_back(dispatcher, std::move(cb));
+}
+
+// Callback from the cache once response headers have been written. Records the
+// reader handle and headers, transitions state, and fans results out to any
+// subscribers that were waiting on the miss.
+void CacheSession::onHeadersInserted(CacheReaderPtr cache_reader,
+                                     Http::ResponseHeaderMapPtr headers, bool end_stream) {
+  absl::MutexLock lock(&mu_);
+  std::shared_ptr cache_sessions = cache_sessions_.lock();
+  if (!cache_sessions) {
+    ENVOY_LOG(error, "cache config was deleted while header-insertion was in flight");
+    // Downgrade waiting subscribers to upstream pass-through.
+    return onCacheWentAway();
+  }
+  entry_.cache_reader_ = std::move(cache_reader);
+  entry_.response_headers_ = std::move(headers);
+  entry_.response_metadata_ = cache_sessions->makeMetadata();
+  if (end_stream) {
+    // Headers-only response: the entry is complete already.
+    insertComplete();
+  } else {
+    state_ = State::Inserting;
+  }
+  sendLookupResponsesAndMaybeValidationRequest(CacheEntryStatus::Miss);
+}
+
+// True if the cached entry, at its current age, must be revalidated upstream
+// before being served to this particular lookup (per its cache-control
+// requirements). Caller must hold mu_.
+bool CacheSession::requiresValidationFor(const ActiveLookupRequest& lookup) const {
+  mu_.AssertHeld();
+  const Seconds age = CacheHeadersUtils::calculateAge(
+      *entry_.response_headers_, entry_.response_metadata_.response_time_, lookup.timestamp());
+  return lookup.requiresValidation(*entry_.response_headers_, age);
+}
+
<code_block>
+// Resolves as many waiting lookup subscribers as possible with `status`
+// (NOTE(review): presumably defaulted to a hit-status in the header — confirm),
+// keeping back only those whose cache-control requires upstream validation,
+// for whom a single shared validation request is then started.
+// Caller must hold mu_.
+void CacheSession::sendLookupResponsesAndMaybeValidationRequest(CacheEntryStatus status) {
+  mu_.AssertHeld();
+  ASSERT(state_ == State::Exists || state_ == State::Inserting);
+  auto it = lookup_subscribers_.begin();
+  if (status != CacheEntryStatus::Miss) {
+    // Reorder subscribers so those who do not require validation are at the end,
+    // and 'it' is the first subscriber that does not require validation.
+    // (On a Miss the content is brand new, so validation is moot and everyone
+    // is resolved.)
+    it = std::partition(lookup_subscribers_.begin(), lookup_subscribers_.end(),
+                        [this](LookupSubscriber& s) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                          return requiresValidationFor(s.context_->lookup());
+                        });
+  }
+  for (auto recipient = it; recipient != lookup_subscribers_.end(); recipient++) {
+    sendSuccessfulLookupResultTo(*recipient, status);
+    // If there was more than one recipient, and the first one was a miss, the
+    // rest will be streamed.
+    if (status == CacheEntryStatus::Miss) {
+      status = CacheEntryStatus::Follower;
+    }
+  }
+  if (it != lookup_subscribers_.end()) {
+    if (auto cache_sessions = cache_sessions_.lock()) {
+      cache_sessions->stats().subCacheSessionsSubscribers(
+          std::distance(it, lookup_subscribers_.end()));
+    }
+  }
+  lookup_subscribers_.erase(it, lookup_subscribers_.end());
+  if (!lookup_subscribers_.empty()) {
+    // At least one subscriber required validation.
+    return performValidation();
+  }
+}
</code_block>
+
+// End-of-stream marker to attach to headers: End only when the body is known to
+// be empty and there are no trailers. An unknown body length (value_or(1))
+// conservatively counts as non-empty. Caller must hold mu_.
+EndStream CacheSession::endStreamAfterHeaders() const {
+  mu_.AssertHeld();
+  bool end_stream = entry_.body_length_.value_or(1) == 0 && entry_.response_trailers_ == nullptr;
+  return end_stream ? EndStream::End : EndStream::More;
+}
+
+// End-of-stream marker to attach to the final body chunk: End unless trailers
+// follow. Only valid once the full body length is known. Caller must hold mu_.
+EndStream CacheSession::endStreamAfterBody() const {
+  mu_.AssertHeld();
+  ASSERT(entry_.body_length_.has_value(),
+         "should not be testing endStreamAfterBody if body not complete");
+  return (entry_.response_trailers_ == nullptr) ? EndStream::End : EndStream::More;
+}
+
+// Hands the subscriber's own ActiveLookupContext back to it as the HttpSource
+// of a successful lookup, posted onto the subscriber's dispatcher. The
+// subscriber is consumed (context_ and callback_ are moved from).
+// Caller must hold mu_.
+void CacheSession::sendSuccessfulLookupResultTo(LookupSubscriber& subscriber,
+                                                CacheEntryStatus status) {
+  mu_.AssertHeld();
+  ASSERT(state_ == State::Exists || state_ == State::Inserting);
+  auto result = std::make_unique();
+  result->status_ = status;
+  result->http_source_ = std::move(subscriber.context_);
+  subscriber.dispatcher().post(
+      [result = std::move(result), callback = std::move(subscriber.callback_)]() mutable {
+        callback(std::move(result));
+      });
+}
+
+// Callback from the cache as body bytes are written: advances the high-water
+// mark of readable body, finalizes the entry on end_stream, and wakes any
+// subscriber now able to read.
+void CacheSession::onBodyInserted(AdjustedByteRange range, bool end_stream) {
+  absl::MutexLock lock(&mu_);
+  body_length_available_ = range.end();
+  if (end_stream) {
+    insertComplete();
+    ASSERT(trailer_subscribers_.empty(), "should not be trailer requests before body was complete");
+  }
+  maybeTriggerBodyReadForWaitingSubscriber();
+}
+
+// Callback from the cache when trailers have been written; completes the entry
+// and flushes all waiting trailer subscribers.
+void CacheSession::onTrailersInserted(Http::ResponseTrailerMapPtr trailers) {
+  ASSERT(trailers);
+  absl::MutexLock lock(&mu_);
+  entry_.response_trailers_ = std::move(trailers);
+  insertComplete();
+  for (TrailerSubscriber& subscriber : trailer_subscribers_) {
+    sendTrailersTo(subscriber);
+  }
+  if (auto cache_sessions = cache_sessions_.lock()) {
+    cache_sessions->stats().subCacheSessionsSubscribers(trailer_subscribers_.size());
+  }
+  trailer_subscribers_.clear();
+  // If there's a body subscriber waiting for more body that doesn't exist,
+  // it needs to be notified so it can call getTrailers.
+  abortBodyOutOfRangeSubscribers();
+}
+
+// Posts a copy of the stored trailers to the subscriber's dispatcher,
+// consuming the subscriber's callback. Caller must hold mu_.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy response trailer map implementation.
+void CacheSession::sendTrailersTo(TrailerSubscriber& subscriber) {
+  mu_.AssertHeld();
+  ASSERT(entry_.response_trailers_ != nullptr);
+  subscriber.dispatcher().post(
+      [trailers = Http::createHeaderMap<Http::ResponseTrailerMapImpl>(*entry_.response_trailers_),
+       callback = std::move(subscriber.callback_)]() mutable {
+        callback(std::move(trailers), EndStream::End);
+      });
+}
+
+// Callback from the cache when a write fails; logs and falls into the common
+// error path (which downgrades subscribers and resets the session).
+void CacheSession::onInsertFailed(absl::Status status) {
+  absl::MutexLock lock(&mu_);
+  ENVOY_LOG(error, "cache insert failed: {}", status);
+  onCacheError();
+}
+
+// Converts a waiting lookup subscriber into a plain upstream pass-through:
+// posts a task on the subscriber's dispatcher that starts an upstream request
+// with a copy of the original request headers and hands it back as the result's
+// HttpSource. The dispatcher reference is taken before `sub` is moved into the
+// lambda.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy request header map implementation.
+static void postUpstreamPassThrough(CacheSession::LookupSubscriber&& sub, CacheEntryStatus status) {
+  Event::Dispatcher& dispatcher = sub.dispatcher();
+  dispatcher.post([sub = std::move(sub), status]() mutable {
+    auto result = std::make_unique();
+    auto upstream = sub.context_->lookup().createUpstreamRequest();
+    upstream->sendHeaders(
+        Http::createHeaderMap<Http::RequestHeaderMapImpl>(sub.context_->lookup().requestHeaders()));
+    result->http_source_ = std::move(upstream);
+    result->status_ = status;
+    sub.callback_(std::move(result));
+  });
+}
+
+// Like postUpstreamPassThrough, but for a session marked NotCacheable: the
+// upstream source is wrapped so that a cacheable response clears the entry's
+// uncacheable state for future lookups.
+// NOTE(review): stripped template arguments reconstructed from usage — verify
+// the CacheSession / UpstreamRequestWithCacheabilityReset names.
+static void postUpstreamPassThroughWithReset(CacheSession::LookupSubscriber&& sub,
+                                             std::shared_ptr<CacheSession> entry) {
+  Event::Dispatcher& dispatcher = sub.dispatcher();
+  dispatcher.post([sub = std::move(sub), entry = std::move(entry)]() mutable {
+    auto result = std::make_unique();
+    auto upstream = sub.context_->lookup().createUpstreamRequest();
+    upstream->sendHeaders(
+        Http::createHeaderMap<Http::RequestHeaderMapImpl>(sub.context_->lookup().requestHeaders()));
+    result->http_source_ = std::make_unique<UpstreamRequestWithCacheabilityReset>(
+        sub.context_->lookup().cacheableResponseChecker(), std::move(upstream), entry);
+    result->status_ = CacheEntryStatus::Uncacheable;
+    sub.callback_(std::move(result));
+  });
+}
+
+// Common failure path: evicts the (possibly corrupt) entry, downgrades lookup
+// subscribers to upstream pass-through, resets body/trailer subscribers, and
+// returns the session to State::New so a later request can retry.
+// Caller must hold mu_.
+void CacheSession::onCacheError() {
+  mu_.AssertHeld();
+  auto cache_sessions = cache_sessions_.lock();
+  if (cache_sessions) {
+    // Eviction needs some dispatcher to run on; borrow the first available
+    // subscriber's. If there are no subscribers at all, eviction is skipped.
+    Event::Dispatcher* dispatcher = nullptr;
+    if (!lookup_subscribers_.empty()) {
+      dispatcher = &lookup_subscribers_.front().dispatcher();
+    } else if (!body_subscribers_.empty()) {
+      dispatcher = &body_subscribers_.front().dispatcher();
+    } else if (!trailer_subscribers_.empty()) {
+      dispatcher = &trailer_subscribers_.front().dispatcher();
+    }
+    if (dispatcher) {
+      // TODO(toddmgreer): there may be some kinds of cache error that
+      // don't merit evicting the entry.
+      cache_sessions->cache().evict(*dispatcher, key_);
+    }
+    cache_sessions->stats().subCacheSessionsSubscribers(body_subscribers_.size());
+    cache_sessions->stats().subCacheSessionsSubscribers(trailer_subscribers_.size());
+    cache_sessions->stats().subCacheSessionsSubscribers(lookup_subscribers_.size());
+  }
+  for (LookupSubscriber& sub : lookup_subscribers_) {
+    postUpstreamPassThrough(std::move(sub), CacheEntryStatus::LookupError);
+  }
+  // Body/trailer consumers already have a stream in flight; they get a reset.
+  for (BodySubscriber& sub : body_subscribers_) {
+    sub.callback_(nullptr, EndStream::Reset);
+  }
+  for (TrailerSubscriber& sub : trailer_subscribers_) {
+    sub.callback_(nullptr, EndStream::Reset);
+  }
+  lookup_subscribers_.clear();
+  body_subscribers_.clear();
+  trailer_subscribers_.clear();
+  state_ = State::New;
+}
+
+// Marks the insertion finished: the entry now Exists and its body length is
+// fixed at the bytes actually written. If the upstream's content-length header
+// disagreed with the actual body size, the recorded length wins (logged when a
+// nonzero header was simply wrong). Caller must hold mu_.
+void CacheSession::insertComplete() {
+  mu_.AssertHeld();
+  state_ = State::Exists;
+  entry_.body_length_ = body_length_available_;
+  if (content_length_header_ == entry_.body_length_) {
+    return;
+  }
+  if (content_length_header_ != 0) {
+    ENVOY_LOG(error,
+              "cache insert for {}{} had content-length header {} but actual size {}. Cache has "
+              "modified the header to match actual size.",
+              key_.host(), key_.path(), content_length_header_, entry_.body_length_.value());
+  }
+  content_length_header_ = body_length_available_;
+}
+
+// Once the final body length is known, resolves subscribers whose requested
+// range starts at or past the end of the actual body: exactly-at-end readers
+// get a clean null-body completion (posted), past-the-end readers get a reset
+// (invoked inline). Caller must hold mu_.
+void CacheSession::abortBodyOutOfRangeSubscribers() {
+  mu_.AssertHeld();
+  if (!entry_.body_length_.has_value()) {
+    // Don't know if a request is out of range until the available range is known.
+    return;
+  }
+  // For any subscribers whose requested range has been revealed to be invalid
+  // (we only get here in the case where content length was specified in the
+  // headers, but the actual body was shorter, i.e. the upstream response was
+  // actually invalid), reset their requests.
+  // Subscribers who asked for body starting at or beyond the end of the
+  // real size receive null body rather than reset.
+  EndStream end_stream = endStreamAfterBody();
+  auto cache_sessions = cache_sessions_.lock();
+  body_subscribers_.erase(
+      std::remove_if(body_subscribers_.begin(), body_subscribers_.end(),
+                     [this, end_stream, &cache_sessions](BodySubscriber& bs)
+                         ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+                           if (bs.range_.begin() >= body_length_available_) {
+                             if (bs.range_.begin() == body_length_available_) {
+                               auto cb = std::move(bs.callback_);
+                               bs.dispatcher().post([cb = std::move(cb), end_stream]() mutable {
+                                 cb(nullptr, end_stream);
+                               });
+                             } else {
+                               bs.callback_(nullptr, EndStream::Reset);
+                             }
+                             if (cache_sessions) {
+                               cache_sessions->stats().subCacheSessionsSubscribers(1);
+                             }
+                             return true;
+                           }
+                           return false;
+                         }),
+      body_subscribers_.end());
+}
+
+// Starts at most one cache body read at a time: picks the first subscriber
+// whose range start is already available, clamps the read to the available
+// bytes and to max_read_chunk_size_, and posts the read onto that subscriber's
+// dispatcher. Caller must hold mu_.
+void CacheSession::maybeTriggerBodyReadForWaitingSubscriber() {
+  mu_.AssertHeld();
+  ASSERT(entry_.cache_reader_);
+  if (read_action_in_flight_) {
+    // There is already an action in flight so don't read more body yet.
+    return;
+  }
+  abortBodyOutOfRangeSubscribers();
+  auto it = std::find_if(
+      body_subscribers_.begin(), body_subscribers_.end(),
+      [this](BodySubscriber& subscriber) { return canReadBodyRangeFromCacheEntry(subscriber); });
+  if (it == body_subscribers_.end()) {
+    // There is nobody waiting to read some body that's available.
+    return;
+  }
+  AdjustedByteRange range = it->range_;
+  if (range.end() > body_length_available_) {
+    range = AdjustedByteRange(range.begin(), body_length_available_);
+  }
+  if (range.length() > max_read_chunk_size_) {
+    range = AdjustedByteRange(range.begin(), range.begin() + max_read_chunk_size_);
+  }
+  // Don't need this to be cancellable because there's a shared_ptr in the lambda keeping the
+  // CacheSession alive. We post to a thread before making the request for two reasons - we want
+  // the request to be performed on the requester's worker thread for balance, and we want to be
+  // able to lock the mutex again on the callback - if the cache called back immediately rather than
+  // posting and we *didn't* post before making the request, the mutex would still be held
+  // from this outer function so the callback would deadlock. By posting to a queue we ensure
+  // that deadlock cannot occur.
+  // Also, by ensuring the action occurs from a dispatcher queue, we guarantee that
+  // the "trigger again" at the end of onBodyChunkFromCache can't build up to a stack overflow
+  // of maybeTrigger->getBody->onBodyChunk->maybeTrigger->...
+  read_action_in_flight_ = true;
+  it->dispatcher().post([&dispatcher = it->dispatcher(), p = shared_from_this(), range,
+                         cache_reader = entry_.cache_reader_.get()]() mutable {
+    cache_reader->getBody(
+        dispatcher, range,
+        [p = std::move(p), range](Buffer::InstancePtr buffer, EndStream end_stream) {
+          p->onBodyChunkFromCache(std::move(range), std::move(buffer), end_stream);
+        });
+  });
+}
+
+// True if at least the first byte of the subscriber's requested range has been
+// written to the cache and can be read now. Caller must hold mu_.
+bool CacheSession::canReadBodyRangeFromCacheEntry(BodySubscriber& subscriber) {
+  mu_.AssertHeld();
+  return subscriber.range_.begin() < body_length_available_;
+}
+
+// Receives one body chunk read from the cache and fans it out to every
+// subscriber whose range starts inside the chunk. A single recipient gets the
+// buffer moved; multiple recipients get copies carved out of the linearized
+// chunk. Satisfied subscribers are removed, then the next read is triggered.
+// NOTE(review): the stripped static_cast/make_unique template arguments are
+// restored (linearize() yields a raw pointer; copies use Buffer::OwnedImpl).
+void CacheSession::onBodyChunkFromCache(AdjustedByteRange range, Buffer::InstancePtr buffer,
+                                        EndStream end_stream) {
+  absl::MutexLock lock(&mu_);
+  read_action_in_flight_ = false;
+  if (end_stream == EndStream::Reset) {
+    ENVOY_LOG(error, "cache entry provoked reset");
+    onCacheError();
+    return;
+  }
+  if (buffer == nullptr) {
+    IS_ENVOY_BUG("cache returned null buffer non-reset");
+    onCacheError();
+    return;
+  }
+  ASSERT(buffer->length() <= range.length());
+  if (buffer->length() < range.length()) {
+    // Cache returned a short read; shrink the logical range to match.
+    range = AdjustedByteRange(range.begin(), range.begin() + buffer->length());
+  }
+  // Partition so non-recipients stay at the front; [recipients_begin, end) are
+  // the subscribers whose range begins within this chunk.
+  auto recipients_begin = std::partition(body_subscribers_.begin(), body_subscribers_.end(),
+                                         [&range](BodySubscriber& subscriber) {
+                                           return subscriber.range_.begin() < range.begin() ||
+                                                  subscriber.range_.begin() >= range.end();
+                                         });
+  ASSERT(recipients_begin != body_subscribers_.end(),
+         "reading body chunk from cache with no corresponding request shouldn't happen");
+  if (std::next(recipients_begin) == body_subscribers_.end()) {
+    BodySubscriber& subscriber = *recipients_begin;
+    ASSERT(subscriber.range_.begin() == range.begin(),
+           "if there's only one matching subscriber it should have requested this precise chunk");
+    // There is only one recipient of this chunk, send it the actual buffer,
+    // no need to copy.
+    sendBodyChunkTo(subscriber,
+                    AdjustedByteRange(subscriber.range_.begin(),
+                                      std::min(subscriber.range_.end(), range.end())),
+                    std::move(buffer));
+  } else {
+    uint8_t* bytes = static_cast<uint8_t*>(buffer->linearize(range.length()));
+    for (auto it = recipients_begin; it != body_subscribers_.end(); it++) {
+      AdjustedByteRange r(it->range_.begin(), std::min(it->range_.end(), range.end()));
+      sendBodyChunkTo(
+          *it, r,
+          std::make_unique<Buffer::OwnedImpl>(bytes + r.begin() - range.begin(), r.length()));
+    }
+  }
+  if (auto cache_sessions = cache_sessions_.lock()) {
+    cache_sessions->stats().subCacheSessionsSubscribers(
+        std::distance(recipients_begin, body_subscribers_.end()));
+  }
+  body_subscribers_.erase(recipients_begin, body_subscribers_.end());
+  maybeTriggerBodyReadForWaitingSubscriber();
+}
+
+// Posts the chunk to the subscriber's dispatcher, consuming its callback. The
+// chunk carries End only when it reaches the known end of the body and no
+// trailers follow. Caller must hold mu_.
+void CacheSession::sendBodyChunkTo(BodySubscriber& subscriber, AdjustedByteRange range,
+                                   Buffer::InstancePtr buffer) {
+  mu_.AssertHeld();
+  bool end_stream = entry_.body_length_.has_value() && range.end() == entry_.body_length_.value() &&
+                    entry_.response_trailers_ == nullptr;
+  subscriber.dispatcher().post([end_stream, callback = std::move(subscriber.callback_),
+                                buffer = std::move(buffer)]() mutable {
+    callback(std::move(buffer), end_stream ? EndStream::End : EndStream::More);
+  });
+}
+
+// NOTE(review): presumably the in-flight upstream request must be released
+// before destruction because performUpstreamRequest hands its raw pointer to a
+// posted lambda — confirm the teardown ordering that guarantees this.
+CacheSession::~CacheSession() { ASSERT(!upstream_request_); }
+
+// Entry point for a new lookup on this session. Depending on the session's
+// state the subscriber is: passed straight through upstream (NotCacheable,
+// or HEAD requests), queued behind an in-flight lookup/validation, answered
+// immediately from the entry (possibly after validation), or — for a brand new
+// session — queued while a cache lookup is started.
+// NOTE(review): the stripped make_unique template argument is reconstructed as
+// ActiveLookupContext (the type whose member functions are defined above) —
+// verify against cache_sessions_impl.h.
+void CacheSession::getLookupResult(ActiveLookupRequestPtr lookup, ActiveLookupResultCallback&& cb) {
+  ASSERT(lookup->dispatcher().isThreadSafe());
+  absl::MutexLock lock(&mu_);
+  LookupSubscriber sub{std::make_unique<ActiveLookupContext>(std::move(lookup), shared_from_this(),
+                                                             content_length_header_),
+                       std::move(cb)};
+  switch (state_) {
+  case State::Vary:
+    IS_ENVOY_BUG("not implemented yet");
+    ABSL_FALLTHROUGH_INTENDED;
+  case State::NotCacheable: {
+    // Pass through, but allow a cacheable response to reset this session.
+    postUpstreamPassThroughWithReset(std::move(sub), shared_from_this());
+    return;
+  }
+  case State::Validating:
+  case State::Pending:
+    // Another request is already resolving this entry; queue behind it.
+    sub.context_->lookup().stats().incCacheSessionsSubscribers();
+    lookup_subscribers_.push_back(std::move(sub));
+    return;
+  case State::Exists:
+  case State::Inserting: {
+    CacheEntryStatus status = CacheEntryStatus::Hit;
+    if (requiresValidationFor(sub.context_->lookup())) {
+      if (sub.context_->lookup().requestHeaders().getMethodValue() ==
+          Http::Headers::get().MethodValues.Head) {
+        // A HEAD request that requires validation can't write to the
+        // cache or use the cache entry, so just turn it into a pass-through.
+        return postUpstreamPassThrough(std::move(sub), CacheEntryStatus::Uncacheable);
+      }
+      if (state_ == State::Inserting) {
+        // Skip validation if the cache write is still in progress.
+        status = CacheEntryStatus::ValidatedFree;
+      } else {
+        sub.context_->lookup().stats().incCacheSessionsSubscribers();
+        lookup_subscribers_.push_back(std::move(sub));
+        return performValidation();
+      }
+    }
+    // Serve directly from the entry; take the dispatcher reference before
+    // moving the subscriber's pieces into the result/lambda.
+    auto result = std::make_unique();
+    Event::Dispatcher& dispatcher = sub.dispatcher();
+    result->http_source_ = std::move(sub.context_);
+    result->status_ = status;
+    dispatcher.post([cb = std::move(sub.callback_), result = std::move(result)]() mutable {
+      cb(std::move(result));
+    });
+    return;
+  }
+  case State::New: {
+    Event::Dispatcher& dispatcher = sub.dispatcher();
+    if (sub.context_->lookup().requestHeaders().getMethodValue() ==
+        Http::Headers::get().MethodValues.Head) {
+      // HEAD requests are not cacheable, just pass through.
+      postUpstreamPassThrough(std::move(sub), CacheEntryStatus::Uncacheable);
+      return;
+    }
+    LookupRequest request(Key{sub.context_->lookup().key()}, dispatcher);
+    sub.context_->lookup().stats().incCacheSessionsSubscribers();
+    lookup_subscribers_.emplace_back(std::move(sub));
+    state_ = State::Pending;
+    std::shared_ptr cache_sessions = cache_sessions_.lock();
+    ASSERT(cache_sessions, "should be impossible for cache to be deleted in getLookupResult");
+    // posted to prevent callback mutex-deadlock.
+    return dispatcher.post([cache_sessions = std::move(cache_sessions), p = shared_from_this(),
+                            request = std::move(request)]() mutable {
+      // p is captured as shared_ptr to ensure 'this' is not deleted while the
+      // lookup is in flight.
+      cache_sessions->cache().lookup(
+          std::move(request), [p = std::move(p)](absl::StatusOr&& lookup_result) {
+            p->onCacheLookupResult(std::move(lookup_result));
+          });
+    });
+  }
+  }
+}
+
+void CacheSession::onCacheLookupResult(absl::StatusOr&& lookup_result) {
+ absl::MutexLock lock(&mu_);
+ if (!lookup_result.ok()) {
+ return onCacheError();
+ }
+ entry_ = std::move(lookup_result.value());
+ if (!entry_.populated()) {
+ performUpstreamRequest();
+ } else {
+ state_ = State::Exists;
+ body_length_available_ = entry_.body_length_.value();
+ sendLookupResponsesAndMaybeValidationRequest();
+ }
+}
+
+// Starts the single upstream request that will populate this cache entry, on
+// behalf of all queued subscribers. Any Range header is stripped so the full
+// resource is fetched. The request is posted to the first subscriber's
+// dispatcher; shared_from_this keeps the session alive across the post.
+// NOTE(review): createHeaderMap template argument was stripped by extraction;
+// restored to the standard Envoy request header map implementation.
+void CacheSession::performUpstreamRequest() {
+  ENVOY_LOG(debug, "making upstream request to populate cache for {}", key_.path());
+  mu_.AssertHeld();
+  ASSERT(state_ == State::Pending);
+  ASSERT(
+      !lookup_subscribers_.empty(),
+      "upstream request should only be possible if someone requested a lookup and it was a miss");
+  ASSERT(!upstream_request_, "should only be one upstream request in flight");
+  LookupSubscriber& first_sub = lookup_subscribers_.front();
+  const ActiveLookupRequest& lookup = first_sub.context_->lookup();
+  Http::RequestHeaderMapPtr request_headers;
+  bool was_ranged_request = lookup.isRangeRequest();
+  if (was_ranged_request) {
+    request_headers = requestHeadersWithRangeRemoved(lookup.requestHeaders());
+  } else {
+    request_headers = Http::createHeaderMap<Http::RequestHeaderMapImpl>(lookup.requestHeaders());
+  }
+  upstream_request_ = lookup.createUpstreamRequest();
+  first_sub.dispatcher().post([upstream_request = upstream_request_.get(),
+                               request_headers = std::move(request_headers), this,
+                               p = shared_from_this(), was_ranged_request]() mutable {
+    upstream_request->sendHeaders(std::move(request_headers));
+    upstream_request->getHeaders([this, p = std::move(p), was_ranged_request](
+                                     Http::ResponseHeaderMapPtr headers, EndStream end_stream) {
+      onUpstreamHeaders(std::move(headers), end_stream, was_ranged_request);
+    });
+  });
+}
+
+// The owning CacheSessions was destroyed mid-operation: downgrade every queued
+// lookup subscriber to a plain upstream pass-through. Caller must hold mu_.
+void CacheSession::onCacheWentAway() {
+  mu_.AssertHeld();
+  for (LookupSubscriber& sub : lookup_subscribers_) {
+    postUpstreamPassThrough(std::move(sub), CacheEntryStatus::LookupError);
+  }
+  lookup_subscribers_.clear();
+}
+
+// Handles a 304 from the validation request: merges the 304's headers over the
+// cached entry's headers (per RFC 9111 header-update rules), optionally writes
+// the updated headers back to the cache, and resolves every subscriber that was
+// waiting on validation. Caller must hold mu_.
+void CacheSession::processSuccessfulValidation(Http::ResponseHeaderMapPtr headers) {
+  mu_.AssertHeld();
+  ENVOY_LOG(debug, "successful validation");
+  ASSERT(!lookup_subscribers_.empty(),
+         "should be impossible to be validating with no context awaiting validation");
+
+  const bool should_update_cached_entry =
+      CacheHeadersUtils::shouldUpdateCachedEntry(*headers, *entry_.response_headers_);
+  // Replace the 304 status code with the cached status code.
+  headers->setStatus(entry_.response_headers_->getStatusValue());
+
+  // Remove content length header if the 304 had one; if the cache entry had a
+  // content length header it will be added by the header adding block below.
+  headers->removeContentLength();
+
+  // A response that has been validated should not contain an Age header as it is equivalent to a
+  // freshly served response from the origin, unless the 304 response has an Age header, which
+  // means it was served by an upstream cache.
+  // Remove any existing Age header in the cached response.
+  entry_.response_headers_->removeInline(CacheCustomHeaders::age());
+
+  // Add any missing headers from the cached response to the 304 response.
+  entry_.response_headers_->iterate([&headers](const Http::HeaderEntry& cached_header) {
+    // TODO(yosrym93): see if we do this without copying the header key twice.
+    Http::LowerCaseString key(cached_header.key().getStringView());
+    if (headers->get(key).empty()) {
+      headers->setCopy(key, cached_header.value().getStringView());
+    }
+    return Http::HeaderMap::Iterate::Continue;
+  });
+
+  // The merged 304 headers become the entry's headers.
+  entry_.response_headers_ = std::move(headers);
+  state_ = State::Exists;
+  if (auto cache_sessions = cache_sessions_.lock()) {
+    if (should_update_cached_entry) {
+      // TODO(yosrym93): else evict, set state to Pending, and treat as insert.
+      LookupSubscriber& sub = lookup_subscribers_.front();
+      // Update metadata associated with the cached response. Right now this is only
+      // response_time.
+      entry_.response_metadata_.response_time_ = cache_sessions->time_source_.systemTime();
+      cache_sessions->cache().updateHeaders(sub.dispatcher(), key_, *entry_.response_headers_,
+                                            entry_.response_metadata_);
+    }
+  }
+
+  CacheEntryStatus status = CacheEntryStatus::Validated;
+  for (LookupSubscriber& recipient : lookup_subscribers_) {
+    sendSuccessfulLookupResultTo(recipient, status);
+    // For requests sharing the same validation upstream, use a distinct status
+    // so it's detectable that we didn't need to do multiple validations.
+    status = CacheEntryStatus::ValidatedFree;
+  }
+  if (auto cache_sessions = cache_sessions_.lock()) {
+    cache_sessions->stats().subCacheSessionsSubscribers(lookup_subscribers_.size());
+  }
+  lookup_subscribers_.clear();
+}
+
+void CacheSession::onUncacheable(Http::ResponseHeaderMapPtr headers, EndStream end_stream,
+ bool range_header_was_stripped) {
+ // If it turned out to be not cacheable, mark it as such, pass the already
+ // open connection to the first request, and give any other requests in flight
+ // a pass-through to upstream.
+ // If the upstream request stripped off a range header from the downstream
+ // request in order to populate the cache, we'll have to drop that upstream
+ // request and just issue a new request for every downstream.
+ mu_.AssertHeld();
+ state_ = State::NotCacheable;
+ bool use_existing_stream = !range_header_was_stripped;
+ if (!use_existing_stream) {
+ // Reset the upstream request if the request wanted a range and
+ // the upstream request didn't want a range.
+ upstream_request_ = nullptr;
+ }
+ for (LookupSubscriber& sub : lookup_subscribers_) {
+ sub.context_->setContentLength(content_length_header_);
+ if (use_existing_stream) {
+ ActiveLookupResultPtr result = std::make_unique<ActiveLookupResult>();
+ result->status_ = CacheEntryStatus::Uncacheable;
+ result->http_source_ = std::make_unique(
+ std::move(upstream_request_), std::move(headers), end_stream);
+ sub.dispatcher().post([result = std::move(result), cb = std::move(sub.callback_)]() mutable {
+ cb(std::move(result));
+ });
+ use_existing_stream = false;
+ } else {
+ postUpstreamPassThrough(std::move(sub), CacheEntryStatus::Uncacheable);
+ }
+ }
+ if (auto cache_sessions = cache_sessions_.lock()) {
+ cache_sessions->stats().subCacheSessionsSubscribers(lookup_subscribers_.size());
+ }
+ lookup_subscribers_.clear();
+ return;
+}
+
+void CacheSession::onUpstreamHeaders(Http::ResponseHeaderMapPtr headers, EndStream end_stream,
+ bool range_header_was_stripped) {
+ absl::MutexLock lock(&mu_);
+ Event::Dispatcher& dispatcher = lookup_subscribers_.front().dispatcher();
+ ASSERT(upstream_request_);
+ if (end_stream == EndStream::Reset) {
+ upstream_request_ = nullptr;
+ state_ = State::New;
+ for (LookupSubscriber& subscriber : lookup_subscribers_) {
+ subscriber.dispatcher().post([callback = std::move(subscriber.callback_)]() mutable {
+ auto result = std::make_unique<ActiveLookupResult>();
+ result->status_ = CacheEntryStatus::UpstreamReset;
+ callback(std::move(result));
+ });
+ }
+ if (auto cache_sessions = cache_sessions_.lock()) {
+ cache_sessions->stats().subCacheSessionsSubscribers(lookup_subscribers_.size());
+ }
+ lookup_subscribers_.clear();
+ return;
+ }
+ ASSERT(headers);
+ if (state_ == State::Validating) {
+ if (Http::Utility::getResponseStatus(*headers) == enumToInt(Http::Code::NotModified)) {
+ upstream_request_ = nullptr;
+ return processSuccessfulValidation(std::move(headers));
+ } else {
+ // Validate failed, so going down the 'insert' path instead.
+ state_ = State::Pending;
+ if (auto cache_sessions = cache_sessions_.lock()) {
+ cache_sessions->cache().evict(dispatcher, key_);
+ }
+ body_length_available_ = 0;
+ entry_ = {};
+ }
+ } else {
+ ASSERT(state_ == State::Pending, "should only get upstreamHeaders for Validating or Pending");
+ }
+ absl::string_view cl = headers->getContentLengthValue();
+ if (!cl.empty()) {
+ absl::SimpleAtoi(cl, &content_length_header_) || (content_length_header_ = 0);
+ }
+ if (!lookup_subscribers_.front().context_->lookup().isCacheableResponse(*headers)) {
+ return onUncacheable(std::move(headers), end_stream, range_header_was_stripped);
+ }
+ if (VaryHeaderUtils::hasVary(*headers)) {
+ // TODO(ravenblack): implement Vary header support.
+ ENVOY_LOG(debug, "Vary header found in upstream response, treating as not cacheable");
+ return onUncacheable(std::move(headers), end_stream, range_header_was_stripped);
+ }
+ auto cache_sessions = cache_sessions_.lock();
+ if (!cache_sessions) {
+ // Cache was deleted while callback was in flight. As a fallback just make all
+ // requests pass through. This shouldn't happen, but it's possible that a config
+ // update can come in *and* the last filter using the cache can get
+ // downstream-disconnected and so deleted, leaving the upstream request
+ // dangling with no cache to talk to.
+ ENVOY_LOG(error, "cache config was deleted while upstream request was in flight");
+ return onCacheWentAway();
+ }
+ if (end_stream == EndStream::End) {
+ upstream_request_ = nullptr;
+ }
+ // We're already on this subscriber's thread; this is posted to ensure no
+ // deadlock on the mutex if the insert operation calls back directly.
+ lookup_subscribers_.front().dispatcher().post(
+ [p = shared_from_this(), &dispatcher = lookup_subscribers_.front().dispatcher(), key = key_,
+ cache_sessions, headers = std::move(headers),
+ upstream_request = std::move(upstream_request_)]() mutable {
+ cache_sessions->cache().insert(dispatcher, key, std::move(headers),
+ cache_sessions->makeMetadata(), std::move(upstream_request),
+ p);
+ // When the cache entry insertion completes it will call back to onHeadersInserted,
+ // or on error onInsertFailed.
+ });
+}
+
+void CacheSessionsImpl::lookup(ActiveLookupRequestPtr request, ActiveLookupResultCallback&& cb) {
+ ASSERT(request);
+ ASSERT(cb);
+ std::shared_ptr<CacheSession> entry = getEntry(request->key());
+ entry->getLookupResult(std::move(request), std::move(cb));
+}
+
+ResponseMetadata CacheSessionsImpl::makeMetadata() {
+ ResponseMetadata metadata;
+ metadata.response_time_ = time_source_.systemTime();
+ return metadata;
+}
+
+void CacheSession::performValidation() {
+ mu_.AssertHeld();
+ ASSERT(!lookup_subscribers_.empty());
+ ENVOY_LOG(debug, "validating");
+ state_ = State::Validating;
+ LookupSubscriber& first_sub = lookup_subscribers_.front();
+ const ActiveLookupRequest& lookup = first_sub.context_->lookup();
+ Http::RequestHeaderMapPtr req = requestHeadersWithRangeRemoved(lookup.requestHeaders());
+ CacheHeadersUtils::injectValidationHeaders(*req, *entry_.response_headers_);
+ upstream_request_ = lookup.createUpstreamRequest();
+ first_sub.dispatcher().post([upstream_request = upstream_request_.get(), req = std::move(req),
+ this, p = shared_from_this()]() mutable {
+ upstream_request->sendHeaders(std::move(req));
+ upstream_request->getHeaders(
+ [this, p = std::move(p)](Http::ResponseHeaderMapPtr headers, EndStream end_stream) {
+ onUpstreamHeaders(std::move(headers), end_stream, false);
+ });
+ });
+}
+
+std::shared_ptr<CacheSession> CacheSessionsImpl::getEntry(const Key& key) {
+ const SystemTime now = time_source_.systemTime();
+ cache().touch(key, now);
+ absl::MutexLock lock(&mu_);
+ auto [it, is_new] = entries_.try_emplace(key);
+ if (is_new) {
+ stats().incCacheSessionsEntries();
+ it->second = std::make_shared<CacheSession>(weak_from_this(), key);
+ }
+ auto ret = it->second;
+ ret->setExpiry(now + expiry_duration_);
+ // As a lazy way of keeping the cache metadata from growing endlessly,
+ // remove at most one adjacent metadata entry every time an entry is touched
+ // if the adjacent entry hasn't been touched in a while.
+ // This should do a decent job of expiring them simply, with a low cost, and
+ // without taking any long-lived locks as would be required for periodic
+ // scanning.
+ if (++it == entries_.end()) {
+ it = entries_.begin();
+ }
+ if (it->second->isExpiredAt(now)) {
+ stats().decCacheSessionsEntries();
+ entries_.erase(it);
+ }
+ return ret;
+}
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cache_sessions_impl.h b/source/extensions/filters/http/cache/cache_sessions_impl.h
new file mode 100644
index 0000000000000..8883706eaabe9
--- /dev/null
+++ b/source/extensions/filters/http/cache/cache_sessions_impl.h
@@ -0,0 +1,310 @@
+#pragma once
+
+#include "envoy/buffer/buffer.h"
+
+#include "source/common/common/cancel_wrapper.h"
+#include "source/extensions/filters/http/cache/cache_sessions.h"
+#include "source/extensions/filters/http/cache/upstream_request.h"
+
+#include "absl/base/thread_annotations.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/synchronization/mutex.h"
+#include "stats.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+class CacheSession;
+class CacheSessionsImpl;
+
+class ActiveLookupContext : public HttpSource {
+public:
+ ActiveLookupContext(ActiveLookupRequestPtr lookup, std::shared_ptr<CacheSession> entry,
+ uint64_t content_length = 0)
+ : lookup_(std::move(lookup)), entry_(entry), content_length_(content_length) {}
+ // HttpSource
+ void getHeaders(GetHeadersCallback&& cb) override;
+ void getBody(AdjustedByteRange range, GetBodyCallback&& cb) override;
+ void getTrailers(GetTrailersCallback&& cb) override;
+
+ Event::Dispatcher& dispatcher() const { return lookup().dispatcher(); }
+ ActiveLookupRequest& lookup() const { return *lookup_; }
+
+ void setContentLength(uint64_t l) { content_length_ = l; }
+
+private:
+ ActiveLookupRequestPtr lookup_;
+ std::shared_ptr<CacheSession> entry_;
+ uint64_t content_length_;
+};
+
+class CacheSession : public Logger::Loggable<Logger::Id::cache_filter>,
+ public CacheProgressReceiver,
+ public std::enable_shared_from_this<CacheSession> {
+public:
+ CacheSession(std::weak_ptr<CacheSessionsImpl> cache_sessions, const Key& key);
+
+ // CacheProgressReceiver
+ void onHeadersInserted(CacheReaderPtr cache_reader, Http::ResponseHeaderMapPtr headers,
+ bool end_stream) override;
+ void onBodyInserted(AdjustedByteRange range, bool end_stream) override;
+ void onTrailersInserted(Http::ResponseTrailerMapPtr trailers) override;
+ void onInsertFailed(absl::Status status) override;
+
+ void getLookupResult(ActiveLookupRequestPtr lookup,
+ ActiveLookupResultCallback&& lookup_result_callback)
+ ABSL_LOCKS_EXCLUDED(mu_);
+ void onCacheLookupResult(absl::StatusOr<LookupResult>&& result) ABSL_LOCKS_EXCLUDED(mu_);
+
+ void wantHeaders(Event::Dispatcher& dispatcher, SystemTime lookup_timestamp,
+ GetHeadersCallback&& cb) ABSL_LOCKS_EXCLUDED(mu_);
+ void wantBodyRange(AdjustedByteRange range, Event::Dispatcher& dispatcher, GetBodyCallback&& cb)
+ ABSL_LOCKS_EXCLUDED(mu_);
+ void wantTrailers(Event::Dispatcher& dispatcher, GetTrailersCallback&& cb)
+ ABSL_LOCKS_EXCLUDED(mu_);
+ void clearUncacheableState() ABSL_LOCKS_EXCLUDED(mu_);
+
+ ~CacheSession();
+
+ class Subscriber {
+ public:
+ explicit Subscriber(Event::Dispatcher& dispatcher) : dispatcher_(dispatcher) {}
+ Event::Dispatcher& dispatcher() { return dispatcher_.get(); }
+
+ private:
+ // In order to be moveable in a vector we can't use a plain reference.
+ std::reference_wrapper dispatcher_;
+ };
+ class BodySubscriber : public Subscriber {
+ public:
+ BodySubscriber(Event::Dispatcher& dispatcher, AdjustedByteRange range, GetBodyCallback&& cb)
+ : Subscriber(dispatcher), callback_(std::move(cb)), range_(std::move(range)) {}
+ GetBodyCallback callback_;
+ AdjustedByteRange range_;
+ };
+ class TrailerSubscriber : public Subscriber {
+ public:
+ TrailerSubscriber(Event::Dispatcher& dispatcher, GetTrailersCallback&& cb)
+ : Subscriber(dispatcher), callback_(std::move(cb)) {}
+ GetTrailersCallback callback_;
+ };
+ class LookupSubscriber : public Subscriber {
+ public:
+ LookupSubscriber(std::unique_ptr<ActiveLookupContext> context, ActiveLookupResultCallback&& cb)
+ : Subscriber(context->dispatcher()), callback_(std::move(cb)),
+ context_(std::move(context)) {}
+ ActiveLookupResultCallback callback_;
+ std::unique_ptr<ActiveLookupContext> context_;
+ };
+
+private:
+ enum class State {
+ // New state means this is the first client of the cache entry - it should immediately
+ // update the state to Pending and attempt a lookup (then if necessary insertion).
+ New,
+ // Pending state means another client is already doing lookup/insertion/verification.
+ // Client should subscribe to this, and act on received messages.
+ Pending,
+ // Inserting state means a cache entry exists but has not yet completed writing.
+ Inserting,
+ // Exists state means a cache entry probably exists. Client should attempt to read from
+ // the entry. On cache failure, state should revert to New. On expiry, state should become
+ // Validating.
+ Exists,
+ // Validating state means the cache entry exists but either is expired or some header has
+ // explicitly required validation from upstream.
+ Validating,
+ // Vary state means the cache entry includes headers and the request must be
+ // re-keyed onto the appropriate variation key.
+ Vary,
+ // NotCacheable state means this key is considered non-cacheable. Client should pass through.
+ // If the passed-through response turns out to be cacheable (i.e. upstream has changed
+ // cache headers), client should update state to Writing, or, if state is already changed,
+ // client should abort the new upstream request and use the shared one.
+ NotCacheable
+ };
+
+ EndStream endStreamAfterHeaders() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ EndStream endStreamAfterBody() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // Switches state to Written, removes the insert_context_, notifies all
+ // subscribers.
+ void insertComplete() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // Switches state to New, removes the insert_context_, resets all subscribers.
+ // Ideally this shouldn't happen, but an unreliable upstream could cause it.
+ // TODO(ravenblackx): this could theoretically be improved with a retry process
+ // rather than resetting all the downstreams on error, but that's beyond MVP.
+ void insertAbort() ABSL_LOCKS_EXCLUDED(mu_);
+
+ void headersWritten(const Http::ResponseHeaderMap&& response_headers,
+ ResponseMetadata&& response_metadata,
+ absl::optional<uint64_t> content_length_override, bool end_stream)
+ ABSL_LOCKS_EXCLUDED(mu_);
+
+ // Populates the headers in memory.
+ void saveHeaders(const Http::ResponseHeaderMap&& response_headers,
+ ResponseMetadata&& response_metadata, absl::optional<uint64_t> content_length,
+ bool end_stream) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ bool requiresValidationFor(const ActiveLookupRequest& lookup) const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // For each subscriber, either sends a lookup response (if validation passes), or
+ // triggers validation *once* for all subscribers for whom validation failed.
+ // If an insert occurred then first_status should be Miss, otherwise Hit.
+ void sendLookupResponsesAndMaybeValidationRequest(
+ CacheEntryStatus first_status = CacheEntryStatus::Hit) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // Sends an upstream validation request.
+ void performValidation() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void processSuccessfulValidation(Http::ResponseHeaderMapPtr headers)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // If the headers include vary, update all blocked subscribers with their new keys
+ // and returns true. Otherwise returns false.
+ bool handleVary(const Http::ResponseHeaderMap&& response_headers)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // Called by the InsertContext.
+ // Updates the state to reflect the increased availability, and
+ // triggers a file-read action if there is a subscriber waiting on a body chunk
+ // within the available range, and no read file action is in flight.
+ void bodyWrittenTo(uint64_t sz, bool end_stream) ABSL_LOCKS_EXCLUDED(mu_);
+
+ // Called by the InsertContext.
+ // Populates the trailers in memory, and calls sendTrailers.
+ void trailersWritten(Http::ResponseTrailerMapPtr response_trailers) ABSL_LOCKS_EXCLUDED(mu_);
+
+ // Attempts to open the cache file.
+ //
+ // On failure notifies the first queued LookupContext of a cache miss, so
+ // the cache entry can be either populated or marked as uncacheable.
+ //
+ // On success, attempts to validate the cache entry.
+ //
+ // If it is valid, all queued LookupContexts are notified to use the file.
+ //
+ // If it is not valid, attempts to populate the cache entry.
+ //
+ // If attempt to populate the cache entry fails, marks as uncacheable,
+ // hands the UpstreamRequest to the first LookupContext, and notifies the
+ // rest of the queue that the result is uncacheable and they should bypass
+ // the cache, or, if the original request had a range header which was
+ // discarded for the UpstreamRequest, the UpstreamRequest is reset and *all*
+ // LookupContexts are notified to bypass the cache.
+ void sendSuccessfulLookupResultTo(LookupSubscriber& subscriber, CacheEntryStatus status)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void checkCacheEntryExistence(Event::Dispatcher& dispatcher) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void onCacheEntryExistence(LookupResult&& lookup_result) ABSL_LOCKS_EXCLUDED(mu_);
+ void sendBodyChunkTo(BodySubscriber& subscriber, AdjustedByteRange range, Buffer::InstancePtr buf)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void sendTrailersTo(TrailerSubscriber& subscriber) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void sendAbortTo(Subscriber& subscriber) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ bool tryEnqueueBodyChunk(BodySubscriber& subscriber) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ // If there's not already a read operation in flight and any requested
+ // range is within the available range, start an operation to
+ // read that range (prioritized by oldest subscriber).
+ void maybeTriggerBodyReadForWaitingSubscriber() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ bool selectBodyToRead() ABSL_LOCKS_EXCLUDED(mu_);
+ void abortBodyOutOfRangeSubscribers() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ bool canReadBodyRangeFromCacheEntry(BodySubscriber& subscriber);
+ void onBodyChunkFromCache(AdjustedByteRange range, Buffer::InstancePtr buffer,
+ EndStream end_stream) ABSL_LOCKS_EXCLUDED(mu_);
+ void onCacheError() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ void doCacheEntryInvalid() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void doCacheMiss() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void validateCacheEntry(Event::Dispatcher& dispatcher) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void performUpstreamRequest() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ void onUpstreamHeaders(Http::ResponseHeaderMapPtr headers, EndStream end_stream,
+ bool range_header_was_stripped) ABSL_LOCKS_EXCLUDED(mu_);
+ void onUncacheable(Http::ResponseHeaderMapPtr headers, EndStream end_stream,
+ bool range_header_was_stripped) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+ // For the unlikely case that cache config was modified while operations were in flight,
+ // requests still in the lookup state are transformed to pass-through.
+ // Requests for headers/body/trailers should be able to continue as the cache
+ // *entries* can outlive the cache object itself as long as they're in use.
+ void onCacheWentAway() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ // May change state from New to Pending, or from Written to Validating.
+ // When changing state, also makes the corresponding upstream request.
+ void mutateStateForHeaderRequest(const LookupRequest& lookup) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ bool headersAreReady() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
+ mutable absl::Mutex mu_;
+ State state_ ABSL_GUARDED_BY(mu_) = State::New;
+ uint64_t content_length_header_ = 0;
+ LookupResult entry_ ABSL_GUARDED_BY(mu_);
+ // While streaming this is a proxy for body_length_ which should not
+ // be populated in entry_ until the insert is complete.
+ uint64_t body_length_available_ = 0;
+ std::weak_ptr<CacheSessionsImpl> cache_sessions_;
+ Key key_;
+ bool in_body_loop_callback_ = false;
+
+ std::vector<LookupSubscriber> lookup_subscribers_ ABSL_GUARDED_BY(mu_);
+ std::vector<BodySubscriber> body_subscribers_ ABSL_GUARDED_BY(mu_);
+ std::vector<TrailerSubscriber> trailer_subscribers_ ABSL_GUARDED_BY(mu_);
+ UpstreamRequestPtr upstream_request_ ABSL_GUARDED_BY(mu_);
+ bool read_action_in_flight_ ABSL_GUARDED_BY(mu_) = false;
+
+ // The following fields and functions are only used by CacheSessions.
+ friend class CacheSessionsImpl;
+ bool inserting() const {
+ absl::MutexLock lock(&mu_);
+ return state_ == State::Inserting;
+ }
+ void setExpiry(SystemTime expiry) { expires_at_ = expiry; }
+ bool isExpiredAt(SystemTime t) const { return expires_at_ < t && !inserting(); }
+
+ SystemTime expires_at_; // This is guarded by CacheSessions's mutex.
+
+ // An arbitrary 256k limit on per-read fragment size.
+ // TODO(ravenblack): Make this configurable?
+ static constexpr uint64_t max_read_chunk_size_ = 256 * 1024;
+};
+
+class CacheSessionsImpl : public CacheSessions,
+ public std::enable_shared_from_this<CacheSessionsImpl> {
+public:
+ CacheSessionsImpl(Server::Configuration::FactoryContext& context,
+ std::unique_ptr<HttpCache> cache)
+ : time_source_(context.serverFactoryContext().timeSource()), cache_(std::move(cache)),
+ stats_(generateStats(context.scope(), cache_->cacheInfo().name_)) {}
+
+ void lookup(ActiveLookupRequestPtr request, ActiveLookupResultCallback&& cb) override;
+ CacheFilterStats& stats() const override { return *stats_; }
+
+ ResponseMetadata makeMetadata();
+
+ HttpCache& cache() const override { return *cache_; }
+
+private:
+ // Returns an entry with the given key, creating it if necessary.
+ std::shared_ptr<CacheSession> getEntry(const Key& key) ABSL_LOCKS_EXCLUDED(mu_);
+
+ TimeSource& time_source_;
+ std::unique_ptr<HttpCache> cache_;
+ CacheFilterStatsPtr stats_;
+ std::chrono::duration<int64_t> expiry_duration_ = std::chrono::minutes(5);
+ mutable absl::Mutex mu_;
+ // If there turns out to be problematic contention on this mutex, this could
+ // easily be turned into a simple short-hash-keyed array of maps each with
+ // their own mutex. Since it's only held for a short time and is related to
+ // async operations, it seems unlikely that mutex contention would be a
+ // significant bottleneck.
+ absl::flat_hash_map<Key, std::shared_ptr<CacheSession>, MessageUtil, MessageUtil>
+ entries_ ABSL_GUARDED_BY(mu_);
+
+ friend class CacheSession;
+};
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/cacheability_utils.cc b/source/extensions/filters/http/cache/cacheability_utils.cc
index 9f142f5b1859f..24eb8966ee778 100644
--- a/source/extensions/filters/http/cache/cacheability_utils.cc
+++ b/source/extensions/filters/http/cache/cacheability_utils.cc
@@ -31,7 +31,7 @@ const std::vector& conditionalHeaders() {
}
} // namespace
-bool CacheabilityUtils::canServeRequestFromCache(const Http::RequestHeaderMap& headers) {
+absl::Status CacheabilityUtils::canServeRequestFromCache(const Http::RequestHeaderMap& headers) {
const absl::string_view method = headers.getMethodValue();
const Http::HeaderValues& header_values = Http::Headers::get();
@@ -45,16 +45,31 @@ bool CacheabilityUtils::canServeRequestFromCache(const Http::RequestHeaderMap& h
// header fields can be ignored by caches and intermediaries.
for (auto conditional_header : conditionalHeaders()) {
if (!headers.get(*conditional_header).empty()) {
- return false;
+ return absl::InvalidArgumentError(*conditional_header);
}
}
// TODO(toddmgreer): Also serve HEAD requests from cache.
// Cache-related headers are checked in HttpCache::LookupRequest.
- return headers.Path() && headers.Host() &&
- !headers.getInline(CacheCustomHeaders::authorization()) &&
- (method == header_values.MethodValues.Get || method == header_values.MethodValues.Head) &&
- Http::Utility::schemeIsValid(headers.getSchemeValue());
+ if (!headers.Path()) {
+ return absl::InvalidArgumentError("no path");
+ }
+ if (!headers.Host()) {
+ return absl::InvalidArgumentError("no host");
+ }
+ if (headers.getInline(CacheCustomHeaders::authorization())) {
+ return absl::InvalidArgumentError("authorization");
+ }
+ if (method.empty()) {
+ return absl::InvalidArgumentError("no method");
+ }
+ if (method != header_values.MethodValues.Get && method != header_values.MethodValues.Head) {
+ return absl::InvalidArgumentError(method);
+ }
+ if (!Http::Utility::schemeIsValid(headers.getSchemeValue())) {
+ return absl::InvalidArgumentError("scheme");
+ }
+ return absl::OkStatus();
}
bool CacheabilityUtils::isCacheableResponse(const Http::ResponseHeaderMap& headers,
diff --git a/source/extensions/filters/http/cache/cacheability_utils.h b/source/extensions/filters/http/cache/cacheability_utils.h
index 8418011f08c2a..e551f4b72370a 100644
--- a/source/extensions/filters/http/cache/cacheability_utils.h
+++ b/source/extensions/filters/http/cache/cacheability_utils.h
@@ -4,6 +4,8 @@
#include "source/common/http/headers.h"
#include "source/extensions/filters/http/cache/cache_headers_utils.h"
+#include "absl/status/status.h"
+
namespace Envoy {
namespace Extensions {
namespace HttpFilters {
@@ -13,7 +15,7 @@ namespace CacheabilityUtils {
// This does not depend on cache-control headers as
// request cache-control headers only decide whether
// validation is required and whether the response can be cached.
-bool canServeRequestFromCache(const Http::RequestHeaderMap& headers);
+absl::Status canServeRequestFromCache(const Http::RequestHeaderMap& headers);
// Checks if a response can be stored in cache.
// Note that if a request is not cacheable according to 'canServeRequestFromCache'
diff --git a/source/extensions/filters/http/cache/config.cc b/source/extensions/filters/http/cache/config.cc
index 3e33af3543070..ef926828166d1 100644
--- a/source/extensions/filters/http/cache/config.cc
+++ b/source/extensions/filters/http/cache/config.cc
@@ -1,6 +1,8 @@
#include "source/extensions/filters/http/cache/config.h"
#include "source/extensions/filters/http/cache/cache_filter.h"
+#include "source/extensions/filters/http/cache/cache_sessions.h"
+#include "source/extensions/filters/http/cache/stats.h"
namespace Envoy {
namespace Extensions {
@@ -10,7 +12,7 @@ namespace Cache {
Http::FilterFactoryCb CacheFilterFactory::createFilterFactoryFromProtoTyped(
const envoy::extensions::filters::http::cache::v3::CacheConfig& config,
const std::string& /*stats_prefix*/, Server::Configuration::FactoryContext& context) {
- std::shared_ptr cache;
+ std::shared_ptr cache;
if (!config.disabled().value()) {
if (!config.has_typed_config()) {
throw EnvoyException("at least one of typed_config or disabled must be set");
@@ -25,10 +27,10 @@ Http::FilterFactoryCb CacheFilterFactory::createFilterFactoryFromProtoTyped(
cache = http_cache_factory->getCache(config, context);
}
-
- return [config = std::make_shared(config, context.serverFactoryContext()),
- cache](Http::FilterChainFactoryCallbacks& callbacks) -> void {
- callbacks.addStreamFilter(std::make_shared(config, cache));
+ return [config = std::make_shared(config, std::move(cache),
+ context.serverFactoryContext())](
+ Http::FilterChainFactoryCallbacks& callbacks) -> void {
+ callbacks.addStreamFilter(std::make_shared(config));
};
}
diff --git a/source/extensions/filters/http/cache/filter_state.h b/source/extensions/filters/http/cache/filter_state.h
deleted file mode 100644
index a161aecde53db..0000000000000
--- a/source/extensions/filters/http/cache/filter_state.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#pragma once
-
-namespace Envoy {
-namespace Extensions {
-namespace HttpFilters {
-namespace Cache {
-
-enum class FilterState {
- Initial,
-
- // Cache lookup found a cached response that requires validation.
- ValidatingCachedResponse,
-
- // Cache lookup found a fresh or validated cached response and it is being added to the encoding
- // stream.
- ServingFromCache,
-
- // The cached response was successfully added to the encoding stream (either during decoding or
- // encoding).
- ResponseServedFromCache,
-
- // The filter won't serve a response from the cache, whether because the request wasn't cacheable,
- // there was no response in cache, the response in cache couldn't be served, or the request was
- // terminated before the cached response could be written. This may be set during decoding or
- // encoding.
- NotServingFromCache,
-
- // CacheFilter::onDestroy has been called, the filter will be destroyed soon. Any triggered
- // callbacks should be ignored.
- Destroyed
-};
-
-} // namespace Cache
-} // namespace HttpFilters
-} // namespace Extensions
-} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/http_cache.cc b/source/extensions/filters/http/cache/http_cache.cc
index 1b1862bebd3b9..dffc940629138 100644
--- a/source/extensions/filters/http/cache/http_cache.cc
+++ b/source/extensions/filters/http/cache/http_cache.cc
@@ -23,139 +23,10 @@ namespace Extensions {
namespace HttpFilters {
namespace Cache {
-LookupRequest::LookupRequest(const Http::RequestHeaderMap& request_headers, SystemTime timestamp,
- const VaryAllowList& vary_allow_list,
- bool ignore_request_cache_control_header)
- : request_headers_(Http::createHeaderMap<Http::RequestHeaderMapImpl>(request_headers)),
- vary_allow_list_(vary_allow_list), timestamp_(timestamp) {
- // These ASSERTs check prerequisites. A request without these headers can't be looked up in cache;
- // CacheFilter doesn't create LookupRequests for such requests.
- ASSERT(request_headers.Path(), "Can't form cache lookup key for malformed Http::RequestHeaderMap "
- "with null Path.");
- ASSERT(request_headers.Host(), "Can't form cache lookup key for malformed Http::RequestHeaderMap "
- "with null Host.");
- absl::string_view scheme = request_headers.getSchemeValue();
- ASSERT(Http::Utility::schemeIsValid(request_headers.getSchemeValue()));
-
- if (!ignore_request_cache_control_header) {
- initializeRequestCacheControl(request_headers);
- }
- // TODO(toddmgreer): Let config determine whether to include scheme, host, and
- // query params.
-
- // TODO(toddmgreer): get cluster name.
- key_.set_cluster_name("cluster_name_goes_here");
- key_.set_host(std::string(request_headers.getHostValue()));
- key_.set_path(std::string(request_headers.getPathValue()));
- if (Http::Utility::schemeIsHttp(scheme)) {
- key_.set_scheme(Key::HTTP);
- } else if (Http::Utility::schemeIsHttps(scheme)) {
- key_.set_scheme(Key::HTTPS);
- }
-}
-
-// Unless this API is still alpha, calls to stableHashKey() must always return
-// the same result, or a way must be provided to deal with a complete cache
-// flush.
size_t stableHashKey(const Key& key) { return DeterministicProtoHash::hash(key); }
-void LookupRequest::initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers) {
- const absl::string_view cache_control =
- request_headers.getInlineValue(CacheCustomHeaders::requestCacheControl());
- const absl::string_view pragma = request_headers.getInlineValue(CacheCustomHeaders::pragma());
-
- if (!cache_control.empty()) {
- request_cache_control_ = RequestCacheControl(cache_control);
- } else {
- // According to: https://httpwg.org/specs/rfc7234.html#header.pragma,
- // when Cache-Control header is missing, "Pragma:no-cache" is equivalent to
- // "Cache-Control:no-cache". Any other directives are ignored.
- request_cache_control_.must_validate_ = RequestCacheControl(pragma).must_validate_;
- }
-}
-
-bool LookupRequest::requiresValidation(const Http::ResponseHeaderMap& response_headers,
- SystemTime::duration response_age) const {
- // TODO(yosrym93): Store parsed response cache-control in cache instead of parsing it on every
- // lookup.
- const absl::string_view cache_control =
- response_headers.getInlineValue(CacheCustomHeaders::responseCacheControl());
- const ResponseCacheControl response_cache_control(cache_control);
-
- const bool request_max_age_exceeded = request_cache_control_.max_age_.has_value() &&
- request_cache_control_.max_age_.value() < response_age;
- if (response_cache_control.must_validate_ || request_cache_control_.must_validate_ ||
- request_max_age_exceeded) {
- // Either the request or response explicitly require validation, or a request max-age
- // requirement is not satisfied.
- return true;
- }
-
- // CacheabilityUtils::isCacheableResponse(..) guarantees that any cached response satisfies this.
- ASSERT(response_cache_control.max_age_.has_value() ||
- (response_headers.getInline(CacheCustomHeaders::expires()) && response_headers.Date()),
- "Cache entry does not have valid expiration data.");
-
- SystemTime::duration freshness_lifetime;
- if (response_cache_control.max_age_.has_value()) {
- freshness_lifetime = response_cache_control.max_age_.value();
- } else {
- const SystemTime expires_value =
- CacheHeadersUtils::httpTime(response_headers.getInline(CacheCustomHeaders::expires()));
- const SystemTime date_value = CacheHeadersUtils::httpTime(response_headers.Date());
- freshness_lifetime = expires_value - date_value;
- }
-
- if (response_age > freshness_lifetime) {
- // Response is stale, requires validation if
- // the response does not allow being served stale,
- // or the request max-stale directive does not allow it.
- const bool allowed_by_max_stale =
- request_cache_control_.max_stale_.has_value() &&
- request_cache_control_.max_stale_.value() > response_age - freshness_lifetime;
- return response_cache_control.no_stale_ || !allowed_by_max_stale;
- } else {
- // Response is fresh, requires validation only if there is an unsatisfied min-fresh requirement.
- const bool min_fresh_unsatisfied =
- request_cache_control_.min_fresh_.has_value() &&
- request_cache_control_.min_fresh_.value() > freshness_lifetime - response_age;
- return min_fresh_unsatisfied;
- }
-}
-
-LookupResult LookupRequest::makeLookupResult(Http::ResponseHeaderMapPtr&& response_headers,
- ResponseMetadata&& metadata,
- absl::optional<uint64_t> content_length) const {
- // TODO(toddmgreer): Implement all HTTP caching semantics.
- ASSERT(response_headers);
- LookupResult result;
-
- // Assumption: Cache lookup time is negligible. Therefore, now == timestamp_
- const Seconds age =
- CacheHeadersUtils::calculateAge(*response_headers, metadata.response_time_, timestamp_);
- response_headers->setInline(CacheCustomHeaders::age(), std::to_string(age.count()));
-
- result.cache_entry_status_ = requiresValidation(*response_headers, age)
- ? CacheEntryStatus::RequiresValidation
- : CacheEntryStatus::Ok;
- result.headers_ = std::move(response_headers);
- if (content_length.has_value()) {
- result.content_length_ = content_length;
- } else {
- absl::string_view content_length_header = result.headers_->getContentLengthValue();
- int64_t length_from_header;
- if (!content_length_header.empty() &&
- absl::SimpleAtoi(content_length_header, &length_from_header)) {
- result.content_length_ = length_from_header;
- }
- }
- if (result.content_length_.has_value()) {
- result.range_details_ =
- RangeUtils::createRangeDetails(requestHeaders(), result.content_length_.value());
- }
-
- return result;
-}
+LookupRequest::LookupRequest(Key&& key, Event::Dispatcher& dispatcher)
+ : dispatcher_(dispatcher), key_(key) {}
} // namespace Cache
} // namespace HttpFilters
diff --git a/source/extensions/filters/http/cache/http_cache.h b/source/extensions/filters/http/cache/http_cache.h
index 8ae1d5d869d4e..0efa9a237c695 100644
--- a/source/extensions/filters/http/cache/http_cache.h
+++ b/source/extensions/filters/http/cache/http_cache.h
@@ -1,20 +1,18 @@
#pragma once
-#include
#include
#include
-#include "envoy/buffer/buffer.h"
#include "envoy/common/time.h"
#include "envoy/config/typed_config.h"
#include "envoy/extensions/filters/http/cache/v3/cache.pb.h"
#include "envoy/http/header_map.h"
#include "envoy/server/factory_context.h"
-#include "source/common/common/assert.h"
-#include "source/common/common/logger.h"
#include "source/extensions/filters/http/cache/cache_entry_utils.h"
#include "source/extensions/filters/http/cache/cache_headers_utils.h"
+#include "source/extensions/filters/http/cache/cache_progress_receiver.h"
+#include "source/extensions/filters/http/cache/http_source.h"
#include "source/extensions/filters/http/cache/key.pb.h"
#include "source/extensions/filters/http/cache/range_utils.h"
@@ -25,37 +23,18 @@ namespace Extensions {
namespace HttpFilters {
namespace Cache {
-// Result of a lookup operation, including cached headers and information needed
-// to serve a response based on it, or to attempt to validate.
-struct LookupResult {
- // If cache_entry_status_ == Unusable, none of the other members are
- // meaningful.
- CacheEntryStatus cache_entry_status_ = CacheEntryStatus::Unusable;
-
- // Headers of the cached response.
- Http::ResponseHeaderMapPtr headers_;
-
- // Size of the full response body. Cache filter will generate a content-length
- // header with this value, replacing any preexisting content-length header.
- // (This lets us dechunk responses as we insert them, then later serve them
- // with a content-length header.)
- // If the cache entry is still populating, and the cache supports streaming,
- // and the response had no content-length header, the content length may be
- // unknown at lookup-time.
- absl::optional<uint64_t> content_length_;
+class CacheSessions;
+class CacheReader;
- // If the request is a range request, this struct indicates if the ranges can
- // be satisfied and which ranges are requested. nullopt indicates that this is
- // not a range request or the range header has been ignored.
- absl::optional<RangeDetails> range_details_;
-
- // Update the content length of the object and its response headers.
- void setContentLength(uint64_t new_length) {
- content_length_ = new_length;
- headers_->setContentLength(new_length);
- }
+// Result of a lookup operation.
+struct LookupResult {
+ std::unique_ptr<CacheReader> cache_reader_;
+ std::unique_ptr<Http::ResponseHeaderMap> response_headers_;
+ std::unique_ptr<Http::ResponseTrailerMap> response_trailers_;
+ ResponseMetadata response_metadata_;
+ absl::optional<uint64_t> body_length_;
+ bool populated() const { return body_length_.has_value(); }
};
-using LookupResultPtr = std::unique_ptr<LookupResult>;
// Produces a hash of key that is consistent across restarts, architectures,
// builds, and configurations. Caches that store persistent entries based on a
@@ -76,234 +55,86 @@ size_t stableHashKey(const Key& key);
class LookupRequest {
public:
// Prereq: request_headers's Path(), Scheme(), and Host() are non-null.
- LookupRequest(const Http::RequestHeaderMap& request_headers, SystemTime timestamp,
- const VaryAllowList& vary_allow_list,
- bool ignore_request_cache_control_header = false);
-
- const RequestCacheControl& requestCacheControl() const { return request_cache_control_; }
+ LookupRequest(Key&& key, Event::Dispatcher& dispatcher);
// Caches may modify the key according to local needs, though care must be
// taken to ensure that meaningfully distinct responses have distinct keys.
const Key& key() const { return key_; }
- // WARNING: Incomplete--do not use in production (yet).
- // Returns a LookupResult suitable for sending to the cache filter's
- // LookupHeadersCallback. Specifically,
- // - LookupResult::cache_entry_status_ is set according to HTTP cache
- // validation logic.
- // - LookupResult::headers_ takes ownership of response_headers.
- // - LookupResult::content_length_ == content_length.
- // - LookupResult::response_ranges_ entries are satisfiable (as documented
- // there).
- LookupResult makeLookupResult(Http::ResponseHeaderMapPtr&& response_headers,
- ResponseMetadata&& metadata,
- absl::optional<uint64_t> content_length) const;
-
- const Http::RequestHeaderMap& requestHeaders() const { return *request_headers_; }
- const VaryAllowList& varyAllowList() const { return vary_allow_list_; }
+ Event::Dispatcher& dispatcher() const { return dispatcher_; }
private:
- void initializeRequestCacheControl(const Http::RequestHeaderMap& request_headers);
- bool requiresValidation(const Http::ResponseHeaderMap& response_headers,
- SystemTime::duration age) const;
-
+ Event::Dispatcher& dispatcher_;
Key key_;
- std::vector<RawByteRange> request_range_spec_;
- Http::RequestHeaderMapPtr request_headers_;
- const VaryAllowList& vary_allow_list_;
- // Time when this LookupRequest was created (in response to an HTTP request).
- SystemTime timestamp_;
- RequestCacheControl request_cache_control_;
};
// Statically known information about a cache.
struct CacheInfo {
absl::string_view name_;
- bool supports_range_requests_ = false;
-};
-
-using LookupBodyCallback = absl::AnyInvocable<void(Buffer::InstancePtr&&, bool end_stream)>;
-using LookupHeadersCallback = absl::AnyInvocable<void(LookupResult&&, bool end_stream)>;
-using LookupTrailersCallback = absl::AnyInvocable<void(Http::ResponseTrailerMapPtr&&)>;
-using InsertCallback = absl::AnyInvocable<void(bool success_ready_for_more)>;
-using UpdateHeadersCallback = absl::AnyInvocable<void(bool)>;
-
-// Manages the lifetime of an insertion.
-class InsertContext {
-public:
- // Accepts response_headers for caching. Only called once.
- //
- // Implementations MUST post to the filter's dispatcher insert_complete(true)
- // on success, or insert_complete(false) to attempt to abort the insertion.
- // This call may be made asynchronously, but any async operation that can
- // potentially silently fail must include a timeout, to avoid memory leaks.
- virtual void insertHeaders(const Http::ResponseHeaderMap& response_headers,
- const ResponseMetadata& metadata, InsertCallback insert_complete,
- bool end_stream) PURE;
-
- // The insertion is streamed into the cache in fragments whose size is determined
- // by the client, but with a pace determined by the cache. To avoid streaming
- // data into cache too fast for the cache to handle, clients should wait for
- // the cache to call ready_for_next_fragment before sending the next fragment.
- //
- // The client can abort the streaming insertion by dropping the
- // InsertContextPtr. A cache can abort the insertion by passing 'false' into
- // ready_for_next_fragment.
- //
- // The cache implementation MUST post ready_for_next_fragment to the filter's
- // dispatcher. This post may be made asynchronously, but any async operation
- // that can potentially silently fail must include a timeout, to avoid memory leaks.
- virtual void insertBody(const Buffer::Instance& fragment, InsertCallback ready_for_next_fragment,
- bool end_stream) PURE;
-
- // Inserts trailers into the cache.
- //
- // The cache implementation MUST post insert_complete to the filter's dispatcher.
- // This call may be made asynchronously, but any async operation that can
- // potentially silently fail must include a timeout, to avoid memory leaks.
- virtual void insertTrailers(const Http::ResponseTrailerMap& trailers,
- InsertCallback insert_complete) PURE;
-
- // This routine is called prior to an InsertContext being destroyed. InsertContext is responsible
- // for making sure that any async activities are cleaned up before returning from onDestroy().
- // This includes timers, network calls, etc. The reason there is an onDestroy() method vs. doing
- // this type of cleanup in the destructor is to avoid potential data races between an async
- // callback and the destructor in case the connection terminates abruptly.
- // Example scenario with a hypothetical cache that uses RPC:
- // 1. [Filter's thread] CacheFilter calls InsertContext::insertBody.
- // 2. [Filter's thread] RPCInsertContext sends RPC and returns.
- // 3. [Filter's thread] Client disconnects; Destroying stream; CacheFilter destructor begins.
- // 4. [Filter's thread] RPCInsertContext destructor begins.
- // 5. [Other thread] RPC completes and calls RPCInsertContext::onRPCDone.
- // --> RPCInsertContext's destructor and onRpcDone cause a data race in RpcInsertContext.
- // onDestroy() should cancel any outstanding async operations and, if necessary,
- // it should block on that cancellation to avoid data races. InsertContext must not invoke any
- // callbacks to the CacheFilter after returning from onDestroy().
- virtual void onDestroy() PURE;
-
- virtual ~InsertContext() = default;
};
-using InsertContextPtr = std::unique_ptr;
-// Lookup context manages the lifetime of a lookup, helping clients to pull data
-// from the cache at a pace that works for them. At any time a client can abort
-// an in-progress lookup by simply dropping the LookupContextPtr.
-class LookupContext {
+class CacheReader {
public:
- // Get the headers from the cache. It is a programming error to call this
- // twice.
- // In the case that a cache supports shared streaming (serving content from
- // the cache entry while it is still being populated), and a range request is made
- // for a streaming entry that didn't have a content-length header from upstream, range
- // requests may be unable to receive a response until the content-length is
- // known to exceed the end of the requested range. In this case a cache
- // implementation should wait until that is known before calling the callback,
- // and must pass a LookupResult with range_details_->satisfiable_ = false
- // if the request is invalid.
- //
- // A cache that posts the callback must wrap it such that if the LookupContext is
- // destroyed before the callback is executed, the callback is not executed.
- virtual void getHeaders(LookupHeadersCallback&& cb) PURE;
-
- // Reads the next fragment from the cache, calling cb when the fragment is ready.
- // The Buffer::InstancePtr passed to cb must not be null.
- //
- // The cache must call cb with a range of bytes starting at range.start() and
- // ending at or before range.end(). Caller is responsible for tracking what
- // ranges have been received, what to request next, and when to stop.
- //
- // A request may have a range that exceeds the size of the content, in support
- // of a "shared stream" cache entry, where the request may not know the size of
- // the content in advance. In this case the cache should call cb with
- // end_stream=true when the end of the body is reached, if there are no trailers.
- //
- // If there are trailers *and* the size of the content was not known when the
- // LookupContext was created, the cache should pass a null buffer pointer to the
- // LookupBodyCallback (when getBody is called with a range starting beyond the
- // end of the actual content-length) to indicate that no more body is available
- // and the filter should request trailers. It is invalid to pass a null buffer
- // pointer other than in this case.
- //
- // If a cache happens to load data in fragments of a set size, it may be
- // efficient to respond with fewer than the requested number of bytes. For
- // example, assuming a 23 byte full-bodied response from a cache that reads in
- // absurdly small 10 byte fragments:
- //
- // getBody requests bytes 0-23 .......... callback with bytes 0-9
- // getBody requests bytes 10-23 .......... callback with bytes 10-19
- // getBody requests bytes 20-23 .......... callback with bytes 20-23
- //
- // A cache that posts the callback must wrap it such that if the LookupContext is
- // destroyed before the callback is executed, the callback is not executed.
- virtual void getBody(const AdjustedByteRange& range, LookupBodyCallback&& cb) PURE;
-
- // Get the trailers from the cache. Only called if the request reached the end of
- // the body and LookupBodyCallback did not pass true for end_stream. The
- // Http::ResponseTrailerMapPtr passed to cb must not be null.
- //
- // A cache that posts the callback must wrap it such that if the LookupContext is
- // destroyed before the callback is executed, the callback is not executed.
- virtual void getTrailers(LookupTrailersCallback&& cb) PURE;
-
- // This routine is called prior to a LookupContext being destroyed. LookupContext is responsible
- // for making sure that any async activities are cleaned up before returning from onDestroy().
- // This includes timers, network calls, etc. The reason there is an onDestroy() method vs. doing
- // this type of cleanup in the destructor is to avoid potential data races between an async
- // callback and the destructor in case the connection terminates abruptly.
- // Example scenario with a hypothetical cache that uses RPC:
- // 1. [Filter's thread] CacheFilter calls LookupContext::getHeaders.
- // 2. [Filter's thread] RPCLookupContext sends RPC and returns.
- // 3. [Filter's thread] Client disconnects; Destroying stream; CacheFilter destructor begins.
- // 4. [Filter's thread] RPCLookupContext destructor begins.
- // 5. [Other thread] RPC completes and calls RPCLookupContext::onRPCDone.
- // --> RPCLookupContext's destructor and onRpcDone cause a data race in RPCLookupContext.
- // onDestroy() should cancel any outstanding async operations and, if necessary,
- // it should block on that cancellation to avoid data races. LookupContext must not invoke any
- // callbacks to the CacheFilter after having onDestroy() invoked.
- virtual void onDestroy() PURE;
-
- virtual ~LookupContext() = default;
+ // May call the callback immediately; dispatcher is provided as an option to facilitate
+ // asynchronous operations.
+ // Will only be called with ranges the cache has announced are available, either via
+ // CacheProgressReceiver::onBodyInserted or via HttpCache::LookupCallback.
+ // end_stream should always be More, unless a cache error occurs in which case Reset -
+ // client already knows the body length so cache does not need to detect 'End'.
+ virtual void getBody(Event::Dispatcher& dispatcher, AdjustedByteRange range,
+ GetBodyCallback&& cb) PURE;
+ virtual ~CacheReader() = default;
};
-using LookupContextPtr = std::unique_ptr;
+using CacheReaderPtr = std::unique_ptr;
// Implement this interface to provide a cache implementation for use by
// CacheFilter.
class HttpCache {
public:
- // Returns a LookupContextPtr to manage the state of a cache lookup. On a cache
- // miss, the returned LookupContext will be given to the insert call (if any).
- //
- // It is possible for a cache to make a "shared stream" of responses allowing
- // read access to a cache entry before its write is complete. In this case the
- // content-length value may be unset.
- virtual LookupContextPtr makeLookupContext(LookupRequest&& request,
- Http::StreamFilterCallbacks& callbacks) PURE;
-
- // Returns an InsertContextPtr to manage the state of a cache insertion.
- // Responses with a chunked transfer-encoding must be dechunked before
- // insertion.
- virtual InsertContextPtr makeInsertContext(LookupContextPtr&& lookup_context,
- Http::StreamFilterCallbacks& callbacks) PURE;
-
- // Precondition: lookup_context represents a prior cache lookup that required
- // validation.
- //
- // Update the headers of that cache entry to match response_headers. The cache
- // entry's body and trailers (if any) will not be modified.
- //
- // This is called when an expired cache entry is successfully validated, to
- // update the cache entry.
- //
- // The on_complete callback is called with true if the update is successful,
- // false if the update was not performed.
- virtual void updateHeaders(const LookupContext& lookup_context,
- const Http::ResponseHeaderMap& response_headers,
- const ResponseMetadata& metadata,
- UpdateHeadersCallback on_complete) PURE;
+ // LookupCallback returns an empty LookupResult if the cache entry does not exist.
+ // Statuses are for actual errors.
+ using LookupCallback = absl::AnyInvocable<void(absl::StatusOr<LookupResult>&&)>;
// Returns statically known information about a cache.
virtual CacheInfo cacheInfo() const PURE;
+ // Calls the callback with a LookupResult; its body_length_ should be nullopt
+ // if the key was not found in the cache. Its cache_reader may be null if the
+ // cache entry has no body.
+ // Using the dispatcher is optional, the callback is thread-safe.
+ // The callback must be called - if the cache is deleted while a callback
+ // is still in flight, the callback should be called with an error status.
+ virtual void lookup(LookupRequest&& request, LookupCallback&& callback) PURE;
+
+ // Remove the entry from the cache.
+ // This should accept any dispatcher, as the cache has no worker affinity.
+ virtual void evict(Event::Dispatcher& dispatcher, const Key& key) PURE;
+
+ // To facilitate LRU cache eviction, provide a timestamp whenever a cache entry is
+ // looked up.
+ virtual void touch(const Key& key, SystemTime timestamp) PURE;
+
+ // Replaces the headers in the cache.
+ // If this requires asynchronous operations, getBody must continue to function for the duration
+ // (perhaps reading from the existing data).
+ // This should avoid modifying the data in-place non-atomically, as during hot restart or other
+ // circumstances in which multiple instances are accessing the same cache, the data store could
+ // be read from while partially written.
+ // If the key doesn't exist, this should be a no-op.
+ virtual void updateHeaders(Event::Dispatcher& dispatcher, const Key& key,
+ const Http::ResponseHeaderMap& updated_headers,
+ const ResponseMetadata& updated_metadata) PURE;
+
+ // insert is only called after the headers have been read successfully and confirmed
+ // to be cacheable, so the headers are provided immediately as the HttpSource has
+ // already consumed them.
+ // If end_stream was true, HttpSourcePtr is null.
+ // The cache insert for future lookup() should only be completed atomically when the
+ // insertion is finished, while the CacheReader passed to progress->onHeadersInserted
+ // should be ready for streaming from immediately (subject to relevant body progress).
+ virtual void insert(Event::Dispatcher& dispatcher, Key key, Http::ResponseHeaderMapPtr headers,
+ ResponseMetadata metadata, HttpSourcePtr source,
+ std::shared_ptr<CacheProgressReceiver> progress) PURE;
virtual ~HttpCache() = default;
};
@@ -313,12 +144,12 @@ class HttpCacheFactory : public Config::TypedFactory {
// From UntypedFactory
std::string category() const override { return "envoy.http.cache"; }
- // Returns an HttpCache that will remain valid indefinitely (at least as long
- // as the calling CacheFilter).
+ // Returns a CacheSessions initialized with an HttpCache that will remain
+ // valid indefinitely (at least as long as the calling CacheFilter).
//
// Pass factory context to allow HttpCache to use async client, stats scope
// etc.
- virtual std::shared_ptr<HttpCache>
+ virtual std::shared_ptr<CacheSessions>
getCache(const envoy::extensions::filters::http::cache::v3::CacheConfig& config,
Server::Configuration::FactoryContext& context) PURE;
diff --git a/source/extensions/filters/http/cache/http_source.h b/source/extensions/filters/http/cache/http_source.h
new file mode 100644
index 0000000000000..47ff6ab7b5197
--- /dev/null
+++ b/source/extensions/filters/http/cache/http_source.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <memory>
+
+#include "envoy/buffer/buffer.h"
+#include "envoy/http/header_map.h"
+
+#include "source/extensions/filters/http/cache/range_utils.h"
+
+#include "absl/functional/any_invocable.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+// Reset indicates that the upstream source reset (or, if it's not a stream, some
+// kind of unexpected error).
+// More is equivalent to bool end_stream=false.
+// End is equivalent to bool end_stream=true.
+enum class EndStream { Reset, More, End };
+using GetHeadersCallback =
+ absl::AnyInvocable<void(Http::ResponseHeaderMapPtr, EndStream)>;
+using GetBodyCallback = absl::AnyInvocable<void(Buffer::InstancePtr, EndStream)>;
+using GetTrailersCallback =
+ absl::AnyInvocable<void(Http::ResponseTrailerMapPtr, EndStream)>;
+
+// HttpSource is an interface for a source of HTTP data.
+// Callbacks can potentially be called before returning from the get* function.
+// The callback should be called on the same thread as the caller.
+// Only one request should be in flight at a time, and requests must be in
+// order as the source is assumed to be a stream (i.e. headers before body,
+// earlier body before later body, trailers last).
+class HttpSource {
+public:
+ // Calls the provided callback with http headers.
+ virtual void getHeaders(GetHeadersCallback&& cb) PURE;
+ // Calls the provided callback with a buffer that is the beginning of the
+ // requested range, up to but not necessarily including the entire requested
+ // range, or no buffer if there is no more data or an error occurred.
+ virtual void getBody(AdjustedByteRange range, GetBodyCallback&& cb) PURE;
+ virtual void getTrailers(GetTrailersCallback&& cb) PURE;
+ virtual ~HttpSource() = default;
+};
+
+using HttpSourcePtr = std::unique_ptr;
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/range_utils.cc b/source/extensions/filters/http/cache/range_utils.cc
index 6fab49640460e..7382aceed0a20 100644
--- a/source/extensions/filters/http/cache/range_utils.cc
+++ b/source/extensions/filters/http/cache/range_utils.cc
@@ -68,48 +68,41 @@ RangeUtils::getRangeHeader(const Envoy::Http::RequestHeaderMap& headers) {
RangeDetails
RangeUtils::createAdjustedRangeDetails(const std::vector<RawByteRange>& request_range_spec,
uint64_t content_length) {
- RangeDetails result;
if (request_range_spec.empty()) {
// No range header, so the request can proceed.
- result.satisfiable_ = true;
- return result;
+ return {true, {}};
}
if (content_length == 0) {
// There is a range header, but it's unsatisfiable.
- result.satisfiable_ = false;
- return result;
+ return {false, {}};
}
+ RangeDetails result;
for (const RawByteRange& spec : request_range_spec) {
if (spec.isSuffix()) {
// spec is a suffix-byte-range-spec.
if (spec.suffixLength() == 0) {
- // This range is unsatisfiable, so skip it.
- continue;
+ // This range is unsatisfiable.
+ return {false, {}};
}
if (spec.suffixLength() >= content_length) {
// All bytes are being requested, so we may as well send a '200
// OK' response.
- result.ranges_.clear();
- result.satisfiable_ = true;
- return result;
+ return {true, {}};
}
result.ranges_.emplace_back(content_length - spec.suffixLength(), content_length);
} else {
// spec is a byte-range-spec
if (spec.firstBytePos() >= content_length) {
- // This range is unsatisfiable, so skip it.
- continue;
+ // This range is unsatisfiable.
+ return {false, {}};
}
if (spec.lastBytePos() >= content_length - 1) {
if (spec.firstBytePos() == 0) {
// All bytes are being requested, so we may as well send a '200
// OK' response.
-
- result.ranges_.clear();
- result.satisfiable_ = true;
- return result;
+ return {true, {}};
}
result.ranges_.emplace_back(spec.firstBytePos(), content_length);
} else {
diff --git a/source/extensions/filters/http/cache/stats.cc b/source/extensions/filters/http/cache/stats.cc
new file mode 100644
index 0000000000000..7a63154f1d216
--- /dev/null
+++ b/source/extensions/filters/http/cache/stats.cc
@@ -0,0 +1,142 @@
+#include "source/extensions/filters/http/cache/stats.h"
+
+#include "envoy/stats/stats_macros.h"
+
+#include "absl/strings/str_replace.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+#define CACHE_FILTER_STATS(COUNTER, GAUGE, HISTOGRAM, TEXT_READOUT, STATNAME) \
+ STATNAME(cache_sessions_entries) \
+ STATNAME(cache_sessions_subscribers) \
+ STATNAME(upstream_buffered_bytes) \
+ STATNAME(cache) \
+ STATNAME(cache_label) \
+ STATNAME(event) \
+ STATNAME(event_type) \
+ STATNAME(hit) \
+ STATNAME(miss) \
+ STATNAME(failed_validation) \
+ STATNAME(uncacheable) \
+ STATNAME(upstream_reset) \
+ STATNAME(lookup_error) \
+ STATNAME(validate)
+
+MAKE_STAT_NAMES_STRUCT(CacheStatNames, CACHE_FILTER_STATS);
+
+using Envoy::Stats::Utility::counterFromStatNames;
+using Envoy::Stats::Utility::gaugeFromStatNames;
+
+// Concrete CacheFilterStats backed by Envoy's symbolized StatName machinery.
+// All event counters share one stat name ("cache.event") and are distinguished
+// by an "event_type" tag; gauges carry only the cache "label" tag.
+class CacheFilterStatsImpl : public CacheFilterStats {
+public:
+  // label identifies the cache instance; dots are replaced with underscores so
+  // the label is safe to use as a stats tag value.
+  CacheFilterStatsImpl(Stats::Scope& scope, absl::string_view label)
+      : stat_names_(scope.symbolTable()), prefix_(stat_names_.cache_),
+        label_(stat_names_.pool_.add(absl::StrReplaceAll(label, {{".", "_"}}))),
+        tags_just_label_({{stat_names_.cache_label_, label_}}),
+        tags_hit_(
+            {{stat_names_.cache_label_, label_}, {stat_names_.event_type_, stat_names_.hit_}}),
+        tags_miss_(
+            {{stat_names_.cache_label_, label_}, {stat_names_.event_type_, stat_names_.miss_}}),
+        tags_failed_validation_({{stat_names_.cache_label_, label_},
+                                 {stat_names_.event_type_, stat_names_.failed_validation_}}),
+        tags_uncacheable_({{stat_names_.cache_label_, label_},
+                           {stat_names_.event_type_, stat_names_.uncacheable_}}),
+        tags_upstream_reset_({{stat_names_.cache_label_, label_},
+                              {stat_names_.event_type_, stat_names_.upstream_reset_}}),
+        tags_lookup_error_({{stat_names_.cache_label_, label_},
+                            {stat_names_.event_type_, stat_names_.lookup_error_}}),
+        tags_validate_(
+            {{stat_names_.cache_label_, label_}, {stat_names_.event_type_, stat_names_.validate_}}),
+        gauge_cache_sessions_entries_(
+            gaugeFromStatNames(scope, {prefix_, stat_names_.cache_sessions_entries_},
+                               Stats::Gauge::ImportMode::NeverImport, tags_just_label_)),
+        gauge_cache_sessions_subscribers_(
+            gaugeFromStatNames(scope, {prefix_, stat_names_.cache_sessions_subscribers_},
+                               Stats::Gauge::ImportMode::NeverImport, tags_just_label_)),
+        gauge_upstream_buffered_bytes_(
+            gaugeFromStatNames(scope, {prefix_, stat_names_.upstream_buffered_bytes_},
+                               Stats::Gauge::ImportMode::NeverImport, tags_just_label_)),
+        counter_hit_(counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_hit_)),
+        counter_miss_(counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_miss_)),
+        counter_failed_validation_(
+            counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_failed_validation_)),
+        counter_uncacheable_(
+            counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_uncacheable_)),
+        counter_upstream_reset_(
+            counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_upstream_reset_)),
+        counter_lookup_error_(
+            counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_lookup_error_)),
+        counter_validate_(
+            counterFromStatNames(scope, {prefix_, stat_names_.event_}, tags_validate_)) {}
+  // CacheFilterStats
+  void incForStatus(CacheEntryStatus status) override;
+  void incCacheSessionsEntries() override { gauge_cache_sessions_entries_.inc(); }
+  void decCacheSessionsEntries() override { gauge_cache_sessions_entries_.dec(); }
+  void incCacheSessionsSubscribers() override { gauge_cache_sessions_subscribers_.inc(); }
+  void subCacheSessionsSubscribers(uint64_t count) override {
+    gauge_cache_sessions_subscribers_.sub(count);
+  }
+  void addUpstreamBufferedBytes(uint64_t bytes) override {
+    gauge_upstream_buffered_bytes_.add(bytes);
+  }
+  void subUpstreamBufferedBytes(uint64_t bytes) override {
+    gauge_upstream_buffered_bytes_.sub(bytes);
+  }
+
+private:
+  // Not copyable: members are references into the stats scope.
+  CacheFilterStatsImpl(CacheFilterStatsImpl&) = delete;
+  CacheStatNames stat_names_;
+  const Stats::StatName prefix_;
+  const Stats::StatName label_;
+  const Stats::StatNameTagVector tags_just_label_;
+  const Stats::StatNameTagVector tags_hit_;
+  const Stats::StatNameTagVector tags_miss_;
+  const Stats::StatNameTagVector tags_failed_validation_;
+  const Stats::StatNameTagVector tags_uncacheable_;
+  const Stats::StatNameTagVector tags_upstream_reset_;
+  const Stats::StatNameTagVector tags_lookup_error_;
+  const Stats::StatNameTagVector tags_validate_;
+  Stats::Gauge& gauge_cache_sessions_entries_;
+  Stats::Gauge& gauge_cache_sessions_subscribers_;
+  Stats::Gauge& gauge_upstream_buffered_bytes_;
+  Stats::Counter& counter_hit_;
+  Stats::Counter& counter_miss_;
+  Stats::Counter& counter_failed_validation_;
+  Stats::Counter& counter_uncacheable_;
+  Stats::Counter& counter_upstream_reset_;
+  Stats::Counter& counter_lookup_error_;
+  Stats::Counter& counter_validate_;
+};
+
+// Factory for the concrete stats implementation; declared in stats.h so that
+// callers do not depend on CacheFilterStatsImpl directly.
+CacheFilterStatsPtr generateStats(Stats::Scope& scope, absl::string_view label) {
+  // NOTE(review): the template argument was missing (garbled); make_unique
+  // requires the explicit type here.
+  return std::make_unique<CacheFilterStatsImpl>(scope, label);
+}
+
+// Increments the event counter matching a terminal CacheEntryStatus.
+// All statuses that end up serving from cache (Hit, FoundNotModified, Follower,
+// ValidatedFree) are aggregated into the single "hit" counter.
+// No default case: the switch is exhaustive so the compiler flags new statuses.
+void CacheFilterStatsImpl::incForStatus(CacheEntryStatus status) {
+  switch (status) {
+  case CacheEntryStatus::Miss:
+    return counter_miss_.inc();
+  case CacheEntryStatus::FailedValidation:
+    return counter_failed_validation_.inc();
+  case CacheEntryStatus::Hit:
+  case CacheEntryStatus::FoundNotModified:
+  case CacheEntryStatus::Follower:
+  case CacheEntryStatus::ValidatedFree:
+    return counter_hit_.inc();
+  case CacheEntryStatus::Validated:
+    return counter_validate_.inc();
+  case CacheEntryStatus::UpstreamReset:
+    return counter_upstream_reset_.inc();
+  case CacheEntryStatus::Uncacheable:
+    return counter_uncacheable_.inc();
+  case CacheEntryStatus::LookupError:
+    return counter_lookup_error_.inc();
+  }
+}
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/stats.h b/source/extensions/filters/http/cache/stats.h
new file mode 100644
index 0000000000000..82c36e1cbb9d3
--- /dev/null
+++ b/source/extensions/filters/http/cache/stats.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <memory>
+
+#include "source/extensions/filters/http/cache/cache_entry_utils.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+// Abstract sink for the cache filter's stats. The concrete implementation
+// (see stats.cc) records event counters tagged by event type, plus gauges for
+// session entries, subscribers and upstream-buffered bytes.
+class CacheFilterStats {
+public:
+  // Increments the event counter corresponding to the given terminal status.
+  virtual void incForStatus(CacheEntryStatus status) PURE;
+  virtual void incCacheSessionsEntries() PURE;
+  virtual void decCacheSessionsEntries() PURE;
+  virtual void incCacheSessionsSubscribers() PURE;
+  virtual void subCacheSessionsSubscribers(uint64_t count) PURE;
+  virtual void addUpstreamBufferedBytes(uint64_t bytes) PURE;
+  virtual void subUpstreamBufferedBytes(uint64_t bytes) PURE;
+  virtual ~CacheFilterStats() = default;
+};
+
+// Interface for objects that can vend the CacheFilterStats instance to record
+// against, decoupling stats users from the owner of the stats object.
+class CacheFilterStatsProvider {
+public:
+  virtual CacheFilterStats& stats() const PURE;
+  virtual ~CacheFilterStatsProvider() = default;
+};
+
+using CacheFilterStatsPtr = std::unique_ptr<CacheFilterStats>;
+
+CacheFilterStatsPtr generateStats(Stats::Scope& scope, absl::string_view label);
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/upstream_request.cc b/source/extensions/filters/http/cache/upstream_request.cc
deleted file mode 100644
index 550b81c59a7f6..0000000000000
--- a/source/extensions/filters/http/cache/upstream_request.cc
+++ /dev/null
@@ -1,272 +0,0 @@
-#include "source/extensions/filters/http/cache/upstream_request.h"
-
-#include "source/common/common/enum_to_int.h"
-#include "source/common/http/utility.h"
-#include "source/extensions/filters/http/cache/cache_custom_headers.h"
-#include "source/extensions/filters/http/cache/cache_filter.h"
-#include "source/extensions/filters/http/cache/cacheability_utils.h"
-
-namespace Envoy {
-namespace Extensions {
-namespace HttpFilters {
-namespace Cache {
-
-namespace {
-inline bool isResponseNotModified(const Http::ResponseHeaderMap& response_headers) {
- return Http::Utility::getResponseStatus(response_headers) == enumToInt(Http::Code::NotModified);
-}
-} // namespace
-
-void UpstreamRequest::setFilterState(FilterState fs) {
- filter_state_ = fs;
- if (filter_ != nullptr && filter_->filter_state_ != FilterState::Destroyed) {
- filter_->filter_state_ = fs;
- }
-}
-
-void UpstreamRequest::setInsertStatus(InsertStatus is) {
- if (filter_ != nullptr && filter_->filter_state_ != FilterState::Destroyed) {
- filter_->insert_status_ = is;
- }
-}
-
-void UpstreamRequest::processSuccessfulValidation(Http::ResponseHeaderMapPtr response_headers) {
- ASSERT(lookup_result_, "CacheFilter trying to validate a non-existent lookup result");
- ASSERT(
- filter_state_ == FilterState::ValidatingCachedResponse,
- "processSuccessfulValidation must only be called when a cached response is being validated");
- ASSERT(isResponseNotModified(*response_headers),
- "processSuccessfulValidation must only be called with 304 responses");
-
- // Check whether the cached entry should be updated before modifying the 304 response.
- const bool should_update_cached_entry = shouldUpdateCachedEntry(*response_headers);
-
- setFilterState(FilterState::ServingFromCache);
-
- // Replace the 304 response status code with the cached status code.
- response_headers->setStatus(lookup_result_->headers_->getStatusValue());
-
- // Remove content length header if the 304 had one; if the cache entry had a
- // content length header it will be added by the header adding block below.
- response_headers->removeContentLength();
-
- // A response that has been validated should not contain an Age header as it is equivalent to a
- // freshly served response from the origin, unless the 304 response has an Age header, which
- // means it was served by an upstream cache.
- // Remove any existing Age header in the cached response.
- lookup_result_->headers_->removeInline(CacheCustomHeaders::age());
-
- // Add any missing headers from the cached response to the 304 response.
- lookup_result_->headers_->iterate([&response_headers](const Http::HeaderEntry& cached_header) {
- // TODO(yosrym93): Try to avoid copying the header key twice.
- Http::LowerCaseString key(cached_header.key().getStringView());
- absl::string_view value = cached_header.value().getStringView();
- if (response_headers->get(key).empty()) {
- response_headers->setCopy(key, value);
- }
- return Http::HeaderMap::Iterate::Continue;
- });
-
- if (should_update_cached_entry) {
- // TODO(yosrym93): else the cached entry should be deleted.
- // Update metadata associated with the cached response. Right now this is only response_time;
- const ResponseMetadata metadata = {config_->timeSource().systemTime()};
- cache_->updateHeaders(*lookup_, *response_headers, metadata,
- [](bool updated ABSL_ATTRIBUTE_UNUSED) {});
- setInsertStatus(InsertStatus::HeaderUpdate);
- }
-
- // A cache entry was successfully validated, so abort the upstream request, send
- // encode the merged-modified headers, and encode cached body and trailers.
- if (filter_ != nullptr) {
- lookup_result_->headers_ = std::move(response_headers);
- filter_->lookup_result_ = std::move(lookup_result_);
- filter_->lookup_ = std::move(lookup_);
- filter_->upstream_request_ = nullptr;
- lookup_result_ = nullptr;
- filter_->encodeCachedResponse(/* end_stream_after_headers = */ false);
- filter_ = nullptr;
- abort();
- }
-}
-
-// TODO(yosrym93): Write a test that exercises this when SimpleHttpCache implements updateHeaders
-bool UpstreamRequest::shouldUpdateCachedEntry(
- const Http::ResponseHeaderMap& response_headers) const {
- ASSERT(isResponseNotModified(response_headers),
- "shouldUpdateCachedEntry must only be called with 304 responses");
- ASSERT(lookup_result_, "shouldUpdateCachedEntry precondition unsatisfied: lookup_result_ "
- "does not point to a cache lookup result");
- ASSERT(filter_state_ == FilterState::ValidatingCachedResponse,
- "shouldUpdateCachedEntry precondition unsatisfied: the "
- "CacheFilter is not validating a cache lookup result");
-
- // According to: https://httpwg.org/specs/rfc7234.html#freshening.responses,
- // and assuming a single cached response per key:
- // If the 304 response contains a strong validator (etag) that does not match the cached response,
- // the cached response should not be updated.
- const Http::HeaderEntry* response_etag = response_headers.getInline(CacheCustomHeaders::etag());
- const Http::HeaderEntry* cached_etag =
- lookup_result_->headers_->getInline(CacheCustomHeaders::etag());
- return !response_etag || (cached_etag && cached_etag->value().getStringView() ==
- response_etag->value().getStringView());
-}
-
-UpstreamRequest* UpstreamRequest::create(CacheFilter* filter, LookupContextPtr lookup,
- LookupResultPtr lookup_result,
- std::shared_ptr cache,
- Http::AsyncClient& async_client,
- const Http::AsyncClient::StreamOptions& options) {
- return new UpstreamRequest(filter, std::move(lookup), std::move(lookup_result), std::move(cache),
- async_client, options);
-}
-
-UpstreamRequest::UpstreamRequest(CacheFilter* filter, LookupContextPtr lookup,
- LookupResultPtr lookup_result, std::shared_ptr cache,
- Http::AsyncClient& async_client,
- const Http::AsyncClient::StreamOptions& options)
- : filter_(filter), lookup_(std::move(lookup)), lookup_result_(std::move(lookup_result)),
- is_head_request_(filter->is_head_request_),
- request_allows_inserts_(filter->request_allows_inserts_), config_(filter->config_),
- filter_state_(filter->filter_state_), cache_(std::move(cache)),
- stream_(async_client.start(*this, options)) {
- ASSERT(stream_ != nullptr);
-}
-
-void UpstreamRequest::insertQueueOverHighWatermark() {
- // TODO(ravenblack): currently AsyncRequest::Stream does not support pausing.
-}
-
-void UpstreamRequest::insertQueueUnderLowWatermark() {
- // TODO(ravenblack): currently AsyncRequest::Stream does not support pausing.
-}
-
-void UpstreamRequest::insertQueueAborted() {
- insert_queue_ = nullptr;
- ENVOY_LOG(debug, "cache aborted insert operation");
- setInsertStatus(InsertStatus::InsertAbortedByCache);
- if (filter_ == nullptr) {
- abort();
- }
-}
-
-void UpstreamRequest::sendHeaders(Http::RequestHeaderMap& request_headers) {
- // If this request had a body or trailers, CacheFilter::decodeHeaders
- // would have bypassed cache lookup and insertion, so this class wouldn't
- // be instantiated. So end_stream will always be true.
- stream_->sendHeaders(request_headers, true);
-}
-
-void UpstreamRequest::abort() {
- stream_->reset(); // Calls onReset, resulting in deletion.
-}
-
-UpstreamRequest::~UpstreamRequest() {
- if (filter_ != nullptr) {
- filter_->onUpstreamRequestReset();
- }
- if (lookup_) {
- lookup_->onDestroy();
- lookup_ = nullptr;
- }
- if (insert_queue_) {
- // The insert queue may still have actions in flight, so it needs to be allowed
- // to drain itself before destruction.
- insert_queue_->setSelfOwned(std::move(insert_queue_));
- }
-}
-
-void UpstreamRequest::onReset() { delete this; }
-void UpstreamRequest::onComplete() {
- if (filter_) {
- ENVOY_STREAM_LOG(debug, "UpstreamRequest complete", *filter_->decoder_callbacks_);
- filter_->onUpstreamRequestComplete();
- filter_ = nullptr;
- } else {
- ENVOY_LOG(debug, "UpstreamRequest complete after stream finished");
- }
- delete this;
-}
-void UpstreamRequest::disconnectFilter() {
- filter_ = nullptr;
- if (insert_queue_ == nullptr) {
- abort();
- }
-}
-
-void UpstreamRequest::onHeaders(Http::ResponseHeaderMapPtr&& headers, bool end_stream) {
- if (filter_state_ == FilterState::ValidatingCachedResponse && isResponseNotModified(*headers)) {
- return processSuccessfulValidation(std::move(headers));
- }
- // Either a cache miss or a cache entry that is no longer valid.
- // Check if the new response can be cached.
- if (request_allows_inserts_ && !is_head_request_ &&
- CacheabilityUtils::isCacheableResponse(*headers, config_->varyAllowList())) {
- if (filter_) {
- ENVOY_STREAM_LOG(debug, "UpstreamRequest::onHeaders inserting headers",
- *filter_->decoder_callbacks_);
- }
- auto insert_context =
- cache_->makeInsertContext(std::move(lookup_), *filter_->encoder_callbacks_);
- lookup_ = nullptr;
- if (insert_context != nullptr) {
- // The callbacks passed to CacheInsertQueue are all called through the dispatcher,
- // so they're thread-safe. During CacheFilter::onDestroy the queue is given ownership
- // of itself and all the callbacks are cancelled, so they are also filter-destruction-safe.
- insert_queue_ = std::make_unique(cache_, *filter_->encoder_callbacks_,
- std::move(insert_context), *this);
- // Add metadata associated with the cached response. Right now this is only response_time;
- const ResponseMetadata metadata = {config_->timeSource().systemTime()};
- insert_queue_->insertHeaders(*headers, metadata, end_stream);
- // insert_status_ remains absl::nullopt if end_stream == false, as we have not completed the
- // insertion yet.
- if (end_stream) {
- setInsertStatus(InsertStatus::InsertSucceeded);
- }
- }
- } else {
- setInsertStatus(InsertStatus::NoInsertResponseNotCacheable);
- }
- setFilterState(FilterState::NotServingFromCache);
- if (filter_) {
- filter_->decoder_callbacks_->encodeHeaders(std::move(headers), is_head_request_ || end_stream,
- StreamInfo::ResponseCodeDetails::get().ViaUpstream);
- }
-}
-
-void UpstreamRequest::onData(Buffer::Instance& body, bool end_stream) {
- if (insert_queue_ != nullptr) {
- insert_queue_->insertBody(body, end_stream);
- }
- if (filter_) {
- ENVOY_STREAM_LOG(debug, "UpstreamRequest::onData inserted body", *filter_->decoder_callbacks_);
- filter_->decoder_callbacks_->encodeData(body, end_stream);
- if (end_stream) {
- // We don't actually know at this point if the insert succeeded, but as far as the
- // filter is concerned it has been fully handed off to the cache
- // implementation.
- setInsertStatus(InsertStatus::InsertSucceeded);
- }
- } else {
- ENVOY_LOG(debug, "UpstreamRequest::onData inserted body");
- }
-}
-
-void UpstreamRequest::onTrailers(Http::ResponseTrailerMapPtr&& trailers) {
- if (insert_queue_ != nullptr) {
- insert_queue_->insertTrailers(*trailers);
- }
- if (filter_ != nullptr) {
- ENVOY_STREAM_LOG(debug, "UpstreamRequest::onTrailers inserting trailers",
- *filter_->decoder_callbacks_);
- filter_->decoder_callbacks_->encodeTrailers(std::move(trailers));
- setInsertStatus(InsertStatus::InsertSucceeded);
- } else {
- ENVOY_LOG(debug, "UpstreamRequest::onTrailers inserting trailers");
- }
-}
-
-} // namespace Cache
-} // namespace HttpFilters
-} // namespace Extensions
-} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/upstream_request.h b/source/extensions/filters/http/cache/upstream_request.h
index 6aa6259ca26cd..4b5a49d6748ac 100644
--- a/source/extensions/filters/http/cache/upstream_request.h
+++ b/source/extensions/filters/http/cache/upstream_request.h
@@ -1,83 +1,40 @@
#pragma once
-#include "source/common/common/logger.h"
-#include "source/extensions/filters/http/cache/cache_filter_logging_info.h"
-#include "source/extensions/filters/http/cache/cache_insert_queue.h"
+#include "source/extensions/filters/http/cache/http_source.h"
namespace Envoy {
namespace Extensions {
namespace HttpFilters {
namespace Cache {
-class CacheFilter;
-class CacheFilterConfig;
-enum class FilterState;
+class CacheFilterStatsProvider;
-class UpstreamRequest : public Logger::Loggable,
- public Http::AsyncClient::StreamCallbacks,
- public InsertQueueCallbacks {
+class UpstreamRequest : public HttpSource {
public:
- void sendHeaders(Http::RequestHeaderMap& request_headers);
- // Called by filter_ when filter_ is destroyed first.
- // UpstreamRequest will make no more calls to filter_ once disconnectFilter
- // has been called.
- void disconnectFilter();
-
- // StreamCallbacks
- void onHeaders(Http::ResponseHeaderMapPtr&& headers, bool end_stream) override;
- void onData(Buffer::Instance& data, bool end_stream) override;
- void onTrailers(Http::ResponseTrailerMapPtr&& trailers) override;
- void onComplete() override;
- void onReset() override;
-
- // InsertQueueCallbacks
- void insertQueueOverHighWatermark() override;
- void insertQueueUnderLowWatermark() override;
- void insertQueueAborted() override;
-
- static UpstreamRequest* create(CacheFilter* filter, LookupContextPtr lookup,
- LookupResultPtr lookup_result, std::shared_ptr cache,
- Http::AsyncClient& async_client,
- const Http::AsyncClient::StreamOptions& options);
- UpstreamRequest(CacheFilter* filter, LookupContextPtr lookup, LookupResultPtr lookup_result,
- std::shared_ptr cache, Http::AsyncClient& async_client,
- const Http::AsyncClient::StreamOptions& options);
- ~UpstreamRequest() override;
-
-private:
- // Precondition: lookup_result_ points to a cache lookup result that requires validation.
- // filter_state_ is ValidatingCachedResponse.
- // Serves a validated cached response after updating it with a 304 response.
- void processSuccessfulValidation(Http::ResponseHeaderMapPtr response_headers);
-
- // Updates the filter state belonging to the UpstreamRequest, and the one belonging to
- // the filter if it has not been destroyed.
- void setFilterState(FilterState fs);
-
- // Updates the insert status belonging to the filter, if it has not been destroyed.
- void setInsertStatus(InsertStatus is);
-
- // If an error occurs while the stream is active, abort will reset the stream, which
- // in turn provokes the rest of the destruction process.
- void abort();
-
- // Precondition: lookup_result_ points to a cache lookup result that requires validation.
- // filter_state_ is ValidatingCachedResponse.
- // Checks if a cached entry should be updated with a 304 response.
- bool shouldUpdateCachedEntry(const Http::ResponseHeaderMap& response_headers) const;
+ virtual void sendHeaders(Http::RequestHeaderMapPtr headers) PURE;
+};
- CacheFilter* filter_ = nullptr;
- LookupContextPtr lookup_;
- LookupResultPtr lookup_result_;
- bool is_head_request_;
- bool request_allows_inserts_;
- std::shared_ptr config_;
- FilterState filter_state_;
- std::shared_ptr cache_;
- Http::AsyncClient::Stream* stream_ = nullptr;
- std::unique_ptr insert_queue_;
+using UpstreamRequestPtr = std::unique_ptr<UpstreamRequest>;
+
+// UpstreamRequest acts as a bridge between the "pull" operations preferred by
+// the cache filter (getHeaders/getBody/getTrailers) and the "push" operations
+// preferred by most of envoy (encodeHeaders etc. being called by the source).
+//
+// In order to bridge the two, UpstreamRequest must act as a buffer; on a get*
+// request it calls back only when the buffer has [some of] the requested data
+// in it; if the buffer gets overfull, watermark events are triggered on the
+// upstream. The client side should only send get* requests when it is ready for
+// more data, so the downstream is automatically resilient to OOM.
+// TODO(#33319): AsyncClient::Stream does not currently support watermark events.
+class UpstreamRequestFactory {
+public:
+ virtual UpstreamRequestPtr
+  create(const std::shared_ptr<CacheFilterStatsProvider> stats_provider) PURE;
+ virtual ~UpstreamRequestFactory() = default;
};
+using UpstreamRequestFactoryPtr = std::unique_ptr<UpstreamRequestFactory>;
+
} // namespace Cache
} // namespace HttpFilters
} // namespace Extensions
diff --git a/source/extensions/filters/http/cache/upstream_request_impl.cc b/source/extensions/filters/http/cache/upstream_request_impl.cc
new file mode 100644
index 0000000000000..a91c2456747b5
--- /dev/null
+++ b/source/extensions/filters/http/cache/upstream_request_impl.cc
@@ -0,0 +1,212 @@
+#include "source/extensions/filters/http/cache/upstream_request_impl.h"
+
+#include "source/extensions/filters/http/cache/range_utils.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+// Creates an UpstreamRequestImpl bound to the factory's dispatcher, async
+// client and stream options. Restores the template arguments that were
+// stripped from unique_ptr and shared_ptr (the call cannot compile without
+// an explicit type for the privately-constructed object).
+UpstreamRequestPtr UpstreamRequestImplFactory::create(
+    const std::shared_ptr<CacheFilterStatsProvider> stats_provider) {
+  // Can't use make_unique because the constructor is private.
+  auto ret = std::unique_ptr<UpstreamRequestImpl>(new UpstreamRequestImpl(
+      dispatcher_, async_client_, stream_options_, std::move(stats_provider)));
+  return ret;
+}
+
+// Starts the upstream stream immediately; response data is buffered in
+// body_buffer_ and delivered on demand via the get* methods.
+UpstreamRequestImpl::UpstreamRequestImpl(
+    Event::Dispatcher& dispatcher, Http::AsyncClient& async_client,
+    const Http::AsyncClient::StreamOptions& options,
+    const std::shared_ptr<CacheFilterStatsProvider> stats_provider)
+    : dispatcher_(dispatcher), stream_(async_client.start(*this, options)),
+      // WatermarkBuffer callbacks: (below low, above high, overflow unused).
+      body_buffer_([this]() { onBelowLowWatermark(); }, [this]() { onAboveHighWatermark(); },
+                   nullptr),
+      stats_provider_(std::move(stats_provider)) {
+  ASSERT(stream_ != nullptr);
+  // NOTE(review): assumes a buffer_limit of 0 means "no watermark signaling" -
+  // confirm against WatermarkBuffer::setWatermarks semantics.
+  body_buffer_.setWatermarks(options.buffer_limit_.value_or(0));
+}
+
+// Called by body_buffer_ when buffered body rises above the configured limit.
+// Should pause the upstream stream; currently a no-op (see TODO).
+void UpstreamRequestImpl::onAboveHighWatermark() {
+  ASSERT(dispatcher_.isThreadSafe());
+  // TODO(ravenblack): currently AsyncRequest::Stream does not support pausing.
+  // Waiting on issue #33319
+}
+
+// Called by body_buffer_ when buffered body drains below the low watermark.
+// Should resume the upstream stream; currently a no-op (see TODO).
+void UpstreamRequestImpl::onBelowLowWatermark() {
+  ASSERT(dispatcher_.isThreadSafe());
+  // TODO(ravenblack): currently AsyncRequest::Stream does not support pausing.
+  // Waiting on issue #33319
+}
+
+// Requests the response headers; cb fires once headers arrive from upstream,
+// or immediately with Reset if the stream died before delivering anything.
+// Restores the <absl::monostate> template argument stripped from the ASSERT.
+void UpstreamRequestImpl::getHeaders(GetHeadersCallback&& cb) {
+  ASSERT(dispatcher_.isThreadSafe());
+  // Only one get* operation may be outstanding at a time.
+  ASSERT(absl::holds_alternative<absl::monostate>(callback_));
+  if (!stream_ && !end_stream_after_headers_ && !end_stream_after_body_ && !trailers_) {
+    // Stream is gone and nothing was buffered: report a reset.
+    return cb(nullptr, EndStream::Reset);
+  }
+  callback_ = std::move(cb);
+  return maybeDeliverHeaders();
+}
+
+// AsyncClient::StreamCallbacks: response headers arrived from upstream.
+// Buffers them; delivery happens when/if a getHeaders callback is pending.
+void UpstreamRequestImpl::onHeaders(Http::ResponseHeaderMapPtr&& headers, bool end_stream) {
+  ASSERT(dispatcher_.isThreadSafe());
+  headers_ = std::move(headers);
+  end_stream_after_headers_ = end_stream;
+  return maybeDeliverHeaders();
+}
+
+// Delivers buffered headers if (and only if) a getHeaders callback is pending
+// and headers have arrived. Restores the <GetHeadersCallback> template
+// arguments stripped from holds_alternative/get.
+void UpstreamRequestImpl::maybeDeliverHeaders() {
+  ASSERT(dispatcher_.isThreadSafe());
+  if (!absl::holds_alternative<GetHeadersCallback>(callback_) || !headers_) {
+    return;
+  }
+  return absl::get<GetHeadersCallback>(consumeCallback())(
+      std::move(headers_), end_stream_after_headers_ ? EndStream::End : EndStream::More);
+}
+
+// Requests up to range.length() bytes of body starting at range.begin().
+// Reads must be sequential (range.begin() must equal the current stream
+// position). Restores the <absl::monostate> template argument in the ASSERT.
+void UpstreamRequestImpl::getBody(AdjustedByteRange range, GetBodyCallback&& cb) {
+  ASSERT(dispatcher_.isThreadSafe());
+  ASSERT(absl::holds_alternative<absl::monostate>(callback_));
+  ASSERT(range.begin() == stream_pos_, "UpstreamRequest does not support out of order reads");
+  ASSERT(!end_stream_after_headers_);
+  if (!stream_ && !end_stream_after_body_ && !trailers_) {
+    // Stream is gone and no body/trailers remain: report a reset.
+    return cb(nullptr, EndStream::Reset);
+  }
+  requested_body_range_ = std::move(range);
+  callback_ = std::move(cb);
+  return maybeDeliverBody();
+}
+
+// AsyncClient::StreamCallbacks: a chunk of response body arrived from
+// upstream; account the bytes in stats before moving them into the buffer.
+void UpstreamRequestImpl::onData(Buffer::Instance& data, bool end_stream) {
+  ASSERT(dispatcher_.isThreadSafe());
+  end_stream_after_body_ = end_stream;
+  stats().addUpstreamBufferedBytes(data.length());
+  body_buffer_.move(data);
+  return maybeDeliverBody();
+}
+
+// Delivers buffered body to a pending getBody callback, if any. Restores the
+// <GetBodyCallback> template arguments and the <Buffer::OwnedImpl> argument to
+// make_unique that were stripped.
+void UpstreamRequestImpl::maybeDeliverBody() {
+  ASSERT(dispatcher_.isThreadSafe());
+  if (!absl::holds_alternative<GetBodyCallback>(callback_)) {
+    return;
+  }
+  // Deliver no more than the client asked for and no more than is buffered.
+  uint64_t len = std::min(requested_body_range_.length(), body_buffer_.length());
+  if (len == 0) {
+    if (trailers_) {
+      // If we've already seen trailers from upstream and there's no more buffered
+      // body, but the client is still requesting body, it means the client didn't
+      // know how much body to expect. A null body with end_stream=false informs the
+      // client to move on to requesting trailers.
+      return absl::get<GetBodyCallback>(consumeCallback())(nullptr, EndStream::More);
+    }
+    if (end_stream_after_body_) {
+      // If we already reached the end of message and are still requesting more
+      // body, a null buffer indicates the body ended.
+      return absl::get<GetBodyCallback>(consumeCallback())(nullptr, EndStream::End);
+    }
+    // If we have no body or end but have requested some body, that means we're
+    // just waiting for it to arrive, and maybeDeliverBody will be called again
+    // when that happens.
+    return;
+  }
+  auto fragment = std::make_unique<Buffer::OwnedImpl>();
+  fragment->move(body_buffer_, len);
+  stream_pos_ += len;
+  stats().subUpstreamBufferedBytes(len);
+  bool end_stream = end_stream_after_body_ && body_buffer_.length() == 0;
+  return absl::get<GetBodyCallback>(consumeCallback())(
+      std::move(fragment), end_stream ? EndStream::End : EndStream::More);
+}
+
+// Requests the response trailers; only valid after body delivery ended without
+// end_stream. Restores the <absl::monostate> template argument in the ASSERT.
+void UpstreamRequestImpl::getTrailers(GetTrailersCallback&& cb) {
+  ASSERT(dispatcher_.isThreadSafe());
+  ASSERT(absl::holds_alternative<absl::monostate>(callback_));
+  ASSERT(!end_stream_after_headers_ && !end_stream_after_body_);
+  if (!stream_ && !trailers_) {
+    // Stream is gone and no trailers were buffered: report a reset.
+    return cb(nullptr, EndStream::Reset);
+  }
+  callback_ = std::move(cb);
+  return maybeDeliverTrailers();
+}
+
+// AsyncClient::StreamCallbacks: response trailers arrived from upstream.
+void UpstreamRequestImpl::onTrailers(Http::ResponseTrailerMapPtr&& trailers) {
+  ASSERT(dispatcher_.isThreadSafe());
+  trailers_ = std::move(trailers);
+  return maybeDeliverTrailers();
+}
+
+// Delivers buffered trailers to a pending getTrailers callback; also unblocks
+// a pending getBody callback when trailers arrive with no body left. Restores
+// the <GetTrailersCallback>/<GetBodyCallback> template arguments.
+void UpstreamRequestImpl::maybeDeliverTrailers() {
+  ASSERT(dispatcher_.isThreadSafe());
+  if (!absl::holds_alternative<GetTrailersCallback>(callback_) || !trailers_) {
+    if (body_buffer_.length() == 0 && absl::holds_alternative<GetBodyCallback>(callback_)) {
+      // If we received trailers while requesting body it means that we didn't
+      // know how much body to request, or the upstream returned less body than
+      // expected by surprise - a null body response informs the client to
+      // request trailers instead.
+      return absl::get<GetBodyCallback>(consumeCallback())(nullptr, EndStream::More);
+    }
+    return;
+  }
+  return absl::get<GetTrailersCallback>(consumeCallback())(std::move(trailers_), EndStream::End);
+}
+
+// Tears down the request: cancels any pending get* callback, resets the
+// upstream stream if still active, and unwinds the buffered-bytes gauge.
+UpstreamRequestImpl::~UpstreamRequestImpl() {
+  ASSERT(dispatcher_.isThreadSafe());
+  // Cancel in-flight callbacks on destroy.
+  callback_ = absl::monostate{};
+  cancel_();
+  if (stream_) {
+    // Resets the stream and calls onReset, guaranteeing no further callbacks.
+    stream_->reset();
+  }
+  if (body_buffer_.length() > 0) {
+    // Undo the gauge accounting for body that was buffered but never consumed.
+    stats().subUpstreamBufferedBytes(body_buffer_.length());
+  }
+}
+
+// Sends the (header-only) request upstream and, for ranged requests, aligns
+// stream_pos_ with the first requested byte so getBody offsets match.
+// Restores the absl::optional template arguments that were stripped.
+void UpstreamRequestImpl::sendHeaders(Http::RequestHeaderMapPtr request_headers) {
+  ASSERT(dispatcher_.isThreadSafe());
+  // UpstreamRequest must take a copy of the headers as the AsyncStream may
+  // still use the reference provided to it after the original reference has moved.
+  request_headers_ = std::move(request_headers);
+  // If this request had a body or trailers, CacheFilter::decodeHeaders
+  // would have bypassed cache lookup and insertion, so this class wouldn't
+  // be instantiated. So end_stream will always be true.
+  stream_->sendHeaders(*request_headers_, /*end_stream=*/true);
+  absl::optional<absl::string_view> range_header =
+      RangeUtils::getRangeHeader(*request_headers_);
+  if (range_header) {
+    // Only a single range is supported (max_byte_range_specs = 1).
+    absl::optional<std::vector<RawByteRange>> ranges =
+        RangeUtils::parseRangeHeader(range_header.value(), 1);
+    if (ranges) {
+      stream_pos_ = ranges.value().front().firstBytePos();
+    }
+  }
+}
+
+// Standard C++17 "overloaded" visitor helper for absl::visit: inherits the
+// call operators of all supplied lambdas. The deduction guide lets callers
+// write overloaded{...} without spelling the closure types. Restores the
+// <class... Ts> parameter lists that were stripped.
+template <class... Ts> struct overloaded : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
+
+// AsyncClient::StreamCallbacks: the upstream stream was reset. Completes any
+// pending get* callback with EndStream::Reset and forgets the stream pointer.
+void UpstreamRequestImpl::onReset() {
+  ASSERT(dispatcher_.isThreadSafe());
+  stream_ = nullptr;
+  // Dispatch on whichever callback type (if any) is currently pending.
+  absl::visit(overloaded{
+                  [](absl::monostate&&) {},
+                  [](GetHeadersCallback&& cb) { cb(nullptr, EndStream::Reset); },
+                  [](GetBodyCallback&& cb) { cb(nullptr, EndStream::Reset); },
+                  [](GetTrailersCallback&& cb) { cb(nullptr, EndStream::Reset); },
+              },
+              consumeCallback());
+}
+
+// AsyncClient::StreamCallbacks: the upstream stream finished normally. Clear
+// stream_ so the destructor does not reset an already-completed stream.
+void UpstreamRequestImpl::onComplete() {
+  ASSERT(dispatcher_.isThreadSafe());
+  stream_ = nullptr;
+}
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/filters/http/cache/upstream_request_impl.h b/source/extensions/filters/http/cache/upstream_request_impl.h
new file mode 100644
index 0000000000000..133a3b65d1a7f
--- /dev/null
+++ b/source/extensions/filters/http/cache/upstream_request_impl.h
@@ -0,0 +1,102 @@
+#pragma once
+
+#include "source/common/buffer/watermark_buffer.h"
+#include "source/common/common/cancel_wrapper.h"
+#include "source/common/common/logger.h"
+#include "source/extensions/filters/http/cache/http_source.h"
+#include "source/extensions/filters/http/cache/range_utils.h"
+#include "source/extensions/filters/http/cache/stats.h"
+#include "source/extensions/filters/http/cache/upstream_request.h"
+
+#include "absl/types/variant.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+
+class UpstreamRequestImpl : public Logger::Loggable<Logger::Id::cache_filter>,
+ public UpstreamRequest,
+ public Http::AsyncClient::StreamCallbacks {
+public:
+ // Called from the factory.
+ void sendHeaders(Http::RequestHeaderMapPtr request_headers) override;
+ // HttpSource.
+ void getHeaders(GetHeadersCallback&& cb) override;
+ // Though range is an argument here, only the length is used by UpstreamRequest
+ // - the pieces requested should always be in order so we can just consume the
+ // stream as it comes.
+ void getBody(AdjustedByteRange range, GetBodyCallback&& cb) override;
+ void getTrailers(GetTrailersCallback&& cb) override;
+
+ // StreamCallbacks
+ void onHeaders(Http::ResponseHeaderMapPtr&& headers, bool end_stream) override;
+ void onData(Buffer::Instance& data, bool end_stream) override;
+ void onTrailers(Http::ResponseTrailerMapPtr&& trailers) override;
+ void onComplete() override;
+ void onReset() override;
+
+ // Called by WatermarkBuffer
+ void onAboveHighWatermark();
+ void onBelowLowWatermark();
+
+ ~UpstreamRequestImpl() override;
+
+private:
+ friend class UpstreamRequestImplFactory;
+ UpstreamRequestImpl(Event::Dispatcher& dispatcher, Http::AsyncClient& async_client,
+ const Http::AsyncClient::StreamOptions& options,
+ const std::shared_ptr<const CacheFilterStatsProvider> stats_provider);
+ // If the headers and callback are both present, call the callback.
+ void maybeDeliverHeaders();
+
+ // If the required body chunk and callback are both present, call the callback.
+ void maybeDeliverBody();
+
+ // If the trailers and callback are both present, call the callback.
+ void maybeDeliverTrailers();
+
+ using CallbackTypes =
+ absl::variant<absl::monostate, GetHeadersCallback, GetBodyCallback, GetTrailersCallback>;
+
+ // Returns the current callback and clears the member variable so it's safe to
+ // assert that it's empty.
+ CallbackTypes consumeCallback() { return std::exchange(callback_, absl::monostate{}); }
+
+ CacheFilterStats& stats() const { return stats_provider_->stats(); }
+
+ Event::Dispatcher& dispatcher_;
+ Http::AsyncClient::Stream* stream_;
+ Http::RequestHeaderMapPtr request_headers_;
+ Http::ResponseHeaderMapPtr headers_;
+ CallbackTypes callback_;
+ bool end_stream_after_headers_{false};
+ Buffer::WatermarkBuffer body_buffer_;
+ AdjustedByteRange requested_body_range_{0, 1};
+ uint64_t stream_pos_ = 0;
+ bool end_stream_after_body_{false};
+ Http::ResponseTrailerMapPtr trailers_;
+ CancelWrapper::CancelFunction cancel_ = []() {};
+ const std::shared_ptr<const CacheFilterStatsProvider> stats_provider_;
+};
+
+class UpstreamRequestImplFactory : public UpstreamRequestFactory {
+public:
+ UpstreamRequestImplFactory(Event::Dispatcher& dispatcher, Http::AsyncClient& async_client,
+ Http::AsyncClient::StreamOptions stream_options)
+ : dispatcher_(dispatcher), async_client_(async_client),
+ stream_options_(std::move(stream_options)) {}
+
+ UpstreamRequestPtr
+ create(const std::shared_ptr<const CacheFilterStatsProvider> stats_provider) override;
+
+private:
+ Event::Dispatcher& dispatcher_;
+ Http::AsyncClient& async_client_;
+ Http::AsyncClient::StreamOptions stream_options_;
+};
+
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/http/cache/file_system_http_cache/BUILD b/source/extensions/http/cache/file_system_http_cache/BUILD
index 00210bdd84017..9e9b573105f0b 100644
--- a/source/extensions/http/cache/file_system_http_cache/BUILD
+++ b/source/extensions/http/cache/file_system_http_cache/BUILD
@@ -20,6 +20,7 @@ envoy_cc_extension(
name = "config",
srcs = [
"cache_eviction_thread.cc",
+ "cache_file_reader.cc",
"config.cc",
"file_system_http_cache.cc",
"insert_context.cc",
@@ -28,6 +29,7 @@ envoy_cc_extension(
],
hdrs = [
"cache_eviction_thread.h",
+ "cache_file_reader.h",
"file_system_http_cache.h",
"insert_context.h",
"lookup_context.h",
@@ -48,6 +50,7 @@ envoy_cc_extension(
"//source/common/http:headers_lib",
"//source/common/protobuf",
"//source/extensions/common/async_files",
+ "//source/extensions/filters/http/cache:cache_sessions_impl_lib",
"//source/extensions/filters/http/cache:http_cache_lib",
"@com_google_absl//absl/base",
"@com_google_absl//absl/strings",
diff --git a/source/extensions/http/cache/file_system_http_cache/DESIGN.md b/source/extensions/http/cache/file_system_http_cache/DESIGN.md
index cd42187b849a6..760fe51a37899 100644
--- a/source/extensions/http/cache/file_system_http_cache/DESIGN.md
+++ b/source/extensions/http/cache/file_system_http_cache/DESIGN.md
@@ -9,12 +9,10 @@
- [ ] Eviction should be configurable as a "window", like watermarks, or with an optional frequency constraint, so the eviction thread can be kept from churning.
- [x] Cache should be limited to a specified amount of storage
- [ ] Cache should be configurable to periodically update the internal size from the filesystem, to account for external alterations.
-- [ ] Cache should mitigate thundering herd problem (i.e. if two or more workers request the same cacheable uncached result at the same time, only one worker should hit upstream). See [discussion](#thundering-herd).
- [ ] There should be an ability to remove objects from the cache with some kind of API call.
- [ ] Cache should expose counters for eviction stats (files evicted, bytes evicted).
- [ ] Cache should expose counters for timing information (eviction thread idle, eviction thread busy)
- [x] Cache should expose gauges for total size stored.
-- [ ] Cache should optionally expose histograms for insert and lookup latencies.
- [ ] Cache should optionally expose histogram for cache entry sizes.
- [x] Cache should index by the request route *and* a key generated from headers that may affect the outcome of a request (See [allowed_vary_headers](https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/filters/http/cache/v3/cache.proto.html))
- [ ] Cache should create a [tree structure](#tree-structure) of folders (may be configured as just one branch), so user may avoid filesystem performance issues with overcrowded directories.
@@ -22,35 +20,11 @@
## Storage design
-* The only state stored in memory is that a cache entry is in the process of being written; this allows other requests for the same resource in the same process to avoid creating duplicate write operations. (This is an optimization only - simultaneous writes don't break anything, and may occur when multiple processes are involved.)
+* A `CacheSession` maintains an open file handle, ownership of which has been passed to it. It is possible for such an entry to be evicted (on a validation fail most likely), which should be fine - the file will be unlinked and the open file handle will keep the data "alive" until the requests using the old file handle are completed.
+* Simultaneous writes don't break anything, and may occur when multiple processes are touching the same cache.
* The cache can be configured with a maximum number of cache entry files, thereby effectively enforcing a maximum number of files per path.
* A new cache entry that causes the cache to exceed the configured maximum size or maximum number of entries triggers the eviction thread to evict sufficient LRU entries to bring it back below the threshold\[s\] exceeded.
-* Each cache entry file starts with [a fixed structure header followed by a serialized proto](cache_file_header.proto), followed by proto-serialized headers, raw body and proto-serialized trailers.
+* Each cache entry file starts with [a fixed structure header followed by a serialized proto](cache_file_header.proto), followed by raw body, proto-serialized trailers and proto-serialized headers. Headers are at the end to facilitate updating headers on validate operations.
* Cache entry files are named `cache-` followed by a stable hash key for the entry.
* (When implemented) the tree structure of folders is simply one level deep of folders named `cache-0000`, `cache-0001` etc. as four-digit hexadecimal numbers up to the configured number of subdirectories. Cache files are placed in a folder according to a short stable hash of their key. On cache startup, any cache entries found to be in the wrong folder (as would be the case if the number of folders was reconfigured) will simply be removed.
-
-## Discussions
-
-
-### Thundering herd
-
-The current implementation, if there are multiple requests for the same resource before the cache is populated, has only one of them perform an insert operation to the cache, and the rest simply bypass the cache. This can cause the "thundering herd" problem - if requests come in bursts the cache will not protect the upstream from that load.
-
-One possible solution would be to have all requesters for the same cache entry stream as the cache entry is written. However, if we do that, and the inserting stream gets closed prematurely, all the dependent streams would be forced to drop their also-incomplete responses.
-
-Another possible solution is to have secondary requesters wait until the cache entry is populated or abandoned before deciding whether to become cache readers or inserters (or bypass, if it turns out to be uncacheable). The downside of this option is that for large content, the dependent clients won't start streaming at all until the first client *finishes* streaming.
-
-An ideal solution would be to either make an inserting stream non-cancellable (i.e. if the client cancels, the upstream connection continues to stream to populate the cache). This could be achieved either by using the existing stream and adding a "non-cancellable" feature in the core (a bit of a large scale change), or by making insertion not use the existing stream at all, instead creating its own client. The problem with that option is that ideally the client would only pass through the filters that are upstream of the cache filter, and there is currently no mechanism for creating a new "partial filter chain" client like this.
-
-_Proposal from jmarantz:_
-
-The lock object could go into one of several states:
-* In-progress, size unknown
-* headers-complete, content length known and less than a threshold
-* headers-complete, content length known and more than a threshold
-* headers-complete, chunked encoding
-
-Each state could be individually configured as "block" or "pass through", allowing the user to decide which option is more appropriate for a particular use-case.
-
-This proposal would be redundant if we can figure a reliable way to stream a cache entry.
diff --git a/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.cc b/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.cc
index 8b3e43692f158..43b9445cb2c10 100644
--- a/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.cc
+++ b/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.cc
@@ -18,8 +18,8 @@ constexpr std::array ExpectedFileId = {'C', 'A', 'C', 'H'};
// The expected next four bytes of the header - if cacheVersionId() doesn't match
// ExpectedCacheVersionId then the file is from an incompatible cache version and should
// be removed from the cache.
-// Next 4 bytes of file should be "0000".
-constexpr std::array ExpectedCacheVersionId = {'0', '0', '0', '0'};
+// Next 4 bytes of file should be "0001".
+constexpr std::array ExpectedCacheVersionId = {'0', '0', '0', '1'};
} // namespace
diff --git a/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.h b/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.h
index cc675dc536bbd..0a4d0494ab1ee 100644
--- a/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.h
+++ b/source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.h
@@ -106,17 +106,11 @@ class CacheFileFixedBlock {
*/
void setTrailersSize(uint32_t sz) { trailer_size_ = sz; }
- /**
- * the offset from the start of the file to the start of the serialized headers proto.
- * @return the offset in bytes.
- */
- static uint32_t offsetToHeaders() { return size(); }
-
/**
* the offset from the start of the file to the start of the body data.
* @return the offset in bytes.
*/
- uint32_t offsetToBody() const { return offsetToHeaders() + headerSize(); }
+ static uint64_t offsetToBody() { return size(); }
/**
* the offset from the start of the file to the start of the serialized trailers proto.
@@ -125,10 +119,10 @@ class CacheFileFixedBlock {
uint64_t offsetToTrailers() const { return offsetToBody() + bodySize(); }
/**
- * the offset from the start of the file to the end of the file.
+ * the offset from the start of the file to the start of the serialized headers proto.
* @return the offset in bytes.
*/
- uint64_t offsetToEnd() const { return offsetToTrailers() + trailerSize(); }
+ uint64_t offsetToHeaders() const { return offsetToTrailers() + trailerSize(); }
/**
* is this a valid cache file header block for the current code version?
diff --git a/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.cc b/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.cc
index f150f6199bb16..e1938b259278f 100644
--- a/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.cc
+++ b/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.cc
@@ -91,6 +91,12 @@ CacheFileHeader makeCacheFileHeaderProto(Buffer::Instance& buffer) {
return ret;
}
+CacheFileTrailer makeCacheFileTrailerProto(Buffer::Instance& buffer) {
+ CacheFileTrailer ret;
+ ret.ParseFromString(buffer.toString());
+ return ret;
+}
+
} // namespace FileSystemHttpCache
} // namespace Cache
} // namespace HttpFilters
diff --git a/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.h b/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.h
index 8d003aee56a46..c1929d8e5158f 100644
--- a/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.h
+++ b/source/extensions/http/cache/file_system_http_cache/cache_file_header_proto_util.h
@@ -97,6 +97,13 @@ ResponseMetadata metadataFromHeaderProto(const CacheFileHeader& header);
*/
CacheFileHeader makeCacheFileHeaderProto(Buffer::Instance& buffer);
+/**
+ * Deserializes a CacheFileTrailer message from a Buffer.
+ * @param buffer the buffer containing a serialized CacheFileTrailer message.
+ * @return the deserialized CacheFileTrailer message.
+ */
+CacheFileTrailer makeCacheFileTrailerProto(Buffer::Instance& buffer);
+
} // namespace FileSystemHttpCache
} // namespace Cache
} // namespace HttpFilters
diff --git a/source/extensions/http/cache/file_system_http_cache/cache_file_reader.cc b/source/extensions/http/cache/file_system_http_cache/cache_file_reader.cc
new file mode 100644
index 0000000000000..9c69c70eadb00
--- /dev/null
+++ b/source/extensions/http/cache/file_system_http_cache/cache_file_reader.cc
@@ -0,0 +1,41 @@
+#include "source/extensions/http/cache/file_system_http_cache/cache_file_reader.h"
+
+#include "source/extensions/http/cache/file_system_http_cache/cache_file_fixed_block.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+namespace FileSystemHttpCache {
+
+using Common::AsyncFiles::AsyncFileHandle;
+
+CacheFileReader::CacheFileReader(AsyncFileHandle handle) : file_handle_(handle) {}
+
+void CacheFileReader::getBody(Event::Dispatcher& dispatcher, AdjustedByteRange range,
+ GetBodyCallback&& cb) {
+ auto queued = file_handle_->read(
+ &dispatcher, CacheFileFixedBlock::offsetToBody() + range.begin(), range.length(),
+ [len = range.length(),
+ cb = std::move(cb)](absl::StatusOr<Buffer::InstancePtr> read_result) mutable -> void {
+ if (!read_result.ok()) {
+ return cb(nullptr, EndStream::Reset);
+ }
+ if (read_result.value()->length() != len) {
+ return cb(nullptr, EndStream::Reset);
+ }
+ return cb(std::move(read_result.value()), EndStream::More);
+ });
+ ASSERT(queued.ok(), queued.status().ToString());
+}
+
+CacheFileReader::~CacheFileReader() {
+ auto queued = file_handle_->close(nullptr, [](absl::Status) {});
+ ASSERT(queued.ok());
+}
+
+} // namespace FileSystemHttpCache
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/http/cache/file_system_http_cache/cache_file_reader.h b/source/extensions/http/cache/file_system_http_cache/cache_file_reader.h
new file mode 100644
index 0000000000000..ec519e8a81553
--- /dev/null
+++ b/source/extensions/http/cache/file_system_http_cache/cache_file_reader.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "source/extensions/common/async_files/async_file_handle.h"
+#include "source/extensions/filters/http/cache/http_cache.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Cache {
+namespace FileSystemHttpCache {
+
+class CacheFileReader : public CacheReader {
+public:
+ CacheFileReader(Common::AsyncFiles::AsyncFileHandle handle);
+ ~CacheFileReader() override;
+ // From CacheReader
+ void getBody(Event::Dispatcher& dispatcher, AdjustedByteRange range, GetBodyCallback&& cb) final;
+
+private:
+ Common::AsyncFiles::AsyncFileHandle file_handle_;
+};
+
+} // namespace FileSystemHttpCache
+} // namespace Cache
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
diff --git a/source/extensions/http/cache/file_system_http_cache/config.cc b/source/extensions/http/cache/file_system_http_cache/config.cc
index 6e80cc217419a..f065116e6eb55 100644
--- a/source/extensions/http/cache/file_system_http_cache/config.cc
+++ b/source/extensions/http/cache/file_system_http_cache/config.cc
@@ -6,6 +6,7 @@
#include "envoy/registry/registry.h"
#include "source/extensions/common/async_files/async_file_manager_factory.h"
+#include "source/extensions/filters/http/cache/cache_sessions.h"
#include "source/extensions/filters/http/cache/http_cache.h"
#include "source/extensions/http/cache/file_system_http_cache/cache_eviction_thread.h"
#include "source/extensions/http/cache/file_system_http_cache/file_system_http_cache.h"
@@ -47,10 +48,10 @@ class CacheSingleton : public Envoy::Singleton::Instance {
: async_file_manager_factory_(async_file_manager_factory),
cache_eviction_thread_(thread_factory) {}
- std::shared_ptr get(std::shared_ptr singleton,
- const ConfigProto& non_normalized_config,
- Stats::Scope& stats_scope) {
- std::shared_ptr cache;
+ std::shared_ptr<CacheSessions> get(std::shared_ptr<CacheSingleton> singleton,
+ const ConfigProto& non_normalized_config,
+ Server::Configuration::FactoryContext& context) {
+ std::shared_ptr<CacheSessions> cache;
ConfigProto config = normalizeConfig(non_normalized_config);
auto key = config.cache_path();
absl::MutexLock lock(&mu_);
@@ -61,14 +62,20 @@ class CacheSingleton : public Envoy::Singleton::Instance {
if (!cache) {
std::shared_ptr async_file_manager =
async_file_manager_factory_->getAsyncFileManager(config.manager_config());
- cache = std::make_shared(singleton, cache_eviction_thread_,
- std::move(config),
- std::move(async_file_manager), stats_scope);
+ std::unique_ptr<FileSystemHttpCache> fs_cache = std::make_unique<FileSystemHttpCache>(
+ singleton, cache_eviction_thread_, std::move(config), std::move(async_file_manager),
+ context.scope());
+ cache = CacheSessions::create(context, std::move(fs_cache));
caches_[key] = cache;
- } else if (!Protobuf::util::MessageDifferencer::Equals(cache->config(), config)) {
- throw EnvoyException(
- fmt::format("mismatched FileSystemHttpCacheConfig with same path\n{}\nvs.\n{}",
- cache->config().DebugString(), config.DebugString()));
+ } else {
+ // Check that the config of the cache found in the lookup table for the given path
+ // has the same config as the config being added.
+ FileSystemHttpCache& fs_cache = static_cast<FileSystemHttpCache&>(cache->cache());
+ if (!Protobuf::util::MessageDifferencer::Equals(fs_cache.config(), config)) {
+ throw EnvoyException(
+ fmt::format("mismatched FileSystemHttpCacheConfig with same path\n{}\nvs.\n{}",
+ fs_cache.config().DebugString(), config.DebugString()));
+ }
}
return cache;
}
@@ -81,7 +88,7 @@ class CacheSingleton : public Envoy::Singleton::Instance {
// that config of cache. The caches each keep shared_ptrs to this singleton, which keeps the
// singleton from being destroyed unless it's no longer keeping track of any caches.
// (The singleton shared_ptr is *only* held by cache instances.)
- absl::flat_hash_map> caches_ ABSL_GUARDED_BY(mu_);
+ absl::flat_hash_map<std::string, std::weak_ptr<CacheSessions>> caches_ ABSL_GUARDED_BY(mu_);
};
SINGLETON_MANAGER_REGISTRATION(file_system_http_cache_singleton);
@@ -95,7 +102,7 @@ class FileSystemHttpCacheFactory : public HttpCacheFactory {
return std::make_unique();
}
// From HttpCacheFactory
- std::shared_ptr
+ std::shared_ptr<CacheSessions>
getCache(const envoy::extensions::filters::http::cache::v3::CacheConfig& filter_config,
Server::Configuration::FactoryContext& context) override {
ConfigProto config;
@@ -108,7 +115,7 @@ class FileSystemHttpCacheFactory : public HttpCacheFactory {
&context.serverFactoryContext().singletonManager()),
context.serverFactoryContext().api().threadFactory());
});
- return caches->get(caches, config, context.scope());
+ return caches->get(caches, config, context);
}
};
diff --git a/source/extensions/http/cache/file_system_http_cache/file_system_http_cache.cc b/source/extensions/http/cache/file_system_http_cache/file_system_http_cache.cc
index 6497a941f8fa7..231843b0a097f 100644
--- a/source/extensions/http/cache/file_system_http_cache/file_system_http_cache.cc
+++ b/source/extensions/http/cache/file_system_http_cache/file_system_http_cache.cc
@@ -17,62 +17,9 @@ namespace HttpFilters {
namespace Cache {
namespace FileSystemHttpCache {
-// Copying in 128K chunks is an arbitrary choice for a reasonable balance of performance and
-// memory usage. Since UpdateHeaders is unlikely to be a common operation it is most likely
-// not worthwhile to carefully tune this.
-const size_t FileSystemHttpCache::max_update_headers_copy_chunk_size_ = 128 * 1024;
-
const CacheStats& FileSystemHttpCache::stats() const { return shared_->stats_; }
const ConfigProto& FileSystemHttpCache::config() const { return shared_->config_; }
-void FileSystemHttpCache::writeVaryNodeToDisk(Event::Dispatcher& dispatcher, const Key& key,
- const Http::ResponseHeaderMap& response_headers,
- std::shared_ptr cleanup) {
- auto vary_values = VaryHeaderUtils::getVaryValues(response_headers);
- auto headers = std::make_shared();
- auto h = headers->add_headers();
- h->set_key("vary");
- h->set_value(absl::StrJoin(vary_values, ","));
- std::string filename = absl::StrCat(cachePath(), generateFilename(key));
- async_file_manager_->createAnonymousFile(
- &dispatcher, cachePath(),
- [headers, filename = std::move(filename), cleanup,
- dispatcher = &dispatcher](absl::StatusOr open_result) {
- if (!open_result.ok()) {
- ENVOY_LOG(warn, "writing vary node, failed to createAnonymousFile: {}",
- open_result.status());
- return;
- }
- auto file_handle = std::move(open_result.value());
- CacheFileFixedBlock block;
- auto buf = bufferFromProto(*headers);
- block.setHeadersSize(buf.length());
- Buffer::OwnedImpl buf2;
- block.serializeToBuffer(buf2);
- buf2.add(buf);
- size_t sz = buf2.length();
- auto queued = file_handle->write(
- dispatcher, buf2, 0,
- [dispatcher, file_handle, cleanup, sz,
- filename = std::move(filename)](absl::StatusOr write_result) {
- if (!write_result.ok() || write_result.value() != sz) {
- ENVOY_LOG(warn, "writing vary node, failed to write: {}", write_result.status());
- file_handle->close(nullptr, [](absl::Status) {}).IgnoreError();
- return;
- }
- auto queued = file_handle->createHardLink(
- dispatcher, filename, [cleanup, file_handle](absl::Status link_result) {
- if (!link_result.ok()) {
- ENVOY_LOG(warn, "writing vary node, failed to link: {}", link_result);
- }
- file_handle->close(nullptr, [](absl::Status) {}).IgnoreError();
- });
- ASSERT(queued.ok());
- });
- ASSERT(queued.ok());
- });
-}
-
absl::string_view FileSystemHttpCache::name() {
return "envoy.extensions.http.cache.file_system_http_cache";
}
@@ -82,303 +29,243 @@ FileSystemHttpCache::FileSystemHttpCache(
ConfigProto config, std::shared_ptr&& async_file_manager,
Stats::Scope& stats_scope)
: owner_(owner), async_file_manager_(async_file_manager),
- shared_(std::make_shared(config, stats_scope)),
- cache_eviction_thread_(cache_eviction_thread) {
+ shared_(std::make_shared