diff --git a/.changesets/feat_garypen_next_backpressure.md b/.changesets/feat_garypen_next_backpressure.md new file mode 100644 index 0000000000..c39b13f406 --- /dev/null +++ b/.changesets/feat_garypen_next_backpressure.md @@ -0,0 +1,9 @@ +### Enabling back-pressure in the request processing pipeline ([PR #6486](https://github.com/apollographql/router/pull/6486)) + +In Router 1.x, back-pressure was not maintained. Requests would be accepted by the router even when it was already overloaded. This could cause issues for routers which were accepting high levels of traffic. + +We are now improving the handling of back-pressure so that traffic shaping measures are more effective and integration with telemetry is improved. In particular, this means that telemetry events will not be lost due to traffic shaping and that traffic shaping now works more precisely. This will make the behaviour of the router more predictable. + +For more details about how these improvements affect the router please refer to the [migrating from 1.x guide](reference/migration/from-router-v1.mdx). 
+ +By [@garypen](https://github.com/garypen) in https://github.com/apollographql/router/pull/6486 diff --git a/apollo-router/src/axum_factory/tests.rs b/apollo-router/src/axum_factory/tests.rs index 9f7e2582fb..75344cacb3 100644 --- a/apollo-router/src/axum_factory/tests.rs +++ b/apollo-router/src/axum_factory/tests.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::io; use std::net::SocketAddr; use std::pin::Pin; @@ -67,28 +66,18 @@ use crate::graphql; use crate::http_server_factory::HttpServerFactory; use crate::http_server_factory::HttpServerHandle; use crate::json_ext::Path; -use crate::plugin::test::MockSubgraph; -use crate::query_planner::QueryPlannerService; -use crate::router_factory::create_plugins; use crate::router_factory::Endpoint; use crate::router_factory::RouterFactory; -use crate::services::execution; -use crate::services::layers::persisted_queries::PersistedQueryLayer; -use crate::services::layers::query_analysis::QueryAnalysisLayer; use crate::services::layers::static_page::home_page_content; use crate::services::layers::static_page::sandbox_page_content; use crate::services::new_service::ServiceFactory; use crate::services::router; -use crate::services::router::service::RouterCreator; use crate::services::supergraph; -use crate::services::HasSchema; -use crate::services::PluggableSupergraphServiceBuilder; use crate::services::RouterRequest; use crate::services::RouterResponse; use crate::services::SupergraphResponse; use crate::services::MULTIPART_DEFER_ACCEPT; use crate::services::MULTIPART_DEFER_CONTENT_TYPE; -use crate::spec::Schema; use crate::test_harness::http_client; use crate::test_harness::http_client::MaybeMultipart; use crate::uplink::license_enforcement::LicenseState; @@ -2406,114 +2395,3 @@ async fn test_supergraph_and_health_check_same_port_different_listener() { error.to_string() ); } - -#[tokio::test] -async fn test_supergraph_timeout() { - let config = serde_json::json!({ - "supergraph": { - "listen": "127.0.0.1:0", - 
"defer_support": false, - }, - "traffic_shaping": { - "router": { - "timeout": "1ns" - } - }, - }); - - let conf: Arc = Arc::new(serde_json::from_value(config).unwrap()); - - let schema = include_str!("..//testdata/minimal_supergraph.graphql"); - let schema = Arc::new(Schema::parse(schema, &conf).unwrap()); - let planner = QueryPlannerService::new(schema.clone(), conf.clone()) - .await - .unwrap(); - - // we do the entire supergraph rebuilding instead of using `from_supergraph_mock_callback_and_configuration` - // because we need the plugins to apply on the supergraph - let subgraph_schemas = Arc::new( - planner - .subgraph_schemas() - .iter() - .map(|(k, v)| (k.clone(), v.schema.clone())) - .collect(), - ); - let mut plugins = create_plugins(&conf, &schema, subgraph_schemas, None, None) - .await - .unwrap(); - - plugins.insert("delay".into(), Box::new(Delay)); - - struct Delay; - - #[async_trait::async_trait] - impl crate::plugin::Plugin for Delay { - type Config = (); - - async fn new(_: crate::plugin::PluginInit<()>) -> Result { - Ok(Self) - } - - fn execution_service(&self, service: execution::BoxService) -> execution::BoxService { - service - .map_future(|fut| async { - tokio::time::sleep(Duration::from_millis(10)).await; - fut.await - }) - .boxed() - } - } - - let builder = PluggableSupergraphServiceBuilder::new(planner) - .with_configuration(conf.clone()) - .with_subgraph_service("accounts", MockSubgraph::new(HashMap::new())); - - let supergraph_creator = builder - .with_plugins(Arc::new(plugins)) - .build() - .await - .unwrap(); - - let service = RouterCreator::new( - QueryAnalysisLayer::new(supergraph_creator.schema(), Arc::clone(&conf)).await, - Arc::new(PersistedQueryLayer::new(&conf).await.unwrap()), - Arc::new(supergraph_creator), - conf.clone(), - ) - .await - .unwrap() - .make(); - - // keep the server handle around otherwise it will immediately shutdown - let (server, client) = init_with_config(service, conf.clone(), MultiMap::new()) - .await - 
.unwrap(); - let url = server - .graphql_listen_address() - .as_ref() - .unwrap() - .to_string(); - - let response = client - .post(url) - .body(r#"{ "query": "{ me }" }"#) - .send() - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::GATEWAY_TIMEOUT); - - let body = response.bytes().await.unwrap(); - let body: serde_json::Value = serde_json::from_slice(&body).unwrap(); - assert_eq!( - body, - json!({ - "errors": [{ - "message": "Request timed out", - "extensions": { - "code": "REQUEST_TIMEOUT" - } - }] - }) - ); -} diff --git a/apollo-router/src/batching.rs b/apollo-router/src/batching.rs index 10c65916a3..158de76f27 100644 --- a/apollo-router/src/batching.rs +++ b/apollo-router/src/batching.rs @@ -281,7 +281,7 @@ impl Batch { request, sender, .. } in cancelled_requests { - let subgraph_name = request.subgraph_name.ok_or(SubgraphBatchingError::MissingSubgraphName)?; + let subgraph_name = request.subgraph_name; if let Err(log_error) = sender.send(Err(Box::new(FetchError::SubrequestBatchingError { service: subgraph_name.clone(), reason: format!("request cancelled: {reason}"), @@ -365,7 +365,7 @@ impl Batch { sender: tx, } in all_in_one { - let subgraph_name = sg_request.subgraph_name.clone().ok_or(SubgraphBatchingError::MissingSubgraphName)?; + let subgraph_name = sg_request.subgraph_name.clone(); let value = svc_map .entry( subgraph_name, @@ -583,7 +583,7 @@ mod tests { .body(graphql::Response::builder().data(data.clone()).build()) .unwrap(), context: Context::new(), - subgraph_name: None, + subgraph_name: String::default(), id: SubgraphRequestId(String::new()), }; diff --git a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap index ac302d97c3..c2cea5d9ad 100644 --- a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap +++ 
b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap @@ -5380,6 +5380,13 @@ expression: "&schema" "RouterShaping": { "additionalProperties": false, "properties": { + "concurrency_limit": { + "description": "The global concurrency limit", + "format": "uint", + "minimum": 0.0, + "nullable": true, + "type": "integer" + }, "global_rate_limit": { "$ref": "#/definitions/RateLimitConf", "description": "#/definitions/RateLimitConf", diff --git a/apollo-router/src/error.rs b/apollo-router/src/error.rs index 7b550fc79c..df8b41ff54 100644 --- a/apollo-router/src/error.rs +++ b/apollo-router/src/error.rs @@ -630,8 +630,6 @@ impl std::fmt::Display for ValidationErrors { pub(crate) enum SubgraphBatchingError { /// Sender unavailable SenderUnavailable, - /// Request does not have a subgraph name - MissingSubgraphName, /// Requests is empty RequestsIsEmpty, /// Batch processing failed: {0} diff --git a/apollo-router/src/layers/async_checkpoint.rs b/apollo-router/src/layers/async_checkpoint.rs index 5cd45e1713..f4c0d744cc 100644 --- a/apollo-router/src/layers/async_checkpoint.rs +++ b/apollo-router/src/layers/async_checkpoint.rs @@ -13,14 +13,12 @@ use std::marker::PhantomData; use std::ops::ControlFlow; use std::pin::Pin; use std::sync::Arc; -use std::task::Poll; use futures::future::BoxFuture; use futures::Future; use tower::BoxError; use tower::Layer; use tower::Service; -use tower::ServiceExt; /// [`Layer`] for Asynchronous Checkpoints. See [`ServiceBuilderExt::checkpoint_async()`](crate::layers::ServiceBuilderExt::checkpoint_async()). #[allow(clippy::type_complexity)] @@ -63,86 +61,11 @@ where fn layer(&self, service: S) -> Self::Service { AsyncCheckpointService { checkpoint_fn: Arc::clone(&self.checkpoint_fn), - inner: service, + service, } } } -/// [`Service`] for OneShot (single use) Asynchronous Checkpoints. See [`ServiceBuilderExt::oneshot_checkpoint_async()`](crate::layers::ServiceBuilderExt::oneshot_checkpoint_async()). 
-#[allow(clippy::type_complexity)] -pub struct OneShotAsyncCheckpointService -where - Request: Send + 'static, - S: Service + Send + 'static, - >::Response: Send + 'static, - >::Future: Send + 'static, - Fut: Future>::Response, Request>, BoxError>>, -{ - inner: Option, - checkpoint_fn: Arc Fut + Send + Sync + 'static>>>, -} - -impl OneShotAsyncCheckpointService -where - Request: Send + 'static, - S: Service + Send + 'static, - >::Response: Send + 'static, - >::Future: Send + 'static, - Fut: Future>::Response, Request>, BoxError>>, -{ - /// Create an `OneShotAsyncCheckpointLayer` from a function that takes a Service Request and returns a `ControlFlow` - pub fn new(checkpoint_fn: F, service: S) -> Self - where - F: Fn(Request) -> Fut + Send + Sync + 'static, - { - Self { - checkpoint_fn: Arc::new(Box::pin(checkpoint_fn)), - inner: Some(service), - } - } -} - -impl Service for OneShotAsyncCheckpointService -where - Request: Send + 'static, - S: Service + Send + 'static, - >::Response: Send + 'static, - >::Future: Send + 'static, - Fut: Future>::Response, Request>, BoxError>> - + Send - + 'static, -{ - type Response = >::Response; - - type Error = BoxError; - - type Future = BoxFuture<'static, Result>; - - fn poll_ready( - &mut self, - cx: &mut std::task::Context<'_>, - ) -> std::task::Poll> { - // Return an error if we no longer have an inner service - match self.inner.as_mut() { - Some(inner) => inner.poll_ready(cx), - None => Poll::Ready(Err("One shot must only be called once".into())), - } - } - - fn call(&mut self, req: Request) -> Self::Future { - let checkpoint_fn = Arc::clone(&self.checkpoint_fn); - let inner = self.inner.take(); - Box::pin(async move { - let inner = inner.ok_or("One shot must only be called once")?; - match (checkpoint_fn)(req).await { - Ok(ControlFlow::Break(response)) => Ok(response), - Ok(ControlFlow::Continue(request)) => inner.oneshot(request).await, - Err(error) => Err(error), - } - }) - } -} - /// [`Service`] for Asynchronous 
Checkpoints. See [`ServiceBuilderExt::checkpoint_async()`](crate::layers::ServiceBuilderExt::checkpoint_async()). #[allow(clippy::type_complexity)] pub struct AsyncCheckpointService @@ -153,7 +76,7 @@ where >::Future: Send + 'static, Fut: Future>::Response, Request>, BoxError>>, { - inner: S, + service: S, checkpoint_fn: Arc Fut + Send + Sync + 'static>>>, } @@ -172,7 +95,7 @@ where { Self { checkpoint_fn: Arc::new(Box::pin(checkpoint_fn)), - inner: service, + service, } } } @@ -197,68 +120,24 @@ where &mut self, cx: &mut std::task::Context<'_>, ) -> std::task::Poll> { - self.inner.poll_ready(cx) + self.service.poll_ready(cx) } fn call(&mut self, req: Request) -> Self::Future { let checkpoint_fn = Arc::clone(&self.checkpoint_fn); - let inner = self.inner.clone(); + let service = self.service.clone(); + let mut inner = std::mem::replace(&mut self.service, service); + Box::pin(async move { match (checkpoint_fn)(req).await { Ok(ControlFlow::Break(response)) => Ok(response), - Ok(ControlFlow::Continue(request)) => inner.oneshot(request).await, + Ok(ControlFlow::Continue(request)) => inner.call(request).await, Err(error) => Err(error), } }) } } -/// [`Layer`] for OneShot (single use) Asynchronous Checkpoints. See [`ServiceBuilderExt::oneshot_checkpoint_async()`](crate::layers::ServiceBuilderExt::oneshot_checkpoint_async()). -#[allow(clippy::type_complexity)] -pub struct OneShotAsyncCheckpointLayer -where - S: Service + Send + 'static, - Fut: Future>::Response, Request>, BoxError>>, -{ - checkpoint_fn: Arc Fut + Send + Sync + 'static>>>, - phantom: PhantomData, // We use PhantomData because the compiler can't detect that S is used in the Future. 
-} - -impl OneShotAsyncCheckpointLayer -where - S: Service + Send + 'static, - Fut: Future>::Response, Request>, BoxError>>, -{ - /// Create an `OneShotAsyncCheckpointLayer` from a function that takes a Service Request and returns a `ControlFlow` - pub fn new(checkpoint_fn: F) -> Self - where - F: Fn(Request) -> Fut + Send + Sync + 'static, - { - Self { - checkpoint_fn: Arc::new(Box::pin(checkpoint_fn)), - phantom: PhantomData, - } - } -} - -impl Layer for OneShotAsyncCheckpointLayer -where - S: Service + Send + 'static, - >::Future: Send, - Request: Send + 'static, - >::Response: Send + 'static, - Fut: Future>::Response, Request>, BoxError>>, -{ - type Service = OneShotAsyncCheckpointService; - - fn layer(&self, service: S) -> Self::Service { - OneShotAsyncCheckpointService { - checkpoint_fn: Arc::clone(&self.checkpoint_fn), - inner: Some(service), - } - } -} - #[cfg(test)] mod async_checkpoint_tests { use tower::BoxError; @@ -277,21 +156,21 @@ mod async_checkpoint_tests { let expected_label = "from_mock_service"; let mut execution_service = MockExecutionService::new(); - execution_service.expect_clone().return_once(move || { - let mut execution_service = MockExecutionService::new(); - execution_service - .expect_call() - .times(1) - .returning(move |req: ExecutionRequest| { - Ok(ExecutionResponse::fake_builder() - .label(expected_label.to_string()) - .context(req.context) - .build() - .unwrap()) - }); - - execution_service - }); + + execution_service + .expect_clone() + .return_once(MockExecutionService::new); + + execution_service + .expect_call() + .times(1) + .returning(move |req| { + Ok(ExecutionResponse::fake_builder() + .label(expected_label.to_string()) + .context(req.context) + .build() + .unwrap()) + }); let service_stack = ServiceBuilder::new() .checkpoint_async(|req: ExecutionRequest| async { Ok(ControlFlow::Continue(req)) }) @@ -317,20 +196,19 @@ mod async_checkpoint_tests { let expected_label = "from_mock_service"; let mut router_service = 
MockExecutionService::new(); - router_service.expect_clone().return_once(move || { - let mut router_service = MockExecutionService::new(); - router_service - .expect_call() - .times(1) - .returning(move |_req| { - Ok(ExecutionResponse::fake_builder() - .label(expected_label.to_string()) - .build() - .unwrap()) - }); - router_service - }); + router_service + .expect_clone() + .return_once(MockExecutionService::new); + router_service + .expect_call() + .times(1) + .returning(move |_req| { + Ok(ExecutionResponse::fake_builder() + .label(expected_label.to_string()) + .build() + .unwrap()) + }); let service_stack = AsyncCheckpointLayer::new(|req| async { Ok(ControlFlow::Continue(req)) }) .layer(router_service); @@ -425,10 +303,12 @@ mod async_checkpoint_tests { .unwrap()) }); + execution_service + .expect_clone() + .returning(MockExecutionService::new); + let service_stack = ServiceBuilder::new() - .oneshot_checkpoint_async(|req: ExecutionRequest| async { - Ok(ControlFlow::Continue(req)) - }) + .checkpoint_async(|req: ExecutionRequest| async { Ok(ControlFlow::Continue(req)) }) .service(execution_service); let request = ExecutionRequest::fake_builder().build(); @@ -464,9 +344,7 @@ mod async_checkpoint_tests { }); let mut service_stack = ServiceBuilder::new() - .oneshot_checkpoint_async(|req: ExecutionRequest| async { - Ok(ControlFlow::Continue(req)) - }) + .checkpoint_async(|req: ExecutionRequest| async { Ok(ControlFlow::Continue(req)) }) .buffered() .service(execution_service); @@ -479,95 +357,14 @@ mod async_checkpoint_tests { } #[tokio::test] - async fn test_continue_oneshot() { - let expected_label = "from_mock_service"; + async fn test_double_ready_doesnt_panic() { let mut router_service = MockExecutionService::new(); - router_service - .expect_call() - .times(1) - .returning(move |_req| { - Ok(ExecutionResponse::fake_builder() - .label(expected_label.to_string()) - .build() - .unwrap()) - }); - - let service_stack = - OneShotAsyncCheckpointLayer::new(|req| async { 
Ok(ControlFlow::Continue(req)) }) - .layer(router_service); - - let request = ExecutionRequest::fake_builder().build(); - - let actual_label = service_stack - .oneshot(request) - .await - .unwrap() - .next_response() - .await - .unwrap() - .label - .unwrap(); - - assert_eq!(actual_label, expected_label) - } - #[tokio::test] - async fn test_return_oneshot() { - let expected_label = "returned_before_mock_service"; - let router_service = MockExecutionService::new(); - - let service_stack = OneShotAsyncCheckpointLayer::new(|_req| async { - Ok(ControlFlow::Break( - ExecutionResponse::fake_builder() - .label("returned_before_mock_service".to_string()) - .build() - .unwrap(), - )) - }) - .layer(router_service); - - let request = ExecutionRequest::fake_builder().build(); - - let actual_label = service_stack - .oneshot(request) - .await - .unwrap() - .next_response() - .await - .unwrap() - .label - .unwrap(); - - assert_eq!(actual_label, expected_label) - } - - #[tokio::test] - async fn test_error_oneshot() { - let expected_error = "checkpoint_error"; - let router_service = MockExecutionService::new(); - - let service_stack = OneShotAsyncCheckpointLayer::new(move |_req| async move { - Err(BoxError::from(expected_error)) - }) - .layer(router_service); - - let request = ExecutionRequest::fake_builder().build(); - - let actual_error = service_stack - .oneshot(request) - .await - .map(|_| unreachable!()) - .unwrap_err() - .to_string(); - - assert_eq!(actual_error, expected_error) - } - - #[tokio::test] - async fn test_double_ready_doesnt_panic() { - let router_service = MockExecutionService::new(); + router_service + .expect_clone() + .returning(MockExecutionService::new); - let mut service_stack = OneShotAsyncCheckpointLayer::new(|_req| async { + let mut service_stack = AsyncCheckpointLayer::new(|_req| async { Ok(ControlFlow::Break( ExecutionResponse::fake_builder() .label("returned_before_mock_service".to_string()) @@ -583,14 +380,20 @@ mod async_checkpoint_tests { .await 
.unwrap(); - assert!(service_stack.ready().await.is_err()); + assert!(service_stack.ready().await.is_ok()); } #[tokio::test] async fn test_double_call_doesnt_panic() { - let router_service = MockExecutionService::new(); + let mut router_service = MockExecutionService::new(); + + router_service.expect_clone().returning(|| { + let mut mes = MockExecutionService::new(); + mes.expect_clone().returning(MockExecutionService::new); + mes + }); - let mut service_stack = OneShotAsyncCheckpointLayer::new(|_req| async { + let mut service_stack = AsyncCheckpointLayer::new(|_req| async { Ok(ControlFlow::Break( ExecutionResponse::fake_builder() .label("returned_before_mock_service".to_string()) @@ -607,9 +410,11 @@ mod async_checkpoint_tests { .await .unwrap(); + service_stack.ready().await.unwrap(); + assert!(service_stack .call(ExecutionRequest::fake_builder().build()) .await - .is_err()); + .is_ok()); } } diff --git a/apollo-router/src/layers/mod.rs b/apollo-router/src/layers/mod.rs index 8a175639c9..811ce1aabd 100644 --- a/apollo-router/src/layers/mod.rs +++ b/apollo-router/src/layers/mod.rs @@ -14,7 +14,6 @@ use self::map_first_graphql_response::MapFirstGraphqlResponseLayer; use self::map_first_graphql_response::MapFirstGraphqlResponseService; use crate::graphql; use crate::layers::async_checkpoint::AsyncCheckpointLayer; -use crate::layers::async_checkpoint::OneShotAsyncCheckpointLayer; use crate::layers::instrument::InstrumentLayer; use crate::layers::map_future_with_request_data::MapFutureWithRequestDataLayer; use crate::layers::map_future_with_request_data::MapFutureWithRequestDataService; @@ -28,7 +27,15 @@ pub mod map_first_graphql_response; pub mod map_future_with_request_data; pub mod sync_checkpoint; -pub(crate) const DEFAULT_BUFFER_SIZE: usize = 20_000; +// Note: We use Buffer in many places throughout the router. 50_000 represents +// the "maximal number of requests that can be queued for the buffered +// service before backpressure is applied to callers". 
We set this to be +// so high, 50_000, because we anticipate that many users will want to handle high levels of concurrent traffic without this limit being triggered. +// +// Think of this as a backstop for when there are no other backpressure +// enforcing limits configured in a router. In future we may tweak this +// value higher or lower or expose it as a configurable. +pub(crate) const DEFAULT_BUFFER_SIZE: usize = 50_000; /// Extension to the [`ServiceBuilder`] trait to make it easy to add router specific capabilities /// (e.g.: checkpoints) to a [`Service`]. @@ -148,63 +155,6 @@ pub trait ServiceBuilderExt: Sized { self.layer(AsyncCheckpointLayer::new(async_checkpoint_fn)) } - /// Decide if processing should continue or not, and if not allow returning of a response. - /// Unlike checkpoint it is possible to perform async operations in the callback. Unlike - /// checkpoint_async, this does not require that the service is `Clone` and avoids the - /// requiremnent to buffer services. - /// - /// This is useful for things like authentication where you need to make an external call to - /// check if a request should proceed or not. - /// - /// # Arguments - /// - /// * `async_checkpoint_fn`: The asynchronous callback to decide if processing should continue or not. 
- /// - /// returns: ServiceBuilder, L>> - /// - /// # Examples - /// - /// ```rust - /// # use std::ops::ControlFlow; - /// use futures::FutureExt; - /// # use http::Method; - /// # use tower::ServiceBuilder; - /// # use tower_service::Service; - /// # use tracing::info_span; - /// # use apollo_router::services::supergraph; - /// # use apollo_router::layers::ServiceBuilderExt; - /// # fn test(service: supergraph::BoxService) { - /// let _ = ServiceBuilder::new() - /// .oneshot_checkpoint_async(|req: supergraph::Request| - /// async { - /// if req.supergraph_request.method() == Method::GET { - /// Ok(ControlFlow::Break(supergraph::Response::builder() - /// .data("Only get requests allowed") - /// .context(req.context) - /// .build()?)) - /// } else { - /// Ok(ControlFlow::Continue(req)) - /// } - /// } - /// .boxed() - /// ) - /// .service(service); - /// # } - /// ``` - fn oneshot_checkpoint_async( - self, - async_checkpoint_fn: F, - ) -> ServiceBuilder, L>> - where - S: Service + Send + 'static, - Fut: Future< - Output = Result>::Response, Request>, BoxError>, - >, - F: Fn(Request) -> Fut + Send + Sync + 'static, - { - self.layer(OneShotAsyncCheckpointLayer::new(async_checkpoint_fn)) - } - /// Adds a buffer to the service stack with a default size. 
/// /// This is useful for making services `Clone` and `Send` diff --git a/apollo-router/src/plugin/test/mock/subgraph.rs b/apollo-router/src/plugin/test/mock/subgraph.rs index ee38234606..465a2ffb30 100644 --- a/apollo-router/src/plugin/test/mock/subgraph.rs +++ b/apollo-router/src/plugin/test/mock/subgraph.rs @@ -165,11 +165,11 @@ impl Service for MockSubgraph { } let body = req.subgraph_request.body_mut(); + let subscription_stream = self.subscription_stream.clone(); if let Some(sub_stream) = &mut req.subscription_stream { sub_stream .try_send(Box::pin( - self.subscription_stream - .take() + subscription_stream .expect("must have a subscription stream set") .into_stream(), )) @@ -227,6 +227,7 @@ impl Service for MockSubgraph { SubgraphResponse::fake_builder() .error(error) .context(req.context) + .subgraph_name(req.subgraph_name.clone()) .id(req.id) .build() }; diff --git a/apollo-router/src/plugins/authentication/subgraph.rs b/apollo-router/src/plugins/authentication/subgraph.rs index 8e0d2ca1e9..7425c5dbbd 100644 --- a/apollo-router/src/plugins/authentication/subgraph.rs +++ b/apollo-router/src/plugins/authentication/subgraph.rs @@ -811,7 +811,7 @@ mod test { Ok(SubgraphResponse::new_from_response( http::Response::default(), Context::new(), - req.subgraph_name.unwrap_or_else(|| String::from("test")), + req.subgraph_name, SubgraphRequestId(String::new()), )) } @@ -842,6 +842,7 @@ mod test { ) .operation_kind(OperationKind::Query) .context(Context::new()) + .subgraph_name(String::default()) .build() } diff --git a/apollo-router/src/plugins/authorization/mod.rs b/apollo-router/src/plugins/authorization/mod.rs index 98d065c7ee..92c4d55e43 100644 --- a/apollo-router/src/plugins/authorization/mod.rs +++ b/apollo-router/src/plugins/authorization/mod.rs @@ -550,6 +550,8 @@ impl Plugin for AuthorizationPlugin { if self.require_authentication { ServiceBuilder::new() .checkpoint(move |request: supergraph::Request| { + // XXX(@goto-bus-stop): Why are we doing this here, 
as opposed to the + // authentication plugin, which manages this context value? if request .context .contains_key(APOLLO_AUTHENTICATION_JWT_CLAIMS) diff --git a/apollo-router/src/plugins/cache/entity.rs b/apollo-router/src/plugins/cache/entity.rs index 9b19703b0f..ba78cae52c 100644 --- a/apollo-router/src/plugins/cache/entity.rs +++ b/apollo-router/src/plugins/cache/entity.rs @@ -45,6 +45,7 @@ use crate::graphql::Error; use crate::json_ext::Object; use crate::json_ext::Path; use crate::json_ext::PathElement; +use crate::layers::ServiceBuilderExt; use crate::plugin::Plugin; use crate::plugin::PluginInit; use crate::plugins::authorization::CacheKeyMetadata; @@ -390,8 +391,11 @@ impl Plugin for EntityCache { response }) - .service(CacheService(Some(InnerCacheService { - service, + .service(CacheService { + service: ServiceBuilder::new() + .buffered() + .service(service) + .boxed_clone(), entity_type: self.entity_type.clone(), name: name.to_string(), storage, @@ -400,7 +404,7 @@ impl Plugin for EntityCache { private_id, invalidation: self.invalidation.clone(), expose_keys_in_context: self.expose_keys_in_context, - }))); + }); tower::util::BoxService::new(inner) } else { ServiceBuilder::new() @@ -498,9 +502,9 @@ impl EntityCache { } } -struct CacheService(Option); -struct InnerCacheService { - service: subgraph::BoxService, +#[derive(Clone)] +struct CacheService { + service: subgraph::BoxCloneService, name: String, entity_type: Option, storage: RedisCacheStorage, @@ -520,21 +524,18 @@ impl Service for CacheService { &mut self, cx: &mut std::task::Context<'_>, ) -> std::task::Poll> { - match &mut self.0 { - Some(s) => s.service.poll_ready(cx), - None => panic!("service should have been called only once"), - } + self.service.poll_ready(cx) } fn call(&mut self, request: subgraph::Request) -> Self::Future { - match self.0.take() { - None => panic!("service should have been called only once"), - Some(s) => Box::pin(s.call_inner(request)), - } + let clone = self.clone(); + 
let inner = std::mem::replace(self, clone); + + Box::pin(inner.call_inner(request)) } } -impl InnerCacheService { +impl CacheService { async fn call_inner( mut self, request: subgraph::Request, @@ -588,9 +589,7 @@ impl InnerCacheService { ControlFlow::Break(response) => { cache_hit.insert("Query".to_string(), CacheHitMiss { hit: 1, miss: 0 }); let _ = response.context.insert( - CacheMetricContextKey::new( - response.subgraph_name.clone().unwrap_or_default(), - ), + CacheMetricContextKey::new(response.subgraph_name.clone()), CacheSubgraph(cache_hit), ); Ok(response) @@ -598,9 +597,7 @@ impl InnerCacheService { ControlFlow::Continue((request, mut root_cache_key)) => { cache_hit.insert("Query".to_string(), CacheHitMiss { hit: 0, miss: 1 }); let _ = request.context.insert( - CacheMetricContextKey::new( - request.subgraph_name.clone().unwrap_or_default(), - ), + CacheMetricContextKey::new(request.subgraph_name.clone()), CacheSubgraph(cache_hit), ); @@ -740,6 +737,7 @@ impl InnerCacheService { .context(context) .data(Value::Object(data)) .errors(new_errors) + .subgraph_name(self.name) .extensions(Object::new()) .build(); CacheControl::no_store().to_headers(response.response.headers_mut())?; @@ -908,7 +906,7 @@ async fn cache_lookup_root( .data(value.0.data) .extensions(Object::new()) .context(request.context) - .and_subgraph_name(request.subgraph_name.clone()) + .subgraph_name(request.subgraph_name.clone()) .build(); value @@ -1035,7 +1033,7 @@ async fn cache_lookup_entities( let mut response = subgraph::Response::builder() .data(data) .extensions(Object::new()) - .and_subgraph_name(request.subgraph_name) + .subgraph_name(request.subgraph_name) .context(request.context) .build(); diff --git a/apollo-router/src/plugins/cache/metrics.rs b/apollo-router/src/plugins/cache/metrics.rs index caff0ee738..b7c1d85489 100644 --- a/apollo-router/src/plugins/cache/metrics.rs +++ b/apollo-router/src/plugins/cache/metrics.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use 
std::sync::Arc; +use std::task::Poll; use std::time::Duration; use std::time::Instant; @@ -8,18 +9,20 @@ use http::header; use parking_lot::Mutex; use serde_json_bytes::Value; use tower::BoxError; +use tower::ServiceBuilder; +use tower::ServiceExt; use tower_service::Service; use super::entity::hash_query; use super::entity::hash_vary_headers; use super::entity::Ttl; use super::entity::REPRESENTATIONS; +use crate::layers::ServiceBuilderExt; use crate::services::subgraph; use crate::spec::TYPENAME; pub(crate) const CACHE_INFO_SUBGRAPH_CONTEXT_KEY: &str = "apollo::router::entity_cache_info_subgraph"; -pub(crate) struct CacheMetricsService(Option); impl CacheMetricsService { pub(crate) fn create( @@ -28,19 +31,23 @@ impl CacheMetricsService { ttl: Option<&Ttl>, separate_per_type: bool, ) -> subgraph::BoxService { - tower::util::BoxService::new(CacheMetricsService(Some(InnerCacheMetricsService { - service, + tower::util::BoxService::new(CacheMetricsService { + service: ServiceBuilder::new() + .buffered() + .service(service) + .boxed_clone(), name: Arc::new(name), counter: Some(Arc::new(Mutex::new(CacheCounter::new( ttl.map(|t| t.0).unwrap_or_else(|| Duration::from_secs(60)), separate_per_type, )))), - }))) + }) } } -pub(crate) struct InnerCacheMetricsService { - service: subgraph::BoxService, +#[derive(Clone)] +pub(crate) struct CacheMetricsService { + service: subgraph::BoxCloneService, name: Arc, counter: Option>>, } @@ -52,30 +59,27 @@ impl Service for CacheMetricsService { fn poll_ready( &mut self, - cx: &mut std::task::Context<'_>, + _cx: &mut std::task::Context<'_>, ) -> std::task::Poll> { - match &mut self.0 { - Some(s) => s.service.poll_ready(cx), - None => panic!("service should have been called only once"), - } + Poll::Ready(Ok(())) } fn call(&mut self, request: subgraph::Request) -> Self::Future { - match self.0.take() { - None => panic!("service should have been called only once"), - Some(s) => Box::pin(s.call_inner(request)), - } + let clone = 
self.clone(); + let inner = std::mem::replace(self, clone); + + Box::pin(inner.call_inner(request)) } } -impl InnerCacheMetricsService { +impl CacheMetricsService { async fn call_inner( mut self, mut request: subgraph::Request, ) -> Result { let cache_attributes = Self::get_cache_attributes(&mut request); - let response = self.service.call(request).await?; + let response = self.service.ready().await?.call(request).await?; if let Some(cache_attributes) = cache_attributes { if let Some(counter) = &self.counter { diff --git a/apollo-router/src/plugins/cache/tests.rs b/apollo-router/src/plugins/cache/tests.rs index 3633a21bcc..49bd1e9a2e 100644 --- a/apollo-router/src/plugins/cache/tests.rs +++ b/apollo-router/src/plugins/cache/tests.rs @@ -10,6 +10,7 @@ use fred::prelude::RedisValue; use http::header::CACHE_CONTROL; use http::HeaderValue; use parking_lot::Mutex; +use tower::Service; use tower::ServiceExt; use super::entity::EntityCache; @@ -428,7 +429,7 @@ async fn private() { .await .unwrap(); - let service = TestHarness::builder() + let mut service = TestHarness::builder() .configuration_json(serde_json::json!({"include_subgraph_errors": { "all": true } })) .unwrap() .schema(SCHEMA) @@ -446,7 +447,7 @@ async fn private() { .context(context) .build() .unwrap(); - let mut response = service.clone().oneshot(request).await.unwrap(); + let mut response = service.ready().await.unwrap().call(request).await.unwrap(); let cache_keys: CacheKeysContext = response.context.get(CONTEXT_CACHE_KEYS).unwrap().unwrap(); let mut cache_keys: Vec = cache_keys.into_values().flatten().collect(); cache_keys.sort(); @@ -457,7 +458,7 @@ async fn private() { println!("\nNOW WITHOUT SUBGRAPHS\n"); // Now testing without any mock subgraphs, all the data should come from the cache - let service = TestHarness::builder() + let mut service = TestHarness::builder() .configuration_json(serde_json::json!({"include_subgraph_errors": { "all": true } })) .unwrap() .schema(SCHEMA) @@ -474,7 +475,7 @@ 
async fn private() { .context(context) .build() .unwrap(); - let mut response = service.clone().oneshot(request).await.unwrap(); + let mut response = service.ready().await.unwrap().call(request).await.unwrap(); let cache_keys: CacheKeysContext = response.context.get(CONTEXT_CACHE_KEYS).unwrap().unwrap(); let mut cache_keys: Vec = cache_keys.into_values().flatten().collect(); cache_keys.sort(); @@ -493,7 +494,7 @@ async fn private() { .context(context) .build() .unwrap(); - let mut response = service.clone().oneshot(request).await.unwrap(); + let mut response = service.ready().await.unwrap().call(request).await.unwrap(); assert!(response .context .get::<_, CacheKeysContext>(CONTEXT_CACHE_KEYS) diff --git a/apollo-router/src/plugins/coprocessor/execution.rs b/apollo-router/src/plugins/coprocessor/execution.rs index 9f6b6d0a27..8e3eb37b1a 100644 --- a/apollo-router/src/plugins/coprocessor/execution.rs +++ b/apollo-router/src/plugins/coprocessor/execution.rs @@ -12,7 +12,7 @@ use tower_service::Service; use super::*; use crate::graphql; -use crate::layers::async_checkpoint::OneShotAsyncCheckpointLayer; +use crate::layers::async_checkpoint::AsyncCheckpointLayer; use crate::layers::ServiceBuilderExt; use crate::plugins::coprocessor::EXTERNAL_SPAN_NAME; use crate::services::execution; @@ -85,7 +85,7 @@ impl ExecutionStage { let http_client = http_client.clone(); let sdl = sdl.clone(); - OneShotAsyncCheckpointLayer::new(move |request: execution::Request| { + AsyncCheckpointLayer::new(move |request: execution::Request| { let request_config = request_config.clone(); let coprocessor_url = coprocessor_url.clone(); let http_client = http_client.clone(); @@ -176,6 +176,7 @@ impl ExecutionStage { .instrument(external_service_span()) .option_layer(request_layer) .option_layer(response_layer) + .buffered() // XXX: Added during backpressure fixing .service(service) .boxed() } diff --git a/apollo-router/src/plugins/coprocessor/mod.rs b/apollo-router/src/plugins/coprocessor/mod.rs 
index 355b0c98d9..d15e859b95 100644 --- a/apollo-router/src/plugins/coprocessor/mod.rs +++ b/apollo-router/src/plugins/coprocessor/mod.rs @@ -34,7 +34,7 @@ use tower::ServiceExt; use crate::configuration::shared::Client; use crate::error::Error; use crate::graphql; -use crate::layers::async_checkpoint::OneShotAsyncCheckpointLayer; +use crate::layers::async_checkpoint::AsyncCheckpointLayer; use crate::layers::ServiceBuilderExt; use crate::plugin::Plugin; use crate::plugin::PluginInit; @@ -397,7 +397,7 @@ impl RouterStage { let http_client = http_client.clone(); let sdl = sdl.clone(); - OneShotAsyncCheckpointLayer::new(move |request: router::Request| { + AsyncCheckpointLayer::new(move |request: router::Request| { let request_config = request_config.clone(); let coprocessor_url = coprocessor_url.clone(); let http_client = http_client.clone(); @@ -485,6 +485,7 @@ impl RouterStage { .instrument(external_service_span()) .option_layer(request_layer) .option_layer(response_layer) + .buffered() // XXX: Added during backpressure fixing .service(service) .boxed() } @@ -534,7 +535,7 @@ impl SubgraphStage { let http_client = http_client.clone(); let coprocessor_url = coprocessor_url.clone(); let service_name = service_name.clone(); - OneShotAsyncCheckpointLayer::new(move |request: subgraph::Request| { + AsyncCheckpointLayer::new(move |request: subgraph::Request| { let http_client = http_client.clone(); let coprocessor_url = coprocessor_url.clone(); let service_name = service_name.clone(); @@ -623,6 +624,7 @@ impl SubgraphStage { .instrument(external_service_span()) .option_layer(request_layer) .option_layer(response_layer) + .buffered() // XXX: Added during backpressure fixing .service(service) .boxed() } @@ -1095,7 +1097,7 @@ where let subgraph_response = subgraph::Response { response: http_response, context: request.context, - subgraph_name: Some(subgraph_name), + subgraph_name, id: request.id, }; diff --git a/apollo-router/src/plugins/coprocessor/supergraph.rs 
b/apollo-router/src/plugins/coprocessor/supergraph.rs index a34104eaa7..01696a9ad5 100644 --- a/apollo-router/src/plugins/coprocessor/supergraph.rs +++ b/apollo-router/src/plugins/coprocessor/supergraph.rs @@ -12,7 +12,7 @@ use tower_service::Service; use super::*; use crate::graphql; -use crate::layers::async_checkpoint::OneShotAsyncCheckpointLayer; +use crate::layers::async_checkpoint::AsyncCheckpointLayer; use crate::layers::ServiceBuilderExt; use crate::plugins::coprocessor::EXTERNAL_SPAN_NAME; use crate::plugins::telemetry::config_new::conditions::Condition; @@ -91,7 +91,7 @@ impl SupergraphStage { let http_client = http_client.clone(); let sdl = sdl.clone(); - OneShotAsyncCheckpointLayer::new(move |request: supergraph::Request| { + AsyncCheckpointLayer::new(move |request: supergraph::Request| { let request_config = request_config.clone(); let coprocessor_url = coprocessor_url.clone(); let http_client = http_client.clone(); @@ -180,6 +180,7 @@ impl SupergraphStage { .instrument(external_service_span()) .option_layer(request_layer) .option_layer(response_layer) + .buffered() // XXX: Added during backpressure fixing .service(service) .boxed() } diff --git a/apollo-router/src/plugins/coprocessor/test.rs b/apollo-router/src/plugins/coprocessor/test.rs index 25185542e5..395930ed7f 100644 --- a/apollo-router/src/plugins/coprocessor/test.rs +++ b/apollo-router/src/plugins/coprocessor/test.rs @@ -393,6 +393,7 @@ mod tests { .extensions(crate::json_ext::Object::new()) .context(req.context) .id(req.id) + .subgraph_name(String::default()) .build()) }); @@ -510,6 +511,7 @@ mod tests { .errors(Vec::new()) .extensions(crate::json_ext::Object::new()) .context(req.context) + .subgraph_name(String::default()) .build()) }); @@ -707,6 +709,7 @@ mod tests { .extensions(crate::json_ext::Object::new()) .context(req.context) .id(req.id) + .subgraph_name(String::default()) .build()) }); @@ -834,6 +837,7 @@ mod tests { .errors(Vec::new()) .extensions(crate::json_ext::Object::new()) 
.context(req.context) + .subgraph_name(String::default()) .build()) }); diff --git a/apollo-router/src/plugins/file_uploads/mod.rs b/apollo-router/src/plugins/file_uploads/mod.rs index ba764d55c8..93e85ee460 100644 --- a/apollo-router/src/plugins/file_uploads/mod.rs +++ b/apollo-router/src/plugins/file_uploads/mod.rs @@ -68,7 +68,7 @@ impl PluginPrivate for FileUploadsPlugin { } let limits = self.limits; ServiceBuilder::new() - .oneshot_checkpoint_async(move |req: router::Request| { + .checkpoint_async(move |req: router::Request| { async move { let context = req.context.clone(); Ok(match router_layer(req, limits).await { @@ -83,6 +83,7 @@ impl PluginPrivate for FileUploadsPlugin { } .boxed() }) + .buffered() .service(service) .boxed() } @@ -92,7 +93,7 @@ impl PluginPrivate for FileUploadsPlugin { return service; } ServiceBuilder::new() - .oneshot_checkpoint_async(move |req: supergraph::Request| { + .checkpoint_async(move |req: supergraph::Request| { async move { let context = req.context.clone(); Ok(match supergraph_layer(req).await { @@ -107,6 +108,7 @@ impl PluginPrivate for FileUploadsPlugin { } .boxed() }) + .buffered() .service(service) .boxed() } @@ -141,12 +143,13 @@ impl PluginPrivate for FileUploadsPlugin { return service; } ServiceBuilder::new() - .oneshot_checkpoint_async(|req: subgraph::Request| { + .checkpoint_async(|req: subgraph::Request| { subgraph_layer(req) .boxed() .map(|req| Ok(ControlFlow::Continue(req))) .boxed() }) + .buffered() .service(service) .boxed() } @@ -162,6 +165,11 @@ fn get_multipart_mime(req: &router::Request) -> Option { .filter(|mime| mime.ty == MULTIPART && mime.subty == FORM_DATA) } +/// Takes in multipart request bodies, and turns them into serialized JSON bodies that the rest of the router +/// pipeline can understand. +/// +/// # Context +/// Adds a [`MultipartRequest`] value to context. 
async fn router_layer( req: router::Request, limits: MultipartRequestLimits, @@ -201,6 +209,14 @@ async fn router_layer( Ok(req) } +/// Patch up the variable values in file upload requests. +/// +/// File uploads do something funky: They use *required* GraphQL field arguments (`file: Upload!`), +/// but then pass `null` as the variable value. This is invalid GraphQL, but it is how the file +/// uploads spec works. +/// +/// To make all this work in the router, we stick some placeholder value in the variables used for +/// file uploads, and then remove them before we pass on the files to subgraphs. async fn supergraph_layer(mut req: supergraph::Request) -> Result { let multipart = req .context diff --git a/apollo-router/src/plugins/headers.rs b/apollo-router/src/plugins/headers.rs index 5be0c352d4..6a5c6d8aa1 100644 --- a/apollo-router/src/plugins/headers.rs +++ b/apollo-router/src/plugins/headers.rs @@ -902,7 +902,7 @@ mod test { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), - subgraph_name: String::from("test").into(), + subgraph_name: String::from("test"), subscription_stream: None, connection_closed_signal: None, query_hash: Default::default(), @@ -975,7 +975,7 @@ mod test { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: Context::new(), - subgraph_name: String::from("test").into(), + subgraph_name: String::from("test"), subscription_stream: None, connection_closed_signal: None, query_hash: Default::default(), @@ -1002,7 +1002,7 @@ mod test { Ok(SubgraphResponse::new_from_response( http::Response::default(), Context::new(), - req.subgraph_name.unwrap_or_default(), + req.subgraph_name, SubgraphRequestId(String::new()), )) } @@ -1040,7 +1040,7 @@ mod test { .expect("expecting valid request"), operation_kind: OperationKind::Query, context: ctx, - subgraph_name: String::from("test").into(), + subgraph_name: String::from("test"), subscription_stream: None, 
connection_closed_signal: None, query_hash: Default::default(), diff --git a/apollo-router/src/plugins/limits/layer.rs b/apollo-router/src/plugins/limits/layer.rs index e84e333c74..f58064915b 100644 --- a/apollo-router/src/plugins/limits/layer.rs +++ b/apollo-router/src/plugins/limits/layer.rs @@ -25,6 +25,7 @@ struct BodyLimitControlInner { /// This structure allows the body limit to be updated dynamically. /// It also allows the error message to be updated +/// #[derive(Clone)] pub(crate) struct BodyLimitControl { inner: Arc, @@ -41,7 +42,12 @@ impl BodyLimitControl { } /// To disable the limit check just set this to usize::MAX + #[allow(dead_code)] pub(crate) fn update_limit(&self, limit: usize) { + assert!( + self.limit() < limit, + "new limit must be greater than current limit" + ); self.inner .limit .store(limit, std::sync::atomic::Ordering::SeqCst); @@ -76,13 +82,13 @@ impl BodyLimitControl { /// pub(crate) struct RequestBodyLimitLayer { _phantom: std::marker::PhantomData, - control: BodyLimitControl, + initial_limit: usize, } impl RequestBodyLimitLayer { - pub(crate) fn new(control: BodyLimitControl) -> Self { + pub(crate) fn new(initial_limit: usize) -> Self { Self { _phantom: Default::default(), - control, + initial_limit, } } } @@ -95,14 +101,14 @@ where type Service = RequestBodyLimit; fn layer(&self, inner: S) -> Self::Service { - RequestBodyLimit::new(inner, self.control.clone()) + RequestBodyLimit::new(inner, self.initial_limit) } } pub(crate) struct RequestBodyLimit { _phantom: std::marker::PhantomData, inner: S, - control: BodyLimitControl, + initial_limit: usize, } impl RequestBodyLimit @@ -110,11 +116,11 @@ where S: Service>>, Body: http_body::Body, { - fn new(inner: S, control: BodyLimitControl) -> Self { + fn new(inner: S, initial_limit: usize) -> Self { Self { _phantom: Default::default(), inner, - control, + initial_limit, } } } @@ -137,16 +143,17 @@ where self.inner.poll_ready(cx) } - fn call(&mut self, req: http::Request) -> Self::Future { 
+ fn call(&mut self, mut req: http::Request) -> Self::Future { + let control = BodyLimitControl::new(self.initial_limit); let content_length = req .headers() .get(http::header::CONTENT_LENGTH) .and_then(|value| value.to_str().ok()?.parse::().ok()); let _body_limit = match content_length { - Some(len) if len > self.control.limit() => return ResponseFuture::Reject, - Some(len) => self.control.limit().min(len), - None => self.control.limit(), + Some(len) if len > control.limit() => return ResponseFuture::Reject, + Some(len) => control.limit().min(len), + None => control.limit(), }; // TODO: We can only do this once this layer is moved to the beginning of the router pipeline. @@ -162,10 +169,12 @@ where .try_acquire_owned() .expect("abort lock is new, qed"); - let f = - self.inner.call(req.map(|body| { - super::limited::Limited::new(body, self.control.clone(), owned_permit) - })); + // Add the body limit to the request extensions + req.extensions_mut().insert(control.clone()); + + let f = self + .inner + .call(req.map(|body| super::limited::Limited::new(body, control, owned_permit))); ResponseFuture::Continue { inner: f, @@ -227,6 +236,7 @@ mod test { use http_body_util::BodyStream; use tower::BoxError; use tower::ServiceBuilder; + use tower::ServiceExt; use tower_service::Service; use crate::plugins::limits::layer::BodyLimitControl; @@ -234,14 +244,16 @@ mod test { #[tokio::test] async fn test_body_content_length_limit_exceeded() { - let control = BodyLimitControl::new(10); let mut service = ServiceBuilder::new() - .layer(RequestBodyLimitLayer::new(control.clone())) + .layer(RequestBodyLimitLayer::new(10)) .service_fn(|r: http::Request<_>| async move { BodyStream::new(r.into_body()).collect::>().await; panic!("should have rejected request"); }); let resp: Result, BoxError> = service + .ready() + .await + .unwrap() .call(http::Request::new("This is a test".to_string())) .await; assert!(resp.is_err()); @@ -249,9 +261,8 @@ mod test { #[tokio::test] async fn 
test_body_content_length_limit_ok() { - let control = BodyLimitControl::new(10); let mut service = ServiceBuilder::new() - .layer(RequestBodyLimitLayer::new(control.clone())) + .layer(RequestBodyLimitLayer::new(10)) .service_fn(|r: http::Request<_>| async move { BodyStream::new(r.into_body()).collect::>().await; Ok(http::Response::builder() @@ -259,7 +270,12 @@ mod test { .body("This is a test".to_string()) .unwrap()) }); - let resp: Result<_, BoxError> = service.call(http::Request::new("OK".to_string())).await; + let resp: Result<_, BoxError> = service + .ready() + .await + .unwrap() + .call(http::Request::new("OK".to_string())) + .await; assert!(resp.is_ok()); let resp = resp.unwrap(); @@ -269,14 +285,16 @@ mod test { #[tokio::test] async fn test_header_content_length_limit_exceeded() { - let control = BodyLimitControl::new(10); let mut service = ServiceBuilder::new() - .layer(RequestBodyLimitLayer::new(control.clone())) + .layer(RequestBodyLimitLayer::new(10)) .service_fn(|r: http::Request<_>| async move { BodyStream::new(r.into_body()).collect::>().await; panic!("should have rejected request"); }); let resp: Result, BoxError> = service + .ready() + .await + .unwrap() .call( http::Request::builder() .header("Content-Length", "100") @@ -289,9 +307,8 @@ mod test { #[tokio::test] async fn test_header_content_length_limit_ok() { - let control = BodyLimitControl::new(10); let mut service = ServiceBuilder::new() - .layer(RequestBodyLimitLayer::new(control.clone())) + .layer(RequestBodyLimitLayer::new(10)) .service_fn(|r: http::Request<_>| async move { BodyStream::new(r.into_body()).collect::>().await; Ok(http::Response::builder() @@ -300,6 +317,9 @@ mod test { .unwrap()) }); let resp: Result<_, BoxError> = service + .ready() + .await + .unwrap() .call( http::Request::builder() .header("Content-Length", "5") @@ -315,14 +335,16 @@ mod test { #[tokio::test] async fn test_limits_dynamic_update() { - let control = BodyLimitControl::new(10); let mut service = 
ServiceBuilder::new() - .layer(RequestBodyLimitLayer::new(control.clone())) + .layer(RequestBodyLimitLayer::new(10)) .service_fn(move |r: http::Request<_>| { - let control = control.clone(); + //Update the limit before we start reading the stream + r.extensions() + .get::() + .expect("body limit must have been added to extensions") + .update_limit(100); async move { BodyStream::new(r.into_body()).collect::>().await; - control.update_limit(100); Ok(http::Response::builder() .status(StatusCode::OK) .body("This is a test".to_string()) @@ -330,16 +352,18 @@ mod test { } }); let resp: Result<_, BoxError> = service + .ready() + .await + .unwrap() .call(http::Request::new("This is a test".to_string())) .await; - assert!(resp.is_err()); + assert!(resp.is_ok()); } #[tokio::test] async fn test_body_length_exceeds_content_length() { - let control = BodyLimitControl::new(10); let mut service = ServiceBuilder::new() - .layer(RequestBodyLimitLayer::new(control.clone())) + .layer(RequestBodyLimitLayer::new(10)) .service_fn(|r: http::Request<_>| async move { BodyStream::new(r.into_body()).collect::>().await; Ok(http::Response::builder() @@ -348,6 +372,9 @@ mod test { let resp: Result<_, BoxError> = service + .ready() + .await + .unwrap() .call( http::Request::builder() .header("Content-Length", "5") @@ -361,4 +388,31 @@ mod test { assert_eq!(resp.status(), StatusCode::OK); assert_eq!(resp.into_body(), "This is a test"); } + + #[tokio::test] + async fn test_body_content_length_service_reuse() { + let mut service = ServiceBuilder::new() + .layer(RequestBodyLimitLayer::new(10)) + .service_fn(|r: http::Request<_>| async move { + BodyStream::new(r.into_body()).collect::>().await; + Ok(http::Response::builder() + .status(StatusCode::OK) + .body("This is a test".to_string()) + .unwrap()) + }); + + for _ in 0..10 { + let resp: Result<_, BoxError> = service + .ready() + .await + .unwrap() + .call(http::Request::new("OK".to_string())) + .await; + + assert!(resp.is_ok()); + 
let resp = resp.unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + assert_eq!(resp.into_body(), "This is a test"); + } + } } diff --git a/apollo-router/src/plugins/limits/limited.rs b/apollo-router/src/plugins/limits/limited.rs index 7db7ad0ad3..ce78a8a94b 100644 --- a/apollo-router/src/plugins/limits/limited.rs +++ b/apollo-router/src/plugins/limits/limited.rs @@ -88,7 +88,6 @@ where // This is the difference between http_body::Limited and our implementation. // Dropping this mutex allows the containing layer to immediately return an error response // This prevents the need to deal with wrapped errors. - this.control.update_limit(0); this.permit.release(); return Poll::Pending; } else { diff --git a/apollo-router/src/plugins/limits/mod.rs b/apollo-router/src/plugins/limits/mod.rs index 2c8e0de4bf..9130657bf7 100644 --- a/apollo-router/src/plugins/limits/mod.rs +++ b/apollo-router/src/plugins/limits/mod.rs @@ -17,7 +17,6 @@ use crate::graphql; use crate::layers::ServiceBuilderExt; use crate::plugin::Plugin; use crate::plugin::PluginInit; -use crate::plugins::limits::layer::BodyLimitControl; use crate::plugins::limits::layer::BodyLimitError; use crate::plugins::limits::layer::RequestBodyLimitLayer; use crate::services::router; @@ -157,16 +156,7 @@ impl Plugin for LimitsPlugin { } fn router_service(&self, service: BoxService) -> BoxService { - let control = BodyLimitControl::new(self.config.http_max_request_bytes); - let control_for_context = control.clone(); ServiceBuilder::new() - .map_request(move |r: router::Request| { - let control_for_context = control_for_context.clone(); - r.context - .extensions() - .with_lock(|mut lock| lock.insert(control_for_context)); - r - }) .map_future_with_request_data( |r: &router::Request| r.context.clone(), |ctx, f| async { Self::map_error_to_graphql(f.await, ctx) }, @@ -174,7 +164,9 @@ impl Plugin for LimitsPlugin { // Here we need to convert to and from the underlying http request types so that we can use existing 
middleware. .map_request(Into::into) .map_response(Into::into) - .layer(RequestBodyLimitLayer::new(control)) + .layer(RequestBodyLimitLayer::new( + self.config.http_max_request_bytes, + )) .map_request(Into::into) .map_response(Into::into) .service(service) @@ -386,24 +378,24 @@ mod test { async fn test_limits_dynamic_update() { let plugin = plugin().await; let resp = plugin - .router_service(|r| async move { + .router_service(|mut r: router::Request| async move { // Before we go for the body, we'll update the limit - r.context.extensions().with_lock(|lock| { - let control: &BodyLimitControl = - lock.get().expect("mut have body limit control"); - assert_eq!(control.remaining(), 10); - assert_eq!(control.limit(), 10); - control.update_limit(100); - }); + let control = r + .router_request + .extensions_mut() + .get::() + .expect("body limit control must have been set") + .clone(); + + assert_eq!(control.remaining(), 10); + assert_eq!(control.limit(), 10); + control.update_limit(100); + let body = r.router_request.into_body(); let _ = router::body::into_bytes(body).await?; // Now let's check progress - r.context.extensions().with_lock(|lock| { - let control: &BodyLimitControl = - lock.get().expect("mut have body limit control"); - assert_eq!(control.remaining(), 86); - }); + assert_eq!(control.remaining(), 86); Ok(router::Response::fake_builder().build().unwrap()) }) .call( diff --git a/apollo-router/src/plugins/rhai/subgraph.rs b/apollo-router/src/plugins/rhai/subgraph.rs index 110dce38d5..01708b40ac 100644 --- a/apollo-router/src/plugins/rhai/subgraph.rs +++ b/apollo-router/src/plugins/rhai/subgraph.rs @@ -22,6 +22,7 @@ pub(super) fn request_failure( .and_data(body.data) .and_label(body.label) .and_path(body.path) + .subgraph_name(String::default()) // XXX: We don't know the subgraph name .build() } else { Response::error_builder() @@ -31,7 +32,8 @@ pub(super) fn request_failure( }]) .context(context) .status_code(error_details.status) - .build()? 
+ .subgraph_name(String::default()) // XXX: We don't know the subgraph name + .build() }; Ok(ControlFlow::Break(res)) @@ -47,6 +49,7 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> .and_data(body.data) .and_label(body.label) .and_path(body.path) + .subgraph_name(String::default()) // XXX: We don't know the subgraph name .build() } else { Response::error_builder() @@ -56,7 +59,7 @@ pub(super) fn response_failure(context: Context, error_details: ErrorDetails) -> }]) .status_code(error_details.status) .context(context) + .subgraph_name(String::default()) // XXX: We don't know the subgraph name .build() - .expect("can't fail to build our error message") } } diff --git a/apollo-router/src/plugins/subscription.rs b/apollo-router/src/plugins/subscription.rs index 6f46c0c14b..36edca3fe7 100644 --- a/apollo-router/src/plugins/subscription.rs +++ b/apollo-router/src/plugins/subscription.rs @@ -276,7 +276,7 @@ impl Plugin for Subscription { ServiceBuilder::new() .checkpoint(move |req: subgraph::Request| { if req.operation_kind == OperationKind::Subscription && !enabled { - Ok(ControlFlow::Break(subgraph::Response::builder().context(req.context).error(graphql::Error::builder().message("cannot execute a subscription if it's not enabled in the configuration").extension_code("SUBSCRIPTION_DISABLED").build()).extensions(Object::default()).build())) + Ok(ControlFlow::Break(subgraph::Response::builder().context(req.context).subgraph_name(req.subgraph_name).error(graphql::Error::builder().message("cannot execute a subscription if it's not enabled in the configuration").extension_code("SUBSCRIPTION_DISABLED").build()).extensions(Object::default()).build())) } else { Ok(ControlFlow::Continue(req)) } diff --git a/apollo-router/src/plugins/telemetry/config_new/attributes.rs b/apollo-router/src/plugins/telemetry/config_new/attributes.rs index fcdd297a98..7cc2cf4875 100644 --- a/apollo-router/src/plugins/telemetry/config_new/attributes.rs +++ 
b/apollo-router/src/plugins/telemetry/config_new/attributes.rs @@ -1139,9 +1139,7 @@ impl Selectors for SubgraphAttributes .as_ref() .and_then(|a| a.key(SUBGRAPH_NAME)) { - if let Some(subgraph_name) = &request.subgraph_name { - attrs.push(KeyValue::new(key, subgraph_name.clone())); - } + attrs.push(KeyValue::new(key, request.subgraph_name.clone())); } attrs diff --git a/apollo-router/src/plugins/telemetry/config_new/cache/mod.rs b/apollo-router/src/plugins/telemetry/config_new/cache/mod.rs index 894d938c1f..c9e6301085 100644 --- a/apollo-router/src/plugins/telemetry/config_new/cache/mod.rs +++ b/apollo-router/src/plugins/telemetry/config_new/cache/mod.rs @@ -65,15 +65,10 @@ impl Instrumented for CacheInstruments { } fn on_response(&self, response: &Self::Response) { - let subgraph_name = match &response.subgraph_name { - Some(subgraph_name) => subgraph_name, - None => { - return; - } - }; + let subgraph_name = response.subgraph_name.clone(); let cache_info: CacheSubgraph = match response .context - .get(CacheMetricContextKey::new(subgraph_name.clone())) + .get(CacheMetricContextKey::new(subgraph_name)) .ok() .flatten() { diff --git a/apollo-router/src/plugins/telemetry/config_new/selectors.rs b/apollo-router/src/plugins/telemetry/config_new/selectors.rs index 1b55f285aa..bcb0ff5c4e 100644 --- a/apollo-router/src/plugins/telemetry/config_new/selectors.rs +++ b/apollo-router/src/plugins/telemetry/config_new/selectors.rs @@ -1348,10 +1348,11 @@ impl Selector for SubgraphSelector { } .map(opentelemetry::Value::from) } - SubgraphSelector::SubgraphName { subgraph_name } if *subgraph_name => request - .subgraph_name - .clone() - .map(opentelemetry::Value::from), + SubgraphSelector::SubgraphName { subgraph_name } if *subgraph_name => { + Some(request.subgraph_name.clone().into()) + } + // .clone() + // .map(opentelemetry::Value::from), SubgraphSelector::SubgraphOperationKind { .. 
} => request .context .get::<_, String>(OPERATION_KIND) @@ -1531,10 +1532,11 @@ impl Selector for SubgraphSelector { } .map(opentelemetry::Value::from) } - SubgraphSelector::SubgraphName { subgraph_name } if *subgraph_name => response - .subgraph_name - .clone() - .map(opentelemetry::Value::from), + SubgraphSelector::SubgraphName { subgraph_name } if *subgraph_name => { + Some(response.subgraph_name.clone().into()) + } + // .clone() + // .map(opentelemetry::Value::from), SubgraphSelector::SubgraphResponseBody { subgraph_response_body, default, @@ -1601,7 +1603,7 @@ impl Selector for SubgraphSelector { SubgraphSelector::Cache { cache, entity_type } => { let cache_info: CacheSubgraph = response .context - .get(CacheMetricContextKey::new(response.subgraph_name.clone()?)) + .get(CacheMetricContextKey::new(response.subgraph_name.clone())) .ok() .flatten()?; diff --git a/apollo-router/src/plugins/telemetry/config_new/spans.rs b/apollo-router/src/plugins/telemetry/config_new/spans.rs index 4fd111a941..158d3b346a 100644 --- a/apollo-router/src/plugins/telemetry/config_new/spans.rs +++ b/apollo-router/src/plugins/telemetry/config_new/spans.rs @@ -797,6 +797,7 @@ mod test { let values = spans.attributes.on_response( &subgraph::Response::fake2_builder() .header("my-header", "test_val") + .subgraph_name(String::default()) .build() .unwrap(), ); @@ -829,6 +830,7 @@ mod test { &subgraph::Response::fake2_builder() .header("my-header", "test_val") .status_code(http::StatusCode::OK) + .subgraph_name(String::default()) .build() .unwrap(), ); @@ -861,6 +863,7 @@ mod test { &subgraph::Response::fake2_builder() .header("my-header", "test_val") .status_code(http::StatusCode::OK) + .subgraph_name(String::default()) .build() .unwrap(), ); diff --git a/apollo-router/src/plugins/telemetry/fmt_layer.rs b/apollo-router/src/plugins/telemetry/fmt_layer.rs index 1a091d2e4e..b1f53679df 100644 --- a/apollo-router/src/plugins/telemetry/fmt_layer.rs +++ 
b/apollo-router/src/plugins/telemetry/fmt_layer.rs @@ -818,6 +818,7 @@ connector: .header("custom-header", "val1") .header("x-log-request", HeaderValue::from_static("log")) .data(serde_json::json!({"products": [{"id": 1234, "name": "first_name"}, {"id": 567, "name": "second_name"}]})) + .subgraph_name("subgraph") .build() .expect("expecting valid response"); subgraph_events.on_response(&subgraph_resp); @@ -842,6 +843,7 @@ connector: .header("custom-header", "val1") .header("x-log-request", HeaderValue::from_static("log")) .data(serde_json::json!({"products": [{"id": 1234, "name": "first_name"}, {"id": 567, "name": "second_name"}], "other": {"foo": "bar"}})) + .subgraph_name("subgraph_bis") .build() .expect("expecting valid response"); subgraph_events.on_response(&subgraph_resp); @@ -1066,6 +1068,7 @@ connector: .header("custom-header", "val1") .header("x-log-request", HeaderValue::from_static("log")) .data(serde_json::json!({"products": [{"id": 1234, "name": "first_name"}, {"id": 567, "name": "second_name"}]})) + .subgraph_name("subgraph") .build() .expect("expecting valid response"); subgraph_events.on_response(&subgraph_resp); @@ -1090,6 +1093,7 @@ connector: .header("custom-header", "val1") .header("x-log-request", HeaderValue::from_static("log")) .data(serde_json::json!({"products": [{"id": 1234, "name": "first_name"}, {"id": 567, "name": "second_name"}], "other": {"foo": "bar"}})) + .subgraph_name("subgraph_bis") .build() .expect("expecting valid response"); subgraph_events.on_response(&subgraph_resp); diff --git a/apollo-router/src/plugins/telemetry/logging/mod.rs b/apollo-router/src/plugins/telemetry/logging/mod.rs index dcc643ea21..5724194644 100644 --- a/apollo-router/src/plugins/telemetry/logging/mod.rs +++ b/apollo-router/src/plugins/telemetry/logging/mod.rs @@ -80,6 +80,7 @@ mod test { subgraph::Response::fake2_builder() .header("custom-header", "val1") .data(serde_json::json!({"data": "res"}).to_string()) + .subgraph_name("subgraph") .build() }) .call( 
diff --git a/apollo-router/src/plugins/traffic_shaping/deduplication.rs b/apollo-router/src/plugins/traffic_shaping/deduplication.rs index 5c2165f775..c39815cd22 100644 --- a/apollo-router/src/plugins/traffic_shaping/deduplication.rs +++ b/apollo-router/src/plugins/traffic_shaping/deduplication.rs @@ -13,7 +13,6 @@ use tokio::sync::broadcast::{self}; use tokio::sync::oneshot; use tower::BoxError; use tower::Layer; -use tower::ServiceExt; use crate::batching::BatchQuery; use crate::graphql::Request; @@ -72,7 +71,7 @@ where } async fn dedup( - service: S, + mut service: S, wait_map: WaitMap, request: SubgraphRequest, ) -> Result { @@ -85,7 +84,7 @@ where .extensions() .with_lock(|lock| lock.contains_key::()) { - return service.ready_oneshot().await?.call(request).await; + return service.call(request).await; } loop { let mut locked_wait_map = wait_map.lock().await; @@ -106,7 +105,7 @@ where SubgraphResponse::new_from_response( response.0.response, request.context, - request.subgraph_name.unwrap_or_default(), + request.subgraph_name, request.id, ) }) @@ -137,12 +136,7 @@ where locked_wait_map.remove(&cache_key); }); - service - .ready_oneshot() - .await? 
- .call(request) - .await - .map(CloneSubgraphResponse) + service.call(request).await.map(CloneSubgraphResponse) }; // Let our waiters know @@ -165,7 +159,7 @@ where SubgraphResponse::new_from_response( response.0.response, context, - response.0.subgraph_name.unwrap_or_default(), + response.0.subgraph_name, id, ) }); @@ -187,19 +181,20 @@ where type Error = BoxError; type Future = BoxFuture<'static, Result>; - fn poll_ready(&mut self, _cx: &mut std::task::Context<'_>) -> Poll> { - Poll::Ready(Ok(())) + fn poll_ready(&mut self, cx: &mut std::task::Context<'_>) -> Poll> { + self.service.poll_ready(cx) } fn call(&mut self, request: SubgraphRequest) -> Self::Future { let service = self.service.clone(); + let mut inner = std::mem::replace(&mut self.service, service); if request.operation_kind == OperationKind::Query { let wait_map = self.wait_map.clone(); - Box::pin(async move { Self::dedup(service, wait_map, request).await }) + Box::pin(async move { Self::dedup(inner, wait_map, request).await }) } else { - Box::pin(async move { service.oneshot(request).await }) + Box::pin(async move { inner.call(request).await }) } } } diff --git a/apollo-router/src/plugins/traffic_shaping/mod.rs b/apollo-router/src/plugins/traffic_shaping/mod.rs index 695018587d..9aee659904 100644 --- a/apollo-router/src/plugins/traffic_shaping/mod.rs +++ b/apollo-router/src/plugins/traffic_shaping/mod.rs @@ -7,44 +7,39 @@ //! * Rate limiting //! 
mod deduplication; -pub(crate) mod rate; -pub(crate) mod timeout; use std::collections::HashMap; use std::num::NonZeroU64; use std::sync::Mutex; use std::time::Duration; -use futures::future::BoxFuture; -use futures::FutureExt; use http::header::CONTENT_ENCODING; use http::HeaderValue; use http::StatusCode; use schemars::JsonSchema; use serde::Deserialize; -use tower::util::future::EitherResponseFuture; -use tower::util::Either; +use tower::limit::ConcurrencyLimitLayer; +use tower::limit::RateLimitLayer; +use tower::load_shed::error::Overloaded; +use tower::timeout::error::Elapsed; +use tower::timeout::TimeoutLayer; use tower::BoxError; -use tower::Service; use tower::ServiceBuilder; use tower::ServiceExt; use self::deduplication::QueryDeduplicationLayer; -use self::rate::RateLimitLayer; -use self::rate::RateLimited; -use self::timeout::Elapsed; -use self::timeout::TimeoutLayer; use crate::configuration::shared::DnsResolutionStrategy; -use crate::error::ConfigurationError; use crate::graphql; use crate::layers::ServiceBuilderExt; use crate::plugin::Plugin; use crate::plugin::PluginInit; use crate::register_plugin; use crate::services::http::service::Compression; +use crate::services::router; use crate::services::subgraph; -use crate::services::supergraph; +use crate::services::RouterResponse; use crate::services::SubgraphRequest; +use crate::services::SubgraphResponse; const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); pub(crate) const APOLLO_TRAFFIC_SHAPING: &str = "apollo.traffic_shaping"; @@ -132,9 +127,12 @@ impl Merge for SubgraphShaping { } } -#[derive(PartialEq, Debug, Clone, Deserialize, JsonSchema)] +#[derive(PartialEq, Debug, Clone, Deserialize, JsonSchema, Default)] #[serde(deny_unknown_fields)] struct RouterShaping { + /// The global concurrency limit + concurrency_limit: Option, + /// Enable global rate limiting global_rate_limit: Option, #[serde(deserialize_with = "humantime_serde::deserialize", default)] @@ -186,7 +184,6 @@ impl Merge for 
RateLimitConf { // Remove this once the configuration yml changes. pub(crate) struct TrafficShaping { config: Config, - rate_limit_router: Option, rate_limit_subgraphs: Mutex>, } @@ -195,103 +192,40 @@ impl Plugin for TrafficShaping { type Config = Config; async fn new(init: PluginInit) -> Result { - let rate_limit_router = init - .config - .router - .as_ref() - .and_then(|r| r.global_rate_limit.as_ref()) - .map(|router_rate_limit_conf| { - if router_rate_limit_conf.interval.as_millis() > u64::MAX as u128 { - Err(ConfigurationError::InvalidConfiguration { - message: "bad configuration for traffic_shaping plugin", - error: format!( - "cannot set an interval for the rate limit greater than {} ms", - u64::MAX - ), - }) - } else { - Ok(RateLimitLayer::new( - router_rate_limit_conf.capacity, - router_rate_limit_conf.interval, - )) - } - }) - .transpose()?; - - { - Ok(Self { - config: init.config, - rate_limit_router, - rate_limit_subgraphs: Mutex::new(HashMap::new()), - }) - } - } -} - -pub(crate) type TrafficShapingSubgraphFuture = EitherResponseFuture< - EitherResponseFuture< - BoxFuture<'static, Result>, - BoxFuture<'static, Result>, - >, - >::Future, ->; - -impl TrafficShaping { - fn merge_config( - all_config: Option<&T>, - subgraph_config: Option<&T>, - ) -> Option { - let merged_subgraph_config = subgraph_config.map(|c| c.merge(all_config)); - merged_subgraph_config.or_else(|| all_config.cloned()) + Ok(Self { + config: init.config, + rate_limit_subgraphs: Mutex::new(HashMap::new()), + }) } - pub(crate) fn supergraph_service_internal( - &self, - service: S, - ) -> impl Service< - supergraph::Request, - Response = supergraph::Response, - Error = BoxError, - Future = BoxFuture<'static, Result>, - > + Clone - + Send - + Sync - + 'static - where - S: Service - + Clone - + Send - + Sync - + 'static, - >::Future: std::marker::Send, - { + fn router_service(&self, service: router::BoxService) -> router::BoxService { ServiceBuilder::new() .map_future_with_request_data( - 
|req: &supergraph::Request| req.context.clone(), + |req: &router::Request| req.context.clone(), move |ctx, future| { async { - let response: Result = future.await; + let response: Result = future.await; match response { - Err(error) if error.is::() => { - supergraph::Response::error_builder() + Ok(ok) => Ok(ok), + Err(err) if err.is::() => { + // TODO add metrics + let error = graphql::Error::builder() + .message("Your request has been timed out") + .extension_code("GATEWAY_TIMEOUT") + .build(); + Ok(RouterResponse::error_builder() .status_code(StatusCode::GATEWAY_TIMEOUT) - .error::(Elapsed::new().into()) + .error(error) .context(ctx) .build() + .expect("should build overloaded response")) } - Err(error) if error.is::() => { - supergraph::Response::error_builder() - .status_code(StatusCode::TOO_MANY_REQUESTS) - .error::(RateLimited::new().into()) - .context(ctx) - .build() - } - _ => response, + Err(err) => Err(err), } } - .boxed() }, ) + .load_shed() .layer(TimeoutLayer::new( self.config .router @@ -299,31 +233,75 @@ impl TrafficShaping { .and_then(|r| r.timeout) .unwrap_or(DEFAULT_TIMEOUT), )) - .option_layer(self.rate_limit_router.clone()) + .map_future_with_request_data( + |req: &router::Request| req.context.clone(), + move |ctx, future| { + async { + let response: Result = future.await; + match response { + Ok(ok) => Ok(ok), + Err(err) if err.is::() => { + // TODO add metrics + let error = graphql::Error::builder() + .message("Your request has been concurrency limited") + .extension_code("REQUEST_CONCURRENCY_LIMITED") + .build(); + Ok(RouterResponse::error_builder() + .status_code(StatusCode::SERVICE_UNAVAILABLE) + .error(error) + .context(ctx) + .build() + .expect("should build overloaded response")) + } + Err(err) => Err(err), + } + } + }, + ) + .load_shed() + .option_layer(self.config.router.as_ref().and_then(|router| { + router + .concurrency_limit + .as_ref() + .map(|limit| ConcurrencyLimitLayer::new(*limit)) + })) + .map_future_with_request_data( + 
|req: &router::Request| req.context.clone(), + move |ctx, future| { + async { + let response: Result = future.await; + match response { + Ok(ok) => Ok(ok), + Err(err) if err.is::() => { + // TODO add metrics + let error = graphql::Error::builder() + .message("Your request has been rate limited") + .extension_code("REQUEST_RATE_LIMITED") + .build(); + Ok(RouterResponse::error_builder() + .status_code(StatusCode::SERVICE_UNAVAILABLE) + .error(error) + .context(ctx) + .build() + .expect("should build overloaded response")) + } + Err(err) => Err(err), + } + } + }, + ) + .load_shed() + .option_layer(self.config.router.as_ref().and_then(|router| { + router + .global_rate_limit + .as_ref() + .map(|limit| RateLimitLayer::new(limit.capacity.into(), limit.interval)) + })) .service(service) + .boxed() } - pub(crate) fn subgraph_service_internal( - &self, - name: &str, - service: S, - ) -> impl Service< - subgraph::Request, - Response = subgraph::Response, - Error = BoxError, - Future = TrafficShapingSubgraphFuture, - > + Clone - + Send - + Sync - + 'static - where - S: Service - + Clone - + Send - + Sync - + 'static, - >::Future: std::marker::Send, - { + fn subgraph_service(&self, name: &str, service: subgraph::BoxService) -> subgraph::BoxService { // Either we have the subgraph config and we merge it with the all config, or we just have the all config or we have nothing. 
let all_config = self.config.all.as_ref(); let subgraph_config = self.config.subgraphs.get(name); @@ -340,59 +318,89 @@ impl TrafficShaping { .unwrap() .entry(name.to_string()) .or_insert_with(|| { - RateLimitLayer::new(rate_limit_conf.capacity, rate_limit_conf.interval) + RateLimitLayer::new( + rate_limit_conf.capacity.into(), + rate_limit_conf.interval, + ) }) .clone() }); - Either::Left(ServiceBuilder::new() - .option_layer(config.shaping.deduplicate_query.unwrap_or_default().then( - QueryDeduplicationLayer::default - )) - .map_future_with_request_data( - |req: &subgraph::Request| req.context.clone(), - move |ctx, future| { - async { - let response: Result = future.await; - match response { - Err(error) if error.is::() => { - subgraph::Response::error_builder() - .status_code(StatusCode::GATEWAY_TIMEOUT) - .error::(Elapsed::new().into()) - .context(ctx) - .build() - } - Err(error) if error.is::() => { - subgraph::Response::error_builder() - .status_code(StatusCode::TOO_MANY_REQUESTS) - .error::(RateLimited::new().into()) - .context(ctx) - .build() - } - _ => response, + ServiceBuilder::new() + .map_future_with_request_data( + |req: &subgraph::Request| (req.context.clone(), req.subgraph_name.clone()), + move |(ctx, subgraph_name), future| { + async { + let response: Result = future.await; + match response { + Ok(ok) => Ok(ok), + Err(err) if err.is::() => { + // TODO add metrics + let error = graphql::Error::builder() + .message("Your request has been timed out") + .extension_code("GATEWAY_TIMEOUT") + .build(); + Ok(SubgraphResponse::error_builder() + .status_code(StatusCode::GATEWAY_TIMEOUT) + .subgraph_name(subgraph_name) + .error(error) + .context(ctx) + .build()) } - }.boxed() - }, - ) - .layer(TimeoutLayer::new( - config.shaping - .timeout - .unwrap_or(DEFAULT_TIMEOUT), - )) - .option_layer(rate_limit) - .service(service) + Err(err) if err.is::() => { + // TODO add metrics + let error = graphql::Error::builder() + .message("Your request has been rate 
limited") + .extension_code("REQUEST_RATE_LIMITED") + .build(); + Ok(SubgraphResponse::error_builder() + .status_code(StatusCode::SERVICE_UNAVAILABLE) + .subgraph_name(subgraph_name) + .error(error) + .context(ctx) + .build()) + } + Err(err) => Err(err), + } + } + }, + ) + .load_shed() + .layer(TimeoutLayer::new( + config.shaping.timeout.unwrap_or(DEFAULT_TIMEOUT), + )) + .option_layer(rate_limit) + .option_layer( + config + .shaping + .deduplicate_query + .unwrap_or_default() + .then(QueryDeduplicationLayer::default), + ) .map_request(move |mut req: SubgraphRequest| { if let Some(compression) = config.shaping.compression { let compression_header_val = HeaderValue::from_str(&compression.to_string()).expect("compression is manually implemented and already have the right values; qed"); req.subgraph_request.headers_mut().insert(CONTENT_ENCODING, compression_header_val); } - req - })) + }) + .buffered() + .service(service) + .boxed() } else { - Either::Right(service) + service } } +} + +impl TrafficShaping { + fn merge_config( + all_config: Option<&T>, + subgraph_config: Option<&T>, + ) -> Option { + let merged_subgraph_config = subgraph_config.map(|c| c.merge(all_config)); + merged_subgraph_config.or_else(|| all_config.cloned()) + } pub(crate) fn subgraph_client_config( &self, @@ -426,8 +434,8 @@ mod test { use super::*; use crate::json_ext::Object; + use crate::plugin::test::MockRouterService; use crate::plugin::test::MockSubgraph; - use crate::plugin::test::MockSupergraphService; use crate::plugin::DynPlugin; use crate::query_planner::QueryPlannerService; use crate::router_factory::create_plugins; @@ -437,8 +445,9 @@ mod test { use crate::services::router::service::RouterCreator; use crate::services::HasSchema; use crate::services::PluggableSupergraphServiceBuilder; + use crate::services::RouterRequest; + use crate::services::RouterResponse; use crate::services::SupergraphRequest; - use crate::services::SupergraphResponse; use crate::spec::Schema; use 
crate::Configuration; @@ -625,10 +634,7 @@ mod test { }); let _response = plugin - .as_any() - .downcast_ref::() - .unwrap() - .subgraph_service_internal("test", test_service) + .subgraph_service("test", test_service.boxed()) .oneshot(request) .await .unwrap(); @@ -774,54 +780,36 @@ mod test { graphql::Request::default() => graphql::Response::default() }); - assert!(&plugin - .as_any() - .downcast_ref::() - .unwrap() - .subgraph_service_internal("test", test_service.clone()) - .oneshot(SubgraphRequest::fake_builder().build()) + let mut svc = plugin.subgraph_service("test", test_service.boxed()); + + assert!(svc + .ready() .await - .unwrap() - .response - .body() - .errors - .is_empty()); - assert_eq!( - plugin - .as_any() - .downcast_ref::() - .unwrap() - .subgraph_service_internal("test", test_service.clone()) - .oneshot(SubgraphRequest::fake_builder().build()) - .await - .unwrap() - .response - .body() - .errors[0] - .extensions - .get("code") - .unwrap(), - "REQUEST_RATE_LIMITED" - ); - assert!(plugin - .as_any() - .downcast_ref::() - .unwrap() - .subgraph_service_internal("another", test_service.clone()) - .oneshot(SubgraphRequest::fake_builder().build()) + .expect("it is ready") + .call(SubgraphRequest::fake_builder().build()) .await .unwrap() .response .body() .errors .is_empty()); + let response = svc + .ready() + .await + .expect("it is ready") + .call(SubgraphRequest::fake_builder().build()) + .await + .expect("it responded"); + + assert_eq!(StatusCode::SERVICE_UNAVAILABLE, response.response.status()); + tokio::time::sleep(Duration::from_millis(300)).await; - assert!(plugin - .as_any() - .downcast_ref::() - .unwrap() - .subgraph_service_internal("test", test_service.clone()) - .oneshot(SubgraphRequest::fake_builder().build()) + + assert!(svc + .ready() + .await + .expect("it is ready") + .call(SubgraphRequest::fake_builder().build()) .await .unwrap() .response @@ -844,68 +832,98 @@ mod test { .unwrap(); let plugin = 
get_traffic_shaping_plugin(&config).await; - let mut mock_service = MockSupergraphService::new(); - mock_service.expect_clone().returning(|| { - let mut mock_service = MockSupergraphService::new(); - - mock_service.expect_clone().returning(|| { - let mut mock_service = MockSupergraphService::new(); - mock_service.expect_call().times(0..2).returning(move |_| { - Ok(SupergraphResponse::fake_builder() - .data(json!({ "test": 1234_u32 })) - .build() - .unwrap()) - }); - mock_service - }); - mock_service + let mut mock_service = MockRouterService::new(); + + mock_service.expect_call().times(0..3).returning(|_| { + Ok(RouterResponse::fake_builder() + .data(json!({ "test": 1234_u32 })) + .build() + .unwrap()) }); + mock_service + .expect_clone() + .returning(MockRouterService::new); - assert!(plugin - .as_any() - .downcast_ref::() - .unwrap() - .supergraph_service_internal(mock_service.clone()) - .oneshot(SupergraphRequest::fake_builder().build().unwrap()) + // let mut svc = plugin.router_service(mock_service.clone().boxed()); + let mut svc = plugin.router_service(mock_service.boxed()); + + let response: RouterResponse = svc + .ready() .await - .unwrap() - .next_response() + .expect("it is ready") + .call(RouterRequest::fake_builder().build().unwrap()) .await - .unwrap() - .errors - .is_empty()); + .unwrap(); + assert_eq!(StatusCode::OK, response.response.status()); - assert_eq!( - plugin - .as_any() - .downcast_ref::() - .unwrap() - .supergraph_service_internal(mock_service.clone()) - .oneshot(SupergraphRequest::fake_builder().build().unwrap()) - .await - .unwrap() - .next_response() + let response: RouterResponse = svc + .ready() + .await + .expect("it is ready") + .call(RouterRequest::fake_builder().build().unwrap()) + .await + .unwrap(); + assert_eq!(StatusCode::SERVICE_UNAVAILABLE, response.response.status()); + let j: serde_json::Value = serde_json::from_slice( + &crate::services::router::body::into_bytes(response.response) .await - .unwrap() - .errors[0] - 
.extensions - .get("code") - .unwrap(), - "REQUEST_RATE_LIMITED" + .expect("we have a body"), + ) + .expect("our body is valid json"); + assert_eq!( + "Your request has been rate limited", + j["errors"][0]["message"] ); + tokio::time::sleep(Duration::from_millis(300)).await; - assert!(plugin - .as_any() - .downcast_ref::() - .unwrap() - .supergraph_service_internal(mock_service.clone()) - .oneshot(SupergraphRequest::fake_builder().build().unwrap()) + + let response: RouterResponse = svc + .ready() .await - .unwrap() - .next_response() + .expect("it is ready") + .call(RouterRequest::fake_builder().build().unwrap()) .await - .unwrap() - .errors - .is_empty()); + .unwrap(); + assert_eq!(StatusCode::OK, response.response.status()); + } + + #[tokio::test(flavor = "multi_thread")] + async fn it_timeout_router_requests() { + let config = serde_yaml::from_str::( + r#" + router: + timeout: 1ns + "#, + ) + .unwrap(); + + let plugin = get_traffic_shaping_plugin(&config).await; + + let svc = ServiceBuilder::new() + .service_fn(move |_req: router::Request| async { + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + RouterResponse::fake_builder() + .data(json!({ "test": 1234_u32 })) + .build() + }) + .boxed(); + + let mut rs = plugin.router_service(svc); + + let response: RouterResponse = rs + .ready() + .await + .expect("it is ready") + .call(RouterRequest::fake_builder().build().unwrap()) + .await + .unwrap(); + assert_eq!(StatusCode::GATEWAY_TIMEOUT, response.response.status()); + let j: serde_json::Value = serde_json::from_slice( + &crate::services::router::body::into_bytes(response.response) + .await + .expect("we have a body"), + ) + .expect("our body is valid json"); + assert_eq!("Your request has been timed out", j["errors"][0]["message"]); } } diff --git a/apollo-router/src/plugins/traffic_shaping/rate/error.rs b/apollo-router/src/plugins/traffic_shaping/rate/error.rs deleted file mode 100644 index d1c7ef09e3..0000000000 --- 
a/apollo-router/src/plugins/traffic_shaping/rate/error.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Error types - -use std::error; -use std::fmt; - -use crate::graphql; - -/// The rate limit error. -#[derive(Debug, Default)] -pub(crate) struct RateLimited; - -impl RateLimited { - /// Construct a new RateLimited error - pub(crate) fn new() -> Self { - RateLimited {} - } -} - -impl fmt::Display for RateLimited { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.pad("your request has been rate limited") - } -} - -impl From for graphql::Error { - fn from(_: RateLimited) -> Self { - graphql::Error::builder() - .message(String::from("Your request has been rate limited")) - .extension_code("REQUEST_RATE_LIMITED") - .build() - } -} - -impl error::Error for RateLimited {} diff --git a/apollo-router/src/plugins/traffic_shaping/rate/future.rs b/apollo-router/src/plugins/traffic_shaping/rate/future.rs deleted file mode 100644 index cb3ed8eb1e..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/rate/future.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! Future types - -use std::future::Future; -use std::pin::Pin; -use std::task::Context; -use std::task::Poll; - -use pin_project_lite::pin_project; - -pin_project! 
{ - #[derive(Debug)] - pub(crate) struct ResponseFuture { - #[pin] - response: T, - } -} - -impl ResponseFuture { - pub(crate) fn new(response: T) -> Self { - ResponseFuture { response } - } -} - -impl Future for ResponseFuture -where - F: Future>, - E: Into, -{ - type Output = Result; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let this = self.project(); - - match this.response.poll(cx) { - Poll::Ready(v) => Poll::Ready(v.map_err(Into::into)), - Poll::Pending => Poll::Pending, - } - } -} diff --git a/apollo-router/src/plugins/traffic_shaping/rate/layer.rs b/apollo-router/src/plugins/traffic_shaping/rate/layer.rs deleted file mode 100644 index 107b394a70..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/rate/layer.rs +++ /dev/null @@ -1,53 +0,0 @@ -use std::num::NonZeroU64; -use std::sync::atomic::AtomicU64; -use std::sync::atomic::AtomicUsize; -use std::sync::Arc; -use std::time::Duration; -use std::time::SystemTime; -use std::time::UNIX_EPOCH; - -use tower::Layer; - -use super::Rate; -use super::RateLimit; -/// Enforces a rate limit on the number of requests the underlying -/// service can handle over a period of time. -#[derive(Debug, Clone)] -pub(crate) struct RateLimitLayer { - rate: Rate, - window_start: Arc, - previous_nb_requests: Arc, - current_nb_requests: Arc, -} - -impl RateLimitLayer { - /// Create new rate limit layer. 
- pub(crate) fn new(num: NonZeroU64, per: Duration) -> Self { - let rate = Rate::new(num, per); - RateLimitLayer { - rate, - window_start: Arc::new(AtomicU64::new( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("system time must be after EPOCH") - .as_millis() as u64, - )), - previous_nb_requests: Arc::default(), - current_nb_requests: Arc::new(AtomicUsize::new(1)), - } - } -} - -impl Layer for RateLimitLayer { - type Service = RateLimit; - - fn layer(&self, service: S) -> Self::Service { - RateLimit { - inner: service, - rate: self.rate, - window_start: self.window_start.clone(), - previous_nb_requests: self.previous_nb_requests.clone(), - current_nb_requests: self.current_nb_requests.clone(), - } - } -} diff --git a/apollo-router/src/plugins/traffic_shaping/rate/mod.rs b/apollo-router/src/plugins/traffic_shaping/rate/mod.rs deleted file mode 100644 index 53c16998d0..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/rate/mod.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Limit the rate at which requests are processed. - -mod error; -pub(crate) mod future; -mod layer; -#[allow(clippy::module_inception)] -mod rate; -pub(crate) mod service; - -pub(crate) use self::error::RateLimited; -pub(crate) use self::layer::RateLimitLayer; -pub(crate) use self::rate::Rate; -pub(crate) use self::service::RateLimit; diff --git a/apollo-router/src/plugins/traffic_shaping/rate/rate.rs b/apollo-router/src/plugins/traffic_shaping/rate/rate.rs deleted file mode 100644 index eb73f74f10..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/rate/rate.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::num::NonZeroU64; -use std::time::Duration; - -/// A rate of requests per time period. -#[derive(Debug, Copy, Clone)] -pub(crate) struct Rate { - num: u64, - per: Duration, -} - -impl Rate { - /// Create a new rate. - /// - /// # Panics - /// - /// This function panics if `num` or `per` is 0. 
- pub(crate) fn new(num: NonZeroU64, per: Duration) -> Self { - assert!(per > Duration::default()); - - Rate { - num: num.into(), - per, - } - } - - pub(crate) fn num(&self) -> u64 { - self.num - } - - pub(crate) fn per(&self) -> Duration { - self.per - } -} diff --git a/apollo-router/src/plugins/traffic_shaping/rate/service.rs b/apollo-router/src/plugins/traffic_shaping/rate/service.rs deleted file mode 100644 index f826723b0a..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/rate/service.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::sync::atomic::AtomicU64; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; -use std::sync::Arc; -use std::task::Context; -use std::task::Poll; -use std::time::SystemTime; -use std::time::UNIX_EPOCH; - -use futures::ready; -use tower::Service; - -use super::future::ResponseFuture; -use super::Rate; -use crate::plugins::traffic_shaping::rate::error::RateLimited; - -#[derive(Debug, Clone)] -pub(crate) struct RateLimit { - pub(crate) inner: T, - pub(crate) rate: Rate, - /// We're using an atomic u64 because it's basically a timestamp in milliseconds for the start of the window - /// Instead of using an Instant which is not thread safe we're using an atomic u64 - /// It's ok to have an u64 because we just care about milliseconds for this use case - pub(crate) window_start: Arc, - pub(crate) previous_nb_requests: Arc, - pub(crate) current_nb_requests: Arc, -} - -impl Service for RateLimit -where - S: Service, - S::Error: Into, -{ - type Response = S::Response; - type Error = tower::BoxError; - type Future = ResponseFuture; - - fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { - let time_unit = self.rate.per().as_millis() as u64; - - let updated = - self.window_start - .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |window_start| { - let duration_now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("system time must be after EPOCH") - .as_millis() as u64; - if duration_now - window_start > 
self.rate.per().as_millis() as u64 { - Some(duration_now) - } else { - None - } - }); - // If it has been updated - if let Ok(_updated_window_start) = updated { - self.previous_nb_requests.swap( - self.current_nb_requests.load(Ordering::SeqCst), - Ordering::SeqCst, - ); - self.current_nb_requests.swap(1, Ordering::SeqCst); - } - - let estimated_cap = (self.previous_nb_requests.load(Ordering::SeqCst) - * (time_unit - .checked_sub(self.window_start.load(Ordering::SeqCst)) - .unwrap_or_default() - / time_unit) as usize) - + self.current_nb_requests.load(Ordering::SeqCst); - - if estimated_cap as u64 > self.rate.num() { - tracing::trace!("rate limit exceeded; sleeping."); - return Poll::Ready(Err(RateLimited::new().into())); - } - - self.current_nb_requests.fetch_add(1, Ordering::SeqCst); - - Poll::Ready(ready!(self.inner.poll_ready(cx)).map_err(Into::into)) - } - - fn call(&mut self, request: Request) -> Self::Future { - ResponseFuture::new(self.inner.call(request)) - } -} diff --git a/apollo-router/src/plugins/traffic_shaping/timeout/error.rs b/apollo-router/src/plugins/traffic_shaping/timeout/error.rs deleted file mode 100644 index 38e36dc8ad..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/timeout/error.rs +++ /dev/null @@ -1,34 +0,0 @@ -//! Error types - -use std::error; -use std::fmt; - -use crate::graphql; - -/// The timeout elapsed. 
-#[derive(Debug, Default)] -pub(crate) struct Elapsed; - -impl Elapsed { - /// Construct a new elapsed error - pub(crate) fn new() -> Self { - Elapsed {} - } -} - -impl fmt::Display for Elapsed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.pad("request timed out") - } -} - -impl From for graphql::Error { - fn from(_: Elapsed) -> Self { - graphql::Error::builder() - .message(String::from("Request timed out")) - .extension_code("REQUEST_TIMEOUT") - .build() - } -} - -impl error::Error for Elapsed {} diff --git a/apollo-router/src/plugins/traffic_shaping/timeout/future.rs b/apollo-router/src/plugins/traffic_shaping/timeout/future.rs deleted file mode 100644 index eda4100198..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/timeout/future.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Future types - -use std::future::Future; -use std::pin::Pin; -use std::task::Context; -use std::task::Poll; - -use pin_project_lite::pin_project; -use tokio::time::Sleep; - -use super::error::Elapsed; - -pin_project! 
{ - /// [`Timeout`] response future - /// - /// [`Timeout`]: crate::timeout::Timeout - #[derive(Debug)] - pub(crate) struct ResponseFuture { - #[pin] - response: T, - #[pin] - sleep: Pin>, - } -} - -impl ResponseFuture { - pub(crate) fn new(response: T, sleep: Pin>) -> Self { - ResponseFuture { response, sleep } - } -} - -impl Future for ResponseFuture -where - F: Future>, - E: Into, -{ - type Output = Result; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let mut this = self.project(); - - // First, try polling the future - match this.response.poll(cx) { - Poll::Ready(v) => return Poll::Ready(v.map_err(Into::into)), - Poll::Pending => {} - } - - // Now check the sleep - match Pin::new(&mut this.sleep).poll(cx) { - Poll::Pending => Poll::Pending, - Poll::Ready(_) => { - u64_counter!( - "apollo_router_timeout", - "Number of timed out client requests", - 1 - ); - Poll::Ready(Err(Elapsed::new().into())) - } - } - } -} diff --git a/apollo-router/src/plugins/traffic_shaping/timeout/layer.rs b/apollo-router/src/plugins/traffic_shaping/timeout/layer.rs deleted file mode 100644 index fd1b5ea59e..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/timeout/layer.rs +++ /dev/null @@ -1,26 +0,0 @@ -use std::time::Duration; - -use tower::Layer; - -use super::Timeout; - -/// Applies a timeout to requests via the supplied inner service. 
-#[derive(Debug, Clone)] -pub(crate) struct TimeoutLayer { - timeout: Duration, -} - -impl TimeoutLayer { - /// Create a timeout from a duration - pub(crate) fn new(timeout: Duration) -> Self { - TimeoutLayer { timeout } - } -} - -impl Layer for TimeoutLayer { - type Service = Timeout; - - fn layer(&self, service: S) -> Self::Service { - Timeout::new(service, self.timeout) - } -} diff --git a/apollo-router/src/plugins/traffic_shaping/timeout/mod.rs b/apollo-router/src/plugins/traffic_shaping/timeout/mod.rs deleted file mode 100644 index 6b7cb9abce..0000000000 --- a/apollo-router/src/plugins/traffic_shaping/timeout/mod.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! This is a modified Timeout service copy/pasted from the tower codebase. -//! This Timeout is also checking if we do not timeout on the `poll_ready` and not only on the `call` part -//! Middleware that applies a timeout to requests. -//! -//! If the response does not complete within the specified timeout, the response -//! will be aborted. - -pub(crate) mod error; -pub(crate) mod future; -mod layer; - -use std::task::Context; -use std::task::Poll; -use std::time::Duration; - -use tower::util::Oneshot; -use tower::Service; -use tower::ServiceExt; - -use self::future::ResponseFuture; -pub(crate) use self::layer::TimeoutLayer; -pub(crate) use crate::plugins::traffic_shaping::timeout::error::Elapsed; - -/// Applies a timeout to requests. 
-#[derive(Debug, Clone)] -pub(crate) struct Timeout { - inner: T, - timeout: Duration, -} - -// ===== impl Timeout ===== - -impl Timeout { - /// Creates a new [`Timeout`] - pub(crate) fn new(inner: T, timeout: Duration) -> Self { - Timeout { inner, timeout } - } -} - -impl Service for Timeout -where - S: Service + Clone, - S::Error: Into, -{ - type Response = S::Response; - type Error = tower::BoxError; - type Future = ResponseFuture>; - - fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { - Poll::Ready(Ok(())) - } - - fn call(&mut self, request: Request) -> Self::Future { - let service = self.inner.clone(); - - let response = service.oneshot(request); - - ResponseFuture::new(response, Box::pin(tokio::time::sleep(self.timeout))) - } -} diff --git a/apollo-router/src/query_planner/query_planner_service.rs b/apollo-router/src/query_planner/query_planner_service.rs index 68e57c62f6..bc7f441290 100644 --- a/apollo-router/src/query_planner/query_planner_service.rs +++ b/apollo-router/src/query_planner/query_planner_service.rs @@ -1119,6 +1119,7 @@ mod tests { Ok(subgraph::Response::builder() .extensions(crate::json_ext::Object::new()) .context(request.context) + .subgraph_name(String::default()) .build()) } }) diff --git a/apollo-router/src/query_planner/tests.rs b/apollo-router/src/query_planner/tests.rs index babf395514..3128c5eb6c 100644 --- a/apollo-router/src/query_planner/tests.rs +++ b/apollo-router/src/query_planner/tests.rs @@ -77,7 +77,6 @@ fn service_usage() { /// The query planner reports the failed subgraph fetch as an error with a reason of "service /// closed", which is what this test expects. 
#[tokio::test] -#[should_panic(expected = "this panic should be propagated to the test harness")] async fn mock_subgraph_service_withf_panics_should_be_reported_as_service_closed() { let query_plan: QueryPlan = QueryPlan { root: serde_json::from_str(test_query_plan!()).unwrap(), @@ -93,9 +92,7 @@ async fn mock_subgraph_service_withf_panics_should_be_reported_as_service_closed }; let mut mock_products_service = plugin::test::MockSubgraphService::new(); - mock_products_service.expect_call().times(1).withf(|_| { - panic!("this panic should be propagated to the test harness"); - }); + // This clone happens in the `MakeSubgraphService` impl for MockSubgraphService. mock_products_service.expect_clone().return_once(|| { let mut mock_products_service = plugin::test::MockSubgraphService::new(); mock_products_service.expect_call().times(1).withf(|_| { @@ -107,16 +104,17 @@ async fn mock_subgraph_service_withf_panics_should_be_reported_as_service_closed let (sender, _) = tokio::sync::mpsc::channel(10); let schema = Arc::new(Schema::parse(test_schema!(), &Default::default()).unwrap()); + let ssf = SubgraphServiceFactory::new( + vec![( + "product".into(), + Arc::new(mock_products_service) as Arc, + )], + Default::default(), + ); let sf = Arc::new(FetchServiceFactory::new( schema.clone(), Default::default(), - Arc::new(SubgraphServiceFactory { - services: Arc::new(HashMap::from([( - "product".into(), - Arc::new(mock_products_service) as Arc, - )])), - plugins: Default::default(), - }), + Arc::new(ssf), None, Arc::new(ConnectorServiceFactory::empty(schema.clone())), )); @@ -138,7 +136,7 @@ async fn mock_subgraph_service_withf_panics_should_be_reported_as_service_closed let reason: String = serde_json_bytes::from_value(result.errors[0].extensions.get("reason").unwrap().clone()) .unwrap(); - assert_eq!(reason, "service closed".to_string()); + assert_eq!(reason, "buffer's worker closed unexpectedly".to_string()); } #[tokio::test] @@ -178,16 +176,17 @@ async fn 
fetch_includes_operation_name() { let (sender, _) = tokio::sync::mpsc::channel(10); let schema = Arc::new(Schema::parse(test_schema!(), &Default::default()).unwrap()); + let ssf = SubgraphServiceFactory::new( + vec![( + "product".into(), + Arc::new(mock_products_service) as Arc, + )], + Default::default(), + ); let sf = Arc::new(FetchServiceFactory::new( schema.clone(), Default::default(), - Arc::new(SubgraphServiceFactory { - services: Arc::new(HashMap::from([( - "product".into(), - Arc::new(mock_products_service) as Arc, - )])), - plugins: Default::default(), - }), + Arc::new(ssf), None, Arc::new(ConnectorServiceFactory::empty(schema.clone())), )); @@ -246,16 +245,17 @@ async fn fetch_makes_post_requests() { let (sender, _) = tokio::sync::mpsc::channel(10); let schema = Arc::new(Schema::parse(test_schema!(), &Default::default()).unwrap()); + let ssf = SubgraphServiceFactory::new( + vec![( + "product".into(), + Arc::new(mock_products_service) as Arc, + )], + Default::default(), + ); let sf = Arc::new(FetchServiceFactory::new( schema.clone(), Default::default(), - Arc::new(SubgraphServiceFactory { - services: Arc::new(HashMap::from([( - "product".into(), - Arc::new(mock_products_service) as Arc, - )])), - plugins: Default::default(), - }), + Arc::new(ssf), None, Arc::new(ConnectorServiceFactory::empty(schema.clone())), )); @@ -401,22 +401,23 @@ async fn defer() { let schema = include_str!("testdata/defer_schema.graphql"); let schema = Arc::new(Schema::parse(schema, &Default::default()).unwrap()); + let ssf = SubgraphServiceFactory::new( + vec![ + ( + "X".into(), + Arc::new(mock_x_service) as Arc, + ), + ( + "Y".into(), + Arc::new(mock_y_service) as Arc, + ), + ], + Default::default(), + ); let sf = Arc::new(FetchServiceFactory::new( schema.clone(), Default::default(), - Arc::new(SubgraphServiceFactory { - services: Arc::new(HashMap::from([ - ( - "X".into(), - Arc::new(mock_x_service) as Arc, - ), - ( - "Y".into(), - Arc::new(mock_y_service) as Arc, - ), - ])), - 
plugins: Default::default(), - }), + Arc::new(ssf), None, Arc::new(ConnectorServiceFactory::empty(schema.clone())), )); @@ -516,16 +517,17 @@ async fn defer_if_condition() { let (sender, receiver) = tokio::sync::mpsc::channel(10); let mut receiver_stream = ReceiverStream::new(receiver); + let ssf = SubgraphServiceFactory::new( + vec![( + "accounts".into(), + Arc::new(mocked_accounts) as Arc, + )], + Default::default(), + ); let service_factory = Arc::new(FetchServiceFactory::new( schema.clone(), Default::default(), - Arc::new(SubgraphServiceFactory { - services: Arc::new(HashMap::from([( - "accounts".into(), - Arc::new(mocked_accounts) as Arc, - )])), - plugins: Default::default(), - }), + Arc::new(ssf), None, Arc::new(ConnectorServiceFactory::empty(schema.clone())), )); @@ -673,25 +675,29 @@ async fn dependent_mutations() { // the first fetch returned null, so there should never be a call to B let mut mock_b_service = plugin::test::MockSubgraphService::new(); + mock_b_service + .expect_clone() + .returning(plugin::test::MockSubgraphService::new); mock_b_service.expect_call().never(); let schema = Arc::new(Schema::parse(schema, &Default::default()).unwrap()); + let ssf = SubgraphServiceFactory::new( + vec![ + ( + "A".into(), + Arc::new(mock_a_service) as Arc, + ), + ( + "B".into(), + Arc::new(mock_b_service) as Arc, + ), + ], + Default::default(), + ); let sf = Arc::new(FetchServiceFactory::new( schema.clone(), Default::default(), - Arc::new(SubgraphServiceFactory { - services: Arc::new(HashMap::from([ - ( - "A".into(), - Arc::new(mock_a_service) as Arc, - ), - ( - "B".into(), - Arc::new(mock_b_service) as Arc, - ), - ])), - plugins: Default::default(), - }), + Arc::new(ssf), None, Arc::new(ConnectorServiceFactory::empty(schema.clone())), )); diff --git a/apollo-router/src/router_factory.rs b/apollo-router/src/router_factory.rs index 54c2c85922..311f77f44e 100644 --- a/apollo-router/src/router_factory.rs +++ b/apollo-router/src/router_factory.rs @@ -39,7 +39,6 @@ 
use crate::services::layers::query_analysis::QueryAnalysisLayer; use crate::services::new_service::ServiceFactory; use crate::services::router; use crate::services::router::service::RouterCreator; -use crate::services::subgraph; use crate::services::HasConfig; use crate::services::HasSchema; use crate::services::PluggableSupergraphServiceBuilder; @@ -359,46 +358,21 @@ pub(crate) async fn create_subgraph_services( http_service_factory: &IndexMap, plugins: &Arc, configuration: &Configuration, -) -> Result< - IndexMap< - String, - impl Service< - subgraph::Request, - Response = subgraph::Response, - Error = BoxError, - Future = crate::plugins::traffic_shaping::TrafficShapingSubgraphFuture< - SubgraphService, - >, - > + Clone - + Send - + Sync - + 'static, - >, - BoxError, -> { +) -> Result, BoxError> { let subscription_plugin_conf = plugins .iter() .find(|i| i.0.as_str() == APOLLO_SUBSCRIPTION_PLUGIN) .and_then(|plugin| (*plugin.1).as_any().downcast_ref::()) .map(|p| p.config.clone()); - let shaping = plugins - .iter() - .find(|i| i.0.as_str() == APOLLO_TRAFFIC_SHAPING) - .and_then(|plugin| (*plugin.1).as_any().downcast_ref::()) - .expect("traffic shaping should always be part of the plugin list"); - let mut subgraph_services = IndexMap::default(); for (name, http_service_factory) in http_service_factory.iter() { - let subgraph_service = shaping.subgraph_service_internal( - name.as_ref(), - SubgraphService::from_config( - name.clone(), - configuration, - subscription_plugin_conf.clone(), - http_service_factory.clone(), - )?, - ); + let subgraph_service = SubgraphService::from_config( + name.clone(), + configuration, + subscription_plugin_conf.clone(), + http_service_factory.clone(), + )?; subgraph_services.insert(name.clone(), subgraph_service); } diff --git a/apollo-router/src/services/fetch_service.rs b/apollo-router/src/services/fetch_service.rs index 6a852cf22e..337d736de1 100644 --- a/apollo-router/src/services/fetch_service.rs +++ 
b/apollo-router/src/services/fetch_service.rs @@ -350,7 +350,7 @@ impl FetchService { }; let service = subgraph_service_factory - .create(&service_name.clone()) + .create(&service_name) .expect("we already checked that the service exists during planning; qed"); let uri = schema diff --git a/apollo-router/src/services/hickory_dns_connector.rs b/apollo-router/src/services/hickory_dns_connector.rs index 5b7573f672..c553e582e7 100644 --- a/apollo-router/src/services/hickory_dns_connector.rs +++ b/apollo-router/src/services/hickory_dns_connector.rs @@ -3,6 +3,7 @@ use std::io; use std::net::SocketAddr; use std::net::ToSocketAddrs; use std::pin::Pin; +use std::sync::Arc; use std::task::Context; use std::task::Poll; @@ -22,7 +23,7 @@ use crate::configuration::shared::DnsResolutionStrategy; /// the background task is also created, it needs to be spawned on top of an executor before using the client, /// or dns requests will block. #[derive(Debug, Clone)] -pub(crate) struct AsyncHyperResolver(TokioAsyncResolver); +pub(crate) struct AsyncHyperResolver(Arc); impl AsyncHyperResolver { /// constructs a new resolver from default configuration, using [read_system_conf](https://docs.rs/hickory-resolver/0.24.1/hickory_resolver/system_conf/fn.read_system_conf.html) @@ -32,7 +33,7 @@ impl AsyncHyperResolver { let (config, mut options) = read_system_conf()?; options.ip_strategy = dns_resolution_strategy.into(); - Ok(Self(TokioAsyncResolver::tokio(config, options))) + Ok(Self(Arc::new(TokioAsyncResolver::tokio(config, options)))) } } diff --git a/apollo-router/src/services/http/service.rs b/apollo-router/src/services/http/service.rs index cd282f6fd0..7e25b0deb0 100644 --- a/apollo-router/src/services/http/service.rs +++ b/apollo-router/src/services/http/service.rs @@ -248,11 +248,23 @@ impl tower::Service for HttpClientService { #[cfg(unix)] let client = match schema_uri.scheme().map(|s| s.as_str()) { - Some("unix") => Either::Right(self.unix_client.clone()), - _ => 
Either::Left(self.http_client.clone()), + Some("unix") => { + // Because we clone our inner service, we'd better swap the readied one + let clone = self.unix_client.clone(); + Either::Right(std::mem::replace(&mut self.unix_client, clone)) + } + _ => { + // Because we clone our inner service, we'd better swap the readied one + let clone = self.http_client.clone(); + Either::Left(std::mem::replace(&mut self.http_client, clone)) + } }; #[cfg(not(unix))] - let client = self.http_client.clone(); + let client = { + // Because we clone our inner service, we'd better swap the readied one + let clone = self.http_client.clone(); + std::mem::replace(&mut self.http_client, clone) + }; let service_name = self.service.clone(); diff --git a/apollo-router/src/services/layers/allow_only_http_post_mutations.rs b/apollo-router/src/services/layers/allow_only_http_post_mutations.rs index 6c737ec76e..c8dd160bd4 100644 --- a/apollo-router/src/services/layers/allow_only_http_post_mutations.rs +++ b/apollo-router/src/services/layers/allow_only_http_post_mutations.rs @@ -1,6 +1,4 @@ -//! Prevent mutations if the HTTP method is GET. -//! -//! See [`Layer`] and [`Service`] for more details. +//! A supergraph service layer that requires that GraphQL mutations use the HTTP POST method. use std::ops::ControlFlow; @@ -18,11 +16,19 @@ use tower::ServiceBuilder; use super::query_analysis::ParsedDocument; use crate::graphql::Error; use crate::json_ext::Object; -use crate::layers::async_checkpoint::OneShotAsyncCheckpointService; +use crate::layers::async_checkpoint::AsyncCheckpointService; use crate::layers::ServiceBuilderExt; use crate::services::SupergraphRequest; use crate::services::SupergraphResponse; +/// A supergraph service layer that requires that GraphQL mutations use the HTTP POST method. +/// +/// Responds with a 405 Method Not Allowed if it receives a GraphQL mutation using any other HTTP +/// method. 
+/// +/// This layer requires that a ParsedDocument is available on the context and that the request has +/// a valid GraphQL operation and operation name. If these conditions are not met the layer will +/// return early with an unspecified error response. #[derive(Default)] pub(crate) struct AllowOnlyHttpPostMutationsLayer {} @@ -34,7 +40,7 @@ where + 'static, >::Future: Send + 'static, { - type Service = OneShotAsyncCheckpointService< + type Service = AsyncCheckpointService< S, BoxFuture<'static, Result, BoxError>>, SupergraphRequest, @@ -42,7 +48,7 @@ where fn layer(&self, service: S) -> Self::Service { ServiceBuilder::new() - .oneshot_checkpoint_async(|req: SupergraphRequest| { + .checkpoint_async(|req: SupergraphRequest| { Box::pin(async { if req.supergraph_request.method() == Method::POST { return Ok(ControlFlow::Continue(req)); @@ -54,6 +60,9 @@ where .with_lock(|lock| lock.get::().cloned()) { None => { + // We shouldn't ever reach here unless the pipeline was set up + // improperly (i.e. programmer error), but do something better than + // panicking just in case. let errors = vec![Error::builder() .message("Cannot find executable document".to_string()) .extension_code("MISSING_EXECUTABLE_DOCUMENT") @@ -77,6 +86,8 @@ where match op { Err(_) => { + // We shouldn't end up here if the request is valid, and validation + // should happen well before this, but do something just in case. 
let errors = vec![Error::builder() .message("Cannot find operation".to_string()) .extension_code("MISSING_OPERATION") @@ -143,6 +154,10 @@ mod forbid_http_get_mutations_tests { async fn it_lets_http_post_queries_pass_through() { let mut mock_service = MockSupergraphService::new(); + mock_service + .expect_clone() + .returning(MockSupergraphService::new); + mock_service .expect_call() .times(1) @@ -166,6 +181,10 @@ mod forbid_http_get_mutations_tests { async fn it_lets_http_post_mutations_pass_through() { let mut mock_service = MockSupergraphService::new(); + mock_service + .expect_clone() + .returning(MockSupergraphService::new); + mock_service .expect_call() .times(1) @@ -189,6 +208,10 @@ mod forbid_http_get_mutations_tests { async fn it_lets_http_get_queries_pass_through() { let mut mock_service = MockSupergraphService::new(); + mock_service + .expect_clone() + .returning(MockSupergraphService::new); + mock_service .expect_call() .times(1) @@ -238,7 +261,12 @@ mod forbid_http_get_mutations_tests { .map(|method| create_request(method, OperationKind::Mutation)); for request in forbidden_requests { - let mock_service = MockSupergraphService::new(); + let mut mock_service = MockSupergraphService::new(); + + mock_service + .expect_clone() + .returning(MockSupergraphService::new); + let mut service_stack = AllowOnlyHttpPostMutationsLayer::default().layer(mock_service); let services = service_stack.ready().await.unwrap(); diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index cb0c2ba5be..874f3d4852 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -1,7 +1,7 @@ -//! (A)utomatic (P)ersisted (Q)ueries cache. +//! (A)utomatic (P)ersisted (Q)ueries cache. //! -//! For more information on APQ see: -//! +//! For more information on APQ see: +//! 
use http::header::CACHE_CONTROL; use http::HeaderValue; @@ -49,7 +49,7 @@ impl PersistedQuery { } } -/// [`Layer`] for APQ implementation. +/// A layer-like type implementing Automatic Persisted Queries. #[derive(Clone)] pub(crate) struct APQLayer { /// set to None if APQ is disabled @@ -73,6 +73,20 @@ impl APQLayer { Self { cache: None } } + /// Supergraph service implementation for Automatic Persisted Queries. + /// + /// For more information about APQ: + /// https://www.apollographql.com/docs/apollo-server/performance/apq. + /// + /// If APQ is disabled, it rejects requests that try to use a persisted query hash. + /// If APQ is enabled, requests using APQ will populate the cache and use the cache as needed, + /// see [`apq_request`] for details. + /// + /// This must happen before GraphQL query parsing. + /// + /// This functions similarly to a checkpoint service, short-circuiting the pipeline on error + /// (using an `Err()` return value). + /// The user of this function is responsible for propagating short-circuiting. pub(crate) async fn supergraph_request( &self, request: SupergraphRequest, @@ -84,6 +98,15 @@ impl APQLayer { } } +/// Used when APQ is enabled. +/// +/// If the request contains a hash and a query string, that query is added to the APQ cache. +/// Then, the client can submit only the hash and not the query string on subsequent requests. +/// The request is rejected if the hash does not match the query string. +/// +/// If the request contains only a hash, attempts to read the query from the APQ cache, and +/// populates the query string in the request body. +/// The request is rejected if the hash is not present in the cache. async fn apq_request( cache: &DeduplicatingCache, mut request: SupergraphRequest, @@ -182,6 +205,7 @@ pub(crate) fn calculate_hash_for_query(query: &str) -> String { hex::encode(hasher.finalize()) } +/// Used when APQ is disabled. Rejects requests that try to use a persisted query hash anyways. 
async fn disabled_apq_request( request: SupergraphRequest, ) -> Result { @@ -221,6 +245,7 @@ mod apq_tests { use http::StatusCode; use serde_json_bytes::json; use tower::Service; + use tower::ServiceExt; use super::*; use crate::error::Error; @@ -283,7 +308,13 @@ mod apq_tests { .expect("expecting valid request") .try_into() .unwrap(); - let apq_response = router_service.call(hash_only).await.unwrap(); + let apq_response = router_service + .ready() + .await + .expect("readied") + .call(hash_only) + .await + .unwrap(); // make sure clients won't cache apq missed response assert_eq!( @@ -310,7 +341,13 @@ mod apq_tests { .try_into() .unwrap(); - let full_response = router_service.call(with_query).await.unwrap(); + let full_response = router_service + .ready() + .await + .expect("readied") + .call(with_query) + .await + .unwrap(); // the cache control header shouldn't have been tampered with assert!(full_response @@ -331,7 +368,13 @@ mod apq_tests { .try_into() .unwrap(); - let apq_response = router_service.call(second_hash_only).await.unwrap(); + let apq_response = router_service + .ready() + .await + .expect("readied") + .call(second_hash_only) + .await + .unwrap(); // the cache control header shouldn't have been tampered with assert!(apq_response.response.headers().get(CACHE_CONTROL).is_none()); @@ -408,6 +451,9 @@ mod apq_tests { // This apq call will miss the APQ cache let apq_error = router_service + .ready() + .await + .expect("readied") .call(hash_only) .await .unwrap() @@ -421,7 +467,13 @@ mod apq_tests { assert_error_matches(&expected_apq_miss_error, apq_error); // sha256 is wrong, apq insert won't happen - let insert_failed_response = router_service.call(with_query).await.unwrap(); + let insert_failed_response = router_service + .ready() + .await + .expect("readied") + .call(with_query) + .await + .unwrap(); assert_eq!( StatusCode::BAD_REQUEST, @@ -448,6 +500,9 @@ mod apq_tests { // apq insert failed, this call will miss let second_apq_error = router_service 
+ .ready() + .await + .expect("readied") .call(second_hash_only) .await .unwrap() @@ -499,7 +554,13 @@ mod apq_tests { .expect("expecting valid request") .try_into() .unwrap(); - let apq_response = router_service.call(hash_only).await.unwrap(); + let apq_response = router_service + .ready() + .await + .expect("readied") + .call(hash_only) + .await + .unwrap(); let apq_error = apq_response .into_graphql_response_stream() @@ -520,7 +581,13 @@ mod apq_tests { .try_into() .unwrap(); - let with_query_response = router_service.call(with_query).await.unwrap(); + let with_query_response = router_service + .ready() + .await + .expect("readied") + .call(with_query) + .await + .unwrap(); let apq_error = with_query_response .into_graphql_response_stream() @@ -540,7 +607,13 @@ mod apq_tests { .try_into() .unwrap(); - let without_apq_response = router_service.call(without_apq).await.unwrap(); + let without_apq_response = router_service + .ready() + .await + .expect("readied") + .call(without_apq) + .await + .unwrap(); let without_apq_graphql_response = without_apq_response .into_graphql_response_stream() diff --git a/apollo-router/src/services/layers/content_negotiation.rs b/apollo-router/src/services/layers/content_negotiation.rs index 5d7a83071d..7dafde6d02 100644 --- a/apollo-router/src/services/layers/content_negotiation.rs +++ b/apollo-router/src/services/layers/content_negotiation.rs @@ -1,3 +1,7 @@ +//! Layers that do HTTP content negotiation using the Accept and Content-Type headers. +//! +//! Content negotiation uses a pair of layers that work together at the router and supergraph stages. + use std::ops::ControlFlow; use http::header::ACCEPT; @@ -35,7 +39,15 @@ use crate::services::MULTIPART_SUBSCRIPTION_SPEC_PARAMETER; use crate::services::MULTIPART_SUBSCRIPTION_SPEC_VALUE; pub(crate) const GRAPHQL_JSON_RESPONSE_HEADER_VALUE: &str = "application/graphql-response+json"; -/// [`Layer`] for Content-Type checks implementation. 
+ +/// A layer for the router service that rejects requests that do not have an expected Content-Type, +/// or that have an Accept header that is not supported by the router. +/// +/// In particular, the Content-Type must be JSON, and the Accept header must include */*, or one of +/// the JSON/GraphQL MIME types. +/// +/// # Context +/// If the request is valid, this layer adds a [`ClientRequestAccepts`] value to the context. #[derive(Clone, Default)] pub(crate) struct RouterLayer {} @@ -52,7 +64,7 @@ where if req.router_request.method() != Method::GET && !content_type_is_json(req.router_request.headers()) { - let response: http::Response = http::Response::builder() + let response = http::Response::builder() .status(StatusCode::UNSUPPORTED_MEDIA_TYPE) .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) .body(router::body::from_bytes( @@ -88,8 +100,10 @@ where Ok(ControlFlow::Continue(req)) } else { - let response: http::Response = http::Response::builder().status(StatusCode::NOT_ACCEPTABLE).header(CONTENT_TYPE, APPLICATION_JSON.essence_str()).body( - router::body::from_bytes( + let response = http::Response::builder() + .status(StatusCode::NOT_ACCEPTABLE) + .header(CONTENT_TYPE, APPLICATION_JSON.essence_str()) + .body(router::body::from_bytes( serde_json::json!({ "errors": [ graphql::Error::builder() @@ -103,7 +117,9 @@ where .extension_code("INVALID_ACCEPT_HEADER") .build() ] - }).to_string())).expect("cannot fail"); + }) + .to_string() + )).expect("cannot fail"); Ok(ControlFlow::Break(response.into())) } @@ -113,7 +129,12 @@ where } } -/// [`Layer`] for Content-Type checks implementation. +/// A layer for the supergraph service that populates the Content-Type response header. +/// +/// The content type is decided based on the [`ClientRequestAccepts`] context value, which is +/// populated by the content negotiation [`RouterLayer`]. +// XXX(@goto-bus-stop): this feels a bit odd. 
It probably works fine because we can only ever respond +// with JSON, but maybe this should be done as close as possible to where we populate the response body..? #[derive(Clone, Default)] pub(crate) struct SupergraphLayer {} diff --git a/apollo-router/src/services/layers/persisted_queries/mod.rs b/apollo-router/src/services/layers/persisted_queries/mod.rs index ae849a6170..e4a17f1973 100644 --- a/apollo-router/src/services/layers/persisted_queries/mod.rs +++ b/apollo-router/src/services/layers/persisted_queries/mod.rs @@ -1,3 +1,5 @@ +//! Implements support for persisted queries and safelisting at the supergraph service stage. + mod id_extractor; mod manifest_poller; @@ -25,8 +27,21 @@ const PERSISTED_QUERIES_CLIENT_NAME_CONTEXT_KEY: &str = "apollo_persisted_querie const PERSISTED_QUERIES_SAFELIST_SKIP_ENFORCEMENT_CONTEXT_KEY: &str = "apollo_persisted_queries::safelist::skip_enforcement"; +/// Marker type for request context to identify requests that were expanded from a persisted query +/// ID. struct UsedQueryIdFromManifest; +/// Implements persisted query support, namely expanding requests using persisted query IDs and +/// filtering free-form GraphQL requests based on router configuration. +/// +/// Despite the name, this is not really in any way a layer today. +/// +/// This type actually consists of two conceptual layers that must both be applied at the supergraph +/// service stage, at different points: +/// - [PersistedQueryLayer::supergraph_request] must be done *before* the GraphQL request is parsed +/// and validated. +/// - [PersistedQueryLayer::supergraph_request_with_analyzed_query] must be done *after* the +/// GraphQL request is parsed and validated. #[derive(Debug)] pub(crate) struct PersistedQueryLayer { /// Manages polling uplink for persisted queries and caches the current @@ -62,11 +77,17 @@ impl PersistedQueryLayer { } } - /// Run a request through the layer. + /// Handles pre-parsing work for requests using persisted queries. 
+ /// /// Takes care of: /// 1) resolving a persisted query ID to a query body - /// 2) matching a freeform GraphQL request against persisted queries, optionally rejecting it based on configuration - /// 3) continuing to the next stage of the router + /// 2) rejecting free-form GraphQL requests if they are never allowed by configuration. + /// Matching against safelists is done later in + /// [`PersistedQueryLayer::supergraph_request_with_analyzed_query`]. + /// + /// This functions similarly to a checkpoint service, short-circuiting the pipeline on error + /// (using an `Err()` return value). + /// The user of this function is responsible for propagating short-circuiting. pub(crate) fn supergraph_request( &self, request: SupergraphRequest, @@ -178,6 +199,16 @@ impl PersistedQueryLayer { } } + /// Handles post-GraphQL-parsing work for requests using the persisted queries feature, + /// in particular safelisting. + /// + /// Any request that was expanded by the [`PersistedQueryLayer::supergraph_request`] call is + /// passed through immediately. Free-form GraphQL is matched against safelists and rejected or + /// passed through based on router configuration. + /// + /// This functions similarly to a checkpoint service, short-circuiting the pipeline on error + /// (using an `Err()` return value). + /// The user of this function is responsible for propagating short-circuiting. 
pub(crate) async fn supergraph_request_with_analyzed_query( &self, request: SupergraphRequest, @@ -500,9 +531,8 @@ mod tests { assert!(incoming_request.supergraph_request.body().query.is_none()); let result = pq_layer.supergraph_request(incoming_request); - let request = result - .ok() - .expect("pq layer returned response instead of putting the query on the request"); + let request = + result.expect("pq layer returned response instead of putting the query on the request"); assert_eq!(request.supergraph_request.body().query, Some(body)); } @@ -556,7 +586,6 @@ mod tests { pq_layer .supergraph_request(incoming_request) - .ok() .expect("pq layer returned response instead of putting the query on the request") .supergraph_request .body() @@ -611,9 +640,8 @@ mod tests { assert!(incoming_request.supergraph_request.body().query.is_none()); let result = pq_layer.supergraph_request(incoming_request); - let request = result - .ok() - .expect("pq layer returned response instead of continuing to APQ layer"); + let request = + result.expect("pq layer returned response instead of continuing to APQ layer"); assert!(request.supergraph_request.body().query.is_none()); } @@ -755,12 +783,10 @@ mod tests { // the operation. 
let updated_request = pq_layer .supergraph_request(incoming_request) - .ok() .expect("pq layer returned error response instead of returning a request"); query_analysis_layer .supergraph_request(updated_request) .await - .ok() .expect("QA layer returned error response instead of returning a request") } @@ -820,7 +846,6 @@ mod tests { pq_layer .supergraph_request_with_analyzed_query(request_with_analyzed_query) .await - .ok() .expect("pq layer second hook returned error response instead of returning a request"); let mut metric_attributes = vec![]; diff --git a/apollo-router/src/services/layers/query_analysis.rs b/apollo-router/src/services/layers/query_analysis.rs index 12e3694afa..b5575a3979 100644 --- a/apollo-router/src/services/layers/query_analysis.rs +++ b/apollo-router/src/services/layers/query_analysis.rs @@ -1,3 +1,6 @@ +//! Implements GraphQL parsing/validation/usage counting of requests at the supergraph service +//! stage. + use std::collections::HashMap; use std::fmt::Display; use std::fmt::Formatter; @@ -36,7 +39,9 @@ use crate::spec::SpecError; use crate::Configuration; use crate::Context; -/// [`Layer`] for QueryAnalysis implementation. +/// A layer-like type that handles several aspects of query parsing and analysis. +/// +/// The supergraph layer implementation is in [QueryAnalysisLayer::supergraph_request]. #[derive(Clone)] #[allow(clippy::type_complexity)] pub(crate) struct QueryAnalysisLayer { @@ -75,6 +80,8 @@ impl QueryAnalysisLayer { } } + // XXX(@goto-bus-stop): This is public because query warmup uses it. I think the reason that + // warmup uses this instead of `Query::parse_document` directly is to use the worker pool. pub(crate) async fn parse_document( &self, query: &str, @@ -107,6 +114,20 @@ impl QueryAnalysisLayer { .expect("Query::parse_document panicked") } + /// Parses the GraphQL in the supergraph request and computes Apollo usage references. 
+ /// + /// This functions similarly to a checkpoint service, short-circuiting the pipeline on error + /// (using an `Err()` return value). + /// The user of this function is responsible for propagating short-circuiting. + /// + /// # Context + /// This stores values in the request context: + /// - [`ParsedDocument`] + /// - [`ExtendedReferenceStats`] + /// - "operation_name" and "operation_kind" + /// - authorization details (required scopes, policies), if any + /// - [`Arc`]`<`[`UsageReporting`]`>` if there was an error; normally, this would be populated + /// by the caching query planner, but we do not reach that code if we fail early here. pub(crate) async fn supergraph_request( &self, request: SupergraphRequest, @@ -118,7 +139,6 @@ impl QueryAnalysisLayer { .message("Must provide query string.".to_string()) .extension_code("MISSING_QUERY_STRING") .build()]; - return Err(SupergraphResponse::builder() .errors(errors) .status_code(StatusCode::BAD_REQUEST) diff --git a/apollo-router/src/services/layers/static_page.rs b/apollo-router/src/services/layers/static_page.rs index ec62dc7352..358f63cd49 100644 --- a/apollo-router/src/services/layers/static_page.rs +++ b/apollo-router/src/services/layers/static_page.rs @@ -1,7 +1,4 @@ -//! (A)utomatic (P)ersisted (Q)ueries cache. -//! -//! For more information on APQ see: -//! +//! Provides the home page and sandbox page implementations. use std::ops::ControlFlow; @@ -23,13 +20,16 @@ use crate::layers::sync_checkpoint::CheckpointService; use crate::services::router; use crate::Configuration; -/// [`Layer`] That serves Static pages such as Homepage and Sandbox. +/// A layer that serves a static page for all requests that accept a `text/html` response +/// (typically a user navigating to a page in the browser). #[derive(Clone)] pub(crate) struct StaticPageLayer { static_page: Option, } impl StaticPageLayer { + /// Create a static page based on configuration: either an Apollo Sandbox, or a simple home + /// page. 
pub(crate) fn new(configuration: &Configuration) -> Self { let static_page = if configuration.sandbox.enabled { Some(Bytes::from(sandbox_page_content())) @@ -57,7 +57,7 @@ where CheckpointService::new( move |req| { let res = if req.router_request.method() == Method::GET - && prefers_html(req.router_request.headers()) + && accepts_html(req.router_request.headers()) { let response = http::Response::builder() .header( @@ -84,7 +84,11 @@ where } } -fn prefers_html(headers: &HeaderMap) -> bool { +/// Returns true if the given header map contains an `Accept` header which contains the "text/html" +/// MIME type. +/// +/// `Accept` priorities or preferences are not considered. +fn accepts_html(headers: &HeaderMap) -> bool { let text_html = MediaType::new(TEXT, HTML); headers.get_all(&http::header::ACCEPT).iter().any(|value| { diff --git a/apollo-router/src/services/router.rs b/apollo-router/src/services/router.rs index 519f2cb9ee..c1098fc053 100644 --- a/apollo-router/src/services/router.rs +++ b/apollo-router/src/services/router.rs @@ -145,6 +145,8 @@ impl Request { use displaydoc::Display; use thiserror::Error; +use crate::context::CONTAINS_GRAPHQL_ERROR; + #[derive(Error, Display, Debug)] pub enum ParseError { /// couldn't create a valid http GET uri '{0}' @@ -241,6 +243,9 @@ impl Response { headers: MultiMap, context: Context, ) -> Result { + if !errors.is_empty() { + context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + } // Build a response let b = graphql::Response::builder() .and_label(label) diff --git a/apollo-router/src/services/router/service.rs b/apollo-router/src/services/router/service.rs index 1743c7d33a..53ba35394f 100644 --- a/apollo-router/src/services/router/service.rs +++ b/apollo-router/src/services/router/service.rs @@ -26,6 +26,7 @@ use mime::APPLICATION_JSON; use multimap::MultiMap; use opentelemetry::KeyValue; use opentelemetry_semantic_conventions::trace::HTTP_REQUEST_METHOD; +use tower::buffer::Buffer; use 
tower::BoxError; use tower::Layer; use tower::ServiceBuilder; @@ -41,9 +42,10 @@ use crate::batching::BatchQuery; use crate::cache::DeduplicatingCache; use crate::configuration::Batching; use crate::configuration::BatchingMode; -use crate::context::CONTAINS_GRAPHQL_ERROR; use crate::graphql; use crate::http_ext; +use crate::layers::ServiceBuilderExt; +use crate::layers::DEFAULT_BUFFER_SIZE; #[cfg(test)] use crate::plugin::test::MockSupergraphService; use crate::plugins::telemetry::config_new::attributes::HTTP_REQUEST_BODY; @@ -66,7 +68,6 @@ use crate::services::layers::query_analysis::QueryAnalysisLayer; use crate::services::layers::static_page::StaticPageLayer; use crate::services::new_service::ServiceFactory; use crate::services::router; -#[cfg(test)] use crate::services::supergraph; use crate::services::HasPlugins; #[cfg(test)] @@ -97,27 +98,32 @@ static ORIGIN_HEADER_VALUE: HeaderValue = HeaderValue::from_static("origin"); /// Containing [`Service`] in the request lifecyle. #[derive(Clone)] pub(crate) struct RouterService { - supergraph_creator: Arc, - apq_layer: APQLayer, + apq_layer: Arc, persisted_query_layer: Arc, - query_analysis_layer: QueryAnalysisLayer, + query_analysis_layer: Arc, + // Cannot be under Arc. 
Batching state must be preserved for each RouterService + // instance batching: Batching, + supergraph_service: supergraph::BoxCloneService, } impl RouterService { - pub(crate) fn new( - supergraph_creator: Arc, + fn new( + sgb: supergraph::BoxService, apq_layer: APQLayer, persisted_query_layer: Arc, query_analysis_layer: QueryAnalysisLayer, batching: Batching, ) -> Self { + let supergraph_service: supergraph::BoxCloneService = + ServiceBuilder::new().buffered().service(sgb).boxed_clone(); + RouterService { - supergraph_creator, - apq_layer, + apq_layer: Arc::new(apq_layer), persisted_query_layer, - query_analysis_layer, + query_analysis_layer: Arc::new(query_analysis_layer), batching, + supergraph_service, } } } @@ -218,30 +224,29 @@ impl Service for RouterService { type Error = BoxError; type Future = BoxFuture<'static, Result>; - fn poll_ready(&mut self, _cx: &mut std::task::Context<'_>) -> Poll> { - // This service eventually calls `QueryAnalysisLayer::parse_document()` - // which calls `compute_job::execute()` - if crate::compute_job::is_full() { - return Poll::Pending; - } - Poll::Ready(Ok(())) + fn poll_ready(&mut self, cx: &mut std::task::Context<'_>) -> Poll> { + self.supergraph_service.poll_ready(cx) } fn call(&mut self, req: RouterRequest) -> Self::Future { - let clone = self.clone(); + let self_clone = self.clone(); - let this = std::mem::replace(self, clone); + let this = std::mem::replace(self, self_clone); let fut = async move { this.call_inner(req).await }; + Box::pin(fut) } } impl RouterService { async fn process_supergraph_request( - &self, + self, supergraph_request: SupergraphRequest, ) -> Result { + // XXX(@goto-bus-stop): This code looks confusing. we are manually calling several + // layers with various ifs and matches, but *really*, we are calling each layer in order + // and handling short-circuiting. 
let mut request_res = self .persisted_query_layer .supergraph_request(supergraph_request); @@ -260,11 +265,26 @@ impl RouterService { .await { Err(response) => response, - Ok(request) => self.supergraph_creator.create().oneshot(request).await?, + Ok(request) => { + // self.supergraph_service here is a clone of the service that was readied + // in RouterService::poll_ready. Clones are un-ready by default, so this + // self.supergraph_service is actually not ready, which is why we need to + // oneshot it here. That technically breaks backpressure, but because we are + // still readying the supergraph service before calling into the router + // service, backpressure is actually still exerted at that point--there's + // just potential for some requests to slip through the cracks and end up + // queueing up at this .oneshot() call. + // + // Not ideal, but an improvement on the situation in Router 1.x. + self.supergraph_service.oneshot(request).await? + } }, }, }; + // XXX(@goto-bus-stop): *all* of the code using these `accepts_` variables looks like it + // duplicates what the content_negotiation::SupergraphLayer is doing. We should delete one + // or the other, and absolutely not do it inline here. let ClientRequestAccepts { wildcard: accepts_wildcard, json: accepts_json, @@ -274,6 +294,8 @@ impl RouterService { .extensions() .with_lock(|lock| lock.get().cloned()) .unwrap_or_default(); + + // XXX(@goto-bus-stop): I strongly suspect that it would be better to move this into its own layer. let display_router_response: DisplayRouterResponse = context .extensions() .with_lock(|lock| lock.get().cloned()) @@ -379,11 +401,6 @@ impl RouterService { 1, code = "INVALID_ACCEPT_HEADER" ); - // Useful for selector in spans/instruments/events - context.insert_json_value( - CONTAINS_GRAPHQL_ERROR, - serde_json_bytes::Value::Bool(true), - ); // this should be unreachable due to a previous check, but just to be sure... 
Ok(router::Response::error_builder() @@ -408,21 +425,21 @@ impl RouterService { } } - async fn call_inner(&self, req: RouterRequest) -> Result { + async fn call_inner(self, req: RouterRequest) -> Result { let context = req.context; let (parts, body) = req.router_request.into_parts(); - let requests = self.get_graphql_requests(&context, &parts, body).await?; + let requests = self + .clone() + .get_graphql_requests(&context, &parts, body) + .await?; + let my_self = self.clone(); let (supergraph_requests, is_batch) = match futures::future::ready(requests) - .and_then(|r| self.translate_request(&context, parts, r)) + .and_then(|r| my_self.translate_request(&context, parts, r)) .await { Ok(requests) => requests, Err(err) => { - // Useful for selector in spans/instruments/events - context - .insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); - return router::Response::error_builder() .error( graphql::Error::builder() @@ -442,13 +459,13 @@ impl RouterService { // Requests can be cancelled at any point of the router pipeline, but all failures bubble back // up through here, so we can catch them without having to specially handle batch queries in // other portions of the codebase. - let futures = supergraph_requests - .into_iter() - .map(|supergraph_request| async { + let futures = supergraph_requests.into_iter().map(|supergraph_request| { + let my_self = self.clone(); + async move { // We clone the context here, because if the request results in an Err, the // response context will no longer exist. let context = supergraph_request.context.clone(); - let result = self.process_supergraph_request(supergraph_request).await; + let result = my_self.process_supergraph_request(supergraph_request).await; // Regardless of the result, we need to make sure that we cancel any potential batch queries. 
This is because // custom rust plugins, rhai scripts, and coprocessors can cancel requests at any time and return a GraphQL @@ -467,7 +484,8 @@ impl RouterService { } result - }); + } + }); // Use join_all to preserve ordering of concurrent operations // (Short circuit processing and propagate any errors in the batch) @@ -511,7 +529,7 @@ impl RouterService { } async fn translate_query_request( - &self, + self, parts: &Parts, ) -> Result<(Vec, bool), TranslateError> { let mut is_batch = false; @@ -531,7 +549,7 @@ impl RouterService { result = graphql::Request::batch_from_urlencoded_query(q.to_string()) .map_err(|e| TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: format!( "failed to decode a valid GraphQL request from path {e}" ), @@ -539,7 +557,7 @@ impl RouterService { if result.is_empty() { return Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: "failed to decode a valid GraphQL request from path: empty array ".to_string() }); } @@ -553,13 +571,13 @@ impl RouterService { }; return Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "BATCHING_NOT_ENABLED", + extension_code: "BATCHING_NOT_ENABLED".to_string(), extension_details, }); } else { return Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: format!( "failed to decode a valid GraphQL request from path {err}" ), @@ -571,7 +589,7 @@ impl RouterService { }).unwrap_or_else(|| { Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: "There was no GraphQL operation to execute. 
Use the `query` parameter to send an operation, using either GET or POST.".to_string() }) }) @@ -595,7 +613,7 @@ impl RouterService { result = graphql::Request::batch_from_bytes(bytes).map_err(|e| TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: format!( "failed to deserialize the request body into JSON: {e}" ), @@ -603,7 +621,7 @@ impl RouterService { if result.is_empty() { return Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: "failed to decode a valid GraphQL request from path: empty array " .to_string(), @@ -620,13 +638,13 @@ impl RouterService { }; return Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "BATCHING_NOT_ENABLED", + extension_code: "BATCHING_NOT_ENABLED".to_string(), extension_details, }); } else { return Err(TranslateError { status: StatusCode::BAD_REQUEST, - extension_code: "INVALID_GRAPHQL_REQUEST", + extension_code: "INVALID_GRAPHQL_REQUEST".to_string(), extension_details: format!( "failed to deserialize the request body into JSON: {err}" ), @@ -638,7 +656,7 @@ impl RouterService { } async fn translate_request( - &self, + self, context: &Context, parts: Parts, graphql_requests: (Vec, bool), @@ -705,7 +723,7 @@ impl RouterService { Batch::query_for_index(shared_batch_details.clone(), index + 1).map_err( |err| TranslateError { status: StatusCode::INTERNAL_SERVER_ERROR, - extension_code: "BATCHING_ERROR", + extension_code: "BATCHING_ERROR".to_string(), extension_details: format!("failed to create batch entry: {err}"), }, )?, @@ -733,7 +751,7 @@ impl RouterService { let b_for_index = Batch::query_for_index(shared_batch_details, 0).map_err(|err| TranslateError { status: StatusCode::INTERNAL_SERVER_ERROR, - extension_code: "BATCHING_ERROR", + extension_code: 
"BATCHING_ERROR".to_string(), extension_details: format!("failed to create batch entry: {err}"), })?; context @@ -753,7 +771,7 @@ impl RouterService { } async fn get_graphql_requests( - &self, + self, context: &Context, parts: &Parts, body: Body, @@ -842,9 +860,10 @@ impl RouterService { } } -struct TranslateError<'a> { +#[derive(Clone)] +struct TranslateError { status: StatusCode, - extension_code: &'a str, + extension_code: String, extension_details: String, } @@ -860,11 +879,7 @@ pub(crate) fn process_vary_header(headers: &mut HeaderMap) { #[derive(Clone)] pub(crate) struct RouterCreator { pub(crate) supergraph_creator: Arc, - static_page: StaticPageLayer, - apq_layer: APQLayer, - pub(crate) persisted_query_layer: Arc, - query_analysis_layer: QueryAnalysisLayer, - batching: Batching, + sb: Buffer>, } impl ServiceFactory for RouterCreator { @@ -916,13 +931,32 @@ impl RouterCreator { // For now just call activate to make the gauges work on the happy path. apq_layer.activate(); - Ok(Self { - supergraph_creator, - static_page, + let router_service = content_negotiation::RouterLayer::default().layer(RouterService::new( + supergraph_creator.create(), apq_layer, - query_analysis_layer, persisted_query_layer, - batching: configuration.batching.clone(), + query_analysis_layer, + configuration.batching.clone(), + )); + + // NOTE: This is the start of the router pipeline (router_service) + let sb = Buffer::new( + ServiceBuilder::new() + .layer(static_page.clone()) + .service( + supergraph_creator + .plugins() + .iter() + .rev() + .fold(router_service.boxed(), |acc, (_, e)| e.router_service(acc)), + ) + .boxed(), + DEFAULT_BUFFER_SIZE, + ); + + Ok(Self { + supergraph_creator, + sb, }) } @@ -934,23 +968,8 @@ impl RouterCreator { Error = BoxError, Future = BoxFuture<'static, router::ServiceResult>, > + Send { - let router_service = content_negotiation::RouterLayer::default().layer(RouterService::new( - self.supergraph_creator.clone(), - self.apq_layer.clone(), - 
self.persisted_query_layer.clone(), - self.query_analysis_layer.clone(), - self.batching.clone(), - )); - - ServiceBuilder::new() - .layer(self.static_page.clone()) - .service( - self.supergraph_creator - .plugins() - .iter() - .rev() - .fold(router_service.boxed(), |acc, (_, e)| e.router_service(acc)), - ) + // Note: We have to box our cloned service to erase the type of the Buffer. + self.sb.clone().boxed() } } diff --git a/apollo-router/src/services/router/tests.rs b/apollo-router/src/services/router/tests.rs index 0adf3cacb1..b018196dac 100644 --- a/apollo-router/src/services/router/tests.rs +++ b/apollo-router/src/services/router/tests.rs @@ -108,7 +108,13 @@ async fn it_extracts_query_and_operation_name() { .try_into() .unwrap(); - router_service.call(get_request).await.unwrap(); + router_service + .ready() + .await + .expect("readied") + .call(get_request) + .await + .unwrap(); // post request let post_request = supergraph::Request::builder() @@ -122,6 +128,9 @@ async fn it_extracts_query_and_operation_name() { .unwrap(); router_service + .ready() + .await + .expect("readied") .call(post_request.try_into().unwrap()) .await .unwrap(); diff --git a/apollo-router/src/services/subgraph.rs b/apollo-router/src/services/subgraph.rs index 1c671eb13f..c55c9cc7d7 100644 --- a/apollo-router/src/services/subgraph.rs +++ b/apollo-router/src/services/subgraph.rs @@ -60,9 +60,8 @@ pub struct Request { pub context: Context, - // FIXME for router 2.x - /// Name of the subgraph, it's an Option to not introduce breaking change - pub(crate) subgraph_name: Option, + /// Name of the subgraph + pub(crate) subgraph_name: String, /// Channel to send the subscription stream to listen on events coming from subgraph in a task pub(crate) subscription_stream: Option>, /// Channel triggered when the client connection has been dropped @@ -91,7 +90,7 @@ impl Request { operation_kind: OperationKind, context: Context, subscription_stream: Option>, - subgraph_name: Option, + subgraph_name: 
String, connection_closed_signal: Option>, ) -> Request { Self { @@ -133,7 +132,7 @@ impl Request { operation_kind.unwrap_or(OperationKind::Query), context.unwrap_or_default(), subscription_stream, - subgraph_name, + subgraph_name.unwrap_or_default(), connection_closed_signal, ) } @@ -208,9 +207,8 @@ assert_impl_all!(Response: Send); #[non_exhaustive] pub struct Response { pub response: http::Response, - // FIXME for router 2.x - /// Name of the subgraph, it's an Option to not introduce breaking change - pub(crate) subgraph_name: Option, + /// Name of the subgraph + pub(crate) subgraph_name: String, pub context: Context, /// unique id matching the corresponding field in the request pub(crate) id: SubgraphRequestId, @@ -227,11 +225,11 @@ impl Response { context: Context, subgraph_name: String, id: SubgraphRequestId, - ) -> Response { + ) -> Self { Self { response, context, - subgraph_name: Some(subgraph_name), + subgraph_name, id, } } @@ -251,9 +249,9 @@ impl Response { status_code: Option, context: Context, headers: Option>, - subgraph_name: Option, + subgraph_name: String, id: Option, - ) -> Response { + ) -> Self { // Build a response let res = graphql::Response::builder() .and_label(label) @@ -303,8 +301,8 @@ impl Response { headers: Option>, subgraph_name: Option, id: Option, - ) -> Response { - Response::new( + ) -> Self { + Self::new( label, data, path, @@ -313,7 +311,7 @@ impl Response { status_code, context.unwrap_or_default(), headers, - subgraph_name, + subgraph_name.unwrap_or_default(), id, ) } @@ -339,7 +337,7 @@ impl Response { subgraph_name: Option, id: Option, ) -> Result { - Ok(Response::new( + Ok(Self::new( label, data, path, @@ -348,7 +346,7 @@ impl Response { status_code, context.unwrap_or_default(), Some(header_map(headers)?), - subgraph_name, + subgraph_name.unwrap_or_default(), id, )) } @@ -361,10 +359,10 @@ impl Response { errors: Vec, status_code: Option, context: Context, - subgraph_name: Option, + subgraph_name: String, id: Option, - ) -> 
Result { - Ok(Response::new( + ) -> Self { + Self::new( Default::default(), Default::default(), Default::default(), @@ -375,7 +373,7 @@ impl Response { Default::default(), subgraph_name, id, - )) + ) } } diff --git a/apollo-router/src/services/subgraph_service.rs b/apollo-router/src/services/subgraph_service.rs index 90f527d674..716b60cd7a 100644 --- a/apollo-router/src/services/subgraph_service.rs +++ b/apollo-router/src/services/subgraph_service.rs @@ -32,7 +32,7 @@ use tokio::sync::oneshot; use tokio_tungstenite::connect_async; use tokio_tungstenite::connect_async_tls_with_config; use tokio_tungstenite::tungstenite::client::IntoClientRequest; -use tower::util::BoxService; +use tower::buffer::Buffer; use tower::BoxError; use tower::Service; use tower::ServiceExt; @@ -56,6 +56,7 @@ use crate::error::FetchError; use crate::error::SubgraphBatchingError; use crate::graphql; use crate::json_ext::Object; +use crate::layers::DEFAULT_BUFFER_SIZE; use crate::plugins::authentication::subgraph::SigningParamsConfig; use crate::plugins::file_uploads; use crate::plugins::subscription::create_verifier; @@ -73,6 +74,7 @@ use crate::protocols::websocket::GraphqlWebSocket; use crate::query_planner::OperationKind; use crate::services::layers::apq; use crate::services::router; +use crate::services::subgraph; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::Configuration; @@ -1574,8 +1576,9 @@ fn get_apq_error(gql_response: &graphql::Response) -> APQError { #[derive(Clone)] pub(crate) struct SubgraphServiceFactory { - pub(crate) services: Arc>>, - pub(crate) plugins: Arc, + pub(crate) services: Arc< + HashMap>>, + >, } impl SubgraphServiceFactory { @@ -1583,23 +1586,27 @@ impl SubgraphServiceFactory { services: Vec<(String, Arc)>, plugins: Arc, ) -> Self { + let mut map = HashMap::with_capacity(services.len()); + for (name, maker) in services.into_iter() { + let service = Buffer::new( + plugins + .iter() + .rev() + .fold(maker.make(), |acc, (_, 
e)| e.subgraph_service(&name, acc)) + .boxed(), + DEFAULT_BUFFER_SIZE, + ); + map.insert(name, service); + } + SubgraphServiceFactory { - services: Arc::new(services.into_iter().collect()), - plugins, + services: Arc::new(map), } } - pub(crate) fn create( - &self, - name: &str, - ) -> Option> { - self.services.get(name).map(|service| { - let service = service.make(); - self.plugins - .iter() - .rev() - .fold(service, |acc, (_, e)| e.subgraph_service(name, acc)) - }) + pub(crate) fn create(&self, name: &str) -> Option { + // Note: We have to box our cloned service to erase the type of the Buffer. + self.services.get(name).map(|svc| svc.clone().boxed()) } } @@ -1607,7 +1614,7 @@ impl SubgraphServiceFactory { /// /// there can be multiple instances of that service executing at any given time pub(crate) trait MakeSubgraphService: Send + Sync + 'static { - fn make(&self) -> BoxService; + fn make(&self) -> subgraph::BoxService; } impl MakeSubgraphService for S @@ -1619,7 +1626,7 @@ where + 'static, >::Future: Send, { - fn make(&self) -> BoxService { + fn make(&self) -> subgraph::BoxService { self.clone().boxed() } } diff --git a/apollo-router/src/services/supergraph.rs b/apollo-router/src/services/supergraph.rs index 3807cf859d..ad25525f7c 100644 --- a/apollo-router/src/services/supergraph.rs +++ b/apollo-router/src/services/supergraph.rs @@ -16,6 +16,7 @@ use serde_json_bytes::Value; use static_assertions::assert_impl_all; use tower::BoxError; +use crate::context::CONTAINS_GRAPHQL_ERROR; use crate::error::Error; use crate::graphql; use crate::http_ext::header_map; @@ -179,6 +180,14 @@ pub struct Response { pub context: Context, } +impl std::fmt::Debug for Response { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Response") + .field("context", &self.context) + .finish() + } +} + #[buildstructor::buildstructor] impl Response { /// This is the constructor (or builder) to use when constructing a real Response.. 
@@ -197,6 +206,9 @@ impl Response { headers: MultiMap, context: Context, ) -> Result { + if !errors.is_empty() { + context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + } // Build a response let b = graphql::Response::builder() .and_label(label) @@ -243,6 +255,12 @@ impl Response { headers: MultiMap, context: Option, ) -> Result { + if !errors.is_empty() { + if let Some(context) = &context { + context + .insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + } + } Response::new( label, data, @@ -293,6 +311,9 @@ impl Response { headers: MultiMap, context: Context, ) -> Result { + if !errors.is_empty() { + context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + } Response::new( Default::default(), Default::default(), @@ -321,6 +342,9 @@ impl Response { headers: MultiMap, context: Context, ) -> Self { + if !errors.is_empty() { + context.insert_json_value(CONTAINS_GRAPHQL_ERROR, serde_json_bytes::Value::Bool(true)); + } // Build a response let b = graphql::Response::builder() .and_label(label) diff --git a/apollo-router/src/services/supergraph/service.rs b/apollo-router/src/services/supergraph/service.rs index 6c7193022e..8fa87af741 100644 --- a/apollo-router/src/services/supergraph/service.rs +++ b/apollo-router/src/services/supergraph/service.rs @@ -15,6 +15,7 @@ use opentelemetry::KeyValue; use tokio::sync::mpsc; use tokio::sync::mpsc::error::SendError; use tokio_stream::wrappers::ReceiverStream; +use tower::buffer::Buffer; use tower::BoxError; use tower::Layer; use tower::ServiceBuilder; @@ -33,6 +34,8 @@ use crate::error::CacheResolverError; use crate::graphql; use crate::graphql::IntoGraphQLErrors; use crate::graphql::Response; +use crate::layers::ServiceBuilderExt; +use crate::layers::DEFAULT_BUFFER_SIZE; use crate::plugin::DynPlugin; use crate::plugins::connectors::query_plans::store_connectors; use crate::plugins::connectors::query_plans::store_connectors_labels; 
@@ -43,8 +46,6 @@ use crate::plugins::telemetry::config_new::events::log_event; use crate::plugins::telemetry::config_new::events::SupergraphEventResponse; use crate::plugins::telemetry::consts::QUERY_PLANNING_SPAN_NAME; use crate::plugins::telemetry::tracing::apollo_telemetry::APOLLO_PRIVATE_DURATION_NS; -use crate::plugins::traffic_shaping::TrafficShaping; -use crate::plugins::traffic_shaping::APOLLO_TRAFFIC_SHAPING; use crate::query_planner::subscription::SubscriptionHandle; use crate::query_planner::subscription::OPENED_SUBSCRIPTIONS; use crate::query_planner::subscription::SUBSCRIPTION_EVENT_SPAN_NAME; @@ -56,6 +57,7 @@ use crate::router_factory::create_plugins; use crate::router_factory::create_subgraph_services; use crate::services::connector::request_service::ConnectorRequestServiceFactory; use crate::services::connector_service::ConnectorServiceFactory; +use crate::services::execution; use crate::services::execution::QueryPlan; use crate::services::fetch_service::FetchServiceFactory; use crate::services::http::HttpClientServiceFactory; @@ -91,8 +93,9 @@ pub(crate) type Plugins = IndexMap>; /// Containing [`Service`] in the request lifecyle. 
#[derive(Clone)] pub(crate) struct SupergraphService { - execution_service_factory: ExecutionServiceFactory, query_planner_service: CachingQueryPlanner, + execution_service_factory: ExecutionServiceFactory, + execution_service: execution::BoxCloneService, schema: Arc, notify: Notify, } @@ -106,9 +109,15 @@ impl SupergraphService { schema: Arc, notify: Notify, ) -> Self { + let execution_service: execution::BoxCloneService = ServiceBuilder::new() + .buffered() + .service(execution_service_factory.create()) + .boxed_clone(); + SupergraphService { query_planner_service, execution_service_factory, + execution_service, schema, notify, } @@ -143,6 +152,7 @@ impl Service for SupergraphService { let fut = service_call( planning, self.execution_service_factory.clone(), + self.execution_service.clone(), schema, req, self.notify.clone(), @@ -173,6 +183,7 @@ impl Service for SupergraphService { async fn service_call( planning: CachingQueryPlanner, execution_service_factory: ExecutionServiceFactory, + execution_service: execution::BoxCloneService, schema: Arc, req: SupergraphRequest, notify: Notify, @@ -323,12 +334,14 @@ async fn service_call( let (subs_tx, subs_rx) = mpsc::channel(1); let query_plan = plan.clone(); let execution_service_factory_cloned = execution_service_factory.clone(); + let execution_service_cloned = execution_service.clone(); let cloned_supergraph_req = clone_supergraph_request(&req.supergraph_request, context.clone()); // Spawn task for subscription tokio::spawn(async move { subscription_task( execution_service_factory_cloned, + execution_service_cloned, ctx, query_plan, subs_rx, @@ -340,8 +353,7 @@ async fn service_call( subscription_tx = subs_tx.into(); } - let execution_response = execution_service_factory - .create() + let execution_response = execution_service .oneshot( ExecutionRequest::internal_builder() .supergraph_request(req.supergraph_request) @@ -436,6 +448,7 @@ pub struct SubscriptionTaskParams { async fn subscription_task( mut 
execution_service_factory: ExecutionServiceFactory, + execution_service: execution::BoxCloneService, context: Context, query_plan: Arc, mut rx: mpsc::Receiver, @@ -541,7 +554,7 @@ async fn subscription_task( match message { Some(mut val) => { val.created_at = Some(Instant::now()); - let res = dispatch_event(&supergraph_req, &execution_service_factory, query_plan.as_ref(), context.clone(), val, sender.clone()) + let res = dispatch_event(&supergraph_req, execution_service.clone(), query_plan.as_ref(), context.clone(), val, sender.clone()) .instrument(tracing::info_span!(SUBSCRIPTION_EVENT_SPAN_NAME, graphql.operation.name = %operation_name, otel.kind = "INTERNAL", @@ -628,7 +641,6 @@ async fn subscription_task( subgraph_schemas: execution_service_factory.subgraph_schemas.clone(), plugins: plugins.clone(), fetch_service_factory, - }; } } @@ -657,7 +669,7 @@ async fn subscription_task( async fn dispatch_event( supergraph_req: &SupergraphRequest, - execution_service_factory: &ExecutionServiceFactory, + execution_service: execution::BoxCloneService, query_plan: Option<&Arc>, context: Context, mut val: graphql::Response, @@ -679,7 +691,6 @@ async fn dispatch_event( .build() .await; - let execution_service = execution_service_factory.create(); let execution_response = execution_service.oneshot(execution_request).await; let next_response = match execution_response { Ok(mut execution_response) => execution_response.next_response().await, @@ -914,12 +925,42 @@ impl PluggableSupergraphServiceBuilder { )), )); + let supergraph_service = SupergraphService::builder() + .query_planner_service(query_planner_service.clone()) + .execution_service_factory(ExecutionServiceFactory { + schema: schema.clone(), + subgraph_schemas: query_planner_service.subgraph_schemas(), + plugins: self.plugins.clone(), + fetch_service_factory, + }) + .schema(schema.clone()) + .notify(configuration.notify.clone()) + .build(); + + let supergraph_service = + 
AllowOnlyHttpPostMutationsLayer::default().layer(supergraph_service); + + let sb = Buffer::new( + ServiceBuilder::new() + .layer(content_negotiation::SupergraphLayer::default()) + .service( + self.plugins + .iter() + .rev() + .fold(supergraph_service.boxed(), |acc, (_, e)| { + e.supergraph_service(acc) + }), + ) + .boxed(), + DEFAULT_BUFFER_SIZE, + ); + Ok(SupergraphCreator { query_planner_service, - fetch_service_factory, schema, plugins: self.plugins, config: configuration, + sb, }) } } @@ -928,10 +969,10 @@ impl PluggableSupergraphServiceBuilder { #[derive(Clone)] pub(crate) struct SupergraphCreator { query_planner_service: CachingQueryPlanner, - fetch_service_factory: Arc, schema: Arc, config: Arc, plugins: Arc, + sb: Buffer>, } pub(crate) trait HasPlugins { @@ -980,38 +1021,8 @@ impl SupergraphCreator { Error = BoxError, Future = BoxFuture<'static, supergraph::ServiceResult>, > + Send { - let supergraph_service = SupergraphService::builder() - .query_planner_service(self.query_planner_service.clone()) - .execution_service_factory(ExecutionServiceFactory { - schema: self.schema.clone(), - subgraph_schemas: self.query_planner_service.subgraph_schemas(), - plugins: self.plugins.clone(), - fetch_service_factory: self.fetch_service_factory.clone(), - }) - .schema(self.schema.clone()) - .notify(self.config.notify.clone()) - .build(); - - let shaping = self - .plugins - .iter() - .find(|i| i.0.as_str() == APOLLO_TRAFFIC_SHAPING) - .and_then(|plugin| plugin.1.as_any().downcast_ref::()) - .expect("traffic shaping should always be part of the plugin list"); - - let supergraph_service = AllowOnlyHttpPostMutationsLayer::default() - .layer(shaping.supergraph_service_internal(supergraph_service)); - - ServiceBuilder::new() - .layer(content_negotiation::SupergraphLayer::default()) - .service( - self.plugins - .iter() - .rev() - .fold(supergraph_service.boxed(), |acc, (_, e)| { - e.supergraph_service(acc) - }), - ) + // Note: We have to box our cloned service to erase the 
type of the Buffer. + self.sb.clone().boxed() } pub(crate) fn previous_cache(&self) -> InMemoryCachePlanner { diff --git a/apollo-router/src/test_harness.rs b/apollo-router/src/test_harness.rs index b81d89bcb4..1a51d3db27 100644 --- a/apollo-router/src/test_harness.rs +++ b/apollo-router/src/test_harness.rs @@ -281,11 +281,13 @@ impl<'a> TestHarness<'a> { let builder = if builder.subgraph_network_requests { builder } else { - builder.subgraph_hook(|_name, _default| { - tower::service_fn(|request: subgraph::Request| { + builder.subgraph_hook(|name, _default| { + let my_name = name.to_string(); + tower::service_fn(move |request: subgraph::Request| { let empty_response = subgraph::Response::builder() .extensions(crate::json_ext::Object::new()) .context(request.context) + .subgraph_name(my_name.clone()) .id(request.id) .build(); std::future::ready(Ok(empty_response)) diff --git a/apollo-router/tests/common.rs b/apollo-router/tests/common.rs index 29d671bf44..a4c4078877 100644 --- a/apollo-router/tests/common.rs +++ b/apollo-router/tests/common.rs @@ -550,7 +550,6 @@ impl IntegrationTest { let mut collected = Vec::new(); let mut lines = reader.lines(); while let Ok(Some(line)) = lines.next_line().await { - println!("{line}"); // Extract the bind address from a log line that looks like this: GraphQL endpoint exposed at http://127.0.0.1:51087/ if let Some(captures) = bind_address_regex.captures(&line) { let address = captures.name("address").unwrap().as_str(); @@ -566,7 +565,7 @@ impl IntegrationTest { level: String, message: String, } - let log = serde_json::from_str::(&line).unwrap(); + let log = serde_json::from_str::(&line).expect("line: '{line}' isn't JSON, might you have some debug output in the logging?"); // Omit this message from snapshots since it depends on external environment if !log.message.starts_with("RUST_BACKTRACE=full detected") { collected.push(format!( diff --git a/apollo-router/tests/integration/batching.rs 
b/apollo-router/tests/integration/batching.rs index 521e615b30..811ac88f81 100644 --- a/apollo-router/tests/integration/batching.rs +++ b/apollo-router/tests/integration/batching.rs @@ -252,27 +252,26 @@ async fn it_handles_short_timeouts() -> Result<(), BoxError> { .await?; if test_is_enabled() { - assert_yaml_snapshot!(responses, @r###" - --- + assert_yaml_snapshot!(responses, @r" - data: entryA: index: 0 - errors: - - message: Request timed out + - message: Your request has been timed out path: [] extensions: - code: REQUEST_TIMEOUT + code: GATEWAY_TIMEOUT service: b - data: entryA: index: 1 - errors: - - message: Request timed out + - message: Your request has been timed out path: [] extensions: - code: REQUEST_TIMEOUT + code: GATEWAY_TIMEOUT service: b - "###); + "); } Ok(()) @@ -323,8 +322,7 @@ async fn it_handles_indefinite_timeouts() -> Result<(), BoxError> { // verify the output let responses = [results_a, results_b].concat(); if test_is_enabled() { - assert_yaml_snapshot!(responses, @r###" - --- + assert_yaml_snapshot!(responses, @r" - data: entryA: index: 0 @@ -335,24 +333,24 @@ async fn it_handles_indefinite_timeouts() -> Result<(), BoxError> { entryA: index: 2 - errors: - - message: Request timed out + - message: Your request has been timed out path: [] extensions: - code: REQUEST_TIMEOUT + code: GATEWAY_TIMEOUT service: b - errors: - - message: Request timed out + - message: Your request has been timed out path: [] extensions: - code: REQUEST_TIMEOUT + code: GATEWAY_TIMEOUT service: b - errors: - - message: Request timed out + - message: Your request has been timed out path: [] extensions: - code: REQUEST_TIMEOUT + code: GATEWAY_TIMEOUT service: b - "###); + "); } Ok(()) diff --git a/apollo-router/tests/integration/coprocessor.rs b/apollo-router/tests/integration/coprocessor.rs index 604728df2d..d61038bc06 100644 --- a/apollo-router/tests/integration/coprocessor.rs +++ b/apollo-router/tests/integration/coprocessor.rs @@ -27,7 +27,7 @@ async fn 
test_error_not_propagated_to_client() -> Result<(), BoxError> { let (_trace_id, response) = router.execute_default_query().await; assert_eq!(response.status(), 500); assert_yaml_snapshot!(response.text().await?); - router.wait_for_log_message("INTERNAL_SERVER_ERROR").await; + router.wait_for_log_message("Internal Server Error").await; router.graceful_shutdown().await; Ok(()) } diff --git a/apollo-router/tests/integration/fixtures/rhai_logging.router.yaml b/apollo-router/tests/integration/fixtures/rhai_logging.router.yaml new file mode 100644 index 0000000000..445e798d35 --- /dev/null +++ b/apollo-router/tests/integration/fixtures/rhai_logging.router.yaml @@ -0,0 +1,3 @@ +rhai: + scripts: "tests/fixtures" + main: "test_callbacks.rhai" diff --git a/apollo-router/tests/integration/lifecycle.rs b/apollo-router/tests/integration/lifecycle.rs index 6aa6c84350..824a10e17b 100644 --- a/apollo-router/tests/integration/lifecycle.rs +++ b/apollo-router/tests/integration/lifecycle.rs @@ -301,30 +301,30 @@ async fn test_plugin_ordering() { .join("tests") .join("fixtures") .join("test_plugin_ordering.rhai"); - let mut service = TestHarness::builder() - .configuration_json(json!({ - "plugins": { - "experimental.test_ordering_1": {}, - "experimental.test_ordering_2": {}, - "experimental.test_ordering_3": {}, - }, - "rhai": { - "main": rhai_main, - }, - "coprocessor": { - "url": coprocessor_url, - "router": { - "request": { "context": true }, - "response": { "context": true }, - } - }, - })) - .unwrap() - .build_router() - .await - .unwrap(); // Repeat to get more confidence it’s deterministic for _ in 0..10 { + let mut service = TestHarness::builder() + .configuration_json(json!({ + "plugins": { + "experimental.test_ordering_1": {}, + "experimental.test_ordering_2": {}, + "experimental.test_ordering_3": {}, + }, + "rhai": { + "main": rhai_main, + }, + "coprocessor": { + "url": coprocessor_url, + "router": { + "request": { "context": true }, + "response": { "context": true }, + } + 
}, + })) + .unwrap() + .build_router() + .await + .unwrap(); let request = supergraph::Request::canned_builder().build().unwrap(); let mut response = service .ready() diff --git a/apollo-router/tests/integration/rhai.rs b/apollo-router/tests/integration/rhai.rs index d94a47c567..3309bd619d 100644 --- a/apollo-router/tests/integration/rhai.rs +++ b/apollo-router/tests/integration/rhai.rs @@ -1,49 +1,21 @@ -use apollo_router::graphql; -use apollo_router::services::supergraph; -use apollo_router::TestHarness; -use tower::ServiceExt; +use crate::integration::common::Query; +use crate::integration::IntegrationTest; -// This test will fail if run with the "multi_thread" flavor. -// This is because tracing_test doesn't set a global subscriber, so logs will be dropped -// if we're crossing a thread boundary #[tokio::test(flavor = "multi_thread")] async fn all_rhai_callbacks_are_invoked() { - let env_filter = "apollo_router=info"; - let mock_writer = tracing_test::internal::MockWriter::new(tracing_test::internal::global_buf()); - let subscriber = tracing_test::internal::get_subscriber(mock_writer, env_filter); + let (sender, receiver) = tokio::sync::oneshot::channel(); + let mut router = IntegrationTest::builder() + .config(include_str!("fixtures/rhai_logging.router.yaml")) + .collect_stdio(sender) + .build() + .await; - let _guard = tracing::dispatcher::set_default(&subscriber); + router.start().await; + router.assert_started().await; + router.execute_query(Query::default()).await; + router.graceful_shutdown().await; - let config = serde_json::json!({ - "rhai": { - "scripts": "tests/fixtures", - "main": "test_callbacks.rhai", - } - }); - let router = TestHarness::builder() - .configuration_json(config) - .unwrap() - .schema(include_str!("../fixtures/supergraph.graphql")) - .build_router() - .await - .unwrap(); - let request = supergraph::Request::fake_builder() - .query("{ topProducts { name } }") - .build() - .unwrap(); - let _response: graphql::Response = 
serde_json::from_slice( - router - .oneshot(request.try_into().unwrap()) - .await - .unwrap() - .next_response() - .await - .unwrap() - .unwrap() - .to_vec() - .as_slice(), - ) - .unwrap(); + let logs = receiver.await.expect("logs received"); for expected_log in [ "router_service setup", @@ -58,9 +30,6 @@ async fn all_rhai_callbacks_are_invoked() { "subgraph_service setup", "from_subgraph_request", ] { - assert!( - tracing_test::internal::logs_with_scope_contain("apollo_router", expected_log), - "log not found: {expected_log}" - ); + assert!(logs.contains(expected_log)); } } diff --git a/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__router_timeout.snap b/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__router_timeout.snap index d09e20a31d..f33e3b17e3 100644 --- a/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__router_timeout.snap +++ b/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__router_timeout.snap @@ -1,5 +1,5 @@ --- source: apollo-router/tests/integration/traffic_shaping.rs -expression: response.text().await? 
+expression: response --- -"{\"errors\":[{\"message\":\"Request timed out\",\"extensions\":{\"code\":\"REQUEST_TIMEOUT\"}}]}" +"{\"errors\":[{\"message\":\"Your request has been timed out\",\"extensions\":{\"code\":\"GATEWAY_TIMEOUT\"}}]}" diff --git a/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__subgraph_timeout.snap b/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__subgraph_timeout.snap index 0364f2b734..65e9eaf8e4 100644 --- a/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__subgraph_timeout.snap +++ b/apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__subgraph_timeout.snap @@ -2,4 +2,4 @@ source: apollo-router/tests/integration/traffic_shaping.rs expression: response --- -"{\"data\":null,\"errors\":[{\"message\":\"Request timed out\",\"path\":[],\"extensions\":{\"code\":\"REQUEST_TIMEOUT\",\"service\":\"products\"}}]}" +"{\"data\":null,\"errors\":[{\"message\":\"Your request has been timed out\",\"path\":[],\"extensions\":{\"code\":\"GATEWAY_TIMEOUT\",\"service\":\"products\"}}]}" diff --git a/apollo-router/tests/integration/traffic_shaping.rs b/apollo-router/tests/integration/traffic_shaping.rs index 4f4a3460da..0375829611 100644 --- a/apollo-router/tests/integration/traffic_shaping.rs +++ b/apollo-router/tests/integration/traffic_shaping.rs @@ -41,10 +41,10 @@ async fn test_router_timeout() -> Result<(), BoxError> { let (_trace_id, response) = router.execute_default_query().await; assert_eq!(response.status(), 504); let response = response.text().await?; - assert!(response.contains("REQUEST_TIMEOUT")); + assert!(response.contains("GATEWAY_TIMEOUT")); assert_yaml_snapshot!(response); - router.assert_metrics_contains(r#"apollo_router_graphql_error_total{code="REQUEST_TIMEOUT",otel_scope_name="apollo/router"} 1"#, None).await; + 
router.assert_metrics_contains(r#"http_server_request_duration_seconds_count{error_type="Gateway Timeout",http_request_method="POST",http_response_status_code="504""#, None).await; router.graceful_shutdown().await; Ok(()) @@ -73,10 +73,11 @@ async fn test_subgraph_timeout() -> Result<(), BoxError> { let (_trace_id, response) = router.execute_default_query().await; assert_eq!(response.status(), 200); let response = response.text().await?; - assert!(response.contains("REQUEST_TIMEOUT")); + assert!(response.contains("GATEWAY_TIMEOUT")); assert_yaml_snapshot!(response); - router.assert_metrics_contains(r#"apollo_router_graphql_error_total{code="REQUEST_TIMEOUT",otel_scope_name="apollo/router"} 1"#, None).await; + // We need to add support for http.client metrics ROUTER-991 + //router.assert_metrics_contains(r#"apollo_router_graphql_error_total{code="REQUEST_TIMEOUT",otel_scope_name="apollo/router"} 1"#, None).await; router.graceful_shutdown().await; Ok(()) @@ -110,10 +111,10 @@ async fn test_router_timeout_operation_name_in_tracing() -> Result<(), BoxError> .await; assert_eq!(response.status(), 504); let response = response.text().await?; - assert!(response.contains("REQUEST_TIMEOUT")); + assert!(response.contains("GATEWAY_TIMEOUT")); router - .wait_for_log_message(r#""otel.name":"query UniqueName""#) + .wait_for_log_message(r#""otel.name":"GraphQL Operation""#) .await; router.graceful_shutdown().await; @@ -121,7 +122,7 @@ async fn test_router_timeout_operation_name_in_tracing() -> Result<(), BoxError> } #[tokio::test(flavor = "multi_thread")] -async fn test_router_timeout_custom_metric() -> Result<(), BoxError> { +async fn test_router_custom_metric() -> Result<(), BoxError> { if !graph_os_enabled() { return Ok(()); } @@ -152,12 +153,12 @@ async fn test_router_timeout_custom_metric() -> Result<(), BoxError> { router.start().await; router.assert_started().await; - let (_trace_id, response) = router.execute_default_query().await; - assert_eq!(response.status(), 504); + 
let (_trace_id, response) = router + .execute_query(Query::default().with_bad_query()) + .await; let response = response.text().await?; - assert!(response.contains("REQUEST_TIMEOUT")); - - router.assert_metrics_contains(r#"http_server_request_duration_seconds_count{error_type="Gateway Timeout",graphql_error="true",http_request_method="POST",http_response_status_code="504""#, None).await; + assert!(response.contains("MISSING_QUERY_STRING")); + router.assert_metrics_contains(r#"http_server_request_duration_seconds_count{error_type="Bad Request",graphql_error="true",http_request_method="POST",http_response_status_code="400""#, None).await; router.graceful_shutdown().await; Ok(()) @@ -189,12 +190,12 @@ async fn test_router_rate_limit() -> Result<(), BoxError> { assert_yaml_snapshot!(response); let (_, response) = router.execute_default_query().await; - assert_eq!(response.status(), 429); + assert_eq!(response.status(), 503); let response = response.text().await?; assert!(response.contains("REQUEST_RATE_LIMITED")); assert_yaml_snapshot!(response); - router.assert_metrics_contains(r#"apollo_router_graphql_error_total{code="REQUEST_RATE_LIMITED",otel_scope_name="apollo/router"} 1"#, None).await; + router.assert_metrics_contains(r#"http_server_request_duration_seconds_count{error_type="Service Unavailable",http_request_method="POST",http_response_status_code="503""#, None).await; router.graceful_shutdown().await; Ok(()) diff --git a/dev-docs/BACKPRESSURE_REVIEW_NOTES.md b/dev-docs/BACKPRESSURE_REVIEW_NOTES.md new file mode 100644 index 0000000000..63e997d893 --- /dev/null +++ b/dev-docs/BACKPRESSURE_REVIEW_NOTES.md @@ -0,0 +1,127 @@ +# Back-Pressure Review + +This PR is intended to introduce some backpressure into the router so that traffic shaping works effectively. + +## Pipeline Change Motivations + +### Lack of feedback + +Poll::Ready(Ok(())) is a sign that a service is not going to ask the successor service if it's ready. 
In a pipeline that would like to exercise back-pressure this is not a good thing. + +We don't exercise any back-pressure before we hit the router service. The main reason for this is a desire to put telemetry at the front of the back-pressure pipe. Currently, the router service accepts requests and unquestioningly (recent changes to the query planner mean, it's not entirely unquestioning, but ...) starts to process them. The main motivation of this change is to allow services to poll successors for readiness. + +### State + +Some services require state to work effectively. Creating and throwing away our pipeline for every request is both wasteful and breaks our ability to use state-aware layers, such as RateLimits. The secondary motivation for this change is the ability to use standard Tower layers for rate limits or other stateful functionality. + +## Review Sequencing + +I've tried to group all the files which are interesting to review here. All files should be reviewed, but these notes are intended to make understanding the more complex changes a little simpler. + +### Pipeline Cloning + +The most complex (and probably most difficult to understand) changes are in the various affected services which implement ServiceFactory. I've not fixed all instances. I've done enough to allow us to exercise effective backpressure from RouterService -> ExecutionService and then again through the SubgraphService. The reason I haven't converted all services is because it's hard/complicated and the improvements we get from these changes are probably enough to materially improve the behaviour of the router. We may be able to do the same thing to other services in 2.1 or it may have to wait until 3.0. We need a Mutex to store our single service under because the various consumers of ServiceFactory require Sync access to our various services. 
If we could unpick that change of responsibilities we could maybe improve this so that we don't need the Mutex, but I think it would be major open heart surgery on the router to do this. In the performance testing I've performed so far, the Mutex doesn't seem to be a hot spot since it's called once per connection for Router and Service (multiple times for subgraphs) and is a lightweight Arc clone() under the lock which should be pretty fast. + +#### Router Service + +There are substantial changes here to form the start of our cloneable, backpressure enabled pipeline. The primary changes are + - Not to provide a supergraph creator to the router service. We want it to use the same supergraph service as long as it lives. + - Only create the router create service builder once (in the constructor). We store it under a Mutex to keep it Sync and then clone it whenever we need it. + +apollo-router/src/services/router/service.rs + +#### Supergraph Service + +We make a lot of changes here so that our downstream execution service is preserved. Again we only build a single service which we store under a mutex and lock/clone as required. + +apollo-router/src/services/supergraph/service.rs + +#### Subgraph Service + +We do the same thing as the router service and only create a single Subgraph service for each subgraph. The complication is that we have multiple subgraphs, so we store them in a hash. + +apollo-router/src/services/subgraph_service.rs + +### BackPressure Preservation + +In a number of places, we could have preserved backpressure, but didn't. These are fixed in this PR. + +apollo-router/src/plugins/traffic_shaping/deduplication.rs +apollo-router/src/services/hickory_dns_connector.rs + +### Backpressure Control + +In traffic shaping we have the bulk of implementation which exercises control over our new backpressure, via load_shed. 
+ +apollo-router/src/plugins/traffic_shaping/mod.rs + +### Removing Stuff + +OneShotAsyncCheckpoint is a potential footgun in a backpressure pipeline. I've removed it. A number of files are impacted by now using AsyncCheckpoint which requires `buffered`. + +apollo-router/src/layers/async_checkpoint.rs +apollo-router/src/layers/mod.rs +apollo-router/src/plugins/coprocessor/execution.rs +apollo-router/src/plugins/coprocessor/mod.rs +apollo-router/src/plugins/coprocessor/supergraph.rs +apollo-router/src/plugins/file_uploads/mod.rs +apollo-router/src/services/layers/allow_only_http_post_mutations.rs + +### Changes because pipeline is now Cloned + +apollo-router/src/plugin/test/mock/subgraph.rs +apollo-router/src/plugins/cache/entity.rs +apollo-router/src/plugins/cache/metrics.rs +apollo-router/src/plugins/cache/tests.rs +apollo-router/src/services/layers/apq.rs + +### Comments on layer review + +There are a number of comments in the PR relating to dev-docs/layer-inventory.md. + +apollo-router/src/plugins/authorization/mod.rs +apollo-router/src/services/layers/content_negotiation.rs +apollo-router/src/services/layers/query_analysis.rs +apollo-router/src/services/layers/static_page.rs + +### Changes to Snapshots + +#### New Concurrency Feature Added + +apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap + +#### Changes to error/extension codes + +apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__router_timeout.snap +apollo-router/tests/integration/snapshots/integration_tests__integration__traffic_shaping__subgraph_timeout.snap + +### Changes because of old cruft (usually in tests) + +apollo-router/src/axum_factory/tests.rs + +This should probably have been deleted a long time ago. 
I replaced it with the new test `it_timeout_router_requests` in apollo-router/src/plugins/traffic_shaping/mod.rs + +### Examples + +Mock expectations have changed and OneShotAsyncCheckpoint has been removed. Examples were updated to match the changes. + +### Stuff I'm not sure about NOTE: IT MAY BE LAST IN THE FILE BUT IT NEEDS MOST CAREFUL REVIEW + +#### Body Limits + +This change is a complicated re-factoring so that limits are maintained for a connection. The previous strategy of keeping BodyLimitControl under an Arc does not work, because cloning the pipeline would mean it is shared across all connections. The PR changes this so that limits are still shared across all requests on a single connection, but not across all connections. This enables dynamic updating of the limit to work, but I'm not 100% sure this is the right thing to do. + +This requires detailed review and discussion. + +apollo-router/src/plugins/limits/layer.rs +apollo-router/src/plugins/limits/mod.rs + +#### Sundry stuff I'm not sure about + +I don't know if we want the `mock_subgraph_service_withf_panics_should_be_reported_as_service_closed` test anymore + +apollo-router/src/query_planner/tests.rs + +Now that traffic shaping happens earlier in the router, some things, which probably never were graphql errors anyway, are now no longer reported as graphql errors. I think the answer is to document this in the migration guide, since they now appear as OTEL response standard errors. + +apollo-router/tests/integration/traffic_shaping.rs diff --git a/dev-docs/layer-inventory.md b/dev-docs/layer-inventory.md new file mode 100644 index 0000000000..7141cc24fa --- /dev/null +++ b/dev-docs/layer-inventory.md @@ -0,0 +1,174 @@ +# Layer Inventory +This document is an investigation and overview of our tower layers, and layer-like things, before 2.0. It describes the order and the purpose of each layer, and whether it is Clone (which is roughly a stand-in for being "backpressured pipeline-ready"). 
+ +This is ordered from the point of view of a request to the router, starting at the outer boundary and going "deeper" into the layer onion. + +Still missing are the execution and subgraph client parts of the onion, and layers added by plugins. + +## Axum +Before entering the router service, we have some layers on our axum Router. These are already functioning properly in the tower service stack. + +- "Metrics handler" + - It only implements the `apollo.router.operations` metric. + - This is using `axum::middleware::from_fn`, so it is functioning properly as a tower layer. +- TraceLayer + - This one is from `tower-http`! +- CorsLayer + - This one is from `tower-http`! +- "License handler" + - Logs a warning if the commercial licence is expired + - Rejects requests if the commercial licence is "halted" (expired + a grace period) + - This is using `axum::middleware::from_fn`, so it is functioning properly as a tower layer. +- RequestDecompressionLayer + - This one is from `tower-http`! + +Now, we enter `handle_graphql`. + +- Compression + - This is manually written inside `handle_graphql`, but could conceptually be considered a layer. + - I don't see an obvious reason for why this could not use a standard tower-http compression layer. +- Then we create a router service and oneshot it. + +## Router service +The router service consists of some layers in "front" of the service "proper", and of several layers *inside the router service*, which we appear to call manually. + +I suspect that this is bad and that we should try to make all these layers part of a straightforward tower service stack. + +Front (`RouterCreator`): +- StaticPageLayer + - If configured, responds to any request that accepts a "text/html" response (*at all*, regardless of preference), with a fixed HTML response. + - It is Clone! + - This must occur before content negotiation, which rejects "Accept: text/html" requests. +- Content negotiation: Request-side + - It is Clone! 
+ - This layer rejects requests with invalid Accept or Content-Type headers. + +Plugins: +- Telemetry: InstrumentLayer + - Only used with `SpanMode::Deprecated` + - Maybe a candidate for removal in 2.0? + - **It is not Clone**, but could be trivially derived, if we stick the span_fn in an Arc. +- Telemetry: other work + - A lot of stuff is happening inside a `map_future` layer. I haven't checked this out but I think it's fine from a backpressure/pipeline perspective. + - This would be easier to understand in a named layer, potentially. +- Body limiting (limits plugin) + - Rejects bodies that are too big. It's an equivalent of the tower-http `RequestBodyLimitLayer`. + - **It is not Clone**. We can trivially derive it, but, we should probably use tower-http's implementation. + - There's a bit of a dance happening here that we can hopefully remove. + - Comment: "This layer differs from the tower version in that it will always generate an error eagerly rather than allowing the downstream service to catch and handle the error." + - I do not understand what that means. But it must be related to making the `map_error_to_graphql` call inside the limits plugin work. + - It may not be trivial to change this to use tower-http. +- Traffic shaping: + - Load shedding. + - This is just tower! + - TimeoutLayer + - Only present if a timeout is configured in the router. + - We use a `.map_result()` layer to handle the error and turn it into a GraphQL error response. + - This is provided by `tower`! + - Load shedding. + - Do we need this twice? Wouldn't it all propagate to the outermost^ load shedding layer? + - ConcurrencyLimitLayer + - Only present if a concurrency limit is configured in the router. + - We use a `.map_result()` layer to handle the error and turn it into a GraphQL error response. + - This is provided by `tower`! + - Load shedding. + - Do we need this thrice? Wouldn't it all propagate to the outermost^ load shedding layer? 
+ - RateLimitLayer + - Only present if a global rate limit is configured in the router. + - We use a `.map_result()` layer to handle the error and turn it into a GraphQL error response. + - This is provided by `tower`! +- Fleet detection + - Records router request and response size (unless disabled by environment variable). + - The size counting has the effect of turning all bodies into streams, which may have negative effects! + - It is Clone. +- Authentication: InstrumentLayer + - The same underlying implementation as the one in "Telemetry: InstrumentLayer", but creating a different span. + - **It is not Clone**, but could be trivially derived, if we stick the `span_fn` in an Arc. +- Authentication: implementation + - It reads a JWT from the request and adds various context values. + - It can short-circuit on invalid JWTs. + - This is just a checkpoint layer, so it will be easy to adapt +- File uploads + - Processes multipart requests and enforces file uploads-specific limits (eg. max amount of files). + - The multipart state is moved into extensions and the request is modified to look like a normal GraphQL request. + - **It is not Clone**. This is a oneshot checkpoint layer. At first glance it does not look terribly difficult to change it to a normal checkpoint layer. +- Progressive override + - Adds override labels used in the schema to the context. Coprocessors can supposedly use this. + - It is Clone! This is using `map_request` from tower, so it's fine for backpressure. +- Rhai/coprocessors + - I have not looked deeply into it but I think this will be okay + +Proper (`RouterService`): +- Batching + - This is not a layer but the code can sort of be understood conceptually like one. Maybe it could, should be a layer? + - Splits apart the incoming request into multiple requests that go through the rest of the pipeline, and puts the responses back together. 
+- Persisted queries: Expansion + - This expands requests that use persisted query IDs, and rejects freeform GraphQL requests if not allowed per router configuration. + - This is *not* a real layer right now. I suspect it should be. + - **It is not Clone**, but it looks easy to make it Clone: we can just derive it on this layer and on the ManifestPoller type it contains (which already uses Arc internally) +- APQs + - Clients can submit a hash+query body to add a query to the cache. Afterward, clients can use only the hash, and this layer will populate the query string in the request body before query analysis. + - This is *not* a real layer right now. I suspect it should be. + - **It is not Clone**. I think it's nothing that a little Arc can't solve, but I didn't trace it deeply enough to be sure. +- Query analysis + - This does query parsing and validation and schema-aware hashing, + and field/enum usage tracking for apollo studio. + - This is *not* a real layer right now. I suspect it should be. + - It is Clone! + - This includes an explicit call to the AuthorizationPlugin. I suspect the AuthorizationPlugin should instead add its own layer for this, but chances are there was a good reason to do it this way, like not having precise enough hook-points. Still, maybe it can be a separate layer that we add manually. + - Query analysis also exposes a method that is used by the query planner service, which could just as well be a standalone function in my opinion. +- Persisted queries: Safelisting + - This is *not* a real layer right now. I suspect it should be. + - For requests *not* using persisted queries, this layer checks incoming GraphQL documents against the "free-form GraphQL behaviour" configuration (essentially: safelisting), and rejects requests that are not allowed. + - For Clone-ness, see "Persisted queries: Expansion" +- Straggler bits, not part of sub-layers. 
I think some of these should be normal layers, and some of them should be just a `.map_response()` layer in the service stack. + - It does something with the `Vary` header. + - It adjusts the status code to 499 if the request was canceled. + - It does various JSON serialization bits. + - It does various telemetry bits such as counting errors. + - It appears to do the *exact same thing* as "Content negotiation: Response-side" to populate the Content-Type header. + +## Supergraph service +The supergraph service consists of some layers in "front" of the service "proper", and several interacting services *inside* the supergraph service. + +The implementation of those interactions is more complicated than in the router service, but I think many things could probably be implemented as a normal tower service stack, and we could benefit from doing that. + +Front (`SupergraphCreator`): +- Content negotiation: Response-side + - It is Clone! + - This layer sets the Content-Type header on the response. +- AllowOnlyHttpPostMutationsLayer is the final step before going into the supergraph service proper. + - **It is not Clone** today but it can trivially be derived. + +Plugin layers happen here or in between somewhere, TBD. +Plugins: +- CSRF + - This is a checkpoint layer. + - **It is not Clone**, but CheckpointLayer can trivially be made Clone, I think. + - Rejects requests that might be cross-site request forgery... + - TODO(@goto-bus-stop): I'm not actually sure how this works +- Telemetry: InstrumentLayer + - The same underlying implementation as the one in `router_service`, but creating a different span. + - **It is not Clone**, but could be trivially derived, if we stick the `span_fn` in an Arc. +- Telemetry: other work + - A lot of stuff is happening inside a `map_response` and a `map_future` layer. + - This copies several resources when the service is created, and uses them for the entire lifetime of the service. 
This will not do if we do not create the pipeline from scratch for every request. + - This would be easier to understand if split apart into several named layers, potentially. +- Authorization + - Rejects requests if not authenticated. + - I'm extremely confused why this happens here as opposed to the authentication plugin. + - This is a checkpoint layer. +- File uploads + - Patches up variables in the parsed JSON to pass validation. + - **It is not Clone**. This is a oneshot checkpoint layer, but it looks easy enough to update. +- Entity caching: Cache control headers + - Sets cache control headers on responses based on context. The context is populated by the subgraph service. +- Progressive override + - Collects overridden labels from the context (added by rhai or coprocessors), calculates the special percentage-chance labels to override, and adds *all* enabled override labels to the context for use by the query planner service. + - It is Clone! This is using `map_request` from tower, so it's fine for backpressure. +- Connectors + - Not sure what this does in detail but it's using `map_future_with_request_data` which is probably fine. +- Rhai/coprocessors + - I have not looked deeply into it but I think this will be okay + +The bulk of the "Proper" `SupergraphService` is doing query planning, and then handing things off to the execution service. This probably could be conceptualised as 2 layers, or 3 if there is also one to handle subscriptions. 
diff --git a/docs/source/reference/migration/from-router-v1.mdx b/docs/source/reference/migration/from-router-v1.mdx index 7268f1ecc7..d78ff41459 100644 --- a/docs/source/reference/migration/from-router-v1.mdx +++ b/docs/source/reference/migration/from-router-v1.mdx @@ -237,6 +237,58 @@ And ensure that you have enabled OTLP support in your Jaeger instance using `COL If you already configured [CORS](../../routing/security/cors) on the router, the configuration won't change but with v1.x it accepted bad configuration and displayed an error log when some header names or regexes were invalid. With v2.x it will end up with an error and it won't start at all to avoid confusions and misconfigurations. +### OneShotAsyncCheckpoint + +This has been removed because it is incompatible with the architectural optimizations we've made in Router 2.0. If you were using this in a Rust Plugin, you can replace it easily with `AsyncCheckpoint` as follows: + +Plugin code using `OneShotAsyncCheckpoint` + +``` + OneShotAsyncCheckpointLayer::new(move |request: execution::Request| { + let request_config = request_config.clone(); + + } + .service(service) + .boxed() +``` + +Replace with `AsyncCheckpoint` + +``` + AsyncCheckpointLayer::new(move |request: execution::Request| { + let request_config = request_config.clone(); + + } + .buffered() + .service(service) + .boxed() +``` + +The `buffered()` method is provided by the `apollo_router::layers::ServiceBuilderExt` trait and ensures that your service may be cloned. + + + +### Traffic Shaping + +Traffic shaping has been improved significantly in Router 2.0. We've added a new mechanism, concurrency control, and we've improved the router's ability to observe timeout and traffic shaping restrictions correctly. 
These improvements do mean that clients of the router may see an increase in: + + - [Service Unavailable](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/503) + - [Gateway Timeout](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/504) + +errors as traffic shaping constraints are enforced. + +We recommend that users experiment with their configuration in order to arrive at the right combination of timeout, concurrency and rate limit controls for their particular use case. + +For more information, see [configuring traffic shaping](../../routing/performance/traffic-shaping.mdx). + + + +### Subgraph Request/Response + +If, in your Rust plugin, you were using either the subgraph::Request::subgraph_name or the subgraph::Response::subgraph_name, please note that these are no longer `Option`, but are mandatory. + + + ### Deploy your router Make sure that you are referencing the correct router release: **v2.0.0-preview.0** diff --git a/docs/source/routing/customization/native-plugins.mdx b/docs/source/routing/customization/native-plugins.mdx index aa27eb991c..cdb87a51d5 100644 --- a/docs/source/routing/customization/native-plugins.mdx +++ b/docs/source/routing/customization/native-plugins.mdx @@ -161,7 +161,6 @@ Some notable layers are: * **buffered** - Make a service `Clone`. Typically required for any `async` layers. * **checkpoint** - Perform a sync call to decide if a request should proceed or not. Useful for validation. * **checkpoint_async** - Perform an async call to decide if the request should proceed or not. e.g. for Authentication. Requires `buffered`. -* **oneshot_checkpoint_async** - Perform an async call to decide if the request should proceed or not. e.g. for Authentication. Does not require `buffered` and should be preferred to `checkpoint_async` for that reason. * **instrument** - Add a tracing span around a service. * **map_request** - Transform the request before proceeding. e.g. for header manipulation. 
* **map_response** - Transform the response before proceeding. e.g. for header manipulation. diff --git a/docs/source/routing/performance/traffic-shaping.mdx b/docs/source/routing/performance/traffic-shaping.mdx index 9e1da28a71..b2ea73dfd2 100644 --- a/docs/source/routing/performance/traffic-shaping.mdx +++ b/docs/source/routing/performance/traffic-shaping.mdx @@ -17,6 +17,7 @@ traffic_shaping: capacity: 10 interval: 5s # Must not be greater than 18_446_744_073_709_551_615 milliseconds and not less than 0 milliseconds timeout: 50s # If a request to the router takes more than 50secs then cancel the request (30 sec by default) + concurrency_limit: 100 # The router is limited to processing 100 concurrent requests. Excess requests must be rejected. all: deduplicate_query: true # Enable query deduplication for all subgraphs. compression: br # Enable brotli compression for all subgraphs. @@ -85,6 +86,22 @@ Since [deferred](/router/executing-operations/defer-support/#what-is-defer) frag +### Concurrency + +By default, the router does not enforce a concurrency limit. To enforce a limit, change your config to include: + +```yaml title="router.yaml" +traffic_shaping: + router: + concurrency_limit: 100 # The router is limited to processing 100 concurrent requests. Excess requests must be rejected. +``` + + + +This is not a concurrency limit on connections, but a limit on the number of active requests being processed by the router at any one time. + + + ### Compression Compression is automatically supported on the client side, depending on the `Accept-Encoding` header provided by the client. 
diff --git a/examples/async-auth/rust/src/allow_client_id_from_file.rs b/examples/async-auth/rust/src/allow_client_id_from_file.rs index a41e6ce370..39a0efe247 100644 --- a/examples/async-auth/rust/src/allow_client_id_from_file.rs +++ b/examples/async-auth/rust/src/allow_client_id_from_file.rs @@ -48,12 +48,12 @@ impl Plugin for AllowClientIdFromFile { // switching the async file read with an async http request fn supergraph_service(&self, service: supergraph::BoxService) -> supergraph::BoxService { let header_key = self.header.clone(); - // oneshot_async_checkpoint is an async function. + // async_checkpoint is an async function. // this means it will run whenever the service `await`s it // given we're getting a mutable reference to self, // self won't be present anymore when we `await` the checkpoint. // - // this is solved by cloning the path and moving it into the oneshot_async_checkpoint callback. + // this is solved by cloning the path and moving it into the async_checkpoint callback. // // see https://rust-lang.github.io/async-book/03_async_await/01_chapter.html#async-lifetimes for more information let allowed_ids_path = self.allowed_ids_path.clone(); @@ -141,12 +141,16 @@ impl Plugin for AllowClientIdFromFile { } } }; - // `ServiceBuilder` provides us with an `oneshot_async_checkpoint` method. + // `ServiceBuilder` provides us with an `async_checkpoint` method. + // + // Because our service is not `Clone`, we use the `ServiceExt` trait's `buffered` method to + // make the service `Clone`. // // This method allows us to return ControlFlow::Continue(request) if we want to let the request through, // or ControlFlow::Break(response) with a crafted response if we don't want the request to go through. 
ServiceBuilder::new() - .oneshot_checkpoint_async(handler) + .checkpoint_async(handler) + .buffered() .service(service) .boxed() } diff --git a/examples/cache-control/rhai/src/main.rs b/examples/cache-control/rhai/src/main.rs index 14d1d53e0b..c299262fa0 100644 --- a/examples/cache-control/rhai/src/main.rs +++ b/examples/cache-control/rhai/src/main.rs @@ -22,14 +22,15 @@ mod tests { let mut mock_service1 = test::MockSubgraphService::new(); let mut mock_service2 = test::MockSubgraphService::new(); - mock_service1.expect_clone().return_once(|| { + mock_service1.expect_clone().returning(move || { let mut mock_service = test::MockSubgraphService::new(); + let value = header_one.clone(); mock_service .expect_call() .once() .returning(move |req: subgraph::Request| { let mut headers = HeaderMap::new(); - if let Some(value) = &header_one { + if let Some(value) = &value { headers.insert("cache-control", value.parse().unwrap()); } @@ -41,14 +42,15 @@ mod tests { mock_service }); - mock_service2.expect_clone().return_once(move || { + mock_service2.expect_clone().returning(move || { let mut mock_service = test::MockSubgraphService::new(); + let value = header_two.clone(); mock_service .expect_call() .once() .returning(move |req: subgraph::Request| { let mut headers = HeaderMap::new(); - if let Some(value) = &header_two { + if let Some(value) = &value { headers.insert("cache-control", value.parse().unwrap()); } diff --git a/examples/cookies-to-headers/rhai/src/main.rs b/examples/cookies-to-headers/rhai/src/main.rs index 6ff83b1d75..de52fe0418 100644 --- a/examples/cookies-to-headers/rhai/src/main.rs +++ b/examples/cookies-to-headers/rhai/src/main.rs @@ -48,7 +48,7 @@ mod tests { let expected_mock_response_data = "response created within the mock"; // Let's set up our mock to make sure it will be called once - mock_service.expect_clone().return_once(move || { + mock_service.expect_clone().returning(move || { let mut mock_service = test::MockSubgraphService::new(); mock_service 
.expect_call()