diff --git a/api/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto b/api/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
index 7766ee2573d00..1cf6c5f2fa52c 100644
--- a/api/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
+++ b/api/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
@@ -19,7 +19,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
 // Local Rate limit :ref:`configuration overview <config_http_filters_local_rate_limit>`.
 // [#extension: envoy.filters.http.local_ratelimit]
 
-// [#next-free-field: 11]
+// [#next-free-field: 12]
 message LocalRateLimit {
   // The human readable prefix to use when emitting stats.
   string stat_prefix = 1 [(validate.rules).string = {min_len: 1}];
@@ -97,4 +97,13 @@ message LocalRateLimit {
   //
   //  The filter supports a range of 0 - 10 inclusively for stage numbers.
   uint32 stage = 9 [(validate.rules).uint32 = {lte: 10}];
+
+  // Specifies the scope of the rate limiter's token bucket.
+  // If set to false, the token bucket is shared across all worker threads,
+  // thus the rate limits are applied per Envoy process.
+  // If set to true, a token bucket is allocated for each connection.
+  // Thus the rate limits are applied per connection thereby allowing
+  // one to rate limit requests on a per connection basis.
+  // If unspecified, the default value is false.
+  bool local_rate_limit_per_downstream_connection = 11;
 }
diff --git a/docs/root/configuration/http/http_filters/local_rate_limit_filter.rst b/docs/root/configuration/http/http_filters/local_rate_limit_filter.rst
index 4467ba080a41f..0b890cce0bf5e 100644
--- a/docs/root/configuration/http/http_filters/local_rate_limit_filter.rst
+++ b/docs/root/configuration/http/http_filters/local_rate_limit_filter.rst
@@ -22,8 +22,9 @@ configured to be returned.
 <envoy_v3_api_field_extensions.filters.http.local_ratelimit.v3.LocalRateLimit.request_headers_to_add_when_not_enforced>` can be
 configured to be added to forwarded requests to the upstream when the local rate limit filter is enabled but not enforced.
 
-.. note::
-  The token bucket is shared across all workers, thus the rate limits are applied per Envoy process.
+Depending on the value of the config :ref:`local_rate_limit_per_downstream_connection <envoy_v3_api_field_extensions.filters.http.local_ratelimit.v3.LocalRateLimit.local_rate_limit_per_downstream_connection>`,
+the token bucket is either shared across all workers or allocated per downstream connection. This results in the local rate limits being applied either per Envoy process or per downstream connection.
+By default the rate limits are applied per Envoy process.
 
 Example configuration
 ---------------------
@@ -55,6 +56,7 @@ Example filter configuration for a globally set rate limiter (e.g.: all vhosts/r
         header:
           key: x-local-rate-limit
           value: 'true'
+    local_rate_limit_per_downstream_connection: false
 
 
 Example filter configuration for a globally disabled rate limiter but enabled for a specific route:
diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst
index 449d76c1bd2c0..9421224989bd0 100644
--- a/docs/root/version_history/current.rst
+++ b/docs/root/version_history/current.rst
@@ -73,6 +73,7 @@ New Features
 * http: added the ability to :ref:`unescape slash sequences<envoy_v3_api_field_extensions.filters.network.http_connection_manager.v3.HttpConnectionManager.path_with_escaped_slashes_action>` in the path. Requests with unescaped slashes can be proxied, rejected or redirected to the new unescaped path. By default this feature is disabled. The default behavior can be overridden through :ref:`http_connection_manager.path_with_escaped_slashes_action<config_http_conn_man_runtime_path_with_escaped_slashes_action>` runtime variable. This action can be selectively enabled for a portion of requests by setting the :ref:`http_connection_manager.path_with_escaped_slashes_action_sampling<config_http_conn_man_runtime_path_with_escaped_slashes_action_enabled>` runtime variable.
 * http: added upstream and downstream alpha HTTP/3 support! See :ref:`quic_options <envoy_v3_api_field_config.listener.v3.UdpListenerConfig.quic_options>` for downstream and the new http3_protocol_options in :ref:`http_protocol_options <envoy_v3_api_msg_extensions.upstreams.http.v3.HttpProtocolOptions>` for upstream HTTP/3.
 * listener: added ability to change an existing listener's address.
+* local_rate_limit_filter: added support for locally rate limiting HTTP requests on a per connection basis. This can be enabled by setting the :ref:`local_rate_limit_per_downstream_connection <envoy_v3_api_field_extensions.filters.http.local_ratelimit.v3.LocalRateLimit.local_rate_limit_per_downstream_connection>` field to true.
 * metric service: added support for sending metric tags as labels. This can be enabled by setting the :ref:`emit_tags_as_labels <envoy_v3_api_field_config.metrics.v3.MetricsServiceConfig.emit_tags_as_labels>` field to true.
 * tcp: added support for :ref:`preconnecting <v1.18.0:envoy_v3_api_msg_config.cluster.v3.Cluster.PreconnectPolicy>`. Preconnecting is off by default, but recommended for clusters serving latency-sensitive traffic.
 * udp_proxy: added :ref:`key <envoy_v3_api_msg_extensions.filters.udp.udp_proxy.v3.UdpProxyConfig.HashPolicy>` as another hash policy to support hash based routing on any given key.
diff --git a/generated_api_shadow/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto b/generated_api_shadow/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
index 7766ee2573d00..1cf6c5f2fa52c 100644
--- a/generated_api_shadow/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
+++ b/generated_api_shadow/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
@@ -19,7 +19,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
 // Local Rate limit :ref:`configuration overview <config_http_filters_local_rate_limit>`.
 // [#extension: envoy.filters.http.local_ratelimit]
 
-// [#next-free-field: 11]
+// [#next-free-field: 12]
 message LocalRateLimit {
   // The human readable prefix to use when emitting stats.
   string stat_prefix = 1 [(validate.rules).string = {min_len: 1}];
@@ -97,4 +97,13 @@ message LocalRateLimit {
   //
   //  The filter supports a range of 0 - 10 inclusively for stage numbers.
   uint32 stage = 9 [(validate.rules).uint32 = {lte: 10}];
+
+  // Specifies the scope of the rate limiter's token bucket.
+  // If set to false, the token bucket is shared across all worker threads,
+  // thus the rate limits are applied per Envoy process.
+  // If set to true, a token bucket is allocated for each connection.
+  // Thus the rate limits are applied per connection thereby allowing
+  // one to rate limit requests on a per connection basis.
+  // If unspecified, the default value is false.
+  bool local_rate_limit_per_downstream_connection = 11;
 }
diff --git a/source/extensions/filters/http/local_ratelimit/local_ratelimit.cc b/source/extensions/filters/http/local_ratelimit/local_ratelimit.cc
index 30ed854930e51..741ec1b3346c0 100644
--- a/source/extensions/filters/http/local_ratelimit/local_ratelimit.cc
+++ b/source/extensions/filters/http/local_ratelimit/local_ratelimit.cc
@@ -13,18 +13,24 @@ namespace Extensions {
 namespace HttpFilters {
 namespace LocalRateLimitFilter {
 
+const std::string& PerConnectionRateLimiter::key() {
+  CONSTRUCT_ON_FIRST_USE(std::string, "per_connection_local_rate_limiter");
+}
+
 FilterConfig::FilterConfig(
     const envoy::extensions::filters::http::local_ratelimit::v3::LocalRateLimit& config,
     const LocalInfo::LocalInfo& local_info, Event::Dispatcher& dispatcher, Stats::Scope& scope,
     Runtime::Loader& runtime, const bool per_route)
     : status_(toErrorCode(config.status().code())),
       stats_(generateStats(config.stat_prefix(), scope)),
+      fill_interval_(std::chrono::milliseconds(
+          PROTOBUF_GET_MS_OR_DEFAULT(config.token_bucket(), fill_interval, 0))),
+      max_tokens_(config.token_bucket().max_tokens()),
+      tokens_per_fill_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(config.token_bucket(), tokens_per_fill, 1)),
+      descriptors_(config.descriptors()),
+      rate_limit_per_connection_(config.local_rate_limit_per_downstream_connection()),
       rate_limiter_(Filters::Common::LocalRateLimit::LocalRateLimiterImpl(
-          std::chrono::milliseconds(
-              PROTOBUF_GET_MS_OR_DEFAULT(config.token_bucket(), fill_interval, 0)),
-          config.token_bucket().max_tokens(),
-          PROTOBUF_GET_WRAPPED_OR_DEFAULT(config.token_bucket(), tokens_per_fill, 1), dispatcher,
-          config.descriptors())),
+          fill_interval_, max_tokens_, tokens_per_fill_, dispatcher, descriptors_)),
       local_info_(local_info), runtime_(runtime),
       filter_enabled_(
           config.has_filter_enabled()
@@ -84,7 +90,7 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers,
     populateDescriptors(descriptors, headers);
   }
 
-  if (config->requestAllowed(descriptors)) {
+  if (requestAllowed(descriptors)) {
     config->stats().ok_.inc();
     return Http::FilterHeadersStatus::Continue;
   }
@@ -109,6 +115,34 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers,
   return Http::FilterHeadersStatus::StopIteration;
 }
 
+bool Filter::requestAllowed(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) {
+  const auto* config = getConfig();
+  return config->rateLimitPerConnection()
+             ? getPerConnectionRateLimiter().requestAllowed(request_descriptors)
+             : config->requestAllowed(request_descriptors);
+}
+
+const Filters::Common::LocalRateLimit::LocalRateLimiterImpl& Filter::getPerConnectionRateLimiter() {
+  const auto* config = getConfig();
+  ASSERT(config->rateLimitPerConnection());
+
+  if (!decoder_callbacks_->streamInfo().filterState()->hasData<PerConnectionRateLimiter>(
+          PerConnectionRateLimiter::key())) {
+    decoder_callbacks_->streamInfo().filterState()->setData(
+        PerConnectionRateLimiter::key(),
+        std::make_unique<PerConnectionRateLimiter>(
+            config->fillInterval(), config->maxTokens(), config->tokensPerFill(),
+            decoder_callbacks_->dispatcher(), config->descriptors()),
+        StreamInfo::FilterState::StateType::ReadOnly,
+        StreamInfo::FilterState::LifeSpan::Connection);
+  }
+
+  return decoder_callbacks_->streamInfo()
+      .filterState()
+      ->getDataReadOnly<PerConnectionRateLimiter>(PerConnectionRateLimiter::key())
+      .value();
+}
+
 void Filter::populateDescriptors(std::vector<RateLimit::LocalDescriptor>& descriptors,
                                  Http::RequestHeaderMap& headers) {
   Router::RouteConstSharedPtr route = decoder_callbacks_->route();
diff --git a/source/extensions/filters/http/local_ratelimit/local_ratelimit.h b/source/extensions/filters/http/local_ratelimit/local_ratelimit.h
index af27e23efe4ef..380fd212a5052 100644
--- a/source/extensions/filters/http/local_ratelimit/local_ratelimit.h
+++ b/source/extensions/filters/http/local_ratelimit/local_ratelimit.h
@@ -42,6 +42,23 @@ struct LocalRateLimitStats {
   ALL_LOCAL_RATE_LIMIT_STATS(GENERATE_COUNTER_STRUCT)
 };
 
+class PerConnectionRateLimiter : public StreamInfo::FilterState::Object {
+public:
+  PerConnectionRateLimiter(
+      const std::chrono::milliseconds& fill_interval, uint32_t max_tokens, uint32_t tokens_per_fill,
+      Envoy::Event::Dispatcher& dispatcher,
+      const Protobuf::RepeatedPtrField<
+          envoy::extensions::common::ratelimit::v3::LocalRateLimitDescriptor>& descriptor)
+      : rate_limiter_(fill_interval, max_tokens, tokens_per_fill, dispatcher, descriptor) {}
+  static const std::string& key();
+  const Filters::Common::LocalRateLimit::LocalRateLimiterImpl& value() const {
+    return rate_limiter_;
+  }
+
+private:
+  Filters::Common::LocalRateLimit::LocalRateLimiterImpl rate_limiter_;
+};
+
 /**
  * Global configuration for the HTTP local rate limit filter.
  */
@@ -62,6 +79,15 @@ class FilterConfig : public Router::RouteSpecificFilterConfig {
   Http::Code status() const { return status_; }
   uint64_t stage() const { return stage_; }
   bool hasDescriptors() const { return has_descriptors_; }
+  const std::chrono::milliseconds& fillInterval() const { return fill_interval_; }
+  uint32_t maxTokens() const { return max_tokens_; }
+  uint32_t tokensPerFill() const { return tokens_per_fill_; }
+  const Protobuf::RepeatedPtrField<
+      envoy::extensions::common::ratelimit::v3::LocalRateLimitDescriptor>&
+  descriptors() const {
+    return descriptors_;
+  }
+  bool rateLimitPerConnection() const { return rate_limit_per_connection_; }
 
 private:
   friend class FilterTest;
@@ -78,6 +104,13 @@ class FilterConfig : public Router::RouteSpecificFilterConfig {
 
   const Http::Code status_;
   mutable LocalRateLimitStats stats_;
+  const std::chrono::milliseconds fill_interval_;
+  const uint32_t max_tokens_;
+  const uint32_t tokens_per_fill_;
+  const Protobuf::RepeatedPtrField<
+      envoy::extensions::common::ratelimit::v3::LocalRateLimitDescriptor>
+      descriptors_;
+  const bool rate_limit_per_connection_;
   Filters::Common::LocalRateLimit::LocalRateLimiterImpl rate_limiter_;
   const LocalInfo::LocalInfo& local_info_;
   Runtime::Loader& runtime_;
@@ -108,6 +141,8 @@ class Filter : public Http::PassThroughFilter {
 
   void populateDescriptors(std::vector<RateLimit::LocalDescriptor>& descriptors,
                            Http::RequestHeaderMap& headers);
+  const Filters::Common::LocalRateLimit::LocalRateLimiterImpl& getPerConnectionRateLimiter();
+  bool requestAllowed(absl::Span<const RateLimit::LocalDescriptor> request_descriptors);
 
   const FilterConfig* getConfig() const;
   FilterConfigSharedPtr config_;
diff --git a/test/extensions/filters/http/local_ratelimit/filter_test.cc b/test/extensions/filters/http/local_ratelimit/filter_test.cc
index cd9b6ec84f7b0..1614b0520aaa1 100644
--- a/test/extensions/filters/http/local_ratelimit/filter_test.cc
+++ b/test/extensions/filters/http/local_ratelimit/filter_test.cc
@@ -39,7 +39,12 @@ stat_prefix: test
     header:
       key: x-local-ratelimited
       value: 'true'
+local_rate_limit_per_downstream_connection: {}
   )";
+// '{}' used in the yaml config above are position dependent placeholders used for substitutions.
+// Different test cases toggle functionality based on these positional placeholder variables.
+// For instance, fmt::format(config_yaml, "1", "false") substitutes '1' and 'false' for 'max_tokens'
+// and 'local_rate_limit_per_downstream_connection' configurations, respectively.
 
 class FilterTest : public testing::Test {
 public:
@@ -58,12 +63,18 @@ class FilterTest : public testing::Test {
                        testing::Matcher<const envoy::type::v3::FractionalPercent&>(Percent(100))))
         .WillRepeatedly(testing::Return(enforced));
 
+    ON_CALL(decoder_callbacks_, dispatcher()).WillByDefault(ReturnRef(dispatcher_));
+    ON_CALL(decoder_callbacks_2_, dispatcher()).WillByDefault(ReturnRef(dispatcher_));
+
     envoy::extensions::filters::http::local_ratelimit::v3::LocalRateLimit config;
     TestUtility::loadFromYaml(yaml, config);
     config_ = std::make_shared<FilterConfig>(config, local_info_, dispatcher_, stats_, runtime_,
                                              per_route);
     filter_ = std::make_shared<Filter>(config_);
     filter_->setDecoderFilterCallbacks(decoder_callbacks_);
+
+    filter_2_ = std::make_shared<Filter>(config_);
+    filter_2_->setDecoderFilterCallbacks(decoder_callbacks_2_);
   }
   void setup(const std::string& yaml, const bool enabled = true, const bool enforced = true) {
     setupPerRoute(yaml, enabled, enforced);
@@ -78,25 +89,27 @@ class FilterTest : public testing::Test {
 
   Stats::IsolatedStoreImpl stats_;
   testing::NiceMock<Http::MockStreamDecoderFilterCallbacks> decoder_callbacks_;
+  testing::NiceMock<Http::MockStreamDecoderFilterCallbacks> decoder_callbacks_2_;
   NiceMock<Event::MockDispatcher> dispatcher_;
   NiceMock<Runtime::MockLoader> runtime_;
   NiceMock<LocalInfo::MockLocalInfo> local_info_;
   std::shared_ptr<FilterConfig> config_;
   std::shared_ptr<Filter> filter_;
+  std::shared_ptr<Filter> filter_2_;
 };
 
 TEST_F(FilterTest, Runtime) {
-  setup(fmt::format(config_yaml, "1"), false, false);
+  setup(fmt::format(config_yaml, "1", "false"), false, false);
   EXPECT_EQ(&runtime_, &(config_->runtime()));
 }
 
 TEST_F(FilterTest, ToErrorCode) {
-  setup(fmt::format(config_yaml, "1"), false, false);
+  setup(fmt::format(config_yaml, "1", "false"), false, false);
   EXPECT_EQ(Http::Code::BadRequest, toErrorCode(400));
 }
 
 TEST_F(FilterTest, Disabled) {
-  setup(fmt::format(config_yaml, "1"), false, false);
+  setup(fmt::format(config_yaml, "1", "false"), false, false);
   auto headers = Http::TestRequestHeaderMapImpl();
   EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(headers, false));
   EXPECT_EQ(0U, findCounter("test.http_local_rate_limit.enabled"));
@@ -104,18 +117,31 @@ TEST_F(FilterTest, Disabled) {
 }
 
 TEST_F(FilterTest, RequestOk) {
-  setup(fmt::format(config_yaml, "1"));
+  setup(fmt::format(config_yaml, "1", "false"));
   auto headers = Http::TestRequestHeaderMapImpl();
   EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(headers, false));
-  EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.enabled"));
-  EXPECT_EQ(0U, findCounter("test.http_local_rate_limit.enforced"));
+  EXPECT_EQ(Http::FilterHeadersStatus::StopIteration, filter_2_->decodeHeaders(headers, false));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.enabled"));
+  EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.enforced"));
   EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.ok"));
+  EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.rate_limited"));
+}
+
+TEST_F(FilterTest, RequestOkPerConnection) {
+  setup(fmt::format(config_yaml, "1", "true"));
+  auto headers = Http::TestRequestHeaderMapImpl();
+  EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(headers, false));
+  EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_2_->decodeHeaders(headers, false));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.enabled"));
+  EXPECT_EQ(0U, findCounter("test.http_local_rate_limit.enforced"));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.ok"));
+  EXPECT_EQ(0U, findCounter("test.http_local_rate_limit.rate_limited"));
 }
 
 TEST_F(FilterTest, RequestRateLimited) {
-  setup(fmt::format(config_yaml, "0"));
+  setup(fmt::format(config_yaml, "1", "false"));
 
-  EXPECT_CALL(decoder_callbacks_, sendLocalReply(Http::Code::TooManyRequests, _, _, _, _))
+  EXPECT_CALL(decoder_callbacks_2_, sendLocalReply(Http::Code::TooManyRequests, _, _, _, _))
       .WillOnce(Invoke([](Http::Code code, absl::string_view body,
                           std::function<void(Http::ResponseHeaderMap & headers)> modify_headers,
                           const absl::optional<Grpc::Status::GrpcStatus> grpc_status,
@@ -136,16 +162,61 @@ TEST_F(FilterTest, RequestRateLimited) {
   auto request_headers = Http::TestRequestHeaderMapImpl();
   auto expected_headers = Http::TestRequestHeaderMapImpl();
 
-  EXPECT_EQ(Http::FilterHeadersStatus::StopIteration,
-            filter_->decodeHeaders(request_headers, false));
   EXPECT_EQ(request_headers, expected_headers);
-  EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.enabled"));
+  EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers, false));
+  EXPECT_EQ(Http::FilterHeadersStatus::StopIteration,
+            filter_2_->decodeHeaders(request_headers, false));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.enabled"));
   EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.enforced"));
+  EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.ok"));
   EXPECT_EQ(1U, findCounter("test.http_local_rate_limit.rate_limited"));
 }
 
+/*
+This test sets 'local_rate_limit_per_downstream_connection' to true. Doing this enables per
+connection rate limiting, so even though 'max_tokens' is set to 1, two requests are allowed through
+- one on each connection. This is in contrast to the 'RequestOk' test above where only 1 request is
+allowed (across the process) for the same configuration.
+*/
+TEST_F(FilterTest, RequestRateLimitedPerConnection) {
+  setup(fmt::format(config_yaml, "1", "true"));
+
+  EXPECT_CALL(decoder_callbacks_, sendLocalReply(Http::Code::TooManyRequests, _, _, _, _))
+      .WillOnce(Invoke([](Http::Code code, absl::string_view body,
+                          std::function<void(Http::ResponseHeaderMap & headers)> modify_headers,
+                          const absl::optional<Grpc::Status::GrpcStatus> grpc_status,
+                          absl::string_view details) {
+        EXPECT_EQ(Http::Code::TooManyRequests, code);
+        EXPECT_EQ("local_rate_limited", body);
+
+        Http::TestResponseHeaderMapImpl response_headers{{":status", "200"}};
+        modify_headers(response_headers);
+        EXPECT_EQ("true", response_headers.get(Http::LowerCaseString("x-test-rate-limit"))[0]
+                              ->value()
+                              .getStringView());
+
+        EXPECT_EQ(grpc_status, absl::nullopt);
+        EXPECT_EQ(details, "local_rate_limited");
+      }));
+
+  auto request_headers = Http::TestRequestHeaderMapImpl();
+  auto expected_headers = Http::TestRequestHeaderMapImpl();
+
+  EXPECT_EQ(request_headers, expected_headers);
+  EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers, false));
+  EXPECT_EQ(Http::FilterHeadersStatus::StopIteration,
+            filter_->decodeHeaders(request_headers, false));
+  EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_2_->decodeHeaders(request_headers, false));
+  EXPECT_EQ(Http::FilterHeadersStatus::StopIteration,
+            filter_2_->decodeHeaders(request_headers, false));
+  EXPECT_EQ(4U, findCounter("test.http_local_rate_limit.enabled"));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.enforced"));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.ok"));
+  EXPECT_EQ(2U, findCounter("test.http_local_rate_limit.rate_limited"));
+}
+
 TEST_F(FilterTest, RequestRateLimitedButNotEnforced) {
-  setup(fmt::format(config_yaml, "0"), true, false);
+  setup(fmt::format(config_yaml, "0", "false"), true, false);
 
   EXPECT_CALL(decoder_callbacks_, sendLocalReply(Http::Code::TooManyRequests, _, _, _, _)).Times(0);
 
@@ -181,6 +252,7 @@ stat_prefix: test
     header:
       key: x-test-rate-limit
       value: 'true'
+local_rate_limit_per_downstream_connection: true
 descriptors:
 - entries:
    - key: hello