envoyproxy · adisuissa · Mar 10, 2022 · Sep 16, 2021 · Sep 17, 2021 · Sep 17, 2021
diff --git a/api/envoy/extensions/common/ratelimit/v3/ratelimit.proto b/api/envoy/extensions/common/ratelimit/v3/ratelimit.proto
@@ -17,6 +17,23 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
 
 // [#protodoc-title: Common rate limit components]
 
+// Defines which version of the X-RateLimit headers standard to use.
+enum XRateLimitHeadersRFCVersion {
+  // X-RateLimit headers are disabled.
+  OFF = 0;
+
+  // Use `draft RFC Version 03 <https://tools.ietf.org/id/draft-polli-ratelimit-headers-03.html>`_, which adds three headers:
+  //
+  // * ``X-RateLimit-Limit`` - indicates the request-quota associated with the
+  //   client in the current time-window, followed by the description of the
+  //   quota policy. The value is the maximum number of tokens of the token bucket.
+  // * ``X-RateLimit-Remaining`` - indicates the remaining requests in the
+  //   current time-window. The value is the number of tokens remaining in the token bucket.
+  // * ``X-RateLimit-Reset`` - indicates the number of seconds until the reset of
+  //   the current time-window. The value is the time remaining until the next fill of the token bucket.
+  DRAFT_VERSION_03 = 1;
+}
+
 // A RateLimitDescriptor is a list of hierarchical entries that are used by the service to
 // determine the final rate limit key and overall allowed limit. Here are some examples of how
 // they might be used for the domain "envoy".

diff --git a/api/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto b/api/envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.proto
@@ -20,7 +20,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
 // Local Rate limit :ref:`configuration overview <config_http_filters_local_rate_limit>`.
 // [#extension: envoy.filters.http.local_ratelimit]
 
-// [#next-free-field: 12]
+// [#next-free-field: 13]
 message LocalRateLimit {
   // The human readable prefix to use when emitting stats.
   string stat_prefix = 1 [(validate.rules).string = {min_len: 1}];
@@ -107,4 +107,10 @@ message LocalRateLimit {
   // one to rate limit requests on a per connection basis.
   // If unspecified, the default value is false.
   bool local_rate_limit_per_downstream_connection = 11;
+
+  // Defines the standard version to use for X-RateLimit headers emitted by the filter.
+  //
+  // Disabled by default.
+  common.ratelimit.v3.XRateLimitHeadersRFCVersion enable_x_ratelimit_headers = 12
+      [(validate.rules).enum = {defined_only: true}];
 }
diff --git a/api/envoy/extensions/filters/http/ratelimit/v3/rate_limit.proto b/api/envoy/extensions/filters/http/ratelimit/v3/rate_limit.proto
@@ -29,6 +29,8 @@ message RateLimit {
       "envoy.config.filter.http.rate_limit.v2.RateLimit";
 
   // Defines the version of the standard to use for X-RateLimit headers.
+  //
+  // [#next-major-version: unify with local ratelimit, should use common.ratelimit.v3.XRateLimitHeadersRFCVersion instead.]
   enum XRateLimitHeadersRFCVersion {
     // X-RateLimit headers disabled.
     OFF = 0;
@@ -100,6 +102,8 @@ message RateLimit {
   // the `draft RFC <https://tools.ietf.org/id/draft-polli-ratelimit-headers-03.html>`_.
   //
   // Disabled by default.
+  //
+  // [#next-major-version: unify with local ratelimit, should use common.ratelimit.v3.XRateLimitHeadersRFCVersion instead.]
   XRateLimitHeadersRFCVersion enable_x_ratelimit_headers = 8
       [(validate.rules).enum = {defined_only: true}];
 

diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst
@@ -56,6 +56,7 @@ New Features
 * http: make consistent custom header format fields ``%(DOWN|DIRECT_DOWN|UP)STREAM_(LOCAL|REMOTE)_*%`` to provide all combinations of local & remote addresses for upstream & downstream connections.
 * http3: downstream HTTP/3 support is now GA! Upstream HTTP/3 also GA for specific deployments. See :ref:`here <arch_overview_http3>` for details.
 * http3: supports upstream HTTP/3 retries. Automatically retry `0-RTT safe requests <https://www.rfc-editor.org/rfc/rfc7231#section-4.2.1>`_ if they are rejected because they are sent `too early <https://datatracker.ietf.org/doc/html/rfc8470#section-5.2>`_. And automatically retry 0-RTT safe requests if connect attempt fails later on and the cluster is configured with TCP fallback. And add retry on ``http3-post-connect-failure`` policy which allows retry of failed HTTP/3 requests with TCP fallback even after handshake if the cluster is configured with TCP fallback. This feature is guarded by ``envoy.reloadable_features.conn_pool_new_stream_with_early_data_and_http3``.
+* local_ratelimit: added support for ``X-RateLimit-*`` response headers as defined in the `draft RFC <https://tools.ietf.org/id/draft-polli-ratelimit-headers-03.html>`_, enabled via the ``enable_x_ratelimit_headers`` field.
 * matching: the matching API can now express a match tree that will always match by omitting a matcher at the top level.
 
 Deprecated

diff --git a/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.cc b/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.cc
@@ -1,5 +1,7 @@
 #include "source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.h"
 
+#include <chrono>
+
 #include "source/common/protobuf/utility.h"
 
 namespace Envoy {
@@ -25,6 +27,7 @@ LocalRateLimiterImpl::LocalRateLimiterImpl(
   token_bucket_.tokens_per_fill_ = tokens_per_fill;
   token_bucket_.fill_interval_ = absl::FromChrono(fill_interval);
   tokens_.tokens_ = max_tokens;
+  tokens_.fill_time_ = time_source_.monotonicTime();
 
   if (fill_timer_) {
     fill_timer_->enableTimer(fill_interval);
@@ -72,7 +75,7 @@ void LocalRateLimiterImpl::onFillTimer() {
   fill_timer_->enableTimer(absl::ToChronoMilliseconds(token_bucket_.fill_interval_));
 }
 
-void LocalRateLimiterImpl::onFillTimerHelper(const TokenState& tokens,
+void LocalRateLimiterImpl::onFillTimerHelper(TokenState& tokens,
                                              const RateLimit::TokenBucket& bucket) {
   // Relaxed consistency is used for all operations because we don't care about ordering, just the
   // final atomic correctness.
@@ -88,6 +91,9 @@ void LocalRateLimiterImpl::onFillTimerHelper(const TokenState& tokens,
     // Loop while the weak CAS fails trying to update the tokens value.
   } while (!tokens.tokens_.compare_exchange_weak(expected_tokens, new_tokens_value,
                                                  std::memory_order_relaxed));
+
+  // Record the fill time last, after the token count has been updated.
+  tokens.fill_time_ = time_source_.monotonicTime();
 }
 
 void LocalRateLimiterImpl::onFillTimerDescriptorHelper() {
@@ -97,7 +103,6 @@ void LocalRateLimiterImpl::onFillTimerDescriptorHelper() {
             current_time - descriptor.token_state_->fill_time_) >=
         absl::ToChronoMilliseconds(descriptor.token_bucket_.fill_interval_)) {
       onFillTimerHelper(*descriptor.token_state_, descriptor.token_bucket_);
-      descriptor.token_state_->fill_time_ = current_time;
     }
   }
 }
@@ -123,17 +128,63 @@ bool LocalRateLimiterImpl::requestAllowedHelper(const TokenState& tokens) const
   return true;
 }
 
-bool LocalRateLimiterImpl::requestAllowed(
+OptRef<const LocalRateLimiterImpl::LocalDescriptorImpl> LocalRateLimiterImpl::descriptorHelper(
     absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
   if (!descriptors_.empty() && !request_descriptors.empty()) {
+    // Request descriptors are tried in order; the first one with a full match in descriptors_
+    // selects the override rate limit descriptor.
     for (const auto& request_descriptor : request_descriptors) {
       auto it = descriptors_.find(request_descriptor);
       if (it != descriptors_.end()) {
-        return requestAllowedHelper(*it->token_state_);
+        return *it;
       }
     }
   }
-  return requestAllowedHelper(tokens_);
+  return {}; // No override descriptor matched (or none configured/requested).
+}
+
+bool LocalRateLimiterImpl::requestAllowed(
+    absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  // Consume from the first matching descriptor's bucket, else from the default bucket.
+  auto matched = descriptorHelper(request_descriptors);
+  const TokenState& state = matched.has_value() ? *matched.value().get().token_state_ : tokens_;
+  return requestAllowedHelper(state);
+}
+
+uint32_t LocalRateLimiterImpl::maxTokens(
+    absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  // Report the capacity of the first matching descriptor's bucket, else the default bucket's.
+  auto matched = descriptorHelper(request_descriptors);
+  const auto& bucket = matched.has_value() ? matched.value().get().token_bucket_ : token_bucket_;
+  return bucket.max_tokens_;
+}
+
+uint32_t LocalRateLimiterImpl::remainingTokens(
+    absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  // Read the token count of the first matching descriptor's bucket, else the default bucket's.
+  auto matched = descriptorHelper(request_descriptors);
+  const TokenState& state = matched.has_value() ? *matched.value().get().token_state_ : tokens_;
+
+  return state.tokens_.load(std::memory_order_relaxed);
+}
+
+// Returns the whole seconds left until the next refill of the token bucket that applies to
+// `request_descriptors` (first matching descriptor, else the default bucket), clamped at zero.
+int64_t LocalRateLimiterImpl::remainingFillInterval(
+    absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  const auto current_time = time_source_.monotonicTime();
+  auto descriptor = descriptorHelper(request_descriptors);
+  const bool matched = descriptor.has_value();
+  const auto& bucket = matched ? descriptor.value().get().token_bucket_ : token_bucket_;
+  const auto last_fill =
+      matched ? descriptor.value().get().token_state_->fill_time_ : tokens_.fill_time_;
+  // Remaining = fill interval - whole seconds elapsed since the last fill. A late fill timer can
+  // push the elapsed time past the interval, so clamp at zero rather than ASSERT: the
+  // X-RateLimit-Reset value must never be negative.
+  const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(current_time - last_fill);
+  const int64_t remaining =
+      absl::ToInt64Seconds(bucket.fill_interval_ - absl::Seconds(elapsed.count()));
+  return remaining > 0 ? remaining : 0;
 }
 
 } // namespace LocalRateLimit

diff --git a/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.h b/source/extensions/filters/common/local_ratelimit/local_ratelimit_impl.h
@@ -26,6 +26,10 @@ class LocalRateLimiterImpl {
   ~LocalRateLimiterImpl();
 
   bool requestAllowed(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const;
+  uint32_t maxTokens(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const;
+  uint32_t remainingTokens(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const;
+  int64_t
+  remainingFillInterval(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const;
 
 private:
   struct TokenState {
@@ -59,8 +63,10 @@ class LocalRateLimiterImpl {
   };
 
   void onFillTimer();
-  void onFillTimerHelper(const TokenState& state, const RateLimit::TokenBucket& bucket);
+  void onFillTimerHelper(TokenState& state, const RateLimit::TokenBucket& bucket);
   void onFillTimerDescriptorHelper();
+  OptRef<const LocalDescriptorImpl>
+  descriptorHelper(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const;
   bool requestAllowedHelper(const TokenState& tokens) const;
 
   RateLimit::TokenBucket token_bucket_;

diff --git a/source/extensions/filters/http/common/BUILD b/source/extensions/filters/http/common/BUILD
@@ -56,3 +56,11 @@ envoy_cc_library(
         "//source/common/common:token_bucket_impl_lib",
     ],
 )
+
+envoy_cc_library(
+    name = "ratelimit_headers_lib",  # X-RateLimit-* header name constants (header-only target).
+    hdrs = ["ratelimit_headers.h"],
+    deps = [
+        "//source/common/http:header_map_lib",
+    ],
+)
diff --git a/source/extensions/filters/http/common/ratelimit_headers.h b/source/extensions/filters/http/common/ratelimit_headers.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "envoy/http/header_map.h"
+
+#include "source/common/singleton/const_singleton.h"
+
+namespace Envoy {
+namespace Extensions {
+namespace HttpFilters {
+namespace Common {
+namespace RateLimit {
+
+class XRateLimitHeaderValues { // Constant names used when emitting X-RateLimit-* headers.
+public:
+  const Http::LowerCaseString XRateLimitLimit{"x-ratelimit-limit"};
+  const Http::LowerCaseString XRateLimitRemaining{"x-ratelimit-remaining"};
+  const Http::LowerCaseString XRateLimitReset{"x-ratelimit-reset"};
+
+  struct { // NOTE(review): presumably quota-policy parameter keys; no user is visible here.
+    const std::string Window{"w"};
+    const std::string Name{"name"};
+  } QuotaPolicyKeys;
+};
+
+using XRateLimitHeaders = ConstSingleton<XRateLimitHeaderValues>;
+} // namespace RateLimit
+} // namespace Common
+} // namespace HttpFilters
+} // namespace Extensions
+} // namespace Envoy
@@ -28,6 +28,8 @@ envoy_cc_library(
         "//source/extensions/filters/common/local_ratelimit:local_ratelimit_lib",
         "//source/extensions/filters/common/ratelimit:ratelimit_lib",
         "//source/extensions/filters/http/common:pass_through_filter_lib",
+        "//source/extensions/filters/http/common:ratelimit_headers_lib",
+        "@envoy_api//envoy/extensions/common/ratelimit/v3:pkg_cc_proto",
         "@envoy_api//envoy/extensions/filters/http/local_ratelimit/v3:pkg_cc_proto",
     ],
 )

@@ -4,10 +4,13 @@
 #include <string>
 #include <vector>
 
+#include "envoy/extensions/common/ratelimit/v3/ratelimit.pb.h"
+#include "envoy/extensions/filters/http/local_ratelimit/v3/local_rate_limit.pb.h"
 #include "envoy/http/codes.h"
 
 #include "source/common/http/utility.h"
 #include "source/common/router/config_impl.h"
+#include "source/extensions/filters/http/common/ratelimit_headers.h"
 
 namespace Envoy {
 namespace Extensions {
@@ -48,7 +51,9 @@ FilterConfig::FilterConfig(
       request_headers_parser_(Envoy::Router::HeaderParser::configure(
           config.request_headers_to_add_when_not_enforced())),
       stage_(static_cast<uint64_t>(config.stage())),
-      has_descriptors_(!config.descriptors().empty()) {
+      has_descriptors_(!config.descriptors().empty()),
+      enable_x_rate_limit_headers_(config.enable_x_ratelimit_headers() ==
+                                   envoy::extensions::common::ratelimit::v3::DRAFT_VERSION_03) {
   // Note: no token bucket is fine for the global config, which would be the case for enabling
   //       the filter globally but disabled and then applying limits at the virtual host or
   //       route level. At the virtual or route level, it makes no sense to have an no token
@@ -64,6 +69,21 @@ bool FilterConfig::requestAllowed(
   return rate_limiter_.requestAllowed(request_descriptors);
 }
 
+uint32_t
+FilterConfig::maxTokens(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  return rate_limiter_.maxTokens(request_descriptors); // Pure delegation to rate_limiter_.
+}
+
+uint32_t FilterConfig::remainingTokens(
+    absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  return rate_limiter_.remainingTokens(request_descriptors); // Pure delegation to rate_limiter_.
+}
+
+int64_t FilterConfig::remainingFillInterval(
+    absl::Span<const RateLimit::LocalDescriptor> request_descriptors) const {
+  return rate_limiter_.remainingFillInterval(request_descriptors); // Delegates to rate_limiter_.
+}
+
 LocalRateLimitStats FilterConfig::generateStats(const std::string& prefix, Stats::Scope& scope) {
   const std::string final_prefix = prefix + ".http_local_rate_limit";
   return {ALL_LOCAL_RATE_LIMIT_STATS(POOL_COUNTER_PREFIX(scope, final_prefix))};
@@ -91,6 +111,9 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers,
     populateDescriptors(descriptors, headers);
   }
 
+  // Store the descriptors so encodeHeaders() can generate the x-ratelimit-* response headers.
+  stored_descriptors_ = descriptors;
+
   if (requestAllowed(descriptors)) {
     config->stats().ok_.inc();
     return Http::FilterHeadersStatus::Continue;
@@ -116,13 +139,55 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::RequestHeaderMap& headers,
   return Http::FilterHeadersStatus::StopIteration;
 }
 
+// Adds the X-RateLimit-Limit/-Remaining/-Reset response headers when they are enabled.
+Http::FilterHeadersStatus Filter::encodeHeaders(Http::ResponseHeaderMap& headers, bool) {
+  const auto* config = getConfig();
+  // The descriptors are stored by decodeHeaders(). If it never stored them for this stream
+  // (e.g. the response was generated before this filter saw the request), skip the headers
+  // instead of asserting and dereferencing an empty optional.
+  if (config->enabled() && config->enableXRateLimitHeaders() && stored_descriptors_.has_value()) {
+    const auto& descriptors = stored_descriptors_.value();
+    const auto limit = maxTokens(descriptors);
+    const auto remaining = remainingTokens(descriptors);
+    const auto reset = remainingFillInterval(descriptors);
+    const auto& names = HttpFilters::Common::RateLimit::XRateLimitHeaders::get();
+    headers.addReferenceKey(names.XRateLimitLimit, limit);
+    headers.addReferenceKey(names.XRateLimitRemaining, remaining);
+    headers.addReferenceKey(names.XRateLimitReset, reset);
+  }
+
+  return Http::FilterHeadersStatus::Continue;
+}
+
 bool Filter::requestAllowed(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) {
   const auto* config = getConfig();
   return config->rateLimitPerConnection()
              ? getPerConnectionRateLimiter().requestAllowed(request_descriptors)
              : config->requestAllowed(request_descriptors);
 }
 
+uint32_t Filter::maxTokens(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) {
+  const auto* config = getConfig();
+  return config->rateLimitPerConnection() // Per-connection limiter if enabled, else the config's.
+             ? getPerConnectionRateLimiter().maxTokens(request_descriptors)
+             : config->maxTokens(request_descriptors);
+}
+
+uint32_t Filter::remainingTokens(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) {
+  const auto* config = getConfig();
+  return config->rateLimitPerConnection() // Per-connection limiter if enabled, else the config's.
+             ? getPerConnectionRateLimiter().remainingTokens(request_descriptors)
+             : config->remainingTokens(request_descriptors);
+}
+
+int64_t
+Filter::remainingFillInterval(absl::Span<const RateLimit::LocalDescriptor> request_descriptors) {
+  const auto* config = getConfig();
+  return config->rateLimitPerConnection() // Per-connection limiter if enabled, else the config's.
+             ? getPerConnectionRateLimiter().remainingFillInterval(request_descriptors)
+             : config->remainingFillInterval(request_descriptors);
+}
+
 const Filters::Common::LocalRateLimit::LocalRateLimiterImpl& Filter::getPerConnectionRateLimiter() {
   const auto* config = getConfig();
   ASSERT(config->rateLimitPerConnection());