envoyproxy · mattklein123 · Dec 30, 2019 · Dec 14, 2019 · Dec 15, 2019 · Dec 15, 2019
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -92,3 +92,4 @@ extensions/filters/common/original_src @snowp @klarose
 /*/extensions/filters/network/echo @htuch @alyssawilk
 /*/extensions/filters/udp/udp_proxy @mattklein123 @danzh2010
 /*/extensions/clusters/aggregate @yxue @snowp
+/*/extensions/filters/network/local_ratelimit @mattklein123 @junr03
diff --git a/api/BUILD b/api/BUILD
@@ -84,6 +84,8 @@ proto_library(
         "//envoy/config/filter/network/ext_authz/v3alpha:pkg",
         "//envoy/config/filter/network/http_connection_manager/v2:pkg",
         "//envoy/config/filter/network/http_connection_manager/v3alpha:pkg",
+        "//envoy/config/filter/network/local_rate_limit/v2alpha:pkg",
+        "//envoy/config/filter/network/local_rate_limit/v3alpha:pkg",
         "//envoy/config/filter/network/mongo_proxy/v2:pkg",
         "//envoy/config/filter/network/mongo_proxy/v3alpha:pkg",
         "//envoy/config/filter/network/mysql_proxy/v1alpha1:pkg",

diff --git a/api/docs/BUILD b/api/docs/BUILD
@@ -59,6 +59,7 @@ proto_library(
         "//envoy/config/filter/network/dubbo_proxy/v2alpha1:pkg",
         "//envoy/config/filter/network/ext_authz/v2:pkg",
         "//envoy/config/filter/network/http_connection_manager/v2:pkg",
+        "//envoy/config/filter/network/local_rate_limit/v2alpha:pkg",
         "//envoy/config/filter/network/mongo_proxy/v2:pkg",
         "//envoy/config/filter/network/mysql_proxy/v1alpha1:pkg",
         "//envoy/config/filter/network/rate_limit/v2:pkg",

diff --git a/api/envoy/api/v2/route/route.proto b/api/envoy/api/v2/route/route.proto
@@ -1178,7 +1178,7 @@ message VirtualCluster {
   core.RequestMethod method = 3 [deprecated = true];
 }
 
-// Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`.
+// Global rate limiting :ref:`architecture overview <arch_overview_global_rate_limit>`.
 message RateLimit {
   // [#next-free-field: 7]
   message Action {

diff --git a/api/envoy/api/v3alpha/route/route.proto b/api/envoy/api/v3alpha/route/route.proto
@@ -1167,7 +1167,7 @@ message VirtualCluster {
   string name = 2 [(validate.rules).string = {min_bytes: 1}];
 }
 
-// Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`.
+// Global rate limiting :ref:`architecture overview <arch_overview_global_rate_limit>`.
 message RateLimit {
   option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RateLimit";
 

diff --git a/api/envoy/config/filter/network/local_rate_limit/v2alpha/BUILD b/api/envoy/config/filter/network/local_rate_limit/v2alpha/BUILD
@@ -0,0 +1,12 @@
+# DO NOT EDIT. This file is generated by tools/proto_sync.py.
+
+load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package")
+
+licenses(["notice"])  # Apache 2
+
+api_proto_package(
+    deps = [
+        "//envoy/api/v2/core:pkg",
+        "//envoy/type:pkg",
+    ],
+)
diff --git a/api/envoy/config/filter/network/local_rate_limit/v2alpha/local_rate_limit.proto b/api/envoy/config/filter/network/local_rate_limit/v2alpha/local_rate_limit.proto
@@ -0,0 +1,40 @@
+syntax = "proto3";
+
+package envoy.config.filter.network.local_rate_limit.v2alpha;
+
+import "envoy/api/v2/core/base.proto";
+import "envoy/type/token_bucket.proto";
+
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.config.filter.network.local_rate_limit.v2alpha";
+option java_outer_classname = "LocalRateLimitProto";
+option java_multiple_files = true;
+
+// [#protodoc-title: Local rate limit]
+// Local rate limit :ref:`configuration overview <config_network_filters_local_rate_limit>`.
+// [#extension: envoy.filters.network.local_ratelimit]
+
+message LocalRateLimit {
+  // The prefix to use when emitting :ref:`statistics
+  // <config_network_filters_local_rate_limit_stats>`. Must not be empty.
+  string stat_prefix = 1 [(validate.rules).string = {min_bytes: 1}];
+
+  // The token bucket configuration to use for rate limiting connections that are processed by the
+  // filter's filter chain. Each incoming connection processed by the filter consumes a single
+  // token. If a token is available, the connection will be allowed. If no tokens are available,
+  // the connection will be immediately closed. This field is required.
+  //
+  // .. note::
+  //   In the current implementation each filter and filter chain has an independent rate limit.
+  //
+  // .. note::
+  //   In the current implementation the token bucket's :ref:`fill_interval
+  //   <envoy_api_field_type.TokenBucket.fill_interval>` must be >= 50ms to avoid overly
+  //   aggressive refills.
+  type.TokenBucket token_bucket = 2 [(validate.rules).message = {required: true}];
+
+  // Runtime flag that controls whether the filter is enabled or not. If not specified, defaults
+  // to enabled.
+  api.v2.core.RuntimeFeatureFlag runtime_enabled = 3;
+}
diff --git a/api/envoy/config/filter/network/local_rate_limit/v3alpha/BUILD b/api/envoy/config/filter/network/local_rate_limit/v3alpha/BUILD
@@ -0,0 +1,14 @@
+# DO NOT EDIT. This file is generated by tools/proto_sync.py.
+
+load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package")
+
+licenses(["notice"])  # Apache 2
+
+api_proto_package(
+    deps = [
+        "//envoy/api/v3alpha/core:pkg",
+        "//envoy/config/filter/network/local_rate_limit/v2alpha:pkg",
+        "//envoy/type/v3alpha:pkg",
+        "@com_github_cncf_udpa//udpa/annotations:pkg",
+    ],
+)
diff --git a/api/envoy/config/filter/network/local_rate_limit/v3alpha/local_rate_limit.proto b/api/envoy/config/filter/network/local_rate_limit/v3alpha/local_rate_limit.proto
@@ -0,0 +1,45 @@
+syntax = "proto3";
+
+package envoy.config.filter.network.local_rate_limit.v3alpha;
+
+import "envoy/api/v3alpha/core/base.proto";
+import "envoy/type/v3alpha/token_bucket.proto";
+
+import "udpa/annotations/versioning.proto";
+
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.config.filter.network.local_rate_limit.v3alpha";
+option java_outer_classname = "LocalRateLimitProto";
+option java_multiple_files = true;
+
+// [#protodoc-title: Local rate limit]
+// Local rate limit :ref:`configuration overview <config_network_filters_local_rate_limit>`.
+// [#extension: envoy.filters.network.local_ratelimit]
+
+message LocalRateLimit {
+  option (udpa.annotations.versioning).previous_message_type =
+      "envoy.config.filter.network.local_rate_limit.v2alpha.LocalRateLimit";
+
+  // The prefix to use when emitting :ref:`statistics
+  // <config_network_filters_local_rate_limit_stats>`. Must not be empty.
+  string stat_prefix = 1 [(validate.rules).string = {min_bytes: 1}];
+
+  // The token bucket configuration to use for rate limiting connections that are processed by the
+  // filter's filter chain. Each incoming connection processed by the filter consumes a single
+  // token. If a token is available, the connection will be allowed. If no tokens are available,
+  // the connection will be immediately closed. This field is required.
+  //
+  // .. note::
+  //   In the current implementation each filter and filter chain has an independent rate limit.
+  //
+  // .. note::
+  //   In the current implementation the token bucket's :ref:`fill_interval
+  //   <envoy_api_field_type.v3alpha.TokenBucket.fill_interval>` must be >= 50ms to avoid overly
+  //   aggressive refills.
+  type.v3alpha.TokenBucket token_bucket = 2 [(validate.rules).message = {required: true}];
+
+  // Runtime flag that controls whether the filter is enabled or not. If not specified, defaults
+  // to enabled.
+  api.v3alpha.core.RuntimeFeatureFlag runtime_enabled = 3;
+}
diff --git a/api/envoy/type/token_bucket.proto b/api/envoy/type/token_bucket.proto
@@ -0,0 +1,33 @@
+syntax = "proto3";
+
+package envoy.type;
+
+import "google/protobuf/duration.proto";
+import "google/protobuf/wrappers.proto";
+
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.type";
+option java_outer_classname = "TokenBucketProto";
+option java_multiple_files = true;
+
+// [#protodoc-title: Token bucket]
+
+// Configures a token bucket, typically used for rate limiting.
+message TokenBucket {
+  // The maximum number of tokens that the bucket can hold; must be greater than zero. This is also
+  // the number of tokens that the bucket initially contains.
+  uint32 max_tokens = 1 [(validate.rules).uint32 = {gt: 0}];
+
+  // The number of tokens added to the bucket during each fill interval. If specified, must be
+  // greater than zero. If not specified, defaults to a single token.
+  google.protobuf.UInt32Value tokens_per_fill = 2 [(validate.rules).uint32 = {gt: 0}];
+
+  // The interval at which tokens are added to the bucket. Required, and must be greater than zero.
+  // During each fill interval `tokens_per_fill` tokens are added to the bucket. The bucket will
+  // never contain more than `max_tokens` tokens.
+  google.protobuf.Duration fill_interval = 3 [(validate.rules).duration = {
+    required: true
+    gt {}
+  }];
+}
diff --git a/api/envoy/type/v3alpha/token_bucket.proto b/api/envoy/type/v3alpha/token_bucket.proto
@@ -0,0 +1,37 @@
+syntax = "proto3";
+
+package envoy.type.v3alpha;
+
+import "google/protobuf/duration.proto";
+import "google/protobuf/wrappers.proto";
+
+import "udpa/annotations/versioning.proto";
+
+import "validate/validate.proto";
+
+option java_package = "io.envoyproxy.envoy.type.v3alpha";
+option java_outer_classname = "TokenBucketProto";
+option java_multiple_files = true;
+
+// [#protodoc-title: Token bucket]
+
+// Configures a token bucket, typically used for rate limiting.
+message TokenBucket {
+  option (udpa.annotations.versioning).previous_message_type = "envoy.type.TokenBucket";
+
+  // The maximum number of tokens that the bucket can hold; must be greater than zero. This is also
+  // the number of tokens that the bucket initially contains.
+  uint32 max_tokens = 1 [(validate.rules).uint32 = {gt: 0}];
+
+  // The number of tokens added to the bucket during each fill interval. If specified, must be
+  // greater than zero. If not specified, defaults to a single token.
+  google.protobuf.UInt32Value tokens_per_fill = 2 [(validate.rules).uint32 = {gt: 0}];
+
+  // The interval at which tokens are added to the bucket. Required, and must be greater than zero.
+  // During each fill interval `tokens_per_fill` tokens are added to the bucket. The bucket will
+  // never contain more than `max_tokens` tokens.
+  google.protobuf.Duration fill_interval = 3 [(validate.rules).duration = {
+    required: true
+    gt {}
+  }];
+}
diff --git a/docs/root/api-v2/config/filter/network/network.rst b/docs/root/api-v2/config/filter/network/network.rst
@@ -8,4 +8,5 @@ Network filters
   */empty/*
   */v1alpha1/*
   */v2/*
+  */v2alpha/*
   */v2alpha1/*
diff --git a/docs/root/api-v2/types/types.rst b/docs/root/api-v2/types/types.rst
@@ -10,6 +10,7 @@ Types
   ../type/http_status.proto
   ../type/percent.proto
   ../type/range.proto
+  ../type/token_bucket.proto
   ../type/matcher/metadata.proto
   ../type/matcher/number.proto
   ../type/matcher/regex.proto

diff --git a/docs/root/configuration/http/http_filters/rate_limit_filter.rst b/docs/root/configuration/http/http_filters/rate_limit_filter.rst
@@ -3,7 +3,7 @@
 Rate limit
 ==========
 
-* Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`
+* Global rate limiting :ref:`architecture overview <arch_overview_global_rate_limit>`
 * :ref:`v2 API reference <envoy_api_msg_config.filter.http.rate_limit.v2.RateLimit>`
 * This filter should be configured with the name *envoy.rate_limit*.
 

diff --git a/docs/root/configuration/listeners/network_filters/local_rate_limit_filter.rst b/docs/root/configuration/listeners/network_filters/local_rate_limit_filter.rst
@@ -0,0 +1,46 @@
+.. _config_network_filters_local_rate_limit:
+
+Local rate limit
+================
+
+* Local rate limiting :ref:`architecture overview <arch_overview_local_rate_limit>`
+* :ref:`v2 API reference
+  <envoy_api_msg_config.filter.network.local_rate_limit.v2alpha.LocalRateLimit>`
+* This filter should be configured with the name *envoy.filters.network.local_ratelimit*.
+
+.. note::
+  Global rate limiting is also supported via the :ref:`global rate limit filter
+  <config_network_filters_rate_limit>`.
+
+Overview
+--------
+
+The local rate limit filter applies a :ref:`token bucket
+<envoy_api_field_config.filter.network.local_rate_limit.v2alpha.LocalRateLimit.token_bucket>` rate
+limit to incoming connections that are processed by the filter's filter chain. Each connection
+processed by the filter utilizes a single token, and if no tokens are available, the connection will
+be immediately closed without further filter iteration.
+
+.. note::
+  In the current implementation each filter and filter chain has an independent rate limit.
+
+.. _config_network_filters_local_rate_limit_stats:
+
+Statistics
+----------
+
+Every configured local rate limit filter has statistics rooted at *local_ratelimit.<stat_prefix>.*
+with the following statistics:
+
+.. csv-table::
+  :header: Name, Type, Description
+  :widths: 1, 1, 2
+
+  rate_limited, Counter, Total connections that have been closed due to the rate limit being exceeded
+
+Runtime
+-------
+
+The local rate limit filter can be runtime feature flagged via the :ref:`runtime_enabled
+<envoy_api_field_config.filter.network.local_rate_limit.v2alpha.LocalRateLimit.runtime_enabled>`
+configuration field.
diff --git a/docs/root/configuration/listeners/network_filters/network_filters.rst b/docs/root/configuration/listeners/network_filters/network_filters.rst
@@ -14,6 +14,7 @@ filters.
   client_ssl_auth_filter
   echo_filter
   ext_authz_filter
+  local_rate_limit_filter
   mongo_proxy_filter
   mysql_proxy_filter
   rate_limit_filter

diff --git a/docs/root/configuration/listeners/network_filters/rate_limit_filter.rst b/docs/root/configuration/listeners/network_filters/rate_limit_filter.rst
@@ -3,10 +3,14 @@
 Rate limit
 ==========
 
-* Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`
+* Global rate limiting :ref:`architecture overview <arch_overview_global_rate_limit>`
 * :ref:`v2 API reference <envoy_api_msg_config.filter.network.rate_limit.v2.RateLimit>`
 * This filter should be configured with the name *envoy.ratelimit*.
 
+.. note::
+  Local rate limiting is also supported via the :ref:`local rate limit filter
+  <config_network_filters_local_rate_limit>`.
+
 .. _config_network_filters_rate_limit_stats:
 
 Statistics

diff --git a/docs/root/configuration/other_features/rate_limit.rst b/docs/root/configuration/other_features/rate_limit.rst
@@ -3,7 +3,7 @@
 Rate limit service
 ==================
 
-The :ref:`rate limit service <arch_overview_rate_limit>` configuration specifies the global rate
+The :ref:`rate limit service <arch_overview_global_rate_limit>` configuration specifies the global rate
 limit service Envoy should talk to when it needs to make global rate limit decisions. If no rate
 limit service is configured, a "null" service will be used which will always return OK if called.
 

diff --git a/docs/root/configuration/other_protocols/thrift_filters/rate_limit_filter.rst b/docs/root/configuration/other_protocols/thrift_filters/rate_limit_filter.rst
@@ -3,7 +3,7 @@
 Rate limit
 ==========
 
-* Global rate limiting :ref:`architecture overview <arch_overview_rate_limit>`
+* Global rate limiting :ref:`architecture overview <arch_overview_global_rate_limit>`
 * :ref:`v2 API reference <envoy_api_msg_config.filter.thrift.rate_limit.v2alpha1.RateLimit>`
 * This filter should be configured with the name *envoy.filters.thrift.rate_limit*.
 

diff --git a/docs/root/faq/load_balancing/transient_failures.rst b/docs/root/faq/load_balancing/transient_failures.rst
@@ -38,7 +38,7 @@ The following application status codes in gRPC are considered safe for automatic
 
 * *CANCELLED* - Return this code if there is an error that can be retried in the service.
 * *RESOURCE_EXHAUSTED* - Return this code if some of the resources that service depends on are exhausted in that instance so that retrying 
-  to another instance would help. Please note that for shared resource exhaustion, returning this will not help. Instead :ref:`rate limiting <arch_overview_rate_limit>`
+  to another instance would help. Please note that for shared resource exhaustion, returning this will not help. Instead :ref:`rate limiting <arch_overview_global_rate_limit>`
   should be used to handle such cases.
 
 The HTTP Status codes *502 (Bad Gateway)*, *503 (Service Unavailable)* and *504 (Gateway Timeout)* are all mapped to gRPC status code *UNAVAILABLE*. 

diff --git a/docs/root/install/ref_configs.rst b/docs/root/install/ref_configs.rst
@@ -50,7 +50,7 @@ A few notes about the example configurations:
   disable this or enable `Zipkin <https://zipkin.io>`_ or `Datadog <https://datadoghq.com>`_ tracing, delete or
   change the :ref:`tracing configuration <envoy_api_file_envoy/config/trace/v2/trace.proto>` accordingly.
 * The configuration demonstrates the use of a :ref:`global rate limiting service
-  <arch_overview_rate_limit>`. To disable this delete the :ref:`rate limit configuration
+  <arch_overview_global_rate_limit>`. To disable this delete the :ref:`rate limit configuration
   <config_rate_limit_service>`.
 * :ref:`Route discovery service <config_http_conn_man_rds>` is configured for the service to service
   reference configuration and it is assumed to be running at `rds.yourcompany.net`.

diff --git a/docs/root/intro/arch_overview/listeners/listener_filters.rst b/docs/root/intro/arch_overview/listeners/listener_filters.rst
@@ -11,6 +11,6 @@ and also make interaction between multiple such features more explicit.
 The API for listener filters is relatively simple since ultimately these filters operate on newly
 accepted sockets. Filters in the chain can stop and subsequently continue iteration to
 further filters. This allows for more complex scenarios such as calling a :ref:`rate limiting
-service <arch_overview_rate_limit>`, etc. Envoy already includes several listener filters that
+service <arch_overview_global_rate_limit>`, etc. Envoy already includes several listener filters that
 are documented in this architecture overview as well as the :ref:`configuration reference
 <config_listener_filters>`.
diff --git a/docs/root/intro/arch_overview/listeners/listeners.rst b/docs/root/intro/arch_overview/listeners/listeners.rst
@@ -18,7 +18,7 @@ composed of one or more network level (L3/L4) :ref:`filters <arch_overview_netwo
 a new connection is received on a listener, the appropriate filter chain is selected, and the
 configured connection local filter stack is instantiated and begins processing subsequent events.
 The generic listener architecture is used to perform the vast majority of different proxy tasks that
-Envoy is used for (e.g., :ref:`rate limiting <arch_overview_rate_limit>`, :ref:`TLS client
+Envoy is used for (e.g., :ref:`rate limiting <arch_overview_global_rate_limit>`, :ref:`TLS client
 authentication <arch_overview_ssl_auth_filter>`, :ref:`HTTP connection management
 <arch_overview_http_conn_man>`, MongoDB :ref:`sniffing <arch_overview_mongo>`, raw :ref:`TCP proxy
 <arch_overview_tcp_proxy>`, etc.).

diff --git a/docs/root/intro/arch_overview/listeners/network_filters.rst b/docs/root/intro/arch_overview/listeners/network_filters.rst
@@ -17,7 +17,7 @@ The API for network level filters is relatively simple since ultimately the filt
 bytes and a small number of connection events (e.g., TLS handshake complete, connection disconnected
 locally or remotely, etc.). Filters in the chain can stop and subsequently continue iteration to
 further filters. This allows for more complex scenarios such as calling a :ref:`rate limiting
-service <arch_overview_rate_limit>`, etc. Network level filters can also share state (static and
+service <arch_overview_global_rate_limit>`, etc. Network level filters can also share state (static and
 dynamic) among themselves within the context of a single downstream connection. Refer to
 :ref:`data sharing between filters <arch_overview_data_sharing_between_filters>` for more details.
 Envoy already includes several network level filters that are documented in this architecture

diff --git a/docs/root/intro/arch_overview/other_features/global_rate_limiting.rst b/docs/root/intro/arch_overview/other_features/global_rate_limiting.rst
@@ -1,4 +1,4 @@
-.. _arch_overview_rate_limit:
+.. _arch_overview_global_rate_limit:
 
 Global rate limiting
 ====================
@@ -29,3 +29,10 @@ written in Go which uses a Redis backend. Envoy’s rate limit integration has t
   :ref:`Configuration reference <config_http_filters_rate_limit>`
 
 Rate limit service :ref:`configuration <config_rate_limit_service>`.
+
+Note that Envoy also supports :ref:`local rate limiting <config_network_filters_local_rate_limit>`.
+Local rate limiting can be used in conjunction with global rate limiting to reduce load on the
+global rate limit service. For example, a local token bucket rate limit can absorb very large bursts
+in load that might otherwise overwhelm a global rate limit service. Thus, the rate limit is applied
+in two stages. The initial coarse-grained limiting is performed by the token bucket limit before
+a fine-grained global limit finishes the job.