diff --git a/api/envoy/api/v2/route/route_components.proto b/api/envoy/api/v2/route/route_components.proto index f5e6bae79a35a..4312993396965 100644 --- a/api/envoy/api/v2/route/route_components.proto +++ b/api/envoy/api/v2/route/route_components.proto @@ -530,7 +530,7 @@ message CorsPolicy { core.RuntimeFractionalPercent shadow_enabled = 10; } -// [#next-free-field: 32] +// [#next-free-field: 33] message RouteAction { enum ClusterNotFoundResponseCode { // HTTP status code - 503 Service Unavailable. @@ -742,6 +742,10 @@ message RouteAction { // place the original path before rewrite into the :ref:`x-envoy-original-path // ` header. // + // Only one of *prefix_rewrite* or + // :ref:`regex_rewrite ` + // may be specified. + // // .. attention:: // // Pay careful attention to the use of trailing slashes in the @@ -765,6 +769,36 @@ message RouteAction { // requests to */prefix/etc* will be stripped to */etc*. string prefix_rewrite = 5; + // Indicates that during forwarding, portions of the path that match the + // pattern should be rewritten, even allowing the substitution of capture + // groups from the pattern into the new path as specified by the rewrite + // substitution string. This is useful to allow application paths to be + // rewritten in a way that is aware of segments with variable content like + // identifiers. The router filter will place the original path as it was + // before the rewrite into the :ref:`x-envoy-original-path + // ` header. + // + // Only one of :ref:`prefix_rewrite ` + // or *regex_rewrite* may be specified. + // + // Examples using Google's `RE2 `_ engine: + // + // * The path pattern ``^/service/([^/]+)(/.*)$`` paired with a substitution + // string of ``\2/instance/\1`` would transform ``/service/foo/v1/api`` + // into ``/v1/api/instance/foo``. + // + // * The pattern ``one`` paired with a substitution string of ``two`` would + // transform ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/two/zzz``. 
+ // + // * The pattern ``^(.*?)one(.*)$`` paired with a substitution string of + // ``\1two\2`` would replace only the first occurrence of ``one``, + // transforming path ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/one/zzz``. + // + // * The pattern ``(?i)/xxx/`` paired with a substitution string of ``/yyy/`` + // would do a case-insensitive match and transform path ``/aaa/XxX/bbb`` to + // ``/aaa/yyy/bbb``. + type.matcher.RegexMatchAndSubstitute regex_rewrite = 32; + oneof host_rewrite_specifier { // Indicates that during forwarding, the host header will be swapped with // this value. diff --git a/api/envoy/config/route/v3/route_components.proto b/api/envoy/config/route/v3/route_components.proto index 2fde356ea6f7e..5430a813f0fb3 100644 --- a/api/envoy/config/route/v3/route_components.proto +++ b/api/envoy/config/route/v3/route_components.proto @@ -491,7 +491,7 @@ message CorsPolicy { core.v3.RuntimeFractionalPercent shadow_enabled = 10; } -// [#next-free-field: 32] +// [#next-free-field: 33] message RouteAction { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RouteAction"; @@ -715,6 +715,10 @@ message RouteAction { // place the original path before rewrite into the :ref:`x-envoy-original-path // ` header. // + // Only one of *prefix_rewrite* or + // :ref:`regex_rewrite ` + // may be specified. + // // .. attention:: // // Pay careful attention to the use of trailing slashes in the @@ -738,6 +742,36 @@ message RouteAction { // requests to */prefix/etc* will be stripped to */etc*. string prefix_rewrite = 5; + // Indicates that during forwarding, portions of the path that match the + // pattern should be rewritten, even allowing the substitution of capture + // groups from the pattern into the new path as specified by the rewrite + // substitution string. This is useful to allow application paths to be + // rewritten in a way that is aware of segments with variable content like + // identifiers. 
The router filter will place the original path as it was + // before the rewrite into the :ref:`x-envoy-original-path + // ` header. + // + // Only one of :ref:`prefix_rewrite ` + // or *regex_rewrite* may be specified. + // + // Examples using Google's `RE2 `_ engine: + // + // * The path pattern ``^/service/([^/]+)(/.*)$`` paired with a substitution + // string of ``\2/instance/\1`` would transform ``/service/foo/v1/api`` + // into ``/v1/api/instance/foo``. + // + // * The pattern ``one`` paired with a substitution string of ``two`` would + // transform ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/two/zzz``. + // + // * The pattern ``^(.*?)one(.*)$`` paired with a substitution string of + // ``\1two\2`` would replace only the first occurrence of ``one``, + // transforming path ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/one/zzz``. + // + // * The pattern ``(?i)/xxx/`` paired with a substitution string of ``/yyy/`` + // would do a case-insensitive match and transform path ``/aaa/XxX/bbb`` to + // ``/aaa/yyy/bbb``. + type.matcher.v3.RegexMatchAndSubstitute regex_rewrite = 32; + oneof host_rewrite_specifier { // Indicates that during forwarding, the host header will be swapped with // this value. diff --git a/api/envoy/type/matcher/regex.proto b/api/envoy/type/matcher/regex.proto index 2dd5bbe047cbb..2be13845fc00b 100644 --- a/api/envoy/type/matcher/regex.proto +++ b/api/envoy/type/matcher/regex.proto @@ -35,3 +35,29 @@ message RegexMatcher { // The regex match string. The string must be supported by the configured engine. string regex = 2 [(validate.rules).string = {min_bytes: 1}]; } + +// Describes how to match a string and then produce a new string using a regular +// expression and a substitution string. +message RegexMatchAndSubstitute { + // The regular expression used to find portions of a string (hereafter called + // the "subject string") that should be replaced. 
When a new string is + // produced during the substitution operation, the new string is initially + // the same as the subject string, but then all matches in the subject string + // are replaced by the substitution string. If replacing all matches isn't + // desired, regular expression anchors can be used to ensure a single match, + // so as to replace just one occurrence of a pattern. Capture groups can be + // used in the pattern to extract portions of the subject string, and then + // referenced in the substitution string. + RegexMatcher pattern = 1; + + // The string that should be substituted into matching portions of the + // subject string during a substitution operation to produce a new string. + // Capture groups in the pattern can be referenced in the substitution + // string. Note, however, that the syntax for referring to capture groups is + // defined by the chosen regular expression engine. Google's `RE2 + // `_ regular expression engine uses a + // backslash followed by the capture group number to denote a numbered + // capture group. E.g., ``\1`` refers to capture group 1, and ``\2`` refers + // to capture group 2. + string substitution = 2; +} diff --git a/api/envoy/type/matcher/v3/regex.proto b/api/envoy/type/matcher/v3/regex.proto index bf62d7e32a559..acfb905ea01c5 100644 --- a/api/envoy/type/matcher/v3/regex.proto +++ b/api/envoy/type/matcher/v3/regex.proto @@ -42,3 +42,32 @@ message RegexMatcher { // The regex match string. The string must be supported by the configured engine. string regex = 2 [(validate.rules).string = {min_bytes: 1}]; } + +// Describes how to match a string and then produce a new string using a regular +// expression and a substitution string. +message RegexMatchAndSubstitute { + option (udpa.annotations.versioning).previous_message_type = + "envoy.type.matcher.RegexMatchAndSubstitute"; + + // The regular expression used to find portions of a string (hereafter called + // the "subject string") that should be replaced. 
When a new string is + // produced during the substitution operation, the new string is initially + // the same as the subject string, but then all matches in the subject string + // are replaced by the substitution string. If replacing all matches isn't + // desired, regular expression anchors can be used to ensure a single match, + // so as to replace just one occurrence of a pattern. Capture groups can be + // used in the pattern to extract portions of the subject string, and then + // referenced in the substitution string. + RegexMatcher pattern = 1; + + // The string that should be substituted into matching portions of the + // subject string during a substitution operation to produce a new string. + // Capture groups in the pattern can be referenced in the substitution + // string. Note, however, that the syntax for referring to capture groups is + // defined by the chosen regular expression engine. Google's `RE2 + // `_ regular expression engine uses a + // backslash followed by the capture group number to denote a numbered + // capture group. E.g., ``\1`` refers to capture group 1, and ``\2`` refers + // to capture group 2. + string substitution = 2; +} diff --git a/docs/root/configuration/http/http_conn_man/headers.rst b/docs/root/configuration/http/http_conn_man/headers.rst index 077cd0b30ab1c..7041772752347 100644 --- a/docs/root/configuration/http/http_conn_man/headers.rst +++ b/docs/root/configuration/http/http_conn_man/headers.rst @@ -471,7 +471,8 @@ route, virtual host, and/or global route configuration level. See the No *:-prefixed* pseudo-header may be modified via this mechanism. The *:path* and *:authority* headers may instead be modified via mechanisms such as -:ref:`prefix_rewrite ` and +:ref:`prefix_rewrite `, +:ref:`regex_rewrite `, and :ref:`host_rewrite `. 
Headers are appended to requests/responses in the following order: weighted cluster level headers, diff --git a/docs/root/configuration/http/http_filters/router_filter.rst b/docs/root/configuration/http/http_filters/router_filter.rst index b4ca99c68d781..0ae63a6ea310a 100644 --- a/docs/root/configuration/http/http_filters/router_filter.rst +++ b/docs/root/configuration/http/http_filters/router_filter.rst @@ -341,7 +341,8 @@ responses. x-envoy-original-path ^^^^^^^^^^^^^^^^^^^^^ -If the route utilizes :ref:`prefix_rewrite `, +If the route utilizes :ref:`prefix_rewrite ` +or :ref:`regex_rewrite `, Envoy will put the original path header in this header. This can be useful for logging and debugging. diff --git a/docs/root/intro/arch_overview/http/http_routing.rst b/docs/root/intro/arch_overview/http/http_routing.rst index b70190ab26f72..95ce2b6ed796f 100644 --- a/docs/root/intro/arch_overview/http/http_routing.rst +++ b/docs/root/intro/arch_overview/http/http_routing.rst @@ -30,6 +30,7 @@ request. The router filter supports the following features: * :ref:`Automatic host rewriting ` based on the DNS name of the selected upstream host. * :ref:`Prefix rewriting `. +* :ref:`Path rewriting using a regular expression and capture groups `. * :ref:`Request retries ` specified either via HTTP header or via route configuration. * Request timeout specified either via :ref:`HTTP diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst index 9de318ff49ed3..cb47e23c74811 100644 --- a/docs/root/intro/version_history.rst +++ b/docs/root/intro/version_history.rst @@ -18,6 +18,8 @@ Version history * router: added :ref:`auto_san_validation ` to support overrriding SAN validation to transport socket for new upstream connections based on the downstream HTTP host/authority header. * router: added the ability to match a route based on whether a downstream TLS connection certificate has been :ref:`validated `. 
+* router: added support for :ref:`regex_rewrite + ` for path rewriting using regular expressions and capture groups. * router: don't ignore :ref:`per_try_timeout ` when the :ref:`global route timeout ` is disabled. * sds: added :ref:`GenericSecret ` to support secret of generic type. * stat sinks: stat sink extensions use the "envoy.stat_sinks" name space. A mapping of extension diff --git a/generated_api_shadow/envoy/api/v2/route/route_components.proto b/generated_api_shadow/envoy/api/v2/route/route_components.proto index f5e6bae79a35a..4312993396965 100644 --- a/generated_api_shadow/envoy/api/v2/route/route_components.proto +++ b/generated_api_shadow/envoy/api/v2/route/route_components.proto @@ -530,7 +530,7 @@ message CorsPolicy { core.RuntimeFractionalPercent shadow_enabled = 10; } -// [#next-free-field: 32] +// [#next-free-field: 33] message RouteAction { enum ClusterNotFoundResponseCode { // HTTP status code - 503 Service Unavailable. @@ -742,6 +742,10 @@ message RouteAction { // place the original path before rewrite into the :ref:`x-envoy-original-path // ` header. // + // Only one of *prefix_rewrite* or + // :ref:`regex_rewrite ` + // may be specified. + // // .. attention:: // // Pay careful attention to the use of trailing slashes in the @@ -765,6 +769,36 @@ message RouteAction { // requests to */prefix/etc* will be stripped to */etc*. string prefix_rewrite = 5; + // Indicates that during forwarding, portions of the path that match the + // pattern should be rewritten, even allowing the substitution of capture + // groups from the pattern into the new path as specified by the rewrite + // substitution string. This is useful to allow application paths to be + // rewritten in a way that is aware of segments with variable content like + // identifiers. The router filter will place the original path as it was + // before the rewrite into the :ref:`x-envoy-original-path + // ` header. 
+ // + // Only one of :ref:`prefix_rewrite ` + // or *regex_rewrite* may be specified. + // + // Examples using Google's `RE2 `_ engine: + // + // * The path pattern ``^/service/([^/]+)(/.*)$`` paired with a substitution + // string of ``\2/instance/\1`` would transform ``/service/foo/v1/api`` + // into ``/v1/api/instance/foo``. + // + // * The pattern ``one`` paired with a substitution string of ``two`` would + // transform ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/two/zzz``. + // + // * The pattern ``^(.*?)one(.*)$`` paired with a substitution string of + // ``\1two\2`` would replace only the first occurrence of ``one``, + // transforming path ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/one/zzz``. + // + // * The pattern ``(?i)/xxx/`` paired with a substitution string of ``/yyy/`` + // would do a case-insensitive match and transform path ``/aaa/XxX/bbb`` to + // ``/aaa/yyy/bbb``. + type.matcher.RegexMatchAndSubstitute regex_rewrite = 32; + oneof host_rewrite_specifier { // Indicates that during forwarding, the host header will be swapped with // this value. diff --git a/generated_api_shadow/envoy/config/route/v3/route_components.proto b/generated_api_shadow/envoy/config/route/v3/route_components.proto index 68a4c600aef88..91b5d67fc7cc3 100644 --- a/generated_api_shadow/envoy/config/route/v3/route_components.proto +++ b/generated_api_shadow/envoy/config/route/v3/route_components.proto @@ -553,7 +553,7 @@ message CorsPolicy { core.v3.RuntimeFractionalPercent shadow_enabled = 10; } -// [#next-free-field: 32] +// [#next-free-field: 33] message RouteAction { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.route.RouteAction"; @@ -789,6 +789,10 @@ message RouteAction { // place the original path before rewrite into the :ref:`x-envoy-original-path // ` header. // + // Only one of *prefix_rewrite* or + // :ref:`regex_rewrite ` + // may be specified. + // // .. 
attention:: // // Pay careful attention to the use of trailing slashes in the @@ -812,6 +816,36 @@ message RouteAction { // requests to */prefix/etc* will be stripped to */etc*. string prefix_rewrite = 5; + // Indicates that during forwarding, portions of the path that match the + // pattern should be rewritten, even allowing the substitution of capture + // groups from the pattern into the new path as specified by the rewrite + // substitution string. This is useful to allow application paths to be + // rewritten in a way that is aware of segments with variable content like + // identifiers. The router filter will place the original path as it was + // before the rewrite into the :ref:`x-envoy-original-path + // ` header. + // + // Only one of :ref:`prefix_rewrite ` + // or *regex_rewrite* may be specified. + // + // Examples using Google's `RE2 `_ engine: + // + // * The path pattern ``^/service/([^/]+)(/.*)$`` paired with a substitution + // string of ``\2/instance/\1`` would transform ``/service/foo/v1/api`` + // into ``/v1/api/instance/foo``. + // + // * The pattern ``one`` paired with a substitution string of ``two`` would + // transform ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/two/zzz``. + // + // * The pattern ``^(.*?)one(.*)$`` paired with a substitution string of + // ``\1two\2`` would replace only the first occurrence of ``one``, + // transforming path ``/xxx/one/yyy/one/zzz`` into ``/xxx/two/yyy/one/zzz``. + // + // * The pattern ``(?i)/xxx/`` paired with a substitution string of ``/yyy/`` + // would do a case-insensitive match and transform path ``/aaa/XxX/bbb`` to + // ``/aaa/yyy/bbb``. + type.matcher.v3.RegexMatchAndSubstitute regex_rewrite = 32; + oneof host_rewrite_specifier { // Indicates that during forwarding, the host header will be swapped with // this value. 
diff --git a/generated_api_shadow/envoy/type/matcher/regex.proto b/generated_api_shadow/envoy/type/matcher/regex.proto index 2dd5bbe047cbb..2be13845fc00b 100644 --- a/generated_api_shadow/envoy/type/matcher/regex.proto +++ b/generated_api_shadow/envoy/type/matcher/regex.proto @@ -35,3 +35,29 @@ message RegexMatcher { // The regex match string. The string must be supported by the configured engine. string regex = 2 [(validate.rules).string = {min_bytes: 1}]; } + +// Describes how to match a string and then produce a new string using a regular +// expression and a substitution string. +message RegexMatchAndSubstitute { + // The regular expression used to find portions of a string (hereafter called + // the "subject string") that should be replaced. When a new string is + // produced during the substitution operation, the new string is initially + // the same as the subject string, but then all matches in the subject string + // are replaced by the substitution string. If replacing all matches isn't + // desired, regular expression anchors can be used to ensure a single match, + // so as to replace just one occurrence of a pattern. Capture groups can be + // used in the pattern to extract portions of the subject string, and then + // referenced in the substitution string. + RegexMatcher pattern = 1; + + // The string that should be substituted into matching portions of the + // subject string during a substitution operation to produce a new string. + // Capture groups in the pattern can be referenced in the substitution + // string. Note, however, that the syntax for referring to capture groups is + // defined by the chosen regular expression engine. Google's `RE2 + // `_ regular expression engine uses a + // backslash followed by the capture group number to denote a numbered + // capture group. E.g., ``\1`` refers to capture group 1, and ``\2`` refers + // to capture group 2. 
+ string substitution = 2; +} diff --git a/generated_api_shadow/envoy/type/matcher/v3/regex.proto b/generated_api_shadow/envoy/type/matcher/v3/regex.proto index bf62d7e32a559..acfb905ea01c5 100644 --- a/generated_api_shadow/envoy/type/matcher/v3/regex.proto +++ b/generated_api_shadow/envoy/type/matcher/v3/regex.proto @@ -42,3 +42,32 @@ message RegexMatcher { // The regex match string. The string must be supported by the configured engine. string regex = 2 [(validate.rules).string = {min_bytes: 1}]; } + +// Describes how to match a string and then produce a new string using a regular +// expression and a substitution string. +message RegexMatchAndSubstitute { + option (udpa.annotations.versioning).previous_message_type = + "envoy.type.matcher.RegexMatchAndSubstitute"; + + // The regular expression used to find portions of a string (hereafter called + // the "subject string") that should be replaced. When a new string is + // produced during the substitution operation, the new string is initially + // the same as the subject string, but then all matches in the subject string + // are replaced by the substitution string. If replacing all matches isn't + // desired, regular expression anchors can be used to ensure a single match, + // so as to replace just one occurrence of a pattern. Capture groups can be + // used in the pattern to extract portions of the subject string, and then + // referenced in the substitution string. + RegexMatcher pattern = 1; + + // The string that should be substituted into matching portions of the + // subject string during a substitution operation to produce a new string. + // Capture groups in the pattern can be referenced in the substitution + // string. Note, however, that the syntax for referring to capture groups is + // defined by the chosen regular expression engine. Google's `RE2 + // `_ regular expression engine uses a + // backslash followed by the capture group number to denote a numbered + // capture group. 
E.g., ``\1`` refers to capture group 1, and ``\2`` refers + // to capture group 2. + string substitution = 2; +} diff --git a/include/envoy/common/regex.h b/include/envoy/common/regex.h index f4cdc1699ef70..df71729c17db4 100644 --- a/include/envoy/common/regex.h +++ b/include/envoy/common/regex.h @@ -9,11 +9,18 @@ namespace Regex { /** * A compiled regex expression matcher which uses an abstract regex engine. - * - * NOTE: Currently this is the same as StringMatcher, however has been split out as in the future - * we are likely to add other methods such as returning captures, etc. */ -class CompiledMatcher : public Matchers::StringMatcher {}; +class CompiledMatcher : public Matchers::StringMatcher { +public: + /** + * Replaces all non-overlapping occurrences of the pattern in "value" with + * "substitution". The "substitution" string can make references to + * capture groups in the pattern, using the syntax specific to that + * regular expression engine. + */ + virtual std::string replaceAll(absl::string_view value, + absl::string_view substitution) const PURE; +}; using CompiledMatcherPtr = std::unique_ptr; diff --git a/source/common/common/regex.cc b/source/common/common/regex.cc index b3ce6ed8f570c..6d8467f8ea195 100644 --- a/source/common/common/regex.cc +++ b/source/common/common/regex.cc @@ -22,6 +22,15 @@ class CompiledStdMatcher : public CompiledMatcher { return std::regex_match(value.begin(), value.end(), regex_); } + // CompiledMatcher + std::string replaceAll(absl::string_view value, absl::string_view substitution) const override { + try { + return std::regex_replace(std::string(value), regex_, std::string(substitution)); + } catch (const std::regex_error& e) { + return std::string(value); + } + } + private: const std::regex regex_; }; @@ -48,6 +57,14 @@ class CompiledGoogleReMatcher : public CompiledMatcher { return re2::RE2::FullMatch(re2::StringPiece(value.data(), value.size()), regex_); } + // CompiledMatcher + std::string 
replaceAll(absl::string_view value, absl::string_view substitution) const override { + std::string result = std::string(value); + re2::RE2::GlobalReplace(&result, regex_, + re2::StringPiece(substitution.data(), substitution.size())); + return result; + } + private: const re2::RE2 regex_; }; diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index d562f8e4fe411..2a5d8ccb01177 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -379,6 +379,15 @@ RouteEntryImplBase::RouteEntryImplBase(const VirtualHostImpl& vhost, throw EnvoyException(absl::StrCat("Duplicate upgrade ", upgrade_config.upgrade_type())); } } + + if (route.route().has_regex_rewrite()) { + if (!prefix_rewrite_.empty()) { + throw EnvoyException("Cannot specify both prefix_rewrite and regex_rewrite"); + } + auto rewrite_spec = route.route().regex_rewrite(); + regex_rewrite_ = Regex::Utility::parseRegex(rewrite_spec.pattern()); + regex_rewrite_substitution_ = rewrite_spec.substitution(); + } } bool RouteEntryImplBase::evaluateRuntimeMatch(const uint64_t random_value) const { @@ -476,7 +485,7 @@ void RouteEntryImplBase::finalizeRequestHeaders(Http::HeaderMap& headers, } // Handle path rewrite - if (!getPathRewrite().empty()) { + if (!getPathRewrite().empty() || regex_rewrite_ != nullptr) { rewritePathHeader(headers, insert_envoy_original_path); } } @@ -511,11 +520,19 @@ RouteEntryImplBase::loadRuntimeData(const envoy::config::route::v3::RouteMatch& return runtime; } +// finalizePathHeader does the "standard" path rewriting, meaning that it +// handles the "prefix_rewrite" and "regex_rewrite" route actions, only one of +// which can be specified. The "matched_path" argument applies only to the +// prefix rewriting, and describes the portion of the path (excluding query +// parameters) that should be replaced by the rewrite.
A "regex_rewrite" +// applies to the entire path (excluding query parameters), regardless of what +// portion was matched. void RouteEntryImplBase::finalizePathHeader(Http::HeaderMap& headers, absl::string_view matched_path, bool insert_envoy_original_path) const { const auto& rewrite = getPathRewrite(); - if (rewrite.empty()) { + if (rewrite.empty() && regex_rewrite_ == nullptr) { + // There are no rewrites configured. Just return. return; } @@ -523,9 +540,21 @@ void RouteEntryImplBase::finalizePathHeader(Http::HeaderMap& headers, if (insert_envoy_original_path) { headers.setEnvoyOriginalPath(path); } - ASSERT(case_sensitive_ ? absl::StartsWith(path, matched_path) - : absl::StartsWithIgnoreCase(path, matched_path)); - headers.setPath(path.replace(0, matched_path.size(), rewrite)); + + if (!rewrite.empty()) { + ASSERT(case_sensitive_ ? absl::StartsWith(path, matched_path) + : absl::StartsWithIgnoreCase(path, matched_path)); + headers.setPath(path.replace(0, matched_path.size(), rewrite)); + return; + } + + if (regex_rewrite_ != nullptr) { + // Replace the entire path, but preserve the query parameters + auto just_path(Http::PathUtil::removeQueryAndFragment(path)); + headers.setPath(path.replace( + 0, just_path.size(), regex_rewrite_->replaceAll(just_path, regex_rewrite_substitution_))); + return; + } } absl::string_view RouteEntryImplBase::processRequestHost(const Http::HeaderMap& headers, diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h index b9559b66efc8a..9d41e352353c0 100644 --- a/source/common/router/config_impl.h +++ b/source/common/router/config_impl.h @@ -472,6 +472,8 @@ class RouteEntryImplBase : public RouteEntry, protected: const bool case_sensitive_; const std::string prefix_rewrite_; + Regex::CompiledMatcherPtr regex_rewrite_; + std::string regex_rewrite_substitution_; const std::string host_rewrite_; bool include_vh_rate_limits_; diff --git a/test/common/router/config_impl_test.cc 
b/test/common/router/config_impl_test.cc index 53c90e7955375..e3f0491d4f1d4 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -447,6 +447,34 @@ TEST_F(RouteMatcherTest, TestRoutes) { route: prefix_rewrite: "/api/new_endpoint" cluster: www2 + - match: + prefix: "/newforreg1_endpoint" + route: + regex_rewrite: + pattern: + google_re2: {} + regex: "^/new(.*?)_endpoint(.*)$" + substitution: /\1_rewritten_endpoint\2 + cluster: www2 + - match: + prefix: "/newforreg2_endpoint" + route: + regex_rewrite: + pattern: + google_re2: {} + regex: "e" + substitution: "X" + cluster: www2 + - match: + path: "/exact/path/for/regex1" + case_sensitive: true + route: + cluster: www2 + regex_rewrite: + pattern: + google_re2: {} + regex: "[aeioe]" + substitution: "V" - match: path: "/" route: @@ -504,6 +532,17 @@ TEST_F(RouteMatcherTest, TestRoutes) { route: cluster: three_numbers prefix_rewrite: "/rewrote" + - match: + safe_regex: + google_re2: {} + regex: ".*/\\d{4}$" + route: + cluster: four_numbers + regex_rewrite: + pattern: + google_re2: {} + regex: "(^.*)/(\\d{4})$" + substitution: /four/\2/endpoint\1 - match: safe_regex: google_re2: {} @@ -781,6 +820,56 @@ TEST_F(RouteMatcherTest, TestRoutes) { EXPECT_EQ("/bar", headers.get_(Http::Headers::get().Path)); } + // Regular expression path rewrite after prefix match testing. 
+ { + Http::TestHeaderMapImpl headers = genHeaders("www.lyft.com", "/newforreg1_endpoint/foo", "GET"); + const RouteEntry* route = config.route(headers, 0)->routeEntry(); + EXPECT_EQ("www2", route->clusterName()); + EXPECT_EQ("www2", virtualHostName(route)); + route->finalizeRequestHeaders(headers, stream_info, true); + EXPECT_EQ("/forreg1_rewritten_endpoint/foo", headers.get_(Http::Headers::get().Path)); + EXPECT_EQ("/newforreg1_endpoint/foo", headers.get_(Http::Headers::get().EnvoyOriginalPath)); + } + + // Regular expression path rewrite after prefix match testing, replace every + // occurrence, excluding query parameters. + { + Http::TestHeaderMapImpl headers = + genHeaders("www.lyft.com", "/newforreg2_endpoint/tee?test=me", "GET"); + const RouteEntry* route = config.route(headers, 0)->routeEntry(); + EXPECT_EQ("www2", route->clusterName()); + EXPECT_EQ("www2", virtualHostName(route)); + route->finalizeRequestHeaders(headers, stream_info, true); + EXPECT_EQ("/nXwforrXg2_Xndpoint/tXX?test=me", headers.get_(Http::Headers::get().Path)); + EXPECT_EQ("/newforreg2_endpoint/tee?test=me", + headers.get_(Http::Headers::get().EnvoyOriginalPath)); + } + + // Regular expression path rewrite after exact path match testing. + { + Http::TestHeaderMapImpl headers = genHeaders("www.lyft.com", "/exact/path/for/regex1", "GET"); + const RouteEntry* route = config.route(headers, 0)->routeEntry(); + EXPECT_EQ("www2", route->clusterName()); + EXPECT_EQ("www2", virtualHostName(route)); + route->finalizeRequestHeaders(headers, stream_info, true); + EXPECT_EQ("/VxVct/pVth/fVr/rVgVx1", headers.get_(Http::Headers::get().Path)); + EXPECT_EQ("/exact/path/for/regex1", headers.get_(Http::Headers::get().EnvoyOriginalPath)); + } + + // Regular expression path rewrite after exact path match testing, + // with query parameters. 
+ { + Http::TestHeaderMapImpl headers = + genHeaders("www.lyft.com", "/exact/path/for/regex1?test=aeiou", "GET"); + const RouteEntry* route = config.route(headers, 0)->routeEntry(); + EXPECT_EQ("www2", route->clusterName()); + EXPECT_EQ("www2", virtualHostName(route)); + route->finalizeRequestHeaders(headers, stream_info, true); + EXPECT_EQ("/VxVct/pVth/fVr/rVgVx1?test=aeiou", headers.get_(Http::Headers::get().Path)); + EXPECT_EQ("/exact/path/for/regex1?test=aeiou", + headers.get_(Http::Headers::get().EnvoyOriginalPath)); + } + // Host rewrite testing. { Http::TestHeaderMapImpl headers = genHeaders("api.lyft.com", "/host/rewrite/me", "GET"); @@ -884,6 +973,24 @@ TEST_F(RouteMatcherTest, TestRoutes) { EXPECT_EQ("/rewrote?bar=true", headers.get_(Http::Headers::get().Path)); } + // Regular expression rewrite for regular expression matching + { + Http::TestHeaderMapImpl headers = genHeaders("bat.com", "/xx/yy/6472", "GET"); + const RouteEntry* route = config.route(headers, 0)->routeEntry(); + route->finalizeRequestHeaders(headers, stream_info, true); + EXPECT_EQ("/four/6472/endpoint/xx/yy", headers.get_(Http::Headers::get().Path)); + EXPECT_EQ("/xx/yy/6472", headers.get_(Http::Headers::get().EnvoyOriginalPath)); + } + + // Regular expression rewrite for regular expression matching, with query parameters. + { + Http::TestHeaderMapImpl headers = genHeaders("bat.com", "/xx/yy/6472?test=foo", "GET"); + const RouteEntry* route = config.route(headers, 0)->routeEntry(); + route->finalizeRequestHeaders(headers, stream_info, true); + EXPECT_EQ("/four/6472/endpoint/xx/yy?test=foo", headers.get_(Http::Headers::get().Path)); + EXPECT_EQ("/xx/yy/6472?test=foo", headers.get_(Http::Headers::get().EnvoyOriginalPath)); + } + // Virtual cluster testing. { Http::TestHeaderMapImpl headers = genHeaders("api.lyft.com", "/rides", "GET"); @@ -3446,6 +3553,28 @@ TEST_F(RouteMatcherTest, TestDuplicatePrefixWildcardDomainConfig) { "Only unique values for domains are permitted. 
Duplicate entry of domain bar.*"); } +TEST_F(RouteMatcherTest, TestPrefixAndRegexRewrites) { + const std::string yaml = R"EOF( +virtual_hosts: +- name: www2 + domains: ["bar.*"] + routes: + - match: { prefix: "/foo" } + route: + prefix_rewrite: / + regex_rewrite: + pattern: + google_re2: {} + regex: foo + substitution: bar + cluster: www2 + )EOF"; + + EXPECT_THROW_WITH_MESSAGE( + TestConfigImpl(parseRouteConfigurationFromV2Yaml(yaml), factory_context_, true), + EnvoyException, "Cannot specify both prefix_rewrite and regex_rewrite"); +} + TEST_F(RouteMatcherTest, TestDomainMatchOrderConfig) { const std::string yaml = R"EOF( virtual_hosts: diff --git a/tools/spelling/spelling_dictionary.txt b/tools/spelling/spelling_dictionary.txt index 9478a70b89e32..780472a6e1625 100644 --- a/tools/spelling/spelling_dictionary.txt +++ b/tools/spelling/spelling_dictionary.txt @@ -337,6 +337,7 @@ XNOR XSS YAML ZXID +aaa abc absl accesslog @@ -390,6 +391,7 @@ balancers barbaz baz bazel +bbb behaviour benchmarked benchmarking @@ -1208,6 +1210,7 @@ xmodem xxhash xxs xyz +yyy zag zig zipkin @@ -1215,3 +1218,4 @@ zlib OBQ SemVer SCM +zzz