From 90d82c463668edcf7df5b038f89b88beebec8693 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 30 Nov 2020 22:18:33 +0800 Subject: [PATCH 1/7] fix --- .../main/scala/org/apache/spark/ui/JettyUtils.scala | 12 +++++++----- .../src/test/scala/org/apache/spark/ui/UISuite.scala | 9 +++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 2a3597e323543..b50552c4c5963 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -51,6 +51,8 @@ private[spark] object JettyUtils extends Logging { val SPARK_CONNECTOR_NAME = "Spark" val REDIRECT_CONNECTOR_NAME = "HttpsRedirect" + // By default decoding the URI as "UTF-8" should be enough for SparkUI + val defaultUrlEncoding = "UTF-8" // Base type for a function that returns something based on an HTTP request. Allows for // implicit conversion from many types of functions to jetty Handlers. @@ -401,15 +403,16 @@ private[spark] object JettyUtils extends Logging { uri.append(rest) } - val rewrittenURI = URI.create(uri.toString()) + val rewrittenURI = URI.create(uri.toString()).normalize() if (query != null) { + val decodedQuery = decodeURL(query, defaultUrlEncoding) return new URI( rewrittenURI.getScheme(), rewrittenURI.getAuthority(), rewrittenURI.getPath(), - query, + decodedQuery, rewrittenURI.getFragment() - ).normalize() + ) } rewrittenURI.normalize() } @@ -459,8 +462,7 @@ private[spark] object JettyUtils extends Logging { val queryEncoding = if (request.getQueryEncoding != null) { request.getQueryEncoding } else { - // By default decoding the URI as "UTF-8" should be enough for SparkUI - "UTF-8" + defaultUrlEncoding } // The request URL can be raw or encoded here. To avoid the request URL being // encoded twice, let's decode it here. diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 56026eaa0072b..a2c47e78d65ee 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -216,6 +216,15 @@ class UISuite extends SparkFunSuite { assert(rewrittenURI === null) } + test("Decode query parameters of proxy rewrittenURI if it is double-encoded") { + val prefix = "/worker-id" + val target = "http://localhost:8081" + val path = "/worker-id/json" + val rewrittenURI = + JettyUtils.createProxyURI(prefix, target, path, "order%255B0%255D%255Bcolumn%255D=0") + assert(rewrittenURI.getQuery() === "order%5B0%5D%5Bcolumn%5D=0") + } + test("verify rewriting location header for reverse proxy") { val clientRequest = mock(classOf[HttpServletRequest]) var headerValue = "http://localhost:4040/jobs" From b8d3056683db0044f14756836ffe4504ef0525b0 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Dec 2020 18:01:11 +0800 Subject: [PATCH 2/7] add pattern match --- .../org/apache/spark/ui/JettyUtils.scala | 35 +++++++++++++++++-- .../scala/org/apache/spark/ui/UISuite.scala | 14 ++++++-- 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index b50552c4c5963..afad0f82c63d0 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -53,6 +53,16 @@ private[spark] object JettyUtils extends Logging { val REDIRECT_CONNECTOR_NAME = "HttpsRedirect" // By default decoding the URI as "UTF-8" should be enough for SparkUI val defaultUrlEncoding = "UTF-8" + // As per https://en.wikipedia.org/wiki/Percent-encoding: + // "Percent-encoding a reserved character involves converting the character to its corresponding + // byte value in ASCII and then representing that value as a pair of hexadecimal digits. + // The digits, preceded by a percent sign (%) which is used as an escape character, + // are then used in the URI in place of the reserved character." + // + // If a URL with a "reserved character" is encoded once, there will be "%[0-9a-fA-F]{2}" in + // the encoded result. After encoded again, the character "%" will become "%25", thus, there will + // "%25[0-9a-fA-F]{2}" in the second encoded result. + val urlWithReservedCharacterEncodedTwicePattern = "%25[0-9a-fA-F]{2}".r // Base type for a function that returns something based on an HTTP request. Allows for // implicit conversion from many types of functions to jetty Handlers. @@ -388,6 +398,19 @@ private[spark] object JettyUtils extends Logging { redirectHandler } + /** + * Check if the input url is percent-encoded twice. + * If yes, decode the url once(assuming handler functions will decode again on parsing it). + * Otherwise, return the URL as it is. + */ + def decodeURLIfEncodedTwice(url: String): String = { + if (urlWithReservedCharacterEncodedTwicePattern.findFirstMatchIn(url).isDefined) { + decodeURL(url, defaultUrlEncoding) + } else { + url + } + } + def createProxyURI(prefix: String, target: String, path: String, query: String): URI = { if (!path.startsWith(prefix)) { return null @@ -403,16 +426,22 @@ private[spark] object JettyUtils extends Logging { uri.append(rest) } - val rewrittenURI = URI.create(uri.toString()).normalize() + // When running Spark behind a reverse proxy(e.g. Nginx, Apache HTTP Server), the request URL + // on Spark can be encoded twice: one from the browser and another one from the reverse proxy. + // In general, the problem can be resolved by configuring the reverse proxy properly. However, + // we can detect whether the URL is percent-encoded twice and decode it if yes. This makes + // the setup of reverse proxy easier. + val decodedUri = decodeURLIfEncodedTwice(uri.toString()) + val rewrittenURI = URI.create(decodedUri) if (query != null) { - val decodedQuery = decodeURL(query, defaultUrlEncoding) + val decodedQuery = decodeURLIfEncodedTwice(query) return new URI( rewrittenURI.getScheme(), rewrittenURI.getAuthority(), rewrittenURI.getPath(), decodedQuery, rewrittenURI.getFragment() - ) + ).normalize() } rewrittenURI.normalize() } diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index a2c47e78d65ee..b7580930e75d8 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -216,13 +216,21 @@ class UISuite extends SparkFunSuite { assert(rewrittenURI === null) } - test("Decode query parameters of proxy rewrittenURI if it is double-encoded") { + test("Decode proxy rewrittenURI if it is percent-encoded twice") { val prefix = "/worker-id" val target = "http://localhost:8081" val path = "/worker-id/json" - val rewrittenURI = + var rewrittenURI = JettyUtils.createProxyURI(prefix, target, path, "order%255B0%255D%255Bcolumn%255D=0") - assert(rewrittenURI.getQuery() === "order%5B0%5D%5Bcolumn%5D=0") + assert(rewrittenURI.getQuery === "order%5B0%5D%5Bcolumn%5D=0") + rewrittenURI = JettyUtils.createProxyURI(prefix, target, "/worker-id/test%255B0%255D", null) + assert(rewrittenURI.toString() === "http://localhost:8081/test%5B0%5D") + + // If the URL is not percent-encoded twice, return the URL as it is + rewrittenURI = JettyUtils.createProxyURI(prefix, target, path, "order%5B0%5D%5Bcolumn%5D=0") + assert(rewrittenURI.getQuery === "order%5B0%5D%5Bcolumn%5D=0") + rewrittenURI = JettyUtils.createProxyURI(prefix, target, "/worker-id/test%255B0%255D", null) + assert(rewrittenURI.toString() === "http://localhost:8081/test%5B0%5D") } test("verify rewriting location header for reverse proxy") { From 2742b1ad3e5676e2f4c2c9369ce6196a8374817b Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Dec 2020 18:10:39 +0800 Subject: [PATCH 3/7] update comment --- core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index afad0f82c63d0..9aa3c56968327 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -399,7 +399,7 @@ private[spark] object JettyUtils extends Logging { } /** - * Check if the input url is percent-encoded twice. + * Check if the input url contains reserved characters and it is percent-encoded twice. * If yes, decode the url once(assuming handler functions will decode again on parsing it). * Otherwise, return the URL as it is. */ From a1649fbc14d28ce2b1fa1ec5f419869f78f363f7 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Dec 2020 19:46:24 +0800 Subject: [PATCH 4/7] Revert "update comment" This reverts commit 2742b1ad3e5676e2f4c2c9369ce6196a8374817b. --- core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 9aa3c56968327..afad0f82c63d0 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -399,7 +399,7 @@ private[spark] object JettyUtils extends Logging { } /** - * Check if the input url contains reserved characters and it is percent-encoded twice. + * Check if the input url is percent-encoded twice. * If yes, decode the url once(assuming handler functions will decode again on parsing it). * Otherwise, return the URL as it is. */ From 32d598bddbc84aa3e13789ffebcbf16ce1777542 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Dec 2020 19:46:26 +0800 Subject: [PATCH 5/7] Revert "add pattern match" This reverts commit b8d3056683db0044f14756836ffe4504ef0525b0. --- .../org/apache/spark/ui/JettyUtils.scala | 35 ++----------------- .../scala/org/apache/spark/ui/UISuite.scala | 14 ++------ 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index afad0f82c63d0..b50552c4c5963 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -53,16 +53,6 @@ private[spark] object JettyUtils extends Logging { val REDIRECT_CONNECTOR_NAME = "HttpsRedirect" // By default decoding the URI as "UTF-8" should be enough for SparkUI val defaultUrlEncoding = "UTF-8" - // As per https://en.wikipedia.org/wiki/Percent-encoding: - // "Percent-encoding a reserved character involves converting the character to its corresponding - // byte value in ASCII and then representing that value as a pair of hexadecimal digits. - // The digits, preceded by a percent sign (%) which is used as an escape character, - // are then used in the URI in place of the reserved character." - // - // If a URL with a "reserved character" is encoded once, there will be "%[0-9a-fA-F]{2}" in - // the encoded result. After encoded again, the character "%" will become "%25", thus, there will - // "%25[0-9a-fA-F]{2}" in the second encoded result. - val urlWithReservedCharacterEncodedTwicePattern = "%25[0-9a-fA-F]{2}".r // Base type for a function that returns something based on an HTTP request. Allows for // implicit conversion from many types of functions to jetty Handlers. @@ -398,19 +388,6 @@ private[spark] object JettyUtils extends Logging { redirectHandler } - /** - * Check if the input url is percent-encoded twice. - * If yes, decode the url once(assuming handler functions will decode again on parsing it). - * Otherwise, return the URL as it is. - */ - def decodeURLIfEncodedTwice(url: String): String = { - if (urlWithReservedCharacterEncodedTwicePattern.findFirstMatchIn(url).isDefined) { - decodeURL(url, defaultUrlEncoding) - } else { - url - } - } - def createProxyURI(prefix: String, target: String, path: String, query: String): URI = { if (!path.startsWith(prefix)) { return null @@ -426,22 +403,16 @@ private[spark] object JettyUtils extends Logging { uri.append(rest) } - // When running Spark behind a reverse proxy(e.g. Nginx, Apache HTTP Server), the request URL - // on Spark can be encoded twice: one from the browser and another one from the reverse proxy. - // In general, the problem can be resolved by configuring the reverse proxy properly. However, - // we can detect whether the URL is percent-encoded twice and decode it if yes. This makes - // the setup of reverse proxy easier. - val decodedUri = decodeURLIfEncodedTwice(uri.toString()) - val rewrittenURI = URI.create(decodedUri) + val rewrittenURI = URI.create(uri.toString()).normalize() if (query != null) { - val decodedQuery = decodeURLIfEncodedTwice(query) + val decodedQuery = decodeURL(query, defaultUrlEncoding) return new URI( rewrittenURI.getScheme(), rewrittenURI.getAuthority(), rewrittenURI.getPath(), decodedQuery, rewrittenURI.getFragment() - ).normalize() + ) } rewrittenURI.normalize() } diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index b7580930e75d8..a2c47e78d65ee 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -216,21 +216,13 @@ class UISuite extends SparkFunSuite { assert(rewrittenURI === null) } - test("Decode proxy rewrittenURI if it is percent-encoded twice") { + test("Decode query parameters of proxy rewrittenURI if it is double-encoded") { val prefix = "/worker-id" val target = "http://localhost:8081" val path = "/worker-id/json" - var rewrittenURI = + val rewrittenURI = JettyUtils.createProxyURI(prefix, target, path, "order%255B0%255D%255Bcolumn%255D=0") - assert(rewrittenURI.getQuery === "order%5B0%5D%5Bcolumn%5D=0") - rewrittenURI = JettyUtils.createProxyURI(prefix, target, "/worker-id/test%255B0%255D", null) - assert(rewrittenURI.toString() === "http://localhost:8081/test%5B0%5D") - - // If the URL is not percent-encoded twice, return the URL as it is - rewrittenURI = JettyUtils.createProxyURI(prefix, target, path, "order%5B0%5D%5Bcolumn%5D=0") - assert(rewrittenURI.getQuery === "order%5B0%5D%5Bcolumn%5D=0") - rewrittenURI = JettyUtils.createProxyURI(prefix, target, "/worker-id/test%255B0%255D", null) - assert(rewrittenURI.toString() === "http://localhost:8081/test%5B0%5D") + assert(rewrittenURI.getQuery() === "order%5B0%5D%5Bcolumn%5D=0") } test("verify rewriting location header for reverse proxy") { From 05775cbee33a8dbf785fb5c670c254b22c8cb770 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Dec 2020 20:37:11 +0800 Subject: [PATCH 6/7] update --- .../org/apache/spark/ui/JettyUtils.scala | 19 +++++++------------ .../scala/org/apache/spark/ui/UISuite.scala | 6 +++--- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index b50552c4c5963..4e7a3c19586a2 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -403,18 +403,13 @@ private[spark] object JettyUtils extends Logging { uri.append(rest) } - val rewrittenURI = URI.create(uri.toString()).normalize() - if (query != null) { - val decodedQuery = decodeURL(query, defaultUrlEncoding) - return new URI( - rewrittenURI.getScheme(), - rewrittenURI.getAuthority(), - rewrittenURI.getPath(), - decodedQuery, - rewrittenURI.getFragment() - ) + val queryString = if (query == null) { + "" + } else { + s"?$query" } - rewrittenURI.normalize() + // SPARK-33611: use method `URI.create` to avoid percent-encoding twice on the query string. + URI.create(uri.toString() + queryString).normalize() } def createProxyLocationHeader( @@ -442,7 +437,7 @@ private[spark] object JettyUtils extends Logging { handler.addFilter(holder, "/*", EnumSet.allOf(classOf[DispatcherType])) } - private def decodeURL(url: String, encoding: String): String = { + private def decodeURL(url: String, encoding: String = defaultUrlEncoding): String = { if (url == null) { null } else { diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index a2c47e78d65ee..c7e1dfe71d563 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -216,13 +216,13 @@ class UISuite extends SparkFunSuite { assert(rewrittenURI === null) } - test("Decode query parameters of proxy rewrittenURI if it is double-encoded") { + test("SPARK-33611: Avoid encoding twice on the query parameter of proxy rewrittenURI") { val prefix = "/worker-id" val target = "http://localhost:8081" val path = "/worker-id/json" val rewrittenURI = - JettyUtils.createProxyURI(prefix, target, path, "order%255B0%255D%255Bcolumn%255D=0") - assert(rewrittenURI.getQuery() === "order%5B0%5D%5Bcolumn%5D=0") + JettyUtils.createProxyURI(prefix, target, path, "order%5B0%5D%5Bcolumn%5D=0") + assert(rewrittenURI.toString === "http://localhost:8081/json?order%5B0%5D%5Bcolumn%5D=0") } test("verify rewriting location header for reverse proxy") { From b34c0a5f1d18581ab71d38ac023a79fcc7378583 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 1 Dec 2020 20:54:58 +0800 Subject: [PATCH 7/7] remove unused code --- core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 4e7a3c19586a2..663da0d33e20b 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -51,8 +51,6 @@ private[spark] object JettyUtils extends Logging { val SPARK_CONNECTOR_NAME = "Spark" val REDIRECT_CONNECTOR_NAME = "HttpsRedirect" - // By default decoding the URI as "UTF-8" should be enough for SparkUI - val defaultUrlEncoding = "UTF-8" // Base type for a function that returns something based on an HTTP request. Allows for // implicit conversion from many types of functions to jetty Handlers. @@ -437,7 +435,7 @@ private[spark] object JettyUtils extends Logging { handler.addFilter(holder, "/*", EnumSet.allOf(classOf[DispatcherType])) } - private def decodeURL(url: String, encoding: String = defaultUrlEncoding): String = { + private def decodeURL(url: String, encoding: String): String = { if (url == null) { null } else { @@ -457,7 +455,8 @@ private[spark] object JettyUtils extends Logging { val queryEncoding = if (request.getQueryEncoding != null) { request.getQueryEncoding } else { - defaultUrlEncoding + // By default decoding the URI as "UTF-8" should be enough for SparkUI + "UTF-8" } // The request URL can be raw or encoded here. To avoid the request URL being // encoded twice, let's decode it here.