From f4b9feed5378b0eb87a201b5aebe44421505cb17 Mon Sep 17 00:00:00 2001 From: "chenliang.lu" Date: Fri, 2 Dec 2022 18:09:35 +0800 Subject: [PATCH 1/4] SPARK-41365 Stages UI page fails to load for proxy in some yarn environment --- .../apache/spark/status/api/v1/StagesResource.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala index 26dfa5af101e3..8ea58bfcd17f4 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala @@ -16,12 +16,17 @@ */ package org.apache.spark.status.api.v1 +import java.net.URLDecoder +import java.nio.charset.StandardCharsets.UTF_8 import java.util.{HashMap, List => JList, Locale} import javax.ws.rs.{NotFoundException => _, _} import javax.ws.rs.core.{Context, MediaType, MultivaluedMap, UriInfo} import scala.collection.JavaConverters._ +import org.glassfish.jersey.internal.util.collection.ImmutableMultivaluedMap +import org.glassfish.jersey.uri.UriComponent + import org.apache.spark.status.api.v1.TaskStatus._ import org.apache.spark.ui.UIUtils import org.apache.spark.ui.jobs.ApiHelper._ @@ -143,7 +148,10 @@ private[v1] class StagesResource extends BaseAppResource { @Context uriInfo: UriInfo): HashMap[String, Object] = { withUI { ui => - val uriQueryParameters = uriInfo.getQueryParameters(true) + // Decode URI twice here to avoid percent-encoding twice on the query string + val decodeURI = URLDecoder.decode(uriInfo.getRequestUri.getRawQuery, UTF_8.name()) + val uriQueryParameters = new ImmutableMultivaluedMap[String, String]( + UriComponent.decodeQuery(decodeURI, true)) val totalRecords = uriQueryParameters.getFirst("numTasks") var isSearch = false var searchValue: String = null @@ -204,7 +212,7 @@ private[v1] class StagesResource extends BaseAppResource { pageLength = queryParameters.getFirst("length").toInt } withUI(_.store.taskList(stageId, stageAttemptId, pageStartIndex, pageLength, - indexName(columnNameToSort), isAscendingStr.equalsIgnoreCase("asc"))) + indexName(columnNameToSort), "asc".equalsIgnoreCase(isAscendingStr))) } // Filters task list based on search parameter From 3fa4affaa91618e95d29aa9f178b35f6f8c94a91 Mon Sep 17 00:00:00 2001 From: "chenliang.lu" Date: Fri, 9 Dec 2022 12:23:27 +0800 Subject: [PATCH 2/4] SPARK-41365 Stages UI page fails to load for proxy in some yarn environment --- .../spark/status/api/v1/StagesResource.scala | 11 +----- .../scala/org/apache/spark/ui/UIUtils.scala | 20 +++++++++- .../org/apache/spark/ui/UISeleniumSuite.scala | 38 ++++++++++++++++++- 3 files changed, 58 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala index 8ea58bfcd17f4..b23d0770a3601 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala @@ -16,17 +16,12 @@ */ package org.apache.spark.status.api.v1 -import java.net.URLDecoder -import java.nio.charset.StandardCharsets.UTF_8 import java.util.{HashMap, List => JList, Locale} import javax.ws.rs.{NotFoundException => _, _} import javax.ws.rs.core.{Context, MediaType, MultivaluedMap, UriInfo} import scala.collection.JavaConverters._ -import org.glassfish.jersey.internal.util.collection.ImmutableMultivaluedMap -import org.glassfish.jersey.uri.UriComponent - import org.apache.spark.status.api.v1.TaskStatus._ import org.apache.spark.ui.UIUtils import org.apache.spark.ui.jobs.ApiHelper._ @@ -148,10 +143,8 @@ private[v1] class StagesResource extends BaseAppResource { @Context uriInfo: UriInfo): HashMap[String, Object] = { withUI { ui => - // Decode URI twice here to avoid percent-encoding twice on the query string - val decodeURI = URLDecoder.decode(uriInfo.getRequestUri.getRawQuery, UTF_8.name()) - val uriQueryParameters = new ImmutableMultivaluedMap[String, String]( - UriComponent.decodeQuery(decodeURI, true)) + // Decode URI params twice here to avoid percent-encoding twice + val uriQueryParameters = UIUtils.decodeURLParameter(uriInfo.getQueryParameters(true)) val totalRecords = uriQueryParameters.getFirst("numTasks") var isSearch = false var searchValue: String = null diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 111e8f8b3ad4b..aca03fde4c96d 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -24,13 +24,15 @@ import java.nio.charset.StandardCharsets.UTF_8 import java.text.SimpleDateFormat import java.util.{Date, Locale, TimeZone} import javax.servlet.http.HttpServletRequest -import javax.ws.rs.core.{MediaType, Response} +import javax.ws.rs.core.{MediaType, MultivaluedMap, Response} import scala.collection.JavaConverters._ import scala.util.control.NonFatal import scala.xml._ import scala.xml.transform.{RewriteRule, RuleTransformer} +import org.glassfish.jersey.internal.util.collection.MultivaluedStringMap + import org.apache.spark.internal.Logging import org.apache.spark.ui.scope.RDDOperationGraph @@ -636,6 +638,22 @@ private[spark] object UIUtils extends Logging { param } + /** + * Decode URLParameter if URL is encoded by YARN-WebAppProxyServlet. + */ + def decodeURLParameter(params: MultivaluedMap[String, String]): MultivaluedStringMap = { + val decodedParameters = new MultivaluedStringMap + params.forEach((encodeKey, encodeValues) => { + val decodeKey = decodeURLParameter(encodeKey) + val decodeValues = new java.util.LinkedList[String] + encodeValues.forEach(v => { + decodeValues.add(decodeURLParameter(v)) + }) + decodedParameters.addAll(decodeKey, decodeValues) + }) + decodedParameters + } + def getTimeZoneOffset() : Int = TimeZone.getDefault().getOffset(System.currentTimeMillis()) / 1000 / 60 diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index 874ef1552d828..35e8dd1b31587 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -35,8 +35,9 @@ import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.SpanSugar._ import org.scalatestplus.selenium.WebBrowser - import org.apache.spark._ +import org.glassfish.jersey.internal.util.collection.MultivaluedStringMap + import org.apache.spark.LocalSparkContext._ import org.apache.spark.api.java.StorageLevels import org.apache.spark.deploy.history.HistoryServerSuite @@ -711,6 +712,8 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers { rdd.count() eventually(timeout(5.seconds), interval(100.milliseconds)) { + val str: String = Utils.tryWithResource(Source.fromURL( + apiUrl(sc.ui.get, "/stages/stage/0/0/taskTable?draw=1")))(_.mkString) val stage0 = Utils.tryWithResource(Source.fromURL(sc.ui.get.webUrl + "/stages/stage/?id=0&attempt=0&expandDagViz=true"))(_.mkString) assert(stage0.contains("digraph G {\n subgraph clusterstage_0 {\n " + @@ -819,6 +822,39 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers { } } + test("SPARK-41365: Stage page can be accessed if URI was encoded twice") { + withSpark(newSparkContext()) { sc => + val rdd = sc.parallelize(0 to 10, 10).repartition(10) + rdd.count() + eventually(timeout(5.seconds), interval(50.milliseconds)) { + val encodeParams = new MultivaluedStringMap + encodeParams.add("order%255B0%255D%255Bcolumn%255D", "Locality%2520Level") + encodeParams.add("order%255B0%255D%255Bcolumn%255D", "Executor%2520ID") + encodeParams.add("search%255Bvalue%255D", null) + val decodeParams = UIUtils.decodeURLParameter(encodeParams) + // assert no change in order + assert(decodeParams.getFirst("order[0][column]").equals("Locality Level")) + assert(decodeParams.get("order[0][column]").size() == 2) + assert(decodeParams.getFirst("search[value]").equals("")) + + val decodeQuery = "draw=2&order[0][column]=4&order[0][dir]=asc&start=0&length=20" + + "&search[value]=&search[regex]=false&numTasks=10&columnIndexToSort=4" + + "&columnNameToSort=Locality Level" + val encodeOnceQuery = "draw=2&order%5B0%5D%5Bcolumn%5D=4&start=0&length=20" + + "&search%5Bvalue%5D=&search%5Bregex%5D=false&numTasks=10&columnIndexToSort=4" + + "&columnNameToSort=Locality%20Level" + val encodeTwiceQuery = "draw=2&order%255B0%255D%255Bcolumn%255D=4&start=0&length=20" + + "&search%255Bvalue%255D=&search%255Bregex%255D=false&numTasks=10&columnIndexToSort=4" + + "&columnNameToSort=Locality%2520Level" + val encodeOnceRes = Utils.tryWithResource(Source.fromURL( + apiUrl(sc.ui.get, "stages/0/0/taskTable?"+ encodeOnceQuery)))(_.mkString) + val encodeTwiceRes = Utils.tryWithResource(Source.fromURL( + apiUrl(sc.ui.get, "stages/0/0/taskTable?"+ encodeTwiceQuery)))(_.mkString) + assert(encodeOnceRes.equals(encodeTwiceRes)) + } + } + } + def goToUi(sc: SparkContext, path: String): Unit = { goToUi(sc.ui.get, path) } From 5dcceaf0e8cbd748a23989437b8491549056247e Mon Sep 17 00:00:00 2001 From: "chenliang.lu" Date: Fri, 9 Dec 2022 12:26:01 +0800 Subject: [PATCH 3/4] ut fix --- core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index 35e8dd1b31587..c44fac6426ad6 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -712,8 +712,6 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers { rdd.count() eventually(timeout(5.seconds), interval(100.milliseconds)) { - val str: String = Utils.tryWithResource(Source.fromURL( - apiUrl(sc.ui.get, "/stages/stage/0/0/taskTable?draw=1")))(_.mkString) val stage0 = Utils.tryWithResource(Source.fromURL(sc.ui.get.webUrl + "/stages/stage/?id=0&attempt=0&expandDagViz=true"))(_.mkString) assert(stage0.contains("digraph G {\n subgraph clusterstage_0 {\n " + From af998413b88087ce542d30412061e836d006aec1 Mon Sep 17 00:00:00 2001 From: "chenliang.lu" Date: Fri, 9 Dec 2022 14:33:42 +0800 Subject: [PATCH 4/4] code style --- .../test/scala/org/apache/spark/ui/UISeleniumSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index c44fac6426ad6..45348b2e9a7ba 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -26,6 +26,7 @@ import scala.xml.Node import com.gargoylesoftware.css.parser.CSSParseException import com.gargoylesoftware.htmlunit.DefaultCssErrorHandler +import org.glassfish.jersey.internal.util.collection.MultivaluedStringMap import org.json4s._ import org.json4s.jackson.JsonMethods import org.openqa.selenium.{By, WebDriver} @@ -35,9 +36,8 @@ import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.SpanSugar._ import org.scalatestplus.selenium.WebBrowser -import org.apache.spark._ -import org.glassfish.jersey.internal.util.collection.MultivaluedStringMap +import org.apache.spark._ import org.apache.spark.LocalSparkContext._ import org.apache.spark.api.java.StorageLevels import org.apache.spark.deploy.history.HistoryServerSuite @@ -845,9 +845,9 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers { "&search%255Bvalue%255D=&search%255Bregex%255D=false&numTasks=10&columnIndexToSort=4" + "&columnNameToSort=Locality%2520Level" val encodeOnceRes = Utils.tryWithResource(Source.fromURL( - apiUrl(sc.ui.get, "stages/0/0/taskTable?"+ encodeOnceQuery)))(_.mkString) + apiUrl(sc.ui.get, "stages/0/0/taskTable?" + encodeOnceQuery)))(_.mkString) val encodeTwiceRes = Utils.tryWithResource(Source.fromURL( - apiUrl(sc.ui.get, "stages/0/0/taskTable?"+ encodeTwiceQuery)))(_.mkString) + apiUrl(sc.ui.get, "stages/0/0/taskTable?" + encodeTwiceQuery)))(_.mkString) assert(encodeOnceRes.equals(encodeTwiceRes)) } }