@@ -28,6 +28,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, UIRoot}
import org.apache.spark.ui.{SparkUI, UIUtils, WebUI}
import org.apache.spark.ui.JettyUtils._
@@ -55,6 +56,9 @@ class HistoryServer(
// How many applications to retain
private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50)

+// How many applications to make available
+private val maxApplications = conf.get(HISTORY_MAX_APPS)

// Cache of loaded application UIs
private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock())

@@ -171,7 +175,7 @@
* @return List of all known applications.
*/
def getApplicationList(): Iterable[ApplicationHistoryInfo] = {
-provider.getListing()
+provider.getListing().take(maxApplications)
}

def getApplicationInfoList: Iterator[ApplicationInfo] = {
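For context, the new cap in HistoryServer simply truncates the provider's listing with Scala's `take`. A minimal sketch of that behavior, using hypothetical stand-ins for the provider listing and the resolved config value (not Spark's actual classes):

```scala
// Toy model of the capping in getApplicationList (illustrative names only).
case class AppInfo(id: String)

object TakeDemo extends App {
  val listing: Iterable[AppInfo] = (1 to 5).map(i => AppInfo(s"app-$i"))

  // Stand-in for conf.get(HISTORY_MAX_APPS); the real default is
  // Integer.MAX_VALUE, which makes take a no-op on any realistic listing.
  val maxApplications = 3

  // Only the first maxApplications entries survive.
  println(listing.take(maxApplications).mkString(", "))
  // -> AppInfo(app-1), AppInfo(app-2), AppInfo(app-3)
}
```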
@@ -119,4 +119,8 @@ package object config {
private[spark] val UI_RETAINED_TASKS = ConfigBuilder("spark.ui.retainedTasks")
.intConf
.createWithDefault(100000)

+// To limit how many applications are available in the History Server
+private[spark] val HISTORY_MAX_APPS =
+  ConfigBuilder("spark.history.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE)
}
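To make the builder chain above concrete, here is a toy sketch of the ConfigBuilder pattern (a key plus a typed default, resolved against a settings map). This is not Spark's real implementation, just the shape of it:

```scala
// Toy ConfigBuilder: not Spark's implementation, just the pattern.
final case class ConfigEntry[T](key: String, default: T, parse: String => T) {
  def readFrom(settings: Map[String, String]): T =
    settings.get(key).map(parse).getOrElse(default)
}

final class IntConfigBuilder(key: String) {
  def createWithDefault(default: Int): ConfigEntry[Int] = ConfigEntry(key, default, _.toInt)
}

final class ConfigBuilder(key: String) {
  def intConf: IntConfigBuilder = new IntConfigBuilder(key)
}

object ConfigDemo extends App {
  val HISTORY_MAX_APPS =
    new ConfigBuilder("spark.history.maxApplications").intConf.createWithDefault(Int.MaxValue)

  println(HISTORY_MAX_APPS.readFrom(Map.empty))                                     // 2147483647
  println(HISTORY_MAX_APPS.readFrom(Map("spark.history.maxApplications" -> "500"))) // 500
}
```

Unset keys fall back to the default, which is why `Integer.MAX_VALUE` effectively means "no limit" here.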
@@ -29,7 +29,8 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) {
def appList(
@QueryParam("status") status: JList[ApplicationStatus],
@DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam,
@DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam)
@DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam,
@QueryParam("limit") limit: Integer)
: Iterator[ApplicationInfo] = {
val allApps = uiRoot.getApplicationInfoList
val adjStatus = {
@@ -41,7 +42,7 @@
}
val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED)
val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING)
-allApps.filter { app =>
+val appList = allApps.filter { app =>
val anyRunning = app.attempts.exists(!_.completed)
// if any attempt is still running, we consider the app to also still be running
val statusOk = (!anyRunning && includeCompleted) ||
@@ -53,6 +54,11 @@
}
statusOk && dateOk
}
+if (limit != null) {
+  appList.take(limit)
+} else {
+  appList
+}
}
}

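Once a History Server is running (port 18080 by default), the new parameter can be exercised directly against the REST API; a minimal fetch in plain Scala:

```scala
// Ask the History Server REST API for at most three applications.
// Assumes a History Server on the default port 18080.
import scala.io.Source

object LimitDemo extends App {
  val url = "http://localhost:18080/api/v1/applications?limit=3"
  println(Source.fromURL(url).mkString)
}
```

The JSON document that follows is the new test expectation for exactly this limit=3 case; note that it contains three applications.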
@@ -0,0 +1,67 @@
[ {
"id" : "local-1430917381534",
"name" : "Spark shell",
"attempts" : [ {
"startTime" : "2015-05-06T13:03:00.893GMT",
"endTime" : "2015-05-06T13:03:11.398GMT",
"lastUpdated" : "",
"duration" : 10505,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1430917380893,
"endTimeEpoch" : 1430917391398,
"lastUpdatedEpoch" : 0
} ]
}, {
"id" : "local-1430917381535",
"name" : "Spark shell",
"attempts" : [ {
"attemptId" : "2",
"startTime" : "2015-05-06T13:03:00.893GMT",
"endTime" : "2015-05-06T13:03:00.950GMT",
"lastUpdated" : "",
"duration" : 57,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1430917380893,
"endTimeEpoch" : 1430917380950,
"lastUpdatedEpoch" : 0
}, {
"attemptId" : "1",
"startTime" : "2015-05-06T13:03:00.880GMT",
"endTime" : "2015-05-06T13:03:00.890GMT",
"lastUpdated" : "",
"duration" : 10,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1430917380880,
"endTimeEpoch" : 1430917380890,
"lastUpdatedEpoch" : 0
} ]
}, {
"id" : "local-1426533911241",
"name" : "Spark shell",
"attempts" : [ {
"attemptId" : "2",
"startTime" : "2015-03-17T23:11:50.242GMT",
"endTime" : "2015-03-17T23:12:25.177GMT",
"lastUpdated" : "",
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1426633910242,
"endTimeEpoch" : 1426633945177,
"lastUpdatedEpoch" : 0
}, {
"attemptId" : "1",
"startTime" : "2015-03-16T19:25:10.242GMT",
"endTime" : "2015-03-16T19:25:45.177GMT",
"lastUpdated" : "",
"duration" : 34935,
"sparkUser" : "irashid",
"completed" : true,
"startTimeEpoch" : 1426533910242,
"endTimeEpoch" : 1426533945177,
"lastUpdatedEpoch" : 0
} ]
} ]
@@ -100,6 +100,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
"minDate app list json" -> "applications?minDate=2015-02-10",
"maxDate app list json" -> "applications?maxDate=2015-02-10",
"maxDate2 app list json" -> "applications?maxDate=2015-02-03T16:42:40.000GMT",
"limit app list json" -> "applications?limit=3",
Contributor: What happens with limit=-1? I assume the take will just return an empty map, but it's good to try it.

Member (author): limit=-1 will actually return the whole list; I just pass the int to take, and that's how that function works. I thought this was a good default value (which is why it's the default value in the js file).

Contributor: OK, sounds fine.

Contributor: I was worried about what would happen if limit was unset, but the existing tests (lines 180-182) all cover that.

"one app json" -> "applications/local-1422981780767",
"one app multi-attempt json" -> "applications/local-1426533911241",
"job list json" -> "applications/local-1422981780767/jobs",
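For reference, the suite drives each `name -> path` pair above as a golden-file test. A rough sketch of how that could look (helper and path names here are illustrative stand-ins, not HistoryServerSuite's exact code):

```scala
// Golden-file style check: fetch a REST path, compare to a stored expectation.
// All names below are illustrative assumptions.
import scala.io.Source

def fetch(base: String, path: String): String =
  Source.fromURL(s"$base/api/v1/$path").mkString

def expected(name: String): String =
  Source.fromFile(
    "core/src/test/resources/HistoryServerExpectations/" +
      name.replace(" ", "_") + "_expectation.json").mkString

// e.g. fetch(serverUrl, "applications?limit=3") should equal expected("limit app list json")
```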
dev/.rat-excludes (1 addition, 0 deletions)
@@ -101,3 +101,4 @@ org.apache.spark.scheduler.ExternalClusterManager
.*\.sql
.Rbuildignore
org.apache.spark.deploy.yarn.security.ServiceCredentialProvider
+spark-warehouse
Contributor: not sure about this change

Member (author): Oh whoops, I must have committed that by accident. dev/run-tests won't pass without it, so I've been adding/removing it every time.