
Commit 1db9531

Merge remote-tracking branch 'upstream/master' into dataTypeAndSchema
Conflicts: sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
2 parents: d48fc7b + eff9714

58 files changed: +1708 / -505 lines


assembly/pom.xml

Lines changed: 2 additions & 1 deletion
@@ -39,6 +39,7 @@
     <deb.pkg.name>spark</deb.pkg.name>
     <deb.install.path>/usr/share/spark</deb.install.path>
     <deb.user>root</deb.user>
+    <deb.bin.filemode>744</deb.bin.filemode>
   </properties>

   <dependencies>
@@ -276,7 +277,7 @@
             <user>${deb.user}</user>
             <group>${deb.user}</group>
             <prefix>${deb.install.path}/bin</prefix>
-            <filemode>744</filemode>
+            <filemode>${deb.bin.filemode}</filemode>
           </mapper>
         </data>
         <data>

bagel/src/main/scala/org/apache/spark/bagel/Bagel.scala

Lines changed: 5 additions & 0 deletions
@@ -72,6 +72,7 @@ object Bagel extends Logging {
     var verts = vertices
     var msgs = messages
     var noActivity = false
+    var lastRDD: RDD[(K, (V, Array[M]))] = null
     do {
       logInfo("Starting superstep " + superstep + ".")
       val startTime = System.currentTimeMillis
@@ -83,6 +84,10 @@ object Bagel extends Logging {
       val superstep_ = superstep  // Create a read-only copy of superstep for capture in closure
       val (processed, numMsgs, numActiveVerts) =
         comp[K, V, M, C](sc, grouped, compute(_, _, aggregated, superstep_), storageLevel)
+      if (lastRDD != null) {
+        lastRDD.unpersist(false)
+      }
+      lastRDD = processed

       val timeTaken = System.currentTimeMillis - startTime
       logInfo("Superstep %d took %d s".format(superstep, timeTaken / 1000))

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 3 additions & 0 deletions
@@ -269,6 +269,9 @@ object SparkSubmit {
       sysProps.getOrElseUpdate(k, v)
     }

+    // Spark properties included on command line take precedence
+    sysProps ++= args.sparkProperties
+
     (childArgs, childClasspath, sysProps, childMainClass)
   }
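The ordering above is what gives `--conf` values precedence: `getOrElseUpdate` only fills in keys that are still missing, while `++=` overwrites existing entries. A tiny standalone illustration of those two mutable `HashMap` behaviours (the property names and values below are made up):

```scala
import scala.collection.mutable.HashMap

// Toy demonstration of the precedence logic in SparkSubmit above:
// defaults merged with getOrElseUpdate never overwrite, the --conf
// properties merged with ++= always overwrite.
object PropertyPrecedence extends App {
  val sysProps  = HashMap("spark.master" -> "local[*]")
  val defaults  = Seq("spark.master" -> "yarn", "spark.eventLog.enabled" -> "true")
  val confFlags = HashMap("spark.eventLog.enabled" -> "false")   // stands in for args.sparkProperties

  for ((k, v) <- defaults) sysProps.getOrElseUpdate(k, v)  // spark.master stays "local[*]"
  sysProps ++= confFlags                                   // spark.eventLog.enabled becomes "false"

  sysProps.toSeq.sortBy(_._1).foreach { case (k, v) => println(s"$k=$v") }
}
```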

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 11 additions & 0 deletions
@@ -55,6 +55,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
   var verbose: Boolean = false
   var isPython: Boolean = false
   var pyFiles: String = null
+  val sparkProperties: HashMap[String, String] = new HashMap[String, String]()

   parseOpts(args.toList)
   loadDefaults()
@@ -177,6 +178,7 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
    |  executorCores           $executorCores
    |  totalExecutorCores      $totalExecutorCores
    |  propertiesFile          $propertiesFile
+   |  extraSparkProperties    $sparkProperties
    |  driverMemory            $driverMemory
    |  driverCores             $driverCores
    |  driverExtraClassPath    $driverExtraClassPath
@@ -290,6 +292,13 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
         jars = Utils.resolveURIs(value)
         parse(tail)

+      case ("--conf" | "-c") :: value :: tail =>
+        value.split("=", 2).toSeq match {
+          case Seq(k, v) => sparkProperties(k) = v
+          case _ => SparkSubmit.printErrorAndExit(s"Spark config without '=': $value")
+        }
+        parse(tail)
+
       case ("--help" | "-h") :: tail =>
         printUsageAndExit(0)

@@ -349,6 +358,8 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
        |                              on the PYTHONPATH for Python apps.
        |  --files FILES               Comma-separated list of files to be placed in the working
        |                              directory of each executor.
+       |
+       |  --conf PROP=VALUE           Arbitrary Spark configuration property.
        |  --properties-file FILE      Path to a file from which to load extra properties. If not
        |                              specified, this will look for conf/spark-defaults.conf.
        |
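The new `--conf` handler splits each argument on the first `=` only, so values may themselves contain further `=` characters (a user would invoke it as, for example, `spark-submit --conf spark.executor.memory=4g ...`). The sketch below uses a hypothetical `parseConf` helper, not part of SparkSubmitArguments, to show how `split("=", 2)` behaves:

```scala
// Standalone sketch of the key=value splitting used by the --conf option above.
object ConfArgParsing extends App {
  def parseConf(value: String): Either[String, (String, String)] =
    value.split("=", 2).toSeq match {
      case Seq(k, v) => Right(k -> v)
      case _         => Left(s"Spark config without '=': $value")
    }

  // The second '=' is preserved inside the value.
  println(parseConf("spark.executor.extraJavaOptions=-Dfoo=bar"))
  // A bare property name with no '=' is rejected.
  println(parseConf("spark.executor.memory"))
}
```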

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 2 additions & 1 deletion
@@ -169,7 +169,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
     val ui: SparkUI = if (renderUI) {
       val conf = this.conf.clone()
       val appSecManager = new SecurityManager(conf)
-      new SparkUI(conf, appSecManager, replayBus, appId, "/history/" + appId)
+      new SparkUI(conf, appSecManager, replayBus, appId,
+        HistoryServer.UI_PATH_PREFIX + s"/$appId")
       // Do not call ui.bind() to avoid creating a new server for each application
     } else {
       null

core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage("") {
     "Last Updated")

   private def appRow(info: ApplicationHistoryInfo): Seq[Node] = {
-    val uiAddress = "/history/" + info.id
+    val uiAddress = HistoryServer.UI_PATH_PREFIX + s"/${info.id}"
     val startTime = UIUtils.formatDate(info.startTime)
     val endTime = UIUtils.formatDate(info.endTime)
     val duration = UIUtils.formatDuration(info.endTime - info.startTime)

core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala

Lines changed: 3 additions & 1 deletion
@@ -114,7 +114,7 @@ class HistoryServer(
     attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static"))

     val contextHandler = new ServletContextHandler
-    contextHandler.setContextPath("/history")
+    contextHandler.setContextPath(HistoryServer.UI_PATH_PREFIX)
     contextHandler.addServlet(new ServletHolder(loaderServlet), "/*")
     attachHandler(contextHandler)
   }
@@ -172,6 +172,8 @@ class HistoryServer(
 object HistoryServer extends Logging {
   private val conf = new SparkConf

+  val UI_PATH_PREFIX = "/history"
+
   def main(argStrings: Array[String]) {
     SignalLogger.register(log)
     initSecurity()
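This hunk introduces the single constant that the FsHistoryProvider, HistoryPage and Master changes then reuse instead of repeating the literal "/history". A simplified sketch of the pattern, with stand-in names rather than the real Spark classes:

```scala
// Simplified stand-in for the refactor: one shared constant, several consumers.
object HistoryPaths {
  val UI_PATH_PREFIX = "/history"
}

object UiAddresses {
  def forApp(appId: String): String = HistoryPaths.UI_PATH_PREFIX + s"/$appId"
  def notFound(msg: String): String = HistoryPaths.UI_PATH_PREFIX + s"/not-found?msg=$msg"
}
```

Changing the prefix now means editing one constant rather than hunting down every hard-coded string.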

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 7 additions & 4 deletions
@@ -35,6 +35,7 @@ import akka.serialization.SerializationExtension
 import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException}
 import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState}
 import org.apache.spark.deploy.DeployMessages._
+import org.apache.spark.deploy.history.HistoryServer
 import org.apache.spark.deploy.master.DriverState.DriverState
 import org.apache.spark.deploy.master.MasterMessages._
 import org.apache.spark.deploy.master.ui.MasterWebUI
@@ -664,9 +665,10 @@ private[spark] class Master(
    */
   def rebuildSparkUI(app: ApplicationInfo): Boolean = {
     val appName = app.desc.name
+    val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found"
     val eventLogDir = app.desc.eventLogDir.getOrElse {
       // Event logging is not enabled for this application
-      app.desc.appUiUrl = "/history/not-found"
+      app.desc.appUiUrl = notFoundBasePath
       return false
     }
     val fileSystem = Utils.getHadoopFileSystem(eventLogDir)
@@ -681,13 +683,14 @@ private[spark] class Master(
       logWarning(msg)
       msg += " Did you specify the correct logging directory?"
       msg = URLEncoder.encode(msg, "UTF-8")
-      app.desc.appUiUrl = s"/history/not-found?msg=$msg&title=$title"
+      app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title"
       return false
     }

     try {
       val replayBus = new ReplayListenerBus(eventLogPaths, fileSystem, compressionCodec)
-      val ui = new SparkUI(new SparkConf, replayBus, appName + " (completed)", "/history/" + app.id)
+      val ui = new SparkUI(new SparkConf, replayBus, appName + " (completed)",
+        HistoryServer.UI_PATH_PREFIX + s"/${app.id}")
       replayBus.replay()
       appIdToUI(app.id) = ui
       webUi.attachSparkUI(ui)
@@ -702,7 +705,7 @@ private[spark] class Master(
       var msg = s"Exception in replaying log for application $appName!"
       logError(msg, e)
       msg = URLEncoder.encode(msg, "UTF-8")
-      app.desc.appUiUrl = s"/history/not-found?msg=$msg&exception=$exception&title=$title"
+      app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&exception=$exception&title=$title"
       false
     }
   }

core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala

Lines changed: 7 additions & 0 deletions
@@ -63,6 +63,13 @@ private[spark] class EventLoggingListener(
   // For testing. Keep track of all JSON serialized events that have been logged.
   private[scheduler] val loggedEvents = new ArrayBuffer[JValue]

+  /**
+   * Return only the unique application directory without the base directory.
+   */
+  def getApplicationLogDir(): String = {
+    name
+  }
+
   /**
    * Begin logging events.
    * If compression is used, log a file that indicates which compression library is used.

core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala

Lines changed: 2 additions & 2 deletions
@@ -57,7 +57,7 @@ private[spark] class LocalActor(
     case StatusUpdate(taskId, state, serializedData) =>
       scheduler.statusUpdate(taskId, state, serializedData)
       if (TaskState.isFinished(state)) {
-        freeCores += 1
+        freeCores += scheduler.CPUS_PER_TASK
        reviveOffers()
       }

@@ -68,7 +68,7 @@ private[spark] class LocalActor(
   def reviveOffers() {
     val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores))
     for (task <- scheduler.resourceOffers(offers).flatten) {
-      freeCores -= 1
+      freeCores -= scheduler.CPUS_PER_TASK
       executor.launchTask(executorBackend, task.taskId, task.name, task.serializedTask)
     }
   }
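The LocalBackend fix matters when `spark.task.cpus` is set above 1: the free-core counter must move by `CPUS_PER_TASK` in both directions, or local mode would think it has more task slots than it does. A toy model of the bookkeeping (the numbers are illustrative, not the real LocalActor):

```scala
// Toy model of the free-core accounting fixed above.
object FreeCoreBookkeeping extends App {
  val totalCores  = 8
  val cpusPerTask = 2          // corresponds to spark.task.cpus
  var freeCores   = totalCores

  def launchTask(): Unit = { freeCores -= cpusPerTask }  // was hard-coded "-= 1"
  def finishTask(): Unit = { freeCores += cpusPerTask }  // was hard-coded "+= 1"

  (1 to 3).foreach(_ => launchTask())
  println(s"free cores after 3 launches: $freeCores")  // 8 - 3*2 = 2
  finishTask()
  println(s"free cores after 1 finish: $freeCores")    // 4
}
```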
