Commit 9db0efd

Author: Marcelo Vanzin (committed)

Show prettier name in UI.
Work around a SparkUI issue (SPARK-2169) where the name to show has to be provided in the constructor. Also remove the explicit flushes from the logging code; they are no longer useful now that the history server only reads data from finished applications (and the API used, FSDataOutputStream.sync(), does not exist in Hadoop trunk).
1 parent 8f42274 commit 9db0efd

File tree: 3 files changed (+30 −67)
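
For context, the event log is now a single file whose name encodes the
application name, a timestamp, the Spark version, an optional compression
codec, and an optional ".inprogress" suffix. A minimal sketch of how the
widened regex added by this commit splits such a name (the regex is copied
from the diff below; the sample file name is invented for illustration):

    // Groups: (app name)-(timestamp)-(Spark version)[-(codec)][.inprogress]
    val IN_PROGRESS = ".inprogress"
    val LOG_FILE_NAME_REGEX = s"(.+)-([0-9]+)-([0-9](?:\\.[0-9])*)(?:-(.+?))?(\\$IN_PROGRESS)?".r

    "myapp-1405990600000-1.0-lzf.inprogress" match {
      case LOG_FILE_NAME_REGEX(appName, timestamp, version, codecName, inprogress) =>
        // appName = "myapp", timestamp = "1405990600000", version = "1.0",
        // codecName = "lzf", inprogress = ".inprogress"
        println(s"$appName-$timestamp")  // the display name the history server builds
      case _ =>
        println("not an event log file name")
    }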

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala

Lines changed: 12 additions & 7 deletions
@@ -163,7 +163,7 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
           fs.exists(new Path(entry.getPath(), APPLICATION_COMPLETE))
         } else {
           try {
-            val matcher(version, codecName, inprogress) = entry.getPath().getName()
+            val matcher(_, _, version, codecName, inprogress) = entry.getPath().getName()
             inprogress == null
           } catch {
             case e: Exception => false
@@ -247,32 +247,37 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
       return (null, null)
     }
 
-    val (logFile, lastUpdated) = if (log.isFile()) {
-      (elogInfo.path, log.getModificationTime())
+    val (appName, logFile, lastUpdated) = if (log.isFile()) {
+      // Due to SPARK-2169, we need to provide the name of the application to be shown in
+      // the SparkUI constructor; setting it afterwards has no effect. So get the portion
+      // of the log file name that contains that information.
+      val EventLoggingListener.LOG_FILE_NAME_REGEX(appName, timestamp, _, _, _) =
+        log.getPath().getName()
+
+      (s"$appName-$timestamp", elogInfo.path, log.getModificationTime())
     } else {
       // For old-style log directories, need to find the actual log file.
       val status = fs.listStatus(elogInfo.path)
         .filter(e => e.getPath().getName().startsWith(LOG_PREFIX))(0)
-      (status.getPath(), status.getModificationTime())
+      (elogInfo.path.getName(), status.getPath(), status.getModificationTime())
     }
 
-    val appId = elogInfo.path.getName
     val replayBus = new ReplayListenerBus(logFile, fs, elogInfo.compressionCodec)
     val appListener = new ApplicationEventListener
     replayBus.addListener(appListener)
 
     val ui: SparkUI = if (renderUI) {
       val conf = this.conf.clone()
       val appSecManager = new SecurityManager(conf)
-      new SparkUI(conf, appSecManager, replayBus, appId, "/history/" + appId)
+      new SparkUI(conf, appSecManager, replayBus, appName, "/history/" + elogInfo.path.getName())
       // Do not call ui.bind() to avoid creating a new server for each application
     } else {
       null
     }
 
     replayBus.replay()
     val appInfo = ApplicationHistoryInfo(
-      appId,
+      elogInfo.path.getName(),
       appListener.appName,
       appListener.startTime,
       appListener.endTime,

core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala

Lines changed: 11 additions & 53 deletions
@@ -32,12 +32,7 @@ import org.json4s.jackson.JsonMethods._
 import org.apache.spark.{Logging, SparkConf, SparkContext}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.io.CompressionCodec
-<<<<<<< HEAD
-import org.apache.spark.SPARK_VERSION
-import org.apache.spark.util.{FileLogger, JsonProtocol, Utils}
-=======
 import org.apache.spark.util.{JsonProtocol, Utils}
->>>>>>> Make event logger use a single file.
 
 /**
  * A SparkListener that logs events to persistent storage.
@@ -65,17 +60,11 @@ private[spark] class EventLoggingListener(
   private val testing = sparkConf.getBoolean("spark.eventLog.testing", false)
   private val outputBufferSize = sparkConf.getInt("spark.eventLog.buffer.kb", 100) * 1024
   private val logBaseDir = sparkConf.get("spark.eventLog.dir", DEFAULT_LOG_DIR).stripSuffix("/")
-<<<<<<< HEAD
-  private val name = appName.replaceAll("[ :/]", "-").replaceAll("[${}'\"]", "_")
-    .toLowerCase + "-" + System.currentTimeMillis
-  val logDir = Utils.resolveURI(logBaseDir) + "/" + name.stripSuffix("/")
-=======
   private val fileSystem = Utils.getHadoopFileSystem(new URI(logBaseDir))
   private lazy val compressionCodec = CompressionCodec.createCodec(sparkConf)
 
   // Only defined if the file system scheme is not local
   private var hadoopDataStream: Option[FSDataOutputStream] = None
->>>>>>> Make event logger use a single file.
 
   private var writer: Option[PrintWriter] = None
 
@@ -100,17 +89,6 @@ private[spark] class EventLoggingListener(
   }
 
   /**
-<<<<<<< HEAD
-   * Return only the unique application directory without the base directory.
-   */
-  def getApplicationLogDir(): String = {
-    name
-  }
-
-  /**
-   * Begin logging events.
-   * If compression is used, log a file that indicates which compression library is used.
-=======
    * Creates the log file in the configured log directory.
    *
    * The file name contains some metadata about its contents. It follows the following
@@ -128,16 +106,11 @@ private[spark] class EventLoggingListener(
    *   used to write the file
    * - ".inprogress" will be present while the log file is still being written to, and
    *   removed after the application is finished.
->>>>>>> Make event logger use a single file.
    */
   def start() {
     if (!fileSystem.isDirectory(new Path(logBaseDir))) {
       throw new IllegalArgumentException(s"Log directory $logBaseDir does not exist.");
     }
-<<<<<<< HEAD
-    logger.newFile(SPARK_VERSION_PREFIX + SPARK_VERSION)
-    logger.newFile(LOG_PREFIX + logger.fileIndex)
-=======
 
     val workingPath = logPath + IN_PROGRESS
     val uri = new URI(workingPath)
@@ -162,25 +135,17 @@ private[spark] class EventLoggingListener(
     writer = Some(new PrintWriter(cstream))
 
     logInfo("Logging events to %s".format(logPath))
->>>>>>> Make event logger use a single file.
   }
 
   /** Log the event as JSON. */
-  private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false) {
+  private def logEvent(event: SparkListenerEvent) {
     val eventJson = JsonProtocol.sparkEventToJson(event)
-
     writer.foreach(_.println(compact(render(eventJson))))
-    if (flushLogger) {
-      writer.foreach(_.flush())
-      hadoopDataStream.foreach(_.sync())
-    }
-
     if (testing) {
       loggedEvents += eventJson
     }
   }
 
-  // Events that do not trigger a flush
   override def onStageSubmitted(event: SparkListenerStageSubmitted) =
     logEvent(event)
   override def onTaskStart(event: SparkListenerTaskStart) =
@@ -191,24 +156,22 @@ private[spark] class EventLoggingListener(
     logEvent(event)
   override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate) =
     logEvent(event)
-
-  // Events that trigger a flush
   override def onStageCompleted(event: SparkListenerStageCompleted) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onJobStart(event: SparkListenerJobStart) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onJobEnd(event: SparkListenerJobEnd) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onBlockManagerAdded(event: SparkListenerBlockManagerAdded) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onBlockManagerRemoved(event: SparkListenerBlockManagerRemoved) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onUnpersistRDD(event: SparkListenerUnpersistRDD) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onApplicationStart(event: SparkListenerApplicationStart) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   override def onApplicationEnd(event: SparkListenerApplicationEnd) =
-    logEvent(event, flushLogger = true)
+    logEvent(event)
   // No-op because logging every update would be overkill
   override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate) { }
 
@@ -234,7 +197,7 @@ private[spark] object EventLoggingListener extends Logging {
   val LOG_FILE_PERMISSIONS = FsPermission.createImmutable(Integer.parseInt("770", 8).toShort)
 
   // Regex for parsing log file names. See description of log file name format in start().
-  val LOG_FILE_NAME_REGEX = s".+-[0-9]+-([0-9](?:\\.[0-9])*)(?:-(.+?))?(\\$IN_PROGRESS)?".r
+  val LOG_FILE_NAME_REGEX = s"(.+)-([0-9]+)-([0-9](?:\\.[0-9])*)(?:-(.+?))?(\\$IN_PROGRESS)?".r
 
   // A cache for compression codecs to avoid creating the same codec many times
   private val codecMap = new mutable.HashMap[String, CompressionCodec]
@@ -248,7 +211,7 @@ private[spark] object EventLoggingListener extends Logging {
    */
  def parseLoggingInfo(log: Path): EventLoggingInfo = {
    try {
-      val LOG_FILE_NAME_REGEX(version, codecName, inprogress) = log.getName()
+      val LOG_FILE_NAME_REGEX(_, _, version, codecName, inprogress) = log.getName()
      val codec: Option[CompressionCodec] = if (codecName != null) {
        val conf = new SparkConf()
        conf.set("spark.io.compression.codec", codecName)
@@ -259,13 +222,8 @@ private[spark] object EventLoggingListener extends Logging {
       EventLoggingInfo(log, version, codec, inprogress == null)
     } catch {
       case e: Exception =>
-<<<<<<< HEAD
-        logError("Exception in parsing logging info from directory %s".format(logDir), e)
-        EventLoggingInfo.empty
-=======
         logError("Exception in parsing logging info from file %s".format(log), e)
         null
->>>>>>> Make event logger use a single file.
     }
   }
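
A note on the removed flushes: events now stay in the output buffers until the
stream is closed, which is acceptable because the history server only reads
logs of finished applications. If per-event durability were ever wanted again,
the Hadoop 2 successor of the removed FSDataOutputStream.sync() is hflush();
a hypothetical variant of logEvent along those lines (an assumption, not what
this commit does):

    private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false) {
      val eventJson = JsonProtocol.sparkEventToJson(event)
      writer.foreach(_.println(compact(render(eventJson))))
      if (flushLogger) {
        writer.foreach(_.flush())             // drain the PrintWriter's buffer
        hadoopDataStream.foreach(_.hflush())  // push buffered bytes out to the datanodes
      }
      if (testing) {
        loggedEvents += eventJson
      }
    }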

core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala

Lines changed: 7 additions & 7 deletions
@@ -23,14 +23,14 @@ import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
 import org.json4s.jackson.JsonMethods._
 import org.scalatest.{BeforeAndAfter, FunSuite}
-import org.scalatest.matchers.ShouldMatchers
+import org.scalatest.Matchers
 
 import org.apache.spark.SparkConf
 import org.apache.spark.io._
 import org.apache.spark.scheduler._
 import org.apache.spark.util.{JsonProtocol, Utils}
 
-class FsHistoryProviderSuite extends FunSuite with BeforeAndAfter with ShouldMatchers {
+class FsHistoryProviderSuite extends FunSuite with BeforeAndAfter with Matchers {
 
   private var testDir: File = null
 
@@ -54,15 +54,15 @@ class FsHistoryProviderSuite extends FunSuite with BeforeAndAfter with ShouldMat
     val provider = new FsHistoryProvider(conf)
 
     // Write a new-style application log.
-    val logFile1 = new File(testDir, "app1-1-1.0")
+    val logFile1 = new File(testDir, "app1-1-2-1.0")
     writeFile(logFile1,
-      SparkListenerApplicationStart("app1", 1L, "test"),
+      SparkListenerApplicationStart("app1-1", 1L, "test"),
       SparkListenerApplicationEnd(2L)
     )
 
     // Write an unfinished app, new-style.
-    writeFile(new File(testDir, "app2-1-1.0.inprogress"),
-      SparkListenerApplicationStart("app2", 1L, "test")
+    writeFile(new File(testDir, "app2-2-1-1.0.inprogress"),
+      SparkListenerApplicationStart("app2-2", 1L, "test")
     )
 
     // Write an old-style application log.
@@ -93,7 +93,7 @@ class FsHistoryProviderSuite extends FunSuite with BeforeAndAfter with ShouldMat
 
     list(0) should be (ApplicationHistoryInfo(oldLog.getName(), "app3", 2L, 3L,
       oldLog.lastModified(), "test"))
-    list(1) should be (ApplicationHistoryInfo(logFile1.getName(), "app1", 1L, 2L,
+    list(1) should be (ApplicationHistoryInfo(logFile1.getName(), "app1-1", 1L, 2L,
       logFile1.lastModified(), "test"))
 
     // Make sure the UI can be rendered.
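
The suite's writeFile helper is outside this diff; given the json4s and
JsonProtocol imports above, it presumably serializes one event per line, in
the format ReplayListenerBus reads back. A hypothetical reconstruction (the
helper's actual body is not shown in the commit):

    private def writeFile(file: File, events: SparkListenerEvent*): Unit = {
      val out = new PrintWriter(file)
      try {
        // One JSON-serialized event per line.
        events.foreach(e => out.println(compact(render(JsonProtocol.sparkEventToJson(e)))))
      } finally {
        out.close()
      }
    }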
