diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index fe8be0b3b20d3..168bd1e68a304 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -538,6 +538,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
             reader.fileSizeForLastIndex > 0
           } catch {
             case _: FileNotFoundException => false
+            case NonFatal(e) =>
+              logWarning(s"Error while reading new log ${reader.rootPath}", e)
+              false
           }
 
         case NonFatal(e) =>
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 3f8c875f5a552..1578b908b1b55 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -1475,6 +1475,55 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging {
     }
   }
 
+  test("SPARK-33146: don't let one bad rolling log folder prevent loading other applications") {
+    withTempDir { dir =>
+      val conf = createTestConf(true)
+      conf.set(HISTORY_LOG_DIR, dir.getAbsolutePath)
+      val hadoopConf = SparkHadoopUtil.newConfiguration(conf)
+      val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf)
+
+      val provider = new FsHistoryProvider(conf)
+
+      val writer = new RollingEventLogFilesWriter("app", None, dir.toURI, conf, hadoopConf)
+      writer.start()
+
+      writeEventsToRollingWriter(writer, Seq(
+        SparkListenerApplicationStart("app", Some("app"), 0, "user", None),
+        SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false)
+      provider.checkForLogs()
+      provider.cleanLogs()
+      assert(dir.listFiles().size === 1)
+      assert(provider.getListing.length === 1)
+
+      // Manually delete the appstatus file to make an invalid rolling event log
+      val appStatusPath = RollingEventLogFilesWriter.getAppStatusFilePath(new Path(writer.logPath),
+        "app", None, true)
+      fs.delete(appStatusPath, false)
+      provider.checkForLogs()
+      provider.cleanLogs()
+      assert(provider.getListing.length === 0)
+
+      // Create a new application
+      val writer2 = new RollingEventLogFilesWriter("app2", None, dir.toURI, conf, hadoopConf)
+      writer2.start()
+      writeEventsToRollingWriter(writer2, Seq(
+        SparkListenerApplicationStart("app2", Some("app2"), 0, "user", None),
+        SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false)
+
+      // Both folders exist but only one application found
+      provider.checkForLogs()
+      provider.cleanLogs()
+      assert(provider.getListing.length === 1)
+      assert(dir.listFiles().size === 2)
+
+      // Make sure a new provider sees the valid application
+      provider.stop()
+      val newProvider = new FsHistoryProvider(conf)
+      newProvider.checkForLogs()
+      assert(newProvider.getListing.length === 1)
+    }
+  }
+
   /**
    * Asks the provider to check for logs and calls a function to perform checks on the updated
    * app list. Example:
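
The behavioral change here is the single extra `case NonFatal(e)` arm in the filter that decides whether a newly discovered event log should be loaded: any non-fatal failure while probing one log is now logged and treated as "skip this log" rather than being allowed to escape and abort the whole `checkForLogs()` scan. Below is a minimal standalone sketch of that filtering pattern, with a hypothetical `LogProbe` trait standing in for Spark's actual `EventLogFileReader` (names and the printed message are illustrative, not Spark's API):

    import java.io.FileNotFoundException
    import scala.util.control.NonFatal

    object NonFatalFilterSketch {
      // Hypothetical stand-in for EventLogFileReader, reduced to the two
      // members the filter touches.
      trait LogProbe {
        def rootPath: String
        def fileSizeForLastIndex: Long // may throw if the log folder is malformed
      }

      // The filter from the patch: a missing file was already treated as
      // "skip"; after the change, any other non-fatal error is reported and
      // skipped too, so one corrupt rolling-log folder cannot fail the
      // entire listing pass.
      def shouldLoad(reader: LogProbe): Boolean =
        try {
          reader.fileSizeForLastIndex > 0
        } catch {
          case _: FileNotFoundException => false
          case NonFatal(e) =>
            Console.err.println(s"Error while reading new log ${reader.rootPath}: $e")
            false
        }

      def main(args: Array[String]): Unit = {
        val good = new LogProbe {
          val rootPath = "/logs/app-ok"
          val fileSizeForLastIndex = 1024L
        }
        val bad = new LogProbe {
          val rootPath = "/logs/app-missing-appstatus"
          def fileSizeForLastIndex =
            throw new IllegalStateException("no appstatus file found")
        }
        // Only the healthy log survives the filter; the corrupt one is
        // reported and skipped.
        assert(Seq(good, bad).filter(shouldLoad).map(_.rootPath) == Seq("/logs/app-ok"))
      }
    }

`NonFatal` matches ordinary exceptions but deliberately excludes fatal JVM conditions such as `OutOfMemoryError` and `InterruptedException`, so genuinely fatal errors still propagate out of the scan. The new test then verifies the end-to-end behavior: a rolling log folder whose appstatus file has been deleted is dropped from the listing without preventing the valid `app2` folder from loading, both for the running provider and for a freshly constructed one.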