Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.scheduler

import java.util.Locale

import org.apache.spark.resource.ResourceProfile
import org.apache.spark.storage.BlockManagerId

Expand Down Expand Up @@ -73,7 +75,12 @@ private[spark] trait SchedulerBackend {
* Executors tab for the driver.
* @return Map containing the log names and their respective URLs
*/
def getDriverLogUrls: Option[Map[String, String]] = None
def getDriverLogUrls: Option[Map[String, String]] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we explicitly mention the target resource managers instead of "Support setting driver log url using env vars on other resource managers", because YarnClusterSchedulerBackend will not use this implementation?

override def getDriverLogUrls: Option[Map[String, String]] = {
YarnContainerInfoHelper.getLogUrls(sc.hadoopConfiguration, container = None)
}

Copy link
Member Author

@pan3793 pan3793 Oct 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense if we want to keep YARN as-is, another direction is to let Yarn support it then it works on all resource managers.

The pseudo-code would look like

override def getDriverLogUrls: Option[Map[String, String]] = {
  YarnContainerInfoHelper.getLogUrls(sc.hadoopConfiguration, container = None) ++
    super.getDriverLogUrls.getOrElse(Map.empty)
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you happen to know any instances where the production YARN clusters use the external log services?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was mentioned in SPARK-26311

val prefix = "SPARK_DRIVER_LOG_URL_"
val driverLogUrls = sys.env.filterKeys(_.startsWith(prefix))
.map(e => (e._1.substring(prefix.length).toLowerCase(Locale.ROOT), e._2)).toMap
if (driverLogUrls.nonEmpty) Some(driverLogUrls) else None
}

/**
* Get the attributes on driver. These attributes are used to replace log URLs when
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package org.apache.spark.scheduler.cluster

import java.util.Locale
import java.util.concurrent.{Semaphore, TimeUnit}
import java.util.concurrent.atomic.AtomicBoolean

Expand Down Expand Up @@ -251,13 +250,6 @@ private[spark] class StandaloneSchedulerBackend(
}
}

/**
 * Collect driver log URLs advertised through the process environment.
 *
 * Every environment variable named `SPARK_DRIVER_LOG_URL_<NAME>` contributes one
 * entry whose key is `<name>` lower-cased with the root locale (so the mapping is
 * stable regardless of the JVM's default locale) and whose value is the URL.
 *
 * @return Some(map of log name -> URL) when at least one matching variable is set,
 *         None otherwise.
 */
override def getDriverLogUrls: Option[Map[String, String]] = {
  val prefix = "SPARK_DRIVER_LOG_URL_"
  val urls = sys.env.collect {
    case (key, url) if key.startsWith(prefix) =>
      key.substring(prefix.length).toLowerCase(Locale.ROOT) -> url
  }
  if (urls.nonEmpty) Some(urls) else None
}

private def waitForRegistration() = {
registrationBarrier.acquire()
}
Expand Down