Spark fetcher is now able to fetch event logs via REST API #225
Changes from 1 commit
@@ -16,24 +16,28 @@
 package com.linkedin.drelephant.spark.fetchers

 import java.io.BufferedInputStream
 import java.net.URI
 import java.text.SimpleDateFormat
 import java.util.zip.{ZipEntry, ZipInputStream}
 import java.util.{Calendar, SimpleTimeZone}

 import scala.async.Async
 import scala.concurrent.{ExecutionContext, Future}
 import scala.util.control.NonFatal

 import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
 import com.fasterxml.jackson.module.scala.DefaultScalaModule
 import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
-import com.linkedin.drelephant.spark.data.SparkRestDerivedData
-import com.linkedin.drelephant.spark.fetchers.statusapiv1.{ApplicationAttemptInfo, ApplicationInfo, ExecutorSummary, JobData, StageData}
+import com.linkedin.drelephant.spark.data.{SparkLogDerivedData, SparkRestDerivedData}
+import com.linkedin.drelephant.spark.fetchers.statusapiv1.{ApplicationInfo, ExecutorSummary, JobData, StageData}
 import javax.ws.rs.client.{Client, ClientBuilder, WebTarget}
 import javax.ws.rs.core.MediaType

 import org.apache.hadoop.fs.Path
 import org.apache.log4j.Logger
 import org.apache.spark.SparkConf

 import scala.collection.mutable

 /**
  * A client for getting data from the Spark monitoring REST API, e.g. <https://spark.apache.org/docs/1.4.1/monitoring.html#rest-api>.
@@ -67,16 +71,16 @@ class SparkRestClient(sparkConf: SparkConf) {

   private val apiTarget: WebTarget = client.target(historyServerUri).path(API_V1_MOUNT_PATH)

-  def fetchData(appId: String)(implicit ec: ExecutionContext): Future[SparkRestDerivedData] = {
+  def fetchRestData(appId: String)(implicit ec: ExecutionContext): Future[SparkRestDerivedData] = {
     val appTarget = apiTarget.path(s"applications/${appId}")
     logger.info(s"calling REST API at ${appTarget.getUri}")

     val applicationInfo = getApplicationInfo(appTarget)

     // Limit scope of async.
+    val lastAttemptId = applicationInfo.attempts.maxBy {_.startTime}.attemptId
+    val attemptTarget = lastAttemptId.map(appTarget.path).getOrElse(appTarget)
     async {
-      val lastAttemptId = applicationInfo.attempts.maxBy {_.startTime}.attemptId
-      val attemptTarget = lastAttemptId.map(appTarget.path).getOrElse(appTarget)
       val futureJobDatas = async { getJobDatas(attemptTarget) }
       val futureStageDatas = async { getStageDatas(attemptTarget) }
       val futureExecutorSummaries = async { getExecutorSummaries(attemptTarget) }
@@ -89,6 +93,34 @@ class SparkRestClient(sparkConf: SparkConf) {
     }
   }

+  def fetchLogData(appId: String, attemptId: Option[String])(
Review thread (on fetchLogData):
Reviewer: I think there is a design issue here. First, it is inconsistent that all the other methods that get data via REST (getApplicationInfo, getJobDatas, etc.) are defined as private methods. And I think a better design would be to introduce an optional member,
Author: This is a very good point. I've considered extending
+    implicit ec: ExecutionContext
+  ): Future[Option[SparkLogDerivedData]] = {
+    val appTarget = apiTarget.path(s"applications/${appId}")
+    logger.info(s"calling REST API at ${appTarget.getUri}")
+
+    val logPrefix = attemptId.map(id => s"${appId}_$id").getOrElse(appId)
+    async {
+      resource.managed { getApplicationLogs(appTarget) }.acquireAndGet { zis =>
+        var entry: ZipEntry = null
+        do {
Review thread (on the do...while block):
Reviewer: Can you please add a comment which explains the logic of this do...while block?
Author: Done.
+          zis.closeEntry()
+          entry = zis.getNextEntry
+        } while (!(entry == null || entry.getName.startsWith(logPrefix)))
+
+        if (entry == null) {
+          logger.warn(
+            s"failed to resolve log starting with $logPrefix for ${appTarget.getUri}")
+          None
+        } else {
+          val codec = SparkLogClient.compressionCodecForLogName(sparkConf, entry.getName)
+          Some(SparkLogClient.findDerivedData(
+            codec.map { _.compressedInputStream(zis) }.getOrElse(zis)))
+        }
+      }
+    }
+  }
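As a gloss on the review thread above: the do...while walks the entries of the zip archive returned for the application's logs until it either exhausts the archive or finds an entry whose name starts with the expected appId/attemptId prefix. Below is a commented sketch of that scan in isolation; the helper name findLogEntry is illustrative and not part of this PR.

import java.util.zip.{ZipEntry, ZipInputStream}

// Sketch only: scan a ZipInputStream for the first entry whose name starts with
// logPrefix (for example "<appId>_<attemptId>"). Returns None if no entry matches.
def findLogEntry(zis: ZipInputStream, logPrefix: String): Option[ZipEntry] = {
  var entry: ZipEntry = null
  do {
    // Finish the current entry (if any), then position the stream at the next one;
    // getNextEntry returns null once the archive is exhausted.
    zis.closeEntry()
    entry = zis.getNextEntry
  } while (!(entry == null || entry.getName.startsWith(logPrefix)))
  // Null is mapped to None; a matching entry is returned as Some(entry).
  Option(entry)
}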
+
   private def getApplicationInfo(appTarget: WebTarget): ApplicationInfo = {
     try {
       get(appTarget, SparkRestObjectMapper.readValue[ApplicationInfo])
@@ -100,6 +132,20 @@ class SparkRestClient(sparkConf: SparkConf) {
     }
   }

+  private def getApplicationLogs(appTarget: WebTarget): ZipInputStream = {
+    val target = appTarget.path("logs")
+    try {
+      val is = target.request(MediaType.APPLICATION_OCTET_STREAM)
+        .get(classOf[java.io.InputStream])
+      new ZipInputStream(new BufferedInputStream(is))
+    } catch {
+      case NonFatal(e) => {
+        logger.error(s"error reading logs ${target.getUri}", e)
+        throw e
+      }
+    }
+  }
+
   private def getJobDatas(attemptTarget: WebTarget): Seq[JobData] = {
     val target = attemptTarget.path("jobs")
     try {
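Tying back to the earlier review thread about which methods should be public: with the API as it stands in this commit, a caller invokes the two public methods, fetchRestData and fetchLogData, and combines their results. The following is a rough usage sketch only; gatherData and the tuple result type are assumptions made for illustration, not code from this PR.

import scala.concurrent.{ExecutionContext, Future}

import com.linkedin.drelephant.spark.data.{SparkLogDerivedData, SparkRestDerivedData}
import com.linkedin.drelephant.spark.fetchers.SparkRestClient

// Hypothetical caller: fetch the REST-derived data, then try to fetch the
// log-derived data for the given attempt, and return both together.
def gatherData(client: SparkRestClient, appId: String, attemptId: Option[String])(
  implicit ec: ExecutionContext
): Future[(SparkRestDerivedData, Option[SparkLogDerivedData])] =
  for {
    restData <- client.fetchRestData(appId)
    logData <- client.fetchLogData(appId, attemptId)
  } yield (restData, logData)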
Review thread (on fetchRestData):
Reviewer: I think you can move lines 80 and 81 back to the async block?
Author: My motivation was that they are pure and exception-free and therefore shouldn't be part of async { ... }. Should I move them back anyway?
Reviewer: That's right. Then I think you should keep them outside. Just add a comment there explaining why they are outside of the async block.
Author: Done.
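For readers skimming this thread: scala-async's async { ... } returns a Future, and non-fatal exceptions thrown inside the block are captured in that Future rather than thrown to the caller. Values that are pure and cannot throw, like lastAttemptId and attemptTarget in fetchRestData above, gain nothing from living inside the block, which is the rationale given here. A minimal sketch of the pattern, with illustrative names that are not from this PR:

import scala.async.Async.{async, await}
import scala.concurrent.{ExecutionContext, Future}

// Illustrative pattern: pure, exception-free preprocessing stays outside the
// async block; only the potentially slow or failing work is awaited inside it.
def fetchSummaries(ids: Seq[String])(implicit ec: ExecutionContext): Future[Seq[String]] = {
  // Pure computation: cannot block or throw, so keep it outside async { ... }.
  val normalizedIds = ids.map(_.trim).filter(_.nonEmpty)
  async {
    // Remote/async work happens here; failures end up in the returned Future.
    val lookups = normalizedIds.map(id => Future(s"summary for $id"))
    await(Future.sequence(lookups))
  }
}

This mirrors the change in fetchRestData above, where the two vals were moved out of the async block while the futures stayed inside it.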