-
Notifications
You must be signed in to change notification settings - Fork 8
Fixes to make fetch Join work in CLI including use name over nameToFilePath and replacing / to . in MetaData names
#398
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 26 commits
fd93108
55f099c
3cf5e19
91dd3cb
da9009a
94d102d
d4daf13
b7b9b6d
70b7e71
beaf474
82bd214
9a5da22
9eac5a7
4f4e699
69f00b1
f52d526
fb4a569
c62420f
23e8815
909466c
a6d9e31
6890cf6
73fc856
ff3a913
637831e
59b36a3
ae91478
9a90663
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,12 +18,12 @@ package ai.chronon.online | |
|
|
||
| import ai.chronon.api.Constants.MetadataDataset | ||
| import ai.chronon.api.Extensions.JoinOps | ||
| import ai.chronon.api.Extensions.MetadataOps | ||
| import ai.chronon.api.Extensions.StringOps | ||
| import ai.chronon.api.Extensions.WindowOps | ||
| import ai.chronon.api.Extensions.WindowUtils | ||
| import ai.chronon.api._ | ||
| import ai.chronon.api.thrift.TBase | ||
| import ai.chronon.api.Constants._ | ||
| import ai.chronon.online.KVStore.PutRequest | ||
| import ai.chronon.online.MetadataEndPoint.NameByTeamEndPointName | ||
| import org.slf4j.Logger | ||
|
|
@@ -41,6 +41,22 @@ import scala.util.Try | |
| // [timestamp -> {metric name -> metric value}] | ||
| case class DataMetrics(series: Seq[(Long, SortedMap[String, Any])]) | ||
|
|
||
| case class ConfPathOrName(confPath: Option[String] = None, confName: Option[String] = None) { | ||
|
|
||
| if (confPath.isEmpty && confName.isEmpty) { | ||
| throw new IllegalArgumentException("confPath and confName cannot be both empty") | ||
| } | ||
|
|
||
| def computeConfKey(confKeyword: String): String = { | ||
| if (confName.isDefined) { | ||
| s"$confKeyword/" + confName.get | ||
|
|
||
| } else { | ||
| s"$confKeyword/" + confPath.get.split("/").takeRight(1).head | ||
| } | ||
| } | ||
| } | ||
|
|
||
| class MetadataStore(kvStore: KVStore, | ||
| val dataset: String = MetadataDataset, | ||
| timeoutMillis: Long, | ||
|
|
@@ -62,9 +78,18 @@ class MetadataStore(kvStore: KVStore, | |
| implicit val executionContext: ExecutionContext = | ||
| Option(executionContextOverride).getOrElse(FlexibleExecutionContext.buildExecutionContext) | ||
|
|
||
| def getConf[T <: TBase[_, _]: Manifest](confPathOrName: String): Try[T] = { | ||
| def getConf[T <: TBase[_, _]: Manifest](confPathOrName: ConfPathOrName): Try[T] = { | ||
| val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] | ||
| val confKey = confPathOrName.confPathToKey | ||
|
|
||
| val confTypeKeyword = clazz match { | ||
| case j if j == classOf[Join] => JoinKeyword | ||
| case g if g == classOf[GroupBy] => GroupByKeyword | ||
| case sq if sq == classOf[StagingQuery] => StagingQueryKeyword | ||
| case m if m == classOf[Model] => ModelKeyword | ||
| case _ => throw new IllegalArgumentException(s"Unsupported conf type: $clazz") | ||
| } | ||
|
|
||
| val confKey = confPathOrName.computeConfKey(confTypeKeyword) | ||
| kvStore | ||
| .getString(confKey, dataset, timeoutMillis) | ||
| .map(conf => ThriftJsonCodec.fromJsonStr[T](conf, false, clazz)) | ||
|
|
@@ -122,7 +147,7 @@ class MetadataStore(kvStore: KVStore, | |
| lazy val getJoinConf: TTLCache[String, Try[JoinOps]] = new TTLCache[String, Try[JoinOps]]( | ||
| { name => | ||
| val startTimeMs = System.currentTimeMillis() | ||
| val result = getConf[Join](s"joins/$name") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So I made this change only because it's not compatible with how I've done the run.py + dataprocsubmitter at the moment. to explain, when we do metadata upload for a join what happens is that we:
dataproc will copy the file but that only copies
........this breaks when we try to ultimately do a fetch join now as on this line of code we hardcode |
||
| val result = getConf[Join](ConfPathOrName(confName = Some(name))) | ||
| .recover { case e: java.util.NoSuchElementException => | ||
| logger.error( | ||
| s"Failed to fetch conf for join $name at joins/$name, please check metadata upload to make sure the join metadata for $name has been uploaded") | ||
|
|
@@ -143,9 +168,10 @@ class MetadataStore(kvStore: KVStore, | |
| { join => Metrics.Context(environment = "join.meta.fetch", join = join) }) | ||
|
|
||
| def putJoinConf(join: Join): Unit = { | ||
| logger.info(s"uploading join conf to dataset: $dataset by key: joins/${join.metaData.nameToFilePath}") | ||
| val joinConfKeyForKvStore = join.keyNameForKvStore | ||
| logger.info(s"uploading join conf to dataset: $dataset by key:${joinConfKeyForKvStore}") | ||
| kvStore.put( | ||
| PutRequest(s"joins/${join.metaData.nameToFilePath}".getBytes(Constants.UTF8), | ||
| PutRequest(joinConfKeyForKvStore.getBytes(Constants.UTF8), | ||
| ThriftJsonCodec.toJsonStr(join).getBytes(Constants.UTF8), | ||
| dataset)) | ||
| } | ||
|
|
@@ -178,6 +204,9 @@ class MetadataStore(kvStore: KVStore, | |
| logger.error( | ||
| s"Failed to fetch metadata for $batchDataset, is it possible Group By Upload for $name has not succeeded?") | ||
| throw e | ||
| case e: Throwable => | ||
| logger.error(s"Failed to fetch metadata for $batchDataset", e) | ||
| throw e | ||
| } | ||
| logger.info(s"Fetched ${Constants.GroupByServingInfoKey} from : $batchDataset") | ||
| if (metaData.isFailure) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,7 +67,7 @@ class LogFlattenerJob(session: SparkSession, | |
| val metrics: Metrics.Context = Metrics.Context(Metrics.Environment.JoinLogFlatten, joinConf) | ||
|
|
||
| private def getUnfilledRanges(inputTable: String, outputTable: String): Seq[PartitionRange] = { | ||
| val partitionName: String = joinConf.metaData.nameToFilePath.replace("/", "%2F") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this one line killed me. not sure why we did this but I think it's because later on when we do I'm deleting this line and instead making sure we just use |
||
| val partitionName: String = joinConf.metaData.name | ||
| val unfilledRangeTry = Try( | ||
| tableUtils.unfilledRanges( | ||
| outputTable, | ||
|
|
@@ -80,7 +80,7 @@ class LogFlattenerJob(session: SparkSession, | |
| val ranges = unfilledRangeTry match { | ||
| case Failure(_: AssertionError) => | ||
| logger.info(s""" | ||
| |The join name ${joinConf.metaData.nameToFilePath} does not have available logged data yet. | ||
| |The join name ${joinConf.metaData.name} does not have available logged data yet. | ||
| |Please double check your logging status""".stripMargin) | ||
| Seq() | ||
| case Success(None) => | ||
|
|
@@ -208,7 +208,7 @@ class LogFlattenerJob(session: SparkSession, | |
| } | ||
| val unfilledRanges = getUnfilledRanges(logTable, joinConf.metaData.loggedTable) | ||
| if (unfilledRanges.isEmpty) return | ||
| val joinName = joinConf.metaData.nameToFilePath | ||
| val joinName = joinConf.metaData.name | ||
|
|
||
| val start = System.currentTimeMillis() | ||
| val columnBeforeCount = columnCount() | ||
|
|
||



There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changing this to
metadataNamebecausemetadataNameis sourced from the actual conf:Previously,
path.confPathToKeyextracts from the conf path. It's better to just be consistent and use the name in the actual conf (not the confpath)