zipline-ai · tchow-zlai · Jul 23, 2025 · Jul 15, 2025 · Jul 22, 2025 · Jul 22, 2025
diff --git a/spark/src/main/scala/ai/chronon/spark/batch/BatchNodeRunner.scala b/spark/src/main/scala/ai/chronon/spark/batch/BatchNodeRunner.scala
@@ -5,14 +5,22 @@ import ai.chronon.api.planner.{DependencyResolver, NodeRunner}
 import ai.chronon.api.{MetaData, PartitionRange, PartitionSpec, ThriftJsonCodec}
 import ai.chronon.online.Api
 import ai.chronon.online.KVStore.PutRequest
-import ai.chronon.planner.{GroupByUploadNode, MonolithJoinNode, Node, NodeContent, StagingQueryNode}
+import ai.chronon.planner.{
+  ExternalSourceSensorNode,
+  GroupByUploadNode,
+  MonolithJoinNode,
+  Node,
+  NodeContent,
+  StagingQueryNode
+}
 import ai.chronon.spark.catalog.TableUtils
 import ai.chronon.spark.join.UnionJoin
 import ai.chronon.spark.submission.SparkSessionBuilder
 import ai.chronon.spark.{GroupByUpload, Join}
 import org.rogach.scallop.{ScallopConf, ScallopOption}
 import org.slf4j.{Logger, LoggerFactory}
 
+import java.util.concurrent.TimeUnit
 import scala.collection.JavaConverters._
 import scala.concurrent.Await
 import scala.concurrent.duration.Duration
@@ -45,6 +53,42 @@ object BatchNodeRunner extends NodeRunner {
 
   @transient private lazy val logger: Logger = LoggerFactory.getLogger(getClass)
 
+  /** Checks that table `conf.sourceName` has every partition in `range`; returns the last attempt's result.
+    * A failed check is retried up to `conf.retryCount` times (0 if unset), sleeping
+    * `conf.retryIntervalMin` minutes (5 if unset) before each retry. */
+  private def checkPartitions(conf: ExternalSourceSensorNode,
+                              metadata: MetaData,
+                              tableUtils: TableUtils,
+                              range: PartitionRange): Try[Unit] = {
+    val tableName = conf.sourceName
+    val retryCount = if (conf.isSetRetryCount) conf.retryCount else 0L
+    val retryIntervalMin = if (conf.isSetRetryIntervalMin) conf.retryIntervalMin else 5L
+    val spec = metadata.executionInfo.tableDependencies.asScala
+      .find(_.tableInfo.table == tableName)
+      .map(_.tableInfo.partitionSpec(tableUtils.partitionSpec))
+    @scala.annotation.tailrec
+    def retry(attempt: Long): Try[Unit] = {
+      val result = Try {
+        val partitionsInRange =
+          tableUtils.partitions(tableName, partitionRange = Option(range), tablePartitionSpec = spec)
+        val missingPartitions = range.partitions.diff(partitionsInRange)
+        if (missingPartitions.nonEmpty)
+          throw new RuntimeException(
+            s"Input table ${tableName} is missing partitions: ${missingPartitions.mkString(", ")}")
+        logger.info(s"Input table ${tableName} has the requested range present: ${range}.")
+      }
+      result match {
+        case Failure(exception) if attempt < retryCount =>
+          logger.warn(s"Attempt ${attempt + 1} failed, retrying in ${retryIntervalMin} minutes", exception)
+          // Sleep between attempts; Await.ready on a never-completing future throws TimeoutException instead.
+          Thread.sleep(TimeUnit.MINUTES.toMillis(retryIntervalMin))
+          retry(attempt + 1)
+        case finalOutcome => finalOutcome
+      }
+    }
+    retry(0)
+  }
+
   private def createTableUtils(name: String): TableUtils = {
     val spark = SparkSessionBuilder.build(s"batch-node-runner-${name}")
     TableUtils(spark)
@@ -59,6 +103,16 @@ object BatchNodeRunner extends NodeRunner {
         runGroupByUpload(metadata, conf.getGroupByUpload, range, tableUtils)
       case NodeContent._Fields.STAGING_QUERY =>
         runStagingQuery(metadata, conf.getStagingQuery, range, tableUtils)
+      case NodeContent._Fields.EXTERNAL_SOURCE_SENSOR => // exit code reports whether the partitions were found
+        val sensor = conf.getExternalSourceSensor
+        val retries = if (sensor.isSetRetryCount) sensor.retryCount else 0L
+        checkPartitions(sensor, metadata, tableUtils, range) match {
+          case Success(_) => System.exit(0)
+          case Failure(exception) =>
+            logger.error(s"Failed to find required partitions for table ${sensor.sourceName} after ${retries} retries",
+                         exception)
+            System.exit(1)
+        }
       case _ =>
         throw new UnsupportedOperationException(s"Unsupported NodeContent type: ${conf.getSetField}")
     }