
Commit f85fc46

yifhuaturboFei authored and committed
[HADP-52545][HADP-40838][HADP-41621][HADP-42682] Support built-in ETL Sql Driver, execute select statement, specify init files for etl sql initialization (apache#207)
Co-authored-by: fwang12 <[email protected]>
1 parent b9a51a1 commit f85fc46

File tree

23 files changed: +917 / -117 lines


assembly/pom.xml

Lines changed: 10 additions & 0 deletions
@@ -224,6 +224,16 @@
         </dependency>
       </dependencies>
     </profile>
+    <profile>
+      <id>ebay-etl-sql</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-ebay-etl-sql_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
     <profile>
       <id>spark-ganglia-lgpl</id>
       <dependencies>
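
Note: the ebay-etl-sql profile only pulls the new spark-ebay-etl-sql module into the assembly when it is explicitly activated. A hypothetical build invocation, assuming Spark's standard build/mvn wrapper and Maven's -P profile switch (the exact profiles and flags used by this fork may differ):

    # Illustration only: build the assembly with the ETL SQL module included.
    ./build/mvn -Pebay-etl-sql -DskipTests clean package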

bin/spark-etl-sql

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+export _SPARK_CMD_USAGE="Usage: ./bin/spark-etl-sql [options] [ETL SQL Driver options]"
+exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.ebay.ETLSqlDriver "$@"

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 20 additions & 12 deletions
@@ -2375,29 +2375,29 @@ class SparkContext(config: SparkConf) extends Logging {
         postApplicationEnd()
       }
     }
-    Utils.tryLogNonFatalError {
+    logStop("driver logger") {
       _driverLogger.foreach(_.stop())
     }
-    Utils.tryLogNonFatalError {
+    logStop("ui") {
       _scratchPath.foreach(SparkHadoopUtil.deletePath(_, hadoopConfiguration))
     }
     Utils.tryLogNonFatalError {
       _ui.foreach(_.stop())
     }
-    Utils.tryLogNonFatalError {
+    logStop("context cleaner") {
       _cleaner.foreach(_.stop())
     }
-    Utils.tryLogNonFatalError {
+    logStop("executor allocation manager") {
       _executorAllocationManager.foreach(_.stop())
     }
     if (_dagScheduler != null) {
-      Utils.tryLogNonFatalError {
+      logStop("dag schedule") {
        _dagScheduler.stop(exitCode)
      }
      _dagScheduler = null
    }
    if (_listenerBusStarted) {
-      Utils.tryLogNonFatalError {
+      logStop("listener bus") {
        listenerBus.stop()
        _listenerBusStarted = false
      }

@@ -2413,7 +2413,7 @@ class SparkContext(config: SparkConf) extends Logging {
     Utils.tryLogNonFatalError {
       FallbackStorage.cleanUp(_conf, _hadoopConfiguration)
     }
-    Utils.tryLogNonFatalError {
+    logStop("event logger") {
       _eventLogger.foreach(_.stop())
     }
     if (_shuffleDriverComponents != null) {

@@ -2422,29 +2422,31 @@ class SparkContext(config: SparkConf) extends Logging {
       }
     }
     if (_heartbeater != null) {
-      Utils.tryLogNonFatalError {
+      logStop("heart beater") {
        _heartbeater.stop()
      }
      _heartbeater = null
    }
    if (env != null && _heartbeatReceiver != null) {
-      Utils.tryLogNonFatalError {
+      logStop("heart beat receiver") {
        env.rpcEnv.stop(_heartbeatReceiver)
      }
    }
-    Utils.tryLogNonFatalError {
+    logStop("progress bar") {
      _progressBar.foreach(_.stop())
    }
    _taskScheduler = null
    // TODO: Cache.stop()?
    if (_env != null) {
-      Utils.tryLogNonFatalError {
+      logStop("spark env") {
        _env.stop()
      }
      SparkEnv.set(null)
    }
    if (_statusStore != null) {
-      _statusStore.close()
+      logStop("status store") {
+        _statusStore.close()
+      }
    }
    if (_kafkaStore.nonEmpty) {
      try {

@@ -2463,6 +2465,12 @@ class SparkContext(config: SparkConf) extends Logging {
     logInfo("Successfully stopped SparkContext")
   }

+  def logStop(name: String)(stop: => Unit) {
+    logInfo(s"Stopping $name")
+    Utils.tryLogNonFatalError {
+      stop
+    }
+  }

   /**
    * Get Spark's home location from either a value set through the constructor,

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 69 additions & 2 deletions
@@ -29,7 +29,7 @@ import javax.ws.rs.core.UriBuilder

 import scala.annotation.tailrec
 import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.{ArrayBuffer, ListBuffer}
 import scala.util.{Properties, Try}

 import org.apache.commons.lang3.StringUtils

@@ -242,6 +242,11 @@ private[spark] class SparkSubmit extends Logging {
     if (sparkConf.contains("spark.local.connect")) sparkConf.remove("spark.remote")
     var childMainClass = ""

+    // Fail fast if there is no sql files specified in ETL mode
+    if (isEtlSqlDriver(args.mainClass) && args.etlSqlFiles == null) {
+      printErrorAndExit("For ETL Sql mode, please specify at least one sql file to execute.")
+    }
+
     // Set the cluster manager
     val clusterManager: Int = args.maybeMaster match {
       case Some(v) =>

@@ -618,12 +623,37 @@ private[spark] class SparkSubmit extends Logging {
       args.files = mergeFileLists(args.files, args.primaryResource)
     }

+    if (isYarnCluster && isEtlSqlDriver(args.mainClass)) {
+      // In yarn-cluster mode for ETL app, add sql files to files
+      // that can be distributed with the job
+      args.files = mergeFileLists(args.files, args.etlInitFiles)
+      args.files = mergeFileLists(args.files, args.etlSqlFiles)
+      args.etlInitFileNames = useETLSqlFileNames(args.etlInitFiles)
+      args.etlSqlFiles = useETLSqlFileNames(args.etlSqlFiles)
+    }
+
+    // for k8s cluster mode, there are two times spark-submit
+    // for the first time spark-submit, it is used to launch a spark driver pod
+    // and merge the etl sql files into spark.files.
+    if (isKubernetesCluster && isEtlSqlDriver(args.mainClass)) {
+      args.files = mergeFileLists(args.files, args.etlInitFiles)
+      args.files = mergeFileLists(args.files, args.etlSqlFiles)
+    }
+
+    // for the second time spark-submit, it is used to launch spark driver.
+    // the spark.files are placed into working directory and here using
+    // the file names to access the etl sql files
+    if (isKubernetesClusterModeDriver && isEtlSqlDriver(args.mainClass)) {
+      args.etlInitFileNames = useETLSqlFileNames(args.etlInitFiles)
+      args.etlSqlFiles = useETLSqlFileNames(args.etlSqlFiles)
+    }
+
     // Special flag to avoid deprecation warnings at the client
     sys.props("SPARK_SUBMIT") = "true"

     // A list of rules to map each argument to system properties or command-line options in
     // each deploy mode; we iterate through these below
-    val options = List[OptionAssigner](
+    val options = ListBuffer[OptionAssigner](

       // All cluster managers
       OptionAssigner(

@@ -706,6 +736,25 @@ private[spark] class SparkSubmit extends Logging {
       OptionAssigner(localJars, ALL_CLUSTER_MGRS, CLIENT, confKey = "spark.repl.local.jars")
     )

+    if (isEtlSqlDriver(args.mainClass)) {
+      options ++= ListBuffer(
+        // Only for ETL SQL mode
+        OptionAssigner(args.etlInitFiles, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+          confKey = "spark.etl.sql.init.files.absPaths"),
+        if (args.etlInitFileNames != null) {
+          OptionAssigner(args.etlInitFileNames, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+            confKey = "spark.etl.sql.init.files")
+        } else {
+          OptionAssigner(args.etlInitFiles, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+            confKey = "spark.etl.sql.init.files")
+        },
+        OptionAssigner(args.etlSqlFiles, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+          confKey = "spark.etl.sql.files"),
+        OptionAssigner(args.etlSqlVars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
+          confKey = "spark.etl.sql.vars")
+      )
+    }
+
     // In client mode, launch the application main class directly
     // In addition, add the main application jar and any added jars (if any) to the classpath
     if (deployMode == CLIENT) {

@@ -788,6 +837,9 @@ private[spark] class SparkSubmit extends Logging {
       if (args.isPython) {
         sparkConf.set("spark.yarn.isPython", "true")
       }
+      if (isEtlSqlDriver(args.mainClass)) {
+        sparkConf.set("spark.yarn.isEtlSql", "true")
+      }
     }

     if ((clusterManager == MESOS || clusterManager == KUBERNETES)

@@ -1166,6 +1218,13 @@ object SparkSubmit extends CommandLineUtils with Logging {
     mainClass == "org.apache.spark.sql.connect.service.SparkConnectServer"
   }

+  /**
+   * Return whether the given main class represents a etl sql driver.
+   */
+  private[deploy] def isEtlSqlDriver(mainClass: String): Boolean = {
+    mainClass == "org.apache.spark.sql.ebay.ETLSqlDriver"
+  }
+
   /**
    * Return whether the given primary resource requires running python.
    */

@@ -1184,6 +1243,14 @@ object SparkSubmit extends CommandLineUtils with Logging {
     res == SparkLauncher.NO_RESOURCE
   }

+  /**
+   * For yarn-cluster mode, all sql files would be distributed to working dir, use file names
+   * to access them.
+   */
+  private[deploy] def useETLSqlFileNames(etlSqlFiles: String): String = {
+    etlSqlFiles.split(",").map(sf => new Path(sf).getName).mkString(",")
+    Option(etlSqlFiles).map(_.split(",").map(sf => new File(sf).getName).mkString(",")).orNull
+  }
 }

 /** Provides utility functions to be used inside SparkSubmit. */
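
The net effect of useETLSqlFileNames is to strip the directory part of each comma-separated path: in yarn-cluster and k8s cluster mode the files have already been shipped via spark.files and localized into the container working directory, so only the bare file names are usable there. A rough shell analogue of that mapping (paths are placeholders, illustration only):

    paths="hdfs:///warehouse/etl/init_session.sql,/tmp/daily_report.sql"
    echo "$paths" | tr ',' '\n' | xargs -n1 basename | paste -s -d ',' -
    # -> init_session.sql,daily_report.sql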

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 40 additions & 0 deletions
@@ -86,6 +86,12 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
   var submissionToRequestStatusFor: String = null
   var useRest: Boolean = false // used internally

+  // ETL mode only
+  var etlInitFiles: String = null
+  var etlInitFileNames: String = null
+  var etlSqlFiles: String = null
+  var etlSqlVars: String = null
+
   /** Default properties present in the currently defined defaults file. */
   lazy val defaultSparkProperties: HashMap[String, String] = {
     val defaultProperties = new HashMap[String, String]()

@@ -229,6 +235,12 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S

     // Action should be SUBMIT unless otherwise specified
     action = Option(action).getOrElse(SUBMIT)
+
+    // load etl configuration
+    etlInitFiles = Option(etlInitFiles).orElse(sparkProperties.get("spark.etl.sql.init.files"))
+      .orNull
+    etlSqlFiles = Option(etlSqlFiles).orElse(sparkProperties.get("spark.etl.sql.files")).orNull
+    etlSqlVars = Option(etlSqlVars).orElse(sparkProperties.get("spark.etl.sql.vars")).orNull
   }

   /** Ensure that required fields exists. Call this only once all defaults are loaded. */

@@ -444,6 +456,15 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       case KEYTAB =>
         keytab = value

+      case ETL_INIT_FILES =>
+        etlInitFiles = value
+
+      case ETL_SQL_FILES =>
+        etlSqlFiles = value
+
+      case ETL_SQL_VARS =>
+        etlSqlVars = value
+
       case HELP =>
         printUsageAndExit(0)

@@ -593,6 +614,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
       logInfo(getSqlShellOptions())
     }

+    if (SparkSubmit.isEtlSqlDriver(mainClass)) {
+      logInfo(getETLSqlDriverOptions())
+    }
+
     throw new SparkUserAppException(exitCode)
   }

@@ -647,6 +672,21 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     }
   }

+  /**
+   * Run the Spark ETL SQL Driver main class with the "--help" option and catch its output.
+   */
+  private def getETLSqlDriverOptions(): String = {
+    // scalastyle:off
+    s"""
+      |ETL SQL Driver options:
+      |  --etl-init-files   A comma-separated list of sql files to be executed for initialization.
+      |  --etl-sql-files    A comma-separated list of sql files to be executed. Must be specified.
+      |  --etl-sql-vars     A comma-separated list of sql variables, formatted like variable1=replaced1,
+      |                     variable2=replaced2, used for sql queries replacement.
+    """.stripMargin
+    // scalastyle:on
+  }
+
   private def error(msg: String): Unit = throw new SparkException(msg)

   private[deploy] def toSparkConf(sparkConf: Option[SparkConf] = None): SparkConf = {

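Because loadEnvironmentArguments falls back to sparkProperties, the same settings can also be supplied as Spark configuration instead of command-line flags. A hypothetical equivalent submission, assuming the usual --conf / spark-defaults.conf route into sparkProperties (paths and values are placeholders):

    ./bin/spark-etl-sql \
      --conf spark.etl.sql.init.files=hdfs:///warehouse/etl/init_session.sql \
      --conf spark.etl.sql.files=hdfs:///warehouse/etl/daily_report.sql \
      --conf spark.etl.sql.vars=dt=2024-01-01,env=prod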