diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 579efff90953..db680218dc96 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -36,7 +36,7 @@ if exist "%SPARK_HOME%\RELEASE" (
 )
 
 if not exist "%SPARK_JARS_DIR%"\ (
-  echo Failed to find Spark assembly JAR.
+  echo Failed to find Spark jars directory.
   echo You need to build Spark before running this program.
   exit /b 1
 )
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index c0a9e3f280ba..6227a30dc949 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -62,7 +62,7 @@ object PythonRunner {
     // ready to serve connections.
     thread.join()
 
-    // Build up a PYTHONPATH that includes the Spark assembly JAR (where this class is), the
+    // Build up a PYTHONPATH that includes the Spark assembly (where this class is), the
     // python directories in SPARK_HOME (if set), and any files in the pyFiles argument
     val pathElements = new ArrayBuffer[String]
     pathElements ++= formattedPyFiles
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index ec6d48485f11..78da1b70c54a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -478,7 +478,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S
     val command = sys.env.get("_SPARK_CMD_USAGE").getOrElse(
       """Usage: spark-submit [options] <app jar | python file> [app arguments]
         |Usage: spark-submit --kill [submission ID] --master [spark://...]
-        |Usage: spark-submit --status [submission ID] --master [spark://...]""".stripMargin)
+        |Usage: spark-submit --status [submission ID] --master [spark://...]
+        |Usage: spark-submit run-example [options] example-class [example args]""".stripMargin)
     outStream.println(command)
 
     val mem_mb = Utils.DEFAULT_DRIVER_MEM_MB
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 40661604af94..fec442af95e1 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -192,7 +192,7 @@ If you have JDK 8 installed but it is not the system default, you can set JAVA_H
 
 # Packaging without Hadoop Dependencies for YARN
 
-The assembly jar produced by `mvn package` will, by default, include all of Spark's dependencies, including Hadoop and some of its ecosystem projects. On YARN deployments, this causes multiple versions of these to appear on executor classpaths: the version packaged in the Spark assembly and the version on each node, included with `yarn.application.classpath`. The `hadoop-provided` profile builds the assembly without including Hadoop-ecosystem projects, like ZooKeeper and Hadoop itself.
+The assembly directory produced by `mvn package` will, by default, include all of Spark's dependencies, including Hadoop and some of its ecosystem projects. On YARN deployments, this causes multiple versions of these to appear on executor classpaths: the version packaged in the Spark assembly and the version on each node, included with `yarn.application.classpath`. The `hadoop-provided` profile builds the assembly without including Hadoop-ecosystem projects, like ZooKeeper and Hadoop itself.
 
 # Building with SBT
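The PythonRunner hunk above assembles a PYTHONPATH from the `--py-files` entries, the python directories under SPARK_HOME, and any pre-existing PYTHONPATH. A minimal, self-contained sketch of that merging step, with a placeholder `--py-files` entry (not a value from this patch):

```scala
import java.io.File
import scala.collection.mutable.ArrayBuffer

object PythonPathSketch {
  def main(args: Array[String]): Unit = {
    val pathElements = new ArrayBuffer[String]
    pathElements += "/tmp/app/deps.zip"                          // placeholder --py-files entry
    sys.env.get("SPARK_HOME").foreach(h => pathElements += s"$h/python")
    pathElements ++= sys.env.get("PYTHONPATH").toSeq             // keep any existing entries
    // join with the platform path separator, as PYTHONPATH expects
    println("PYTHONPATH=" + pathElements.filter(_.nonEmpty).mkString(File.pathSeparator))
  }
}
```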
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 2d9849d0328e..77887f4ca36b 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1651,7 +1651,7 @@ SELECT * FROM jsonTable
 Spark SQL also supports reading and writing data stored in [Apache Hive](http://hive.apache.org/).
 However, since Hive has a large number of dependencies, it is not included in the default Spark assembly.
 Hive support is enabled by adding the `-Phive` and `-Phive-thriftserver` flags to Spark's build.
-This command builds a new assembly jar that includes Hive. Note that this Hive assembly jar must also be present
+This command builds a new assembly directory that includes Hive. Note that this Hive assembly directory must also be present
 on all of the worker nodes, as they will need access to the Hive serialization and deserialization libraries
 (SerDes) in order to access data stored in Hive.
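The `-Phive` build described in this hunk is what makes the `HiveContext` entry point usable at runtime. A minimal sketch of that Spark 1.x API, assuming an illustrative `src` table and a `local[*]` master for testing:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object HiveQuickStart {
  def main(args: Array[String]): Unit = {
    // local[*] is only for local testing; spark-submit normally supplies the master
    val sc = new SparkContext(new SparkConf().setAppName("HiveQuickStart").setMaster("local[*]"))
    val hiveContext = new HiveContext(sc)  // requires a -Phive build on the classpath
    hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    hiveContext.sql("SELECT key, value FROM src LIMIT 10").show()
    sc.stop()
  }
}
```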
@@ -1770,7 +1770,7 @@ The following options can be used to configure the version of Hive that is used
   property can be one of three options:
   1. builtin
-     Use Hive 1.2.1, which is bundled with the Spark assembly jar when -Phive is
+     Use Hive 1.2.1, which is bundled with the Spark assembly when -Phive is
      enabled. When this option is chosen, spark.sql.hive.metastore.version must be
      either 1.2.1 or not defined.
   2. maven
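An illustrative REPL-style fragment showing how the two properties this table documents are set on a `SparkConf`; the values simply echo the defaults named above:

```scala
import org.apache.spark.SparkConf

// "builtin" assumes the Hive classes are already on the classpath (a -Phive build)
val conf = new SparkConf()
  .set("spark.sql.hive.metastore.version", "1.2.1")
  .set("spark.sql.hive.metastore.jars", "builtin")
```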
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
index b91e892f8f21..bfe7aefe4100 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala
@@ -784,7 +784,7 @@ private[sql] object ParquetRelation extends Logging {
       // scalastyle:on classforname
       redirect(JLogger.getLogger("parquet"))
     } catch { case _: Throwable =>
-      // SPARK-9974: com.twitter:parquet-hadoop-bundle:1.6.0 is not packaged into the assembly jar
+      // SPARK-9974: com.twitter:parquet-hadoop-bundle:1.6.0 is not packaged into the assembly
       // when Spark is built with SBT. So `parquet.Log` may not be found. This try/catch block
       // should be removed after this issue is fixed.
     }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 505e5c0bb62f..ff93bfc4a3d1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -429,7 +429,7 @@ private[hive] object HiveContext extends Logging {
      | Location of the jars that should be used to instantiate the HiveMetastoreClient.
      | This property can be one of three options: "
      | 1. "builtin"
-     |   Use Hive ${hiveExecutionVersion}, which is bundled with the Spark assembly jar when
+     |   Use Hive ${hiveExecutionVersion}, which is bundled with the Spark assembly when
      |   -Phive is enabled. When this option is chosen,
      |   spark.sql.hive.metastore.version must be either
      |   ${hiveExecutionVersion} or not defined.
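The ParquetRelation hunk above deliberately tolerates a missing `parquet.Log` class instead of failing. A standalone sketch of that defensive class-probe pattern (the redirect body is elided; only the shape is shown):

```scala
object ParquetLogProbe {
  def main(args: Array[String]): Unit = {
    try {
      // probe for a class that some builds do not package (see SPARK-9974)
      Class.forName("parquet.Log")
      // ... redirect its java.util.logging output here ...
    } catch {
      case _: ClassNotFoundException =>
        // class absent (e.g. an SBT build); nothing to redirect
    }
  }
}
```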