Skip to content

Commit ee07c76

Browse files
committed
Modified regression of the description about building for using Thrift JDBC server and CLI
2 parents ed53329 + 050f8d0 commit ee07c76

File tree

359 files changed

+11260
-4435
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

359 files changed

+11260
-4435
lines changed

.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ log4j-defaults.properties
2525
bootstrap-tooltip.js
2626
jquery-1.11.1.min.js
2727
sorttable.js
28+
.*avsc
2829
.*txt
2930
.*json
3031
.*data

.travis.yml

Lines changed: 0 additions & 32 deletions
This file was deleted.

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,10 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
118118
## A Note About Thrift JDBC server and CLI for Spark SQL
119119

120120
Spark SQL supports Thrift JDBC server and CLI.
121-
See sql-programming-guide.md for more information about those features.
122-
You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
123-
124-
$ sbt/sbt -Phive-thriftserver assembly
121+
See sql-programming-guide.md for more information about using the JDBC server and CLI.
122+
You can use those features by setting `-Phive` when building Spark as follows.
125123

124+
$ sbt/sbt -Phive assembly
126125

127126
## Configuration
128127

assembly/pom.xml

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@
4343
</properties>
4444

4545
<dependencies>
46+
<!-- Promote Guava to compile scope in this module so it's included while shading. -->
47+
<dependency>
48+
<groupId>com.google.guava</groupId>
49+
<artifactId>guava</artifactId>
50+
<scope>compile</scope>
51+
</dependency>
4652
<dependency>
4753
<groupId>org.apache.spark</groupId>
4854
<artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -113,6 +119,18 @@
113119
<goal>shade</goal>
114120
</goals>
115121
<configuration>
122+
<relocations>
123+
<relocation>
124+
<pattern>com.google</pattern>
125+
<shadedPattern>org.spark-project.guava</shadedPattern>
126+
<includes>
127+
<include>com.google.common.**</include>
128+
</includes>
129+
<excludes>
130+
<exclude>com.google.common.base.Optional**</exclude>
131+
</excludes>
132+
</relocation>
133+
</relocations>
116134
<transformers>
117135
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
118136
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
@@ -163,11 +181,6 @@
163181
<artifactId>spark-hive_${scala.binary.version}</artifactId>
164182
<version>${project.version}</version>
165183
</dependency>
166-
</dependencies>
167-
</profile>
168-
<profile>
169-
<id>hive-thriftserver</id>
170-
<dependencies>
171184
<dependency>
172185
<groupId>org.apache.spark</groupId>
173186
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>

bin/spark-class

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
# limitations under the License.
1818
#
1919

20+
# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
21+
2022
cygwin=false
2123
case "`uname`" in
2224
CYGWIN*) cygwin=true;;
@@ -39,7 +41,7 @@ fi
3941

4042
if [ -n "$SPARK_MEM" ]; then
4143
echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
42-
echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
44+
echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
4345
fi
4446

4547
# Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -73,11 +75,17 @@ case "$1" in
7375
OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
7476
;;
7577

76-
# Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
77-
'org.apache.spark.deploy.SparkSubmit')
78-
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
79-
-Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
78+
# Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
79+
# SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
80+
'org.apache.spark.deploy.SparkSubmit')
81+
OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
8082
OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
83+
if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
84+
OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
85+
fi
86+
if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
87+
OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
88+
fi
8189
;;
8290

8391
*)
@@ -101,11 +109,12 @@ fi
101109
# Set JAVA_OPTS to be able to load native libraries and to set heap size
102110
JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
103111
JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
112+
104113
# Load extra JAVA_OPTS from conf/java-opts, if it exists
105114
if [ -e "$FWDIR/conf/java-opts" ] ; then
106115
JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
107116
fi
108-
export JAVA_OPTS
117+
109118
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
110119

111120
TOOLS_DIR="$FWDIR"/tools
@@ -146,10 +155,28 @@ if $cygwin; then
146155
fi
147156
export CLASSPATH
148157

149-
if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
150-
echo -n "Spark Command: " 1>&2
151-
echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
152-
echo -e "========================================\n" 1>&2
158+
# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
159+
# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
160+
# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
161+
# to prepare the launch environment of this driver JVM.
162+
163+
if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
164+
# This is used only if the properties file actually contains these special configs
165+
# Export the environment variables needed by SparkSubmitDriverBootstrapper
166+
export RUNNER
167+
export CLASSPATH
168+
export JAVA_OPTS
169+
export OUR_JAVA_MEM
170+
export SPARK_CLASS=1
171+
shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
172+
exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
173+
else
174+
# Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
175+
if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
176+
echo -n "Spark Command: " 1>&2
177+
echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
178+
echo -e "========================================\n" 1>&2
179+
fi
180+
exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
153181
fi
154182

155-
exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

bin/spark-shell.cmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ rem
1919

2020
set SPARK_HOME=%~dp0..
2121

22-
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell --class org.apache.spark.repl.Main %*
22+
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell

bin/spark-sql

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -65,30 +65,30 @@ while (($#)); do
6565
case $1 in
6666
-d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
6767
ensure_arg_number $# 2
68-
CLI_ARGS+=($1); shift
69-
CLI_ARGS+=($1); shift
68+
CLI_ARGS+=("$1"); shift
69+
CLI_ARGS+=("$1"); shift
7070
;;
7171

7272
-e)
7373
ensure_arg_number $# 2
74-
CLI_ARGS+=($1); shift
75-
CLI_ARGS+=(\"$1\"); shift
74+
CLI_ARGS+=("$1"); shift
75+
CLI_ARGS+=("$1"); shift
7676
;;
7777

7878
-s | --silent)
79-
CLI_ARGS+=($1); shift
79+
CLI_ARGS+=("$1"); shift
8080
;;
8181

8282
-v | --verbose)
8383
# Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
84-
CLI_ARGS+=($1)
85-
SUBMISSION_ARGS+=($1); shift
84+
CLI_ARGS+=("$1")
85+
SUBMISSION_ARGS+=("$1"); shift
8686
;;
8787

8888
*)
89-
SUBMISSION_ARGS+=($1); shift
89+
SUBMISSION_ARGS+=("$1"); shift
9090
;;
9191
esac
9292
done
9393

94-
eval exec "$FWDIR"/bin/spark-submit --class $CLASS ${SUBMISSION_ARGS[*]} spark-internal ${CLI_ARGS[*]}
94+
exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_ARGS[@]}" spark-internal "${CLI_ARGS[@]}"

bin/spark-submit

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,18 @@
1717
# limitations under the License.
1818
#
1919

20+
# NOTE: Any changes in this file must be reflected in SparkClassLauncher.scala!
21+
2022
export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
2123
ORIG_ARGS=("$@")
2224

2325
while (($#)); do
2426
if [ "$1" = "--deploy-mode" ]; then
25-
DEPLOY_MODE=$2
27+
SPARK_SUBMIT_DEPLOY_MODE=$2
28+
elif [ "$1" = "--properties-file" ]; then
29+
SPARK_SUBMIT_PROPERTIES_FILE=$2
2630
elif [ "$1" = "--driver-memory" ]; then
27-
DRIVER_MEMORY=$2
31+
export SPARK_SUBMIT_DRIVER_MEMORY=$2
2832
elif [ "$1" = "--driver-library-path" ]; then
2933
export SPARK_SUBMIT_LIBRARY_PATH=$2
3034
elif [ "$1" = "--driver-class-path" ]; then
@@ -35,10 +39,24 @@ while (($#)); do
3539
shift
3640
done
3741

38-
DEPLOY_MODE=${DEPLOY_MODE:-"client"}
42+
DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf"
43+
export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"}
44+
export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"}
45+
46+
# For client mode, the driver will be launched in the same JVM that launches
47+
# SparkSubmit, so we may need to read the properties file for any extra class
48+
# paths, library paths, java options and memory early on. Otherwise, it will
49+
# be too late by the time the driver JVM has started.
3950

40-
if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
41-
export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
51+
if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then
52+
# Parse the properties file only if the special configs exist
53+
contains_special_configs=$(
54+
grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \
55+
grep -v "^[[:space:]]*#"
56+
)
57+
if [ -n "$contains_special_configs" ]; then
58+
export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
59+
fi
4260
fi
4361

4462
exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"

bin/utils.sh

100644100755
File mode changed.

conf/spark-defaults.conf.template

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
# This is useful for setting default environmental settings.
33

44
# Example:
5-
# spark.master spark://master:7077
6-
# spark.eventLog.enabled true
7-
# spark.eventLog.dir hdfs://namenode:8021/directory
8-
# spark.serializer org.apache.spark.serializer.KryoSerializer
5+
# spark.master spark://master:7077
6+
# spark.eventLog.enabled true
7+
# spark.eventLog.dir hdfs://namenode:8021/directory
8+
# spark.serializer org.apache.spark.serializer.KryoSerializer
9+
# spark.driver.memory 5g
10+
# spark.executor.extraJavaOptions -XX:+PrintGCDetail -Dkey=value -Dnumbers="one two three"

0 commit comments

Comments
 (0)