Skip to content

Commit ab5515b

Browse files
author
Davies Liu
committed
Merge branch 'master' into groupby
Conflicts: python/pyspark/tests.py
2 parents 651f891 + 3a845d3 commit ab5515b

File tree

8,610 files changed

+50696
-51340
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

8,610 files changed

+50696
-51340
lines changed

.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ sbt-launch-lib.bash
4848
plugins.sbt
4949
work
5050
.*\.q
51+
.*\.qv
5152
golden
5253
test.out/*
5354
.*iml

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ To build Spark and its example programs, run:
2525

2626
(You do not need to do this if you downloaded a pre-built package.)
2727
More detailed documentation is available from the project site, at
28-
["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html).
28+
["Building Spark with Maven"](http://spark.apache.org/docs/latest/building-with-maven.html).
2929

3030
## Interactive Scala Shell
3131

assembly/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,12 @@
197197
<artifactId>spark-hive_${scala.binary.version}</artifactId>
198198
<version>${project.version}</version>
199199
</dependency>
200+
</dependencies>
201+
</profile>
202+
<profile>
203+
<!-- TODO: Move this to "hive" profile once 0.13 JDBC is supported -->
204+
<id>hive-0.12.0</id>
205+
<dependencies>
200206
<dependency>
201207
<groupId>org.apache.spark</groupId>
202208
<artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>

bin/compute-classpath.cmd

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
3838
rem Build up classpath
3939
set CLASSPATH=%SPARK_CLASSPATH%;%SPARK_SUBMIT_CLASSPATH%
4040

41-
if "x%SPARK_CONF_DIR%"!="x" (
41+
if not "x%SPARK_CONF_DIR%"=="x" (
4242
set CLASSPATH=%CLASSPATH%;%SPARK_CONF_DIR%
4343
) else (
4444
set CLASSPATH=%CLASSPATH%;%FWDIR%conf

bin/pyspark

Lines changed: 38 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -50,22 +50,47 @@ fi
5050

5151
. "$FWDIR"/bin/load-spark-env.sh
5252

53-
# Figure out which Python executable to use
53+
# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
54+
# executable, while the worker would still be launched using PYSPARK_PYTHON.
55+
#
56+
# In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
57+
# PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
58+
# Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
59+
# PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
60+
# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
61+
# and executor Python executables.
62+
#
63+
# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.
64+
65+
# Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
66+
if hash python2.7 2>/dev/null; then
67+
# Attempt to use Python 2.7, if installed:
68+
DEFAULT_PYTHON="python2.7"
69+
else
70+
DEFAULT_PYTHON="python"
71+
fi
72+
73+
# Determine the Python executable to use for the driver:
74+
if [[ -n "$IPYTHON_OPTS" || "$IPYTHON" == "1" ]]; then
75+
# If IPython options are specified, assume user wants to run IPython
76+
# (for backwards-compatibility)
77+
PYSPARK_DRIVER_PYTHON_OPTS="$PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS"
78+
PYSPARK_DRIVER_PYTHON="ipython"
79+
elif [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
80+
PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"$DEFAULT_PYTHON"}"
81+
fi
82+
83+
# Determine the Python executable to use for the executors:
5484
if [[ -z "$PYSPARK_PYTHON" ]]; then
55-
if [[ "$IPYTHON" = "1" || -n "$IPYTHON_OPTS" ]]; then
56-
# for backward compatibility
57-
PYSPARK_PYTHON="ipython"
85+
if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && $DEFAULT_PYTHON != "python2.7" ]]; then
86+
echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
87+
exit 1
5888
else
59-
PYSPARK_PYTHON="python"
89+
PYSPARK_PYTHON="$DEFAULT_PYTHON"
6090
fi
6191
fi
6292
export PYSPARK_PYTHON
6393

64-
if [[ -z "$PYSPARK_PYTHON_OPTS" && -n "$IPYTHON_OPTS" ]]; then
65-
# for backward compatibility
66-
PYSPARK_PYTHON_OPTS="$IPYTHON_OPTS"
67-
fi
68-
6994
# Add the PySpark classes to the Python path:
7095
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
7196
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
@@ -93,9 +118,9 @@ if [[ -n "$SPARK_TESTING" ]]; then
93118
unset YARN_CONF_DIR
94119
unset HADOOP_CONF_DIR
95120
if [[ -n "$PYSPARK_DOC_TEST" ]]; then
96-
exec "$PYSPARK_PYTHON" -m doctest $1
121+
exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1
97122
else
98-
exec "$PYSPARK_PYTHON" $1
123+
exec "$PYSPARK_DRIVER_PYTHON" $1
99124
fi
100125
exit
101126
fi
@@ -111,5 +136,5 @@ if [[ "$1" =~ \.py$ ]]; then
111136
else
112137
# PySpark shell requires special handling downstream
113138
export PYSPARK_SHELL=1
114-
exec "$PYSPARK_PYTHON" $PYSPARK_PYTHON_OPTS
139+
exec "$PYSPARK_DRIVER_PYTHON" $PYSPARK_DRIVER_PYTHON_OPTS
115140
fi

bin/spark-class

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ else
105105
exit 1
106106
fi
107107
fi
108-
JAVA_VERSION=$("$RUNNER" -version 2>&1 | sed 's/.* version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
108+
JAVA_VERSION=$("$RUNNER" -version 2>&1 | grep 'version' | sed 's/.* version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
109109

110110
# Set JAVA_OPTS to be able to load native libraries and to set heap size
111111
if [ "$JAVA_VERSION" -ge 18 ]; then

bin/spark-shell.cmd

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ rem See the License for the specific language governing permissions and
1717
rem limitations under the License.
1818
rem
1919

20-
set SPARK_HOME=%~dp0..
20+
rem This is the entry point for running Spark shell. To avoid polluting the
21+
rem environment, it just launches a new cmd to do the real work.
2122

22-
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell
23+
cmd /V /E /C %~dp0spark-shell2.cmd %*

bin/spark-shell2.cmd

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
@echo off
2+
3+
rem
4+
rem Licensed to the Apache Software Foundation (ASF) under one or more
5+
rem contributor license agreements. See the NOTICE file distributed with
6+
rem this work for additional information regarding copyright ownership.
7+
rem The ASF licenses this file to You under the Apache License, Version 2.0
8+
rem (the "License"); you may not use this file except in compliance with
9+
rem the License. You may obtain a copy of the License at
10+
rem
11+
rem http://www.apache.org/licenses/LICENSE-2.0
12+
rem
13+
rem Unless required by applicable law or agreed to in writing, software
14+
rem distributed under the License is distributed on an "AS IS" BASIS,
15+
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
rem See the License for the specific language governing permissions and
17+
rem limitations under the License.
18+
rem
19+
20+
set SPARK_HOME=%~dp0..
21+
22+
cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell

bin/spark-submit.cmd

Lines changed: 3 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -17,52 +17,7 @@ rem See the License for the specific language governing permissions and
1717
rem limitations under the License.
1818
rem
1919

20-
rem NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
20+
rem This is the entry point for running Spark submit. To avoid polluting the
21+
rem environment, it just launches a new cmd to do the real work.
2122

22-
set SPARK_HOME=%~dp0..
23-
set ORIG_ARGS=%*
24-
25-
rem Reset the values of all variables used
26-
set SPARK_SUBMIT_DEPLOY_MODE=client
27-
set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
28-
set SPARK_SUBMIT_DRIVER_MEMORY=
29-
set SPARK_SUBMIT_LIBRARY_PATH=
30-
set SPARK_SUBMIT_CLASSPATH=
31-
set SPARK_SUBMIT_OPTS=
32-
set SPARK_SUBMIT_BOOTSTRAP_DRIVER=
33-
34-
:loop
35-
if [%1] == [] goto continue
36-
if [%1] == [--deploy-mode] (
37-
set SPARK_SUBMIT_DEPLOY_MODE=%2
38-
) else if [%1] == [--properties-file] (
39-
set SPARK_SUBMIT_PROPERTIES_FILE=%2
40-
) else if [%1] == [--driver-memory] (
41-
set SPARK_SUBMIT_DRIVER_MEMORY=%2
42-
) else if [%1] == [--driver-library-path] (
43-
set SPARK_SUBMIT_LIBRARY_PATH=%2
44-
) else if [%1] == [--driver-class-path] (
45-
set SPARK_SUBMIT_CLASSPATH=%2
46-
) else if [%1] == [--driver-java-options] (
47-
set SPARK_SUBMIT_OPTS=%2
48-
)
49-
shift
50-
goto loop
51-
:continue
52-
53-
rem For client mode, the driver will be launched in the same JVM that launches
54-
rem SparkSubmit, so we may need to read the properties file for any extra class
55-
rem paths, library paths, java options and memory early on. Otherwise, it will
56-
rem be too late by the time the driver JVM has started.
57-
58-
if [%SPARK_SUBMIT_DEPLOY_MODE%] == [client] (
59-
if exist %SPARK_SUBMIT_PROPERTIES_FILE% (
60-
rem Parse the properties file only if the special configs exist
61-
for /f %%i in ('findstr /r /c:"^[\t ]*spark.driver.memory" /c:"^[\t ]*spark.driver.extra" ^
62-
%SPARK_SUBMIT_PROPERTIES_FILE%') do (
63-
set SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
64-
)
65-
)
66-
)
67-
68-
cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.spark.deploy.SparkSubmit %ORIG_ARGS%
23+
cmd /V /E /C %~dp0spark-submit2.cmd %*

bin/spark-submit2.cmd

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
@echo off
2+
3+
rem
4+
rem Licensed to the Apache Software Foundation (ASF) under one or more
5+
rem contributor license agreements. See the NOTICE file distributed with
6+
rem this work for additional information regarding copyright ownership.
7+
rem The ASF licenses this file to You under the Apache License, Version 2.0
8+
rem (the "License"); you may not use this file except in compliance with
9+
rem the License. You may obtain a copy of the License at
10+
rem
11+
rem http://www.apache.org/licenses/LICENSE-2.0
12+
rem
13+
rem Unless required by applicable law or agreed to in writing, software
14+
rem distributed under the License is distributed on an "AS IS" BASIS,
15+
rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
rem See the License for the specific language governing permissions and
17+
rem limitations under the License.
18+
rem
19+
20+
rem NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
21+
22+
set SPARK_HOME=%~dp0..
23+
set ORIG_ARGS=%*
24+
25+
rem Reset the values of all variables used
26+
set SPARK_SUBMIT_DEPLOY_MODE=client
27+
set SPARK_SUBMIT_PROPERTIES_FILE=%SPARK_HOME%\conf\spark-defaults.conf
28+
set SPARK_SUBMIT_DRIVER_MEMORY=
29+
set SPARK_SUBMIT_LIBRARY_PATH=
30+
set SPARK_SUBMIT_CLASSPATH=
31+
set SPARK_SUBMIT_OPTS=
32+
set SPARK_SUBMIT_BOOTSTRAP_DRIVER=
33+
34+
:loop
35+
if [%1] == [] goto continue
36+
if [%1] == [--deploy-mode] (
37+
set SPARK_SUBMIT_DEPLOY_MODE=%2
38+
) else if [%1] == [--properties-file] (
39+
set SPARK_SUBMIT_PROPERTIES_FILE=%2
40+
) else if [%1] == [--driver-memory] (
41+
set SPARK_SUBMIT_DRIVER_MEMORY=%2
42+
) else if [%1] == [--driver-library-path] (
43+
set SPARK_SUBMIT_LIBRARY_PATH=%2
44+
) else if [%1] == [--driver-class-path] (
45+
set SPARK_SUBMIT_CLASSPATH=%2
46+
) else if [%1] == [--driver-java-options] (
47+
set SPARK_SUBMIT_OPTS=%2
48+
)
49+
shift
50+
goto loop
51+
:continue
52+
53+
rem For client mode, the driver will be launched in the same JVM that launches
54+
rem SparkSubmit, so we may need to read the properties file for any extra class
55+
rem paths, library paths, java options and memory early on. Otherwise, it will
56+
rem be too late by the time the driver JVM has started.
57+
58+
if [%SPARK_SUBMIT_DEPLOY_MODE%] == [client] (
59+
if exist %SPARK_SUBMIT_PROPERTIES_FILE% (
60+
rem Parse the properties file only if the special configs exist
61+
for /f %%i in ('findstr /r /c:"^[\t ]*spark.driver.memory" /c:"^[\t ]*spark.driver.extra" ^
62+
%SPARK_SUBMIT_PROPERTIES_FILE%') do (
63+
set SPARK_SUBMIT_BOOTSTRAP_DRIVER=1
64+
)
65+
)
66+
)
67+
68+
cmd /V /E /C %SPARK_HOME%\bin\spark-class.cmd org.apache.spark.deploy.SparkSubmit %ORIG_ARGS%

0 commit comments

Comments
 (0)