Skip to content

Commit ad1f96e

Browse files
committed
Merge branch 'master' of git://git.apache.org/spark into fix-assembly-jarname
2 parents b2318eb + 70e824f commit ad1f96e

File tree

20 files changed

+457
-279
lines changed

20 files changed

+457
-279
lines changed

dev/run-tests

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ cd "$FWDIR"
2424
# Remove work directory
2525
rm -rf ./work
2626

27+
source "$FWDIR/dev/run-tests-codes.sh"
28+
29+
CURRENT_BLOCK=$BLOCK_GENERAL
30+
31+
function handle_error () {
32+
echo "[error] Got a return code of $? on line $1 of the run-tests script."
33+
exit $CURRENT_BLOCK
34+
}
35+
36+
2737
# Build against the right version of Hadoop.
2838
{
2939
if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
@@ -91,33 +101,43 @@ if [ -n "$AMPLAB_JENKINS" ]; then
91101
fi
92102
fi
93103

94-
# Fail fast
95-
set -e
96104
set -o pipefail
105+
trap 'handle_error $LINENO' ERR
97106

98107
echo ""
99108
echo "========================================================================="
100109
echo "Running Apache RAT checks"
101110
echo "========================================================================="
111+
112+
CURRENT_BLOCK=$BLOCK_RAT
113+
102114
./dev/check-license
103115

104116
echo ""
105117
echo "========================================================================="
106118
echo "Running Scala style checks"
107119
echo "========================================================================="
120+
121+
CURRENT_BLOCK=$BLOCK_SCALA_STYLE
122+
108123
./dev/lint-scala
109124

110125
echo ""
111126
echo "========================================================================="
112127
echo "Running Python style checks"
113128
echo "========================================================================="
129+
130+
CURRENT_BLOCK=$BLOCK_PYTHON_STYLE
131+
114132
./dev/lint-python
115133

116134
echo ""
117135
echo "========================================================================="
118136
echo "Building Spark"
119137
echo "========================================================================="
120138

139+
CURRENT_BLOCK=$BLOCK_BUILD
140+
121141
{
122142
# We always build with Hive because the PySpark Spark SQL tests need it.
123143
BUILD_MVN_PROFILE_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive"
@@ -141,6 +161,8 @@ echo "========================================================================="
141161
echo "Running Spark unit tests"
142162
echo "========================================================================="
143163

164+
CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
165+
144166
{
145167
# If the Spark SQL tests are enabled, run the tests with the Hive profiles enabled.
146168
# This must be a single argument, as it is.
@@ -175,10 +197,16 @@ echo ""
175197
echo "========================================================================="
176198
echo "Running PySpark tests"
177199
echo "========================================================================="
200+
201+
CURRENT_BLOCK=$BLOCK_PYSPARK_UNIT_TESTS
202+
178203
./python/run-tests
179204

180205
echo ""
181206
echo "========================================================================="
182207
echo "Detecting binary incompatibilities with MiMa"
183208
echo "========================================================================="
209+
210+
CURRENT_BLOCK=$BLOCK_MIMA
211+
184212
./dev/mima

dev/run-tests-codes.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
readonly BLOCK_GENERAL=10
21+
readonly BLOCK_RAT=11
22+
readonly BLOCK_SCALA_STYLE=12
23+
readonly BLOCK_PYTHON_STYLE=13
24+
readonly BLOCK_BUILD=14
25+
readonly BLOCK_SPARK_UNIT_TESTS=15
26+
readonly BLOCK_PYSPARK_UNIT_TESTS=16
27+
readonly BLOCK_MIMA=17

dev/run-tests-jenkins

Lines changed: 69 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,23 @@
2626
FWDIR="$(cd `dirname $0`/..; pwd)"
2727
cd "$FWDIR"
2828

29+
source "$FWDIR/dev/run-tests-codes.sh"
30+
2931
COMMENTS_URL="https://api.github.com/repos/apache/spark/issues/$ghprbPullId/comments"
3032
PULL_REQUEST_URL="https://github.com/apache/spark/pull/$ghprbPullId"
3133

34+
# Important Environment Variables
35+
# ---
36+
# $ghprbActualCommit
37+
#+ This is the hash of the most recent commit in the PR.
38+
#+ The merge-base of this and master is the commit from which the PR was branched.
39+
# $sha1
40+
#+ If the patch merges cleanly, this is a reference to the merge commit hash
41+
#+ (e.g. "origin/pr/2606/merge").
42+
#+ If the patch does not merge cleanly, it is equal to $ghprbActualCommit.
43+
#+ The merge-base of this and master in the case of a clean merge is the most recent commit
44+
#+ against master.
45+
3246
COMMIT_URL="https://github.com/apache/spark/commit/${ghprbActualCommit}"
3347
# GitHub doesn't auto-link short hashes when submitted via the API, unfortunately. :(
3448
SHORT_COMMIT_HASH="${ghprbActualCommit:0:7}"
@@ -84,42 +98,46 @@ function post_message () {
8498
fi
8599
}
86100

101+
102+
# We diff master...$ghprbActualCommit because that gets us changes introduced in the PR
103+
#+ and not anything else added to master since the PR was branched.
104+
87105
# check PR merge-ability and check for new public classes
88106
{
89107
if [ "$sha1" == "$ghprbActualCommit" ]; then
90-
merge_note=" * This patch **does not** merge cleanly!"
108+
merge_note=" * This patch **does not merge cleanly**."
91109
else
92110
merge_note=" * This patch merges cleanly."
111+
fi
112+
113+
source_files=$(
114+
git diff master...$ghprbActualCommit --name-only `# diff patch against master from branch point` \
115+
| grep -v -e "\/test" `# ignore files in test directories` \
116+
| grep -e "\.py$" -e "\.java$" -e "\.scala$" `# include only code files` \
117+
| tr "\n" " "
118+
)
119+
new_public_classes=$(
120+
git diff master...$ghprbActualCommit ${source_files} `# diff patch against master from branch point` \
121+
| grep "^\+" `# filter in only added lines` \
122+
| sed -r -e "s/^\+//g" `# remove the leading +` \
123+
| grep -e "trait " -e "class " `# filter in lines with these key words` \
124+
| grep -e "{" -e "(" `# filter in lines with these key words, too` \
125+
| grep -v -e "\@\@" -e "private" `# exclude lines with these words` \
126+
| grep -v -e "^// " -e "^/\*" -e "^ \* " `# exclude comment lines` \
127+
| sed -r -e "s/\{.*//g" `# remove from the { onwards` \
128+
| sed -r -e "s/\}//g" `# just in case, remove }; they mess the JSON` \
129+
| sed -r -e "s/\"/\\\\\"/g" `# escape double quotes; they mess the JSON` \
130+
| sed -r -e "s/^(.*)$/\`\1\`/g" `# surround with backticks for style` \
131+
| sed -r -e "s/^/ \* /g" `# prepend ' *' to start of line` \
132+
| sed -r -e "s/$/\\\n/g" `# append newline to end of line` \
133+
| tr -d "\n" `# remove actual LF characters`
134+
)
93135

94-
source_files=$(
95-
git diff master... --name-only `# diff patch against master from branch point` \
96-
| grep -v -e "\/test" `# ignore files in test directories` \
97-
| grep -e "\.py$" -e "\.java$" -e "\.scala$" `# include only code files` \
98-
| tr "\n" " "
99-
)
100-
new_public_classes=$(
101-
git diff master... ${source_files} `# diff patch against master from branch point` \
102-
| grep "^\+" `# filter in only added lines` \
103-
| sed -r -e "s/^\+//g" `# remove the leading +` \
104-
| grep -e "trait " -e "class " `# filter in lines with these key words` \
105-
| grep -e "{" -e "(" `# filter in lines with these key words, too` \
106-
| grep -v -e "\@\@" -e "private" `# exclude lines with these words` \
107-
| grep -v -e "^// " -e "^/\*" -e "^ \* " `# exclude comment lines` \
108-
| sed -r -e "s/\{.*//g" `# remove from the { onwards` \
109-
| sed -r -e "s/\}//g" `# just in case, remove }; they mess the JSON` \
110-
| sed -r -e "s/\"/\\\\\"/g" `# escape double quotes; they mess the JSON` \
111-
| sed -r -e "s/^(.*)$/\`\1\`/g" `# surround with backticks for style` \
112-
| sed -r -e "s/^/ \* /g" `# prepend ' *' to start of line` \
113-
| sed -r -e "s/$/\\\n/g" `# append newline to end of line` \
114-
| tr -d "\n" `# remove actual LF characters`
115-
)
116-
117-
if [ "$new_public_classes" == "" ]; then
118-
public_classes_note=" * This patch adds no public classes."
119-
else
120-
public_classes_note=" * This patch adds the following public classes _(experimental)_:"
121-
public_classes_note="${public_classes_note}\n${new_public_classes}"
122-
fi
136+
if [ -z "$new_public_classes" ]; then
137+
public_classes_note=" * This patch adds no public classes."
138+
else
139+
public_classes_note=" * This patch adds the following public classes _(experimental)_:"
140+
public_classes_note="${public_classes_note}\n${new_public_classes}"
123141
fi
124142
}
125143

@@ -147,12 +165,30 @@ function post_message () {
147165

148166
post_message "$fail_message"
149167
exit $test_result
168+
elif [ "$test_result" -eq "0" ]; then
169+
test_result_note=" * This patch **passes all tests**."
150170
else
151-
if [ "$test_result" -eq "0" ]; then
152-
test_result_note=" * This patch **passes** unit tests."
171+
if [ "$test_result" -eq "$BLOCK_GENERAL" ]; then
172+
failing_test="some tests"
173+
elif [ "$test_result" -eq "$BLOCK_RAT" ]; then
174+
failing_test="RAT tests"
175+
elif [ "$test_result" -eq "$BLOCK_SCALA_STYLE" ]; then
176+
failing_test="Scala style tests"
177+
elif [ "$test_result" -eq "$BLOCK_PYTHON_STYLE" ]; then
178+
failing_test="Python style tests"
179+
elif [ "$test_result" -eq "$BLOCK_BUILD" ]; then
180+
failing_test="to build"
181+
elif [ "$test_result" -eq "$BLOCK_SPARK_UNIT_TESTS" ]; then
182+
failing_test="Spark unit tests"
183+
elif [ "$test_result" -eq "$BLOCK_PYSPARK_UNIT_TESTS" ]; then
184+
failing_test="PySpark unit tests"
185+
elif [ "$test_result" -eq "$BLOCK_MIMA" ]; then
186+
failing_test="MiMa tests"
153187
else
154-
test_result_note=" * This patch **fails** unit tests."
188+
failing_test="some tests"
155189
fi
190+
191+
test_result_note=" * This patch **fails $failing_test**."
156192
fi
157193
}
158194

mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
7575
def predict(testData: Vector): Double = {
7676
predictPoint(testData, weights, intercept)
7777
}
78+
79+
override def toString() = "(weights=%s, intercept=%s)".format(weights, intercept)
7880
}
7981

8082
/**

python/docs/modules.rst

Lines changed: 0 additions & 7 deletions
This file was deleted.

python/pyspark/context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ def sequenceFile(self, path, keyClass=None, valueClass=None, keyConverter=None,
410410
Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS,
411411
a local file system (available on all nodes), or any Hadoop-supported file system URI.
412412
The mechanism is as follows:
413+
413414
1. A Java RDD is created from the SequenceFile or other InputFormat, and the key
414415
and value Writable classes
415416
2. Serialization is attempted via Pyrolite pickling

python/pyspark/mllib/classification.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,14 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
8989
@param regParam: The regularizer parameter (default: 1.0).
9090
@param regType: The type of regularizer used for training
9191
our model.
92-
Allowed values: "l1" for using L1Updater,
93-
"l2" for using
94-
SquaredL2Updater,
95-
"none" for no regularizer.
96-
(default: "none")
92+
93+
:Allowed values:
94+
- "l1" for using L1Updater
95+
- "l2" for using SquaredL2Updater
96+
- "none" for no regularizer
97+
98+
(default: "none")
99+
97100
@param intercept: Boolean parameter which indicates the use
98101
or not of the augmented representation for
99102
training data (i.e. whether bias features
@@ -158,11 +161,14 @@ def train(cls, data, iterations=100, step=1.0, regParam=1.0,
158161
@param initialWeights: The initial weights (default: None).
159162
@param regType: The type of regularizer used for training
160163
our model.
161-
Allowed values: "l1" for using L1Updater,
162-
"l2" for using
163-
SquaredL2Updater,
164-
"none" for no regularizer.
165-
(default: "none")
164+
165+
:Allowed values:
166+
- "l1" for using L1Updater
167+
- "l2" for using SquaredL2Updater,
168+
- "none" for no regularizer.
169+
170+
(default: "none")
171+
166172
@param intercept: Boolean parameter which indicates the use
167173
or not of the augmented representation for
168174
training data (i.e. whether bias features

python/pyspark/mllib/regression.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pyspark.mllib.linalg import SparseVector, _convert_to_vector
2323
from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
2424

25-
__all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'RidgeRegressionModel'
25+
__all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'RidgeRegressionModel',
2626
'LinearRegressionWithSGD', 'LassoWithSGD', 'RidgeRegressionWithSGD']
2727

2828

@@ -66,6 +66,9 @@ def weights(self):
6666
def intercept(self):
6767
return self._intercept
6868

69+
def __repr__(self):
70+
return "(weights=%s, intercept=%s)" % (self._coeff, self._intercept)
71+
6972

7073
class LinearRegressionModelBase(LinearModel):
7174

@@ -152,11 +155,14 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
152155
@param regParam: The regularizer parameter (default: 1.0).
153156
@param regType: The type of regularizer used for training
154157
our model.
155-
Allowed values: "l1" for using L1Updater,
156-
"l2" for using
157-
SquaredL2Updater,
158-
"none" for no regularizer.
159-
(default: "none")
158+
159+
:Allowed values:
160+
- "l1" for using L1Updater,
161+
- "l2" for using SquaredL2Updater,
162+
- "none" for no regularizer.
163+
164+
(default: "none")
165+
160166
@param intercept: Boolean parameter which indicates the use
161167
or not of the augmented representation for
162168
training data (i.e. whether bias features

python/pyspark/mllib/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from pyspark.serializers import PickleSerializer
3333
from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, _convert_to_vector
3434
from pyspark.mllib.regression import LabeledPoint
35-
from pyspark.tests import PySparkTestCase
35+
from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase
3636

3737

3838
_have_scipy = False

python/pyspark/mllib/tree.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __del__(self):
4848
def predict(self, x):
4949
"""
5050
Predict the label of one or more examples.
51+
5152
:param x: Data point (feature vector),
5253
or an RDD of data points (feature vectors).
5354
"""

0 commit comments

Comments
 (0)