
Commit 098e8c5 ("merged master")
2 parents: e006f9d + a6b02fb

161 files changed: +2635 / -1778 lines

Note: this is a large commit, so only a subset of the 161 changed files is shown below.

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@ unit-tests.log
 /lib/
 rat-results.txt
 scalastyle.txt
+conf/*.conf
 
 # For Hive
 metastore_db/

README.md

Lines changed: 12 additions & 7 deletions
@@ -39,17 +39,22 @@ And run the following command, which should also return 1000:
 ## Example Programs
 
 Spark also comes with several sample programs in the `examples` directory.
-To run one of them, use `./bin/run-example <class> <params>`. For example:
+To run one of them, use `./bin/run-example <class> [params]`. For example:
 
-    ./bin/run-example org.apache.spark.examples.SparkLR local[2]
+    ./bin/run-example org.apache.spark.examples.SparkLR
 
-will run the Logistic Regression example locally on 2 CPUs.
+will run the Logistic Regression example locally.
 
-Each of the example programs prints usage help if no params are given.
+You can set the MASTER environment variable when running examples to submit
+examples to a cluster. This can be a mesos:// or spark:// URL,
+"yarn-cluster" or "yarn-client" to run on YARN, and "local" to run
+locally with one thread, or "local[N]" to run locally with N threads. You
+can also use an abbreviated class name if the class is in the `examples`
+package. For instance:
 
-All of the Spark samples take a `<master>` parameter that is the cluster URL
-to connect to. This can be a mesos:// or spark:// URL, or "local" to run
-locally with one thread, or "local[N]" to run locally with N threads.
+    MASTER=spark://host:7077 ./bin/run-example SparkPi
+
+Many of the example programs print usage help if no params are given.
 
 ## Running Tests
 
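
The new README text above is the how-to for the examples launcher: MASTER selects the cluster, and abbreviated class names are resolved inside the `examples` package. As a quick usage sketch (not part of the diff; the thread count, host, and port below are placeholders):

    # run SparkPi locally with 4 threads, using the abbreviated class name
    MASTER="local[4]" ./bin/run-example SparkPi

    # fully qualified class names still work, e.g. against a standalone master
    MASTER=spark://host:7077 ./bin/run-example org.apache.spark.examples.SparkLR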

assembly/pom.xml

Lines changed: 1 addition & 0 deletions
@@ -96,6 +96,7 @@
       <filter>
         <artifact>*:*</artifact>
         <excludes>
+          <exclude>org.datanucleus:*</exclude>
           <exclude>META-INF/*.SF</exclude>
           <exclude>META-INF/*.DSA</exclude>
           <exclude>META-INF/*.RSA</exclude>

bin/pyspark

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
   if [[ $? != 0 ]]; then
     echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
-    echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+    echo "You need to build Spark before running this program" >&2
     exit 1
   fi
 fi

bin/run-example

Lines changed: 18 additions & 53 deletions
@@ -17,28 +17,10 @@
 # limitations under the License.
 #
 
-cygwin=false
-case "`uname`" in
-    CYGWIN*) cygwin=true;;
-esac
-
 SCALA_VERSION=2.10
 
-# Figure out where the Scala framework is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
-
-# Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
-
-. $FWDIR/bin/load-spark-env.sh
-
-if [ -z "$1" ]; then
-  echo "Usage: run-example <example-class> [<args>]" >&2
-  exit 1
-fi
-
-# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
-# to avoid the -sources and -doc packages that are built by publish-local.
 EXAMPLES_DIR="$FWDIR"/examples
 
 if [ -f "$FWDIR/RELEASE" ]; then
@@ -49,46 +31,29 @@ fi
 
 if [[ -z $SPARK_EXAMPLES_JAR ]]; then
   echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" >&2
-  echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+  echo "You need to build Spark before running this program" >&2
   exit 1
 fi
 
+EXAMPLE_MASTER=${MASTER:-"local[*]"}
 
-# Since the examples JAR ideally shouldn't include spark-core (that dependency should be
-# "provided"), also add our standard Spark classpath, built using compute-classpath.sh.
-CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
-CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH"
-
-if $cygwin; then
-  CLASSPATH=`cygpath -wp $CLASSPATH`
-  export SPARK_EXAMPLES_JAR=`cygpath -w $SPARK_EXAMPLES_JAR`
-fi
-
-# Find java binary
-if [ -n "${JAVA_HOME}" ]; then
-  RUNNER="${JAVA_HOME}/bin/java"
-else
-  if [ `command -v java` ]; then
-    RUNNER="java"
-  else
-    echo "JAVA_HOME is not set" >&2
-    exit 1
-  fi
-fi
-
-# Set JAVA_OPTS to be able to load native libraries and to set heap size
-JAVA_OPTS="$SPARK_JAVA_OPTS"
-# Load extra JAVA_OPTS from conf/java-opts, if it exists
-if [ -e "$FWDIR/conf/java-opts" ] ; then
-  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "usage: ./bin/run-example <example-class> [example-args]"
+  echo " - set MASTER=XX to use a specific master"
+  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
+  echo
+  exit -1
 fi
-export JAVA_OPTS
 
-if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: "
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
-  echo "========================================"
-  echo
+if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
+  EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
-exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+./bin/spark-submit \
+  --master $EXAMPLE_MASTER \
+  --class $EXAMPLE_CLASS \
+  $SPARK_EXAMPLES_JAR \
+  "$@"

bin/spark-class

Lines changed: 1 addition & 1 deletion
@@ -114,7 +114,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar")
   if [ "$num_jars" -eq "0" ]; then
     echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2
-    echo "You need to build Spark with 'sbt/sbt assembly' before running this program." >&2
+    echo "You need to build Spark before running this program." >&2
     exit 1
   fi
   if [ "$num_jars" -gt "1" ]; then

bin/spark-submit

Lines changed: 4 additions & 2 deletions
@@ -35,8 +35,10 @@ while (($#)); do
   shift
 done
 
-if [ ! -z $DRIVER_MEMORY ] && [ ! -z $DEPLOY_MODE ] && [ $DEPLOY_MODE = "client" ]; then
-  export SPARK_MEM=$DRIVER_MEMORY
+DEPLOY_MODE=${DEPLOY_MODE:-"client"}
+
+if [ -n "$DRIVER_MEMORY" ] && [ $DEPLOY_MODE == "client" ]; then
+  export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY
 fi
 
 $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
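
The key change is that DEPLOY_MODE now falls back to "client" through the ${VAR:-default} expansion, so the driver-memory export also fires when --deploy-mode is simply omitted, and it sets SPARK_DRIVER_MEMORY instead of the old SPARK_MEM. A minimal standalone sketch of that logic (not part of the commit; the 2g value is illustrative):

    # ${VAR:-default} yields "default" when VAR is unset or empty, otherwise VAR's value
    unset DEPLOY_MODE
    DEPLOY_MODE=${DEPLOY_MODE:-"client"}          # -> client

    DRIVER_MEMORY=2g
    if [ -n "$DRIVER_MEMORY" ] && [ "$DEPLOY_MODE" == "client" ]; then
      export SPARK_DRIVER_MEMORY=$DRIVER_MEMORY   # mirrors the new check above
    fi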

core/src/main/scala/org/apache/spark/Accumulators.scala

Lines changed: 4 additions & 3 deletions
@@ -21,6 +21,7 @@ import java.io.{ObjectInputStream, Serializable}
 
 import scala.collection.generic.Growable
 import scala.collection.mutable.Map
+import scala.reflect.ClassTag
 
 import org.apache.spark.serializer.JavaSerializer
 
@@ -164,9 +165,9 @@ trait AccumulableParam[R, T] extends Serializable {
   def zero(initialValue: R): R
 }
 
-private[spark]
-class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Serializable, T]
-  extends AccumulableParam[R,T] {
+private[spark] class
+GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Serializable: ClassTag, T]
+  extends AccumulableParam[R, T] {
 
   def addAccumulator(growable: R, elem: T): R = {
     growable += elem

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 7 additions & 7 deletions
@@ -74,10 +74,10 @@ class SparkContext(config: SparkConf) extends Logging {
    * be generated using [[org.apache.spark.scheduler.InputFormatInfo.computePreferredLocations]]
    * from a list of input files or InputFormats for the application.
    */
-  @DeveloperApi
-  def this(config: SparkConf, preferredNodeLocationData: Map[String, Set[SplitInfo]]) = {
-    this(config)
-    this.preferredNodeLocationData = preferredNodeLocationData
+  @DeveloperApi
+  def this(config: SparkConf, preferredNodeLocationData: Map[String, Set[SplitInfo]]) = {
+    this(config)
+    this.preferredNodeLocationData = preferredNodeLocationData
   }
 
 /**
@@ -756,7 +756,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * Growable and TraversableOnce are the standard APIs that guarantee += and ++=, implemented by
    * standard mutable collections. So you can use this with mutable Map, Set, etc.
    */
-  def accumulableCollection[R <% Growable[T] with TraversableOnce[T] with Serializable, T]
+  def accumulableCollection[R <% Growable[T] with TraversableOnce[T] with Serializable: ClassTag, T]
       (initialValue: R): Accumulable[R, T] = {
     val param = new GrowableAccumulableParam[R,T]
     new Accumulable(initialValue, param)
@@ -767,7 +767,7 @@ class SparkContext(config: SparkConf) extends Logging {
    * [[org.apache.spark.broadcast.Broadcast]] object for reading it in distributed functions.
    * The variable will be sent to each cluster only once.
    */
-  def broadcast[T](value: T): Broadcast[T] = {
+  def broadcast[T: ClassTag](value: T): Broadcast[T] = {
     val bc = env.broadcastManager.newBroadcast[T](value, isLocal)
     cleaner.foreach(_.registerBroadcastForCleanup(bc))
     bc
@@ -917,7 +917,7 @@ class SparkContext(config: SparkConf) extends Logging {
     if (SparkHadoopUtil.get.isYarnMode() &&
         (master == "yarn-standalone" || master == "yarn-cluster")) {
       // In order for this to work in yarn-cluster mode the user must specify the
-      // --addjars option to the client to upload the file into the distributed cache
+      // --addJars option to the client to upload the file into the distributed cache
       // of the AM to make it show up in the current working directory.
       val fileName = new Path(uri.getPath).getName()
       try {

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 1 addition & 2 deletions
@@ -281,8 +281,7 @@ object SparkEnv extends Logging {
     val jvmInformation = Seq(
       ("Java Version", "%s (%s)".format(Properties.javaVersion, Properties.javaVendor)),
       ("Java Home", Properties.javaHome),
-      ("Scala Version", Properties.versionString),
-      ("Scala Home", Properties.scalaHome)
+      ("Scala Version", Properties.versionString)
     ).sorted
 
     // Spark properties
