
Commit 7cc34ce

Author: Andrew Or (committed)
Merge branch 'master' of github.com:apache/spark into dag-viz-skipped
2 parents: c121fa2 + 48fc38f

88 files changed: +854 additions, -467 deletions


core/src/main/resources/org/apache/spark/ui/static/streaming-page.js

Lines changed: 14 additions & 17 deletions
@@ -98,7 +98,16 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) {
   var x = d3.scale.linear().domain([minX, maxX]).range([0, width]);
   var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]);
 
-  var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { return timeFormat[d]; });
+  var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) {
+    var formattedDate = timeFormat[d];
+    var dotIndex = formattedDate.indexOf('.');
+    if (dotIndex >= 0) {
+      // Remove milliseconds
+      return formattedDate.substring(0, dotIndex);
+    } else {
+      return formattedDate;
+    }
+  });
   var formatYValue = d3.format(",.2f");
   var yAxis = d3.svg.axis().scale(y).orient("left").ticks(5).tickFormat(formatYValue);
 
@@ -252,28 +261,16 @@ function drawHistogram(id, values, minY, maxY, unitY, batchInterval) {
 }
 
 $(function() {
-  function getParameterFromURL(param)
-  {
-    var parameters = window.location.search.substring(1); // Remove "?"
-    var keyValues = parameters.split('&');
-    for (var i = 0; i < keyValues.length; i++)
-    {
-      var paramKeyValue = keyValues[i].split('=');
-      if (paramKeyValue[0] == param)
-      {
-        return paramKeyValue[1];
-      }
-    }
-  }
-
-  var status = getParameterFromURL("show-streams-detail") == "true";
+  var status = window.localStorage && window.localStorage.getItem("show-streams-detail") == "true";
 
   $("span.expand-input-rate").click(function() {
     status = !status;
     $("#inputs-table").toggle('collapsed');
     // Toggle the class of the arrow between open and closed
     $(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed');
-    window.history.pushState('', document.title, window.location.pathname + '?show-streams-detail=' + status);
+    if (window.localStorage) {
+      window.localStorage.setItem("show-streams-detail", "" + status);
+    }
   });
 
   if (status) {

core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala

Lines changed: 5 additions & 0 deletions
@@ -30,6 +30,11 @@ private[spark] class MasterSource(val master: Master) extends Source {
     override def getValue: Int = master.workers.size
   })
 
+  // Gauge for alive worker numbers in cluster
+  metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int]{
+    override def getValue: Int = master.workers.filter(_.state == WorkerState.ALIVE).size
+  })
+
   // Gauge for application numbers in cluster
   metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] {
     override def getValue: Int = master.apps.size
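
For reference, the new aliveWorkers gauge follows the same Dropwizard Metrics pattern as the existing gauges in MasterSource: a Gauge[Int] whose value is recomputed each time the registry is read. A minimal self-contained sketch of that pattern (the WorkerState/WorkerInfo stand-ins below are illustrative, not Spark's actual classes):

    import com.codahale.metrics.{Gauge, MetricRegistry}

    object AliveWorkersGaugeSketch {
      // Illustrative stand-ins for Spark's WorkerState/WorkerInfo; not the real classes.
      object WorkerState extends Enumeration { val ALIVE, DEAD = Value }
      case class WorkerInfo(id: String, state: WorkerState.Value)

      def main(args: Array[String]): Unit = {
        val workers = Seq(WorkerInfo("w1", WorkerState.ALIVE), WorkerInfo("w2", WorkerState.DEAD))
        val metricRegistry = new MetricRegistry

        // Same shape as the gauge added above: the value is computed on every read.
        metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int] {
          override def getValue: Int = workers.count(_.state == WorkerState.ALIVE)
        })

        println(metricRegistry.getGauges.get("aliveWorkers").getValue) // prints 1
      }
    }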

core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala

Lines changed: 5 additions & 2 deletions
@@ -77,14 +77,17 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
 
       <div class="row-fluid">
         <div class="span12">
-          <h4> Data Distribution on {rddStorageInfo.dataDistribution.size} Executors </h4>
+          <h4>
+            Data Distribution on {rddStorageInfo.dataDistribution.map(_.size).getOrElse(0)}
+            Executors
+          </h4>
           {workerTable}
         </div>
       </div>
 
       <div class="row-fluid">
         <div class="span12">
-          <h4> {rddStorageInfo.partitions.size} Partitions </h4>
+          <h4> {rddStorageInfo.partitions.map(_.size).getOrElse(0)} Partitions </h4>
           {blockTable}
         </div>
       </div>;
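
The two <h4> changes above guard against an absent value: dataDistribution and partitions are handled as Options, so the page renders 0 instead of failing when the information is missing. A small sketch of the .map(_.size).getOrElse(0) idiom, using an illustrative stand-in type rather than Spark's RDDStorageInfo:

    // Illustrative stand-in; not Spark's RDDStorageInfo.
    case class StorageInfoSketch(
        dataDistribution: Option[Seq[String]],
        partitions: Option[Seq[String]])

    object OptionSizeSketch {
      def executorCount(info: StorageInfoSketch): Int =
        info.dataDistribution.map(_.size).getOrElse(0)

      def main(args: Array[String]): Unit = {
        val populated = StorageInfoSketch(Some(Seq("exec-1", "exec-2")), Some(Seq("rdd_0_0")))
        val missing = StorageInfoSketch(None, None)

        println(executorCount(populated)) // 2
        println(executorCount(missing))   // 0, so the page shows "Data Distribution on 0 Executors"
      }
    }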

dev/create-release/create-release.sh

Lines changed: 7 additions & 7 deletions
@@ -118,14 +118,14 @@ if [[ ! "$@" =~ --skip-publish ]]; then
 
   rm -rf $SPARK_REPO
 
-  build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-    -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+  build/mvn -DskipTests -Pyarn -Phive \
+    -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install
 
   ./dev/change-version-to-2.11.sh
 
-  build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-    -Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+  build/mvn -DskipTests -Pyarn -Phive \
+    -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install
 
   ./dev/change-version-to-2.10.sh
@@ -228,9 +228,9 @@ if [[ ! "$@" =~ --skip-package ]]; then
 
   # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
   # share the same Zinc server.
-  make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" "3030" &
-  make_binary_release "hadoop1-scala2.11" "-Phive -Dscala-2.11" "3031" &
-  make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
+  make_binary_release "hadoop1" "-Phadoop-1 -Phive -Phive-thriftserver" "3030" &
+  make_binary_release "hadoop1-scala2.11" "-Phadoop-1 -Phive -Dscala-2.11" "3031" &
+  make_binary_release "cdh4" "-Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
   make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
   make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
   make_binary_release "mapr3" "-Pmapr3 -Phive -Phive-thriftserver" "3035" &

dev/github_jira_sync.py

Lines changed: 2 additions & 1 deletion
@@ -33,6 +33,7 @@
 
 # User facing configs
 GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
+JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK")
 JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
 JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark")
 JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX")
@@ -68,7 +69,7 @@ def get_jira_prs():
         page_json = get_json(page)
 
         for pull in page_json:
-            jiras = re.findall("SPARK-[0-9]{4,5}", pull['title'])
+            jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
             for jira in jiras:
                 result = result + [(jira, pull)]

dev/run-tests

Lines changed: 3 additions & 3 deletions
@@ -40,11 +40,11 @@ function handle_error () {
 {
   if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
     if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
-      export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
+      export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=1.0.4"
     elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
-      export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
+      export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=2.0.0-mr1-cdh4.1.1"
     elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
-      export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0"
+      export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2"
     elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
       export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
     fi

dev/scalastyle

Lines changed: 2 additions & 2 deletions
@@ -20,8 +20,8 @@
 echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
 echo -e "q\n" | build/sbt -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt
 # Check style with YARN built too
-echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle >> scalastyle.txt
-echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 test:scalastyle >> scalastyle.txt
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt
 
 ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}')
 rm scalastyle.txt

docs/building-spark.md

Lines changed: 6 additions & 5 deletions
@@ -59,14 +59,14 @@ You can fix this by setting the `MAVEN_OPTS` variable as discussed before.
 
 # Specifying the Hadoop Version
 
-Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. Note that certain build profiles are required for particular Hadoop versions:
+Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 2.2.0 by default. Note that certain build profiles are required for particular Hadoop versions:
 
 <table class="table">
 <thead>
 <tr><th>Hadoop version</th><th>Profile required</th></tr>
 </thead>
 <tbody>
-<tr><td>1.x to 2.1.x</td><td>(none)</td></tr>
+<tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
 <tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
 <tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
 <tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -77,19 +77,20 @@ For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hado
 
 {% highlight bash %}
 # Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -DskipTests clean package
+mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
 
 # Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package
+mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
 {% endhighlight %}
 
 You can enable the "yarn" profile and optionally set the "yarn.version" property if it is different from "hadoop.version". Spark only supports YARN versions 2.2.0 and later.
 
 Examples:
 
 {% highlight bash %}
+
 # Apache Hadoop 2.2.X
-mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package
+mvn -Pyarn -Phadoop-2.2 -DskipTests clean package
 
 # Apache Hadoop 2.3.X
 mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package

docs/hadoop-third-party-distributions.md

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@ property. For certain versions, you will need to specify additional profiles. Fo
 see the guide on [building with maven](building-spark.html#specifying-the-hadoop-version):
 
     mvn -Dhadoop.version=1.0.4 -DskipTests clean package
-    mvn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package
+    mvn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package
 
 The table below lists the corresponding `hadoop.version` code for each CDH/HDP release. Note that
 some Hadoop releases are binary compatible across client versions. This means the pre-built Spark

examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java

Lines changed: 32 additions & 11 deletions
@@ -28,6 +28,7 @@
 import org.apache.spark.ml.classification.ClassificationModel;
 import org.apache.spark.ml.param.IntParam;
 import org.apache.spark.ml.param.ParamMap;
+import org.apache.spark.ml.util.Identifiable$;
 import org.apache.spark.mllib.linalg.BLAS;
 import org.apache.spark.mllib.linalg.Vector;
 import org.apache.spark.mllib.linalg.Vectors;
@@ -103,7 +104,23 @@ public static void main(String[] args) throws Exception {
  * However, this should still compile and run successfully.
  */
 class MyJavaLogisticRegression
-  extends Classifier<Vector, MyJavaLogisticRegression, MyJavaLogisticRegressionModel> {
+    extends Classifier<Vector, MyJavaLogisticRegression, MyJavaLogisticRegressionModel> {
+
+  public MyJavaLogisticRegression() {
+    init();
+  }
+
+  public MyJavaLogisticRegression(String uid) {
+    this.uid_ = uid;
+    init();
+  }
+
+  private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg");
+
+  @Override
+  public String uid() {
+    return uid_;
+  }
 
   /**
    * Param for max number of iterations
@@ -117,7 +134,7 @@ class MyJavaLogisticRegression
 
   int getMaxIter() { return (Integer) getOrDefault(maxIter); }
 
-  public MyJavaLogisticRegression() {
+  private void init() {
     setMaxIter(100);
   }
 
@@ -137,7 +154,7 @@ public MyJavaLogisticRegressionModel train(DataFrame dataset) {
     Vector weights = Vectors.zeros(numFeatures); // Learning would happen here.
 
     // Create a model, and return it.
-    return new MyJavaLogisticRegressionModel(this, weights);
+    return new MyJavaLogisticRegressionModel(uid(), weights).setParent(this);
   }
 }
 
@@ -149,17 +166,21 @@ public MyJavaLogisticRegressionModel train(DataFrame dataset) {
  * However, this should still compile and run successfully.
  */
 class MyJavaLogisticRegressionModel
-  extends ClassificationModel<Vector, MyJavaLogisticRegressionModel> {
-
-  private MyJavaLogisticRegression parent_;
-  public MyJavaLogisticRegression parent() { return parent_; }
+    extends ClassificationModel<Vector, MyJavaLogisticRegressionModel> {
 
   private Vector weights_;
   public Vector weights() { return weights_; }
 
-  public MyJavaLogisticRegressionModel(MyJavaLogisticRegression parent_, Vector weights_) {
-    this.parent_ = parent_;
-    this.weights_ = weights_;
+  public MyJavaLogisticRegressionModel(String uid, Vector weights) {
+    this.uid_ = uid;
+    this.weights_ = weights;
+  }
+
+  private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg");
+
+  @Override
+  public String uid() {
+    return uid_;
   }
 
   // This uses the default implementation of transform(), which reads column "features" and outputs
@@ -204,6 +225,6 @@ public Vector predictRaw(Vector features) {
    */
   @Override
   public MyJavaLogisticRegressionModel copy(ParamMap extra) {
-    return copyValues(new MyJavaLogisticRegressionModel(parent_, weights_), extra);
+    return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), extra);
   }
 }
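
The Java example now mirrors the Scala ML API's uid convention: every stage carries an immutable unique ID, generated with Identifiable.randomUID(prefix) when none is supplied. A hedged Scala sketch of the same pattern (the class name is illustrative only, and it assumes a Spark 1.4-era spark-mllib dependency for org.apache.spark.ml.util.Identifiable):

    import org.apache.spark.ml.util.Identifiable

    // Illustrative only; not part of the example file above.
    class MyScalaLogisticRegressionSketch(override val uid: String) extends Identifiable {
      // The no-arg constructor generates a uid, matching the Java example's init()/randomUID pattern.
      def this() = this(Identifiable.randomUID("myScalaLogReg"))
    }

    object UidSketch {
      def main(args: Array[String]): Unit = {
        val generated = new MyScalaLogisticRegressionSketch()
        val explicit = new MyScalaLogisticRegressionSketch("myScalaLogReg_fixed")
        println(generated.uid) // e.g. myScalaLogReg_<random suffix>
        println(explicit.uid)  // myScalaLogReg_fixed
      }
    }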
