Skip to content

Commit 25416dc

Browse files
author
Andrew Or
committed
Merge branch 'master' of github.com:apache/spark into dag-viz-streaming
Conflicts: streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala
2 parents 9113183 + 48fc38f commit 25416dc

File tree

20 files changed

+214
-99
lines changed

20 files changed

+214
-99
lines changed

core/src/main/resources/org/apache/spark/ui/static/streaming-page.js

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,16 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) {
9898
var x = d3.scale.linear().domain([minX, maxX]).range([0, width]);
9999
var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]);
100100

101-
var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { return timeFormat[d]; });
101+
var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) {
102+
var formattedDate = timeFormat[d];
103+
var dotIndex = formattedDate.indexOf('.');
104+
if (dotIndex >= 0) {
105+
// Remove milliseconds
106+
return formattedDate.substring(0, dotIndex);
107+
} else {
108+
return formattedDate;
109+
}
110+
});
102111
var formatYValue = d3.format(",.2f");
103112
var yAxis = d3.svg.axis().scale(y).orient("left").ticks(5).tickFormat(formatYValue);
104113

@@ -252,28 +261,16 @@ function drawHistogram(id, values, minY, maxY, unitY, batchInterval) {
252261
}
253262

254263
$(function() {
255-
function getParameterFromURL(param)
256-
{
257-
var parameters = window.location.search.substring(1); // Remove "?"
258-
var keyValues = parameters.split('&');
259-
for (var i = 0; i < keyValues.length; i++)
260-
{
261-
var paramKeyValue = keyValues[i].split('=');
262-
if (paramKeyValue[0] == param)
263-
{
264-
return paramKeyValue[1];
265-
}
266-
}
267-
}
268-
269-
var status = getParameterFromURL("show-streams-detail") == "true";
264+
var status = window.localStorage && window.localStorage.getItem("show-streams-detail") == "true";
270265

271266
$("span.expand-input-rate").click(function() {
272267
status = !status;
273268
$("#inputs-table").toggle('collapsed');
274269
// Toggle the class of the arrow between open and closed
275270
$(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed');
276-
window.history.pushState('', document.title, window.location.pathname + '?show-streams-detail=' + status);
271+
if (window.localStorage) {
272+
window.localStorage.setItem("show-streams-detail", "" + status);
273+
}
277274
});
278275

279276
if (status) {

core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ private[spark] class MasterSource(val master: Master) extends Source {
3030
override def getValue: Int = master.workers.size
3131
})
3232

33+
// Gauge for alive worker numbers in cluster
34+
metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int]{
35+
override def getValue: Int = master.workers.filter(_.state == WorkerState.ALIVE).size
36+
})
37+
3338
// Gauge for application numbers in cluster
3439
metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] {
3540
override def getValue: Int = master.apps.size

core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,17 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
7777

7878
<div class="row-fluid">
7979
<div class="span12">
80-
<h4> Data Distribution on {rddStorageInfo.dataDistribution.size} Executors </h4>
80+
<h4>
81+
Data Distribution on {rddStorageInfo.dataDistribution.map(_.size).getOrElse(0)}
82+
Executors
83+
</h4>
8184
{workerTable}
8285
</div>
8386
</div>
8487

8588
<div class="row-fluid">
8689
<div class="span12">
87-
<h4> {rddStorageInfo.partitions.size} Partitions </h4>
90+
<h4> {rddStorageInfo.partitions.map(_.size).getOrElse(0)} Partitions </h4>
8891
{blockTable}
8992
</div>
9093
</div>;

dev/github_jira_sync.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
# User facing configs
3535
GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
36+
JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK")
3637
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
3738
JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark")
3839
JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX")
@@ -68,7 +69,7 @@ def get_jira_prs():
6869
page_json = get_json(page)
6970

7071
for pull in page_json:
71-
jiras = re.findall("SPARK-[0-9]{4,5}", pull['title'])
72+
jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
7273
for jira in jiras:
7374
result = result + [(jira, pull)]
7475

python/docs/pyspark.ml.rst

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
pyspark.ml package
2-
=====================
2+
==================
33

44
ML Pipeline APIs
5-
--------------
5+
----------------
66

77
.. automodule:: pyspark.ml
88
:members:
99
:undoc-members:
1010
:inherited-members:
1111

1212
pyspark.ml.param module
13-
-------------------------
13+
-----------------------
1414

1515
.. automodule:: pyspark.ml.param
1616
:members:
@@ -34,31 +34,31 @@ pyspark.ml.classification module
3434
:inherited-members:
3535

3636
pyspark.ml.recommendation module
37-
-------------------------
37+
--------------------------------
3838

3939
.. automodule:: pyspark.ml.recommendation
4040
:members:
4141
:undoc-members:
4242
:inherited-members:
4343

4444
pyspark.ml.regression module
45-
-------------------------
45+
----------------------------
4646

4747
.. automodule:: pyspark.ml.regression
4848
:members:
4949
:undoc-members:
5050
:inherited-members:
5151

5252
pyspark.ml.tuning module
53-
--------------------------------
53+
------------------------
5454

5555
.. automodule:: pyspark.ml.tuning
5656
:members:
5757
:undoc-members:
5858
:inherited-members:
5959

6060
pyspark.ml.evaluation module
61-
--------------------------------
61+
----------------------------
6262

6363
.. automodule:: pyspark.ml.evaluation
6464
:members:

python/pyspark/ml/classification.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
4343
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
4444
>>> model.transform(test0).head().prediction
4545
0.0
46+
>>> model.weights
47+
DenseVector([5.5...])
48+
>>> model.intercept
49+
-2.68...
4650
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF()
4751
>>> model.transform(test1).head().prediction
4852
1.0
@@ -67,7 +71,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
6771
threshold=0.5, probabilityCol="probability"):
6872
"""
6973
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
70-
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
74+
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
7175
threshold=0.5, probabilityCol="probability")
7276
"""
7377
super(LogisticRegression, self).__init__()
@@ -92,8 +96,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
9296
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
9397
threshold=0.5, probabilityCol="probability"):
9498
"""
95-
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
96-
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
99+
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
100+
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
97101
threshold=0.5, probabilityCol="probability")
98102
Sets params for logistic regression.
99103
"""
@@ -148,6 +152,20 @@ class LogisticRegressionModel(JavaModel):
148152
Model fitted by LogisticRegression.
149153
"""
150154

155+
@property
156+
def weights(self):
157+
"""
158+
Model weights.
159+
"""
160+
return self._call_java("weights")
161+
162+
@property
163+
def intercept(self):
164+
"""
165+
Model intercept.
166+
"""
167+
return self._call_java("intercept")
168+
151169

152170
class TreeClassifierParams(object):
153171
"""
@@ -202,7 +220,7 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
202220
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"):
203221
"""
204222
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
205-
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
223+
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
206224
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
207225
"""
208226
super(DecisionTreeClassifier, self).__init__()
@@ -224,9 +242,8 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
224242
impurity="gini"):
225243
"""
226244
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
227-
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
228-
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
229-
impurity="gini")
245+
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
246+
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
230247
Sets params for the DecisionTreeClassifier.
231248
"""
232249
kwargs = self.setParams._input_kwargs
@@ -302,9 +319,9 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
302319
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
303320
numTrees=20, featureSubsetStrategy="auto", seed=42):
304321
"""
305-
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
306-
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
307-
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
322+
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
323+
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
324+
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
308325
numTrees=20, featureSubsetStrategy="auto", seed=42)
309326
"""
310327
super(RandomForestClassifier, self).__init__()
@@ -337,9 +354,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
337354
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
338355
impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
339356
"""
340-
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
341-
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
342-
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
357+
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
358+
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
359+
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \
343360
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
344361
Sets params for linear classification.
345362
"""
@@ -453,10 +470,10 @@ def __init__(self, featuresCol="features", labelCol="label", predictionCol="pred
453470
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
454471
maxIter=20, stepSize=0.1):
455472
"""
456-
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
457-
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
458-
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
459-
maxIter=20, stepSize=0.1)
473+
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
474+
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
475+
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
476+
lossType="logistic", maxIter=20, stepSize=0.1)
460477
"""
461478
super(GBTClassifier, self).__init__()
462479
#: param for Loss function which GBT tries to minimize (case-insensitive).
@@ -484,9 +501,9 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre
484501
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
485502
lossType="logistic", maxIter=20, stepSize=0.1):
486503
"""
487-
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
488-
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
489-
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
504+
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
505+
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
506+
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
490507
lossType="logistic", maxIter=20, stepSize=0.1)
491508
Sets params for Gradient Boosted Tree Classification.
492509
"""

python/pyspark/ml/feature.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
481481
def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
482482
inputCol=None, outputCol=None):
483483
"""
484-
__init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
484+
__init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
485485
inputCol=None, outputCol=None)
486486
"""
487487
super(RegexTokenizer, self).__init__()
@@ -496,7 +496,7 @@ def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+"
496496
def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
497497
inputCol=None, outputCol=None):
498498
"""
499-
setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
499+
setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
500500
inputCol="input", outputCol="output")
501501
Sets params for this RegexTokenizer.
502502
"""
@@ -869,7 +869,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
869869
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
870870
seed=42, inputCol=None, outputCol=None):
871871
"""
872-
__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
872+
__init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
873873
seed=42, inputCol=None, outputCol=None)
874874
"""
875875
super(Word2Vec, self).__init__()
@@ -889,7 +889,7 @@ def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025,
889889
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
890890
seed=42, inputCol=None, outputCol=None):
891891
"""
892-
setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42,
892+
setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \
893893
inputCol=None, outputCol=None)
894894
Sets params for this Word2Vec.
895895
"""

python/pyspark/ml/recommendation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemB
9292
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
9393
ratingCol="rating", nonnegative=False, checkpointInterval=10):
9494
"""
95-
__init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
96-
implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0,
95+
__init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
96+
implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, \
9797
ratingCol="rating", nonnegative=false, checkpointInterval=10)
9898
"""
9999
super(ALS, self).__init__()
@@ -118,8 +118,8 @@ def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItem
118118
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
119119
ratingCol="rating", nonnegative=False, checkpointInterval=10):
120120
"""
121-
setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
122-
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
121+
setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
122+
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, \
123123
ratingCol="rating", nonnegative=False, checkpointInterval=10)
124124
Sets params for ALS.
125125
"""

0 commit comments

Comments
 (0)