Skip to content

Commit 0b674fa

Browse files
committed
mark decision tree APIs
1 parent 86b9e34 commit 0b674fa

File tree

13 files changed

+37
-14
lines changed

13 files changed

+37
-14
lines changed

mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class NaiveBayesModel(
4141
private val brzTheta = new BDM[Double](theta.length, theta(0).length)
4242

4343
{
44-
// Need to put an extra pair of braces to prevent Scala treat `i` as a member.
44+
// Need to put an extra pair of braces to prevent Scala treating `i` as a member.
4545
var i = 0
4646
while (i < theta.length) {
4747
var j = 0

mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ import org.apache.spark.util.random.XORShiftRandom
3333
import org.apache.spark.mllib.linalg.{Vector, Vectors}
3434

3535
/**
36+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
37+
*
3638
* A class that implements a decision tree algorithm for classification and regression. It
3739
* supports both continuous and categorical features.
3840
* @param strategy The configuration parameters for the tree algorithm which specify the type
3941
* of algorithm (classification, regression, etc.), feature type (continuous,
4042
* categorical), depth of the tree, quantile calculation strategy, etc.
4143
*/
42-
class DecisionTree private(val strategy: Strategy) extends Serializable with Logging {
44+
class DecisionTree (private val strategy: Strategy) extends Serializable with Logging {
4345

4446
/**
4547
* Method to train a decision tree model over an RDD

mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@ class Strategy (
4040
val maxDepth: Int,
4141
val maxBins: Int = 100,
4242
val quantileCalculationStrategy: QuantileStrategy = Sort,
43-
val categoricalFeaturesInfo: Map[Int,Int] = Map[Int,Int]()) extends Serializable
43+
val categoricalFeaturesInfo: Map[Int, Int] = Map[Int, Int]()) extends Serializable

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,24 @@
1818
package org.apache.spark.mllib.tree.impurity
1919

2020
/**
21+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
22+
*
2123
* Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during
2224
* binary classification.
2325
*/
2426
object Entropy extends Impurity {
2527

26-
def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
28+
private[tree] def log2(x: Double) = scala.math.log(x) / scala.math.log(2)
2729

2830
/**
31+
* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
32+
*
2933
* entropy calculation
3034
* @param c0 count of instances with label 0
3135
* @param c1 count of instances with label 1
3236
* @return entropy value
3337
*/
34-
def calculate(c0: Double, c1: Double): Double = {
38+
override def calculate(c0: Double, c1: Double): Double = {
3539
if (c0 == 0 || c1 == 0) {
3640
0
3741
} else {
@@ -42,6 +46,6 @@ object Entropy extends Impurity {
4246
}
4347
}
4448

45-
def calculate(count: Double, sum: Double, sumSquares: Double): Double =
49+
override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
4650
throw new UnsupportedOperationException("Entropy.calculate")
4751
}

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,17 @@
1818
package org.apache.spark.mllib.tree.impurity
1919

2020
/**
21+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
22+
*
2123
* Class for calculating the
2224
* [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]]
2325
* during binary classification.
2426
*/
2527
object Gini extends Impurity {
2628

2729
/**
30+
* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
31+
*
2832
* Gini coefficient calculation
2933
* @param c0 count of instances with label 0
3034
* @param c1 count of instances with label 1
@@ -41,6 +45,6 @@ object Gini extends Impurity {
4145
}
4246
}
4347

44-
def calculate(count: Double, sum: Double, sumSquares: Double): Double =
48+
override def calculate(count: Double, sum: Double, sumSquares: Double): Double =
4549
throw new UnsupportedOperationException("Gini.calculate")
4650
}

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@
1818
package org.apache.spark.mllib.tree.impurity
1919

2020
/**
21+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
22+
*
2123
* Trait for calculating information gain.
2224
*/
2325
trait Impurity extends Serializable {
2426

2527
/**
26-
* <span class="badge badge-red" style="float: right;">DEVELOPER API - UNSTABLE</span>
28+
* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
2729
*
2830
* information calculation for binary classification
2931
* @param c0 count of instances with label 0
@@ -33,7 +35,7 @@ trait Impurity extends Serializable {
3335
def calculate(c0 : Double, c1 : Double): Double
3436

3537
/**
36-
* <span class="badge badge-red" style="float: right;">DEVELOPER API - UNSTABLE</span>
38+
* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
3739
*
3840
* information calculation for regression
3941
* @param count number of instances

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,17 @@
1818
package org.apache.spark.mllib.tree.impurity
1919

2020
/**
21+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
22+
*
2123
* Class for calculating variance during regression
2224
*/
2325
object Variance extends Impurity {
2426
override def calculate(c0: Double, c1: Double): Double =
2527
throw new UnsupportedOperationException("Variance.calculate")
2628

2729
/**
30+
* <span class="badge badge-red" style="float: right;">DEVELOPER API</span>
31+
*
2832
* variance calculation
2933
* @param count number of instances
3034
* @param sum sum of labels

mllib/src/main/scala/org/apache/spark/mllib/tree/model/Bin.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._
3030
* @param featureType type of feature -- categorical or continuous
3131
* @param category categorical label value accepted in the bin
3232
*/
33+
private[tree]
3334
case class Bin(lowSplit: Split, highSplit: Split, featureType: FeatureType, category: Double)

mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import org.apache.spark.rdd.RDD
2222
import org.apache.spark.mllib.linalg.Vector
2323

2424
/**
25+
* <span class="badge" style="float: right; background-color: #257080;">EXPERIMENTAL</span>
26+
*
2527
* Model to store the decision tree parameters
2628
* @param topNode root node
2729
* @param algo algorithm type -- classification or regression

mllib/src/main/scala/org/apache/spark/mllib/tree/model/Filter.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ package org.apache.spark.mllib.tree.model
2222
* @param split split specifying the feature index, type and threshold
2323
* @param comparison integer specifying <,=,>
2424
*/
25-
case class Filter(split: Split, comparison: Int) {
25+
private[tree] case class Filter(split: Split, comparison: Int) {
2626
// Comparison -1,0,1 signifies <,=,>
2727
override def toString = " split = " + split + "comparison = " + comparison
2828
}

0 commit comments

Comments
 (0)