
Commit a189acf

update since versions in mllib.pmml and mllib.util
1 parent: b37f0cc

9 files changed: +41 −11 lines changed

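The change itself is mechanical: every public type and member in the touched files gains an `@Since("x.y.z")` annotation recording the release that first shipped it. For readers unfamiliar with the pattern, here is a minimal sketch; the `Since` class below is a simplified stand-in for the real `org.apache.spark.annotation.Since`, which is a Scala static annotation taking the version string as its argument.

import scala.annotation.StaticAnnotation

// Simplified stand-in for org.apache.spark.annotation.Since:
// a marker recording the first Spark release that shipped an API.
class Since(version: String) extends StaticAnnotation

@Since("0.8.0")
object ExampleUtils {
  // Members added in a later release carry their own, newer version.
  @Since("1.3.0")
  def newerHelper(): Unit = ()
}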

mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala

Lines changed: 6 additions & 1 deletion
@@ -23,7 +23,7 @@ import javax.xml.transform.stream.StreamResult
 import org.jpmml.model.JAXBUtil
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.{DeveloperApi, Experimental}
+import org.apache.spark.annotation.{DeveloperApi, Experimental, Since}
 import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
 
 /**
@@ -33,6 +33,7 @@ import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory
  * developed by the Data Mining Group (www.dmg.org).
  */
 @DeveloperApi
+@Since("1.4.0")
 trait PMMLExportable {
 
   /**
@@ -48,6 +49,7 @@ trait PMMLExportable {
    * Export the model to a local file in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(localPath: String): Unit = {
     toPMML(new StreamResult(new File(localPath)))
   }
@@ -57,6 +59,7 @@ trait PMMLExportable {
    * Export the model to a directory on a distributed file system in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(sc: SparkContext, path: String): Unit = {
     val pmml = toPMML()
     sc.parallelize(Array(pmml), 1).saveAsTextFile(path)
@@ -67,6 +70,7 @@ trait PMMLExportable {
    * Export the model to the OutputStream in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(outputStream: OutputStream): Unit = {
     toPMML(new StreamResult(outputStream))
   }
@@ -76,6 +80,7 @@ trait PMMLExportable {
    * Export the model to a String in PMML format
    */
   @Experimental
+  @Since("1.4.0")
   def toPMML(): String = {
     val writer = new StringWriter
     toPMML(new StreamResult(writer))
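As a usage sketch: any MLlib model that mixes in PMMLExportable, KMeansModel for example, picks up all four exports. This assumes a live SparkContext `sc`; the sample data and paths are made up for illustration.

import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(9.0, 9.0)))
// KMeansModel mixes in PMMLExportable as of Spark 1.4
val model = KMeans.train(points, 2, 10)

model.toPMML("/tmp/kmeans.xml")        // local file
model.toPMML(sc, "/tmp/kmeans-pmml")   // directory on a distributed FS
val asString: String = model.toPMML()  // in-memory PMML document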

mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala

Lines changed: 5 additions & 2 deletions
@@ -17,23 +17,25 @@
 
 package org.apache.spark.mllib.util
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.Logging
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
  * A collection of methods used to validate data before applying ML algorithms.
  */
 @DeveloperApi
+@Since("0.8.0")
 object DataValidators extends Logging {
 
   /**
    * Function to check if labels used for classification are either zero or one.
    *
    * @return True if labels are all zero or one, false otherwise.
    */
+  @Since("1.0.0")
   val binaryLabelValidator: RDD[LabeledPoint] => Boolean = { data =>
     val numInvalid = data.filter(x => x.label != 1.0 && x.label != 0.0).count()
     if (numInvalid != 0) {
@@ -48,6 +50,7 @@ object DataValidators extends Logging {
    *
    * @return True if labels are all in the range of {0, 1, ..., k-1}, false otherwise.
    */
+  @Since("1.3.0")
   def multiLabelValidator(k: Int): RDD[LabeledPoint] => Boolean = { data =>
     val numInvalid = data.filter(x =>
       x.label - x.label.toInt != 0.0 || x.label < 0 || x.label > k - 1).count()
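Since both validators are plain `RDD[LabeledPoint] => Boolean` functions, they can be applied directly; a small sketch, again assuming a live SparkContext `sc`:

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.DataValidators

val labeled = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.0)),
  LabeledPoint(1.0, Vectors.dense(2.0))))

DataValidators.binaryLabelValidator(labeled)    // true: all labels are 0 or 1
DataValidators.multiLabelValidator(3)(labeled)  // true: all labels are in {0, 1, 2}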

mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala

Lines changed: 4 additions & 1 deletion
@@ -19,8 +19,8 @@ package org.apache.spark.mllib.util
 
 import scala.util.Random
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.rdd.RDD
 
 /**
@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
  * cluster with scale 1 around each center.
 */
 @DeveloperApi
+@Since("0.8.0")
 object KMeansDataGenerator {
 
   /**
@@ -42,6 +43,7 @@ object KMeansDataGenerator {
    * @param r Scaling factor for the distribution of the initial centers
    * @param numPartitions Number of partitions of the generated RDD; default 2
    */
+  @Since("0.8.0")
   def generateKMeansRDD(
       sc: SparkContext,
       numPoints: Int,
@@ -62,6 +64,7 @@ object KMeansDataGenerator {
     }
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 6) {
       // scalastyle:off println
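A usage sketch of the generator, with arbitrary values; the argument order (points, centers k, dimension d, scale r, partitions) is inferred from the scaladoc above and should be checked against the full signature.

import org.apache.spark.mllib.util.KMeansDataGenerator

// 1000 points in 10 dimensions around 5 centers, center scale r = 2.0, 2 partitions
val kmeansData = KMeansDataGenerator.generateKMeansRDD(sc, 1000, 5, 10, 2.0, 2)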

mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala

Lines changed: 8 additions & 2 deletions
@@ -22,11 +22,11 @@ import scala.util.Random
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
@@ -35,6 +35,7 @@ import org.apache.spark.mllib.regression.LabeledPoint
  * response variable `Y`.
  */
 @DeveloperApi
+@Since("0.8.0")
 object LinearDataGenerator {
 
   /**
@@ -46,6 +47,7 @@ object LinearDataGenerator {
    * @param seed Random seed
    * @return Java List of input.
    */
+  @Since("0.8.0")
   def generateLinearInputAsList(
       intercept: Double,
       weights: Array[Double],
@@ -68,6 +70,7 @@ object LinearDataGenerator {
    * @param eps Epsilon scaling factor.
    * @return Seq of input.
    */
+  @Since("0.8.0")
   def generateLinearInput(
       intercept: Double,
       weights: Array[Double],
@@ -92,6 +95,7 @@ object LinearDataGenerator {
    * @param eps Epsilon scaling factor.
    * @return Seq of input.
    */
+  @Since("0.8.0")
   def generateLinearInput(
       intercept: Double,
       weights: Array[Double],
@@ -132,6 +136,7 @@ object LinearDataGenerator {
    *
    * @return RDD of LabeledPoint containing sample data.
    */
+  @Since("0.8.0")
   def generateLinearRDD(
       sc: SparkContext,
       nexamples: Int,
@@ -151,6 +156,7 @@ object LinearDataGenerator {
     data
   }
 
+  @Since("0.8.0")
  def main(args: Array[String]) {
    if (args.length < 2) {
      // scalastyle:off println
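For the common entry point generateLinearRDD, a short sketch (values arbitrary, live `sc` assumed; the later parameters such as partition count and intercept are left at their defaults):

import org.apache.spark.mllib.util.LinearDataGenerator

// 10000 LabeledPoints with 3 features and noise scale eps = 0.1
val linearData = LinearDataGenerator.generateLinearRDD(sc, 10000, 3, 0.1)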

mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala

Lines changed: 4 additions & 1 deletion
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
 
 import scala.util.Random
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
@@ -31,6 +31,7 @@ import org.apache.spark.mllib.linalg.Vectors
  * with probability `probOne` and scales features for positive examples by `eps`.
  */
 @DeveloperApi
+@Since("0.8.0")
 object LogisticRegressionDataGenerator {
 
   /**
@@ -43,6 +44,7 @@ object LogisticRegressionDataGenerator {
    * @param nparts Number of partitions of the generated RDD. Default value is 2.
    * @param probOne Probability that a label is 1 (and not 0). Default value is 0.5.
    */
+  @Since("0.8.0")
   def generateLogisticRDD(
       sc: SparkContext,
       nexamples: Int,
@@ -62,6 +64,7 @@ object LogisticRegressionDataGenerator {
     data
   }
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length != 5) {
       // scalastyle:off println
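And the analogous sketch for the logistic generator, passing every parameter positionally in the order the scaladoc above documents them (examples, features, eps, partitions, probability of label 1):

import org.apache.spark.mllib.util.LogisticRegressionDataGenerator

val logisticData =
  LogisticRegressionDataGenerator.generateLogisticRDD(sc, 10000, 2, 3.0, 2, 0.5)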

mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala

Lines changed: 3 additions & 1 deletion
@@ -23,7 +23,7 @@ import scala.language.postfixOps
 import scala.util.Random
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{Since, DeveloperApi}
 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
 import org.apache.spark.rdd.RDD
 
@@ -52,7 +52,9 @@ import org.apache.spark.rdd.RDD
  * testSampFact (Double) Percentage of training data to use as test data.
  */
 @DeveloperApi
+@Since("0.8.0")
 object MFDataGenerator {
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println

mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala

Lines changed: 2 additions & 0 deletions
@@ -36,6 +36,7 @@ import org.apache.spark.streaming.dstream.DStream
 /**
  * Helper methods to load, save and pre-process data used in ML Lib.
  */
+@Since("0.8.0")
 object MLUtils {
 
   private[mllib] lazy val EPSILON = {
@@ -168,6 +169,7 @@ object MLUtils {
    *
    * @see [[org.apache.spark.mllib.util.MLUtils#loadLibSVMFile]]
    */
+  @Since("1.0.0")
   def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String) {
     // TODO: allow to specify label precision and feature precision.
     val dataStr = data.map { case LabeledPoint(label, features) =>
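saveAsLibSVMFile pairs with the loadLibSVMFile method referenced in the @see tag for a simple round trip; a sketch assuming `labeledData: RDD[LabeledPoint]` from one of the generators above:

import org.apache.spark.mllib.util.MLUtils

// Write LabeledPoints in LIBSVM text format, then read them back
MLUtils.saveAsLibSVMFile(labeledData, "/tmp/libsvm-data")
val reloaded = MLUtils.loadLibSVMFile(sc, "/tmp/libsvm-data")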

mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala

Lines changed: 4 additions & 2 deletions
@@ -21,20 +21,22 @@ import scala.util.Random
 
 import com.github.fommil.netlib.BLAS.{getInstance => blas}
 
-import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.SparkContext
-import org.apache.spark.rdd.RDD
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
 
 /**
  * :: DeveloperApi ::
  * Generate sample data used for SVM. This class generates uniform random values
 * for the features and adds Gaussian noise with weight 0.1 to generate labels.
  */
 @DeveloperApi
+@Since("0.8.0")
 object SVMDataGenerator {
 
+  @Since("0.8.0")
   def main(args: Array[String]) {
     if (args.length < 2) {
       // scalastyle:off println

mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala

Lines changed: 5 additions & 1 deletion
@@ -24,7 +24,7 @@ import org.json4s._
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.SparkContext
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.sql.catalyst.ScalaReflection
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
 
@@ -35,6 +35,7 @@ import org.apache.spark.sql.types.{DataType, StructField, StructType}
  * This should be inherited by the class which implements model instances.
  */
 @DeveloperApi
+@Since("1.3.0")
 trait Saveable {
 
   /**
@@ -50,6 +51,7 @@ trait Saveable {
    * @param path Path specifying the directory in which to save this model.
    *             If the directory already exists, this method throws an exception.
    */
+  @Since("1.3.0")
   def save(sc: SparkContext, path: String): Unit
 
   /** Current version of model save/load format. */
@@ -64,6 +66,7 @@ trait Saveable {
  * This should be inherited by an object paired with the model class.
  */
 @DeveloperApi
+@Since("1.3.0")
 trait Loader[M <: Saveable] {
 
   /**
@@ -75,6 +78,7 @@ trait Loader[M <: Saveable] {
    * @param path Path specifying the directory to which the model was saved.
    * @return Model instance
    */
+  @Since("1.3.0")
   def load(sc: SparkContext, path: String): M
 
 }
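In practice, Saveable and Loader surface as a save method on the model and a load method on its companion object; a sketch with logistic regression, assuming `training: RDD[LabeledPoint]` and a live `sc`:

import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithLBFGS}

val lrModel = new LogisticRegressionWithLBFGS().run(training)
lrModel.save(sc, "/tmp/lr-model")  // Saveable.save; throws if the directory exists
val restored = LogisticRegressionModel.load(sc, "/tmp/lr-model")  // Loader.load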
