Skip to content

Commit 7da6748

Browse files
committed
[SPARK-11988][ML][MLLIB] Update JPMML to 1.2.7
Update JPMML pmml-model to 1.2.7.
Author: Sean Owen <[email protected]>
Closes #9972 from srowen/SPARK-11988.
1 parent e9c9ae2 commit 7da6748

File tree

6 files changed

+59
-65
lines changed

6 files changed

+59
-65
lines changed

LICENSE

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
Apache License
32
Version 2.0, January 2004
43
http://www.apache.org/licenses/
@@ -237,7 +236,7 @@ The following components are provided under a BSD-style license. See project lin
237236
The text of each license is also included at licenses/LICENSE-[project].txt.
238237

239238
(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
240-
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
239+
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
241240
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
242241
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
243242
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)

mllib/pom.xml

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@
109109
<dependency>
110110
<groupId>org.jpmml</groupId>
111111
<artifactId>pmml-model</artifactId>
112-
<version>1.1.15</version>
112+
<version>1.2.7</version>
113113
<exclusions>
114114
<exclusion>
115115
<groupId>com.sun.xml.fastinfoset</groupId>

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala

+16-16
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
4545
val fields = new SArray[FieldName](model.weights.size)
4646
val dataDictionary = new DataDictionary
4747
val miningSchema = new MiningSchema
48-
val regressionTableYES = new RegressionTable(model.intercept).withTargetCategory("1")
48+
val regressionTableYES = new RegressionTable(model.intercept).setTargetCategory("1")
4949
var interceptNO = threshold
5050
if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
5151
if (threshold <= 0) {
@@ -56,35 +56,35 @@ private[mllib] class BinaryClassificationPMMLModelExport(
5656
interceptNO = -math.log(1 / threshold - 1)
5757
}
5858
}
59-
val regressionTableNO = new RegressionTable(interceptNO).withTargetCategory("0")
59+
val regressionTableNO = new RegressionTable(interceptNO).setTargetCategory("0")
6060
val regressionModel = new RegressionModel()
61-
.withFunctionName(MiningFunctionType.CLASSIFICATION)
62-
.withMiningSchema(miningSchema)
63-
.withModelName(description)
64-
.withNormalizationMethod(normalizationMethod)
65-
.withRegressionTables(regressionTableYES, regressionTableNO)
61+
.setFunctionName(MiningFunctionType.CLASSIFICATION)
62+
.setMiningSchema(miningSchema)
63+
.setModelName(description)
64+
.setNormalizationMethod(normalizationMethod)
65+
.addRegressionTables(regressionTableYES, regressionTableNO)
6666

6767
for (i <- 0 until model.weights.size) {
6868
fields(i) = FieldName.create("field_" + i)
69-
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
69+
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
7070
miningSchema
71-
.withMiningFields(new MiningField(fields(i))
72-
.withUsageType(FieldUsageType.ACTIVE))
73-
regressionTableYES.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
71+
.addMiningFields(new MiningField(fields(i))
72+
.setUsageType(FieldUsageType.ACTIVE))
73+
regressionTableYES.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
7474
}
7575

7676
// add target field
7777
val targetField = FieldName.create("target")
7878
dataDictionary
79-
.withDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
79+
.addDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
8080
miningSchema
81-
.withMiningFields(new MiningField(targetField)
82-
.withUsageType(FieldUsageType.TARGET))
81+
.addMiningFields(new MiningField(targetField)
82+
.setUsageType(FieldUsageType.TARGET))
8383

84-
dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
84+
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
8585

8686
pmml.setDataDictionary(dataDictionary)
87-
pmml.withModels(regressionModel)
87+
pmml.addModels(regressionModel)
8888
}
8989
}
9090
}

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala

+13-13
Original file line numberDiff line numberDiff line change
@@ -45,31 +45,31 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
4545
val miningSchema = new MiningSchema
4646
val regressionTable = new RegressionTable(model.intercept)
4747
val regressionModel = new RegressionModel()
48-
.withFunctionName(MiningFunctionType.REGRESSION)
49-
.withMiningSchema(miningSchema)
50-
.withModelName(description)
51-
.withRegressionTables(regressionTable)
48+
.setFunctionName(MiningFunctionType.REGRESSION)
49+
.setMiningSchema(miningSchema)
50+
.setModelName(description)
51+
.addRegressionTables(regressionTable)
5252

5353
for (i <- 0 until model.weights.size) {
5454
fields(i) = FieldName.create("field_" + i)
55-
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
55+
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
5656
miningSchema
57-
.withMiningFields(new MiningField(fields(i))
58-
.withUsageType(FieldUsageType.ACTIVE))
59-
regressionTable.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
57+
.addMiningFields(new MiningField(fields(i))
58+
.setUsageType(FieldUsageType.ACTIVE))
59+
regressionTable.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
6060
}
6161

6262
// for completeness add target field
6363
val targetField = FieldName.create("target")
64-
dataDictionary.withDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
64+
dataDictionary.addDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
6565
miningSchema
66-
.withMiningFields(new MiningField(targetField)
67-
.withUsageType(FieldUsageType.TARGET))
66+
.addMiningFields(new MiningField(targetField)
67+
.setUsageType(FieldUsageType.TARGET))
6868

69-
dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
69+
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
7070

7171
pmml.setDataDictionary(dataDictionary)
72-
pmml.withModels(regressionModel)
72+
pmml.addModels(regressionModel)
7373
}
7474
}
7575
}

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala

+22-22
Original file line numberDiff line numberDiff line change
@@ -42,42 +42,42 @@ private[mllib] class KMeansPMMLModelExport(model : KMeansModel) extends PMMLMode
4242
val dataDictionary = new DataDictionary
4343
val miningSchema = new MiningSchema
4444
val comparisonMeasure = new ComparisonMeasure()
45-
.withKind(ComparisonMeasure.Kind.DISTANCE)
46-
.withMeasure(new SquaredEuclidean())
45+
.setKind(ComparisonMeasure.Kind.DISTANCE)
46+
.setMeasure(new SquaredEuclidean())
4747
val clusteringModel = new ClusteringModel()
48-
.withModelName("k-means")
49-
.withMiningSchema(miningSchema)
50-
.withComparisonMeasure(comparisonMeasure)
51-
.withFunctionName(MiningFunctionType.CLUSTERING)
52-
.withModelClass(ClusteringModel.ModelClass.CENTER_BASED)
53-
.withNumberOfClusters(model.clusterCenters.length)
48+
.setModelName("k-means")
49+
.setMiningSchema(miningSchema)
50+
.setComparisonMeasure(comparisonMeasure)
51+
.setFunctionName(MiningFunctionType.CLUSTERING)
52+
.setModelClass(ClusteringModel.ModelClass.CENTER_BASED)
53+
.setNumberOfClusters(model.clusterCenters.length)
5454

5555
for (i <- 0 until clusterCenter.size) {
5656
fields(i) = FieldName.create("field_" + i)
57-
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
57+
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
5858
miningSchema
59-
.withMiningFields(new MiningField(fields(i))
60-
.withUsageType(FieldUsageType.ACTIVE))
61-
clusteringModel.withClusteringFields(
62-
new ClusteringField(fields(i)).withCompareFunction(CompareFunctionType.ABS_DIFF))
59+
.addMiningFields(new MiningField(fields(i))
60+
.setUsageType(FieldUsageType.ACTIVE))
61+
clusteringModel.addClusteringFields(
62+
new ClusteringField(fields(i)).setCompareFunction(CompareFunctionType.ABS_DIFF))
6363
}
6464

65-
dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size)
65+
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
6666

67-
for (i <- 0 until model.clusterCenters.length) {
67+
for (i <- model.clusterCenters.indices) {
6868
val cluster = new Cluster()
69-
.withName("cluster_" + i)
70-
.withArray(new org.dmg.pmml.Array()
71-
.withType(Array.Type.REAL)
72-
.withN(clusterCenter.size)
73-
.withValue(model.clusterCenters(i).toArray.mkString(" ")))
69+
.setName("cluster_" + i)
70+
.setArray(new org.dmg.pmml.Array()
71+
.setType(Array.Type.REAL)
72+
.setN(clusterCenter.size)
73+
.setValue(model.clusterCenters(i).toArray.mkString(" ")))
7474
// we don't have the size of the single cluster but only the centroids (setValue)
7575
// .setSize(value)
76-
clusteringModel.withClusters(cluster)
76+
clusteringModel.addClusters(cluster)
7777
}
7878

7979
pmml.setDataDictionary(dataDictionary)
80-
pmml.withModels(clusteringModel)
80+
pmml.addModels(clusteringModel)
8181
}
8282
}
8383
}

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala

+6-11
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,14 @@ private[mllib] trait PMMLModelExport {
3030
* Holder of the exported model in PMML format
3131
*/
3232
@BeanProperty
33-
val pmml: PMML = new PMML
34-
35-
pmml.setVersion("4.2")
36-
setHeader(pmml)
37-
38-
private def setHeader(pmml: PMML): Unit = {
33+
val pmml: PMML = {
3934
val version = getClass.getPackage.getImplementationVersion
40-
val app = new Application().withName("Apache Spark MLlib").withVersion(version)
35+
val app = new Application("Apache Spark MLlib").setVersion(version)
4136
val timestamp = new Timestamp()
42-
.withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
37+
.addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
4338
val header = new Header()
44-
.withApplication(app)
45-
.withTimestamp(timestamp)
46-
pmml.setHeader(header)
39+
.setApplication(app)
40+
.setTimestamp(timestamp)
41+
new PMML("4.2", header, null)
4742
}
4843
}

0 commit comments

Comments
 (0)