diff --git a/LICENSE-binary b/LICENSE-binary
index d2eea83525caf..845c3af566f1b 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -515,7 +515,9 @@ javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2
Eclipse Distribution License (EDL) 1.0
--------------------------------------
com.sun.istack:istack-commons-runtime
+jakarta.activation:jakarta.activation-api
jakarta.xml.bind:jakarta.xml.bind-api
+org.glassfish.jaxb:jaxb-core
org.glassfish.jaxb:jaxb-runtime
Eclipse Public License (EPL) 2.0
diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3
index e10b2e4c86645..12587680ae32e 100644
--- a/dev/deps/spark-deps-hadoop-3-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3-hive-2.3
@@ -101,7 +101,7 @@ httpclient/4.5.14//httpclient-4.5.14.jar
httpcore/4.4.16//httpcore-4.4.16.jar
icu4j/76.1//icu4j-76.1.jar
ini4j/0.5.4//ini4j-0.5.4.jar
-istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
+istack-commons-runtime/4.1.2//istack-commons-runtime-4.1.2.jar
ivy/2.5.3//ivy-2.5.3.jar
j2objc-annotations/3.0.0//j2objc-annotations-3.0.0.jar
jackson-annotations/2.18.2//jackson-annotations-2.18.2.jar
@@ -113,12 +113,13 @@ jackson-dataformat-yaml/2.18.2//jackson-dataformat-yaml-2.18.2.jar
jackson-datatype-jsr310/2.18.2//jackson-datatype-jsr310-2.18.2.jar
jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar
jackson-module-scala_2.13/2.18.2//jackson-module-scala_2.13-2.18.2.jar
+jakarta.activation-api/2.1.3//jakarta.activation-api-2.1.3.jar
jakarta.annotation-api/2.1.1//jakarta.annotation-api-2.1.1.jar
jakarta.inject-api/2.0.1//jakarta.inject-api-2.0.1.jar
jakarta.servlet-api/5.0.0//jakarta.servlet-api-5.0.0.jar
jakarta.validation-api/3.0.2//jakarta.validation-api-3.0.2.jar
jakarta.ws.rs-api/3.0.0//jakarta.ws.rs-api-3.0.0.jar
-jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar
+jakarta.xml.bind-api/4.0.2//jakarta.xml.bind-api-4.0.2.jar
janino/3.1.9//janino-3.1.9.jar
java-diff-utils/4.15//java-diff-utils-4.15.jar
java-xmlbuilder/1.2//java-xmlbuilder-1.2.jar
@@ -126,8 +127,8 @@ javassist/3.30.2-GA//javassist-3.30.2-GA.jar
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
javax.servlet-api/4.0.1//javax.servlet-api-4.0.1.jar
javolution/5.5.1//javolution-5.5.1.jar
-jaxb-api/2.2.11//jaxb-api-2.2.11.jar
-jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
+jaxb-core/4.0.5//jaxb-core-4.0.5.jar
+jaxb-runtime/4.0.5//jaxb-runtime-4.0.5.jar
jcl-over-slf4j/2.0.16//jcl-over-slf4j-2.0.16.jar
jdo-api/3.0.1//jdo-api-3.0.1.jar
jdom2/2.0.6//jdom2-2.0.6.jar
diff --git a/docs/ml-migration-guide.md b/docs/ml-migration-guide.md
index 8523151f3576f..fe5bdc4aedbcb 100644
--- a/docs/ml-migration-guide.md
+++ b/docs/ml-migration-guide.md
@@ -26,6 +26,25 @@ Note that this migration guide describes the items specific to MLlib.
Many items of SQL migration can be applied when migrating MLlib to higher versions for DataFrame-based APIs.
Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html).
+## Upgrading from MLlib 3.5 to 4.0
+
+### Breaking changes
+{:.no_toc}
+
+There are no breaking changes.
+
+### Deprecations and changes of behavior
+{:.no_toc}
+
+**Deprecations**
+
+There are no deprecations.
+
+**Changes of behavior**
+
+* [SPARK-51132](https://issues.apache.org/jira/browse/SPARK-51132):
+ The PMML XML schema version of models exported in PMML format by [PMML model export](mllib-pmml-model-export.html) has been upgraded from `PMML-4_3` to `PMML-4_4`.
+
## Upgrading from MLlib 2.4 to 3.0
### Breaking changes
diff --git a/mllib/pom.xml b/mllib/pom.xml
index f48750c00af54..0d87640c6b47d 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -34,10 +34,6 @@
https://spark.apache.org/
-
- javax.xml.bind
- jaxb-api
-
org.scala-lang.modules
scala-parser-combinators_${scala.binary.version}
@@ -144,6 +140,10 @@
org.glassfish.jaxb
jaxb-runtime
+
+ jakarta.xml.bind
+ jakarta.xml.bind-api
+
org.apache.spark
spark-tags_${scala.binary.version}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
index ea548b2e1bf2e..d6b938eefeb7c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.pmml
import java.io.{File, OutputStream, StringWriter}
import javax.xml.transform.stream.StreamResult
-import org.jpmml.model.JAXBUtil
+import org.jpmml.model.JAXBSerializer
import org.apache.spark.SparkContext
import org.apache.spark.annotation.Since
@@ -39,7 +39,8 @@ trait PMMLExportable {
*/
private def toPMML(streamResult: StreamResult): Unit = {
val pmmlModelExport = PMMLModelExportFactory.createPMMLModelExport(this)
- JAXBUtil.marshalPMML(pmmlModelExport.getPmml(), streamResult)
+ val jaxbSerializer = new JAXBSerializer()
+ jaxbSerializer.marshalPretty(pmmlModelExport.getPmml(), streamResult)
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala
index 7aa9051b5c585..02e4f7a76f90f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala
@@ -19,8 +19,8 @@ package org.apache.spark.mllib.pmml.`export`
import scala.{Array => SArray}
-import org.dmg.pmml.{DataDictionary, DataField, DataType, FieldName, MiningField,
- MiningFunction, MiningSchema, OpType}
+import org.dmg.pmml.{DataDictionary, DataField, DataType, MiningField, MiningFunction,
+ MiningSchema, OpType}
import org.dmg.pmml.regression.{NumericPredictor, RegressionModel, RegressionTable}
import org.apache.spark.mllib.regression.GeneralizedLinearModel
@@ -44,7 +44,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
pmml.getHeader.setDescription(description)
if (model.weights.size > 0) {
- val fields = new SArray[FieldName](model.weights.size)
+ val fields = new SArray[String](model.weights.size)
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
val regressionTableYES = new RegressionTable(model.intercept).setTargetCategory("1")
@@ -67,7 +67,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
.addRegressionTables(regressionTableYES, regressionTableNO)
for (i <- 0 until model.weights.size) {
- fields(i) = FieldName.create("field_" + i)
+ fields(i) = "field_" + i
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.addMiningFields(new MiningField(fields(i))
@@ -76,7 +76,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
}
// add target field
- val targetField = FieldName.create("target")
+ val targetField = "target"
dataDictionary
.addDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
miningSchema
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala
index bc6a4a40c441a..c562f2d637413 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala
@@ -19,8 +19,8 @@ package org.apache.spark.mllib.pmml.`export`
import scala.{Array => SArray}
-import org.dmg.pmml.{DataDictionary, DataField, DataType, FieldName, MiningField,
- MiningFunction, MiningSchema, OpType}
+import org.dmg.pmml.{DataDictionary, DataField, DataType, MiningField, MiningFunction,
+ MiningSchema, OpType}
import org.dmg.pmml.regression.{NumericPredictor, RegressionModel, RegressionTable}
import org.apache.spark.mllib.regression.GeneralizedLinearModel
@@ -42,7 +42,7 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
pmml.getHeader.setDescription(description)
if (model.weights.size > 0) {
- val fields = new SArray[FieldName](model.weights.size)
+ val fields = new SArray[String](model.weights.size)
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
val regressionTable = new RegressionTable(model.intercept)
@@ -53,7 +53,7 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
.addRegressionTables(regressionTable)
for (i <- 0 until model.weights.size) {
- fields(i) = FieldName.create("field_" + i)
+ fields(i) = "field_" + i
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.addMiningFields(new MiningField(fields(i))
@@ -62,7 +62,7 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
}
// for completeness add target field
- val targetField = FieldName.create("target")
+ val targetField = "target"
dataDictionary.addDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.addMiningFields(new MiningField(targetField)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala
index 674c8cea9d425..7594a378afdfa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala
@@ -20,7 +20,7 @@ package org.apache.spark.mllib.pmml.`export`
import scala.{Array => SArray}
import org.dmg.pmml.{Array, CompareFunction, ComparisonMeasure, DataDictionary, DataField, DataType,
- FieldName, MiningField, MiningFunction, MiningSchema, OpType, SquaredEuclidean}
+ MiningField, MiningFunction, MiningSchema, OpType, SquaredEuclidean}
import org.dmg.pmml.clustering.{Cluster, ClusteringField, ClusteringModel}
import org.apache.spark.mllib.clustering.KMeansModel
@@ -40,7 +40,7 @@ private[mllib] class KMeansPMMLModelExport(model: KMeansModel) extends PMMLModel
if (model.clusterCenters.length > 0) {
val clusterCenter = model.clusterCenters(0)
- val fields = new SArray[FieldName](clusterCenter.size)
+ val fields = new SArray[String](clusterCenter.size)
val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema
val comparisonMeasure = new ComparisonMeasure()
@@ -55,7 +55,7 @@ private[mllib] class KMeansPMMLModelExport(model: KMeansModel) extends PMMLModel
.setNumberOfClusters(model.clusterCenters.length)
for (i <- 0 until clusterCenter.size) {
- fields(i) = FieldName.create("field_" + i)
+ fields(i) = "field_" + i
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema
.addMiningFields(new MiningField(fields(i))
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala
index 838b0cefe625e..56d623e655801 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala
@@ -23,7 +23,7 @@ import java.util.Locale
import scala.beans.BeanProperty
-import org.dmg.pmml.{Application, Header, PMML, Timestamp}
+import org.dmg.pmml.{Application, Header, PMML, Timestamp, Version}
private[mllib] trait PMMLModelExport {
@@ -44,6 +44,6 @@ private[mllib] trait PMMLModelExport {
val header = new Header()
.setApplication(app)
.setTimestamp(timestamp)
- new PMML("4.2", header, null)
+ new PMML(Version.PMML_4_4.getVersion(), header, null)
}
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
index 7ccd3494bd32b..46bddc2d93a85 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala
@@ -1165,7 +1165,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
assert(fields(0).getOpType() == OpType.CONTINUOUS)
val pmmlRegressionModel = pmml.getModels().get(0).asInstanceOf[PMMLRegressionModel]
val pmmlPredictors = pmmlRegressionModel.getRegressionTables.get(0).getNumericPredictors
- val pmmlWeights = pmmlPredictors.asScala.map(_.getCoefficient()).toList
+ val pmmlWeights = pmmlPredictors.asScala.map(_.getCoefficient().doubleValue()).toList
assert(pmmlWeights(0) ~== model.coefficients(0) relTol 1E-3)
assert(pmmlWeights(1) ~== model.coefficients(1) relTol 1E-3)
}
diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala
index 5f1091e438db0..fdd0ce54c460f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLUtils.scala
@@ -20,7 +20,7 @@ import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import org.dmg.pmml.PMML
-import org.jpmml.model.{JAXBUtil, SAXUtil}
+import org.jpmml.model.{JAXBSerializer, SAXUtil}
import org.jpmml.model.filters.ImportFilter
/**
@@ -37,6 +37,7 @@ private[spark] object PMMLUtils {
val transformed = SAXUtil.createFilteredSource(
new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)),
new ImportFilter())
- JAXBUtil.unmarshalPMML(transformed)
+ val jaxbSerializer = new JAXBSerializer()
+ jaxbSerializer.unmarshal(transformed).asInstanceOf[PMML]
}
}
diff --git a/pom.xml b/pom.xml
index a78fcc07aa506..0102631dee034 100644
--- a/pom.xml
+++ b/pom.xml
@@ -571,7 +571,7 @@
org.jpmml
pmml-model
- 1.4.8
+ 1.7.1
provided
@@ -599,32 +599,24 @@
org.glassfish.jaxb
jaxb-runtime
- 2.3.2
+ 4.0.5
compile
-
-
- com.sun.xml.fastinfoset
- FastInfoset
-
org.glassfish.jaxb
txw2
- org.jvnet.staxex
- stax-ex
-
-
-
- jakarta.activation
- jakarta.activation-api
+ org.eclipse.angus
+ angus-activation
+
+ jakarta.xml.bind
+ jakarta.xml.bind-api
+ 4.0.2
+
org.apache.commons
commons-lang3
@@ -1061,13 +1053,6 @@
org.glassfish.jersey.core
jersey-server
${jersey.version}
-
-
-
- jakarta.xml.bind
- jakarta.xml.bind-api
-
-
org.glassfish.jersey.core