From 7f17c6e1597849d812d041e5e8124448b8dc60c7 Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Thu, 25 Oct 2018 20:07:22 +0200
Subject: [PATCH 1/2] [SPARK-25838][ML] Remove formatVersion from Saveable

---
 .../spark/mllib/classification/LogisticRegression.scala | 2 --
 .../apache/spark/mllib/classification/NaiveBayes.scala | 2 --
 .../scala/org/apache/spark/mllib/classification/SVM.scala | 2 --
 .../spark/mllib/clustering/BisectingKMeansModel.scala | 2 --
 .../spark/mllib/clustering/GaussianMixtureModel.scala | 2 --
 .../org/apache/spark/mllib/clustering/KMeansModel.scala | 2 --
 .../org/apache/spark/mllib/clustering/LDAModel.scala | 4 ----
 .../spark/mllib/clustering/PowerIterationClustering.scala | 2 --
 .../org/apache/spark/mllib/feature/ChiSqSelector.scala | 2 --
 .../scala/org/apache/spark/mllib/feature/Word2Vec.scala | 2 --
 .../main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala | 2 --
 .../scala/org/apache/spark/mllib/fpm/PrefixSpan.scala | 2 --
 .../mllib/recommendation/MatrixFactorizationModel.scala | 2 --
 .../spark/mllib/regression/IsotonicRegression.scala | 2 --
 .../scala/org/apache/spark/mllib/regression/Lasso.scala | 2 --
 .../apache/spark/mllib/regression/LinearRegression.scala | 2 --
 .../apache/spark/mllib/regression/RidgeRegression.scala | 2 --
 .../apache/spark/mllib/tree/model/DecisionTreeModel.scala | 4 ----
 .../spark/mllib/tree/model/treeEnsembleModels.scala | 8 --------
 .../scala/org/apache/spark/mllib/util/modelSaveLoad.scala | 3 ---
 20 files changed, 51 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 4b650000736e..d86aa01c9195 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -163,8 +163,6 @@ class LogisticRegressionModel @Since("1.3.0") (
       numFeatures, numClasses, weights, intercept, threshold)
   }
 
-  override protected def formatVersion: String = "1.0"
-
   override def toString: String = {
     s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 9e8774732efe..2caf79e5f520 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -170,8 +170,6 @@ class NaiveBayesModel private[spark] (
     val data = NaiveBayesModel.SaveLoadV2_0.Data(labels, pi, theta, modelType)
     NaiveBayesModel.SaveLoadV2_0.save(sc, path, data)
   }
-
-  override protected def formatVersion: String = "2.0"
 }
 
 @Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 5fb04ed0ee9a..087c2c263983 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -85,8 +85,6 @@ class SVMModel @Since("1.1.0") (
       numFeatures = weights.size, numClasses = 2, weights, intercept, threshold)
   }
 
-  override protected def formatVersion: String = "1.0"
-
   override def toString: String = {
     s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 9d115afcea75..8f5025fd1973 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -111,8 +111,6 @@ class BisectingKMeansModel private[clustering] (
   override def save(sc: SparkContext, path: String): Unit = {
     BisectingKMeansModel.SaveLoadV1_0.save(sc, this, path)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("2.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 1933d5499c3b..5d2ecf33129c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -48,8 +48,6 @@ class GaussianMixtureModel @Since("1.3.0") (
 
   require(weights.length == gaussians.length, "Length of weight and Gaussian arrays must match")
 
-  override protected def formatVersion = "1.0"
-
   @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
     GaussianMixtureModel.SaveLoadV1_0.save(sc, path, weights, gaussians)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index d5c8188144ce..fb2200e1e491 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -112,8 +112,6 @@ class KMeansModel (@Since("1.0.0") val clusterCenters: Array[Vector],
   override def save(sc: SparkContext, path: String): Unit = {
     KMeansModel.SaveLoadV2_0.save(sc, this, path)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index f915062d7738..f3c26778ddd1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -216,8 +216,6 @@ class LocalLDAModel private[spark] (
     }.toArray
   }
 
-  override protected def formatVersion = "1.0"
-
   /**
    * Random seed for cluster initialization.
    */
@@ -835,8 +833,6 @@ class DistributedLDAModel private[clustering] (
   // TODO:
   // override def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = ???
 
-  override protected def formatVersion = "1.0"
-
   @Since("1.5.0")
   override def save(sc: SparkContext, path: String): Unit = {
     // Note: This intentionally does not save checkpointFiles.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index 9444f29a91ed..46529f7457b7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -48,8 +48,6 @@ class PowerIterationClusteringModel @Since("1.3.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     PowerIterationClusteringModel.SaveLoadV1_0.save(sc, this, path)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
index aa78e91b679a..fc0a45c6af53 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala
@@ -113,8 +113,6 @@ class ChiSqSelectorModel @Since("1.3.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     ChiSqSelectorModel.SaveLoadV1_0.save(sc, this, path)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 object ChiSqSelectorModel extends Loader[ChiSqSelectorModel] {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index b8c306d86bac..cc1ae324cc8d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -511,8 +511,6 @@ class Word2VecModel private[spark] (
     this(Word2VecModel.buildWordIndex(model), Word2VecModel.buildWordVectors(model))
   }
 
-  override protected def formatVersion = "1.0"
-
   @Since("1.4.0")
   def save(sc: SparkContext, path: String): Unit = {
     Word2VecModel.SaveLoadV1_0.save(sc, path, getVectors)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
index 3a1bc35186dc..2b6b997f9ff1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
@@ -84,8 +84,6 @@ class FPGrowthModel[Item: ClassTag] @Since("2.4.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     FPGrowthModel.SaveLoadV1_0.save(this, path)
   }
-
-  override protected val formatVersion: String = "1.0"
 }
 
 @Since("2.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
index 7aed2f3bd8a6..dd44c3af95b7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
@@ -621,8 +621,6 @@ class PrefixSpanModel[Item] @Since("1.5.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     PrefixSpanModel.SaveLoadV1_0.save(this, path)
   }
-
-  override protected val formatVersion: String = "1.0"
 }
 
 @Since("2.0.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index 7b49d4d0812f..e5e82d19f1cb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -196,8 +196,6 @@ class MatrixFactorizationModel @Since("0.8.0") (
       .map(t => Rating(t._1, product, t._2))
   }
 
-  protected override val formatVersion: String = "1.0"
-
   /**
    * Save this model to the given path.
   *
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 8347ccad6b71..649f9816e6a5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -160,8 +160,6 @@ class IsotonicRegressionModel @Since("1.3.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index cef1b4f51b84..ead9f5b30037 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -51,8 +51,6 @@ class LassoModel @Since("1.1.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 60262fdc497a..cb08216fbf69 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -51,8 +51,6 @@ class LinearRegressionModel @Since("1.1.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 52977ac4f062..43c3154dd053 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -52,8 +52,6 @@ class RidgeRegressionModel @Since("1.1.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
-
-  override protected def formatVersion: String = "1.0"
 }
 
 @Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
index 27618e122aef..9983ca7dc5e8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala
@@ -126,15 +126,11 @@ class DecisionTreeModel @Since("1.0.0") (
   override def save(sc: SparkContext, path: String): Unit = {
     DecisionTreeModel.SaveLoadV1_0.save(sc, path, this)
   }
-
-  override protected def formatVersion: String = DecisionTreeModel.formatVersion
 }
 
 @Since("1.3.0")
 object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging {
 
-  private[spark] def formatVersion: String = "1.0"
-
   private[tree] object SaveLoadV1_0 {
 
     def thisFormatVersion: String = "1.0"
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
index b1e82656a240..28cc55912688 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
@@ -66,15 +66,11 @@ class RandomForestModel @Since("1.2.0") (
     TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this,
       RandomForestModel.SaveLoadV1_0.thisClassName)
   }
-
-  override protected def formatVersion: String = RandomForestModel.formatVersion
 }
 
 @Since("1.3.0")
 object RandomForestModel extends Loader[RandomForestModel] {
 
-  private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion
-
   /**
    *
    * @param sc Spark context used for loading model files.
@@ -170,8 +166,6 @@ class GradientBoostedTreesModel @Since("1.2.0") (
     broadcastTrees.destroy(blocking = false)
     evaluation.toArray
   }
-
-  override protected def formatVersion: String = GradientBoostedTreesModel.formatVersion
 }
 
 /**
@@ -235,8 +229,6 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] {
       newPredError
     }
 
-  private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion
-
   /**
    * @param sc Spark context used for loading model files.
    * @param path Path specifying the directory to which the model was saved.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
index da0eb04764c5..e8889bfd7c38 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
@@ -54,9 +54,6 @@ trait Saveable {
   @Since("1.3.0")
   def save(sc: SparkContext, path: String): Unit
 
-  /** Current version of model save/load format. */
-  protected def formatVersion: String
-
 }
 
 /**

From 492f6edd2d3007bc2b95327cbf1cdce570c64bff Mon Sep 17 00:00:00 2001
From: Marco Gaido
Date: Fri, 26 Oct 2018 10:37:22 +0200
Subject: [PATCH 2/2] fix mima

---
 project/MimaExcludes.scala | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 350d8ad6942f..3bbc562b30b4 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -36,6 +36,26 @@ object MimaExcludes {
 
   // Exclude rules for 3.0.x
   lazy val v30excludes = v24excludes ++ Seq(
+    // [SPARK-25838] Remove formatVersion from Saveable
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.DistributedLDAModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.LocalLDAModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.BisectingKMeansModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.KMeansModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.PowerIterationClusteringModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.GaussianMixtureModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.recommendation.MatrixFactorizationModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.feature.ChiSqSelectorModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.feature.Word2VecModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.classification.SVMModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.classification.LogisticRegressionModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.classification.NaiveBayesModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.Saveable.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.fpm.FPGrowthModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.fpm.PrefixSpanModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.IsotonicRegressionModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.RidgeRegressionModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.LassoModel.formatVersion"),
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionModel.formatVersion"),
     // [SPARK-25737] Remove JavaSparkContextVarargsWorkaround
     ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.api.java.JavaSparkContext"),
     ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.api.java.JavaSparkContext.union"),