-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-20501] [ML] ML 2.2 QA: New Scala APIs, docs #17934
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR | |
| * Linear SVM Classifier</a> | ||
| * | ||
| * This binary classifier optimizes the Hinge Loss using the OWLQN optimizer. | ||
| * Only supports L2 regularization currently. | ||
| * | ||
| */ | ||
| @Since("2.2.0") | ||
|
|
@@ -131,7 +132,6 @@ class LinearSVC @Since("2.2.0") ( | |
| */ | ||
| @Since("2.2.0") | ||
| def setThreshold(value: Double): this.type = set(threshold, value) | ||
| setDefault(threshold -> 0.0) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why remove this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo, reverted, thanks. |
||
|
|
||
| /** | ||
| * Suggested depth for treeAggregate (greater than or equal to 2). | ||
|
|
@@ -148,7 +148,7 @@ class LinearSVC @Since("2.2.0") ( | |
| @Since("2.2.0") | ||
| override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) | ||
|
|
||
| override protected[classification] def train(dataset: Dataset[_]): LinearSVCModel = { | ||
| override protected def train(dataset: Dataset[_]): LinearSVCModel = { | ||
| val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) | ||
| val instances: RDD[Instance] = | ||
| dataset.select(col($(labelCol)), w, col($(featuresCol))).rdd.map { | ||
|
|
@@ -264,7 +264,7 @@ object LinearSVC extends DefaultParamsReadable[LinearSVC] { | |
|
|
||
| /** | ||
| * :: Experimental :: | ||
| * SVM Model trained by [[LinearSVC]] | ||
| * Linear SVM Model trained by [[LinearSVC]] | ||
| */ | ||
| @Since("2.2.0") | ||
| @Experimental | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,7 +102,8 @@ private[feature] trait ImputerParams extends Params with HasInputCols { | |
| * computing median, DataFrameStatFunctions.approxQuantile is used with a relative error of 0.001. | ||
| */ | ||
| @Experimental | ||
| class Imputer @Since("2.2.0")(override val uid: String) | ||
| @Since("2.2.0") | ||
| class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String) | ||
| extends Estimator[ImputerModel] with ImputerParams with DefaultParamsWritable { | ||
|
|
||
| @Since("2.2.0") | ||
|
|
@@ -165,8 +166,8 @@ class Imputer @Since("2.2.0")(override val uid: String) | |
| object Imputer extends DefaultParamsReadable[Imputer] { | ||
|
|
||
| /** strategy names that Imputer currently supports. */ | ||
| private[ml] val mean = "mean" | ||
| private[ml] val median = "median" | ||
| private[feature] val mean = "mean" | ||
| private[feature] val median = "median" | ||
|
|
||
| @Since("2.2.0") | ||
| override def load(path: String): Imputer = super.load(path) | ||
|
|
@@ -180,9 +181,10 @@ object Imputer extends DefaultParamsReadable[Imputer] { | |
| * which are used to replace the missing values in the input DataFrame. | ||
| */ | ||
| @Experimental | ||
| @Since("2.2.0") | ||
| class ImputerModel private[ml]( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: should be a space between |
||
| override val uid: String, | ||
| val surrogateDF: DataFrame) | ||
| @Since("2.2.0") override val uid: String, | ||
| @Since("2.2.0") val surrogateDF: DataFrame) | ||
| extends Model[ImputerModel] with ImputerParams with MLWritable { | ||
|
|
||
| import ImputerModel._ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -146,7 +146,7 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { | |
| * This is a temporary fix for the case when target labels do not exist during prediction. | ||
| */ | ||
| @Since("1.4.0") | ||
| class StringIndexerModel ( | ||
| class StringIndexerModel private[ml] ( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, making this private now will break user code if it is used, no? It's unfortunate that it wasn't private before, but do we want to do this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We make this public by mistake, so I think we should fix this bug. AFAIK, this is the only model whose constructor is public, so there is little chance to break user code. And I think it makes sense to break user code by fixing bug. Thanks.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought this was made public on purpose. StringIndexerModel is one case where it makes sense for users to create it manually (if they already have a list of the possible Strings).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I reverted back. Thanks. |
||
| @Since("1.4.0") override val uid: String, | ||
| @Since("1.5.0") val labels: Array[String]) | ||
| extends Model[StringIndexerModel] with StringIndexerBase with MLWritable { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this something should be mentioned in R too?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@felixcheung Yeah, it should be. However, since SparkR exposed more MLlib APIs during 2.2 releasing cycle, I'm preparing a separate PR to audit new SparkR MLlib APIs which would include this update. Thanks.