From 49227229cadfc8d1f39bd2bb36cc59ab68accbfd Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Thu, 12 Feb 2015 19:24:45 -0700 Subject: [PATCH 1/9] [SPARK-5726] [MLLIB] Hadamard Vector Product Transformer --- docs/mllib-feature-extraction.md | 73 ++++++++++++++++ .../spark/ml/feature/HadamardProductTF.scala | 45 ++++++++++ .../spark/mllib/feature/HadamardProduct.scala | 62 ++++++++++++++ .../mllib/feature/HadamardProductSuite.scala | 83 +++++++++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index 80842b27effd..ced0b57dd907 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -477,3 +477,76 @@ sc.stop(); +## HadamardProduct + +HadamardProduct scales individual vector samples by a provided weighting vector component-wise. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and weighting vector, `w`, to yield a result vector. + +`\[ \begin{pmatrix} +v_1 \\ +\vdots \\ +v_N +\end{pmatrix} \circ \begin{pmatrix} + w_1 \\ + \vdots \\ + w_N + \end{pmatrix} += \begin{pmatrix} + v_1 w_1 \\ + \vdots \\ + v_N w_N + \end{pmatrix} +\]` + +[`HadamardProduct`](api/scala/index.html#org.apache.spark.mllib.feature.HadamardProduct) has the following parameter in the constructor: + +* `w` Vector, the scaling vector. + +`HadamardProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer) which can apply the weighting on a `Vector` to produce a transformed `Vector` or on an `RDD[Vector]` to produce a transformed `RDD[Vector]`. + +### Example + +This example below demonstrates how to load a simple vectors file, extract a set of vectors, then weight those vectors using a weighting vector value. + + +
+<div class="codetabs">
+<div data-lang="scala">
+{% highlight scala %} +import org.apache.spark.SparkContext._ +import org.apache.spark.mllib.feature.HadamardProduct +import org.apache.spark.mllib.linalg.Vectors + +//load and parse the data +val data = sc.textFile("data/mllib/kmeans_data.txt") +val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))) + +val weightingVector = Vectors.dense(0.0, 1.0, 2.0) +val scaler = new HadamardProduct(weightingVector) + +//same results: +val weightedData = scaler.transform(parsedData) +val weightedData2 = parsedData.map(x => scaler.transform(x)) + +{% endhighlight %} +
+</div>
+
+<div data-lang="python">
+{% highlight python %} +from pyspark.mllib.linalg import Vectors +from pyspark.mllib.feature import HadamardProduct + +# Load and parse the data +data = sc.textFile("data/mllib/kmeans_data.txt") +parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')])) + +weightingVector = Vectors.dense(0.0, 1.0, 2.0) +scaler = HadamardProduct(weightingVector) + +# Same results: +weightedData = scaler.transform(parsedData) +weightedData2 = parsedData.map(lambda x: scaler.transform(x)) + +{% endhighlight %} +
+</div>
+</div>
+ + diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala new file mode 100644 index 000000000000..390d037c2fe8 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.{Param, ParamMap} +import org.apache.spark.mllib.feature.HadamardProduct +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} +import org.apache.spark.sql.types.DataType + +/** + * :: AlphaComponent + * Maps a vector to the hadamard product of it and a reference vector. + */ +@AlphaComponent +class HadamardProductTF extends UnaryTransformer[Vector, Vector, HadamardProductTF] { + + /** the vector to multiply with input vectors */ + val scalingVec : Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") + def setScalingVec(value: Vector) = set(scalingVec, value) + def getScalingVec: Vector = get(scalingVec) + + override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { + val hadScaler = new HadamardProduct(paramMap(scalingVec)) + hadScaler.transform + } + + override protected def outputDataType: DataType = new VectorUDT() +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala new file mode 100644 index 000000000000..2c6f0b7d5bca --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.annotation.Experimental +import org.apache.spark.mllib.linalg._ + +/** + * :: Experimental :: + * Component-wise scaling of dense vectors by a provided vector's components. + * + * @param scalingVector The values used to scale the reference vector's individual components. + */ +@Experimental +class HadamardProduct(val scalingVector: Vector) extends VectorTransformer { + + /** + * Does the hadamard product transformation. + * + * @param vector vector to be transformed. + * @return transformed vector. + */ + override def transform(vector: Vector): Vector = { + require(vector.size == scalingVector.size) + vector match { + case dv: DenseVector => + val values: Array[Double] = dv.values.clone() + val dim = scalingVector.size + var i = 0 + while(i < dim) { + values(i) *= scalingVector(i) + i+=1 + } + Vectors.dense(values) + case SparseVector(size, indices, vs) => + val values = vs.clone() + val dim = values.size + var i = 0 + while (i < dim) { + values(i) *= scalingVector.apply(indices(i)) + i += 1 + } + Vectors.sparse(size, indices, values) + case v => throw new IllegalArgumentException("Does not support vector type " + v.getClass) + } + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala new file mode 100644 index 000000000000..c277f4309832 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.mllib.linalg.{SparseVector, DenseVector, Vector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.scalatest.FunSuite +import org.apache.spark.mllib.util.TestingUtils._ + +class HadamardProductSuite extends FunSuite with MLlibTestSparkContext{ + + val denseData = Array( + Vectors.dense(1.0, 1.0, 0.0, 0.0), + Vectors.dense(1.0, 2.0, -3.0, 0.0), + Vectors.dense(1.0, 3.0, 0.0, 0.0), + Vectors.dense(1.0, 4.0, 1.9, -9.0), + Vectors.dense(1.0, 5.0, 0.0, 0.0) + ) + + val sparseData = Array( + Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))), + Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))), + Vectors.sparse(3, Seq((1, -5.1))), + Vectors.sparse(3, Seq((0, 3.8), (2, 1.9))), + Vectors.sparse(3, Seq((0, 1.7), (1, -0.6))), + Vectors.sparse(3, Seq((1, 1.9))) + ) + + val scalingVector = Vectors.dense(2.0, 0.5, 0.0, 0.25) + + test("hadamard product should properly apply vector to dense data set") { + + val scaler = new HadamardProduct(scalingVector) + val scaledData = scaler.transform(sc.makeRDD(denseData)) + + val scaledVecs = scaledData.collect() + + val fourthVec = scaledVecs.apply(3).toArray + + assert(fourthVec.apply(0) === 2.0, "product by 2.0 should have been applied") + assert(fourthVec.apply(1) === 2.0, "product by 0.5 should have been applied") + assert(fourthVec.apply(2) === 0.0, "product by 0.0 should have been applied") + assert(fourthVec.apply(3) === -2.25, "product by 0.25 should have been applied") + } + + test("hadamard product should properly apply vector to sparse data set") { + + val dataRDD = sc.parallelize(sparseData, 3) + + val scalingVec = Vectors.dense(1.0, 0.0, 0.5) + + val hadScaler = new HadamardProduct(scalingVec) + + val data2 = sparseData.map(hadScaler.transform) + val data2RDD = hadScaler.transform(dataRDD) + + assert((sparseData, data2, data2RDD.collect()).zipped.forall { + case (v1: DenseVector, v2: DenseVector, v3: DenseVector) => true + case (v1: SparseVector, v2: SparseVector, v3: SparseVector) => true + case _ => false + }, "The vector type should be preserved after hadamard product") + + assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) + + assert(data2(0) ~== Vectors.sparse(3, Seq((0, -2.0), (1, 0.0))) absTol 1E-5) + assert(data2(1) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) + } +} From cb520e69fd62a77b7d1c33d4d7a093c2a7b7d57a Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Fri, 13 Feb 2015 11:25:04 -0700 Subject: [PATCH 2/9] [SPARK-5726] [MLLIB] Rename HadamardProduct to ElementwiseProduct --- docs/mllib-feature-extraction.md | 32 +++++++++---------- ...uctTF.scala => ElementwiseProductTF.scala} | 8 ++--- ...Product.scala => ElementwiseProduct.scala} | 4 +-- ...te.scala => ElementwiseProductSuite.scala} | 20 ++++++------ 4 files changed, 32 insertions(+), 32 deletions(-) rename mllib/src/main/scala/org/apache/spark/ml/feature/{HadamardProductTF.scala => ElementwiseProductTF.scala} (87%) rename mllib/src/main/scala/org/apache/spark/mllib/feature/{HadamardProduct.scala => ElementwiseProduct.scala} (92%) rename mllib/src/test/scala/org/apache/spark/mllib/feature/{HadamardProductSuite.scala => ElementwiseProductSuite.scala} (80%) diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index ced0b57dd907..b71c1512ebb2 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -477,9 +477,9 @@ sc.stop(); -## HadamardProduct +## ElementwiseProduct -HadamardProduct 
scales individual vector samples by a provided weighting vector component-wise. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and weighting vector, `w`, to yield a result vector.
+ElementwiseProduct multiplies individual vector samples by a provided weighting vector component-wise. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and transforming vector, `w`, to yield a result vector.

`\[ \begin{pmatrix}
v_1 \\
@@ -497,34 +497,34 @@ v_N
\end{pmatrix}
\]`

-[`HadamardProduct`](api/scala/index.html#org.apache.spark.mllib.feature.HadamardProduct) has the following parameter in the constructor:
+[`ElementwiseProduct`](api/scala/index.html#org.apache.spark.mllib.feature.ElementwiseProduct) has the following parameter in the constructor:

-* `w` Vector, the scaling vector.
+* `w` Vector, the transforming vector.

-`HadamardProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer) which can apply the weighting on a `Vector` to produce a transformed `Vector` or on an `RDD[Vector]` to produce a transformed `RDD[Vector]`.
+`ElementwiseProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer) which can apply the weighting on a `Vector` to produce a transformed `Vector` or on an `RDD[Vector]` to produce a transformed `RDD[Vector]`.

### Example

-This example below demonstrates how to load a simple vectors file, extract a set of vectors, then weight those vectors using a weighting vector value.
+The example below demonstrates how to load a simple vectors file, extract a set of vectors, and then transform those vectors using a transforming vector.

<div class="codetabs">
<div data-lang="scala">
{% highlight scala %} import org.apache.spark.SparkContext._ -import org.apache.spark.mllib.feature.HadamardProduct +import org.apache.spark.mllib.feature.ElementwiseProduct import org.apache.spark.mllib.linalg.Vectors //load and parse the data val data = sc.textFile("data/mllib/kmeans_data.txt") val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))) -val weightingVector = Vectors.dense(0.0, 1.0, 2.0) -val scaler = new HadamardProduct(weightingVector) +val transformingVector = Vectors.dense(0.0, 1.0, 2.0) +val transformer = new ElementwiseProduct(transformingVector) //same results: -val weightedData = scaler.transform(parsedData) -val weightedData2 = parsedData.map(x => scaler.transform(x)) +val transformedData = transformer.transform(parsedData) +val transformedData2 = parsedData.map(x => transformer.transform(x)) {% endhighlight %}
@@ -532,18 +532,18 @@ val weightedData2 = parsedData.map(x => scaler.transform(x))
{% highlight python %} from pyspark.mllib.linalg import Vectors -from pyspark.mllib.feature import HadamardProduct +from pyspark.mllib.feature import ElementwiseProduct # Load and parse the data data = sc.textFile("data/mllib/kmeans_data.txt") parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')])) -weightingVector = Vectors.dense(0.0, 1.0, 2.0) -scaler = HadamardProduct(weightingVector) +transformingVector = Vectors.dense(0.0, 1.0, 2.0) +transformer = ElementwiseProduct(transformingVector) # Same results: -weightedData = scaler.transform(parsedData) -weightedData2 = parsedData.map(lambda x: scaler.transform(x)) +transformedData = transformer.transform(parsedData) +transformedData2 = parsedData.map(lambda x: transformer.transform(x)) {% endhighlight %}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProductTF.scala similarity index 87% rename from mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala rename to mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProductTF.scala index 390d037c2fe8..c3b12382c814 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HadamardProductTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProductTF.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{Param, ParamMap} -import org.apache.spark.mllib.feature.HadamardProduct +import org.apache.spark.mllib.feature.ElementwiseProduct import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType @@ -29,7 +29,7 @@ import org.apache.spark.sql.types.DataType * Maps a vector to the hadamard product of it and a reference vector. */ @AlphaComponent -class HadamardProductTF extends UnaryTransformer[Vector, Vector, HadamardProductTF] { +class ElementwiseProductTF extends UnaryTransformer[Vector, Vector, ElementwiseProductTF] { /** the vector to multiply with input vectors */ val scalingVec : Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") @@ -37,8 +37,8 @@ class HadamardProductTF extends UnaryTransformer[Vector, Vector, HadamardProduct def getScalingVec: Vector = get(scalingVec) override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { - val hadScaler = new HadamardProduct(paramMap(scalingVec)) - hadScaler.transform + val elemScaler = new ElementwiseProduct(paramMap(scalingVec)) + elemScaler.transform } override protected def outputDataType: DataType = new VectorUDT() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala similarity index 92% rename from mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala rename to mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index 2c6f0b7d5bca..17163df0e950 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/HadamardProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -22,12 +22,12 @@ import org.apache.spark.mllib.linalg._ /** * :: Experimental :: - * Component-wise scaling of dense vectors by a provided vector's components. + * Element-wise product of dense vectors by a provided vector's components. * * @param scalingVector The values used to scale the reference vector's individual components. */ @Experimental -class HadamardProduct(val scalingVector: Vector) extends VectorTransformer { +class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { /** * Does the hadamard product transformation. 
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala similarity index 80% rename from mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala rename to mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala index c277f4309832..57840e3b255e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/HadamardProductSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.scalatest.FunSuite import org.apache.spark.mllib.util.TestingUtils._ -class HadamardProductSuite extends FunSuite with MLlibTestSparkContext{ +class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext{ val denseData = Array( Vectors.dense(1.0, 1.0, 0.0, 0.0), @@ -43,14 +43,14 @@ class HadamardProductSuite extends FunSuite with MLlibTestSparkContext{ val scalingVector = Vectors.dense(2.0, 0.5, 0.0, 0.25) - test("hadamard product should properly apply vector to dense data set") { + test("elementwise (hadamard) product should properly apply vector to dense data set") { - val scaler = new HadamardProduct(scalingVector) - val scaledData = scaler.transform(sc.makeRDD(denseData)) + val transformer = new ElementwiseProduct(scalingVector) + val transformedData = transformer.transform(sc.makeRDD(denseData)) - val scaledVecs = scaledData.collect() + val transformedVecs = transformedData.collect() - val fourthVec = scaledVecs.apply(3).toArray + val fourthVec = transformedVecs.apply(3).toArray assert(fourthVec.apply(0) === 2.0, "product by 2.0 should have been applied") assert(fourthVec.apply(1) === 2.0, "product by 0.5 should have been applied") @@ -58,16 +58,16 @@ class HadamardProductSuite extends FunSuite with MLlibTestSparkContext{ assert(fourthVec.apply(3) === -2.25, "product by 0.25 should have been applied") } - test("hadamard product should properly apply vector to sparse data set") { + test("elementwise (hadamard) product should properly apply vector to sparse data set") { val dataRDD = sc.parallelize(sparseData, 3) val scalingVec = Vectors.dense(1.0, 0.0, 0.5) - val hadScaler = new HadamardProduct(scalingVec) + val transformer = new ElementwiseProduct(scalingVec) - val data2 = sparseData.map(hadScaler.transform) - val data2RDD = hadScaler.transform(dataRDD) + val data2 = sparseData.map(transformer.transform) + val data2RDD = transformer.transform(dataRDD) assert((sparseData, data2, data2RDD.collect()).zipped.forall { case (v1: DenseVector, v2: DenseVector, v3: DenseVector) => true From e436896dd10ec03aca92aad44386223aa54eef84 Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Fri, 13 Feb 2015 11:50:59 -0700 Subject: [PATCH 3/9] [SPARK-5726] [MLLIB] Remove 'TF' from 'ElementwiseProductTF' --- ...{ElementwiseProductTF.scala => ElementwiseProduct.scala} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename mllib/src/main/scala/org/apache/spark/ml/feature/{ElementwiseProductTF.scala => ElementwiseProduct.scala} (88%) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProductTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala similarity index 88% rename from mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProductTF.scala rename to mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index c3b12382c814..7a5fedf62904 
100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProductTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{Param, ParamMap} -import org.apache.spark.mllib.feature.ElementwiseProduct +import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType @@ -29,7 +29,7 @@ import org.apache.spark.sql.types.DataType * Maps a vector to the hadamard product of it and a reference vector. */ @AlphaComponent -class ElementwiseProductTF extends UnaryTransformer[Vector, Vector, ElementwiseProductTF] { +class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { /** the vector to multiply with input vectors */ val scalingVec : Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") @@ -37,7 +37,7 @@ class ElementwiseProductTF extends UnaryTransformer[Vector, Vector, ElementwiseP def getScalingVec: Vector = get(scalingVec) override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { - val elemScaler = new ElementwiseProduct(paramMap(scalingVec)) + val elemScaler = new feature.ElementwiseProduct(paramMap(scalingVec)) elemScaler.transform } From 1dffeee4910b108238efa79439372fbbab1435e4 Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Fri, 24 Apr 2015 10:10:58 -0600 Subject: [PATCH 4/9] [SPARK-5726] [MLLIB] Pass style checks. --- .../org/apache/spark/ml/feature/ElementwiseProduct.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 7a5fedf62904..5d739f826c34 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -33,8 +33,8 @@ class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwisePro /** the vector to multiply with input vectors */ val scalingVec : Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") - def setScalingVec(value: Vector) = set(scalingVec, value) - def getScalingVec: Vector = get(scalingVec) + def setScalingVec(value: Vector): this.type = set(scalingVec, value) + def getScalingVec: Vector = getOrDefault(scalingVec) override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { val elemScaler = new feature.ElementwiseProduct(paramMap(scalingVec)) From 37d4705f10ad12fd2291abbf00b30c61de6adbdf Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Wed, 6 May 2015 19:53:05 -0600 Subject: [PATCH 5/9] [SPARK-5726] [MLLIB] Incorporated feedback. 
--- docs/mllib-feature-extraction.md | 27 ++----------- .../spark/ml/feature/ElementwiseProduct.scala | 16 ++++++-- .../mllib/feature/ElementwiseProduct.scala | 17 ++++---- .../feature/ElementwiseProductSuite.scala | 40 ++++++------------- 4 files changed, 37 insertions(+), 63 deletions(-) diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index b71c1512ebb2..03fedd01016b 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -479,7 +479,7 @@ sc.stop(); ## ElementwiseProduct -ElementwiseProduct multiplies individual vector samples by a provided weighting vector component-wise. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and transforming vector, `w`, to yield a result vector. +ElementwiseProduct multiplies each input vector by a provided "weight" vector, using element-wise multiplication. In other words, it scales each column of the dataset by a scalar multiplier. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and transforming vector, `w`, to yield a result vector. `\[ \begin{pmatrix} v_1 \\ @@ -499,7 +499,7 @@ v_N [`ElementwiseProduct`](api/scala/index.html#org.apache.spark.mllib.feature.ElementwiseProduct) has the following parameter in the constructor: -* `w` Vector, the transforming vector. +* `w`: the transforming vector. `ElementwiseProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer) which can apply the weighting on a `Vector` to produce a transformed `Vector` or on an `RDD[Vector]` to produce a transformed `RDD[Vector]`. @@ -515,36 +515,17 @@ import org.apache.spark.SparkContext._ import org.apache.spark.mllib.feature.ElementwiseProduct import org.apache.spark.mllib.linalg.Vectors -//load and parse the data +// Load and parse the data: val data = sc.textFile("data/mllib/kmeans_data.txt") val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))) val transformingVector = Vectors.dense(0.0, 1.0, 2.0) val transformer = new ElementwiseProduct(transformingVector) -//same results: +// Batch transform and per-row transform give the same results: val transformedData = transformer.transform(parsedData) val transformedData2 = parsedData.map(x => transformer.transform(x)) -{% endhighlight %} -
- -
-{% highlight python %} -from pyspark.mllib.linalg import Vectors -from pyspark.mllib.feature import ElementwiseProduct - -# Load and parse the data -data = sc.textFile("data/mllib/kmeans_data.txt") -parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')])) - -transformingVector = Vectors.dense(0.0, 1.0, 2.0) -transformer = ElementwiseProduct(transformingVector) - -# Same results: -transformedData = transformer.transform(parsedData) -transformedData2 = parsedData.map(lambda x: transformer.transform(x)) - {% endhighlight %}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 5d739f826c34..e0fe4a568a41 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -25,18 +25,26 @@ import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType /** - * :: AlphaComponent - * Maps a vector to the hadamard product of it and a reference vector. + * :: AlphaComponent :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. */ @AlphaComponent class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { - /** the vector to multiply with input vectors */ - val scalingVec : Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") + /** + * the vector to multiply with input vectors + * @group param + */ + val scalingVec: Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") + + /** @group setParam */ def setScalingVec(value: Vector): this.type = set(scalingVec, value) + + /** @group getParam */ def getScalingVec: Vector = getOrDefault(scalingVec) override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { + require(paramMap.contains(scalingVec), s"transformation requires a weight vector: $scalingVec") val elemScaler = new feature.ElementwiseProduct(paramMap(scalingVec)) elemScaler.transform } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index 17163df0e950..5700bf68b154 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -17,16 +17,16 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.AlphaComponent import org.apache.spark.mllib.linalg._ /** - * :: Experimental :: - * Element-wise product of dense vectors by a provided vector's components. + * :: AlphaComponent :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. * * @param scalingVector The values used to scale the reference vector's individual components. */ -@Experimental +@AlphaComponent class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { /** @@ -36,15 +36,16 @@ class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { * @return transformed vector. 
*/ override def transform(vector: Vector): Vector = { - require(vector.size == scalingVector.size) + require(vector.size == scalingVector.size, + s"vector sizes do not match: ${scalingVector.size} ${vector.size}") vector match { case dv: DenseVector => val values: Array[Double] = dv.values.clone() val dim = scalingVector.size var i = 0 - while(i < dim) { + while (i < dim) { values(i) *= scalingVector(i) - i+=1 + i += 1 } Vectors.dense(values) case SparseVector(size, indices, vs) => @@ -52,7 +53,7 @@ class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { val dim = values.size var i = 0 while (i < dim) { - values(i) *= scalingVector.apply(indices(i)) + values(i) *= scalingVector(indices(i)) i += 1 } Vectors.sparse(size, indices, values) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala index 57840e3b255e..e1ec10d02d7c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -17,55 +17,40 @@ package org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.{SparseVector, DenseVector, Vector, Vectors} -import org.apache.spark.mllib.util.MLlibTestSparkContext import org.scalatest.FunSuite + +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext{ +class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext { - val denseData = Array( - Vectors.dense(1.0, 1.0, 0.0, 0.0), - Vectors.dense(1.0, 2.0, -3.0, 0.0), - Vectors.dense(1.0, 3.0, 0.0, 0.0), - Vectors.dense(1.0, 4.0, 1.9, -9.0), - Vectors.dense(1.0, 5.0, 0.0, 0.0) + val denseData = Array( + Vectors.dense(1.0, 4.0, 1.9, -9.0) ) val sparseData = Array( - Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))), - Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))), - Vectors.sparse(3, Seq((1, -5.1))), - Vectors.sparse(3, Seq((0, 3.8), (2, 1.9))), - Vectors.sparse(3, Seq((0, 1.7), (1, -0.6))), - Vectors.sparse(3, Seq((1, 1.9))) + Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))) ) val scalingVector = Vectors.dense(2.0, 0.5, 0.0, 0.25) test("elementwise (hadamard) product should properly apply vector to dense data set") { - val transformer = new ElementwiseProduct(scalingVector) val transformedData = transformer.transform(sc.makeRDD(denseData)) - val transformedVecs = transformedData.collect() + val transformedVec = transformedVecs(0).toArray - val fourthVec = transformedVecs.apply(3).toArray - - assert(fourthVec.apply(0) === 2.0, "product by 2.0 should have been applied") - assert(fourthVec.apply(1) === 2.0, "product by 0.5 should have been applied") - assert(fourthVec.apply(2) === 0.0, "product by 0.0 should have been applied") - assert(fourthVec.apply(3) === -2.25, "product by 0.25 should have been applied") + assert(transformedVec(0) === 2.0, "product by 2.0 should have been applied") + assert(transformedVec(1) === 2.0, "product by 0.5 should have been applied") + assert(transformedVec(2) === 0.0, "product by 0.0 should have been applied") + assert(transformedVec(3) === -2.25, "product by 0.25 should have been applied") } test("elementwise (hadamard) product should properly apply vector to sparse data set") { - val dataRDD = sc.parallelize(sparseData, 3) - val 
scalingVec = Vectors.dense(1.0, 0.0, 0.5) - val transformer = new ElementwiseProduct(scalingVec) - val data2 = sparseData.map(transformer.transform) val data2RDD = transformer.transform(dataRDD) @@ -76,7 +61,6 @@ class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext{ }, "The vector type should be preserved after hadamard product") assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) - assert(data2(0) ~== Vectors.sparse(3, Seq((0, -2.0), (1, 0.0))) absTol 1E-5) assert(data2(1) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) } From ded3ac66024ce72f4112daac14d90be548dc36f9 Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Wed, 6 May 2015 19:57:18 -0600 Subject: [PATCH 6/9] [SPARK-5726] [MLLIB] Pass style checks. --- .../org/apache/spark/ml/feature/ElementwiseProduct.scala | 3 ++- .../org/apache/spark/mllib/feature/ElementwiseProduct.scala | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index e0fe4a568a41..d4c0ea040f90 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -26,7 +26,8 @@ import org.apache.spark.sql.types.DataType /** * :: AlphaComponent :: - * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. */ @AlphaComponent class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index 5700bf68b154..bd3b5f825c18 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -22,8 +22,8 @@ import org.apache.spark.mllib.linalg._ /** * :: AlphaComponent :: - * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. - * + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided + * "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. * @param scalingVector The values used to scale the reference vector's individual components. */ @AlphaComponent From 459516588fd9d162f10bdb0e84c09c8499db4c32 Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Wed, 6 May 2015 19:58:54 -0600 Subject: [PATCH 7/9] [SPARK-5726] [MLLIB] Remove erroneous test case. 
--- .../apache/spark/mllib/feature/ElementwiseProductSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala index e1ec10d02d7c..b5fe36a17225 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -61,7 +61,6 @@ class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext { }, "The vector type should be preserved after hadamard product") assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) - assert(data2(0) ~== Vectors.sparse(3, Seq((0, -2.0), (1, 0.0))) absTol 1E-5) - assert(data2(1) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) + assert(data2(0) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) } } From 90f7e3962c5ddf68ae8760de30f06fb149ada688 Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Thu, 7 May 2015 00:14:34 -0700 Subject: [PATCH 8/9] small cleanups --- .../spark/ml/feature/ElementwiseProduct.scala | 3 +- .../mllib/feature/ElementwiseProduct.scala | 15 +++++----- .../feature/ElementwiseProductSuite.scala | 29 ++++++++----------- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index d4c0ea040f90..de3555a5b74d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -27,7 +27,8 @@ import org.apache.spark.sql.types.DataType /** * :: AlphaComponent :: * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a - * provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. */ @AlphaComponent class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala index bd3b5f825c18..b0985baf9b27 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -17,16 +17,17 @@ package org.apache.spark.mllib.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.mllib.linalg._ /** - * :: AlphaComponent :: - * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a provided - * "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. + * :: Experimental :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. * @param scalingVector The values used to scale the reference vector's individual components. 
*/ -@AlphaComponent +@Experimental class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { /** @@ -37,7 +38,7 @@ class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { */ override def transform(vector: Vector): Vector = { require(vector.size == scalingVector.size, - s"vector sizes do not match: ${scalingVector.size} ${vector.size}") + s"vector sizes do not match: Expected ${scalingVector.size} but found ${vector.size}") vector match { case dv: DenseVector => val values: Array[Double] = dv.values.clone() @@ -50,7 +51,7 @@ class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { Vectors.dense(values) case SparseVector(size, indices, vs) => val values = vs.clone() - val dim = values.size + val dim = values.length var i = 0 while (i < dim) { values(i) *= scalingVector(indices(i)) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala index b5fe36a17225..f3a482abda87 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -25,29 +25,24 @@ import org.apache.spark.mllib.util.TestingUtils._ class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext { - val denseData = Array( - Vectors.dense(1.0, 4.0, 1.9, -9.0) - ) - - val sparseData = Array( - Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))) - ) - - val scalingVector = Vectors.dense(2.0, 0.5, 0.0, 0.25) - test("elementwise (hadamard) product should properly apply vector to dense data set") { - val transformer = new ElementwiseProduct(scalingVector) + val denseData = Array( + Vectors.dense(1.0, 4.0, 1.9, -9.0) + ) + val scalingVec = Vectors.dense(2.0, 0.5, 0.0, 0.25) + val transformer = new ElementwiseProduct(scalingVec) val transformedData = transformer.transform(sc.makeRDD(denseData)) val transformedVecs = transformedData.collect() - val transformedVec = transformedVecs(0).toArray - - assert(transformedVec(0) === 2.0, "product by 2.0 should have been applied") - assert(transformedVec(1) === 2.0, "product by 0.5 should have been applied") - assert(transformedVec(2) === 0.0, "product by 0.0 should have been applied") - assert(transformedVec(3) === -2.25, "product by 0.25 should have been applied") + val transformedVec = transformedVecs(0) + val expectedVec = Vectors.dense(2.0, 2.0, 0.0, -2.25) + assert(transformedVec ~== expectedVec absTol 1E-5, + s"Expected transformed vector $expectedVec but found $transformedVec") } test("elementwise (hadamard) product should properly apply vector to sparse data set") { + val sparseData = Array( + Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))) + ) val dataRDD = sc.parallelize(sparseData, 3) val scalingVec = Vectors.dense(1.0, 0.0, 0.5) val transformer = new ElementwiseProduct(scalingVec) From fac12ad29b6a640b6567e66e57a8176deb621dc8 Mon Sep 17 00:00:00 2001 From: Octavian Geagla Date: Thu, 7 May 2015 13:22:53 -0600 Subject: [PATCH 9/9] [SPARK-5726] [MLLIB] Use new createTransformFunc. 
--- .../org/apache/spark/ml/feature/ElementwiseProduct.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index de3555a5b74d..f8b56293e3cc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.AlphaComponent import org.apache.spark.ml.UnaryTransformer -import org.apache.spark.ml.param.{Param, ParamMap} +import org.apache.spark.ml.param.Param import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType @@ -45,9 +45,9 @@ class ElementwiseProduct extends UnaryTransformer[Vector, Vector, ElementwisePro /** @group getParam */ def getScalingVec: Vector = getOrDefault(scalingVec) - override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { - require(paramMap.contains(scalingVec), s"transformation requires a weight vector: $scalingVec") - val elemScaler = new feature.ElementwiseProduct(paramMap(scalingVec)) + override protected def createTransformFunc: Vector => Vector = { + require(params.contains(scalingVec), s"transformation requires a weight vector") + val elemScaler = new feature.ElementwiseProduct($(scalingVec)) elemScaler.transform }
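
For reference, the three sketches below illustrate what this series ends up building. They are editor-supplied illustrations against the post-PATCH-9 state of the code, not additional commits, and any name that does not appear in the diffs above (the object name below, the `sc` and `df` handles later) is an assumption made for the example.

First, the element-wise (Hadamard) product itself, written as self-contained Scala with plain arrays standing in for MLlib vectors; the numbers mirror the dense case in ElementwiseProductSuite.

```scala
object ElementwiseSketch {
  // The docs formula: result(i) = v(i) * w(i)
  def hadamard(v: Array[Double], w: Array[Double]): Array[Double] = {
    require(v.length == w.length,
      s"vector sizes do not match: Expected ${w.length} but found ${v.length}")
    Array.tabulate(v.length)(i => v(i) * w(i))
  }

  def main(args: Array[String]): Unit = {
    val out = hadamard(Array(1.0, 4.0, 1.9, -9.0), Array(2.0, 0.5, 0.0, 0.25))
    println(out.mkString(", ")) // 2.0, 2.0, 0.0, -2.25
  }
}
```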
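Second, a minimal sketch of the mllib transformer as PATCH 8 leaves it: the scaling vector is a constructor argument, `transform` is available for a single `Vector` and, via the `VectorTransformer` default, for an `RDD[Vector]`, and sparse input stays sparse. A live `SparkContext` named `sc` is assumed.

```scala
import org.apache.spark.mllib.feature.ElementwiseProduct
import org.apache.spark.mllib.linalg.Vectors

val transformer = new ElementwiseProduct(Vectors.dense(2.0, 0.5, 0.0, 0.25))

// Single-vector transform, matching the dense test case:
val v = transformer.transform(Vectors.dense(1.0, 4.0, 1.9, -9.0))
// v: [2.0, 2.0, 0.0, -2.25]

// RDD[Vector] transform via the VectorTransformer default:
val rdd = sc.parallelize(Seq(Vectors.dense(1.0, 4.0, 1.9, -9.0)))
val transformed = transformer.transform(rdd).collect()
```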
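Third, a sketch of the spark.ml wrapper as PATCH 9 leaves it. `setScalingVec` is defined in the patch; `setInputCol`/`setOutputCol` are assumed to be inherited from `UnaryTransformer`, and `df` is assumed to be a `DataFrame` with a vector column named "features". Note that `createTransformFunc` requires the scaling vector to be set before `transform` is called.

```scala
import org.apache.spark.ml.feature.ElementwiseProduct
import org.apache.spark.mllib.linalg.Vectors

val ep = new ElementwiseProduct()
  .setScalingVec(Vectors.dense(2.0, 0.5, 0.0, 0.25)) // required by createTransformFunc
  .setInputCol("features")        // assumed: inherited from UnaryTransformer
  .setOutputCol("scaledFeatures")

val result = ep.transform(df) // appends the scaled vector column to df
```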