Skip to content

Commit 372227c

Browse files
committed
add polynomial expansion
1 parent 3af7334 commit 372227c

File tree

1 file changed

+111
-0
lines changed

1 file changed

+111
-0
lines changed
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.ml.feature
19+
20+
import org.apache.spark.annotation.AlphaComponent
21+
import org.apache.spark.ml.UnaryTransformer
22+
import org.apache.spark.ml.param.{IntParam, ParamMap}
23+
import org.apache.spark.mllib.linalg._
24+
import org.apache.spark.sql.types.DataType
25+
26+
/**
 * :: AlphaComponent ::
 * Expand a feature vector into a polynomial feature space: the output is the concatenation of the
 * products of the input's entries for every degree from 1 up to the configured `degree`.
 */
@AlphaComponent
class PolynomialMapper extends UnaryTransformer[Vector, Vector, PolynomialMapper] {

  /**
   * The highest polynomial degree to expand to, 1 by default.
   * @group param
   */
  val degree = new IntParam(this, "degree", "the polynomial degree to expand", Some(1))

  /** @group getParam */
  def getDegree: Int = get(degree)

  /** @group setParam */
  def setDegree(value: Int): this.type = set(degree, value)

  /**
   * Build the per-row function that performs the polynomial expansion.
   *
   * @param paramMap the parameter map this transformation should run with
   * @return a function mapping an input vector to its polynomial expansion
   */
  override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = {
    // Take the degree from the supplied map instead of from this instance, so that a value passed
    // in `paramMap` is honoured.
    // NOTE(review): assumes `paramMap(degree)` falls back to the param's default when the map has
    // no entry for it -- confirm against ParamMap.apply.
    PolynomialMapper.transform(paramMap(degree))
  }

  override protected def outputDataType: DataType = new VectorUDT()
}
51+
52+
object PolynomialMapper {
  /**
   * The number of ways to choose `k` items out of `N` without repetition, i.e. the binomial
   * coefficient C(N, k).
   *
   * The value is accumulated multiplicatively in a `Long` so that intermediate products cannot
   * silently wrap around the way a plain `Int` product of the numerator would.
   *
   * @param N size of the set to choose from
   * @param k number of items chosen
   * @return the binomial coefficient C(N, k)
   * @throws IllegalArgumentException if the coefficient does not fit in an `Int`
   */
  private def binomialCoefficient(N: Int, k: Int): Int = {
    val coefficient = (1 to k).foldLeft(1L) { (acc, i) =>
      // `acc` holds C(N - k + i - 1, i - 1), so multiplying by (N - k + i) and dividing by i is
      // always an exact integer division.
      val next = acc * (N.toLong - k + i) / i
      // Keeping every step inside the Int range also guarantees the Long product cannot overflow.
      require(next >= Int.MinValue && next <= Int.MaxValue,
        s"binomial coefficient C($N, $k) does not fit in an Int")
      next
    }
    coefficient.toInt
  }

  /**
   * The number of distinct monomials of exactly degree `degree` over `numVariables` variables.
   *
   * Note that this counts each monomial once (x1 * x2 and x2 * x1 are the same monomial), whereas
   * `transform` below keeps every ordered product; this helper is not used by `transform`.
   *
   * @param degree the polynomial degree
   * @param numVariables the number of variables, i.e. the size of the input vector
   */
  private def numMonomials(degree: Int, numVariables: Int): Int = {
    binomialCoefficient(numVariables + degree - 1, degree)
  }

  /**
   * The number of distinct monomials over `numVariables` variables for all degrees from 1 up to
   * `degree` (the constant term is excluded, hence the `- 1`).
   *
   * Like `numMonomials`, this counts distinct monomials and is not used by `transform`.
   *
   * @param degree the highest polynomial degree
   * @param numVariables the number of variables, i.e. the size of the input vector
   */
  private def numExpandedDims(degree: Int, numVariables: Int): Int = {
    binomialCoefficient(numVariables + degree, numVariables) - 1
  }

  /**
   * Multiply two polynomials term by term: the result has `lhs.size * rhs.size` entries, where the
   * product of entry `i` of `lhs` and entry `j` of `rhs` is stored at position `i * rhs.size + j`.
   *
   * @param lhs the left operand
   * @param rhs the right operand, which must be of the same vector type as `lhs`
   * @return a vector of all pairwise products, dense or sparse like its operands
   * @throws Exception if the operands are not both dense or both sparse
   * @throws IllegalArgumentException if the size of a sparse result does not fit in an `Int`
   */
  private def expandVector(lhs: Vector, rhs: Vector): Vector = {
    (lhs, rhs) match {
      case (l: DenseVector, r: DenseVector) =>
        val rValues = r.toArray
        Vectors.dense(l.toArray.flatMap(lx => rValues.map(rx => lx * rx)))
      case (SparseVector(lLen, lIdx, lVal), SparseVector(rLen, rIdx, rVal)) =>
        require(lLen.toLong * rLen <= Int.MaxValue,
          s"expanded vector size $lLen * $rLen does not fit in an Int")
        val len = lLen * rLen
        // The stride is the size of the right operand, exactly as in the dense layout above.
        // Striding by lLen instead produces wrong or colliding indices whenever lLen != rLen.
        val idx = lIdx.flatMap(li => rIdx.map(ri => li * rLen + ri))
        val value = lVal.flatMap(lv => rVal.map(rv => lv * rv))
        Vectors.sparse(len, idx, value)
      case _ => throw new Exception("vector types are not match.")
    }
  }

  /**
   * Transform a vector of variables into a larger vector which stores the polynomial expansion from
   * degree 1 to degree `degree`.
   *
   * For an input of size n the output is the concatenation of blocks of size n, n * n, ..., one per
   * degree, each block holding every ordered product of that many input entries. A `degree` below
   * 2 yields a copy of the input.
   *
   * @param degree the highest polynomial degree to expand to
   * @param feature the input vector, dense or sparse
   * @return the expanded vector, of the same kind (dense or sparse) as `feature`
   * @throws Exception if `feature` is neither a dense nor a sparse vector
   */
  private def transform(degree: Int)(feature: Vector): Vector = {
    // One vector per degree: element d - 1 holds the degree-d products. Kept as a `def` so that
    // nothing is expanded for an unsupported vector type.
    def terms: Array[Vector] =
      (2 to degree).foldLeft(Array(feature.copy)) { (acc, _) =>
        acc :+ expandVector(feature, acc.last)
      }

    feature match {
      case _: DenseVector =>
        terms.reduce((lhs, rhs) => Vectors.dense(lhs.toArray ++ rhs.toArray))
      case _: SparseVector =>
        // Concatenate sparse blocks by shifting the indices of the right block past the left one.
        terms.reduce { case (SparseVector(lLen, lIdx, lVal), SparseVector(rLen, rIdx, rVal)) =>
          Vectors.sparse(lLen + rLen, lIdx ++ rIdx.map(_ + lLen), lVal ++ rVal)
        }
      case _ => throw new Exception("vector type is invalid.")
    }
  }
}

0 commit comments

Comments
 (0)