@@ -23,6 +23,7 @@ import org.apache.spark.ml.param.{IntParam, ParamMap}
2323import org .apache .spark .mllib .linalg ._
2424import org .apache .spark .sql .types .DataType
2525
26+ import scala .annotation .tailrec
2627import scala .collection .mutable .ArrayBuffer
2728
2829/**
@@ -75,12 +76,17 @@ object PolynomialMapper {
7576 binomialCoefficient(numVariables + degree, numVariables) - 1
7677 }
7778
79+ @ tailrec
7880 private def fillDenseVector (
7981 outputVector : Array [Double ],
8082 prevExpandedVecFrom : Int ,
8183 prevExpandedVecLen : Int ,
8284 currDegree : Int ,
83- nDim : Int ): (Int , Int ) = {
85+ finalDegree : Int ,
86+ nDim : Int ): Unit = {
87+ if (currDegree > finalDegree) {
88+ return None
89+ }
8490 val currExpandedVecFrom = prevExpandedVecFrom + prevExpandedVecLen
8591 var currIndex = currExpandedVecFrom
8692 val currExpandedVecLen = numMonomials(currDegree, nDim)
@@ -97,41 +103,65 @@ object PolynomialMapper {
97103 }
98104 leftIndex += 1
99105 }
100- (currExpandedVecFrom, currExpandedVecLen)
106+
107+ fillDenseVector(outputVector, currExpandedVecFrom, currExpandedVecLen, currDegree + 1 ,
108+ finalDegree, nDim)
101109 }
102110
/**
 * Appends the sparse monomial terms of degree `currDegree` up to `finalDegree` to the output
 * buffers, one degree per recursive step.
 *
 * `outputIndices` and `outputValues` are parallel buffers: position `k` in one corresponds to
 * position `k` in the other, and every append below adds one entry to each. Each step multiplies
 * the first `originalSparseVecLen` entries (the left operands) with the entries produced for the
 * previous degree (the right operands), which occupy buffer positions
 * `[prevExpandedSparseVecFrom, prevExpandedSparseVecFrom + prevExpandedSparseVecLen)`.
 *
 * @param outputIndices indices of the expanded sparse vector; read and appended to in place
 * @param outputValues values of the expanded sparse vector; read and appended to in place
 * @param originalSparseVecLen number of leading buffer entries used as left operands
 * @param prevExpandedSparseVecFrom buffer position where the previous degree's entries start
 * @param prevExpandedSparseVecLen number of buffer entries produced for the previous degree
 * @param currDegree degree being generated in this step
 * @param finalDegree last degree to generate; nothing is done once `currDegree` exceeds it
 * @param nDim passed to `numExpandedDims` and `numMonomials` as the number of variables
 */
@tailrec
private def fillSparseVector(
    outputIndices: ArrayBuffer[Int],
    outputValues: ArrayBuffer[Double],
    originalSparseVecLen: Int,
    prevExpandedSparseVecFrom: Int,
    prevExpandedSparseVecLen: Int,
    currDegree: Int,
    finalDegree: Int,
    nDim: Int): Unit = {
  // Guard instead of `return None`: the method is Unit-typed, and the recursive call below stays
  // in tail position inside this branch.
  if (currDegree <= finalDegree) {
    val currExpandedSparseVecFrom = prevExpandedSparseVecFrom + prevExpandedSparseVecLen
    var currExpandedSparseVecLen = 0
    val prevExpandedVecFrom = numExpandedDims(currDegree - 2, nDim)
    val prevExpandedVecLen = numMonomials(currDegree - 1, nDim)
    val currExpandedVecFrom = prevExpandedVecFrom + prevExpandedVecLen
    var leftIndex = 0
    var numToKeepCum = 0
    while (leftIndex < originalSparseVecLen) {
      val numToKeep = numMonomials(currDegree - 1, nDim - outputIndices(leftIndex))
      var rightIndex = 0
      while (rightIndex < prevExpandedSparseVecLen) {
        // Buffer position of the right operand inside the previous degree's entries.
        val rightPos = prevExpandedSparseVecFrom + rightIndex
        val realIndex = outputIndices(rightPos) - (prevExpandedVecLen - numToKeep)
        // Entries whose shifted index falls below the previous block's start are skipped.
        if (realIndex >= prevExpandedVecFrom) {
          outputIndices += currExpandedVecFrom + numToKeepCum + realIndex
          // Read the value at the same buffer position as the index above; the two buffers are
          // parallel, so `outputValues(rightIndex)` would pick an unrelated entry whenever
          // `prevExpandedSparseVecFrom` is non-zero.
          outputValues += outputValues(leftIndex) * outputValues(rightPos)
          currExpandedSparseVecLen += 1
        }
        // NOTE(review): the cumulative offset advances on every inner iteration rather than once
        // per left operand -- confirm this is the intended index layout.
        numToKeepCum += numToKeep
        rightIndex += 1
      }
      leftIndex += 1
    }

    fillSparseVector(outputIndices, outputValues, originalSparseVecLen, currExpandedSparseVecFrom,
      currExpandedSparseVecLen, currDegree + 1, finalDegree, nDim)
  }
}
136166
137167 /**
@@ -172,19 +202,14 @@ object PolynomialMapper {
172202 }
173203 }
174204
175- /**
176- * Transform a vector of variables into a larger vector which stores the polynomial expansion from
177- * degree 1 to degree `degree`.
178- */
179- private def transform (degree : Int )(feature : Vector ): Vector = {
205+ private def transform2 (degree : Int )(feature : Vector ): Vector = {
180206 val originalDims = feature.size
181207 val expectedDims = numExpandedDims(degree, feature.size)
182208 feature match {
183209 case f : DenseVector =>
184- val res = Vectors .zeros(expectedDims)
185210 (2 to degree).foldLeft(Array (feature.copy)) { (vectors, currDegree) =>
186211 vectors ++ Array (expandVector(feature, vectors.last, originalDims, currDegree))
187- }.reduce((lhs, rhs) => Vectors .dense(lhs.toArray ++ rhs.toArray))
212+ }.reduce((lhs, rhs) => Vectors .dense(lhs.toArray ++ rhs.toArray))
188213 case f : SparseVector =>
189214 (2 to degree).foldLeft(Array (feature.copy)) { (vectors, currDegree) =>
190215 vectors ++ Array (expandVector(feature, vectors.last, originalDims, currDegree))
@@ -197,4 +222,33 @@ object PolynomialMapper {
197222 case _ => throw new Exception (" vector type is invalid." )
198223 }
199224 }
225+
/**
 * Expands `feature` into a vector of `numExpandedDims(degree, feature.size)` entries holding the
 * polynomial expansion from degree 1 to degree `degree`.
 *
 * The first `feature.size` slots (dense) or the first `indices.size` entries (sparse) are the
 * input itself; the higher-degree terms are filled in by `fillDenseVector` / `fillSparseVector`,
 * both started at degree 2.
 */
private def transform(degree: Int)(feature: Vector): Vector = {
  val originalDims = feature.size
  val expectedDims = numExpandedDims(degree, feature.size)
  feature match {
    case f: DenseVector =>
      // Zero-initialised output whose leading slots are the input values.
      val expanded = new Array[Double](expectedDims)
      System.arraycopy(f.values, 0, expanded, 0, f.size)
      // Previous block starts at 0 with length `originalDims`; expansion begins at degree 2.
      fillDenseVector(expanded, 0, originalDims, 2, degree, originalDims)
      Vectors.dense(expanded)

    case f: SparseVector =>
      val nnz = f.indices.size
      val indexBuf = new ArrayBuffer[Int]()
      val valueBuf = new ArrayBuffer[Double]()
      // Seed both buffers with the input's entries, kept in lockstep.
      var k = 0
      while (k < nnz) {
        indexBuf += f.indices(k)
        valueBuf += f.values(k)
        k += 1
      }
      // The seeded entries act as both the left operands and the previous-degree block.
      fillSparseVector(indexBuf, valueBuf, nnz, 0, nnz, 2, degree, originalDims)
      Vectors.sparse(expectedDims, indexBuf.toArray, valueBuf.toArray)
  }
}
200254}
0 commit comments