@@ -20,6 +20,7 @@ package org.apache.spark.mllib.regression
2020import org .scalatest .{Matchers , FunSuite }
2121
2222import org .apache .spark .mllib .util .MLlibTestSparkContext
23+ import org .apache .spark .mllib .util .TestingUtils ._
2324
2425class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with Matchers {
2526
@@ -28,15 +29,13 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
2829 }
2930
/**
 * Builds unit-weight training points from a sequence of labels.
 *
 * @param labels labels in feature order
 * @return one (label, feature, weight) triple per label, where the feature is the label's
 *         0-based position and the weight is always 1
 */
private def generateIsotonicInput(labels: Seq[Double]): Seq[(Double, Double, Double)] = {
  labels.zipWithIndex.map { case (label, position) => (label, position.toDouble, 1d) }
}
3334
/**
 * Builds weighted training points from parallel sequences of labels and weights.
 *
 * @param labels labels in feature order
 * @param weights weight of each label, looked up by the same 0-based position
 * @return one (label, feature, weight) triple per label, where the feature is the label's
 *         0-based position
 */
private def generateIsotonicInput(
    labels: Seq[Double],
    weights: Seq[Double]): Seq[(Double, Double, Double)] = {
  // Indexing (rather than zipping) keeps the failure on a too-short `weights` sequence.
  for (idx <- labels.indices) yield (labels(idx), idx.toDouble, weights(idx))
}
4140
4241 private def runIsotonicRegression (
@@ -54,9 +53,24 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
5453 }
5554
test(" increasing isotonic regression") {
  /*
   The expected values below can be reproduced with scikit-learn:

   > from sklearn.isotonic import IsotonicRegression
   > x = range(9)
   > y = [1, 2, 3, 1, 6, 17, 16, 17, 18]
   > ir = IsotonicRegression().fit(x, y)
   > print(ir.predict(x))

   array([  1. ,   2. ,   2. ,   2. ,   6. ,  16.5,  16.5,  17. ,  18. ])
   */
  val model = runIsotonicRegression(Seq(1, 2, 3, 1, 6, 17, 16, 17, 18), true)

  // Predictions at each training feature 0..8 must match the reference output above.
  assert(Array.tabulate(9)(x => model.predict(x)) === Array(1, 2, 2, 2, 6, 16.5, 16.5, 17, 18))

  // The expected model arrays omit feature 2, the interior point of the flat run over 1..3.
  assert(model.boundaries === Array(0, 1, 3, 4, 5, 6, 7, 8))
  assert(model.predictions === Array(1, 2, 2, 6, 16.5, 16.5, 17.0, 18.0))
  assert(model.isotonic)
}
6175
6276 test(" isotonic regression with size 0" ) {
@@ -80,74 +94,82 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
test(" isotonic regression strictly decreasing sequence") {
  // Every adjacent pair violates the increasing order, so the expected fit is a single
  // flat segment at 3 spanning features 0 through 4.
  val model = runIsotonicRegression(Seq(5, 4, 3, 2, 1), true)

  val expectedBoundaries = Array(0, 4)
  val expectedPredictions = Array(3, 3)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
85100
test(" isotonic regression with last element violating monotonicity") {
  // The trailing 2 breaks the order; the expected fit is flat at 3 over features 2..4.
  val model = runIsotonicRegression(Seq(1, 2, 3, 4, 2), true)

  val expectedBoundaries = Array(0, 1, 2, 4)
  val expectedPredictions = Array(1, 2, 3, 3)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
91107
test(" isotonic regression with first element violating monotonicity") {
  // The leading 4 breaks the order; the expected fit is flat at 3 over features 0..2.
  val model = runIsotonicRegression(Seq(4, 2, 3, 4, 5), true)

  val expectedBoundaries = Array(0, 2, 3, 4)
  val expectedPredictions = Array(3, 3, 4, 5)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
97114
test(" isotonic regression with negative labels") {
  // Expected fit: -1.5 over features 0..1, then 0 over features 2..4.
  val model = runIsotonicRegression(Seq(-1, -2, 0, 1, -1), true)

  val expectedBoundaries = Array(0, 1, 2, 4)
  val expectedPredictions = Array(-1.5, -1.5, 0, 0)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
103121
test(" isotonic regression with unordered input") {
  // Feed the points in reverse feature order, spread over 2 partitions.
  val reversedPoints = generateIsotonicInput(Seq(1, 2, 3, 4, 5)).reverse
  val trainRDD = sc.parallelize(reversedPoints, 2).cache()

  // The fitted predictions are expected in ascending order despite the reversed input.
  val model = new IsotonicRegression().run(trainRDD)
  val expectedPredictions = Array(1, 2, 3, 4, 5)
  assert(model.predictions === expectedPredictions)
}
110128
test(" weighted isotonic regression") {
  // Labels 3, 4, 2 with weights 1, 1, 2 pool to (3 + 4 + 2 * 2) / 4 = 2.75.
  val model = runIsotonicRegression(Seq(1, 2, 3, 4, 2), Seq(1, 1, 1, 1, 2), true)

  val expectedBoundaries = Array(0, 1, 2, 4)
  val expectedPredictions = Array(1, 2, 2.75, 2.75)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
116135
test(" weighted isotonic regression with weights lower than 1") {
  val model = runIsotonicRegression(Seq(1, 2, 3, 2, 1), Seq(1, 1, 1, 0.1, 0.1), true)

  // Labels 3, 2, 1 with weights 1, 0.1, 0.1 pool to (3 + 0.2 + 0.1) / 1.2 = 3.3 / 1.2.
  val pooledValue = 3.3 / 1.2
  assert(model.boundaries === Array(0, 1, 2, 4))

  // `round` is a suite helper defined outside this excerpt; it is applied to the actual
  // predictions only, exactly as before.
  val roundedPredictions = model.predictions.map(round)
  assert(roundedPredictions === Array(1, 2, pooledValue, pooledValue))
}
122142
test(" weighted isotonic regression with negative weights") {
  // Weights -1, -3 and -5 are negative; the model is still expected to come out as
  // 1.0 at feature 0 and 10 / 6 over features 1..4.
  val model = runIsotonicRegression(Seq(1, 2, 3, 2, 1), Seq(-1, 1, -3, 1, -5), true)

  val expectedBoundaries = Array(0.0, 1.0, 4.0)
  val expectedPredictions = Array(1.0, 10.0 / 6, 10.0 / 6)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
128149
test(" weighted isotonic regression with zero weights") {
  // Only the point at feature 3 carries a non-zero weight.
  val labels = Seq[Double](1, 2, 3, 2, 1)
  val weights = Seq[Double](0, 0, 0, 1, 0)
  val model = runIsotonicRegression(labels, weights, true)

  val expectedBoundaries = Array(0.0, 1.0, 4.0)
  val expectedPredictions = Array(1, 2, 2)
  assert(model.boundaries === expectedBoundaries)
  assert(model.predictions === expectedPredictions)
}
134156
test(" isotonic regression prediction") {
  val model = runIsotonicRegression(Seq(1, 2, 7, 1, 2), true)

  // (feature, expected prediction) pairs, checked in this order. Features below 0 expect
  // the first fitted value, 0.5 and 0.75 expect values between the fits at 0 and 1, and
  // features from 2 upward expect 10 / 3.
  val expectedByFeature = Seq(
    -2.0 -> 1.0,
    -1.0 -> 1.0,
    0.5 -> 1.5,
    0.75 -> 1.75,
    1.0 -> 2.0,
    2.0 -> 10d / 3,
    9.0 -> 10d / 3)

  expectedByFeature.foreach { case (feature, expected) =>
    assert(model.predict(feature) === expected)
  }
}
146168
147169 test(" isotonic regression prediction with duplicate features" ) {
148170 val trainRDD = sc.parallelize(
149171 Seq [(Double , Double , Double )](
150- (2 , 1 , 1 ), (1 , 1 , 1 ), (4 , 2 , 1 ), (2 , 2 , 1 ), (6 , 3 , 1 ), (5 , 3 , 1 ))).cache()
172+ (2 , 1 , 1 ), (1 , 1 , 1 ), (4 , 2 , 1 ), (2 , 2 , 1 ), (6 , 3 , 1 ), (5 , 3 , 1 )), 2 ).cache()
151173 val model = new IsotonicRegression ().run(trainRDD)
152174
153175 assert(model.predict(0 ) === 1 )
@@ -159,7 +181,7 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
159181 test(" antitonic regression prediction with duplicate features" ) {
160182 val trainRDD = sc.parallelize(
161183 Seq [(Double , Double , Double )](
162- (5 , 1 , 1 ), (6 , 1 , 1 ), (2 , 2 , 1 ), (4 , 2 , 1 ), (1 , 3 , 1 ), (2 , 3 , 1 ))).cache()
184+ (5 , 1 , 1 ), (6 , 1 , 1 ), (2 , 2 , 1 ), (4 , 2 , 1 ), (1 , 3 , 1 ), (2 , 3 , 1 )), 2 ).cache()
163185 val model = new IsotonicRegression ().setIsotonic(false ).run(trainRDD)
164186
165187 assert(model.predict(0 ) === 6 )
@@ -170,20 +192,50 @@ class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with M
170192
test(" isotonic regression RDD prediction") {
  val model = runIsotonicRegression(Seq(1, 2, 7, 1, 2), true)

  // NOTE(review): predictions are computed per element inside `map`, so this test does not
  // call an RDD overload of `predict` despite its name -- confirm that this is intended.
  val testRDD = sc.parallelize(List(-2.0, -1.0, 0.5, 0.75, 1.0, 2.0, 9.0), 2).cache()
  val featureWithPrediction = testRDD.map(feature => (feature, model.predict(feature))).collect()

  // Sort by feature so the comparison does not depend on the order of collected elements.
  val predictions = featureWithPrediction
    .sortBy { case (feature, _) => feature }
    .map { case (_, predicted) => predicted }
  assert(predictions === Array(1, 1, 1.5, 1.75, 2, 10.0 / 3, 10.0 / 3))
}
177200
test(" antitonic regression prediction") {
  // `false` requests a decreasing (antitonic) fit.
  val model = runIsotonicRegression(Seq(7, 5, 3, 5, 1), false)

  // (feature, expected prediction) pairs, checked in this order. Features below 0 expect
  // the first fitted value (7) and feature 9, past the last training feature, expects 1.
  val expectedByFeature = Seq(
    -2.0 -> 7.0,
    -1.0 -> 7.0,
    0.5 -> 6.0,
    0.75 -> 5.5,
    1.0 -> 5.0,
    2.0 -> 4.0,
    9.0 -> 1.0)

  expectedByFeature.foreach { case (feature, expected) =>
    assert(model.predict(feature) === expected)
  }
}
212+
test(" model construction") {
  // A hand-built two-point model: prediction 1.0 at boundary 0.0 and 2.0 at boundary 1.0.
  val model = new IsotonicRegressionModel(
    Array(0.0, 1.0),
    Array(1.0, 2.0),
    isotonic = true)

  // Outside the boundaries the nearest end value is expected.
  assert(model.predict(-0.5) === 1.0)
  assert(model.predict(0.0) === 1.0)
  // The midpoint is compared with an absolute tolerance rather than exact equality.
  assert(model.predict(0.5) ~== 1.5 absTol 1e-14)
  assert(model.predict(1.0) === 2.0)
  assert(model.predict(1.5) === 2.0)

  // Each malformed argument set below must be rejected with IllegalArgumentException.

  // Boundaries and predictions differ in length.
  intercept[IllegalArgumentException] {
    new IsotonicRegressionModel(Array(0.0, 1.0), Array(1.0), isotonic = true)
  }

  // Boundaries are not in ascending order.
  intercept[IllegalArgumentException] {
    new IsotonicRegressionModel(Array(1.0, 0.0), Array(1.0, 2.0), isotonic = true)
  }

  // Decreasing predictions passed to an isotonic model.
  intercept[IllegalArgumentException] {
    new IsotonicRegressionModel(Array(0.0, 1.0), Array(2.0, 1.0), isotonic = true)
  }

  // Increasing predictions passed to an antitonic model.
  intercept[IllegalArgumentException] {
    new IsotonicRegressionModel(Array(0.0, 1.0), Array(1.0, 2.0), isotonic = false)
  }
}
241+ }
0 commit comments