Commit 9032f7c

CodingCat authored and aarondav committed
SPARK-1160: Deprecate toArray in RDD
https://spark-project.atlassian.net/browse/SPARK-1160, reported by @mateiz: "It's redundant with collect() and the name doesn't make sense in Java, where we return a List (we can't return an array due to the way Java generics work). It's also missing in Python."

In this patch, I deprecated the method and changed the source files that used it, replacing toArray with collect() directly.

Author: CodingCat <[email protected]>

Closes #105 from CodingCat/SPARK-1060 and squashes the following commits:

286f163 [CodingCat] deprecate in JavaRDDLike
ee17b4e [CodingCat] add message and since
2ff7319 [CodingCat] deprecate toArray in RDD
1 parent b8afe30 commit 9032f7c
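For anyone migrating call sites, the change is mechanical: every toArray() becomes collect(). A minimal before/after sketch (standalone demo code, not part of the patch; the local master and names are illustrative):

import org.apache.spark.SparkContext

object ToArrayMigration {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "toArray-migration")
    val rdd = sc.parallelize(1 to 5)

    // Before this patch (now deprecated): materializes the RDD on the driver.
    // val data: Array[Int] = rdd.toArray()

    // After this patch: collect() does exactly the same thing.
    val data: Array[Int] = rdd.collect()
    println(data.mkString(", "))

    sc.stop()
  }
}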

File tree

8 files changed (+13, -11 lines)


core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala

Lines changed: 1 addition & 0 deletions
@@ -283,6 +283,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
   /**
    * Return an array that contains all of the elements in this RDD.
    */
+  @deprecated("use collect", "1.0.0")
   def toArray(): JList[T] = collect()
 
   /**
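The Java-generics point from the commit message is visible right in the signature above: even before deprecation, toArray() on the Java API returned a JList[T] rather than an array, because Java generics cannot create a T[]. A small sketch of the Java-API call path from Scala (setup is illustrative, assuming a local master):

import java.util.Arrays
import org.apache.spark.api.java.JavaSparkContext

val jsc = new JavaSparkContext("local", "java-collect-demo")
val jrdd = jsc.parallelize(Arrays.asList(1, 2, 3))

// On the Java API, collect() hands back a java.util.List -- the very thing
// the misleadingly named toArray() was already delegating to.
val data = jrdd.collect()
println(data)
jsc.stop()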

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 1 addition & 1 deletion
@@ -423,7 +423,7 @@ class PairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)])
    * Return the key-value pairs in this RDD to the master as a Map.
    */
   def collectAsMap(): Map[K, V] = {
-    val data = self.toArray()
+    val data = self.collect()
     val map = new mutable.HashMap[K, V]
     map.sizeHint(data.length)
     data.foreach { case (k, v) => map.put(k, v) }
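collectAsMap() itself is unchanged in behavior; only its internal call moved off the deprecated method. A quick usage sketch, assuming sc is an existing SparkContext:

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

// Pulls the whole RDD to the driver as a Map. Duplicate keys keep only one
// value (later entries overwrite earlier ones in the HashMap).
val m: scala.collection.Map[String, Int] = pairs.collectAsMap()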

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 1 addition & 0 deletions
@@ -658,6 +658,7 @@ abstract class RDD[T: ClassTag](
   /**
    * Return an array that contains all of the elements in this RDD.
    */
+  @deprecated("use collect", "1.0.0")
   def toArray(): Array[T] = collect()
 
   /**
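The annotation takes a message and a since-version, and scalac surfaces both at every call site. A standalone toy sketch of the same pattern (Box and toArray2 are hypothetical stand-ins, not Spark code):

object DeprecationDemo {
  class Box[T](private val items: Seq[T]) {
    def collect(): Seq[T] = items

    // Same annotation shape as the RDD change above.
    @deprecated("use collect", "1.0.0")
    def toArray2(): Seq[T] = collect()
  }

  def main(args: Array[String]): Unit = {
    val b = new Box(Seq(1, 2, 3))
    // Compiling this call site produces a deprecation warning that quotes
    // the message ("use collect") and the version.
    println(b.toArray2())
  }
}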

core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala

Lines changed: 2 additions & 2 deletions
@@ -26,13 +26,13 @@ import cern.jet.random.engine.DRand
 
 import org.apache.spark.{Partition, TaskContext}
 
-@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0")
+@deprecated("Replaced by PartitionwiseSampledRDDPartition", "1.0.0")
 private[spark]
 class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable {
   override val index: Int = prev.index
 }
 
-@deprecated("Replaced by PartitionwiseSampledRDD", "1.0")
+@deprecated("Replaced by PartitionwiseSampledRDD", "1.0.0")
 class SampledRDD[T: ClassTag](
     prev: RDD[T],
     withReplacement: Boolean,
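SampledRDD and its partition class remain only for old code; new code reaches the PartitionwiseSampledRDD replacement implicitly by calling RDD.sample. A minimal sketch, assuming sc is an existing SparkContext (the fraction and seed are arbitrary):

// Roughly 10% of the elements, sampled without replacement; a fixed seed
// keeps the sample reproducible across runs.
val sampled = sc.parallelize(1 to 1000).sample(false, 0.1, 42)
println(sampled.count())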

examples/src/main/scala/org/apache/spark/examples/SparkALS.scala

Lines changed: 2 additions & 2 deletions
@@ -128,11 +128,11 @@ object SparkALS {
       println("Iteration " + iter + ":")
       ms = sc.parallelize(0 until M, slices)
                 .map(i => update(i, msb.value(i), usb.value, Rc.value))
-                .toArray
+                .collect()
       msb = sc.broadcast(ms) // Re-broadcast ms because it was updated
       us = sc.parallelize(0 until U, slices)
                 .map(i => update(i, usb.value(i), msb.value, algebra.transpose(Rc.value)))
-                .toArray
+                .collect()
       usb = sc.broadcast(us) // Re-broadcast us because it was updated
       println("RMSE = " + rmse(R, ms, us))
       println()

examples/src/main/scala/org/apache/spark/examples/mllib/SparkSVD.scala

Lines changed: 1 addition & 1 deletion
@@ -54,6 +54,6 @@ object SparkSVD {
     val s = decomposed.S.data
     val v = decomposed.V.data
 
-    println("singular values = " + s.toArray.mkString)
+    println("singular values = " + s.collect().mkString)
   }
 }

mllib/src/main/scala/org/apache/spark/mllib/linalg/SVD.scala

Lines changed: 2 additions & 2 deletions
@@ -109,7 +109,7 @@ object SVD {
 
     // Construct jblas A^T A locally
     val ata = DoubleMatrix.zeros(n, n)
-    for (entry <- emits.toArray) {
+    for (entry <- emits.collect()) {
       ata.put(entry._1._1, entry._1._2, entry._2)
     }
 
@@ -178,7 +178,7 @@ object SVD {
     val s = decomposed.S.data
     val v = decomposed.V.data
 
-    println("Computed " + s.toArray.length + " singular values and vectors")
+    println("Computed " + s.collect().length + " singular values and vectors")
     u.saveAsTextFile(output_u)
     s.saveAsTextFile(output_s)
     v.saveAsTextFile(output_v)

mllib/src/test/scala/org/apache/spark/mllib/linalg/SVDSuite.scala

Lines changed: 3 additions & 3 deletions
@@ -50,7 +50,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val m = matrix.m
     val n = matrix.n
     val ret = DoubleMatrix.zeros(m, n)
-    matrix.data.toArray.map(x => ret.put(x.i, x.j, x.mval))
+    matrix.data.collect().map(x => ret.put(x.i, x.j, x.mval))
     ret
   }
 
@@ -106,7 +106,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val u = decomposed.U
     val s = decomposed.S
     val v = decomposed.V
-    val retrank = s.data.toArray.length
+    val retrank = s.data.collect().length
 
     assert(retrank == 1, "rank returned not one")
 
@@ -139,7 +139,7 @@ class SVDSuite extends FunSuite with BeforeAndAfterAll {
     val u = decomposed.U
     val s = decomposed.S
     val v = decomposed.V
-    val retrank = s.data.toArray.length
+    val retrank = s.data.collect().length
 
     val densea = getDenseMatrix(a)
     val svd = Singular.sparseSVD(densea)
