From 59f03ebcd46bcf1e134f44df3f20dd271fa991bc Mon Sep 17 00:00:00 2001 From: zsxwing Date: Mon, 22 Dec 2014 17:46:28 +0800 Subject: [PATCH] Reuse Text in saveAsTextFile --- .../main/scala/org/apache/spark/rdd/RDD.scala | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index a94206963b52..f47c2d1fcdcc 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1186,7 +1186,13 @@ abstract class RDD[T: ClassTag]( // same bytecodes for `saveAsTextFile`. val nullWritableClassTag = implicitly[ClassTag[NullWritable]] val textClassTag = implicitly[ClassTag[Text]] - val r = this.map(x => (NullWritable.get(), new Text(x.toString))) + val r = this.mapPartitions { iter => + val text = new Text() + iter.map { x => + text.set(x.toString) + (NullWritable.get(), text) + } + } RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path) } @@ -1198,7 +1204,13 @@ abstract class RDD[T: ClassTag]( // https://issues.apache.org/jira/browse/SPARK-2075 val nullWritableClassTag = implicitly[ClassTag[NullWritable]] val textClassTag = implicitly[ClassTag[Text]] - val r = this.map(x => (NullWritable.get(), new Text(x.toString))) + val r = this.mapPartitions { iter => + val text = new Text() + iter.map { x => + text.set(x.toString) + (NullWritable.get(), text) + } + } RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path, codec) }