From 7f37d2142a388e5717ae2c3e89152c8c735904cc Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Mon, 30 Mar 2015 20:34:32 +0800 Subject: [PATCH 1/2] Makes DataFrame.rdd a lazy val --- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 423ef3912bc8..0247bd57d0cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -941,10 +941,10 @@ class DataFrame private[sql]( ///////////////////////////////////////////////////////////////////////////// /** - * Returns the content of the [[DataFrame]] as an [[RDD]] of [[Row]]s. + * Represents the content of the [[DataFrame]] as an [[RDD]] of [[Row]]s. * @group rdd */ - def rdd: RDD[Row] = { + lazy val rdd: RDD[Row] = { // use a local variable to make sure the map closure doesn't capture the whole DataFrame val schema = this.schema queryExecution.executedPlan.execute().map(ScalaReflection.convertRowToScala(_, schema)) From 75009682dff1d45e55c7e6de5a709d97ab3c2ca3 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Tue, 31 Mar 2015 23:22:23 +0800 Subject: [PATCH 2/2] Updates javadoc --- sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 0247bd57d0cd..36d42c2be715 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -941,7 +941,9 @@ class DataFrame private[sql]( ///////////////////////////////////////////////////////////////////////////// /** - * Represents the content of the [[DataFrame]] as an [[RDD]] of [[Row]]s. 
+ * Represents the content of the [[DataFrame]] as an [[RDD]] of [[Row]]s. Note that the RDD is + * memoized. Once called, it won't change even if you change any query-planning-related Spark SQL + * configurations (e.g. `spark.sql.shuffle.partitions`). * @group rdd */ lazy val rdd: RDD[Row] = {