From 6216e944a1aef8dc67a446654c0799c8e9920b4f Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Fri, 3 Jun 2016 17:12:16 -0700
Subject: [PATCH 01/12] Replace N^2 loop in BindReferences.

---
 .../sql/catalyst/expressions/BoundAttribute.scala | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index a38f1ec09156..9308ec70e5b8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -84,9 +84,19 @@ object BindReferences extends Logging {
       expression: A,
       input: Seq[Attribute],
       allowFailures: Boolean = false): A = {
+    val inputArr = input.toArray
+    val inputToOrdinal = {
+      val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
+      var index = 0
+      input.foreach { attr =>
+        map.putIfAbsent(attr.exprId, index)
+        index += 1
+      }
+      map
+    }
     expression.transform { case a: AttributeReference =>
       attachTree(a, "Binding attribute") {
-        val ordinal = input.indexWhere(_.exprId == a.exprId)
+        val ordinal = inputToOrdinal.getOrDefault(a.exprId, -1)
         if (ordinal == -1) {
           if (allowFailures) {
             a
@@ -94,7 +104,7 @@ object BindReferences extends Logging {
             sys.error(s"Couldn't find $a in ${input.mkString("[", ",", "]")}")
           }
         } else {
-          BoundReference(ordinal, a.dataType, input(ordinal).nullable)
+          BoundReference(ordinal, a.dataType, inputArr(ordinal).nullable)
         }
       }
     }.asInstanceOf[A] // Kind of a hack, but safe. TODO: Tighten return type when possible.

From b1a76466e5d935bfcf1b5c48dc74053b16a78132 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Fri, 3 Jun 2016 17:36:54 -0700
Subject: [PATCH 02/12] Fix Java 7 compilation issue.

---
 .../spark/sql/catalyst/expressions/BoundAttribute.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index 9308ec70e5b8..c1a14d1b5a3d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -89,7 +89,9 @@ object BindReferences extends Logging {
       val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
       var index = 0
       input.foreach { attr =>
-        map.putIfAbsent(attr.exprId, index)
+        if (!map.containsKey(attr.exprId)) {
+          map.put(attr.exprId, index)
+        }
         index += 1
       }
       map

From 38e8a9935e3ae0a166f7bcd3231bef50ef7ec71b Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Fri, 3 Jun 2016 17:39:28 -0700
Subject: [PATCH 03/12] Whoops, getOrDefault is also new in Java 8.
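
[Editorial note: java.util.Map.putIfAbsent and Map.getOrDefault are default
methods that only exist as of Java 8, which is why patches 02 and 03 back them
out in favor of Java-7-safe equivalents. A minimal standalone sketch of the
two replacement idioms (illustrative only; the map and keys below are
placeholders, not code from this series):

    import java.util.HashMap

    val map = new HashMap[String, Integer]()

    // putIfAbsent(k, v) without Java 8: guard the put with containsKey.
    if (!map.containsKey("a")) {
      map.put("a", 0)
    }

    // getOrDefault(k, d) without Java 8: get returns null for a missing key,
    // so wrap the boxed value in Option and supply the default via getOrElse.
    val ordinal: Int = Option(map.get("b")).map(_.intValue).getOrElse(-1)
]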
---
 .../apache/spark/sql/catalyst/expressions/BoundAttribute.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index c1a14d1b5a3d..e97d0bb951d0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -98,7 +98,7 @@ object BindReferences extends Logging {
     }
     expression.transform { case a: AttributeReference =>
       attachTree(a, "Binding attribute") {
-        val ordinal = inputToOrdinal.getOrDefault(a.exprId, -1)
+        val ordinal = Option(inputToOrdinal.get(a.exprId)).getOrElse(-1)
         if (ordinal == -1) {
           if (allowFailures) {
             a

From 0b412b0069dee64c13daa836f43013380c9aa273 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Fri, 3 Jun 2016 17:58:28 -0700
Subject: [PATCH 04/12] Amortize map construction.

---
 .../catalyst/expressions/BoundAttribute.scala | 20 ++++----------------
 .../sql/catalyst/expressions/package.scala    | 22 +++++++++++++++++++++-
 .../spark/sql/catalyst/plans/QueryPlan.scala  |  2 +-
 .../aggregate/HashAggregateExec.scala         |  2 +-
 4 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index e97d0bb951d0..8ecd6480ce0b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -82,31 +82,19 @@ object BindReferences extends Logging {
 
   def bindReference[A <: Expression](
       expression: A,
-      input: Seq[Attribute],
+      input: AttributeSeq,
       allowFailures: Boolean = false): A = {
-    val inputArr = input.toArray
-    val inputToOrdinal = {
-      val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
-      var index = 0
-      input.foreach { attr =>
-        if (!map.containsKey(attr.exprId)) {
-          map.put(attr.exprId, index)
-        }
-        index += 1
-      }
-      map
-    }
     expression.transform { case a: AttributeReference =>
       attachTree(a, "Binding attribute") {
-        val ordinal = Option(inputToOrdinal.get(a.exprId)).getOrElse(-1)
+        val ordinal = input.getOrdinal(a.exprId)
         if (ordinal == -1) {
           if (allowFailures) {
             a
           } else {
-            sys.error(s"Couldn't find $a in ${input.mkString("[", ",", "]")}")
+            sys.error(s"Couldn't find $a in ${input.attrs.mkString("[", ",", "]")}")
           }
         } else {
-          BoundReference(ordinal, a.dataType, inputArr(ordinal).nullable)
+          BoundReference(ordinal, a.dataType, input(ordinal).nullable)
         }
       }
     }.asInstanceOf[A] // Kind of a hack, but safe. TODO: Tighten return type when possible.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 23baa6f7837f..2391ab4cb9c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -86,11 +86,31 @@ package object expressions {
   /**
    * Helper functions for working with `Seq[Attribute]`.
    */
-  implicit class AttributeSeq(attrs: Seq[Attribute]) {
+  implicit class AttributeSeq(val attrs: Seq[Attribute]) {
     /** Creates a StructType with a schema matching this `Seq[Attribute]`. */
     def toStructType: StructType = {
       StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable)))
     }
+
+    private lazy val inputArr = attrs.toArray
+
+    private lazy val inputToOrdinal = {
+      val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
+      var index = 0
+      attrs.foreach { attr =>
+        if (!map.containsKey(attr.exprId)) {
+          map.put(attr.exprId, index)
+        }
+        index += 1
+      }
+      map
+    }
+
+    def apply(ordinal: Int): Attribute = inputArr(ordinal)
+
+    def getOrdinal(exprId: ExprId): Int = {
+      Option(inputToOrdinal.get(exprId)).getOrElse(-1)
+    }
   }
 
   /**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 6784c3ae1d7e..14654dc3d273 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -296,7 +296,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
   /**
    * All the attributes that are used for this plan.
    */
-  lazy val allAttributes: Seq[Attribute] = children.flatMap(_.output)
+  lazy val allAttributes: AttributeSeq = children.flatMap(_.output)
 
   private def cleanExpression(e: Expression): Expression = e match {
     case a: Alias =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index f5bc0628b645..f270ca07554f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -49,7 +49,7 @@ case class HashAggregateExec(
 
   require(HashAggregateExec.supportsAggregate(aggregateBufferAttributes))
 
-  override lazy val allAttributes: Seq[Attribute] =
+  override lazy val allAttributes: AttributeSeq =
     child.output ++ aggregateBufferAttributes ++ aggregateAttributes ++
       aggregateExpressions.flatMap(_.aggregateFunction.inputAggBufferAttributes)
 

From e7c4150d486a37390b7b9708ce392942c29756b2 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sat, 4 Jun 2016 16:20:07 -0700
Subject: [PATCH 05/12] Fix similar loop in InMemoryTableScanExec.

---
 .../spark/sql/execution/columnar/InMemoryTableScanExec.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
index bd55e1a8751d..f08f88e4712d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
@@ -310,7 +310,7 @@ private[sql] case class InMemoryTableScanExec(
     // within the map Partitions closure.
     val schema = relation.partitionStatistics.schema
     val schemaIndex = schema.zipWithIndex
-    val relOutput = relation.output
+    val relOutput: AttributeSeq = relation.output
     val buffers = relation.cachedColumnBuffers
 
     buffers.mapPartitionsInternal { cachedBatchIterator =>
@@ -321,7 +321,7 @@ private[sql] case class InMemoryTableScanExec(
 
       // Find the ordinals and data types of the requested columns.
       val (requestedColumnIndices, requestedColumnDataTypes) = attributes.map { a =>
-        relOutput.indexWhere(_.exprId == a.exprId) -> a.dataType
+        relOutput.getOrdinal(a.exprId) -> a.dataType
       }.unzip
 
       // Do partition batch pruning if enabled

From 210dbd36ae77a609e9a8d269f282810d51759fe1 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sat, 4 Jun 2016 16:25:24 -0700
Subject: [PATCH 06/12] Rename getOrdinal to getOrdinalWithExprId

---
 .../apache/spark/sql/catalyst/expressions/BoundAttribute.scala | 2 +-
 .../org/apache/spark/sql/catalyst/expressions/package.scala    | 2 +-
 .../spark/sql/execution/columnar/InMemoryTableScanExec.scala   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index 8ecd6480ce0b..78ff9d188ffd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -86,7 +86,7 @@ object BindReferences extends Logging {
       allowFailures: Boolean = false): A = {
     expression.transform { case a: AttributeReference =>
       attachTree(a, "Binding attribute") {
-        val ordinal = input.getOrdinal(a.exprId)
+        val ordinal = input.getOrdinalWithExprId(a.exprId)
         if (ordinal == -1) {
           if (allowFailures) {
             a
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 2391ab4cb9c2..b37e437ca41e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -108,7 +108,7 @@ package object expressions {
 
     def apply(ordinal: Int): Attribute = inputArr(ordinal)
 
-    def getOrdinal(exprId: ExprId): Int = {
+    def getOrdinalWithExprId(exprId: ExprId): Int = {
       Option(inputToOrdinal.get(exprId)).getOrElse(-1)
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
index f08f88e4712d..c9c8fc19b29f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
@@ -321,7 +321,7 @@ private[sql] case class InMemoryTableScanExec(
 
       // Find the ordinals and data types of the requested columns.
       val (requestedColumnIndices, requestedColumnDataTypes) = attributes.map { a =>
-        relOutput.getOrdinal(a.exprId) -> a.dataType
+        relOutput.getOrdinalWithExprId(a.exprId) -> a.dataType
       }.unzip
 
       // Do partition batch pruning if enabled

From dd94e294c4bc197e800ccffe73defb56d2b2eba2 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sat, 4 Jun 2016 16:38:02 -0700
Subject: [PATCH 07/12] Delete some related yet dead code in AttributeMap.
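
[Editorial note: the byIndex/toIndex helpers removed below had no remaining
callers, and byIndex overlaps with what AttributeSeq now offers. Roughly (an
illustrative sketch, assuming a `schema: Seq[Attribute]` is in scope; note the
two differ on duplicate expression ids -- AttributeMap's toMap keeps the last
ordinal, while AttributeSeq keeps the first):

    // What the deleted helper built: an eager Attribute -> ordinal map.
    val eager: AttributeMap[Int] = AttributeMap(schema.zipWithIndex)

    // The AttributeSeq route to the same ordinal, built lazily on first use.
    val ordinal: Int = new AttributeSeq(schema).getOrdinalWithExprId(schema.head.exprId)
]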
---
 .../spark/sql/catalyst/expressions/AttributeMap.scala | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala
index ef3cc554b79c..96a11e352ec5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala
@@ -26,13 +26,6 @@ object AttributeMap {
   def apply[A](kvs: Seq[(Attribute, A)]): AttributeMap[A] = {
     new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap)
   }
-
-  /** Given a schema, constructs an [[AttributeMap]] from [[Attribute]] to ordinal */
-  def byIndex(schema: Seq[Attribute]): AttributeMap[Int] = apply(schema.zipWithIndex)
-
-  /** Given a schema, constructs a map from ordinal to Attribute. */
-  def toIndex(schema: Seq[Attribute]): Map[Int, Attribute] =
-    schema.zipWithIndex.map { case (a, i) => i -> a }.toMap
 }
 
 class AttributeMap[A](baseMap: Map[ExprId, (Attribute, A)])

From b933fe08e61b529e77c0923d5b559102463a1ff4 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sat, 4 Jun 2016 16:44:06 -0700
Subject: [PATCH 08/12] Code style and readability improvements.

---
 .../spark/sql/catalyst/expressions/package.scala | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index b37e437ca41e..ce62b08bf47d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst
 
+import com.google.common.collect.Maps
+
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.types.{StructField, StructType}
 
@@ -94,12 +96,14 @@ package object expressions {
 
     private lazy val inputArr = attrs.toArray
 
-    private lazy val inputToOrdinal = {
-      val map = new java.util.HashMap[ExprId, Int](inputArr.length * 2)
+    private lazy val exprIdToOrdinal = {
+      val arr = inputArr
+      val map = Maps.newHashMapWithExpectedSize[ExprId, Int](arr.length)
       var index = 0
-      attrs.foreach { attr =>
-        if (!map.containsKey(attr.exprId)) {
-          map.put(attr.exprId, index)
+      while (index < arr.length) {
+        val exprId = arr(index).exprId
+        if (!map.containsKey(exprId)) {
+          map.put(exprId, index)
         }
         index += 1
       }

From 4efd3ee498902cbd9e5b8905c50d514771dcc6ac Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sat, 4 Jun 2016 16:50:23 -0700
Subject: [PATCH 09/12] Comments.
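
[Editorial note: the previous patch replaced the hand-tuned
`new java.util.HashMap(inputArr.length * 2)` capacity hint with Guava's
helper, which derives an initial capacity large enough to hold the expected
number of entries without rehashing under HashMap's default 0.75 load factor.
A minimal sketch of the same call in isolation (the key/value types and size
here are placeholders):

    import com.google.common.collect.Maps

    val map = Maps.newHashMapWithExpectedSize[String, Integer](16)
]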
---
 .../spark/sql/catalyst/expressions/package.scala | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index ce62b08bf47d..884629612ebc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -94,10 +94,13 @@ package object expressions {
       StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable)))
     }
 
-    private lazy val inputArr = attrs.toArray
+    // It's possible that `attrs` is a linked list, which can lead to bad O(n^2) loops when
+    // accessing attributes by their ordinals. To avoid this performance penalty, convert the input
+    // to an array.
+    private lazy val attrsArray = attrs.toArray
 
     private lazy val exprIdToOrdinal = {
-      val arr = inputArr
+      val arr = attrsArray
       val map = Maps.newHashMapWithExpectedSize[ExprId, Int](arr.length)
       var index = 0
       while (index < arr.length) {
@@ -110,8 +113,14 @@ package object expressions {
       map
     }
 
-    def apply(ordinal: Int): Attribute = inputArr(ordinal)
+    /**
+     * Returns the attribute at the given index.
+     */
+    def apply(ordinal: Int): Attribute = attrsArray(ordinal)
 
+    /**
+     * Returns the index of first attribute with a matching expression id, or -1 if no match exists.
+     */
     def getOrdinalWithExprId(exprId: ExprId): Int = {
       Option(exprIdToOrdinal.get(exprId)).getOrElse(-1)
     }

From 5504b6c2dd3ac7959b2cb7e139a54208368a9a45 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sun, 5 Jun 2016 11:17:39 -0700
Subject: [PATCH 10/12] Address Eric's comments.

---
 .../sql/catalyst/expressions/BoundAttribute.scala  |  2 +-
 .../spark/sql/catalyst/expressions/package.scala   | 15 +++++++--------
 .../execution/columnar/InMemoryTableScanExec.scala |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index 78ff9d188ffd..7d16118c9d59 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -86,7 +86,7 @@ object BindReferences extends Logging {
       allowFailures: Boolean = false): A = {
     expression.transform { case a: AttributeReference =>
       attachTree(a, "Binding attribute") {
-        val ordinal = input.getOrdinalWithExprId(a.exprId)
+        val ordinal = input.indexOf(a.exprId)
         if (ordinal == -1) {
           if (allowFailures) {
             a
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 884629612ebc..00dbf29988c7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -102,13 +102,12 @@ package object expressions {
     private lazy val exprIdToOrdinal = {
       val arr = attrsArray
       val map = Maps.newHashMapWithExpectedSize[ExprId, Int](arr.length)
-      var index = 0
-      while (index < arr.length) {
-        val exprId = arr(index).exprId
-        if (!map.containsKey(exprId)) {
-          map.put(exprId, index)
-        }
-        index += 1
+      // Iterate over the array in reverse order so that the final map value is the first attribute
+      // with a given expression id.
+      var index = arr.length - 1
+      while (index >= 0) {
+        map.put(arr(index).exprId, index)
+        index -= 1
       }
       map
     }
@@ -121,7 +120,7 @@ package object expressions {
     /**
      * Returns the index of first attribute with a matching expression id, or -1 if no match exists.
      */
-    def getOrdinalWithExprId(exprId: ExprId): Int = {
+    def indexOf(exprId: ExprId): Int = {
       Option(exprIdToOrdinal.get(exprId)).getOrElse(-1)
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
index c9c8fc19b29f..a1c2f0a8fbcf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
@@ -321,7 +321,7 @@ private[sql] case class InMemoryTableScanExec(
 
       // Find the ordinals and data types of the requested columns.
       val (requestedColumnIndices, requestedColumnDataTypes) = attributes.map { a =>
-        relOutput.getOrdinalWithExprId(a.exprId) -> a.dataType
+        relOutput.indexOf(a.exprId) -> a.dataType
       }.unzip
 
       // Do partition batch pruning if enabled

From 99197b7f58cfe17b587d1b3be8d7ad0dd4041151 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Mon, 6 Jun 2016 00:34:15 -0700
Subject: [PATCH 11/12] Add missing `@transient` annotations.

---
 .../org/apache/spark/sql/catalyst/expressions/package.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 00dbf29988c7..adeb576b29e8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -97,9 +97,9 @@ package object expressions {
     // It's possible that `attrs` is a linked list, which can lead to bad O(n^2) loops when
     // accessing attributes by their ordinals. To avoid this performance penalty, convert the input
     // to an array.
-    private lazy val attrsArray = attrs.toArray
+    @transient private lazy val attrsArray = attrs.toArray
 
-    private lazy val exprIdToOrdinal = {
+    @transient private lazy val exprIdToOrdinal = {
       val arr = attrsArray
       val map = Maps.newHashMapWithExpectedSize[ExprId, Int](arr.length)
       // Iterate over the array in reverse order so that the final map value is the first attribute

From 5e9c258dfb592e47800df9c5205a7b0c6c07ed4a Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Mon, 6 Jun 2016 00:36:00 -0700
Subject: [PATCH 12/12] Also extend Serializable.

---
 .../org/apache/spark/sql/catalyst/expressions/package.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index adeb576b29e8..81f5bb4a6509 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -88,7 +88,7 @@ package object expressions {
   /**
    * Helper functions for working with `Seq[Attribute]`.
    */
-  implicit class AttributeSeq(val attrs: Seq[Attribute]) {
+  implicit class AttributeSeq(val attrs: Seq[Attribute]) extends Serializable {
     /** Creates a StructType with a schema matching this `Seq[Attribute]`. */
     def toStructType: StructType = {
       StructType(attrs.map(a => StructField(a.name, a.dataType, a.nullable)))
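
[Editorial note: taken together, the series turns attribute binding from an
O(n) `indexWhere` scan per attribute into an O(1) hash lookup against a
per-AttributeSeq cache. A hedged end-to-end sketch of the resulting usage
(`child`, `projectList`, and `bound` are hypothetical names, not from the
patches):

    // Build the wrapper once; its attrsArray and exprIdToOrdinal caches are
    // lazy vals, so they are computed on first lookup and then reused for
    // every attribute bound against the same input.
    val input: AttributeSeq = child.output
    val bound = projectList.map { e =>
      BindReferences.bindReference(e, input)  // per-attribute map lookup
    }

Because the wrapper now extends Serializable while keeping both caches
@transient, instances captured in task closures (for example
QueryPlan.allAttributes) serialize only the underlying Seq[Attribute]; the
array and map are rebuilt lazily after deserialization.]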