Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit b7720ba

Browse files
committed
Add an analysis rule to convert aggregate function to the new version.
1 parent 5c00f3f commit b7720ba

File tree

5 files changed

+45
-13
lines changed

5 files changed

+45
-13
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
package org.apache.spark.sql.catalyst.analysis
1919

2020
import org.apache.spark.sql.AnalysisException
21-
import org.apache.spark.sql.catalyst.expressions.aggregate2.{AggregateExpression2, AggregateFunction2}
2221
import org.apache.spark.sql.catalyst.{SimpleCatalystConf, CatalystConf}
2322
import org.apache.spark.sql.catalyst.expressions._
2423
import org.apache.spark.sql.catalyst.plans.logical._
@@ -483,11 +482,7 @@ class Analyzer(
483482
q transformExpressions {
484483
case u @ UnresolvedFunction(name, children) =>
485484
withPosition(u) {
486-
registry.lookupFunction(name, children) match {
487-
case agg2: AggregateFunction2 =>
488-
AggregateExpression2(agg2, aggregate2.Complete, false)
489-
case other => other
490-
}
485+
registry.lookupFunction(name, children)
491486
}
492487
}
493488
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ object FunctionRegistry {
148148

149149
// aggregate functions
150150
expression[Average]("avg"),
151-
expression[aggregate2.Average]("avg2"),
152151
expression[Count]("count"),
153152
expression[First]("first"),
154153
expression[Last]("last"),

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import java.beans.Introspector
2121
import java.util.Properties
2222
import java.util.concurrent.atomic.AtomicReference
2323

24+
import org.apache.spark.sql.execution.aggregate2.ConvertAggregateFunction
25+
2426
import scala.collection.JavaConversions._
2527
import scala.collection.immutable
2628
import scala.language.implicitConversions
@@ -148,6 +150,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
148150
override val extendedResolutionRules =
149151
ExtractPythonUDFs ::
150152
sources.PreInsertCastAndRename ::
153+
ConvertAggregateFunction(self) ::
151154
Nil
152155

153156
override val extendedCheckRules = Seq(

sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate2Sort.scala renamed to sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate2/Aggregate2Sort.scala

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
* limitations under the License.
1616
*/
1717

18-
package org.apache.spark.sql.execution
18+
package org.apache.spark.sql.execution.aggregate2
1919

2020
import org.apache.spark.rdd.RDD
2121
import org.apache.spark.sql.catalyst.errors._
2222
import org.apache.spark.sql.catalyst.expressions._
2323
import org.apache.spark.sql.catalyst.expressions.aggregate2._
24-
import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, AllTuples, UnspecifiedDistribution, Distribution}
24+
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, UnspecifiedDistribution}
25+
import org.apache.spark.sql.execution.{SparkPlan, UnaryNode}
2526
import org.apache.spark.sql.types.NullType
2627

2728
case class Aggregate2Sort(
@@ -71,7 +72,7 @@ case class Aggregate2Sort(
7172
case PartialMerge | Final => func
7273
}
7374
bufferOffset = aggregateExpressions(i).mode match {
74-
case Partial | PartialMerge => bufferOffset + func.bufferValueDataTypes.length
75+
case Partial | PartialMerge => bufferOffset + func.bufferSchema.length
7576
case Final | Complete => bufferOffset + 1
7677
}
7778
i += 1
@@ -88,7 +89,7 @@ case class Aggregate2Sort(
8889
var i = 0
8990
var size = 0
9091
while (i < aggregateFunctions.length) {
91-
size += aggregateFunctions(i).bufferValueDataTypes.length
92+
size += aggregateFunctions(i).bufferSchema.length
9293
i += 1
9394
}
9495
if (preShuffle) {
@@ -132,7 +133,7 @@ case class Aggregate2Sort(
132133

133134
lazy val updateProjection = {
134135
val bufferSchema = aggregateFunctions.flatMap {
135-
case ae: AlgebraicAggregate => ae.bufferSchema
136+
case ae: AlgebraicAggregate => ae.bufferAttributes
136137
}
137138
val updateExpressions = aggregateFunctions.flatMap {
138139
case ae: AlgebraicAggregate => ae.updateExpressions
@@ -145,7 +146,7 @@ case class Aggregate2Sort(
145146
val mergeProjection = {
146147
val bufferSchemata =
147148
offsetAttributes ++ aggregateFunctions.flatMap {
148-
case ae: AlgebraicAggregate => ae.bufferSchema
149+
case ae: AlgebraicAggregate => ae.bufferAttributes
149150
} ++ offsetAttributes ++ aggregateFunctions.flatMap {
150151
case ae: AlgebraicAggregate => ae.rightBufferSchema
151152
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.execution.aggregate2
19+
20+
import org.apache.spark.sql.SQLContext
21+
import org.apache.spark.sql.catalyst.expressions.{Average => Average1}
22+
import org.apache.spark.sql.catalyst.expressions.aggregate2.{Average => Average2, AggregateExpression2, Complete}
23+
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
24+
import org.apache.spark.sql.catalyst.rules.Rule
25+
26+
/**
 * Analysis rule that rewrites legacy aggregate functions into their
 * `aggregate2` counterparts when `context.conf.useSqlAggregate2` is enabled.
 * Each converted function is wrapped in an [[AggregateExpression2]] running
 * in `Complete` mode. Currently only `Average` is converted.
 */
case class ConvertAggregateFunction(context: SQLContext) extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // Leave the node untouched until every child is resolved, so that the
    // function's argument expressions have known types before conversion.
    case p: LogicalPlan if !p.childrenResolved => p

    case p if context.conf.useSqlAggregate2 =>
      p.transformExpressionsUp {
        // Swap the old Average for the new implementation. The trailing
        // `false` presumably marks the aggregate as non-DISTINCT — confirm
        // against AggregateExpression2's constructor.
        case Average1(c) => AggregateExpression2(Average2(c), Complete, false)
      }
  }
}

0 commit comments

Comments (0)