Skip to content

Commit b4adef7

Browse files
committed
Update comments
1 parent 4526135 commit b4adef7

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -214,11 +214,14 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
214214
}
215215

216216
def rewrite(aOrig: Aggregate): Aggregate = {
217-
// make children of distinct aggregations the same if they are different
218-
// only because of superficial reasons, e.g.:
219-
// "1 + col1" vs "col1 + 1", both become "1 + col1"
217+
// Make children of distinct aggregations the same if they are only
218+
// different for superficial reasons, e.g.:
219+
// "1 + col1" vs "col1 + 1", both should become "1 + col1"
220220
// or
221-
// "col1" vs "Col1", both become "col1"
221+
// "col1" vs "Col1", both should become "col1"
222+
// This could potentially reduce the number of distinct
223+
// aggregate groups, and therefore reduce the number of
224+
// projections in Expand (or eliminate the need for Expand)
222225
val a = reduceDistinctAggregateGroups(aOrig)
223226

224227
val aggExpressions = collectAggregateExprs(a)
@@ -408,6 +411,10 @@ object RewriteDistinctAggregates extends Rule[LogicalPlan] {
408411
}
409412
Aggregate(groupByAttrs, patchedAggExpressions, firstAggregate)
410413
} else {
414+
// It's possible we avoided rewriting the plan to use Expand only because
415+
// reduceDistinctAggregateGroups reduced the number of distinct aggregate groups
416+
from > 1 to 1. To prevent SparkStrategies from complaining during the sanity check,
417+
// we use the potentially patched Aggregate returned by reduceDistinctAggregateGroups.
411418
a
412419
}
413420
}

0 commit comments

Comments
 (0)