10 changes: 9 additions & 1 deletion pom.xml
@@ -18,7 +18,7 @@
<artifact.scala.version>${scala.major.version}</artifact.scala.version>
<scala-maven-plugin.version>4.4.0</scala-maven-plugin.version>

<spark.version>3.2.0</spark.version>
<spark.version>3.2.1</spark.version>
</properties>

<name>deequ</name>
@@ -74,6 +74,14 @@
<version>${scala.version}</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.scala-lang/scala-reflect -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>${scala.version}</version>
</dependency>
Comment on lines +77 to +82
Contributor Author

This is to fix the following error that happened on my local PC:

*** RUN ABORTED *** (1 minute, 15 seconds)
  java.lang.VerifyError: class scala.tools.nsc.reporters.Reporter overrides final method echo.(Ljava/lang/String;)V
  at java.lang.ClassLoader.defineClass1(Native Method)
  at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
  at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
  at java.net.URLClassLoader.defineClass(URLClassLoader.java:468)
  at java.net.URLClassLoader.access$100(URLClassLoader.java:74)
  at java.net.URLClassLoader$1.run(URLClassLoader.java:369)
  at java.net.URLClassLoader$1.run(URLClassLoader.java:363)
  at java.security.AccessController.doPrivileged(Native Method)
  at java.net.URLClassLoader.findClass(URLClassLoader.java:362)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
  at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352)
  at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
  at com.amazon.deequ.suggestions.ConstraintSuggestionRunnerTest$.verificationFnFromConstraintSrc(ConstraintSuggestionRunnerTest.scala:291)
  at com.amazon.deequ.suggestions.ConstraintSuggestionRunnerTest.suggestHasDataTypeConstraintVerifyTest(ConstraintSuggestionRunnerTest.scala:259)
  at com.amazon.deequ.suggestions.ConstraintSuggestionRunnerTest.$anonfun$new$22(ConstraintSuggestionRunnerTest.scala:221)
  at com.amazon.deequ.suggestions.ConstraintSuggestionRunnerTest.$anonfun$new$22$adapted(ConstraintSuggestionRunnerTest.scala:215)
  at com.amazon.deequ.SparkContextSpec.withSparkSession(SparkContextSpec.scala:33)
  at com.amazon.deequ.SparkContextSpec.withSparkSession$(SparkContextSpec.scala:30)
  at com.amazon.deequ.suggestions.ConstraintSuggestionRunnerTest.withSparkSession(ConstraintSuggestionRunnerTest.scala:36)
  at com.amazon.deequ.suggestions.ConstraintSuggestionRunnerTest.$anonfun$new$21(ConstraintSuggestionRunnerTest.scala:215)
  at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
  at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85)
  at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83)
  at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
  at org.scalatest.Transformer.apply(Transformer.scala:22)
  at org.scalatest.Transformer.apply(Transformer.scala:20)
  at org.scalatest.wordspec.AnyWordSpecLike$$anon$3.apply(AnyWordSpecLike.scala:1076)
  at org.scalatest.TestSuite.withFixture(TestSuite.scala:196)
  at org.scalatest.TestSuite.withFixture$(TestSuite.scala:195)
  at org.scalatest.wordspec.AnyWordSpec.withFixture(AnyWordSpec.scala:1879)
  at org.scalatest.wordspec.AnyWordSpecLike.invokeWithFixture$1(AnyWordSpecLike.scala:1074)
  at org.scalatest.wordspec.AnyWordSpecLike.$anonfun$runTest$1(AnyWordSpecLike.scala:1086)
  at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
  at org.scalatest.wordspec.AnyWordSpecLike.runTest(AnyWordSpecLike.scala:1086)
  at org.scalatest.wordspec.AnyWordSpecLike.runTest$(AnyWordSpecLike.scala:1068)
  at org.scalatest.wordspec.AnyWordSpec.runTest(AnyWordSpec.scala:1879)
  at org.scalatest.wordspec.AnyWordSpecLike.$anonfun$runTests$1(AnyWordSpecLike.scala:1145)
  at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
  at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:390)
  at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:427)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
  at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
  at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
  at org.scalatest.wordspec.AnyWordSpecLike.runTests(AnyWordSpecLike.scala:1145)
  at org.scalatest.wordspec.AnyWordSpecLike.runTests$(AnyWordSpecLike.scala:1144)
  at org.scalatest.wordspec.AnyWordSpec.runTests(AnyWordSpec.scala:1879)
  at org.scalatest.Suite.run(Suite.scala:1112)
  at org.scalatest.Suite.run$(Suite.scala:1094)
  at org.scalatest.wordspec.AnyWordSpec.org$scalatest$wordspec$AnyWordSpecLike$$super$run(AnyWordSpec.scala:1879)
  at org.scalatest.wordspec.AnyWordSpecLike.$anonfun$run$1(AnyWordSpecLike.scala:1190)
  at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
  at org.scalatest.wordspec.AnyWordSpecLike.run(AnyWordSpecLike.scala:1190)
  at org.scalatest.wordspec.AnyWordSpecLike.run$(AnyWordSpecLike.scala:1188)
  at org.scalatest.wordspec.AnyWordSpec.run(AnyWordSpec.scala:1879)
  at org.scalatest.Suite.callExecuteOnSuite$1(Suite.scala:1175)
  at org.scalatest.Suite.$anonfun$runNestedSuites$1(Suite.scala:1222)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
  at org.scalatest.Suite.runNestedSuites(Suite.scala:1220)
  at org.scalatest.Suite.runNestedSuites$(Suite.scala:1154)
  at org.scalatest.tools.DiscoverySuite.runNestedSuites(DiscoverySuite.scala:30)
  at org.scalatest.Suite.run(Suite.scala:1109)
  at org.scalatest.Suite.run$(Suite.scala:1094)
  at org.scalatest.tools.DiscoverySuite.run(DiscoverySuite.scala:30)
  at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
  at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1320)
  at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1314)
  at scala.collection.immutable.List.foreach(List.scala:392)
  at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1314)
  at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:993)
  at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:971)
  at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1480)
  at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:971)
  at org.scalatest.tools.Runner$.main(Runner.scala:775)
  at org.scalatest.tools.Runner.main(Runner.scala)
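
For context, the aborted test compiles constraint source code at runtime, which loads compiler and reflection classes together; if scala-compiler and scala-reflect resolve to different Scala versions, class loading can fail with exactly this kind of VerifyError. Pinning scala-reflect to ${scala.version} keeps the two in sync. A minimal sketch of the runtime-compilation pattern involved (illustrative only, using the standard ToolBox API rather than deequ's exact test code):

import scala.reflect.runtime.{universe => ru}
import scala.tools.reflect.ToolBox

object ToolBoxSketch {
  def main(args: Array[String]): Unit = {
    // mkToolBox is provided by scala-compiler; the mirror comes from scala-reflect.
    // Mismatched versions of the two artifacts surface as a VerifyError at class load.
    val toolBox = ru.runtimeMirror(getClass.getClassLoader).mkToolBox()
    val inc = toolBox.eval(toolBox.parse("(x: Int) => x + 1")).asInstanceOf[Int => Int]
    println(inc(41)) // prints 42
  }
}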



<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.major.version}</artifactId>
@@ -1,19 +1,18 @@
/**
* Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not
* use this file except in compliance with the License. A copy of the License
* is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

* Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
Contributor Author

@tanvn tanvn Feb 11, 2022


Formatted using scalafmt to pass the scalastyle check.

*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not
* use this file except in compliance with the License. A copy of the License
* is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/
package org.apache.spark.sql.catalyst.expressions.aggregate

import org.apache.spark.sql.catalyst.InternalRow
@@ -26,12 +25,13 @@ import org.apache.spark.sql.types._

/** Adjusted version of org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile
* (github tag v2.2.0) */
private[sql] case class StatefulApproxQuantile(
child: Expression,
accuracyExpression: Expression,
override val mutableAggBufferOffset: Int,
override val inputAggBufferOffset: Int)
extends TypedImperativeAggregate[PercentileDigest] with ImplicitCastInputTypes with BinaryLike[Expression] {
private[sql] case class StatefulApproxQuantile(child: Expression,
accuracyExpression: Expression,
override val mutableAggBufferOffset: Int,
override val inputAggBufferOffset: Int)
extends TypedImperativeAggregate[PercentileDigest]
with ImplicitCastInputTypes
with BinaryLike[Expression] {

def this(child: Expression, accuracyExpression: Expression) = {
this(child, accuracyExpression, 0, 0)
@@ -111,11 +111,12 @@ private[sql] case class StatefulApproxQuantile(
}

override def left: Expression = child

override def right: Expression = accuracyExpression
// override def third: Expression = accuracyExpression

protected def withNewChildrenInternal(
newFirst: Expression, newSecond: Expression): StatefulApproxQuantile =
protected def withNewChildrenInternal(newFirst: Expression,
newSecond: Expression): StatefulApproxQuantile =
copy(child = newFirst, accuracyExpression = newSecond)

// protected def withNewChildrenInternal(
@@ -50,4 +50,8 @@ private[sql] class StatefulCorrelation(
val state = Seq(super.hashCode(), evaluateExpression)
state.map { _.hashCode() }.foldLeft(0) {(a, b) => 31 * a + b }
}

override protected def withNewChildrenInternal(newLeft: Expression,
newRight: Expression): StatefulCorrelation =
new StatefulCorrelation(newLeft, newRight, nullOnDivideByZero)
Comment on lines +54 to +56
Contributor Author

@tanvn tanvn Feb 11, 2022


This is to fix a failing test on baseCheck.hasCorrelation:
https://github.com/awslabs/deequ/blob/master/src/test/scala/com/amazon/deequ/checks/CheckTest.scala#L577

If we do not provide this withNewChildrenInternal override, the implementation inherited from Corr is used, which leads to incorrect results.
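
In Spark 3.2, tree transformations rebuild expressions through withNewChildrenInternal, so without the override a rewrite silently replaces StatefulCorrelation with a plain Corr. A toy sketch of the pattern (hypothetical classes, not deequ code):

// Base stands in for Corr, Stateful for StatefulCorrelation.
class Base(val left: Int, val right: Int) {
  def withNewChildren(l: Int, r: Int): Base = new Base(l, r) // rebuilds as Base, losing the subclass
}

class Stateful(l: Int, r: Int) extends Base(l, r) {
  // Overriding keeps the node's type (and its extra behavior) across rewrites.
  override def withNewChildren(l: Int, r: Int): Stateful = new Stateful(l, r)
}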

}
1 change: 1 addition & 0 deletions src/test/scala/com/amazon/deequ/SparkContextSpec.scala
@@ -78,6 +78,7 @@ trait SparkContextSpec {
.appName("test")
.config("spark.ui.enabled", "false")
.config("spark.sql.shuffle.partitions", 2.toString)
.config("spark.sql.adaptive.enabled", value = false)
Contributor Author


This is to fix failing tests that verify the submitted job count from SparkSessionStats.
Because adaptive query execution (AQE) is enabled by default from Spark 3.2, the plans Spark creates to execute tasks have changed significantly.
Here we disable adaptive query execution to make Spark behave the same as 3.1.
This fixes failing tests like the one below:
https://github.com/awslabs/deequ/blob/master/src/test/scala/com/amazon/deequ/profiles/ColumnProfilerRunnerTest.scala#L63
(There were several failed tests like this one; the actual number of submitted jobs is 5 when AQE is enabled and 3 when AQE is disabled.)
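
For reference, counting submitted jobs can be done with a SparkListener; a minimal sketch of the idea (a hypothetical helper, not SparkSessionStats itself):

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
import org.apache.spark.sql.SparkSession

// Counts every job submitted to the SparkContext it is registered on.
class JobCountingListener extends SparkListener {
  @volatile var jobCount: Int = 0
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = jobCount += 1
}

object JobCountSketch {
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder()
      .master("local")
      .config("spark.sql.adaptive.enabled", value = false) // the toggle added in this PR
      .getOrCreate()
    val listener = new JobCountingListener
    session.sparkContext.addSparkListener(listener)
    session.range(100).groupBy().count().collect()
    println(s"jobs submitted: ${listener.jobCount}")
  }
}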

Contributor Author


The plan as explained on the master branch (Spark 3.1):

== Parsed Logical Plan ==
'Aggregate [sum(cast(isnotnull('item) as int)) AS sum(CAST((item IS NOT NULL) AS INT))#915, count(1) AS count(1)#916L, stateful_approx_count_distinct('item, 0, 0) AS stateful_approx_count_distinct(item)#1021, statefuldatatype('item, org.apache.spark.sql.StatefulDataType@4ba1c1a2, 0, 0) AS statefuldatatype(item)#1032, sum(cast(isnotnull('att1) as int)) AS sum(CAST((att1 IS NOT NULL) AS INT))#1033, count(1) AS count(1)#1034L, stateful_approx_count_distinct('att1, 0, 0) AS stateful_approx_count_distinct(att1)#1139, sum(cast(isnotnull('att2) as int)) AS sum(CAST((att2 IS NOT NULL) AS INT))#1140, count(1) AS count(1)#1141L, stateful_approx_count_distinct('att2, 0, 0) AS stateful_approx_count_distinct(att2)#1246, sum(cast(isnotnull('att3) as int)) AS sum(CAST((att3 IS NOT NULL) AS INT))#1247, count(1) AS count(1)#1248L, stateful_approx_count_distinct('att3, 0, 0) AS stateful_approx_count_distinct(att3)#1353, count(1) AS count(1)#1354L]
+- Project [_1#4 AS item#13, _2#5 AS att1#14, _3#6 AS att2#15, _4#7 AS att3#16]
   +- LocalRelation [_1#4, _2#5, _3#6, _4#7]

== Analyzed Logical Plan ==
sum(CAST((item IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(item): binary, statefuldatatype(item): binary, sum(CAST((att1 IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(att1): binary, sum(CAST((att2 IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(att2): binary, sum(CAST((att3 IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(att3): binary, count(1): bigint
Aggregate [sum(cast(cast(isnotnull(item#13) as int) as bigint)) AS sum(CAST((item IS NOT NULL) AS INT))#915L, count(1) AS count(1)#916L, stateful_approx_count_distinct(item#13, 0, 0) AS stateful_approx_count_distinct(item)#1021, statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@4ba1c1a2, 0, 0) AS statefuldatatype(item)#1032, sum(cast(cast(isnotnull(att1#14) as int) as bigint)) AS sum(CAST((att1 IS NOT NULL) AS INT))#1033L, count(1) AS count(1)#1034L, stateful_approx_count_distinct(att1#14, 0, 0) AS stateful_approx_count_distinct(att1)#1139, sum(cast(cast(isnotnull(att2#15) as int) as bigint)) AS sum(CAST((att2 IS NOT NULL) AS INT))#1140L, count(1) AS count(1)#1141L, stateful_approx_count_distinct(att2#15, 0, 0) AS stateful_approx_count_distinct(att2)#1246, sum(cast(cast(isnotnull(att3#16) as int) as bigint)) AS sum(CAST((att3 IS NOT NULL) AS INT))#1247L, count(1) AS count(1)#1248L, stateful_approx_count_distinct(att3#16, 0, 0) AS stateful_approx_count_distinct(att3)#1353, count(1) AS count(1)#1354L]
+- Project [_1#4 AS item#13, _2#5 AS att1#14, _3#6 AS att2#15, _4#7 AS att3#16]
   +- LocalRelation [_1#4, _2#5, _3#6, _4#7]

== Optimized Logical Plan ==
Aggregate [sum(cast(cast(isnotnull(item#13) as int) as bigint)) AS sum(CAST((item IS NOT NULL) AS INT))#915L, count(1) AS count(1)#916L, stateful_approx_count_distinct(item#13, 0, 0) AS stateful_approx_count_distinct(item)#1021, statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@4ba1c1a2, 0, 0) AS statefuldatatype(item)#1032, sum(1) AS sum(CAST((att1 IS NOT NULL) AS INT))#1033L, count(1) AS count(1)#1034L, stateful_approx_count_distinct(att1#14, 0, 0) AS stateful_approx_count_distinct(att1)#1139, sum(1) AS sum(CAST((att2 IS NOT NULL) AS INT))#1140L, count(1) AS count(1)#1141L, stateful_approx_count_distinct(att2#15, 0, 0) AS stateful_approx_count_distinct(att2)#1246, sum(1) AS sum(CAST((att3 IS NOT NULL) AS INT))#1247L, count(1) AS count(1)#1248L, stateful_approx_count_distinct(att3#16, 0, 0) AS stateful_approx_count_distinct(att3)#1353, count(1) AS count(1)#1354L]
+- LocalRelation [item#13, att1#14, att2#15, att3#16]

== Physical Plan ==
HashAggregate(keys=[], functions=[sum(cast(cast(isnotnull(item#13) as int) as bigint)), count(1), stateful_approx_count_distinct(item#13, 0, 0), statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@4ba1c1a2, 0, 0), sum(1), stateful_approx_count_distinct(att1#14, 0, 0), stateful_approx_count_distinct(att2#15, 0, 0), stateful_approx_count_distinct(att3#16, 0, 0)], output=[sum(CAST((item IS NOT NULL) AS INT))#915L, count(1)#916L, stateful_approx_count_distinct(item)#1021, statefuldatatype(item)#1032, sum(CAST((att1 IS NOT NULL) AS INT))#1033L, count(1)#1034L, stateful_approx_count_distinct(att1)#1139, sum(CAST((att2 IS NOT NULL) AS INT))#1140L, count(1)#1141L, stateful_approx_count_distinct(att2)#1246, sum(CAST((att3 IS NOT NULL) AS INT))#1247L, count(1)#1248L, stateful_approx_count_distinct(att3)#1353, count(1)#1354L])
+- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#10]
   +- HashAggregate(keys=[], functions=[partial_sum(cast(cast(isnotnull(item#13) as int) as bigint)), partial_count(1), partial_stateful_approx_count_distinct(item#13, 0, 0), partial_statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@4ba1c1a2, 0, 0), partial_sum(1), partial_stateful_approx_count_distinct(att1#14, 0, 0), partial_stateful_approx_count_distinct(att2#15, 0, 0), partial_stateful_approx_count_distinct(att3#16, 0, 0)], output=[sum#2224L, count#2225L, MS[0]#1407L, MS[1]#1408L, MS[2]#1409L, MS[3]#1410L, MS[4]#1411L, MS[5]#1412L, MS[6]#1413L, MS[7]#1414L, MS[8]#1415L, MS[9]#1416L, MS[10]#1417L, MS[11]#1418L, MS[12]#1419L, MS[13]#1420L, MS[14]#1421L, MS[15]#1422L, MS[16]#1423L, MS[17]#1424L, MS[18]#1425L, MS[19]#1426L, MS[20]#1427L, MS[21]#1428L, ... 192 more fields])
      +- LocalTableScan [item#13, att1#14, att2#15, att3#16]

The plan as explained on this branch (Spark 3.2.1):

== Parsed Logical Plan ==
'Aggregate [sum(cast(isnotnull('item) as int)) AS sum(CAST((item IS NOT NULL) AS INT))#915, count(1) AS count(1)#916L, stateful_approx_count_distinct('item, 0, 0) AS stateful_approx_count_distinct(item)#1021, statefuldatatype('item, org.apache.spark.sql.StatefulDataType@253b1cbd, 0, 0, None) AS statefuldatatype(item)#1032, sum(cast(isnotnull('att1) as int)) AS sum(CAST((att1 IS NOT NULL) AS INT))#1033, count(1) AS count(1)#1034L, stateful_approx_count_distinct('att1, 0, 0) AS stateful_approx_count_distinct(att1)#1139, sum(cast(isnotnull('att2) as int)) AS sum(CAST((att2 IS NOT NULL) AS INT))#1140, count(1) AS count(1)#1141L, stateful_approx_count_distinct('att2, 0, 0) AS stateful_approx_count_distinct(att2)#1246, sum(cast(isnotnull('att3) as int)) AS sum(CAST((att3 IS NOT NULL) AS INT))#1247, count(1) AS count(1)#1248L, stateful_approx_count_distinct('att3, 0, 0) AS stateful_approx_count_distinct(att3)#1353, count(1) AS count(1)#1354L]
+- Project [_1#4 AS item#13, _2#5 AS att1#14, _3#6 AS att2#15, _4#7 AS att3#16]
   +- LocalRelation [_1#4, _2#5, _3#6, _4#7]

== Analyzed Logical Plan ==
sum(CAST((item IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(item): binary, statefuldatatype(item): binary, sum(CAST((att1 IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(att1): binary, sum(CAST((att2 IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(att2): binary, sum(CAST((att3 IS NOT NULL) AS INT)): bigint, count(1): bigint, stateful_approx_count_distinct(att3): binary, count(1): bigint
Aggregate [sum(cast(isnotnull(item#13) as int)) AS sum(CAST((item IS NOT NULL) AS INT))#915L, count(1) AS count(1)#916L, stateful_approx_count_distinct(item#13, 0, 0) AS stateful_approx_count_distinct(item)#1021, statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@253b1cbd, 0, 0, None) AS statefuldatatype(item)#1032, sum(cast(isnotnull(att1#14) as int)) AS sum(CAST((att1 IS NOT NULL) AS INT))#1033L, count(1) AS count(1)#1034L, stateful_approx_count_distinct(att1#14, 0, 0) AS stateful_approx_count_distinct(att1)#1139, sum(cast(isnotnull(att2#15) as int)) AS sum(CAST((att2 IS NOT NULL) AS INT))#1140L, count(1) AS count(1)#1141L, stateful_approx_count_distinct(att2#15, 0, 0) AS stateful_approx_count_distinct(att2)#1246, sum(cast(isnotnull(att3#16) as int)) AS sum(CAST((att3 IS NOT NULL) AS INT))#1247L, count(1) AS count(1)#1248L, stateful_approx_count_distinct(att3#16, 0, 0) AS stateful_approx_count_distinct(att3)#1353, count(1) AS count(1)#1354L]
+- Project [_1#4 AS item#13, _2#5 AS att1#14, _3#6 AS att2#15, _4#7 AS att3#16]
   +- LocalRelation [_1#4, _2#5, _3#6, _4#7]

== Optimized Logical Plan ==
Aggregate [sum(cast(isnotnull(item#13) as int)) AS sum(CAST((item IS NOT NULL) AS INT))#915L, count(1) AS count(1)#916L, stateful_approx_count_distinct(item#13, 0, 0) AS stateful_approx_count_distinct(item)#1021, statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@253b1cbd, 0, 0, None) AS statefuldatatype(item)#1032, sum(1) AS sum(CAST((att1 IS NOT NULL) AS INT))#1033L, count(1) AS count(1)#1034L, stateful_approx_count_distinct(att1#14, 0, 0) AS stateful_approx_count_distinct(att1)#1139, sum(1) AS sum(CAST((att2 IS NOT NULL) AS INT))#1140L, count(1) AS count(1)#1141L, stateful_approx_count_distinct(att2#15, 0, 0) AS stateful_approx_count_distinct(att2)#1246, sum(1) AS sum(CAST((att3 IS NOT NULL) AS INT))#1247L, count(1) AS count(1)#1248L, stateful_approx_count_distinct(att3#16, 0, 0) AS stateful_approx_count_distinct(att3)#1353, count(1) AS count(1)#1354L]
+- LocalRelation [item#13, att1#14, att2#15, att3#16]

== Physical Plan ==
AdaptiveSparkPlan isFinalPlan=false
+- HashAggregate(keys=[], functions=[sum(cast(isnotnull(item#13) as int)), count(1), stateful_approx_count_distinct(item#13, 0, 0), statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@253b1cbd, 0, 0, None), sum(1), stateful_approx_count_distinct(att1#14, 0, 0), stateful_approx_count_distinct(att2#15, 0, 0), stateful_approx_count_distinct(att3#16, 0, 0)], output=[sum(CAST((item IS NOT NULL) AS INT))#915L, count(1)#916L, stateful_approx_count_distinct(item)#1021, statefuldatatype(item)#1032, sum(CAST((att1 IS NOT NULL) AS INT))#1033L, count(1)#1034L, stateful_approx_count_distinct(att1)#1139, sum(CAST((att2 IS NOT NULL) AS INT))#1140L, count(1)#1141L, stateful_approx_count_distinct(att2)#1246, sum(CAST((att3 IS NOT NULL) AS INT))#1247L, count(1)#1248L, stateful_approx_count_distinct(att3)#1353, count(1)#1354L])
   +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#11]
      +- HashAggregate(keys=[], functions=[partial_sum(cast(isnotnull(item#13) as int)), partial_count(1), partial_stateful_approx_count_distinct(item#13, 0, 0), partial_statefuldatatype(item#13, org.apache.spark.sql.StatefulDataType@253b1cbd, 0, 0, None), partial_sum(1), partial_stateful_approx_count_distinct(att1#14, 0, 0), partial_stateful_approx_count_distinct(att2#15, 0, 0), partial_stateful_approx_count_distinct(att3#16, 0, 0)], output=[sum#2224L, count#2225L, MS[0]#1407L, MS[1]#1408L, MS[2]#1409L, MS[3]#1410L, MS[4]#1411L, MS[5]#1412L, MS[6]#1413L, MS[7]#1414L, MS[8]#1415L, MS[9]#1416L, MS[10]#1417L, MS[11]#1418L, MS[12]#1419L, MS[13]#1420L, MS[14]#1421L, MS[15]#1422L, MS[16]#1423L, MS[17]#1424L, MS[18]#1425L, MS[19]#1426L, MS[20]#1427L, MS[21]#1428L, ... 192 more fields])
         +- LocalTableScan [item#13, att1#14, att2#15, att3#16]
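
The AdaptiveSparkPlan wrapper in the 3.2.1 physical plan above is the visible difference; the flag can be toggled and the plan inspected directly. A quick sketch (illustrative only):

import org.apache.spark.sql.SparkSession

object AqePlanSketch {
  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().master("local").getOrCreate()

    // With AQE on (the 3.2 default), the executed plan is wrapped in AdaptiveSparkPlan.
    session.conf.set("spark.sql.adaptive.enabled", "true")
    println(session.range(10).groupBy().count().queryExecution.executedPlan)

    // With AQE off, the plan is a plain HashAggregate/Exchange tree, as on Spark 3.1.
    session.conf.set("spark.sql.adaptive.enabled", "false")
    println(session.range(10).groupBy().count().queryExecution.executedPlan)
  }
}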

.getOrCreate()
session.sparkContext.setCheckpointDir(System.getProperty("java.io.tmpdir"))
session