From 3abfcae06727af8643e883c17346d19e0c6203da Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 11 May 2021 17:49:45 +0900 Subject: [PATCH] update to spark3.1 AFAIU the only required change is adapting to the new `nullOnDivideByZero` constructor parameter of `Corr` and `CentralMomentAgg`. In order to be consistent with the previous behavior and pass the existing test suite, this PR defaults that parameter to `false`, which is essentially equivalent to setting `spark.sql.legacy.statisticalAggregate` to `true`. --- pom.xml | 4 ++-- .../deequ/analyzers/catalyst/StatefulCorrelation.scala | 6 +++++- .../amazon/deequ/analyzers/catalyst/StatefulStdDevPop.scala | 5 ++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index cc2e50b88..cb4d87bae 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.amazon.deequ deequ - 1.2.2-SNAPSHOT + 1.2.2-spark-3.1 1.8 @@ -20,7 +20,7 @@ 4.4.0 - 2.4.7 + 3.1.1 deequ diff --git a/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulCorrelation.scala b/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulCorrelation.scala index b8f517f0e..5e6e82a34 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulCorrelation.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulCorrelation.scala @@ -21,7 +21,11 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ /** Adjusted version of org.apache.spark.sql.catalyst.expressions.aggregate.Corr */ -private[sql] class StatefulCorrelation(x: Expression, y: Expression) extends Corr(x, y) { +private[sql] class StatefulCorrelation( + x: Expression, + y: Expression, + nullOnDivideByZero: Boolean = false +) extends Corr(x, y, nullOnDivideByZero) { override def dataType: org.apache.spark.sql.types.DataType = StructType(StructField("n", DoubleType) :: StructField("xAvg", DoubleType) :: diff --git a/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulStdDevPop.scala b/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulStdDevPop.scala index f082cc709..4d24ad244 100644 --- 
a/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulStdDevPop.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/catalyst/StatefulStdDevPop.scala @@ -21,7 +21,10 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.CentralMomentAgg import org.apache.spark.sql.types._ /** Adjusted version of org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop */ -private[sql] case class StatefulStdDevPop(child: Expression) extends CentralMomentAgg(child) { +private[sql] case class StatefulStdDevPop( + child: Expression, + nullOnDivideByZero: Boolean = false +) extends CentralMomentAgg(child, nullOnDivideByZero) { override protected def momentOrder = 2