From 4de3028259ac06e31ecce5838aa89b43bfbddd27 Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Fri, 18 Dec 2020 18:20:49 +0800
Subject: [PATCH 1/3] [SPARK-33593][SQL] Vector reader got incorrect data with binary partition value

---
 .../vectorized/ColumnVectorUtils.java         |   5 +
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  26 +++
 .../orc/OrcColumnarBatchReaderSuite.scala     | 105 ++++++++++++++++++
 .../datasources/parquet/ParquetIOSuite.scala  |   9 +-
 4 files changed, 143 insertions(+), 2 deletions(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala

diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
index 829f3ce750fe..0792f5bcfeef 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
@@ -54,6 +54,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
     } else {
       if (t == DataTypes.BooleanType) {
         col.putBooleans(0, capacity, row.getBoolean(fieldIdx));
+      } else if (t == DataTypes.BinaryType) {
+        col.putByteArray(0, row.getBinary(fieldIdx));
       } else if (t == DataTypes.ByteType) {
         col.putBytes(0, capacity, row.getByte(fieldIdx));
       } else if (t == DataTypes.ShortType) {
@@ -94,6 +96,9 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
         col.putInts(0, capacity, row.getInt(fieldIdx));
       } else if (t instanceof TimestampType) {
         col.putLongs(0, capacity, row.getLong(fieldIdx));
+      } else {
+        throw new RuntimeException(String.format("DataType %s is not supported" +
+          " in column vectorized reader.", t.sql()));
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index ee7ef556b3ab..ab2a1c9d3748 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3114,6 +3114,32 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  test("SPARK-33593: Vector reader got incorrect data with binary partition value") {
+    Seq("false", "true").foreach(value => {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) {
+        withTable("t1") {
+          sql(
+            """CREATE TABLE t1(name STRING, id BINARY, part BINARY)
+              |USING PARQUET PARTITIONED BY (part)""".stripMargin)
+          sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
+          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"),
+            Row("a", "Spark SQL", "Spark SQL"))
+        }
+      }
+
+      withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> value) {
+        withTable("t2") {
+          sql(
+            """CREATE TABLE t2(name STRING, id BINARY, part BINARY)
+              |USING ORC PARTITIONED BY (part)""".stripMargin)
+          sql("INSERT INTO t2 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
+          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t2"),
+            Row("a", "Spark SQL", "Spark SQL"))
+        }
+      }
+    })
+  }
 }
 
 case class Foo(bar: Option[String])
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
new file mode 100644
index 000000000000..5534c09d9097
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import java.io.File
+
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType}
+import org.apache.hadoop.mapreduce.lib.input.FileSplit
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+import org.apache.orc.TypeDescription
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession {
+
+  import testImplicits._
+
+  test("SPARK-33593: partition column types") {
+    withTempPath { dir =>
+      Seq(1).toDF().repartition(1).write.orc(dir.getCanonicalPath)
+
+      val dataTypes =
+        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
+          FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
+
+      val constantValues =
+        Seq(
+          UTF8String.fromString("a string"),
+          true,
+          1.toByte,
+          "Spark SQL".getBytes,
+          2.toShort,
+          3,
+          Long.MaxValue,
+          0.25.toFloat,
+          0.75D,
+          Decimal("1234.23456"),
+          DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")),
+          DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123")))
+
+      dataTypes.zip(constantValues).foreach { case (dt, v) =>
+        val conf = sqlContext.conf
+        val dataSchema = StructType(StructField("col1", IntegerType) :: Nil)
+        val partitionSchema = StructType(StructField("pcol", dt) :: Nil)
+        val partitionValues = new GenericInternalRow(Array(v))
+        val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0))
+        val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty)
+        val taskConf = sqlContext.sessionState.newHadoopConf()
+        val orcFileSchema = TypeDescription.fromString(partitionSchema.simpleString)
+        val vectorizedReader = new OrcColumnarBatchReader(
+          conf.offHeapColumnVectorEnabled, conf.getConf(SQLConf.ORC_COPY_BATCH_TO_SPARK), 4096)
+        val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
+        val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId)
+
+        try {
+          vectorizedReader.initialize(fileSplit, taskAttemptContext)
+          vectorizedReader.initBatch(
+            orcFileSchema,
+            Array(0, -1),
+            (dataSchema ++ partitionSchema).toArray,
+            partitionSchema,
+            partitionValues)
+          vectorizedReader.nextKeyValue()
+          val row = vectorizedReader.getCurrentValue.getRow(0)
+
+          // Use `GenericMutableRow` by explicitly copying rather than `ColumnarBatch`
+          // in order to use get(...) method which is not implemented in `ColumnarBatch`.
+          val actual = row.copy().get(1, dt)
+          val expected = v
+          if (dt.isInstanceOf[BinaryType]) {
+            assert(actual.asInstanceOf[Array[Byte]]
+              sameElements expected.asInstanceOf[Array[Byte]])
+          } else {
+            assert(actual == expected)
+          }
+        } finally {
+          vectorizedReader.close()
+        }
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 6b05b9c0f720..dff1152bc1e3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -734,7 +734,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       Seq(1).toDF().repartition(1).write.parquet(dir.getCanonicalPath)
 
       val dataTypes =
-        Seq(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType,
+        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
           FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
 
       val constantValues =
@@ -742,6 +742,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           UTF8String.fromString("a string"),
           true,
           1.toByte,
+          "Spark SQL".getBytes,
           2.toShort,
           3,
           Long.MaxValue,
@@ -769,7 +770,11 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           // in order to use get(...) method which is not implemented in `ColumnarBatch`.
           val actual = row.copy().get(1, dt)
           val expected = v
-          assert(actual == expected)
+          if (dt.isInstanceOf[BinaryType]) {
+            assert(actual.asInstanceOf[Array[Byte]] sameElements expected.asInstanceOf[Array[Byte]])
+          } else {
+            assert(actual == expected)
+          }
         } finally {
           vectorizedReader.close()
         }

From ee9f7e7cf92fc615fa5fb105e7e41021c5d9cf49 Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Wed, 6 Jan 2021 10:17:01 +0800
Subject: [PATCH 2/3] Revert "[SPARK-33593][SQL] Vector reader got incorrect data with binary partition value"

This reverts commit 4de3028259ac06e31ecce5838aa89b43bfbddd27.
---
 .../vectorized/ColumnVectorUtils.java         |   5 -
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  26 -----
 .../orc/OrcColumnarBatchReaderSuite.scala     | 105 ------------------
 .../datasources/parquet/ParquetIOSuite.scala  |   9 +-
 4 files changed, 2 insertions(+), 143 deletions(-)
 delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala

diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
index 0792f5bcfeef..829f3ce750fe 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
@@ -54,8 +54,6 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
     } else {
       if (t == DataTypes.BooleanType) {
         col.putBooleans(0, capacity, row.getBoolean(fieldIdx));
-      } else if (t == DataTypes.BinaryType) {
-        col.putByteArray(0, row.getBinary(fieldIdx));
       } else if (t == DataTypes.ByteType) {
         col.putBytes(0, capacity, row.getByte(fieldIdx));
       } else if (t == DataTypes.ShortType) {
@@ -96,9 +94,6 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
         col.putInts(0, capacity, row.getInt(fieldIdx));
       } else if (t instanceof TimestampType) {
         col.putLongs(0, capacity, row.getLong(fieldIdx));
-      } else {
-        throw new RuntimeException(String.format("DataType %s is not supported" +
-          " in column vectorized reader.", t.sql()));
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index ab2a1c9d3748..ee7ef556b3ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3114,32 +3114,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       }
     }
   }
-
-  test("SPARK-33593: Vector reader got incorrect data with binary partition value") {
-    Seq("false", "true").foreach(value => {
-      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) {
-        withTable("t1") {
-          sql(
-            """CREATE TABLE t1(name STRING, id BINARY, part BINARY)
-              |USING PARQUET PARTITIONED BY (part)""".stripMargin)
-          sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
-          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"),
-            Row("a", "Spark SQL", "Spark SQL"))
-        }
-      }
-
-      withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> value) {
-        withTable("t2") {
-          sql(
-            """CREATE TABLE t2(name STRING, id BINARY, part BINARY)
-              |USING ORC PARTITIONED BY (part)""".stripMargin)
-          sql("INSERT INTO t2 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
-          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t2"),
-            Row("a", "Spark SQL", "Spark SQL"))
-        }
-      }
-    })
-  }
 }
 
 case class Foo(bar: Option[String])
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
deleted file mode 100644
index 5534c09d9097..000000000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.orc
-
-import java.io.File
-
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType}
-import org.apache.hadoop.mapreduce.lib.input.FileSplit
-import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
-import org.apache.orc.TypeDescription
-
-import org.apache.spark.sql.QueryTest
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
-import org.apache.spark.sql.catalyst.util.DateTimeUtils
-import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.UTF8String
-
-class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession {
-
-  import testImplicits._
-
-  test("SPARK-33593: partition column types") {
-    withTempPath { dir =>
-      Seq(1).toDF().repartition(1).write.orc(dir.getCanonicalPath)
-
-      val dataTypes =
-        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
-          FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
-
-      val constantValues =
-        Seq(
-          UTF8String.fromString("a string"),
-          true,
-          1.toByte,
-          "Spark SQL".getBytes,
-          2.toShort,
-          3,
-          Long.MaxValue,
-          0.25.toFloat,
-          0.75D,
-          Decimal("1234.23456"),
-          DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")),
-          DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123")))
-
-      dataTypes.zip(constantValues).foreach { case (dt, v) =>
-        val conf = sqlContext.conf
-        val dataSchema = StructType(StructField("col1", IntegerType) :: Nil)
-        val partitionSchema = StructType(StructField("pcol", dt) :: Nil)
-        val partitionValues = new GenericInternalRow(Array(v))
-        val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0))
-        val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty)
-        val taskConf = sqlContext.sessionState.newHadoopConf()
-        val orcFileSchema = TypeDescription.fromString(partitionSchema.simpleString)
-        val vectorizedReader = new OrcColumnarBatchReader(
-          conf.offHeapColumnVectorEnabled, conf.getConf(SQLConf.ORC_COPY_BATCH_TO_SPARK), 4096)
-        val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
-        val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId)
-
-        try {
-          vectorizedReader.initialize(fileSplit, taskAttemptContext)
-          vectorizedReader.initBatch(
-            orcFileSchema,
-            Array(0, -1),
-            (dataSchema ++ partitionSchema).toArray,
-            partitionSchema,
-            partitionValues)
-          vectorizedReader.nextKeyValue()
-          val row = vectorizedReader.getCurrentValue.getRow(0)
-
-          // Use `GenericMutableRow` by explicitly copying rather than `ColumnarBatch`
-          // in order to use get(...) method which is not implemented in `ColumnarBatch`.
-          val actual = row.copy().get(1, dt)
-          val expected = v
-          if (dt.isInstanceOf[BinaryType]) {
-            assert(actual.asInstanceOf[Array[Byte]]
-              sameElements expected.asInstanceOf[Array[Byte]])
-          } else {
-            assert(actual == expected)
-          }
-        } finally {
-          vectorizedReader.close()
-        }
-      }
-    }
-  }
-}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index dff1152bc1e3..6b05b9c0f720 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -734,7 +734,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       Seq(1).toDF().repartition(1).write.parquet(dir.getCanonicalPath)
 
       val dataTypes =
-        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
+        Seq(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType,
          FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
 
       val constantValues =
@@ -742,7 +742,6 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           UTF8String.fromString("a string"),
           true,
           1.toByte,
-          "Spark SQL".getBytes,
           2.toShort,
           3,
           Long.MaxValue,
@@ -770,11 +769,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           // in order to use get(...) method which is not implemented in `ColumnarBatch`.
           val actual = row.copy().get(1, dt)
           val expected = v
-          if (dt.isInstanceOf[BinaryType]) {
-            assert(actual.asInstanceOf[Array[Byte]] sameElements expected.asInstanceOf[Array[Byte]])
-          } else {
-            assert(actual == expected)
-          }
+          assert(actual == expected)
         } finally {
           vectorizedReader.close()
         }

From aa379b52b099a4c02a50f93265f976563397b3ea Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Wed, 6 Jan 2021 10:51:17 +0800
Subject: [PATCH 3/3] [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide

---
 .../sql/catalyst/parser/AstBuilder.scala      |  6 +-
 .../resources/sql-tests/inputs/group-by.sql   | 10 ++++
 .../sql-tests/results/group-by.sql.out        | 60 ++++++++++++++++++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 90e7d1c3917e..4c4e4f1f9bb6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -467,7 +467,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val withProject = if (aggregation == null && having != null) {
       if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) {
         // If the legacy conf is set, treat HAVING without GROUP BY as WHERE.
-        withHaving(having, createProject())
+        val predicate = expression(having) match {
+          case p: Predicate => p
+          case e => Cast(e, BooleanType)
+        }
+        Filter(predicate, createProject())
       } else {
         // According to SQL standard, HAVING without GROUP BY means global aggregate.
         withHaving(having, Aggregate(Nil, namedExpressions, withFilter))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 433db7152743..0c40a8c86e68 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -80,3 +80,13 @@ SELECT 1 FROM range(10) HAVING true;
 SELECT 1 FROM range(10) HAVING MAX(id) > 0;
 
 SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index f9d1ee8a6bcd..d23a58a9613e 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 30
+-- Number of queries: 35


 -- !query 0
@@ -275,3 +275,61 @@ struct<>
 -- !query 29 output
 org.apache.spark.sql.AnalysisException
 grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.;
+
+
+-- !query 30
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
+-- !query 30 schema
+struct<key:string,value:string>
+-- !query 30 output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere true
+
+
+-- !query 31
+SELECT 1 FROM range(10) HAVING true
+-- !query 31 schema
+struct<1:int>
+-- !query 31 output
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+
+
+-- !query 32
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query 32 schema
+struct<>
+-- !query 32 output
+java.lang.UnsupportedOperationException
+Cannot evaluate expression: max(input[0, bigint, false])
+
+
+-- !query 33
+SELECT id FROM range(10) HAVING id > 0
+-- !query 33 schema
+struct<id:bigint>
+-- !query 33 output
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query 34
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
+-- !query 34 schema
+struct<key:string,value:string>
+-- !query 34 output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere false