From 4de3028259ac06e31ecce5838aa89b43bfbddd27 Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Fri, 18 Dec 2020 18:20:49 +0800
Subject: [PATCH 1/3] [SPARK-33593][SQL] Vector reader got incorrect data with binary partition value

---
 .../vectorized/ColumnVectorUtils.java         |   5 +
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  26 +++
 .../orc/OrcColumnarBatchReaderSuite.scala     | 105 ++++++++++++++++++
 .../datasources/parquet/ParquetIOSuite.scala  |   9 +-
 4 files changed, 143 insertions(+), 2 deletions(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala

diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
index 829f3ce750fe..0792f5bcfeef 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
@@ -54,6 +54,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
     } else {
       if (t == DataTypes.BooleanType) {
         col.putBooleans(0, capacity, row.getBoolean(fieldIdx));
+      } else if (t == DataTypes.BinaryType) {
+        col.putByteArray(0, row.getBinary(fieldIdx));
       } else if (t == DataTypes.ByteType) {
         col.putBytes(0, capacity, row.getByte(fieldIdx));
       } else if (t == DataTypes.ShortType) {
@@ -94,6 +96,9 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
         col.putInts(0, capacity, row.getInt(fieldIdx));
       } else if (t instanceof TimestampType) {
         col.putLongs(0, capacity, row.getLong(fieldIdx));
+      } else {
+        throw new RuntimeException(String.format("DataType %s is not supported" +
+          " in column vectorized reader.", t.sql()));
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index ee7ef556b3ab..ab2a1c9d3748 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3114,6 +3114,32 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  test("SPARK-33593: Vector reader got incorrect data with binary partition value") {
+    Seq("false", "true").foreach(value => {
+      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) {
+        withTable("t1") {
+          sql(
+            """CREATE TABLE t1(name STRING, id BINARY, part BINARY)
+              |USING PARQUET PARTITIONED BY (part)""".stripMargin)
+          sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
+          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"),
+            Row("a", "Spark SQL", "Spark SQL"))
+        }
+      }
+
+      withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> value) {
+        withTable("t2") {
+          sql(
+            """CREATE TABLE t2(name STRING, id BINARY, part BINARY)
+              |USING ORC PARTITIONED BY (part)""".stripMargin)
+          sql("INSERT INTO t2 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
+          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t2"),
+            Row("a", "Spark SQL", "Spark SQL"))
+        }
+      }
+    })
+  }
 }
 
 case class Foo(bar: Option[String])
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
new file mode 100644
index 000000000000..5534c09d9097
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import java.io.File
+
+import org.apache.hadoop.fs.Path
+import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType}
+import org.apache.hadoop.mapreduce.lib.input.FileSplit
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
+import org.apache.orc.TypeDescription
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
+
+class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession {
+
+  import testImplicits._
+
+  test("SPARK-33593: partition column types") {
+    withTempPath { dir =>
+      Seq(1).toDF().repartition(1).write.orc(dir.getCanonicalPath)
+
+      val dataTypes =
+        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
+          FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
+
+      val constantValues =
+        Seq(
+          UTF8String.fromString("a string"),
+          true,
+          1.toByte,
+          "Spark SQL".getBytes,
+          2.toShort,
+          3,
+          Long.MaxValue,
+          0.25.toFloat,
+          0.75D,
+          Decimal("1234.23456"),
+          DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")),
+          DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123")))
+
+      dataTypes.zip(constantValues).foreach { case (dt, v) =>
+        val conf = sqlContext.conf
+        val dataSchema = StructType(StructField("col1", IntegerType) :: Nil)
+        val partitionSchema = StructType(StructField("pcol", dt) :: Nil)
+        val partitionValues = new GenericInternalRow(Array(v))
+        val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0))
+        val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty)
+        val taskConf = sqlContext.sessionState.newHadoopConf()
+        val orcFileSchema = TypeDescription.fromString(partitionSchema.simpleString)
+        val vectorizedReader = new OrcColumnarBatchReader(
+          conf.offHeapColumnVectorEnabled, conf.getConf(SQLConf.ORC_COPY_BATCH_TO_SPARK), 4096)
+        val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
+        val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId)
+
+        try {
+          vectorizedReader.initialize(fileSplit, taskAttemptContext)
+          vectorizedReader.initBatch(
+            orcFileSchema,
+            Array(0, -1),
+            (dataSchema ++ partitionSchema).toArray,
+            partitionSchema,
+            partitionValues)
+          vectorizedReader.nextKeyValue()
+          val row = vectorizedReader.getCurrentValue.getRow(0)
+
+          // Use `GenericMutableRow` by explicitly copying rather than `ColumnarBatch`
+          // in order to use get(...) method which is not implemented in `ColumnarBatch`.
+          val actual = row.copy().get(1, dt)
+          val expected = v
+          if (dt.isInstanceOf[BinaryType]) {
+            assert(actual.asInstanceOf[Array[Byte]]
+              sameElements expected.asInstanceOf[Array[Byte]])
+          } else {
+            assert(actual == expected)
+          }
+        } finally {
+          vectorizedReader.close()
+        }
+      }
+    }
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 6b05b9c0f720..dff1152bc1e3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -734,7 +734,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       Seq(1).toDF().repartition(1).write.parquet(dir.getCanonicalPath)
 
       val dataTypes =
-        Seq(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType,
+        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
           FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
 
       val constantValues =
@@ -742,6 +742,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           UTF8String.fromString("a string"),
           true,
           1.toByte,
+          "Spark SQL".getBytes,
           2.toShort,
           3,
           Long.MaxValue,
@@ -769,7 +770,11 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           // in order to use get(...) method which is not implemented in `ColumnarBatch`.
           val actual = row.copy().get(1, dt)
           val expected = v
-          assert(actual == expected)
+          if (dt.isInstanceOf[BinaryType]) {
+            assert(actual.asInstanceOf[Array[Byte]] sameElements expected.asInstanceOf[Array[Byte]])
+          } else {
+            assert(actual == expected)
+          }
         } finally {
           vectorizedReader.close()
         }

From ee9f7e7cf92fc615fa5fb105e7e41021c5d9cf49 Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Wed, 6 Jan 2021 10:17:01 +0800
Subject: [PATCH 2/3] Revert "[SPARK-33593][SQL] Vector reader got incorrect data with binary partition value"

This reverts commit 4de3028259ac06e31ecce5838aa89b43bfbddd27.
---
 .../vectorized/ColumnVectorUtils.java         |   5 -
 .../org/apache/spark/sql/SQLQuerySuite.scala  |  26 -----
 .../orc/OrcColumnarBatchReaderSuite.scala     | 105 ------------------
 .../datasources/parquet/ParquetIOSuite.scala  |   9 +-
 4 files changed, 2 insertions(+), 143 deletions(-)
 delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala

diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
index 0792f5bcfeef..829f3ce750fe 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java
@@ -54,8 +54,6 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
     } else {
       if (t == DataTypes.BooleanType) {
         col.putBooleans(0, capacity, row.getBoolean(fieldIdx));
-      } else if (t == DataTypes.BinaryType) {
-        col.putByteArray(0, row.getBinary(fieldIdx));
       } else if (t == DataTypes.ByteType) {
         col.putBytes(0, capacity, row.getByte(fieldIdx));
       } else if (t == DataTypes.ShortType) {
@@ -96,9 +94,6 @@ public static void populate(WritableColumnVector col, InternalRow row, int field
         col.putInts(0, capacity, row.getInt(fieldIdx));
       } else if (t instanceof TimestampType) {
         col.putLongs(0, capacity, row.getLong(fieldIdx));
-      } else {
-        throw new RuntimeException(String.format("DataType %s is not supported" +
-          " in column vectorized reader.", t.sql()));
       }
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index ab2a1c9d3748..ee7ef556b3ab 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3114,32 +3114,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
       }
     }
   }
-
-  test("SPARK-33593: Vector reader got incorrect data with binary partition value") {
-    Seq("false", "true").foreach(value => {
-      withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) {
-        withTable("t1") {
-          sql(
-            """CREATE TABLE t1(name STRING, id BINARY, part BINARY)
-              |USING PARQUET PARTITIONED BY (part)""".stripMargin)
-          sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
-          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"),
-            Row("a", "Spark SQL", "Spark SQL"))
-        }
-      }
-
-      withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> value) {
-        withTable("t2") {
-          sql(
-            """CREATE TABLE t2(name STRING, id BINARY, part BINARY)
-              |USING ORC PARTITIONED BY (part)""".stripMargin)
-          sql("INSERT INTO t2 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')")
-          checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t2"),
-            Row("a", "Spark SQL", "Spark SQL"))
-        }
-      }
-    })
-  }
 }
 
 case class Foo(bar: Option[String])
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
deleted file mode 100644
index 5534c09d9097..000000000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.orc
-
-import java.io.File
-
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType}
-import org.apache.hadoop.mapreduce.lib.input.FileSplit
-import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
-import org.apache.orc.TypeDescription
-
-import org.apache.spark.sql.QueryTest
-import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
-import org.apache.spark.sql.catalyst.util.DateTimeUtils
-import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.UTF8String
-
-class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession {
-
-  import testImplicits._
-
-  test("SPARK-33593: partition column types") {
-    withTempPath { dir =>
-      Seq(1).toDF().repartition(1).write.orc(dir.getCanonicalPath)
-
-      val dataTypes =
-        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
-          FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
-
-      val constantValues =
-        Seq(
-          UTF8String.fromString("a string"),
-          true,
-          1.toByte,
-          "Spark SQL".getBytes,
-          2.toShort,
-          3,
-          Long.MaxValue,
-          0.25.toFloat,
-          0.75D,
-          Decimal("1234.23456"),
-          DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")),
-          DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123")))
-
-      dataTypes.zip(constantValues).foreach { case (dt, v) =>
-        val conf = sqlContext.conf
-        val dataSchema = StructType(StructField("col1", IntegerType) :: Nil)
-        val partitionSchema = StructType(StructField("pcol", dt) :: Nil)
-        val partitionValues = new GenericInternalRow(Array(v))
-        val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0))
-        val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty)
-        val taskConf = sqlContext.sessionState.newHadoopConf()
-        val orcFileSchema = TypeDescription.fromString(partitionSchema.simpleString)
-        val vectorizedReader = new OrcColumnarBatchReader(
-          conf.offHeapColumnVectorEnabled, conf.getConf(SQLConf.ORC_COPY_BATCH_TO_SPARK), 4096)
-        val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0)
-        val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId)
-
-        try {
-          vectorizedReader.initialize(fileSplit, taskAttemptContext)
-          vectorizedReader.initBatch(
-            orcFileSchema,
-            Array(0, -1),
-            (dataSchema ++ partitionSchema).toArray,
-            partitionSchema,
-            partitionValues)
-          vectorizedReader.nextKeyValue()
-          val row = vectorizedReader.getCurrentValue.getRow(0)
-
-          // Use `GenericMutableRow` by explicitly copying rather than `ColumnarBatch`
-          // in order to use get(...) method which is not implemented in `ColumnarBatch`.
-          val actual = row.copy().get(1, dt)
-          val expected = v
-          if (dt.isInstanceOf[BinaryType]) {
-            assert(actual.asInstanceOf[Array[Byte]]
-              sameElements expected.asInstanceOf[Array[Byte]])
-          } else {
-            assert(actual == expected)
-          }
-        } finally {
-          vectorizedReader.close()
-        }
-      }
-    }
-  }
-}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index dff1152bc1e3..6b05b9c0f720 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -734,7 +734,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       Seq(1).toDF().repartition(1).write.parquet(dir.getCanonicalPath)
 
       val dataTypes =
-        Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType,
+        Seq(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType,
          FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType)
 
       val constantValues =
@@ -742,7 +742,6 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           UTF8String.fromString("a string"),
           true,
           1.toByte,
-          "Spark SQL".getBytes,
           2.toShort,
           3,
           Long.MaxValue,
@@ -770,11 +769,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
           // in order to use get(...) method which is not implemented in `ColumnarBatch`.
           val actual = row.copy().get(1, dt)
           val expected = v
-          if (dt.isInstanceOf[BinaryType]) {
-            assert(actual.asInstanceOf[Array[Byte]] sameElements expected.asInstanceOf[Array[Byte]])
-          } else {
-            assert(actual == expected)
-          }
+          assert(actual == expected)
         } finally {
           vectorizedReader.close()
         }

From aa379b52b099a4c02a50f93265f976563397b3ea Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Wed, 6 Jan 2021 10:51:17 +0800
Subject: [PATCH 3/3] [SPARK-34012][SQL][2.4] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide

---
 .../sql/catalyst/parser/AstBuilder.scala      |  6 +-
 .../resources/sql-tests/inputs/group-by.sql   | 10 ++++
 .../sql-tests/results/group-by.sql.out        | 60 ++++++++++++++++++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 90e7d1c3917e..4c4e4f1f9bb6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -467,7 +467,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
     val withProject = if (aggregation == null && having != null) {
       if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) {
         // If the legacy conf is set, treat HAVING without GROUP BY as WHERE.
-        withHaving(having, createProject())
+        val predicate = expression(having) match {
+          case p: Predicate => p
+          case e => Cast(e, BooleanType)
+        }
+        Filter(predicate, createProject())
       } else {
         // According to SQL standard, HAVING without GROUP BY means global aggregate.
         withHaving(having, Aggregate(Nil, namedExpressions, withFilter))
diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
index 433db7152743..0c40a8c86e68 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql
@@ -80,3 +80,13 @@ SELECT 1 FROM range(10) HAVING true;
 SELECT 1 FROM range(10) HAVING MAX(id) > 0;
 
 SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true;
+
+SELECT 1 FROM range(10) HAVING true;
+
+SELECT 1 FROM range(10) HAVING MAX(id) > 0;
+
+SELECT id FROM range(10) HAVING id > 0;
+
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false;
diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
index f9d1ee8a6bcd..d23a58a9613e 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 30
+-- Number of queries: 35


 -- !query 0
@@ -275,3 +275,61 @@ struct<>
 -- !query 29 output
 org.apache.spark.sql.AnalysisException
 grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.;
+
+
+-- !query 30
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true
+-- !query 30 schema
+struct<key:string,value:string>
+-- !query 30 output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere true
+
+
+-- !query 31
+SELECT 1 FROM range(10) HAVING true
+-- !query 31 schema
+struct<1:int>
+-- !query 31 output
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+
+
+-- !query 32
+SELECT 1 FROM range(10) HAVING MAX(id) > 0
+-- !query 32 schema
+struct<>
+-- !query 32 output
+java.lang.UnsupportedOperationException
+Cannot evaluate expression: max(input[0, bigint, false])
+
+
+-- !query 33
+SELECT id FROM range(10) HAVING id > 0
+-- !query 33 schema
+struct<id:bigint>
+-- !query 33 output
+1
+2
+3
+4
+5
+6
+7
+8
+9
+
+
+-- !query 34
+SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false
+-- !query 34 schema
+struct<key:string,value:string>
+-- !query 34 output
+spark.sql.legacy.parser.havingWithoutGroupByAsWhere false