diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 257c80022110..20ea952ff0a2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -185,7 +185,7 @@ jobs: echo "Preparing the benchmark results:" tar -cvf benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude-standard` - name: Upload benchmark results - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}-${{ matrix.split }} path: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 56b34d6c5e7b..012e7c8fe9e9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -272,13 +272,13 @@ jobs: ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" - name: Upload test results to report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/test-reports/*.xml" - name: Upload unit tests log files if: ${{ !success() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" @@ -468,13 +468,13 @@ jobs: name: PySpark - name: Upload test results to report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: test-results-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/test-reports/*.xml" - name: Upload unit tests log files if: ${{ !success() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" @@ -553,7 +553,7 @@ jobs: ./dev/run-tests --parallelism 1 --modules sparkr - name: Upload test results to report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: test-results-sparkr--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/test-reports/*.xml" @@ -761,6 +761,9 @@ jobs: run: ./dev/lint-r - name: Run documentation build run: | + # Build docs first with SKIP_API to ensure they are buildable without requiring any + # language docs to be built beforehand. + cd docs; SKIP_API=1 bundle exec jekyll build; cd .. 
if [ -f "./dev/is-changed.py" ]; then # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` @@ -774,7 +777,7 @@ jobs: run: tar cjf site.tar.bz2 docs/_site - name: Upload documentation if: github.repository != 'apache/spark' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: site path: site.tar.bz2 @@ -927,13 +930,13 @@ jobs: spark.sql.join.forceApplyShuffledHashJoin=true - name: Upload test results to report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: test-results-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/test-reports/*.xml" - name: Upload unit tests log files if: ${{ !success() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: unit-tests-log-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" @@ -996,13 +999,13 @@ jobs: ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest - name: Upload test results to report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: test-results-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/test-reports/*.xml" - name: Upload unit tests log files if: ${{ !success() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: unit-tests-log-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3 path: "**/target/unit-tests.log" @@ -1077,7 +1080,7 @@ jobs: build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" - name: Upload Spark on K8S integration tests log files if: ${{ !success() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: spark-on-kubernetes-it-log path: "**/target/integration-tests.log" diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python.yml index ebd8de2311c6..a2cf7c64f089 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -27,7 +27,7 @@ jobs: run-build: strategy: matrix: - pyversion: ["pypy3,python3.10", "python3.11", "python3.12"] + pyversion: ["pypy3", "python3.10", "python3.11", "python3.12"] permissions: packages: write name: Run diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index 3f19c76fd961..078a380f0afe 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -200,13 +200,13 @@ jobs: rm -rf ~/.m2/repository/org/apache/spark - name: Upload test results to report if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/test-reports/*.xml" - name: Upload unit tests log files if: failure() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" diff --git 
a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/StoreVersion.java b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/StoreVersion.java index c138163d21e1..e5887d353dd7 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/StoreVersion.java +++ b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/StoreVersion.java @@ -54,4 +54,9 @@ public int hashCode() { result = 31 * result + minor; return result; } + + @Override + public String toString() { + return "StoreVersion[" + major + "." + minor + ']'; + } } diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java new file mode 100644 index 000000000000..e258b9e6ff40 --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.util; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.SystemUtils; +import org.apache.spark.network.shuffledb.DBBackend; +import org.apache.spark.network.shuffledb.StoreVersion; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; + +import static org.junit.jupiter.api.Assumptions.assumeFalse; + +public class DBProviderSuite { + + @Test + public void testRockDBCheckVersionFailed() throws IOException { + testCheckVersionFailed(DBBackend.ROCKSDB, "rocksdb"); + } + + @Test + public void testLevelDBCheckVersionFailed() throws IOException { + assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64")); + testCheckVersionFailed(DBBackend.LEVELDB, "leveldb"); + } + + private void testCheckVersionFailed(DBBackend dbBackend, String namePrefix) throws IOException { + String root = System.getProperty("java.io.tmpdir"); + File dbFile = JavaUtils.createDirectory(root, namePrefix); + try { + StoreVersion v1 = new StoreVersion(1, 0); + ObjectMapper mapper = new ObjectMapper(); + DBProvider.initDB(dbBackend, dbFile, v1, mapper).close(); + StoreVersion v2 = new StoreVersion(2, 0); + IOException ioe = Assertions.assertThrows(IOException.class, () -> + DBProvider.initDB(dbBackend, dbFile, v2, mapper)); + Assertions.assertTrue( + ioe.getMessage().contains("incompatible with current version StoreVersion[2.0]")); + } finally { + JavaUtils.deleteRecursively(dbFile); + } + } +} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java index f2d06e793f9d..b567ac302b84 100644 --- 
a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java @@ -44,7 +44,7 @@ * @since 3.0.0 */ @Unstable -public final class CalendarInterval implements Serializable { +public final class CalendarInterval implements Serializable, Comparable<CalendarInterval> { // NOTE: If you're moving or renaming this file, you should also update Unidoc configuration // specified in 'SparkBuild.scala'. public final int months; @@ -127,4 +127,26 @@ private void appendUnit(StringBuilder sb, long value, String unit) { * @throws ArithmeticException if a numeric overflow occurs */ public Duration extractAsDuration() { return Duration.of(microseconds, ChronoUnit.MICROS); } + + /** + * This method is not used to order CalendarInterval instances, as they are not orderable and + * cannot be used in an ORDER BY statement. + * Instead, it is used to find identical interval instances for aggregation purposes. + * It compares the 'months', 'days', and 'microseconds' fields of this CalendarInterval + * with another instance. The comparison is done first on the 'months', then on the 'days', + * and finally on the 'microseconds'. + * + * @param o The CalendarInterval instance to compare with. + * @return Zero if this object is equal to the specified object, and non-zero otherwise + */ + @Override + public int compareTo(CalendarInterval o) { + if (this.months != o.months) { + return Integer.compare(this.months, o.months); + } else if (this.days != o.days) { + return Integer.compare(this.days, o.days); + } else { + return Long.compare(this.microseconds, o.microseconds); + } + } }
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java index b8b710523365..0a1ee279316f 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java @@ -76,6 +76,22 @@ public void toStringTest() { i.toString()); } + @Test + public void compareToTest() { + CalendarInterval i = new CalendarInterval(0, 0, 0); + + assertEquals(i.compareTo(new CalendarInterval(0, 0, 0)), 0); + assertEquals(i.compareTo(new CalendarInterval(0, 0, 1)), -1); + assertEquals(i.compareTo(new CalendarInterval(0, 1, 0)), -1); + assertEquals(i.compareTo(new CalendarInterval(0, 1, -1)), -1); + assertEquals(i.compareTo(new CalendarInterval(1, 0, 0)), -1); + assertEquals(i.compareTo(new CalendarInterval(1, 0, -1)), -1); + assertEquals(i.compareTo(new CalendarInterval(0, 0, -1)), 1); + assertEquals(i.compareTo(new CalendarInterval(0, -1, 0)), 1); + assertEquals(i.compareTo(new CalendarInterval(-1, 0, 0)), 1); + assertEquals(i.compareTo(new CalendarInterval(-1, 0, 1)), 1); + } + @Test public void periodAndDurationTest() { CalendarInterval interval = new CalendarInterval(120, -40, 123456);
diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index c7f8f59a7679..e770b9c7053e 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -324,6 +324,12 @@ ], "sqlState" : "0AKD0" }, + "CANNOT_RESOLVE_DATAFRAME_COLUMN" : { + "message" : [ + "Cannot resolve dataframe column . It's probably because of illegal references like `df1.select(df2.col(\"a\"))`."
+ ], + "sqlState" : "42704" + }, "CANNOT_RESOLVE_STAR_EXPAND" : { "message" : [ "Cannot resolve .* given input columns . Please check that the specified table or struct exists and is accessible in the input columns." @@ -6843,11 +6849,6 @@ "Cannot modify the value of a static config: " ] }, - "_LEGACY_ERROR_TEMP_3051" : { - "message" : [ - "When resolving , fail to find subplan with plan_id= in " - ] - }, "_LEGACY_ERROR_TEMP_3052" : { "message" : [ "Unexpected resolved action: "
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 27a5b918fc94..25e6aec4d84a 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -182,16 +182,19 @@ private[sql] object AvroUtils extends Logging { def hasNextRow: Boolean = { while (!completed && currentRow.isEmpty) { - val r = fileReader.hasNext && !fileReader.pastSync(stopPosition) - if (!r) { + if (fileReader.pastSync(stopPosition)) { fileReader.close() completed = true currentRow = None - } else { + } else if (fileReader.hasNext()) { val record = fileReader.next() // the row must be deserialized in hasNextRow, because AvroDeserializer#deserialize // potentially filters rows currentRow = deserializer.deserialize(record).asInstanceOf[Option[InternalRow]] + } else { + // In this case, `fileReader.hasNext()` returns false but we are not past the sync point yet. + // This means we hit empty blocks; continue reading the file in case there are non-empty + // blocks ahead or until we move past the sync point. } } currentRow.isDefined
diff --git a/connector/avro/src/test/resources/empty_blocks.avro b/connector/avro/src/test/resources/empty_blocks.avro new file mode 100644 index 000000000000..85d96f4af715 --- /dev/null +++ b/connector/avro/src/test/resources/empty_blocks.avro @@ -0,0 +1,5 @@
[binary Avro container payload omitted: the test fixture's file header, embedded writer schema, and deflate-compressed data blocks are not representable as text]
[the diff header and opening lines of the accompanying Avro test-suite change were garbled together with the binary data; the surviving tail of that hunk follows]
s"$maxPartitionBytes") { + val file = getResourceAvroFilePath("empty_blocks.avro") + val df = spark.read.format("avro").load(file) + val count = df.count() + val records = df.collect() + assert(count == 58) + assert(count == records.length) + } + } + } } class AvroV1Suite extends AvroSuite {
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index 0740334724e8..288964a084ba 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -894,7 +894,7 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateM // df1("i") is not ambiguous, but it's not valid in the projected df.
df1.select((df1("i") + 1).as("plus")).select(df1("i")).collect() } - assert(e1.getMessage.contains("MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT")) + assert(e1.getMessage.contains("UNRESOLVED_COLUMN.WITH_SUGGESTION")) checkSameResult( Seq(Row(1, "a")), diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala index e4b60eeeff0d..e96e5dfcac08 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala @@ -191,7 +191,7 @@ class SparkConnectService(debug: Boolean) extends AsyncService with BindableServ new SparkConnectReleaseExecuteHandler(responseObserver).handle(request) } catch ErrorUtils.handleError( - "reattachExecute", + "releaseExecute", observer = responseObserver, userId = request.getUserContext.getUserId, sessionId = request.getSessionId) diff --git a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js index 352095c46f6c..73941e618c89 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js +++ b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js @@ -223,7 +223,7 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) { .attr("cx", function(d) { return x(d.x); }) .attr("cy", function(d) { return y(d.y); }) .attr("r", function(d) { return isFailedBatch(d.x) ? "2" : "3";}) - .on('mouseover', function(d) { + .on('mouseover', function(event, d) { var tip = yValueFormat(d.y) + " " + unitY + " at " + timeTipStrings[d.x]; showBootstrapTooltip(d3.select(this), tip); // show the point diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 9e81accbd3b7..33b715021eb9 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -95,12 +95,12 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas<=2.1.4' scipy coverage matplotlib lxml -ARG BASIC_PIP_PKGS="numpy pyarrow>=14.0.0 six==1.16.0 pandas<=2.1.4 scipy unittest-xml-reporting plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=14.0.0 six==1.16.0 pandas<=2.1.4 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect ARG CONNECT_PIP_PKGS="grpcio==1.59.3 grpcio-status==1.59.3 protobuf==4.25.1 googleapis-common-protos==1.56.4" -RUN python3.9 -m pip install $BASIC_PIP_PKGS $CONNECT_PIP_PKGS +RUN python3.9 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS # Add torch as a testing dependency for TorchDistributor and DeepspeedTorchDistributor RUN python3.9 -m pip install 'torch<=2.0.1' torchvision --index-url https://download.pytorch.org/whl/cpu RUN python3.9 -m pip install deepspeed torcheval @@ -111,7 +111,7 @@ RUN apt-get update && apt-get install -y \ python3.10 python3.10-distutils \ && rm -rf /var/lib/apt/lists/* RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 -RUN python3.10 -m pip install $BASIC_PIP_PKGS $CONNECT_PIP_PKGS +RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS RUN python3.10 -m pip install 'torch<=2.0.1' torchvision --index-url 
https://download.pytorch.org/whl/cpu RUN python3.10 -m pip install deepspeed torcheval @@ -121,7 +121,7 @@ RUN apt-get update && apt-get install -y \ python3.11 python3.11-distutils \ && rm -rf /var/lib/apt/lists/* RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 -RUN python3.11 -m pip install $BASIC_PIP_PKGS $CONNECT_PIP_PKGS +RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS RUN python3.11 -m pip install 'torch<=2.0.1' torchvision --index-url https://download.pytorch.org/whl/cpu RUN python3.11 -m pip install deepspeed torcheval @@ -131,6 +131,7 @@ RUN apt-get update && apt-get install -y \ python3.12 python3.12-distutils \ && rm -rf /var/lib/apt/lists/* RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 +# TODO(SPARK-46647) Add unittest-xml-reporting into Python 3.12 image when it supports Python 3.12 RUN python3.12 -m pip install $BASIC_PIP_PKGS $CONNECT_PIP_PKGS # TODO(SPARK-46078) Use official one instead of nightly build when it's ready RUN python3.12 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 699a9d07452d..b8ae23613688 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -865,6 +865,7 @@ def __hash__(self): "pyspark.pandas.tests.frame.test_reshaping", "pyspark.pandas.tests.frame.test_spark", "pyspark.pandas.tests.frame.test_take", + "pyspark.pandas.tests.frame.test_take_adv", "pyspark.pandas.tests.frame.test_time_series", "pyspark.pandas.tests.frame.test_truncate", "pyspark.pandas.tests.io.test_io", @@ -1165,6 +1166,7 @@ def __hash__(self): "pyspark.pandas.tests.connect.frame.test_parity_reshaping", "pyspark.pandas.tests.connect.frame.test_parity_spark", "pyspark.pandas.tests.connect.frame.test_parity_take", + "pyspark.pandas.tests.connect.frame.test_parity_take_adv", "pyspark.pandas.tests.connect.frame.test_parity_time_series", "pyspark.pandas.tests.connect.frame.test_parity_truncate", "pyspark.pandas.tests.connect.groupby.test_parity_aggregate", diff --git a/docs/Gemfile b/docs/Gemfile index 6c6760371163..fe2b1a8259b9 100644 --- a/docs/Gemfile +++ b/docs/Gemfile @@ -17,8 +17,12 @@ source "https://rubygems.org" +# Keep these specifications as flexible as possible and leave it to Bundler +# to pin versions in the lock file. +# To update the lock file, run `bundle update`. 
+# Version constraint reference: https://guides.rubygems.org/patterns/#declaring-dependencies gem "ffi", "1.15.5" -gem "jekyll", "4.3.2" +gem "jekyll", "~> 4.3" gem "rouge", "3.26.0" gem "jekyll-redirect-from", "0.16.0" gem "webrick", "1.8.1" diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index eda31f857476..1af211ae1df6 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,7 +1,7 @@ GEM remote: https://rubygems.org/ specs: - addressable (2.8.5) + addressable (2.8.6) public_suffix (>= 2.0.2, < 6.0) colorator (1.1.0) concurrent-ruby (1.2.2) @@ -11,11 +11,11 @@ GEM eventmachine (1.2.7) ffi (1.15.5) forwardable-extended (2.6.0) - google-protobuf (3.24.2) + google-protobuf (3.25.1) http_parser.rb (0.8.0) i18n (1.14.1) concurrent-ruby (~> 1.0) - jekyll (4.3.2) + jekyll (4.3.3) addressable (~> 2.4) colorator (~> 1.0) em-websocket (~> 0.5) @@ -48,20 +48,20 @@ GEM mercenary (0.4.0) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (5.0.3) - rake (13.0.6) + public_suffix (5.0.4) + rake (13.1.0) rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) rexml (3.2.6) rouge (3.26.0) safe_yaml (1.0.5) - sass-embedded (1.63.6) - google-protobuf (~> 3.23) + sass-embedded (1.69.7) + google-protobuf (~> 3.25) rake (>= 13.0.0) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) - unicode-display_width (2.4.2) + unicode-display_width (2.5.0) webrick (1.8.1) PLATFORMS @@ -69,7 +69,7 @@ PLATFORMS DEPENDENCIES ffi (= 1.15.5) - jekyll (= 4.3.2) + jekyll (~> 4.3) jekyll-redirect-from (= 0.16.0) rouge (= 3.26.0) webrick (= 1.8.1) diff --git a/docs/README.md b/docs/README.md index 95f4b9ac9e08..09b2b9b5b41a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,12 +30,13 @@ whichever version of Spark you currently have checked out of revision control. The Spark documentation build uses a number of tools to build HTML docs and API docs in Scala, Java, Python, R, and SQL. -You need to have [Ruby](https://www.ruby-lang.org/en/documentation/installation/) and -[Python](https://docs.python.org/2/using/unix.html#getting-and-installing-the-latest-version-of-python) -installed. Make sure the `bundle` command is available, if not install the Gem containing it: +You need to have [Ruby][ruby] and [Python][python] installed. Make sure the `bundle` command is available. If not, install it as follows: + +[ruby]: https://www.ruby-lang.org/en/documentation/installation/ +[python]: https://www.python.org/downloads/ ```sh -$ sudo gem install bundler +$ gem install bundler ``` After this all the required ruby dependencies can be installed from the `docs/` directory via the Bundler: @@ -45,8 +46,6 @@ $ cd docs $ bundle install ``` -Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0. - To generate the Python or R docs, you'll need to [install Pandoc](https://pandoc.org/installing.html). ### SQL and Python API Documentation (Optional) diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 03f66acb12d8..6acffe8a405d 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -137,25 +137,21 @@ {% if page.url == "/" %}
[hero-banner wrapper markup removed/added in this hunk lost its HTML tags during extraction; only the visible text of the hunk survives]
Apache Spark - A Unified engine for large-scale data analytics
- Apache Spark is a unified analytics engine for large-scale data processing. It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs. It also supports a rich set of higher-level tools including Spark SQL for SQL and structured data processing, pandas API on Spark for pandas workloads, MLlib for machine learning, GraphX for graph processing, and Structured Streaming for incremental computation and stream processing.
+ Apache Spark is a unified analytics engine for large-scale data processing. It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs. It also supports a rich set of higher-level tools including Spark SQL for SQL and structured data processing, pandas API on Spark for pandas workloads, MLlib for machine learning, GraphX for graph processing, and Structured Streaming for incremental computation and stream processing.
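Aside (not part of the patch): a minimal local equivalent of the SKIP_API documentation build added in build_and_test.yml above, sketched under the assumption that Ruby and Bundler are set up as described in docs/README.md. The selective skip flags mirror the ones recognized by the include_api_gen plugin introduced below; exact behavior depends on the docs build plugins.

```sh
# Install the Ruby dependencies pinned by docs/Gemfile.lock.
cd docs
bundle install

# Build the Jekyll site with all API docs skipped, as the new workflow step does.
SKIP_API=1 bundle exec jekyll build

# Or skip only selected language APIs; the include_api_gen tag tolerates the
# missing generated HTML whenever any SKIP_* flag is set.
SKIP_PYTHONDOC=1 SKIP_RDOC=1 bundle exec jekyll build
cd ..
```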
diff --git a/docs/_plugins/conditonal_includes.rb b/docs/_plugins/conditonal_includes.rb new file mode 100644 index 000000000000..7c03a224b348 --- /dev/null +++ b/docs/_plugins/conditonal_includes.rb @@ -0,0 +1,71 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +module Jekyll + # Tag for including a file if it exists. + class IncludeRelativeIfExistsTag < Tags::IncludeRelativeTag + def render(context) + super + rescue IOError + "" + end + end + + # Tag for including files generated as part of the various language APIs. + # If a SKIP_ flag is set, tolerate missing files. If not, raise an error. + class IncludeApiGenTag < Tags::IncludeRelativeTag + @@displayed_warning = false + + def render(context) + super + rescue IOError => e + skip_flags = [ + 'SKIP_API', + 'SKIP_SCALADOC', + 'SKIP_PYTHONDOC', + 'SKIP_RDOC', + 'SKIP_SQLDOC', + ] + set_flags = skip_flags.select { |flag| ENV[flag] } + # A more sophisticated approach would be to accept a tag parameter + # specifying the relevant API so we tolerate missing files only for + # APIs that are explicitly skipped. But this is unnecessary for now. + # Instead, we simply tolerate missing files if _any_ skip flag is set. + if set_flags.any? then + set_flags_string = set_flags.join(', ') + if !@@displayed_warning then + STDERR.puts "Warning: Tolerating missing API files because the " \ + "following skip flags are set: #{set_flags_string}" + @@displayed_warning = true + end + # "skip flags set: `#{set_flags_string}`; " \ + "placeholder for missing API include: `#{@file}`" + else + raise e + end + end + end +end + +Liquid::Template.register_tag( + 'include_relative_if_exists', + Jekyll::IncludeRelativeIfExistsTag, +) + +Liquid::Template.register_tag( + 'include_api_gen', + Jekyll::IncludeApiGenTag, +) diff --git a/docs/_plugins/production_tag.rb b/docs/_plugins/production_tag.rb index 9f870cf2137a..de860cf22ef8 100644 --- a/docs/_plugins/production_tag.rb +++ b/docs/_plugins/production_tag.rb @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# module Jekyll class ProductionTag < Liquid::Block diff --git a/docs/configuration.md b/docs/configuration.md index b45d647fde85..beb52c62d6c2 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -3302,9 +3302,6 @@ Spark subsystems. ### Spark SQL -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-runtime-sql-config-table.html' %} - #### Runtime SQL Configuration Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file @@ -3312,13 +3309,7 @@ and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` th Also, they can be set and queried by SET commands and rest to their initial values by RESET command, or by `SparkSession.conf`'s setter and getter methods in runtime. -{% include_relative generated-runtime-sql-config-table.html %} - {% break %} - {% endif %} -{% endfor %} - -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-static-sql-config-table.html' %} +{% include_api_gen generated-runtime-sql-config-table.html %} #### Static SQL Configuration @@ -3326,11 +3317,7 @@ Static SQL configurations are cross-session, immutable Spark SQL configurations. and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`. External users can query the static sql config values via `SparkSession.conf` or via set command, e.g. `SET spark.sql.extensions;`, but cannot set/unset them. -{% include_relative generated-static-sql-config-table.html %} - {% break %} - {% endif %} -{% endfor %} - +{% include_api_gen generated-static-sql-config-table.html %} ### Spark Streaming diff --git a/docs/css/custom.css b/docs/css/custom.css index e80ca506a74c..51e89066e4d5 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -96,18 +96,7 @@ section { border-color: transparent; } -.hero-banner .bg { - background: url(/img/spark-hero-thin-light.jpg) no-repeat; - transform: translate(36%, 0%); - height: 475px; - top: 0; - position: absolute; - right: 0; - width: 100%; - opacity: 50%; -} - -.hero-banner h1 { +.hero-banner .container .row h1 { color: #0B9ACE; font-style: normal; font-weight: normal; @@ -116,13 +105,6 @@ section { letter-spacing: -0.045em; } -.hero-banner h2 { - font-style: normal; - font-weight: bold; - font-size: 32px; - line-height: 42px; -} - .what-is-spark { font-style: normal; font-weight: normal; @@ -823,18 +805,29 @@ ul { margin-bottom: 10px; } -.global h2, .global .h2 { +.global h1, .global .h1 { font-size: 30px; } -.global h3 { +#content h1.title { + font-size: 40px; +} + +.global h2 { font-size: 24px !important; } +.global h3 { + font-size: 20px !important; +} + .global h4 { font-size: 18px !important; } +.global h5 { + font-size: 16px !important; +} .global h1:first-letter, .global h2:first-letter, .global h3:first-letter, .global h4:first-letter, .global h5:first-letter, .global h6:first-letter, .global .h1:first-letter, .global .h2:first-letter, .global .h3:first-letter, .global .h4:first-letter, .global .h5:first-letter, .global .h6:first-letter { text-transform: uppercase; diff --git a/docs/img/spark-hero-thin-light.jpg b/docs/img/spark-hero-thin-light.jpg deleted file mode 100644 index 4d9ed926b361..000000000000 Binary files a/docs/img/spark-hero-thin-light.jpg and /dev/null differ diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index 561f601aa4e5..abd1901d24e4 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ 
-240,7 +240,7 @@ Data source options of ORC can be set via: compression - snappy + zstd compression codec to use when saving to file. This can be one of the known case-insensitive shorten names (none, snappy, zlib, lzo, zstd and lz4). This will override orc.compress and spark.sql.orc.compression.codec. write diff --git a/docs/sql-data-sources-xml.md b/docs/sql-data-sources-xml.md index b10e054634ed..3b735191fc42 100644 --- a/docs/sql-data-sources-xml.md +++ b/docs/sql-data-sources-xml.md @@ -94,7 +94,7 @@ Data source options of XML can be set via: inferSchema true - If true, attempts to infer an appropriate type for each resulting DataFrame column. If false, all resulting columns are of string type. Default is true. XML built-in functions ignore this option. + If true, attempts to infer an appropriate type for each resulting DataFrame column. If false, all resulting columns are of string type. read @@ -108,7 +108,7 @@ Data source options of XML can be set via: attributePrefix _ - The prefix for attributes to differentiate attributes from elements. This will be the prefix for field names. Default is _. Can be empty for reading XML, but not for writing. + The prefix for attributes to differentiate attributes from elements. This will be the prefix for field names. Can be empty for reading XML, but not for writing. read/write @@ -235,5 +235,12 @@ Data source options of XML can be set via: write + + validateName + true + If true, throws error on XML element name validation failure. For example, SQL field names can have spaces, but XML element names cannot. + write + + Other generic options can be found in Generic File Source Options. diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index f58b7f607a0b..db8ecf5b2a30 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -282,6 +282,12 @@ Cannot recognize hive type string: ``, column: ``. The spe Renaming a `` across schemas is not allowed. +### CANNOT_RESOLVE_DATAFRAME_COLUMN + +[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Cannot resolve dataframe column ``. It's probably because of illegal references like `df1.select(df2.col("a"))`. + ### CANNOT_RESOLVE_STAR_EXPAND [SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 30a37d97042a..dbb25e5adc04 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -36,6 +36,7 @@ license: | - `spark.sql.parquet.int96RebaseModeInRead` instead of `spark.sql.legacy.parquet.int96RebaseModeInRead` - `spark.sql.avro.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.avro.datetimeRebaseModeInWrite` - `spark.sql.avro.datetimeRebaseModeInRead` instead of `spark.sql.legacy.avro.datetimeRebaseModeInRead` +- Since Spark 4.0, the default value of `spark.sql.orc.compression.codec` is changed from `snappy` to `zstd`. To restore the previous behavior, set `spark.sql.orc.compression.codec` to `snappy`. ## Upgrading from Spark SQL 3.4 to 3.5 diff --git a/docs/sql-ref-functions-builtin.md b/docs/sql-ref-functions-builtin.md index 0ff1432fabf8..c5f4e44dec0d 100644 --- a/docs/sql-ref-functions-builtin.md +++ b/docs/sql-ref-functions-builtin.md @@ -17,202 +17,102 @@ license: | limitations under the License. 
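As a minimal sketch of the migration-guide entry above, which moves the default of `spark.sql.orc.compression.codec` from `snappy` to `zstd` in Spark 4.0, restoring the previous behavior could look like the following; it assumes a standard PySpark session, and the app name and output path are illustrative only:

from pyspark.sql import SparkSession

# Pin the ORC codec back to the pre-4.0 default when building the session.
spark = (
    SparkSession.builder
    .appName("orc-codec-compat")  # illustrative app name
    .config("spark.sql.orc.compression.codec", "snappy")
    .getOrCreate()
)

# It is a runtime SQL configuration, so it can also be set on an existing session.
spark.conf.set("spark.sql.orc.compression.codec", "snappy")

# Or override it for a single write via the ORC `compression` option documented above.
df = spark.createDataFrame([(1, "a")], ["id", "value"])
df.write.format("orc").option("compression", "snappy").mode("overwrite").save(
    "/tmp/orc-snappy-example"  # hypothetical output path
)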
--- -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-agg-funcs-table.html' %} ### Aggregate Functions -{% include_relative generated-agg-funcs-table.html %} +{% include_api_gen generated-agg-funcs-table.html %} #### Examples -{% include_relative generated-agg-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-agg-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-window-funcs-table.html' %} ### Window Functions -{% include_relative generated-window-funcs-table.html %} +{% include_api_gen generated-window-funcs-table.html %} #### Examples -{% include_relative generated-window-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-window-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-array-funcs-table.html' %} ### Array Functions -{% include_relative generated-array-funcs-table.html %} +{% include_api_gen generated-array-funcs-table.html %} #### Examples -{% include_relative generated-array-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-array-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-collection-funcs-table.html' %} ### Collection Functions -{% include_relative generated-collection-funcs-table.html %} +{% include_api_gen generated-collection-funcs-table.html %} #### Examples -{% include_relative generated-collection-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-collection-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-struct-funcs-table.html' %} ### STRUCT Functions -{% include_relative generated-struct-funcs-table.html %} +{% include_api_gen generated-struct-funcs-table.html %} #### Examples -{% include_relative generated-struct-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-struct-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-map-funcs-table.html' %} ### Map Functions -{% include_relative generated-map-funcs-table.html %} +{% include_api_gen generated-map-funcs-table.html %} #### Examples -{% include_relative generated-map-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-map-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-datetime-funcs-table.html' %} ### Date and Timestamp Functions -{% include_relative generated-datetime-funcs-table.html %} +{% include_api_gen generated-datetime-funcs-table.html %} #### Examples -{% include_relative generated-datetime-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-datetime-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-math-funcs-table.html' %} ### Mathematical Functions -{% include_relative generated-math-funcs-table.html %} +{% include_api_gen generated-math-funcs-table.html %} #### Examples -{% include_relative generated-math-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-math-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-string-funcs-table.html' %} ### String Functions -{% include_relative 
generated-string-funcs-table.html %} +{% include_api_gen generated-string-funcs-table.html %} #### Examples -{% include_relative generated-string-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-string-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-conditional-funcs-table.html' %} ### Conditional Functions -{% include_relative generated-conditional-funcs-table.html %} +{% include_api_gen generated-conditional-funcs-table.html %} #### Examples -{% include_relative generated-conditional-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-conditional-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-hash-funcs-table.html' %} ### Hash Functions -{% include_relative generated-hash-funcs-table.html %} +{% include_api_gen generated-hash-funcs-table.html %} #### Examples -{% include_relative generated-hash-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-hash-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-csv-funcs-table.html' %} ### CSV Functions -{% include_relative generated-csv-funcs-table.html %} +{% include_api_gen generated-csv-funcs-table.html %} #### Examples -{% include_relative generated-csv-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-csv-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-json-funcs-table.html' %} ### JSON Functions -{% include_relative generated-json-funcs-table.html %} +{% include_api_gen generated-json-funcs-table.html %} #### Examples -{% include_relative generated-json-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-json-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-xml-funcs-table.html' %} ### XML Functions -{% include_relative generated-xml-funcs-table.html %} +{% include_api_gen generated-xml-funcs-table.html %} #### Examples -{% include_relative generated-xml-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-xml-funcs-examples.html %} -{% for static_file in site.static_files %} -{% if static_file.name == 'generated-url-funcs-table.html' %} ### URL Functions -{% include_relative generated-url-funcs-table.html %} +{% include_api_gen generated-url-funcs-table.html %} #### Examples -{% include_relative generated-url-funcs-examples.html %} -{% break %} -{% endif %} -{% endfor %} +{% include_api_gen generated-url-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-bitwise-funcs-table.html' %} ### Bitwise Functions -{% include_relative generated-bitwise-funcs-table.html %} +{% include_api_gen generated-bitwise-funcs-table.html %} #### Examples -{% include_relative generated-bitwise-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-bitwise-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-conversion-funcs-table.html' %} ### Conversion Functions -{% include_relative generated-conversion-funcs-table.html %} +{% include_api_gen generated-conversion-funcs-table.html %} #### Examples -{% include_relative generated-conversion-funcs-examples.html %} - {% break %} - {% endif %} 
-{% endfor %} +{% include_api_gen generated-conversion-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-predicate-funcs-table.html' %} ### Predicate Functions -{% include_relative generated-predicate-funcs-table.html %} +{% include_api_gen generated-predicate-funcs-table.html %} #### Examples -{% include_relative generated-predicate-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-predicate-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-misc-funcs-table.html' %} ### Misc Functions -{% include_relative generated-misc-funcs-table.html %} +{% include_api_gen generated-misc-funcs-table.html %} #### Examples -{% include_relative generated-misc-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-misc-funcs-examples.html %} -{% for static_file in site.static_files %} - {% if static_file.name == 'generated-generator-funcs-table.html' %} ### Generator Functions -{% include_relative generated-generator-funcs-table.html %} +{% include_api_gen generated-generator-funcs-table.html %} #### Examples -{% include_relative generated-generator-funcs-examples.html %} - {% break %} - {% endif %} -{% endfor %} +{% include_api_gen generated-generator-funcs-examples.html %} diff --git a/docs/sql-ref-functions.md b/docs/sql-ref-functions.md index cc9edd61f41e..b4891fe72eb3 100644 --- a/docs/sql-ref-functions.md +++ b/docs/sql-ref-functions.md @@ -20,7 +20,7 @@ license: | --- Spark SQL provides two function features to meet a wide range of user needs: built-in functions and user-defined functions (UDFs). -Built-in functions are commonly used routines that Spark SQL predefines and a complete list of the functions can be found in the [Built-in Functions](api/sql/) API document. UDFs allow users to define their own functions when the system’s built-in functions are not enough to perform the desired task. +Built-in functions are commonly used routines that Spark SQL predefines and a complete list of the functions can be found in the [Built-in Functions](api/sql/index.html) API document. UDFs allow users to define their own functions when the system’s built-in functions are not enough to perform the desired task. 
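Since the paragraph above contrasts built-in functions with user-defined functions, a short sketch of the UDF path may help; it assumes a running PySpark session, and the function, column names, and sample data are illustrative rather than taken from the Spark docs:

from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType

spark = SparkSession.builder.getOrCreate()

# A custom routine not covered by any built-in function (hypothetical logic).
def obfuscate(s):
    return None if s is None else s[0] + "***"

# Wrap it for use with the DataFrame API.
obfuscate_udf = udf(obfuscate, StringType())

df = spark.createDataFrame([("alice",), ("bob",)], ["name"])
df.select(obfuscate_udf(col("name")).alias("masked")).show()

# Register the same function so it can be called from SQL as well.
spark.udf.register("obfuscate", obfuscate, StringType())
spark.sql("SELECT obfuscate('carol') AS masked").show()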
### Built-in Functions diff --git a/pom.xml b/pom.xml index 9703174fc538..51471cd31f62 100644 --- a/pom.xml +++ b/pom.xml @@ -32,7 +32,7 @@ https://spark.apache.org/ - Apache 2.0 License + Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0.html repo diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py index 5a4cd7a1eb07..2df40252965b 100644 --- a/python/pyspark/pandas/data_type_ops/base.py +++ b/python/pyspark/pandas/data_type_ops/base.py @@ -150,7 +150,10 @@ def _as_bool_type(index_ops: IndexOpsLike, dtype: Dtype) -> IndexOpsLike: if isinstance(dtype, extension_dtypes): scol = index_ops.spark.column.cast(spark_type) else: - scol = F.when(index_ops.spark.column.isNull(), F.lit(False)).otherwise( + null_value = ( + F.lit(True) if isinstance(index_ops.spark.data_type, DecimalType) else F.lit(False) + ) + scol = F.when(index_ops.spark.column.isNull(), null_value).otherwise( index_ops.spark.column.cast(spark_type) ) return index_ops._with_new_scol( diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py index a2a9e28a5ab5..205b937fb51d 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_as_type.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_as_type import AsTypeTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class AsTypeParityTests( - AsTypeTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + AsTypeTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py index c277f5ce0664..1623db58af38 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py @@ -20,7 +20,10 @@ from pyspark.testing.connectutils import ReusedConnectTestCase -class BaseParityTests(BaseTestsMixin, ReusedConnectTestCase): +class BaseParityTests( + BaseTestsMixin, + ReusedConnectTestCase, +): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py index 29b13868e03f..42ca98276658 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_binary_ops import BinaryOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class BinaryOpsParityTests( - BinaryOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + 
BinaryOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py index 9ad2aa0ad17a..e14dcefde851 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_boolean_ops import BooleanOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class BooleanOpsParityTests( - BooleanOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + BooleanOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py index 1b4dabdb0453..d5c1fdc09a89 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_categorical_ops import CategoricalOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class CategoricalOpsParityTests( - CategoricalOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + CategoricalOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py index ef587578f4ae..e22bc8988a9b 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_complex_ops import ComplexOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ComplexOpsParityTests( - ComplexOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + ComplexOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py index baa3180baaa7..d8dfd488b325 
100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_date_ops import DateOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class DateOpsParityTests( - DateOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + DateOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py index 2641e3a32dcd..d963db367ac2 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_datetime_ops import DatetimeOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class DatetimeOpsParityTests( - DatetimeOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + DatetimeOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py index 5df4c791c98b..9a2d1ef685ac 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_null_ops import NullOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class NullOpsParityTests( - NullOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + NullOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py index 6f5c294e4ad5..1cce33e17076 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_arithmetic import ArithmeticTestsMixin -from 
pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ArithmeticParityTests( - ArithmeticTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + ArithmeticTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py index 56eba708c945..729443e8b9d8 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_ops import NumOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class NumOpsParityTests( - NumOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + NumOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py index 4d322d8b9b06..0e90dd97887d 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_num_reverse import ReverseTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class ReverseParityTests( - ReverseTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + ReverseTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py index f507756a7a48..bb31ded81102 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py @@ -16,19 +16,19 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.data_type_ops.test_string_ops import StringOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import 
ReusedConnectTestCase class StringOpsParityTests( - StringOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + StringOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py index edd29fa1ed28..819f2e079103 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py @@ -16,19 +16,19 @@ # import unittest -import pyspark.pandas as ps from pyspark.pandas.tests.data_type_ops.test_timedelta_ops import TimedeltaOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class TimedeltaOpsParityTests( - TimedeltaOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + TimedeltaOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): - @property - def psdf(self): - return ps.from_pandas(self.pdf) + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py index 70a79e4cd3f9..d4ce9a4d0499 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py @@ -17,13 +17,16 @@ import unittest from pyspark.pandas.tests.data_type_ops.test_udt_ops import UDTOpsTestsMixin -from pyspark.pandas.tests.connect.data_type_ops.testing_utils import OpsTestBase +from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase class UDTOpsParityTests( - UDTOpsTestsMixin, PandasOnSparkTestUtils, OpsTestBase, ReusedConnectTestCase + UDTOpsTestsMixin, + PandasOnSparkTestUtils, + OpsTestBase, + ReusedConnectTestCase, ): pass diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py deleted file mode 100644 index f1e36aecd194..000000000000 --- a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +++ /dev/null @@ -1,211 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import datetime -import decimal - -import numpy as np -import pandas as pd - -import pyspark.pandas as ps -from pyspark.pandas.typedef.typehints import ( - extension_dtypes_available, - extension_float_dtypes_available, - extension_object_dtypes_available, -) - -if extension_dtypes_available: - from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype - -if extension_float_dtypes_available: - from pandas import Float32Dtype, Float64Dtype - -if extension_object_dtypes_available: - from pandas import BooleanDtype, StringDtype - - -class OpsTestBase: - """The test base for arithmetic operations of different data types.""" - - @property - def numeric_pdf(self): - dtypes = [np.int32, int, np.float32, float] - sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes] - sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)])) - sers.append(pd.Series([1, 2, np.nan], dtype=float)) - sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(np.nan)])) - pdf = pd.concat(sers, axis=1) - pdf.columns = [dtype.__name__ for dtype in dtypes] + [ - "decimal", - "float_nan", - "decimal_nan", - ] - return pdf - - @property - def numeric_psdf(self): - return ps.from_pandas(self.numeric_pdf) - - @property - def numeric_df_cols(self): - return self.numeric_pdf.columns - - @property - def integral_pdf(self): - return pd.DataFrame({"this": [1, 2, 3], "that": [2, 2, 1]}) - - @property - def integral_psdf(self): - return ps.from_pandas(self.integral_pdf) - - @property - def non_numeric_pdf(self): - psers = { - "string": pd.Series(["x", "y", "z"]), - "bool": pd.Series([True, True, False]), - "date": pd.Series( - [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)] - ), - "datetime": pd.to_datetime(pd.Series([1, 2, 3])), - "timedelta": pd.Series( - [datetime.timedelta(1), datetime.timedelta(hours=2), datetime.timedelta(weeks=3)] - ), - "categorical": pd.Series(["a", "b", "a"], dtype="category"), - } - return pd.concat(psers, axis=1) - - @property - def non_numeric_psdf(self): - return ps.from_pandas(self.non_numeric_pdf) - - @property - def non_numeric_df_cols(self): - return self.non_numeric_pdf.columns - - @property - def pdf(self): - return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) - - @property - def df_cols(self): - return self.pdf.columns - - @property - def numeric_psers(self): - dtypes = [np.float32, float, int, np.int32] - sers = [pd.Series([1, 2, 3], dtype=dtype) for dtype in dtypes] - sers.append(pd.Series([decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)])) - return sers - - @property - def numeric_pssers(self): - return [ps.from_pandas(pser) for pser in self.numeric_psers] - - @property - def numeric_pser_psser_pairs(self): - return zip(self.numeric_psers, self.numeric_pssers) - - @property - def non_numeric_psers(self): - psers = { - "string": pd.Series(["x", "y", "z"]), - "datetime": pd.to_datetime(pd.Series([1, 2, 3])), - "bool": pd.Series([True, True, False]), - "date": pd.Series( - [datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)] - ), - "categorical": pd.Series(["a", "b", "a"], dtype="category"), - } - return psers - - @property - def non_numeric_pssers(self): - pssers = {} - - for k, v in self.non_numeric_psers.items(): - pssers[k] = ps.from_pandas(v) - return pssers - - @property - def non_numeric_pser_psser_pairs(self): - return zip(self.non_numeric_psers.values(), self.non_numeric_pssers.values()) - - @property - def pssers(self): - return 
self.numeric_pssers + list(self.non_numeric_pssers.values()) - - @property - def psers(self): - return self.numeric_psers + list(self.non_numeric_psers.values()) - - @property - def pser_psser_pairs(self): - return zip(self.psers, self.pssers) - - @property - def string_extension_dtype(self): - return ["string", StringDtype()] if extension_object_dtypes_available else [] - - @property - def object_extension_dtypes(self): - return ( - ["boolean", "string", BooleanDtype(), StringDtype()] - if extension_object_dtypes_available - else [] - ) - - @property - def fractional_extension_dtypes(self): - return ( - ["Float32", "Float64", Float32Dtype(), Float64Dtype()] - if extension_float_dtypes_available - else [] - ) - - @property - def integral_extension_dtypes(self): - return ( - [ - "Int8", - "Int16", - "Int32", - "Int64", - Int8Dtype(), - Int16Dtype(), - Int32Dtype(), - Int64Dtype(), - ] - if extension_dtypes_available - else [] - ) - - @property - def extension_dtypes(self): - return ( - self.object_extension_dtypes - + self.fractional_extension_dtypes - + self.integral_extension_dtypes - ) - - def check_extension(self, left, right): - """ - Compare `psser` and `pser` of numeric ExtensionDtypes. - - This utility is to adjust an issue for comparing numeric ExtensionDtypes in specific - pandas versions. Please refer to https://github.com/pandas-dev/pandas/issues/39410. - """ - self.assert_eq(left, right) diff --git a/python/pyspark/pandas/tests/connect/frame/test_parity_take.py b/python/pyspark/pandas/tests/connect/frame/test_parity_take.py index 4fc8b2452a76..fe15f37ddd2d 100644 --- a/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +++ b/python/pyspark/pandas/tests/connect/frame/test_parity_take.py @@ -16,16 +16,17 @@ # import unittest -from pyspark import pandas as ps from pyspark.pandas.tests.frame.test_take import FrameTakeMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils -class FrameParityTakeTests(FrameTakeMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @property - def psdf(self): - return ps.from_pandas(self.pdf) +class FrameTakeParityTests( + FrameTakeMixin, + PandasOnSparkTestUtils, + ReusedConnectTestCase, +): + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/connect/frame/test_parity_take_adv.py b/python/pyspark/pandas/tests/connect/frame/test_parity_take_adv.py new file mode 100644 index 000000000000..22b4aea4fd39 --- /dev/null +++ b/python/pyspark/pandas/tests/connect/frame/test_parity_take_adv.py @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import unittest + +from pyspark.pandas.tests.frame.test_take_adv import FrameTakeAdvMixin +from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils + + +class FrameTakeAdvParityTests( + FrameTakeAdvMixin, + PandasOnSparkTestUtils, + ReusedConnectTestCase, +): + pass + + +if __name__ == "__main__": + from pyspark.pandas.tests.connect.frame.test_parity_take_adv import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_as_type.py b/python/pyspark/pandas/tests/data_type_ops/test_as_type.py index 9d5c0d03d548..379d055d585d 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_as_type.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_as_type.py @@ -22,6 +22,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, @@ -54,10 +55,7 @@ def test_astype(self): lambda: psser.astype(int_type), ) - # TODO(SPARK-37039): the np.nan series.astype(bool) should be True - if not pser.hasnans: - self.assert_eq(pser.astype(bool), psser.astype(bool)) - + self.assert_eq(pser.astype(bool), psser.astype(bool)) self.assert_eq(pser.astype(float), psser.astype(float)) self.assert_eq(pser.astype(np.float32), psser.astype(np.float32)) self.assert_eq(pser.astype(str), psser.astype(str)) @@ -87,7 +85,11 @@ def test_astype_eager_check(self): psser.astype(int) -class AsTypeTests(AsTypeTestsMixin, OpsTestBase): +class AsTypeTests( + AsTypeTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_base.py b/python/pyspark/pandas/tests/data_type_ops/test_base.py index 551bbbadfb86..8114b60af935 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_base.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_base.py @@ -91,7 +91,10 @@ def test_bool_ext_ops(self): self.assertIsInstance(DataTypeOps(ExtensionDtype(), BooleanType()), BooleanOps) -class BaseTests(BaseTestsMixin, unittest.TestCase): +class BaseTests( + BaseTestsMixin, + unittest.TestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py index 732cc295bfb0..211d7a094f83 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py @@ -19,6 +19,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -207,7 +208,11 @@ def test_ge(self): self.assert_eq(byte_pdf["this"] >= byte_pdf["this"], byte_psdf["this"] >= byte_psdf["this"]) -class BinaryOpsTests(BinaryOpsTestsMixin, OpsTestBase): +class BinaryOpsTests( + BinaryOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py index 6887d535cffe..bb8067530d64 100644 --- 
a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py @@ -24,6 +24,7 @@ from pyspark import pandas as ps from pyspark.pandas import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_float_dtypes_available, @@ -806,7 +807,11 @@ def test_ge(self): self.check_extension(pser >= pser, psser >= psser) -class BooleanOpsTests(BooleanOpsTestsMixin, OpsTestBase): +class BooleanOpsTests( + BooleanOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py index d9a4ee1fcd8a..cf6a986b7b65 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py @@ -21,6 +21,7 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -539,7 +540,11 @@ def test_ge(self): ) -class CategoricalOpsTests(CategoricalOpsTestsMixin, OpsTestBase): +class CategoricalOpsTests( + CategoricalOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py index f7c66425a902..535fda1359b8 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py @@ -21,6 +21,7 @@ import pandas as pd from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -351,7 +352,11 @@ def test_ge(self): ) -class ComplexOpsTests(ComplexOpsTestsMixin, OpsTestBase): +class ComplexOpsTests( + ComplexOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py index 33332503943d..c2b29ee8a1d3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py @@ -16,12 +16,12 @@ # import datetime -import unittest import pandas as pd from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -235,7 +235,11 @@ def test_ge(self): self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) -class DateOpsTests(DateOpsTestsMixin, OpsTestBase): +class DateOpsTests( + DateOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py index c7bda900b7d5..f98f2011dde0 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py @@ -21,6 +21,7 @@ from pandas.api.types import CategoricalDtype from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from 
pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -236,7 +237,11 @@ def test_ge(self): self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) -class DatetimeOpsTests(DatetimeOpsTestsMixin, OpsTestBase): +class DatetimeOpsTests( + DatetimeOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py index 19a3e7c07359..439557c460dd 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py @@ -19,6 +19,7 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -161,7 +162,11 @@ def test_ge(self): self.assert_eq(pser >= pser, psser >= psser) -class NullOpsTests(NullOpsTestsMixin, OpsTestBase): +class NullOpsTests( + NullOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py b/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py index 3aedd93622e8..f27211f53917 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py @@ -21,6 +21,7 @@ import numpy as np from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -167,7 +168,11 @@ def test_pow(self): self.assertRaises(TypeError, lambda: psser ** psdf[n_col]) -class ArithmeticTests(ArithmeticTestsMixin, OpsTestBase): +class ArithmeticTests( + ArithmeticTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py index b1c80b31651b..e7b157cabb2f 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py @@ -22,6 +22,7 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, @@ -410,7 +411,11 @@ def test_ge(self): self.check_extension(pser >= pser, (psser >= psser).sort_index()) -class NumOpsTests(NumOpsTestsMixin, OpsTestBase): +class NumOpsTests( + NumOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py b/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py index 4e7ee17aec6f..e60fa1e781f0 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py @@ -21,6 +21,7 @@ import pandas as pd from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -124,7 +125,11 @@ def test_rmod(self): self.assertRaises(TypeError, lambda: datetime.datetime(1994, 1, 1) % psser) -class ReverseTests(ReverseTestsMixin, OpsTestBase): +class ReverseTests( + ReverseTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git 
a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py index 340153b06335..9648ad9ab2c0 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py @@ -23,6 +23,7 @@ from pyspark import pandas as ps from pyspark.pandas.config import option_context +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase from pyspark.pandas.typedef.typehints import extension_object_dtypes_available @@ -229,7 +230,11 @@ def test_ge(self): self.assert_eq(pser >= pser, psser >= psser) -class StringOpsTests(StringOpsTestsMixin, OpsTestBase): +class StringOpsTests( + StringOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py index f89ec17ec12b..5ea60742d9e7 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py @@ -21,6 +21,7 @@ from pandas.api.types import CategoricalDtype import pyspark.pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -202,7 +203,11 @@ def test_ge(self): self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"]) -class TimedeltaOpsTests(TimedeltaOpsTestsMixin, OpsTestBase): +class TimedeltaOpsTests( + TimedeltaOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py index 45f8cca56ee9..60b4153198a3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py @@ -19,6 +19,7 @@ import pyspark.pandas as ps from pyspark.ml.linalg import SparseVector +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase @@ -175,7 +176,11 @@ def test_ge(self): ) -class UDTOpsTests(UDTOpsTestsMixin, OpsTestBase): +class UDTOpsTests( + UDTOpsTestsMixin, + OpsTestBase, + PandasOnSparkTestCase, +): pass diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py index 37a708948a80..089b929db1b3 100644 --- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py @@ -27,7 +27,6 @@ extension_float_dtypes_available, extension_object_dtypes_available, ) -from pyspark.testing.pandasutils import ComparisonTestBase if extension_dtypes_available: from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype @@ -39,7 +38,7 @@ from pandas import BooleanDtype, StringDtype -class OpsTestBase(ComparisonTestBase): +class OpsTestBase: """The test base for arithmetic operations of different data types.""" @property @@ -101,6 +100,10 @@ def non_numeric_df_cols(self): def pdf(self): return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1) + @property + def psdf(self): + return ps.from_pandas(self.pdf) + @property def df_cols(self): return self.pdf.columns diff --git a/python/pyspark/pandas/tests/frame/test_take.py b/python/pyspark/pandas/tests/frame/test_take.py index 3654436848b6..c3e9440d1dc1 100644 --- 
a/python/pyspark/pandas/tests/frame/test_take.py +++ b/python/pyspark/pandas/tests/frame/test_take.py @@ -19,7 +19,7 @@ import pandas as pd from pyspark import pandas as ps -from pyspark.testing.pandasutils import ComparisonTestBase +from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils @@ -72,61 +72,12 @@ def test_take(self): pdf.take([-1, -2], axis=1).sort_index(), ) - # MultiIndex columns - columns = pd.MultiIndex.from_tuples([("A", "Z"), ("B", "X"), ("C", "C")]) - psdf.columns = columns - pdf.columns = columns - # MultiIndex columns with axis=0 (default) - self.assert_eq(psdf.take([1, 2]).sort_index(), pdf.take([1, 2]).sort_index()) - self.assert_eq(psdf.take([-1, -2]).sort_index(), pdf.take([-1, -2]).sort_index()) - self.assert_eq( - psdf.take(range(100, 110)).sort_index(), pdf.take(range(100, 110)).sort_index() - ) - self.assert_eq( - psdf.take(range(-110, -100)).sort_index(), pdf.take(range(-110, -100)).sort_index() - ) - self.assert_eq( - psdf.take([10, 100, 1000, 10000]).sort_index(), - pdf.take([10, 100, 1000, 10000]).sort_index(), - ) - self.assert_eq( - psdf.take([-10, -100, -1000, -10000]).sort_index(), - pdf.take([-10, -100, -1000, -10000]).sort_index(), - ) - - # axis=1 - self.assert_eq( - psdf.take([1, 2], axis=1).sort_index(), pdf.take([1, 2], axis=1).sort_index() - ) - self.assert_eq( - psdf.take([-1, -2], axis=1).sort_index(), pdf.take([-1, -2], axis=1).sort_index() - ) - self.assert_eq( - psdf.take(range(1, 3), axis=1).sort_index(), - pdf.take(range(1, 3), axis=1).sort_index(), - ) - self.assert_eq( - psdf.take(range(-1, -3), axis=1).sort_index(), - pdf.take(range(-1, -3), axis=1).sort_index(), - ) - self.assert_eq( - psdf.take([2, 1], axis=1).sort_index(), - pdf.take([2, 1], axis=1).sort_index(), - ) - self.assert_eq( - psdf.take([-1, -2], axis=1).sort_index(), - pdf.take([-1, -2], axis=1).sort_index(), - ) - - # Checking the type of indices. - self.assertRaises(TypeError, lambda: psdf.take(1)) - self.assertRaises(TypeError, lambda: psdf.take("1")) - self.assertRaises(TypeError, lambda: psdf.take({1, 2})) - self.assertRaises(TypeError, lambda: psdf.take({1: None, 2: None})) - - -class FrameTakeTests(FrameTakeMixin, ComparisonTestBase, SQLTestUtils): +class FrameTakeTests( + FrameTakeMixin, + PandasOnSparkTestCase, + SQLTestUtils, +): pass diff --git a/python/pyspark/pandas/tests/frame/test_take_adv.py b/python/pyspark/pandas/tests/frame/test_take_adv.py new file mode 100644 index 000000000000..71899307683d --- /dev/null +++ b/python/pyspark/pandas/tests/frame/test_take_adv.py @@ -0,0 +1,104 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import unittest + +import pandas as pd + +from pyspark import pandas as ps +from pyspark.testing.pandasutils import PandasOnSparkTestCase +from pyspark.testing.sqlutils import SQLTestUtils + + +class FrameTakeAdvMixin: + def test_take_adv(self): + pdf = pd.DataFrame( + {"A": range(0, 50000), "B": range(100000, 0, -2), "C": range(100000, 50000, -1)} + ) + psdf = ps.from_pandas(pdf) + + # MultiIndex columns + columns = pd.MultiIndex.from_tuples([("A", "Z"), ("B", "X"), ("C", "C")]) + psdf.columns = columns + pdf.columns = columns + + # MultiIndex columns with axis=0 (default) + self.assert_eq(psdf.take([1, 2]).sort_index(), pdf.take([1, 2]).sort_index()) + self.assert_eq(psdf.take([-1, -2]).sort_index(), pdf.take([-1, -2]).sort_index()) + self.assert_eq( + psdf.take(range(100, 110)).sort_index(), pdf.take(range(100, 110)).sort_index() + ) + self.assert_eq( + psdf.take(range(-110, -100)).sort_index(), pdf.take(range(-110, -100)).sort_index() + ) + self.assert_eq( + psdf.take([10, 100, 1000, 10000]).sort_index(), + pdf.take([10, 100, 1000, 10000]).sort_index(), + ) + self.assert_eq( + psdf.take([-10, -100, -1000, -10000]).sort_index(), + pdf.take([-10, -100, -1000, -10000]).sort_index(), + ) + + # axis=1 + self.assert_eq( + psdf.take([1, 2], axis=1).sort_index(), pdf.take([1, 2], axis=1).sort_index() + ) + self.assert_eq( + psdf.take([-1, -2], axis=1).sort_index(), pdf.take([-1, -2], axis=1).sort_index() + ) + self.assert_eq( + psdf.take(range(1, 3), axis=1).sort_index(), + pdf.take(range(1, 3), axis=1).sort_index(), + ) + self.assert_eq( + psdf.take(range(-1, -3), axis=1).sort_index(), + pdf.take(range(-1, -3), axis=1).sort_index(), + ) + self.assert_eq( + psdf.take([2, 1], axis=1).sort_index(), + pdf.take([2, 1], axis=1).sort_index(), + ) + self.assert_eq( + psdf.take([-1, -2], axis=1).sort_index(), + pdf.take([-1, -2], axis=1).sort_index(), + ) + + # Checking the type of indices. 
+ self.assertRaises(TypeError, lambda: psdf.take(1)) + self.assertRaises(TypeError, lambda: psdf.take("1")) + self.assertRaises(TypeError, lambda: psdf.take({1, 2})) + self.assertRaises(TypeError, lambda: psdf.take({1: None, 2: None})) + + +class FrameTakeAdvTests( + FrameTakeAdvMixin, + PandasOnSparkTestCase, + SQLTestUtils, +): + pass + + +if __name__ == "__main__": + from pyspark.pandas.tests.frame.test_take_adv import * # noqa: F401 + + try: + import xmlrunner + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py index 52975917ea02..51698f262fc5 100644 --- a/python/pyspark/sql/connect/readwriter.py +++ b/python/pyspark/sql/connect/readwriter.py @@ -792,6 +792,7 @@ def xml( timestampFormat: Optional[str] = None, compression: Optional[str] = None, encoding: Optional[str] = None, + validateName: Optional[bool] = None, ) -> None: self.mode(mode) self._set_opts( @@ -806,6 +807,7 @@ def xml( timestampFormat=timestampFormat, compression=compression, encoding=encoding, + validateName=validateName, ) self.format("xml").save(path) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 711d9b447a60..5b3c1d83d5ac 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -14925,7 +14925,7 @@ def to_xml(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col @_try_remote_functions def schema_of_csv(csv: Union[Column, str], options: Optional[Dict[str, str]] = None) -> Column: """ - Parses a CSV string and infers its schema in DDL format. + CSV Function: Parses a CSV string and infers its schema in DDL format. .. versionadded:: 3.0.0 @@ -14935,9 +14935,9 @@ def schema_of_csv(csv: Union[Column, str], options: Optional[Dict[str, str]] = N Parameters ---------- csv : :class:`~pyspark.sql.Column` or str - a CSV string or a foldable string column containing a CSV string. + A CSV string or a foldable string column containing a CSV string. options : dict, optional - options to control parsing. accepts the same options as the CSV datasource. + Options to control parsing. Accepts the same options as the CSV datasource. See `Data Source Option `_ for the version you use. @@ -14946,15 +14946,53 @@ def schema_of_csv(csv: Union[Column, str], options: Optional[Dict[str, str]] = N Returns ------- :class:`~pyspark.sql.Column` - a string representation of a :class:`StructType` parsed from given CSV. + A string representation of a :class:`StructType` parsed from the given CSV. 
Examples -------- + Example 1: Inferring the schema of a CSV string with different data types + + >>> from pyspark.sql import functions as sf + >>> df = spark.range(1) + >>> df.select(sf.schema_of_csv(sf.lit('1|a|true'), {'sep':'|'})).show(truncate=False) + +-------------------------------------------+ + |schema_of_csv(1|a|true) | + +-------------------------------------------+ + |STRUCT<_c0: INT, _c1: STRING, _c2: BOOLEAN>| + +-------------------------------------------+ + + Example 2: Inferring the schema of a CSV string with missing values + + >>> from pyspark.sql import functions as sf + >>> df = spark.range(1) + >>> df.select(sf.schema_of_csv(sf.lit('1||true'), {'sep':'|'})).show(truncate=False) + +-------------------------------------------+ + |schema_of_csv(1||true) | + +-------------------------------------------+ + |STRUCT<_c0: INT, _c1: STRING, _c2: BOOLEAN>| + +-------------------------------------------+ + + Example 3: Inferring the schema of a CSV string with a different delimiter + + >>> from pyspark.sql import functions as sf >>> df = spark.range(1) - >>> df.select(schema_of_csv(lit('1|a'), {'sep':'|'}).alias("csv")).collect() - [Row(csv='STRUCT<_c0: INT, _c1: STRING>')] - >>> df.select(schema_of_csv('1|a', {'sep':'|'}).alias("csv")).collect() - [Row(csv='STRUCT<_c0: INT, _c1: STRING>')] + >>> df.select(sf.schema_of_csv(sf.lit('1;a;true'), {'sep':';'})).show(truncate=False) + +-------------------------------------------+ + |schema_of_csv(1;a;true) | + +-------------------------------------------+ + |STRUCT<_c0: INT, _c1: STRING, _c2: BOOLEAN>| + +-------------------------------------------+ + + Example 4: Inferring the schema of a CSV string with quoted fields + + >>> from pyspark.sql import functions as sf + >>> df = spark.range(1) + >>> df.select(sf.schema_of_csv(sf.lit('"1","a","true"'), {'sep':','})).show(truncate=False) + +-------------------------------------------+ + |schema_of_csv("1","a","true") | + +-------------------------------------------+ + |STRUCT<_c0: INT, _c1: STRING, _c2: BOOLEAN>| + +-------------------------------------------+ """ if isinstance(csv, str): col = _create_column_from_literal(csv) @@ -14969,10 +15007,12 @@ def schema_of_csv(csv: Union[Column, str], options: Optional[Dict[str, str]] = N return _invoke_function("schema_of_csv", col, _options_to_str(options)) +# TODO(SPARK-46654) Re-enable the `Example 2` test after fixing the display +# difference between Regular Spark and Spark Connect on `df.show`. @_try_remote_functions def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: """ - Converts a column containing a :class:`StructType` into a CSV string. + CSV Function: Converts a column containing a :class:`StructType` into a CSV string. Throws an exception, in the case of an unsupported type. .. versionadded:: 3.0.0 @@ -14983,9 +15023,9 @@ def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col Parameters ---------- col : :class:`~pyspark.sql.Column` or str - name of column containing a struct. + Name of column containing a struct. options: dict, optional - options to control converting. accepts the same options as the CSV datasource. + Options to control converting. Accepts the same options as the CSV datasource. See `Data Source Option `_ for the version you use. @@ -14994,15 +15034,65 @@ def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col Returns ------- :class:`~pyspark.sql.Column` - a CSV string converted from given :class:`StructType`. 
+ A CSV string converted from the given :class:`StructType`. Examples -------- - >>> from pyspark.sql import Row + Example 1: Converting a simple StructType to a CSV string + + >>> from pyspark.sql import Row, functions as sf >>> data = [(1, Row(age=2, name='Alice'))] >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(to_csv(df.value).alias("csv")).collect() - [Row(csv='2,Alice')] + >>> df.select(sf.to_csv(df.value)).show() + +-------------+ + |to_csv(value)| + +-------------+ + | 2,Alice| + +-------------+ + + Example 2: Converting a complex StructType to a CSV string + + >>> from pyspark.sql import Row, functions as sf + >>> data = [(1, Row(age=2, name='Alice', scores=[100, 200, 300]))] + >>> df = spark.createDataFrame(data, ("key", "value")) + >>> df.select(sf.to_csv(df.value)).show(truncate=False) # doctest: +SKIP + +-----------------------+ + |to_csv(value) | + +-----------------------+ + |2,Alice,"[100,200,300]"| + +-----------------------+ + + Example 3: Converting a StructType with null values to a CSV string + + >>> from pyspark.sql import Row, functions as sf + >>> from pyspark.sql.types import StructType, StructField, IntegerType, StringType + >>> data = [(1, Row(age=None, name='Alice'))] + >>> schema = StructType([ + ... StructField("key", IntegerType(), True), + ... StructField("value", StructType([ + ... StructField("age", IntegerType(), True), + ... StructField("name", StringType(), True) + ... ]), True) + ... ]) + >>> df = spark.createDataFrame(data, schema) + >>> df.select(sf.to_csv(df.value)).show() + +-------------+ + |to_csv(value)| + +-------------+ + | ,Alice| + +-------------+ + + Example 4: Converting a StructType with different data types to a CSV string + + >>> from pyspark.sql import Row, functions as sf + >>> data = [(1, Row(age=2, name='Alice', isStudent=True))] + >>> df = spark.createDataFrame(data, ("key", "value")) + >>> df.select(sf.to_csv(df.value)).show() + +-------------+ + |to_csv(value)| + +-------------+ + | 2,Alice,true| + +-------------+ """ return _invoke_function("to_csv", _to_java_column(col), _options_to_str(options)) @@ -16228,8 +16318,8 @@ def from_csv( options: Optional[Dict[str, str]] = None, ) -> Column: """ - Parses a column containing a CSV string to a row with the specified schema. - Returns `null`, in the case of an unparseable string. + CSV Function: Parses a column containing a CSV string into a row with the specified schema. + Returns `null` if the string cannot be parsed. .. versionadded:: 3.0.0 @@ -16239,11 +16329,11 @@ def from_csv( Parameters ---------- col : :class:`~pyspark.sql.Column` or str - a column or column name in CSV format - schema :class:`~pyspark.sql.Column` or str - a column, or Python string literal with schema in DDL format, to use when parsing the CSV column. + A column or column name in CSV format. + schema : :class:`~pyspark.sql.Column` or str + A column, or Python string literal with schema in DDL format, to use when parsing the CSV column. options : dict, optional - options to control parsing. accepts the same options as the CSV datasource. + Options to control parsing. Accepts the same options as the CSV datasource. See `Data Source Option `_ for the version you use. @@ -16252,22 +16342,71 @@ def from_csv( Returns ------- :class:`~pyspark.sql.Column` - a column of parsed CSV values + A column of parsed CSV values. 
Examples -------- + Example 1: Parsing a simple CSV string + + >>> from pyspark.sql import functions as sf >>> data = [("1,2,3",)] >>> df = spark.createDataFrame(data, ("value",)) - >>> df.select(from_csv(df.value, "a INT, b INT, c INT").alias("csv")).collect() - [Row(csv=Row(a=1, b=2, c=3))] + >>> df.select(sf.from_csv(df.value, "a INT, b INT, c INT")).show() + +---------------+ + |from_csv(value)| + +---------------+ + | {1, 2, 3}| + +---------------+ + + Example 2: Using schema_of_csv to infer the schema + + >>> from pyspark.sql import functions as sf + >>> data = [("1,2,3",)] >>> value = data[0][0] - >>> df.select(from_csv(df.value, schema_of_csv(value)).alias("csv")).collect() - [Row(csv=Row(_c0=1, _c1=2, _c2=3))] + >>> df.select(sf.from_csv(df.value, sf.schema_of_csv(value))).show() + +---------------+ + |from_csv(value)| + +---------------+ + | {1, 2, 3}| + +---------------+ + + Example 3: Ignoring leading white space in the CSV string + + >>> from pyspark.sql import functions as sf >>> data = [(" abc",)] >>> df = spark.createDataFrame(data, ("value",)) >>> options = {'ignoreLeadingWhiteSpace': True} - >>> df.select(from_csv(df.value, "s string", options).alias("csv")).collect() - [Row(csv=Row(s='abc'))] + >>> df.select(sf.from_csv(df.value, "s string", options)).show() + +---------------+ + |from_csv(value)| + +---------------+ + | {abc}| + +---------------+ + + Example 4: Parsing a CSV string with a missing value + + >>> from pyspark.sql import functions as sf + >>> data = [("1,2,",)] + >>> df = spark.createDataFrame(data, ("value",)) + >>> df.select(sf.from_csv(df.value, "a INT, b INT, c INT")).show() + +---------------+ + |from_csv(value)| + +---------------+ + | {1, 2, NULL}| + +---------------+ + + Example 5: Parsing a CSV string with a different delimiter + + >>> from pyspark.sql import functions as sf + >>> data = [("1;2;3",)] + >>> df = spark.createDataFrame(data, ("value",)) + >>> options = {'delimiter': ';'} + >>> df.select(sf.from_csv(df.value, "a INT, b INT, c INT", options)).show() + +---------------+ + |from_csv(value)| + +---------------+ + | {1, 2, 3}| + +---------------+ """ _get_active_spark_context() diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index b61284247b0e..db9220fc48bb 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -2096,6 +2096,7 @@ def xml( timestampFormat: Optional[str] = None, compression: Optional[str] = None, encoding: Optional[str] = None, + validateName: Optional[bool] = None, ) -> None: r"""Saves the content of the :class:`DataFrame` in XML format at the specified path. 
@@ -2155,6 +2156,7 @@ def xml( timestampFormat=timestampFormat, compression=compression, encoding=encoding, + validateName=validateName, ) self._jwrite.xml(path) diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 045ba8f0060d..a1cd00e79e1a 100755 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -543,10 +543,21 @@ def test_invalid_column(self): with self.assertRaises(AnalysisException): cdf2.withColumn("x", cdf1.a + 1).schema - with self.assertRaisesRegex(AnalysisException, "attribute.*missing"): + # Can find the target plan node, but fail to resolve with it + with self.assertRaisesRegex( + AnalysisException, + "UNRESOLVED_COLUMN.WITH_SUGGESTION", + ): cdf3 = cdf1.select(cdf1.a) cdf3.select(cdf1.b).schema + # Can not find the target plan node by plan id + with self.assertRaisesRegex( + AnalysisException, + "CANNOT_RESOLVE_DATAFRAME_COLUMN", + ): + cdf1.select(cdf2.a).schema + def test_collect(self): cdf = self.connect.read.table(self.tbl_name) sdf = self.spark.read.table(self.tbl_name) diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py index ec9f208d08f9..8f7229e1d74f 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py @@ -31,7 +31,7 @@ pandas_requirement_message, pyarrow_requirement_message, ) -from pyspark.testing.utils import QuietTest +from pyspark.testing.utils import QuietTest, eventually if have_pandas: import pandas as pd @@ -381,6 +381,7 @@ def test_self_join(self): self.assertEqual(sorted(actual), sorted(expected)) # SPARK-33277 + @eventually(timeout=180, catch_assertions=True) def test_map_in_pandas_with_column_vector(self): path = tempfile.mkdtemp() shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index f1d690751ead..c77e7fd89d01 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -26,7 +26,6 @@ import io from contextlib import redirect_stdout -from pyspark import StorageLevel from pyspark.sql import SparkSession, Row, functions from pyspark.sql.functions import col, lit, count, sum, mean, struct from pyspark.sql.types import ( @@ -70,6 +69,14 @@ def test_range(self): self.assertEqual(self.spark.range(-2).count(), 0) self.assertEqual(self.spark.range(3).count(), 3) + def test_self_join(self): + df1 = self.spark.range(10).withColumn("a", lit(0)) + df2 = df1.withColumnRenamed("a", "b") + df = df1.join(df2, df1["a"] == df2["b"]) + self.assertTrue(df.count() == 100) + df = df2.join(df1, df2["b"] == df1["a"]) + self.assertTrue(df.count() == 100) + def test_duplicated_column_names(self): df = self.spark.createDataFrame([(1, 2)], ["c", "c"]) row = df.select("*").first() diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index d746e9037ec4..5ec72b83837e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import java.util.concurrent.atomic.AtomicReference import scala.util.Try @@ -48,25 +47,14 @@ private[sql] trait SqlApiConf { private[sql] object SqlApiConf { // Shared keys. 
- val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" - val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" - val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" - val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" - val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + val ANSI_ENABLED_KEY: String = SqlApiConfHelper.ANSI_ENABLED_KEY + val LEGACY_TIME_PARSER_POLICY_KEY: String = SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY + val CASE_SENSITIVE_KEY: String = SqlApiConfHelper.CASE_SENSITIVE_KEY + val SESSION_LOCAL_TIMEZONE_KEY: String = SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = + SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY - /** - * Defines a getter that returns the [[SqlApiConf]] within scope. - */ - private val confGetter = new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) - - /** - * Sets the active config getter. - */ - private[sql] def setConfGetter(getter: () => SqlApiConf): Unit = { - confGetter.set(getter) - } - - def get: SqlApiConf = confGetter.get()() + def get: SqlApiConf = SqlApiConfHelper.getConfGetter.get()() // Force load SQLConf. This will trigger the installation of a confGetter that points to SQLConf. Try(SparkClassUtils.classForName("org.apache.spark.sql.internal.SQLConf$")) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala new file mode 100644 index 000000000000..79b6cb9231c5 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.internal + +import java.util.concurrent.atomic.AtomicReference + +/** + * SqlApiConfHelper is created to avoid a deadlock during a concurrent access to SQLConf and + * SqlApiConf, which is because SQLConf and SqlApiConf tries to load each other upon + * initializations. SqlApiConfHelper is private to sql package and is not supposed to be + * accessed by end users. Variables and methods within SqlApiConfHelper are defined to + * be used by SQLConf and SqlApiConf only. + */ +private[sql] object SqlApiConfHelper { + // Shared keys. 
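+  // The key strings are defined here once so that both SQLConf and SqlApiConf can
+  // reference the same constants without having to load each other during initialization.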
+ val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" + val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" + val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" + val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + + val confGetter: AtomicReference[() => SqlApiConf] = { + new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) + } + + def getConfGetter: AtomicReference[() => SqlApiConf] = confGetter + + /** + * Sets the active config getter. + */ + def setConfGetter(getter: () => SqlApiConf): Unit = { + confGetter.set(getter) + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java index b4fb9eae48da..8fe59cb7fae5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java @@ -17,8 +17,10 @@ package org.apache.spark.sql.catalyst.expressions; +import org.apache.spark.SparkBuildInfo; import org.apache.spark.sql.errors.QueryExecutionErrors; import org.apache.spark.unsafe.types.UTF8String; +import org.apache.spark.util.VersionUtils; import javax.crypto.Cipher; import javax.crypto.spec.GCMParameterSpec; @@ -143,6 +145,17 @@ public static byte[] aesDecrypt(byte[] input, ); } + /** + * Function to return the Spark version. + * @return + * Space separated version and revision. + */ + public static UTF8String getSparkVersion() { + String shortVersion = VersionUtils.shortVersion(SparkBuildInfo.spark_version()); + String revision = SparkBuildInfo.spark_revision(); + return UTF8String.fromString(shortVersion + " " + revision); + } + private static SecretKeySpec getSecretKeySpec(byte[] key) { return switch (key.length) { case 16, 24, 32 -> new SecretKeySpec(key, 0, key.length, "AES"); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java index 4d787eaf9644..d287288ba33f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java @@ -21,6 +21,7 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.connector.expressions.Expression; +import org.apache.spark.sql.connector.expressions.SortValue; import org.apache.spark.sql.internal.connector.ExpressionWithToString; /** @@ -41,7 +42,9 @@ *
 *  <li><pre>REGR_R2(input1, input2)</pre> Since 3.4.0</li>
 *  <li><pre>REGR_SLOPE(input1, input2)</pre> Since 3.4.0</li>
 *  <li><pre>REGR_SXY(input1, input2)</pre> Since 3.4.0</li>
- *  <li><pre>MODE(input1[, inverse])</pre> Since 4.0.0</li>
+ *  <li><pre>MODE() WITHIN (ORDER BY input1 [ASC|DESC])</pre> Since 4.0.0</li>
+ *  <li><pre>PERCENTILE_CONT(input1) WITHIN (ORDER BY input2 [ASC|DESC])</pre> Since 4.0.0</li>
+ *  <li><pre>PERCENTILE_DISC(input1) WITHIN (ORDER BY input2 [ASC|DESC])</pre> Since 4.0.0</li>
  • * * * @since 3.3.0 @@ -51,11 +54,21 @@ public final class GeneralAggregateFunc extends ExpressionWithToString implement private final String name; private final boolean isDistinct; private final Expression[] children; + private final SortValue[] orderingWithinGroups; public GeneralAggregateFunc(String name, boolean isDistinct, Expression[] children) { this.name = name; this.isDistinct = isDistinct; this.children = children; + this.orderingWithinGroups = new SortValue[]{}; + } + + public GeneralAggregateFunc( + String name, boolean isDistinct, Expression[] children, SortValue[] orderingWithinGroups) { + this.name = name; + this.isDistinct = isDistinct; + this.children = children; + this.orderingWithinGroups = orderingWithinGroups; } public String name() { return name; } @@ -64,6 +77,8 @@ public GeneralAggregateFunc(String name, boolean isDistinct, Expression[] childr @Override public Expression[] children() { return children; } + public SortValue[] orderingWithinGroups() { return orderingWithinGroups; } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -73,7 +88,8 @@ public boolean equals(Object o) { if (isDistinct != that.isDistinct) return false; if (!name.equals(that.name)) return false; - return Arrays.equals(children, that.children); + if (!Arrays.equals(children, that.children)) return false; + return Arrays.equals(orderingWithinGroups, that.orderingWithinGroups); } @Override @@ -81,6 +97,7 @@ public int hashCode() { int result = name.hashCode(); result = 31 * result + (isDistinct ? 1 : 0); result = 31 * result + Arrays.hashCode(children); + result = 31 * result + Arrays.hashCode(orderingWithinGroups); return result; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index fb11de4fdedd..1035d2da0240 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -146,8 +146,16 @@ yield visitBinaryArithmetic( return visitAggregateFunction("AVG", avg.isDistinct(), expressionsToStringArray(avg.children())); } else if (expr instanceof GeneralAggregateFunc f) { - return visitAggregateFunction(f.name(), f.isDistinct(), - expressionsToStringArray(f.children())); + if (f.orderingWithinGroups().length == 0) { + return visitAggregateFunction(f.name(), f.isDistinct(), + expressionsToStringArray(f.children())); + } else { + return visitInverseDistributionFunction( + f.name(), + f.isDistinct(), + expressionsToStringArray(f.children()), + expressionsToStringArray(f.orderingWithinGroups())); + } } else if (expr instanceof UserDefinedScalarFunc f) { return visitUserDefinedScalarFunction(f.name(), f.canonicalName(), expressionsToStringArray(f.children())); @@ -273,6 +281,15 @@ protected String visitAggregateFunction( } } + protected String visitInverseDistributionFunction( + String funcName, boolean isDistinct, String[] inputs, String[] orderingWithinGroups) { + assert(isDistinct == false); + String withinGroup = + joinArrayToString(orderingWithinGroups, ", ", "WITHIN GROUP (ORDER BY ", ")"); + String functionCall = joinArrayToString(inputs, ", ", funcName + "(", ")"); + return functionCall + " " + withinGroup; + } + protected String visitUserDefinedScalarFunction( String funcName, String canonicalName, String[] inputs) { throw new SparkUnsupportedOperationException( diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index a90c61565039..3261aa51b9be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -426,7 +426,7 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { throws: Boolean = false, includeLastResort: Boolean = false): Expression = { resolveExpression( - tryResolveColumnByPlanId(expr, plan), + tryResolveDataFrameColumns(expr, Seq(plan)), resolveColumnByName = nameParts => { plan.resolve(nameParts, conf.resolver) }, @@ -448,7 +448,7 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { q: LogicalPlan, includeLastResort: Boolean = false): Expression = { resolveExpression( - tryResolveColumnByPlanId(e, q), + tryResolveDataFrameColumns(e, q.children), resolveColumnByName = nameParts => { q.resolveChildren(nameParts, conf.resolver) }, @@ -485,80 +485,107 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { // 4. if more than one matching nodes are found, fail due to ambiguous column reference; // 5. resolve the expression with the matching node, if any error occurs here, return the // original expression as it is. - private def tryResolveColumnByPlanId( + private def tryResolveDataFrameColumns( e: Expression, - q: LogicalPlan, - idToPlan: mutable.HashMap[Long, LogicalPlan] = mutable.HashMap.empty): Expression = e match { + q: Seq[LogicalPlan]): Expression = e match { case u: UnresolvedAttribute => - resolveUnresolvedAttributeByPlanId( - u, q, idToPlan: mutable.HashMap[Long, LogicalPlan] - ).getOrElse(u) + resolveDataFrameColumn(u, q).getOrElse(u) case _ if e.containsPattern(UNRESOLVED_ATTRIBUTE) => - e.mapChildren(c => tryResolveColumnByPlanId(c, q, idToPlan)) + e.mapChildren(c => tryResolveDataFrameColumns(c, q)) case _ => e } - private def resolveUnresolvedAttributeByPlanId( + private def resolveDataFrameColumn( u: UnresolvedAttribute, - q: LogicalPlan, - idToPlan: mutable.HashMap[Long, LogicalPlan]): Option[NamedExpression] = { + q: Seq[LogicalPlan]): Option[NamedExpression] = { val planIdOpt = u.getTagValue(LogicalPlan.PLAN_ID_TAG) if (planIdOpt.isEmpty) return None val planId = planIdOpt.get logDebug(s"Extract plan_id $planId from $u") - val plan = idToPlan.getOrElseUpdate(planId, { - findPlanById(u, planId, q).getOrElse { - // For example: - // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]]) - // df2 = spark.createDataFrame([Row(a = 1, b = 2)]]) - // df1.select(df2.a) <- illegal reference df2.a - throw new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_3051", - messageParameters = Map( - "u" -> u.toString, - "planId" -> planId.toString, - "q" -> q.toString)) - } - }) + val isMetadataAccess = u.getTagValue(LogicalPlan.IS_METADATA_COL).nonEmpty + val (resolved, matched) = resolveDataFrameColumnByPlanId(u, planId, isMetadataAccess, q) + if (!matched) { + // Can not find the target plan node with plan id, e.g. 
+ // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]]) + // df2 = spark.createDataFrame([Row(a = 1, b = 2)]]) + // df1.select(df2.a) <- illegal reference df2.a + throw QueryCompilationErrors.cannotResolveColumn(u) + } + resolved + } - val isMetadataAccess = u.getTagValue(LogicalPlan.IS_METADATA_COL).isDefined - try { - if (!isMetadataAccess) { - plan.resolve(u.nameParts, conf.resolver) - } else if (u.nameParts.size == 1) { - plan.getMetadataAttributeByNameOpt(u.nameParts.head) - } else { - None + private def resolveDataFrameColumnByPlanId( + u: UnresolvedAttribute, + id: Long, + isMetadataAccess: Boolean, + q: Seq[LogicalPlan]): (Option[NamedExpression], Boolean) = { + q.iterator.map(resolveDataFrameColumnRecursively(u, id, isMetadataAccess, _)) + .foldLeft((Option.empty[NamedExpression], false)) { + case ((r1, m1), (r2, m2)) => + if (r1.nonEmpty && r2.nonEmpty) { + throw QueryCompilationErrors.ambiguousColumnReferences(u) + } + (if (r1.nonEmpty) r1 else r2, m1 | m2) } - } catch { - case e: AnalysisException => - logDebug(s"Fail to resolve $u with $plan due to $e") - None - } } - private def findPlanById( + private def resolveDataFrameColumnRecursively( u: UnresolvedAttribute, id: Long, - plan: LogicalPlan): Option[LogicalPlan] = { - if (plan.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) { - Some(plan) - } else if (plan.children.length == 1) { - findPlanById(u, id, plan.children.head) - } else if (plan.children.length > 1) { - val matched = plan.children.flatMap(findPlanById(u, id, _)) - if (matched.length > 1) { - throw new AnalysisException( - errorClass = "AMBIGUOUS_COLUMN_REFERENCE", - messageParameters = Map("name" -> toSQLId(u.nameParts)), - origin = u.origin - ) - } else { - matched.headOption + isMetadataAccess: Boolean, + p: LogicalPlan): (Option[NamedExpression], Boolean) = { + val (resolved, matched) = if (p.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) { + val resolved = try { + if (!isMetadataAccess) { + p.resolve(u.nameParts, conf.resolver) + } else if (u.nameParts.size == 1) { + p.getMetadataAttributeByNameOpt(u.nameParts.head) + } else { + None + } + } catch { + case e: AnalysisException => + logDebug(s"Fail to resolve $u with $p due to $e") + None } + (resolved, true) } else { - None + resolveDataFrameColumnByPlanId(u, id, isMetadataAccess, p.children) + } + + // In self join case like: + // df1 = spark.range(10).withColumn("a", sf.lit(0)) + // df2 = df1.withColumnRenamed("a", "b") + // df1.join(df2, df1["a"] == df2["b"]) + // + // the logical plan would be like: + // + // 'Join Inner, '`==`('a, 'b) [plan_id=5] + // :- Project [id#22L, 0 AS a#25] [plan_id=1] + // : +- Range (0, 10, step=1, splits=Some(12)) + // +- Project [id#28L, a#31 AS b#36] [plan_id=2] + // +- Project [id#28L, 0 AS a#31] [plan_id=1] + // +- Range (0, 10, step=1, splits=Some(12)) + // + // When resolving the column reference df1.a, the target node with plan_id=1 + // can be found in both sides of the Join node. + // To correctly resolve df1.a, the analyzer discards the resolved attribute + // in the right side, by filtering out the result by the output attributes of + // Project plan_id=2. + // + // However, there are analyzer rules (e.g. ResolveReferencesInSort) + // supporting missing column resolution. Then a valid resolved attribute + // maybe filtered out here. In this case, resolveDataFrameColumnByPlanId + // returns None, the dataframe column will remain unresolved, and the analyzer + // will try to resolve it without plan id later. 
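+    // Concretely, in the plan above the right-side node with plan_id=1 resolves df1["a"]
+    // to a#31, but a#31 is not among the outputs of the enclosing Project plan_id=2
+    // (id#28L, b#36), so the filter below drops it and only the left-side a#25 survives.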
+ val filtered = resolved.filter { r => + if (isMetadataAccess) { + r.references.subsetOf(AttributeSet(p.output ++ p.metadataOutput)) + } else { + r.references.subsetOf(p.outputSet) + } } + (filtered, matched) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 811e02b4d97b..3b9c6799bfaf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -68,17 +68,16 @@ object ResolveInlineTables extends Rule[LogicalPlan] /** * Validates that all inline table data are valid expressions that can be evaluated * (in this they must be foldable). - * + * Note that nondeterministic expressions are not supported since they are not foldable. + * Exception are CURRENT_LIKE expressions, which are replaced by a literal in later stages. * This is package visible for unit testing. */ private[analysis] def validateInputEvaluable(table: UnresolvedInlineTable): Unit = { table.rows.foreach { row => row.foreach { e => - // Note that nondeterministic expressions are not supported since they are not foldable. - // Only exception are CURRENT_LIKE expressions, which are replaced by a literal - // In later stages. - if ((!e.resolved && !e.containsPattern(CURRENT_LIKE)) - || !trimAliases(prepareForEval(e)).foldable) { + if (e.containsPattern(CURRENT_LIKE)) { + // Do nothing. + } else if (!e.resolved || !trimAliases(prepareForEval(e)).foldable) { e.failAnalysis( errorClass = "INVALID_INLINE_TABLE.CANNOT_EVALUATE_EXPRESSION_IN_INLINE_TABLE", messageParameters = Map("expr" -> toSQLExpr(e))) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala index d8be6824b909..cc9979ad4c5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala @@ -19,10 +19,12 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.SparkThrowable import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTime import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.COMMAND import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, MapType, StructType} /** @@ -34,7 +36,15 @@ import org.apache.spark.sql.types.{ArrayType, MapType, StructType} */ object ResolveTableSpec extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - plan.resolveOperatorsWithPruning(_.containsAnyPattern(COMMAND), ruleId) { + val preparedPlan = if (SQLConf.get.legacyEvalCurrentTime && plan.containsPattern(COMMAND)) { + AnalysisHelper.allowInvokingTransformsInAnalyzer { + ComputeCurrentTime(ResolveTimeZone(plan)) + } + } else { + plan + } + + preparedPlan.resolveOperatorsWithPruning(_.containsAnyPattern(COMMAND), ruleId) { case t: CreateTable => resolveTableSpec(t, t.tableSpec, s => t.copy(tableSpec = s)) case t: CreateTableAsSelect => diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 41071d031d2e..2bbe730d4cfb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -194,7 +194,7 @@ object ExprUtils extends QueryErrorsBase { } // Check if the data type of expr is orderable. - if (!RowOrdering.isOrderable(expr.dataType)) { + if (expr.dataType.existsRecursively(_.isInstanceOf[MapType])) { expr.failAnalysis( errorClass = "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE", messageParameters = Map( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d10e4a1ced1b..c8c2d5558b14 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -627,6 +627,7 @@ class CodegenContext extends Logging { case array: ArrayType => genComp(array, c1, c2) + " == 0" case struct: StructType => genComp(struct, c1, c2) + " == 0" case udt: UserDefinedType[_] => genEqual(udt.sqlType, c1, c2) + case CalendarIntervalType => s"$c1.equals($c2)" case NullType => "false" case _ => throw QueryExecutionErrors.cannotGenerateCodeForIncomparableTypeError( @@ -652,6 +653,7 @@ class CodegenContext extends Logging { // use c1 - c2 may overflow case dt: DataType if isPrimitiveType(dt) => s"($c1 > $c2 ? 1 : $c1 < $c2 ? -1 : 0)" case BinaryType => s"org.apache.spark.unsafe.types.ByteArray.compareBinary($c1, $c2)" + case CalendarIntervalType => s"$c1.compareTo($c2)" case NullType => "0" case array: ArrayType => val elementType = array.elementType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9be260a9f3da..22d4e175b9a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -134,22 +134,14 @@ case class CurrentTimeZone() extends LeafExpression with Unevaluable { since = "1.5.0") // scalastyle:on line.size.limit case class CurrentDate(timeZoneId: Option[String] = None) - extends LeafExpression with TimeZoneAwareExpression with CodegenFallback { - + extends LeafExpression with TimeZoneAwareExpression with Unevaluable { def this() = this(None) - - override def foldable: Boolean = true override def nullable: Boolean = false - override def dataType: DataType = DateType - final override def nodePatternsInternal(): Seq[TreePattern] = Seq(CURRENT_LIKE) - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - override def eval(input: InternalRow): Any = currentDate(zoneId) - override def prettyName: String = "current_date" } @@ -177,11 +169,9 @@ object CurDateExpressionBuilder extends ExpressionBuilder { } } -abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallback { - override def foldable: Boolean = true +abstract class CurrentTimestampLike() extends LeafExpression with Unevaluable { override def 
nullable: Boolean = false override def dataType: DataType = TimestampType - override def eval(input: InternalRow): Any = currentTimestamp() final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) } @@ -245,22 +235,13 @@ case class Now() extends CurrentTimestampLike { group = "datetime_funcs", since = "3.4.0") case class LocalTimestamp(timeZoneId: Option[String] = None) extends LeafExpression - with TimeZoneAwareExpression with CodegenFallback { - + with TimeZoneAwareExpression with Unevaluable { def this() = this(None) - - override def foldable: Boolean = true override def nullable: Boolean = false - override def dataType: DataType = TimestampNTZType - final override def nodePatternsInternal(): Seq[TreePattern] = Seq(CURRENT_LIKE) - override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - - override def eval(input: InternalRow): Any = localDateTimeToMicros(LocalDateTime.now(zoneId)) - override def prettyName: String = "localtimestamp" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 79b2985adc1d..6c72afae91e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -243,7 +243,9 @@ object Literal { v.isInstanceOf[InternalRow] && { val row = v.asInstanceOf[InternalRow] st.fields.map(_.dataType).zipWithIndex.forall { - case (fieldDataType, i) => doValidate(row.get(i, fieldDataType), fieldDataType) + case (fieldDataType, i) => + // Do not need to validate null values. + row.isNullAt(i) || doValidate(row.get(i, fieldDataType), fieldDataType) } } case _ => false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 8816e84490da..c7281e4e8737 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, UnresolvedSeed} import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -288,14 +287,14 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Non since = "3.0.0", group = "misc_funcs") // scalastyle:on line.size.limit -case class SparkVersion() extends LeafExpression with CodegenFallback { - override def nullable: Boolean = false - override def foldable: Boolean = true - override def dataType: DataType = StringType +case class SparkVersion() extends LeafExpression with RuntimeReplaceable { override def prettyName: String = "version" - override def eval(input: InternalRow): Any = { - UTF8String.fromString(SPARK_VERSION_SHORT + " " + SPARK_REVISION) - } + + override lazy val replacement: Expression = StaticInvoke( + classOf[ExpressionImplUtils], + StringType, + "getSparkVersion", + returnNullable = false) } @ExpressionDescription( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 
7609e96eba6f..54c4343e7ff9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3379,7 +3379,9 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { ctx: ExpressionPropertyListContext): OptionList = { val options = ctx.expressionProperty.asScala.map { property => val key: String = visitPropertyKey(property.key) - val value: Expression = Option(property.value).map(expression).orNull + val value: Expression = Option(property.value).map(expression).getOrElse { + operationNotAllowed(s"A value must be specified for the key: $key.", ctx) + } key -> value }.toSeq OptionList(options) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index cb93814e90e5..0dd83c4b499e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -516,11 +516,6 @@ object DateTimeUtils extends SparkDateTimeUtils { convertTz(micros, getZoneId(timeZone), ZoneOffset.UTC) } - /** - * Obtains the current instant as microseconds since the epoch at the UTC time zone. - */ - def currentTimestamp(): Long = instantToMicros(Instant.now()) - /** * Obtains the current date as days since the epoch in the specified time-zone. */ @@ -572,7 +567,7 @@ object DateTimeUtils extends SparkDateTimeUtils { def convertSpecialTimestamp(input: String, zoneId: ZoneId): Option[Long] = { extractSpecialValue(input.trim).flatMap { case "epoch" => Some(0) - case "now" => Some(currentTimestamp()) + case "now" => Some(instantToMicros(Instant.now())) case "today" => Some(instantToMicros(today(zoneId).toInstant)) case "tomorrow" => Some(instantToMicros(today(zoneId).plusDays(1).toInstant)) case "yesterday" => Some(instantToMicros(today(zoneId).minusDays(1).toInstant)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala index 43e89c49a89e..53c8b4cf3422 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala @@ -65,6 +65,7 @@ class StaxXmlGenerator( val factory = XMLOutputFactory.newInstance() // to_xml disables structure validation to allow multiple root tags factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_STRUCTURE, validateStructure) + factory.setProperty(WstxOutputProperties.P_OUTPUT_VALIDATE_NAMES, options.validateName) val xmlWriter = factory.createXMLStreamWriter(writer) if (!indentDisabled) { val indentingXmlWriter = new IndentingXMLStreamWriter(xmlWriter) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala index 92b156fb8f23..336c54e164e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala @@ -95,7 +95,7 @@ class XmlOptions( val nullValue = parameters.getOrElse(NULL_VALUE, XmlOptions.DEFAULT_NULL_VALUE) val columnNameOfCorruptRecord = parameters.getOrElse(COLUMN_NAME_OF_CORRUPT_RECORD, defaultColumnNameOfCorruptRecord) - val 
ignoreSurroundingSpaces = getBool(IGNORE_SURROUNDING_SPACES, false) + val ignoreSurroundingSpaces = getBool(IGNORE_SURROUNDING_SPACES, true) val parseMode = ParseMode.fromString(parameters.getOrElse(MODE, PermissiveMode.name)) val inferSchema = getBool(INFER_SCHEMA, true) val rowValidationXSDPath = parameters.get(ROW_VALIDATION_XSD_PATH).orNull @@ -107,6 +107,7 @@ class XmlOptions( // setting indent to "" disables indentation in the generated XML. // Each row will be written in a new line. val indent = parameters.getOrElse(INDENT, DEFAULT_INDENT) + val validateName = getBool(VALIDATE_NAME, true) /** * Infer columns with all valid date entries as date type (otherwise inferred as string or @@ -210,6 +211,7 @@ object XmlOptions extends DataSourceOptions { val TIME_ZONE = newOption("timeZone") val INDENT = newOption("indent") val PREFERS_DECIMAL = newOption("prefersDecimal") + val VALIDATE_NAME = newOption("validateName") // Options with alternative val ENCODING = "encoding" val CHARSET = "charset" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 387064695770..91d18788fd4c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.{SPARK_DOC_ROOT, SparkException, SparkThrowable, SparkUnsupportedOperationException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{ExtendedAnalysisException, FunctionIdentifier, InternalRow, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, FunctionAlreadyExistsException, NamespaceAlreadyExistsException, NoSuchFunctionException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException, ResolvedTable, Star, TableAlreadyExistsException, UnresolvedRegex} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, FunctionAlreadyExistsException, NamespaceAlreadyExistsException, NoSuchFunctionException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException, ResolvedTable, Star, TableAlreadyExistsException, UnresolvedAttribute, UnresolvedRegex} import org.apache.spark.sql.catalyst.catalog.{CatalogTable, InvalidUDFClassException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, CreateMap, CreateStruct, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} @@ -3940,4 +3940,20 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "dsSchema" -> toSQLType(dsSchema), "expectedSchema" -> toSQLType(expectedSchema))) } + + def cannotResolveColumn(u: UnresolvedAttribute): Throwable = { + new AnalysisException( + errorClass = "CANNOT_RESOLVE_DATAFRAME_COLUMN", + messageParameters = Map("name" -> toSQLId(u.nameParts)), + origin = u.origin + ) + } + + def ambiguousColumnReferences(u: UnresolvedAttribute): Throwable = { + new AnalysisException( + errorClass = "AMBIGUOUS_COLUMN_REFERENCE", + messageParameters = Map("name" -> toSQLId(u.nameParts)), + origin = u.origin + ) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d54cb3756638..1928e74363cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -182,7 +182,7 @@ object SQLConf { // Make sure SqlApiConf is always in sync with SQLConf. SqlApiConf will always try to // load SqlConf to make sure both classes are in sync from the get go. - SqlApiConf.setConfGetter(() => SQLConf.get) + SqlApiConfHelper.setConfGetter(() => SQLConf.get) /** * Returns the active config object within the current scope. If there is an active SparkSession, @@ -915,7 +915,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val CASE_SENSITIVE = buildConf(SqlApiConf.CASE_SENSITIVE_KEY) + val CASE_SENSITIVE = buildConf(SqlApiConfHelper.CASE_SENSITIVE_KEY) .internal() .doc("Whether the query analyzer should be case sensitive or not. " + "Default to case insensitive. It is highly discouraged to turn on case sensitive mode.") @@ -1211,7 +1211,7 @@ object SQLConf { .stringConf .transform(_.toLowerCase(Locale.ROOT)) .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo", "zstd", "lz4")) - .createWithDefault("snappy") + .createWithDefault("zstd") val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl") .doc("When native, use the native version of ORC support instead of the ORC library in Hive. " + @@ -2757,7 +2757,7 @@ object SQLConf { Try { DateTimeUtils.getZoneId(zone) }.isSuccess } - val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConf.SESSION_LOCAL_TIMEZONE_KEY) + val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY) .doc("The ID of session local timezone in the format of either region-based zone IDs or " + "zone offsets. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. " + "Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm' or '(+|-)HH:mm:ss', e.g '-08', " + @@ -3281,7 +3281,7 @@ object SQLConf { .checkValues(StoreAssignmentPolicy.values.map(_.toString)) .createWithDefault(StoreAssignmentPolicy.ANSI.toString) - val ANSI_ENABLED = buildConf(SqlApiConf.ANSI_ENABLED_KEY) + val ANSI_ENABLED = buildConf(SqlApiConfHelper.ANSI_ENABLED_KEY) .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + "For example, Spark will throw an exception at runtime instead of returning null results " + "when the inputs to a SQL operator/function are invalid." + @@ -3914,7 +3914,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConf.LEGACY_TIME_PARSER_POLICY_KEY) + val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY) .internal() .doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " + "dates/timestamps in a locale-sensitive manner, which is the approach before Spark 3.0. 
" + @@ -4476,7 +4476,7 @@ object SQLConf { .createWithDefault(false) val LOCAL_RELATION_CACHE_THRESHOLD = - buildConf(SqlApiConf.LOCAL_RELATION_CACHE_THRESHOLD_KEY) + buildConf(SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY) .doc("The threshold for the size in bytes of local relations to be cached at " + "the driver side after serialization.") .version("3.5.0") @@ -4612,6 +4612,18 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_EVAL_CURRENT_TIME = buildConf("spark.sql.legacy.earlyEvalCurrentTime") + .internal() + .doc("When set to true, evaluation and constant folding will happen for now() and " + + "current_timestamp() expressions before finish analysis phase. " + + "This flag will allow a bit more liberal syntax but it will sacrifice correctness - " + + "Results of now() and current_timestamp() can be different for different operations " + + "in a single query." + ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * @@ -5516,6 +5528,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def legacyJavaCharsets: Boolean = getConf(SQLConf.LEGACY_JAVA_CHARSETS) + def legacyEvalCurrentTime: Boolean = getConf(SQLConf.LEGACY_EVAL_CURRENT_TIME) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 09c2b6f5cc9b..88bb05cbf917 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -28,7 +28,7 @@ import scala.language.postfixOps import scala.reflect.ClassTag import scala.util.Random -import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkFunSuite, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkDateTimeException, SparkException, SparkFunSuite, SparkUpgradeException} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter} @@ -78,33 +78,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } - test("datetime function current_date") { - val d0 = DateTimeUtils.currentDate(UTC) - val cd = CurrentDate(UTC_OPT).eval(EmptyRow).asInstanceOf[Int] - val d1 = DateTimeUtils.currentDate(UTC) - assert(d0 <= cd && cd <= d1 && d1 - d0 <= 1) - - val cdjst = CurrentDate(JST_OPT).eval(EmptyRow).asInstanceOf[Int] - val cdpst = CurrentDate(PST_OPT).eval(EmptyRow).asInstanceOf[Int] - assert(cdpst <= cd && cd <= cdjst) - } - - test("datetime function current_timestamp") { - val ct = DateTimeUtils.toJavaTimestamp(CurrentTimestamp().eval(EmptyRow).asInstanceOf[Long]) - val t1 = System.currentTimeMillis() - assert(math.abs(t1 - ct.getTime) < 5000) - } - - test("datetime function localtimestamp") { - // Verify with multiple outstanding time zones which has no daylight saving time. 
- Seq("UTC", "Africa/Dakar", "Asia/Hong_Kong").foreach { zid => - val zoneId = DateTimeUtils.getZoneId(zid) - val ct = LocalTimestamp(Some(zid)).eval(EmptyRow).asInstanceOf[Long] - val t1 = DateTimeUtils.localDateTimeToMicros(LocalDateTime.now(zoneId)) - assert(math.abs(t1 - ct) < 1000000) - } - } - test("DayOfYear") { val sdfDay = new SimpleDateFormat("D", Locale.US) @@ -970,11 +943,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3), timeZoneId), MICROSECONDS.toSeconds(DateTimeUtils.daysToMicros( DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24")), tz.toZoneId))) - val t1 = UnixTimestamp( - CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long] - val t2 = UnixTimestamp( - CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long] - assert(t2 - t1 <= 1) checkEvaluation( UnixTimestamp( Literal.create(null, DateType), Literal.create(null, StringType), timeZoneId), @@ -1041,11 +1009,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3), timeZoneId), MICROSECONDS.toSeconds(DateTimeUtils.daysToMicros( DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24")), zid))) - val t1 = ToUnixTimestamp( - CurrentTimestamp(), Literal(fmt1)).eval().asInstanceOf[Long] - val t2 = ToUnixTimestamp( - CurrentTimestamp(), Literal(fmt1)).eval().asInstanceOf[Long] - assert(t2 - t1 <= 1) checkEvaluation(ToUnixTimestamp( Literal.create(null, DateType), Literal.create(null, StringType), timeZoneId), null) checkEvaluation( @@ -1516,7 +1479,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkExceptionInExpression[T](ToUnixTimestamp(Literal("1"), Literal(c)), c) checkExceptionInExpression[T](UnixTimestamp(Literal("1"), Literal(c)), c) if (!Set("E", "F", "q", "Q").contains(c)) { - checkExceptionInExpression[T](DateFormatClass(CurrentTimestamp(), Literal(c)), c) checkExceptionInExpression[T](FromUnixTime(Literal(0L), Literal(c)), c) } } @@ -2124,4 +2086,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("datetime function CurrentDate and localtimestamp are Unevaluable") { + checkError(exception = intercept[SparkException] { CurrentDate(UTC_OPT).eval(EmptyRow) }, + errorClass = "INTERNAL_ERROR", + parameters = Map("message" -> "Cannot evaluate expression: current_date(Some(UTC))")) + + checkError(exception = intercept[SparkException] { LocalTimestamp(UTC_OPT).eval(EmptyRow) }, + errorClass = "INTERNAL_ERROR", + parameters = Map("message" -> "Cannot evaluate expression: localtimestamp(Some(UTC))")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index f63b60f5ebba..d42e0b7d681d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -478,6 +478,24 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { UTF8String.fromString("Spark SQL")) } + // A generic internal row that throws exception when accessing null values + class NullAccessForbiddenGenericInternalRow(override val values: Array[Any]) + extends GenericInternalRow(values) { + override def get(ordinal: Int, 
dataType: DataType): AnyRef = { + if (values(ordinal) == null) { + throw new RuntimeException(s"Should not access null field at $ordinal!") + } + super.get(ordinal, dataType) + } + } + + test("SPARK-46634: literal validation should not drill down to null fields") { + val exceptionInternalRow = new NullAccessForbiddenGenericInternalRow(Array(null, 1)) + val schema = StructType.fromDDL("id INT, age INT") + // This should not fail because it should check whether the field is null before drilling down + Literal.validateLiteralValue(exceptionInternalRow, schema) + } + test("SPARK-46604: Literal support immutable ArraySeq") { import org.apache.spark.util.ArrayImplicits._ val immArraySeq = Array(1.0, 4.0).toImmutableArraySeq diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala index 447d77855fb3..6e1c7fc887d4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer +import java.lang.Thread.sleep import java.time.{LocalDateTime, ZoneId} import scala.concurrent.duration._ @@ -51,6 +52,19 @@ class ComputeCurrentTimeSuite extends PlanTest { assert(lits(0) == lits(1)) } + test("analyzer should respect time flow in current timestamp calls") { + val in = Project(Alias(CurrentTimestamp(), "t1")() :: Nil, LocalRelation()) + + val planT1 = Optimize.execute(in.analyze).asInstanceOf[Project] + sleep(1) + val planT2 = Optimize.execute(in.analyze).asInstanceOf[Project] + + val t1 = DateTimeUtils.microsToMillis(literals[Long](planT1)(0)) + val t2 = DateTimeUtils.microsToMillis(literals[Long](planT2)(0)) + + assert(t2 - t1 <= 1000 && t2 - t1 > 0) + } + test("analyzer should replace current_date with literals") { val in = Project(Seq(Alias(CurrentDate(), "a")(), Alias(CurrentDate(), "b")()), LocalRelation()) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 5cfe4a7bf462..e9e6bd9dcf62 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -98,11 +98,11 @@ class EliminateSortsSuite extends AnalysisTest { test("Remove no-op alias") { val x = testRelation - val query = x.select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") + val query = x.select($"a".as("x"), Literal(1).as("y"), $"b") .orderBy($"x".asc, $"y".asc, $"b".desc) val optimized = Optimize.execute(analyzer.execute(query)) val correctAnswer = analyzer.execute( - x.select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") + x.select($"a".as("x"), Literal(1).as("y"), $"b") .orderBy($"x".asc, $"b".desc)) comparePlans(optimized, correctAnswer) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 034b5b747fd1..767ef38ea7f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ 
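// --- Editor's sketch, not part of the patch: the SPARK-46634 test above relies on literal
// validation checking isNullAt before reading a struct slot. The helper below is a
// hypothetical stand-in (validateStruct is not Spark's Literal.validateLiteralValue); it
// only shows the null-guard pattern that the NullAccessForbiddenGenericInternalRow test
// double enforces.
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.StructType

object NullSafeStructValidationSketch {
  def validateStruct(row: InternalRow, schema: StructType)(validateField: Any => Unit): Unit = {
    schema.fields.zipWithIndex.foreach { case (field, ordinal) =>
      // Guard first: a null slot is trivially valid and must not be read with get(),
      // which the test double turns into a RuntimeException.
      if (!row.isNullAt(ordinal)) {
        validateField(row.get(ordinal, field.dataType))
      }
    }
  }
}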
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -73,25 +73,25 @@ class FoldablePropagationSuite extends PlanTest { test("Propagate to orderBy clause") { val query = testRelation - .select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") + .select($"a".as("x"), "str".as("y"), $"b") .orderBy($"x".asc, $"y".asc, $"b".desc) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation - .select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") - .orderBy($"x".asc, SortOrder(Year(CurrentDate()), Ascending), $"b".desc).analyze + .select($"a".as("x"), "str".as("y"), $"b") + .orderBy($"x".asc, SortOrder("str", Ascending), $"b".desc).analyze comparePlans(optimized, correctAnswer) } test("Propagate to groupBy clause") { val query = testRelation - .select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") + .select($"a".as("x"), Literal(42).as("y"), $"b") .groupBy($"x", $"y", $"b")(sum($"x"), avg($"y").as("AVG"), count($"b")) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation - .select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") - .groupBy($"x", Year(CurrentDate()).as("y"), $"b")(sum($"x"), - avg(Year(CurrentDate())).as("AVG"), + .select($"a".as("x"), Literal(42).as("y"), $"b") + .groupBy($"x", Literal(42).as("y"), $"b")(sum($"x"), + avg(Literal(42)).as("AVG"), count($"b")).analyze comparePlans(optimized, correctAnswer) @@ -99,16 +99,16 @@ class FoldablePropagationSuite extends PlanTest { test("Propagate in a complex query") { val query = testRelation - .select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") + .select($"a".as("x"), Literal(42).as("y"), $"b") .where($"x" > 1 && $"y" === 2016 && $"b" > 1) .groupBy($"x", $"y", $"b")(sum($"x"), avg($"y").as("AVG"), count($"b")) .orderBy($"x".asc, $"AVG".asc) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation - .select($"a".as("x"), Year(CurrentDate()).as("y"), $"b") - .where($"x" > 1 && Year(CurrentDate()).as("y") === 2016 && $"b" > 1) - .groupBy($"x", Year(CurrentDate()).as("y"), $"b")(sum($"x"), - avg(Year(CurrentDate())).as("AVG"), + .select($"a".as("x"), Literal(42).as("y"), $"b") + .where($"x" > 1 && Literal(42).as("y") === 2016 && $"b" > 1) + .groupBy($"x", Literal(42).as("y"), $"b")(sum($"x"), + avg(Literal(42)).as("AVG"), count($"b")) .orderBy($"x".asc, $"AVG".asc).analyze diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index ee85d509b6fc..03b31f1f03e4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2421,6 +2421,18 @@ class DDLParserSuite extends AnalysisTest { stop = 42)) } + test("SPARK-46610: throw exception when no value for a key in create table options") { + val createTableSql = "create table test_table using my_data_source options (password)" + checkError( + exception = parseException(createTableSql), + errorClass = "_LEGACY_ERROR_TEMP_0035", + parameters = Map("message" -> "A value must be specified for the key: password."), + context = ExpectedContext( + fragment = createTableSql, + start = 0, + stop = 62)) + } + test("UNCACHE TABLE") { comparePlans( parsePlan("UNCACHE TABLE a.b.c"), diff --git a/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt b/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt index 
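// --- Editor's sketch, not part of the patch: an end-to-end view of the parser behaviour the
// SPARK-46610 test above pins down, assuming a local SparkSession; `my_data_source` is just
// the placeholder name the test itself uses.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.parser.ParseException

object OptionsRequireValueSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("sketch").getOrCreate()
    try {
      // A bare key with no value now fails while parsing, before any analysis runs.
      spark.sql("CREATE TABLE test_table USING my_data_source OPTIONS (password)")
    } catch {
      case e: ParseException =>
        // Expected per the test: "A value must be specified for the key: password."
        println(e.getMessage)
    } finally {
      spark.stop()
    }
  }
}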
9572427ae890..5d70d2d5f602 100644 --- a/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt @@ -1,810 +1,810 @@ OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q1 615 739 192 0.8 1332.4 1.0X +q1 665 736 120 0.7 1441.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q2 830 910 86 2.7 371.9 1.0X +q2 837 863 28 2.7 375.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q3 222 268 47 13.4 74.9 1.0X +q3 224 261 36 13.2 75.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q4 4254 4579 460 1.2 816.3 1.0X +q4 4163 4571 577 1.3 798.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5 1052 1160 152 5.3 187.0 1.0X +q5 1111 1276 234 5.1 197.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6 1017 1054 52 3.1 325.9 1.0X +q6 1010 1072 88 3.1 323.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q7 552 568 17 8.9 112.8 1.0X +q7 495 569 55 9.9 101.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q8 405 480 69 7.7 130.6 1.0X +q8 414 457 43 7.5 133.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q9 788 843 51 0.0 22510758.9 1.0X +q9 862 878 19 0.0 24614403.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10 1855 1906 71 1.1 896.0 1.0X +q10 1778 1899 172 1.2 858.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11 1600 1807 292 2.4 424.2 1.0X +q11 1564 1726 229 2.4 414.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12 161 191 28 5.0 199.3 1.0X +q12 160 192 31 5.1 198.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q13 800 838 40 6.2 162.3 1.0X +q13 719 764 53 6.9 145.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a 5198 5251 75 1.0 1013.3 1.0X +q14a 4618 4802 261 1.1 900.2 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14b 3625 3702 109 1.4 706.7 1.0X +q14b 3444 3603 225 1.5 671.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q15 396 417 28 4.2 238.1 1.0X +q15 423 443 35 3.9 254.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q16 685 699 17 2.3 438.1 1.0X +q16 582 623 29 2.7 372.2 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q17 1404 1410 8 3.3 298.9 1.0X +q17 1334 1361 38 3.5 283.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18 1168 1173 7 3.1 324.3 1.0X +q18 972 1208 333 3.7 269.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q19 310 341 34 10.1 99.3 1.0X +q19 278 306 23 11.2 89.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20 173 197 21 8.9 112.7 1.0X +q20 190 208 12 8.1 124.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q21 614 653 38 19.3 51.9 1.0X +q21 592 644 48 20.0 50.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22 3243 3305 87 3.6 274.0 1.0X +q22 3287 3346 83 3.6 277.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q23a 5781 5829 68 0.9 1105.3 1.0X +q23a 5646 5698 73 0.9 1079.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23b 5758 5799 57 0.9 1101.1 1.0X +q23b 5616 5815 281 0.9 1073.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24a 163 244 41 20.5 48.7 1.0X +q24a 146 241 62 22.9 43.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24b 211 243 26 15.8 63.2 1.0X +q24b 210 252 21 15.9 63.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q25 1293 1315 31 3.6 275.1 1.0X +q25 1287 1338 72 3.7 273.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q26 321 345 33 10.8 93.0 1.0X +q26 303 337 36 11.4 87.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27 525 556 26 9.3 107.2 1.0X +q27 474 488 15 10.3 96.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q28 1147 1181 49 2.5 398.3 1.0X +q28 1197 1287 127 2.4 415.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q29 1376 1398 31 3.4 292.8 1.0X +q29 1543 1634 129 3.0 328.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q30 400 417 23 0.7 1356.2 1.0X +q30 388 417 32 0.8 1314.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q31 786 806 19 4.7 211.2 1.0X +q31 770 862 126 4.8 206.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q32 192 212 22 8.0 125.6 1.0X +q32 202 239 57 7.6 131.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q33 432 457 31 12.0 83.4 1.0X +q33 398 420 29 13.0 76.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34 388 413 22 7.9 126.9 1.0X +q34 344 370 31 8.9 112.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35 1216 1233 23 1.7 587.2 1.0X +q35 1187 1250 89 1.7 573.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36 498 540 56 6.0 167.5 1.0X +q36 500 524 41 5.9 168.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q37 828 874 61 16.0 62.4 1.0X +q37 795 824 25 16.7 59.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q38 693 703 9 7.5 132.9 1.0X +q38 672 684 10 7.8 129.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39a 1569 1607 53 7.5 132.6 1.0X +q39a 1386 1418 46 8.5 117.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39b 1376 1397 30 8.6 116.3 1.0X +q39b 1337 1360 31 8.8 113.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q40 316 338 21 5.3 188.5 1.0X +q40 284 327 40 5.9 169.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q41 151 180 25 0.1 8393.2 1.0X +q41 140 162 25 0.1 7771.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q42 149 190 29 19.9 50.2 1.0X +q42 140 160 19 21.2 47.2 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q43 352 362 11 8.4 119.3 1.0X +q43 281 311 28 10.5 95.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q44 332 364 36 8.7 114.7 1.0X +q44 318 357 39 9.1 109.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q45 171 209 30 5.6 177.6 1.0X +q45 179 207 32 5.4 186.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q46 464 490 29 6.7 149.3 1.0X +q46 429 468 43 7.3 137.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47 1389 1500 157 2.1 467.6 1.0X +q47 1504 1581 108 2.0 506.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q48 942 963 36 5.2 191.3 1.0X +q48 830 833 2 5.9 168.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49 615 675 82 9.1 109.4 1.0X +q49 533 604 51 10.5 94.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q50 647 688 35 5.0 199.7 1.0X +q50 573 675 114 5.7 176.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51 2486 2609 173 1.5 677.2 1.0X +q51 2570 2633 89 1.4 699.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q52 148 160 17 20.1 49.8 1.0X +q52 141 168 24 21.1 47.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q53 298 315 22 10.0 100.2 1.0X +q53 267 294 37 11.1 90.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q54 1182 1201 26 4.5 224.0 1.0X +q54 1155 1201 65 4.6 218.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q55 148 165 17 20.1 49.9 1.0X +q55 138 151 18 21.5 46.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q56 433 455 33 12.0 83.5 1.0X +q56 412 451 39 12.6 79.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57 734 806 64 2.1 479.2 1.0X +q57 746 808 71 2.1 487.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q58 414 451 34 12.4 80.7 1.0X +q58 423 503 59 12.1 82.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q59 602 652 45 4.9 204.0 1.0X +q59 606 652 69 4.9 205.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q60 447 468 34 11.6 86.4 1.0X +q60 407 444 44 12.7 78.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q61 563 595 40 5.5 180.5 1.0X +q61 524 556 31 6.0 167.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q62 181 204 41 4.4 229.1 1.0X +q62 162 176 19 4.9 204.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q63 302 318 15 9.8 101.7 1.0X +q63 276 297 14 10.8 93.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64 2180 2385 290 3.2 314.9 1.0X +q64 1927 2249 455 3.6 278.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q65 670 689 34 4.4 225.4 1.0X +q65 631 678 75 4.7 212.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q66 545 602 83 4.3 235.0 1.0X +q66 557 577 20 4.2 240.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67 5345 5380 50 0.6 1799.1 1.0X +q67 5285 5314 41 0.6 1779.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q68 490 519 21 6.3 157.7 1.0X +q68 426 457 22 7.3 137.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q69 1544 1592 67 1.3 745.7 1.0X +q69 1590 1603 19 1.3 767.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70 557 594 61 5.3 188.8 1.0X +q70 514 551 49 5.7 174.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q71 368 383 18 14.2 70.6 1.0X +q71 347 364 22 15.0 66.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72 117675 118995 1867 0.1 7667.2 1.0X +q72 117138 117824 970 0.1 7632.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q73 340 390 51 9.0 111.2 1.0X +q73 319 349 47 9.6 104.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74 1128 1523 558 3.3 299.1 1.0X +q74 1185 1476 412 3.2 314.2 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75 1396 1584 266 4.0 247.8 1.0X +q75 1333 1472 196 4.2 236.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q76 303 335 28 16.9 59.1 1.0X +q76 274 301 28 18.7 53.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77 645 745 116 8.7 114.9 1.0X +q77 495 791 257 11.3 88.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78 1918 2082 233 2.9 341.6 1.0X +q78 1963 2198 332 2.9 349.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q79 452 480 37 6.8 147.8 1.0X +q79 398 420 42 7.7 130.0 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q80 1163 1190 38 4.9 206.0 1.0X +q80 1375 1384 13 4.1 243.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q81 324 380 40 1.1 883.9 1.0X +q81 283 371 51 1.3 770.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q82 1078 1095 24 13.6 73.3 1.0X +q82 1074 1093 28 13.7 72.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q83 278 307 29 2.1 468.0 1.0X +q83 231 268 29 2.6 388.9 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q84 676 689 15 3.5 285.7 1.0X +q84 649 710 72 3.6 274.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q85 2155 2447 412 1.3 760.4 1.0X +q85 2113 2151 54 1.3 745.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86 176 191 22 4.6 217.4 1.0X +q86 175 191 22 4.6 215.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q87 658 716 54 7.9 126.2 1.0X +q87 626 677 73 8.3 120.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q88 1228 1381 216 2.4 413.0 1.0X +q88 1090 1264 246 2.7 366.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q89 313 347 30 9.5 105.5 1.0X +q89 294 323 21 10.1 99.1 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q90 128 156 34 6.4 157.2 1.0X +q90 118 140 21 6.9 144.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q91 300 335 34 7.6 130.9 1.0X +q91 284 312 30 8.1 123.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q92 138 158 20 5.9 170.5 1.0X +q92 123 150 19 6.6 151.3 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q93 435 458 31 7.3 137.3 1.0X +q93 420 444 29 7.5 132.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q94 319 347 27 2.6 379.1 1.0X +q94 305 327 25 2.8 362.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q95 5080 5217 194 0.2 6033.3 1.0X +q95 5032 5202 240 0.2 5976.6 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q96 172 187 16 17.2 58.0 1.0X +q96 154 167 21 19.3 51.7 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q97 1143 1185 59 3.8 260.3 1.0X +q97 1140 1192 74 3.9 259.5 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98 268 293 22 11.1 90.2 1.0X +q98 249 280 17 11.9 83.8 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q99 256 283 31 5.9 168.9 1.0X +q99 237 252 24 6.4 156.4 1.0X OpenJDK 64-Bit Server VM 21.0.1+12-LTS on Linux 5.15.0-1053-azure AMD EPYC 7763 64-Core Processor -TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------
-q5a-v2.7                                          1018           1191         244          5.5         180.9       1.0X
+q5a-v2.7                                          1161           1178          25          4.8         206.3       1.0X

[... q6-v2.7 through q98-v2.7 elided: each remaining OpenJDK 21.0.1+12-LTS block repeats the same environment banner (Linux 5.15.0-1053-azure, AMD EPYC 7763 64-Core Processor), renames the "TPCDS Snappy:" header to "TPCDS:", and carries regenerated Best/Avg/Stdev times, Rate(M/s) and Per Row(ns) figures ...]

diff --git a/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt b/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt
index 89c17f00a690..d08e957b08f7 100644
--- a/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt
+++ b/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt
@@ -1,810 +1,810 @@
OpenJDK 64-Bit Server VM 17.0.9+9-LTS on Linux 5.15.0-1053-azure
AMD EPYC 7763 64-Core Processor
-TPCDS Snappy:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+TPCDS:                                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
-q1                                                         606            759         154          0.8        1312.6       1.0X
+q1                                                         683            728          43          0.7        1479.7       1.0X

[... q2 through q99 and the v2.7 variants (q5a-v2.7 through q86a-v2.7) elided: every block repeats the same OpenJDK 17.0.9+9-LTS environment banner (Linux 5.15.0-1053-azure, AMD EPYC 7763 64-Core Processor), renames the "TPCDS Snappy:" header to "TPCDS:", and carries regenerated Best/Avg/Stdev times, Rate(M/s) and Per Row(ns) figures ...]

OpenJDK 64-Bit Server VM 17.0.9+9-LTS on Linux 5.15.0-1053-azure
AMD EPYC 7763 64-Core Processor
-TPCDS Snappy:                                     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+TPCDS:                                            Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
-q98-v2.7
288 310 20 10.3 97.0 1.0X
+q98-v2.7 279 315 27 10.7 93.8 1.0X
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 31e1495db7e3..ff1bd8c73e6f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -138,7 +138,8 @@ private[sql] object Dataset {
  * the following creates a new Dataset by applying a filter on the existing one:
  * {{{
  *   val names = people.map(_.name)  // in Scala; names is a Dataset[String]
- *   Dataset<String> names = people.map((Person p) -> p.name, Encoders.STRING));
+ *   Dataset<String> names = people.map(
+ *     (MapFunction<Person, String>) p -> p.name, Encoders.STRING()); // Java
  * }}}
  *
  * Dataset operations can also be untyped, through various domain-specific-language (DSL)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala
index 2766bbaa8880..3942d193a328 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, Complete}
 import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke}
 import org.apache.spark.sql.connector.catalog.functions.ScalarFunction
-import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, Extract => V2Extract, FieldReference, GeneralScalarExpression, LiteralValue, UserDefinedScalarFunc}
+import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, Extract => V2Extract, FieldReference, GeneralScalarExpression, LiteralValue, NullOrdering, SortDirection, SortValue, UserDefinedScalarFunc}
 import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, GeneralAggregateFunc, Max, Min, Sum, UserDefinedAggregateFunc}
 import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate}
 import org.apache.spark.sql.execution.datasources.PushableExpression
@@ -347,8 +347,16 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) {
       Some(new GeneralAggregateFunc("REGR_SXY", isDistinct, Array(left, right)))
     // Translate Mode if it is deterministic or reverse is defined.
     case aggregate.Mode(PushableExpression(expr), _, _, Some(reverse)) =>
-      Some(new GeneralAggregateFunc("MODE", isDistinct,
-        Array(expr, LiteralValue(reverse, BooleanType))))
+      Some(new GeneralAggregateFunc(
+        "MODE", isDistinct, Array.empty, Array(generateSortValue(expr, !reverse))))
+    case aggregate.Percentile(
+      PushableExpression(left), PushableExpression(right), LongLiteral(1L), _, _, reverse) =>
+      Some(new GeneralAggregateFunc("PERCENTILE_CONT", isDistinct,
+        Array(right), Array(generateSortValue(left, reverse))))
+    case aggregate.PercentileDisc(
+      PushableExpression(left), PushableExpression(right), reverse, _, _, _) =>
+      Some(new GeneralAggregateFunc("PERCENTILE_DISC", isDistinct,
+        Array(right), Array(generateSortValue(left, reverse))))
     // TODO supports other aggregate functions
     case aggregate.V2Aggregator(aggrFunc, children, _, _) =>
       val translatedExprs = children.flatMap(PushableExpression.unapply(_))
@@ -380,6 +388,12 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) {
       None
     }
   }
+
+  private def generateSortValue(expr: V2Expression, reverse: Boolean): SortValue = if (reverse) {
+    SortValue(expr, SortDirection.DESCENDING, NullOrdering.NULLS_LAST)
+  } else {
+    SortValue(expr, SortDirection.ASCENDING, NullOrdering.NULLS_FIRST)
+  }
 }
 
 object ColumnOrField {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala
index 1972aeb38265..278c1fc3f73b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala
@@ -77,9 +77,11 @@ object AggUtils {
       child: SparkPlan): SparkPlan = {
     val useHash = Aggregate.supportsHashAggregate(
       aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes))
+
+    val forceObjHashAggregate = forceApplyObjectHashAggregate(child.conf)
     val forceSortAggregate = forceApplySortAggregate(child.conf)
 
-    if (useHash && !forceSortAggregate) {
+    if (useHash && !forceSortAggregate && !forceObjHashAggregate) {
       HashAggregateExec(
         requiredChildDistributionExpressions = requiredChildDistributionExpressions,
         isStreaming = isStreaming,
@@ -94,7 +96,7 @@ object AggUtils {
       val objectHashEnabled = child.conf.useObjectHashAggregation
       val useObjectHash = Aggregate.supportsObjectHashAggregate(aggregateExpressions)
 
-      if (objectHashEnabled && useObjectHash && !forceSortAggregate) {
+      if (forceObjHashAggregate || (objectHashEnabled && useObjectHash && !forceSortAggregate)) {
         ObjectHashAggregateExec(
           requiredChildDistributionExpressions = requiredChildDistributionExpressions,
           isStreaming = isStreaming,
@@ -589,4 +591,13 @@ object AggUtils {
     Utils.isTesting &&
       conf.getConfString("spark.sql.test.forceApplySortAggregate", "false") == "true"
   }
+
+  /**
+   * Returns whether an object hash aggregate should be force applied.
+   * The config key is hard-coded because it's testing only and should not be exposed.
+   */
+  private def forceApplyObjectHashAggregate(conf: SQLConf): Boolean = {
+    Utils.isTesting &&
+      conf.getConfString("spark.sql.test.forceApplyObjectHashAggregate", "false") == "true"
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala
index c33820ed85e5..8a88ad0a57e3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashMapGenerator.scala
@@ -174,6 +174,7 @@ abstract class HashMapGenerator(
         """
       }
       case StringType => hashBytes(s"$input.getBytes()")
+      case CalendarIntervalType => hashInt(s"$input.hashCode()")
     }
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala
index 9552e2c81bb1..f5382d040f28 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.streaming.state
 
 import java.io._
 
+import scala.util.control.NonFatal
+
 import org.apache.hadoop.conf.Configuration
 
 import org.apache.spark.{SparkConf, SparkEnv}
@@ -233,7 +235,15 @@ private[sql] class RocksDBStateStoreProvider
   }
 
   override def doMaintenance(): Unit = {
-    rocksDB.doMaintenance()
+    try {
+      rocksDB.doMaintenance()
+    } catch {
+      // SPARK-46547 - Swallow non-fatal exception in maintenance task to avoid deadlock between
+      // maintenance thread and streaming aggregation operator
+      case NonFatal(ex) =>
+        logWarning(s"Ignoring error while performing maintenance operations with exception=",
+          ex)
+    }
   }
 
   override def close(): Unit = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala
index fd20e495b10f..ae3a3addf7bf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala
@@ -43,7 +43,7 @@ private[sql] object H2Dialect extends JdbcDialect {
   private val distinctUnsupportedAggregateFunctions =
     Set("COVAR_POP", "COVAR_SAMP", "CORR", "REGR_INTERCEPT", "REGR_R2", "REGR_SLOPE", "REGR_SXY",
-      "MODE")
+      "MODE", "PERCENTILE_CONT", "PERCENTILE_DISC")
 
   private val supportedAggregateFunctions = Set("MAX", "MIN", "SUM", "COUNT", "AVG",
     "VAR_POP", "VAR_SAMP", "STDDEV_POP", "STDDEV_SAMP") ++ distinctUnsupportedAggregateFunctions
@@ -271,18 +271,7 @@ private[sql] object H2Dialect extends JdbcDialect {
         throw new UnsupportedOperationException(s"${this.getClass.getSimpleName} does not " +
           s"support aggregate function: $funcName with DISTINCT")
       } else {
-        funcName match {
-          case "MODE" =>
-            // Support Mode only if it is deterministic or reverse is defined.
- assert(inputs.length == 2) - if (inputs.last == "true") { - s"MODE() WITHIN GROUP (ORDER BY ${inputs.head})" - } else { - s"MODE() WITHIN GROUP (ORDER BY ${inputs.head} DESC)" - } - case _ => - super.visitAggregateFunction(funcName, isDistinct, inputs) - } + super.visitAggregateFunction(funcName, isDistinct, inputs) } override def visitExtract(field: String, source: String): String = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 888ef4a20be3..bee870fcf7b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -336,7 +336,22 @@ abstract class JdbcDialect extends Serializable with Logging { super.visitAggregateFunction(dialectFunctionName(funcName), isDistinct, inputs) } else { throw new UnsupportedOperationException( - s"${this.getClass.getSimpleName} does not support aggregate function: $funcName"); + s"${this.getClass.getSimpleName} does not support aggregate function: $funcName") + } + } + + override def visitInverseDistributionFunction( + funcName: String, + isDistinct: Boolean, + inputs: Array[String], + orderingWithinGroups: Array[String]): String = { + if (isSupportedFunction(funcName)) { + super.visitInverseDistributionFunction( + dialectFunctionName(funcName), isDistinct, inputs, orderingWithinGroups) + } else { + throw new UnsupportedOperationException( + s"${this.getClass.getSimpleName} does not support " + + s"inverse distribution function: $funcName") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 3691d76d2512..0ab8926c016f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.test.SQLTestData.DecimalData import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.{DAY, HOUR, MINUTE, SECOND} import org.apache.spark.sql.types.YearMonthIntervalType.{MONTH, YEAR} +import org.apache.spark.unsafe.types.CalendarInterval case class Fact(date: Int, hour: Int, minute: Int, room_name: String, temp: Double) @@ -2125,6 +2126,37 @@ class DataFrameAggregateSuite extends QueryTest Seq(Row(1)) ) } + + test("SPARK-46536 Support GROUP BY CalendarIntervalType") { + val numRows = 50 + val configurations = Seq( + Seq.empty[(String, String)], // hash aggregate is used by default + Seq(SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN", + "spark.sql.TungstenAggregate.testFallbackStartsAt" -> "1, 10"), + Seq("spark.sql.test.forceApplyObjectHashAggregate" -> "true"), + Seq( + "spark.sql.test.forceApplyObjectHashAggregate" -> "true", + SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key -> "1"), + Seq("spark.sql.test.forceApplySortAggregate" -> "true") + ) + + val dfSame = (0 until numRows) + .map(_ => Tuple1(new CalendarInterval(1, 2, 3))) + .toDF("c0") + + val dfDifferent = (0 until numRows) + .map(i => Tuple1(new CalendarInterval(i, i, i))) + .toDF("c0") + + for (conf <- configurations) { + withSQLConf(conf: _*) { + assert(createAggregate(dfSame).count() == 1) + assert(createAggregate(dfDifferent).count() == numRows) + } + } + + def createAggregate(df: DataFrame): DataFrame = df.groupBy("c0").agg(count("*")) + } } case class B(c: Option[Double]) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/TableOptionsConstantFoldingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TableOptionsConstantFoldingSuite.scala index b86d5b9b80c4..2e56327a6313 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TableOptionsConstantFoldingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TableOptionsConstantFoldingSuite.scala @@ -44,7 +44,9 @@ class TableOptionsConstantFoldingSuite extends QueryTest with SharedSparkSession checkOption("null", null) checkOption("cast('11 23:4:0' as interval day to second)", "INTERVAL '11 23:04:00' DAY TO SECOND") - checkOption("date_diff(current_date(), current_date())", "0") + withSQLConf(SQLConf.LEGACY_EVAL_CURRENT_TIME.key -> "true") { + checkOption("date_diff(current_date(), current_date())", "0") + } checkOption("date_sub(date'2022-02-02', 1)", "2022-02-01") checkOption("timestampadd(microsecond, 5, timestamp'2022-02-28 00:00:00')", "2022-02-28 00:00:00.000005") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 721997d84e1a..c76ad0434d4f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.execution.datasources.parquet.ParquetCompressionCodec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType @@ -52,7 +51,6 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark with Logging { val conf = new SparkConf() .setMaster(System.getProperty("spark.sql.test.master", "local[1]")) .setAppName("test-sql-context") - .set("spark.sql.parquet.compression.codec", ParquetCompressionCodec.SNAPPY.lowerCaseName()) .set("spark.sql.shuffle.partitions", System.getProperty("spark.sql.shuffle.partitions", "4")) .set("spark.driver.memory", "3g") .set("spark.executor.memory", "3g") @@ -74,7 +72,8 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark with Logging { tables.map { tableName => spark.sql(s"DROP TABLE IF EXISTS $tableName") val options = Map("path" -> s"$dataLocation/$tableName") - spark.catalog.createTable(tableName, "parquet", tableColumns(tableName), options) + val format = spark.conf.get("spark.sql.sources.default") + spark.catalog.createTable(tableName, format, tableColumns(tableName), options) // Recover partitions but don't fail if a table is not partitioned. 
Try { spark.sql(s"ALTER TABLE $tableName RECOVER PARTITIONS") @@ -107,7 +106,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark with Logging { case _ => } val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum - val benchmark = new Benchmark(s"TPCDS Snappy", numRows, 2, output = output) + val benchmark = new Benchmark("TPCDS", numRows, 2, output = output) benchmark.addCase(s"$name$nameSuffix") { _ => spark.sql(queryString).noop() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 7d666729bb43..3f3776bab8fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -508,7 +508,7 @@ abstract class OrcQueryTest extends OrcTest { conf.setBoolean("hive.io.file.read.all.columns", false) val orcRecordReader = { - val file = new File(path).listFiles().find(_.getName.endsWith(".snappy.orc")).head + val file = new File(path).listFiles().find(_.getName.endsWith(".orc")).head val split = new FileSplit(new Path(file.toURI), 0, file.length, Array.empty[String]) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 1e98099361df..6166773fb094 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -332,8 +332,9 @@ abstract class OrcSuite test("SPARK-21839: Add SQL config for ORC compression") { val conf = spark.sessionState.conf - // Test if the default of spark.sql.orc.compression.codec is snappy - assert(new OrcOptions(Map.empty[String, String], conf).compressionCodec == SNAPPY.name()) + // Test if the default of spark.sql.orc.compression.codec is used. + assert(new OrcOptions(Map.empty[String, String], conf).compressionCodec == + SQLConf.ORC_COMPRESSION.defaultValueString.toUpperCase(Locale.ROOT)) // OrcOptions's parameters have a higher priority than SQL configuration. // `compression` -> `orc.compression` -> `spark.sql.orc.compression.codec` diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala index 1f56b51de3dd..7b8357e20774 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTypeWideningSuite.scala @@ -226,9 +226,9 @@ class ParquetTypeWideningSuite Seq((7, 4) -> (5, 2), (10, 7) -> (5, 2), (20, 17) -> (5, 2), (12, 4) -> (10, 2), (20, 17) -> (10, 2), (22, 4) -> (20, 2)) ++ // Increasing precision and decreasing scale. - Seq((5, 4) -> (7, 2), (10, 6) -> (12, 4), (20, 7) -> (22, 5)) ++ + Seq((10, 6) -> (12, 4), (20, 7) -> (22, 5)) ++ // Decreasing precision and increasing scale. 
- Seq((7, 2) -> (5, 4), (12, 4) -> (10, 6), (22, 5) -> (20, 7)) ++ + Seq((12, 4) -> (10, 6), (22, 5) -> (20, 7)) ++ // Increasing precision by a smaller amount than scale. Seq((5, 2) -> (6, 4), (10, 4) -> (12, 7), (20, 5) -> (22, 8)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index 38734e001367..b3d10d2115f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -16,12 +16,13 @@ */ package org.apache.spark.sql.execution.datasources.xml -import java.io.EOFException +import java.io.{EOFException, File} import java.nio.charset.{StandardCharsets, UnsupportedCharsetException} import java.nio.file.{Files, Path, Paths} import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDateTime} import java.util.TimeZone +import javax.xml.stream.XMLStreamException import scala.collection.immutable.ArraySeq import scala.collection.mutable @@ -761,7 +762,7 @@ class XmlSuite .collect() assert(results(0) === Row("alice", "35")) - assert(results(1) === Row("bob", " ")) + assert(results(1) === Row("bob", "")) assert(results(2) === Row("coc", "24")) } @@ -847,7 +848,7 @@ class XmlSuite assert(result(0) === Row(Row(null))) assert(result(1) === Row(Row(Row(null, null)))) assert(result(2) === Row(Row(Row("E", null)))) - assert(result(3) === Row(Row(Row("E", " ")))) + assert(result(3) === Row(Row(Row("E", "")))) assert(result(4) === Row(Row(Row("E", "")))) } @@ -1177,8 +1178,8 @@ class XmlSuite .option("inferSchema", true) .xml(getTestResourcePath(resDir + "mixed_children.xml")) val mixedRow = mixedDF.head() - assert(mixedRow.getAs[Row](0) === Row(List(" issue ", " text ignored "), " lorem ")) - assert(mixedRow.getString(1) === " ipsum ") + assert(mixedRow.getAs[Row](0) === Row(List("issue", "text ignored"), "lorem")) + assert(mixedRow.getString(1) === "ipsum") } test("test mixed text and complex element children") { @@ -1186,9 +1187,9 @@ class XmlSuite .option("rowTag", "root") .option("inferSchema", true) .xml(getTestResourcePath(resDir + "mixed_children_2.xml")) - assert(mixedDF.select("foo.bar").head().getString(0) === " lorem ") + assert(mixedDF.select("foo.bar").head().getString(0) === "lorem") assert(mixedDF.select("foo.baz.bing").head().getLong(0) === 2) - assert(mixedDF.select("missing").head().getString(0) === " ipsum ") + assert(mixedDF.select("missing").head().getString(0) === "ipsum") } test("test XSD validation") { @@ -1752,7 +1753,7 @@ class XmlSuite assert(result(1).getAs[String]("_attr") == "attr1" && result(1).getAs[String]("_VALUE") == "value2") // comments aren't included in valueTag - assert(result(2).getAs[String]("_VALUE") == "\n value3\n ") + assert(result(2).getAs[String]("_VALUE") == "value3") } } @@ -2828,4 +2829,55 @@ class XmlSuite } } } + + test("XML Validate Name") { + val data = Seq(Row("Random String")) + + def checkValidation(fieldName: String, + errorMsg: String, + validateName: Boolean = true): Unit = { + val schema = StructType(Seq(StructField(fieldName, StringType))) + val df = spark.createDataFrame(data.asJava, schema) + + withTempDir { dir => + val path = dir.getCanonicalPath + validateName match { + case false => + df.write + .option("rowTag", "ROW") + .option("validateName", false) + .option("declaration", "") + .option("indent", "") + .mode(SaveMode.Overwrite) + .xml(path) + // read file 
back and check its content
+            val xmlFile = new File(path).listFiles()
+              .filter(_.isFile)
+              .filter(_.getName.endsWith("xml")).head
+            val actualContent = Files.readString(xmlFile.toPath).replaceAll("\\n", "")
+            assert(actualContent ===
+              s"<${XmlOptions.DEFAULT_ROOT_TAG}>" +
+                s"<ROW><$fieldName>${data.head.getString(0)}</$fieldName></ROW>" +
+                s"</${XmlOptions.DEFAULT_ROOT_TAG}>")
+
+          case true =>
+            val e = intercept[SparkException] {
+              df.write
+                .option("rowTag", "ROW")
+                .mode(SaveMode.Overwrite)
+                .xml(path)
+            }
+
+            assert(e.getCause.getCause.isInstanceOf[XMLStreamException])
+            assert(e.getMessage.contains(errorMsg))
+        }
+      }
+    }
+
+    checkValidation("", "Illegal to pass empty name")
+    checkValidation(" ", "Illegal first name character ' '")
+    checkValidation("1field", "Illegal first name character '1'")
+    checkValidation("field name with space", "Illegal name character ' '")
+    checkValidation("field", "", false)
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
index 0a66680edd63..05b3787d0ff2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala
@@ -2435,7 +2435,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     checkAggregateRemoved(df1)
     checkPushedInfo(df1,
       """
-        |PushedAggregates: [MODE(SALARY, true)],
+        |PushedAggregates: [MODE() WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST)],
         |PushedFilters: [DEPT IS NOT NULL, DEPT > 0],
         |PushedGroupByExpressions: [DEPT],
         |""".stripMargin.replaceAll("\n", " "))
@@ -2465,7 +2465,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     checkAggregateRemoved(df3)
     checkPushedInfo(df3,
       """
-        |PushedAggregates: [MODE(SALARY, true)],
+        |PushedAggregates: [MODE() WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST)],
         |PushedFilters: [DEPT IS NOT NULL, DEPT > 0],
         |PushedGroupByExpressions: [DEPT],
         |""".stripMargin.replaceAll("\n", " "))
@@ -2481,13 +2481,69 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
     checkAggregateRemoved(df4)
     checkPushedInfo(df4,
       """
-        |PushedAggregates: [MODE(SALARY, false)],
+        |PushedAggregates: [MODE() WITHIN GROUP (ORDER BY SALARY DESC NULLS LAST)],
         |PushedFilters: [DEPT IS NOT NULL, DEPT > 0],
         |PushedGroupByExpressions: [DEPT],
         |""".stripMargin.replaceAll("\n", " "))
     checkAnswer(df4, Seq(Row(1, 10000.00), Row(2, 12000.00), Row(6, 12000.00)))
   }
 
+  test("scan with aggregate push-down: PERCENTILE & PERCENTILE_DISC with filter and group by") {
+    val df1 = sql(
+      """
+        |SELECT
+        |  dept,
+        |  PERCENTILE(salary, 0.5)
+        |FROM h2.test.employee WHERE dept > 0 GROUP BY DePt""".stripMargin)
+    checkFiltersRemoved(df1)
+    checkAggregateRemoved(df1)
+    checkPushedInfo(df1,
+      """
+        |PushedAggregates: [PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST)],
+        |PushedFilters: [DEPT IS NOT NULL, DEPT > 0],
+        |PushedGroupByExpressions: [DEPT],
+        |""".stripMargin.replaceAll("\n", " "))
+    checkAnswer(df1, Seq(Row(1, 9500.00), Row(2, 11000.00), Row(6, 12000.00)))
+
+    val df2 = sql(
+      """
+        |SELECT
+        |  dept,
+        |  PERCENTILE_CONT(0.3) WITHIN GROUP (ORDER BY SALARY),
+        |  PERCENTILE_CONT(0.3) WITHIN GROUP (ORDER BY SALARY DESC)
+        |FROM h2.test.employee WHERE dept > 0 GROUP BY DePt""".stripMargin)
+    checkFiltersRemoved(df2)
+    checkAggregateRemoved(df2)
+    checkPushedInfo(df2,
+      """
+        |PushedAggregates: [PERCENTILE_CONT(0.3) WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST),
+        |PERCENTILE_CONT(0.3) WITHIN GROUP
(ORDER BY SALARY DESC NULLS LAST)], + |PushedFilters: [DEPT IS NOT NULL, DEPT > 0], + |PushedGroupByExpressions: [DEPT], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df2, + Seq(Row(1, 9300.0, 9700.0), Row(2, 10600.0, 11400.0), Row(6, 12000.0, 12000.0))) + + val df3 = sql( + """ + |SELECT + | dept, + | PERCENTILE_DISC(0.3) WITHIN GROUP (ORDER BY SALARY), + | PERCENTILE_DISC(0.3) WITHIN GROUP (ORDER BY SALARY DESC) + |FROM h2.test.employee WHERE dept > 0 GROUP BY DePt""".stripMargin) + checkFiltersRemoved(df3) + checkAggregateRemoved(df3) + checkPushedInfo(df3, + """ + |PushedAggregates: [PERCENTILE_DISC(0.3) WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST), + |PERCENTILE_DISC(0.3) WITHIN GROUP (ORDER BY SALARY DESC NULLS LAST)], + |PushedFilters: [DEPT IS NOT NULL, DEPT > 0], + |PushedGroupByExpressions: [DEPT], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df3, + Seq(Row(1, 9000.0, 10000.0), Row(2, 10000.0, 12000.0), Row(6, 12000.0, 12000.0))) + } + test("scan with aggregate push-down: aggregate over alias push down") { val cols = Seq("a", "b", "c", "d", "e") val df1 = sql("SELECT * FROM h2.test.employee").toDF(cols: _*) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala index aa2f110ceac2..071035853b60 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala @@ -107,16 +107,6 @@ class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { checkAnswer(df, copyDf) } } - - test("Default compression codec is snappy for ORC compression") { - withTempPath { file => - spark.range(0, 10).write - .orc(file.getCanonicalPath) - val expectedCompressionKind = - OrcFileOperator.getFileReader(file.getCanonicalPath).get.getCompression - assert(OrcCompressionCodec.SNAPPY.name() === expectedCompressionKind.name()) - } - } } class HiveOrcHadoopFsRelationSuite extends OrcHadoopFsRelationSuite {
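The MODE / PERCENTILE_CONT / PERCENTILE_DISC pushdown in this patch spans three pieces: V2ExpressionBuilder now carries the ordering as a SortValue instead of a boolean literal, JdbcDialect renders it through visitInverseDistributionFunction, and JDBCV2Suite asserts the resulting PushedAggregates text. The sketch below illustrates only that rendering step under those assumptions; every name in it (WithinGroupSqlSketch, SortSpec, render) is hypothetical and not a Spark API.

// Illustrative sketch, not Spark code: shows how a function name, its inputs, and an
// ordering-within-groups list can combine into the WITHIN GROUP (ORDER BY ...) strings
// asserted in the JDBCV2Suite tests above.
object WithinGroupSqlSketch {
  // Hypothetical stand-in for a SortValue: an expression plus direction and null ordering.
  final case class SortSpec(expr: String, descending: Boolean, nullsLast: Boolean)

  // Renders e.g. ("PERCENTILE_CONT", Seq("0.5"), Seq(SortSpec("SALARY", false, false)))
  // as "PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST)".
  def render(funcName: String, inputs: Seq[String], orderingWithinGroups: Seq[SortSpec]): String = {
    val orderBy = orderingWithinGroups.map { s =>
      val direction = if (s.descending) "DESC" else "ASC"
      val nulls = if (s.nullsLast) "NULLS LAST" else "NULLS FIRST"
      s"${s.expr} $direction $nulls"
    }.mkString(", ")
    s"$funcName(${inputs.mkString(", ")}) WITHIN GROUP (ORDER BY $orderBy)"
  }

  def main(args: Array[String]): Unit = {
    // Mirrors the MODE assertion: MODE() WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST)
    println(render("MODE", Nil, Seq(SortSpec("SALARY", descending = false, nullsLast = false))))
    // Mirrors the df1 assertion: PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SALARY ASC NULLS FIRST)
    println(render("PERCENTILE_CONT", Seq("0.5"),
      Seq(SortSpec("SALARY", descending = false, nullsLast = false))))
  }
}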