Merged
Changes from all commits (35 commits)
79f77f7
[SPARK-46626][DOCS] Bump jekyll to 4.3.3 to enable support for Ruby 3…
nchammas Jan 9, 2024
a4c9507
[MINOR][CONNECT] Fix typo in error handling opType name
grundprinzip Jan 9, 2024
d008f81
[SPARK-46628][INFRA] Use SPDX short identifier in `license` name
dongjoon-hyun Jan 9, 2024
e7536f2
[SPARK-46610][SQL] Create table should throw exception when no value …
amaliujia Jan 9, 2024
7f056d8
[SPARK-46382][SQL] XML: Default ignoreSurroundingSpaces to true
shujingyang-db Jan 9, 2024
03fc5e2
[SPARK-46600][SQL] Move shared code between SqlConf and SqlApiConf to…
amaliujia Jan 9, 2024
f4c8dd6
Revert "[SPARK-46474][INFRA] Upgrade upload-artifact action to v4"
HyukjinKwon Jan 9, 2024
ee2a87b
[SPARK-40876][SQL][TESTS][FOLLOW-UP] Remove invalid decimal test case…
HyukjinKwon Jan 9, 2024
8fa794b
[SPARK-46627][SS][UI] Fix timeline tooltip content on streaming ui
yaooqinn Jan 9, 2024
ee1fd92
[SPARK-46331][SQL] Removing CodegenFallback from subset of DateTime e…
dbatomic Jan 9, 2024
ef7846a
[SPARK-46622][CORE] Override `toString` method for `o.a.s.network.shu…
LuciferYang Jan 9, 2024
56dd1f7
[SPARK-46634][SQL] literal validation should not drill down to null f…
cloud-fan Jan 9, 2024
f765022
[SPARK-46630][SQL] XML: Validate XML element name on write
sandip-db Jan 10, 2024
71468eb
[SPARK-46637][DOCS] Enhancing the Visual Appeal of Spark doc website
gengliangwang Jan 10, 2024
2cf07f9
[SPARK-46593][PS][TESTS] Refactor `data_type_ops` tests again
zhengruifeng Jan 10, 2024
d203328
[MINOR][PYTHON][TESTS] Retry `test_map_in_pandas_with_column_vector`
zhengruifeng Jan 10, 2024
6679419
[SPARK-46437][DOCS] Add custom tags for conditional Jekyll includes
nchammas Jan 10, 2024
3a6b9ad
[SPARK-46633][SQL] Fix Avro reader to handle zero-length blocks
sadikovi Jan 10, 2024
eb11190
[MINOR][INFRA] Ensure that docs build successfully with SKIP_API=1
nchammas Jan 10, 2024
0a66be8
[SPARK-37039][PS] Fix `Series.astype` to work properly with missing v…
itholic Jan 10, 2024
d613772
[MINOR][DOCS] Add license header at docs/_plugins
HyukjinKwon Jan 10, 2024
48d22e9
[SPARK-46643][SQL][TESTS] Fix ORC tests to be independent from defaul…
dongjoon-hyun Jan 10, 2024
686f428
[SPARK-46541][SQL][CONNECT] Fix the ambiguous column reference in sel…
zhengruifeng Jan 10, 2024
24cb611
[SPARK-46646][SQL][TESTS] Improve `TPCDSQueryBenchmark` to support ot…
dongjoon-hyun Jan 10, 2024
0791e9f
[SPARK-46536][SQL] Support GROUP BY calendar_interval_type
stefankandic Jan 10, 2024
4957c1a
[SPARK-46645][INFRA] Exclude unittest-xml-reporting in Python 3.12 image
HyukjinKwon Jan 10, 2024
f526bea
[SPARK-46649][PYTHON][INFRA] Run PyPy 3 and Python 3.10 tests indepen…
HyukjinKwon Jan 10, 2024
d2f5724
[MINOR][DOCS] Correct the usage example of Dataset in Java
Jan 10, 2024
85b504d
[SPARK-46442][SQL] DS V2 supports push down PERCENTILE_CONT and PERCE…
beliefer Jan 10, 2024
a3991b1
[SPARK-46648][SQL] Use `zstd` as the default ORC compression
dongjoon-hyun Jan 10, 2024
6375817
[SPARK-46651][PS][TESTS] Split `FrameTakeTests`
zhengruifeng Jan 10, 2024
fcdfc8c
[SPARK-46437][FOLLOWUP] Update configuration.md to use include_api_gen
nchammas Jan 10, 2024
bda9957
[SPARK-46635][PYTHON][DOCS] Refine docstring of `from_csv/schema_of_c…
LuciferYang Jan 10, 2024
a73ff66
[SPARK-46652][SQL][TESTS] Remove `Snappy` from `TPCDSQueryBenchmark` …
dongjoon-hyun Jan 10, 2024
f7b0b45
[SPARK-46547][SS] Swallow non-fatal exception in maintenance task to …
anishshri-db Jan 10, 2024
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
@@ -185,7 +185,7 @@ jobs:
echo "Preparing the benchmark results:"
tar -cvf benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude-standard`
- name: Upload benchmark results
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}-${{ matrix.split }}
path: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar
25 changes: 14 additions & 11 deletions .github/workflows/build_and_test.yml
@@ -272,13 +272,13 @@ jobs:
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
@@ -468,13 +468,13 @@ jobs:
name: PySpark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: test-results-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
@@ -553,7 +553,7 @@ jobs:
./dev/run-tests --parallelism 1 --modules sparkr
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: test-results-sparkr--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
@@ -761,6 +761,9 @@ jobs:
run: ./dev/lint-r
- name: Run documentation build
run: |
# Build docs first with SKIP_API to ensure they are buildable without requiring any
# language docs to be built beforehand.
cd docs; SKIP_API=1 bundle exec jekyll build; cd ..
if [ -f "./dev/is-changed.py" ]; then
# Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
@@ -774,7 +777,7 @@
run: tar cjf site.tar.bz2 docs/_site
- name: Upload documentation
if: github.repository != 'apache/spark'
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: site
path: site.tar.bz2
@@ -927,13 +930,13 @@ jobs:
spark.sql.join.forceApplyShuffledHashJoin=true
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: test-results-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-tpcds--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
@@ -996,13 +999,13 @@ jobs:
./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: test-results-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-docker-integration--${{ inputs.java }}-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
@@ -1077,7 +1080,7 @@ jobs:
build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
- name: Upload Spark on K8S integration tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: spark-on-kubernetes-it-log
path: "**/target/integration-tests.log"
2 changes: 1 addition & 1 deletion .github/workflows/build_python.yml
@@ -27,7 +27,7 @@ jobs:
run-build:
strategy:
matrix:
pyversion: ["pypy3,python3.10", "python3.11", "python3.12"]
pyversion: ["pypy3", "python3.10", "python3.11", "python3.12"]
permissions:
packages: write
name: Run
4 changes: 2 additions & 2 deletions .github/workflows/maven_test.yml
@@ -200,13 +200,13 @@ jobs:
rm -rf ~/.m2/repository/org/apache/spark
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
@@ -54,4 +54,9 @@ public int hashCode() {
result = 31 * result + minor;
return result;
}

@Override
public String toString() {
return "StoreVersion[" + major + "." + minor + ']';
}
}
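For reference, a minimal hypothetical sketch of what the override above buys (the wrapper class and main method are illustrative only; StoreVersion and its (major, minor) constructor come from the diff):

import org.apache.spark.network.shuffledb.StoreVersion;

public class StoreVersionToStringExample {
  public static void main(String[] args) {
    StoreVersion v = new StoreVersion(2, 0);
    // With the override above this prints "StoreVersion[2.0]" rather than the default
    // Object identity string, which is what DBProviderSuite below expects to see in the
    // "incompatible with current version" error message.
    System.out.println(v);
  }
}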
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.network.util;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.SystemUtils;
import org.apache.spark.network.shuffledb.DBBackend;
import org.apache.spark.network.shuffledb.StoreVersion;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.IOException;

import static org.junit.jupiter.api.Assumptions.assumeFalse;

public class DBProviderSuite {

@Test
public void testRockDBCheckVersionFailed() throws IOException {
testCheckVersionFailed(DBBackend.ROCKSDB, "rocksdb");
}

@Test
public void testLevelDBCheckVersionFailed() throws IOException {
assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64"));
testCheckVersionFailed(DBBackend.LEVELDB, "leveldb");
}

private void testCheckVersionFailed(DBBackend dbBackend, String namePrefix) throws IOException {
String root = System.getProperty("java.io.tmpdir");
File dbFile = JavaUtils.createDirectory(root, namePrefix);
try {
StoreVersion v1 = new StoreVersion(1, 0);
ObjectMapper mapper = new ObjectMapper();
DBProvider.initDB(dbBackend, dbFile, v1, mapper).close();
StoreVersion v2 = new StoreVersion(2, 0);
IOException ioe = Assertions.assertThrows(IOException.class, () ->
DBProvider.initDB(dbBackend, dbFile, v2, mapper));
Assertions.assertTrue(
ioe.getMessage().contains("incompatible with current version StoreVersion[2.0]"));
} finally {
JavaUtils.deleteRecursively(dbFile);
}
}
}
@@ -44,7 +44,7 @@
* @since 3.0.0
*/
@Unstable
public final class CalendarInterval implements Serializable {
public final class CalendarInterval implements Serializable, Comparable<CalendarInterval> {
// NOTE: If you're moving or renaming this file, you should also update Unidoc configuration
// specified in 'SparkBuild.scala'.
public final int months;
@@ -127,4 +127,26 @@ private void appendUnit(StringBuilder sb, long value, String unit) {
* @throws ArithmeticException if a numeric overflow occurs
*/
public Duration extractAsDuration() { return Duration.of(microseconds, ChronoUnit.MICROS); }

/**
* This method is not used to order CalendarInterval instances, as they are not orderable and
* cannot be used in an ORDER BY statement.
* Instead, it is used to find identical interval instances for aggregation purposes.
* It compares the 'months', 'days', and 'microseconds' fields of this CalendarInterval
* with another instance. The comparison is done first on the 'months', then on the 'days',
* and finally on the 'microseconds'.
*
* @param o The CalendarInterval instance to compare with.
* @return Zero if this object is equal to the specified object, and non-zero otherwise
*/
@Override
public int compareTo(CalendarInterval o) {
if (this.months != o.months) {
return Integer.compare(this.months, o.months);
} else if (this.days != o.days) {
return Integer.compare(this.days, o.days);
} else {
return Long.compare(this.microseconds, o.microseconds);
}
}
}
@@ -76,6 +76,22 @@ public void toStringTest() {
i.toString());
}

@Test
public void compareToTest() {
CalendarInterval i = new CalendarInterval(0, 0, 0);

assertEquals(i.compareTo(new CalendarInterval(0, 0, 0)), 0);
assertEquals(i.compareTo(new CalendarInterval(0, 0, 1)), -1);
assertEquals(i.compareTo(new CalendarInterval(0, 1, 0)), -1);
assertEquals(i.compareTo(new CalendarInterval(0, 1, -1)), -1);
assertEquals(i.compareTo(new CalendarInterval(1, 0, 0)), -1);
assertEquals(i.compareTo(new CalendarInterval(1, 0, -1)), -1);
assertEquals(i.compareTo(new CalendarInterval(0, 0, -1)), 1);
assertEquals(i.compareTo(new CalendarInterval(0, -1, 0)), 1);
assertEquals(i.compareTo(new CalendarInterval(-1, 0, 0)), 1);
assertEquals(i.compareTo(new CalendarInterval(-1, 0, 1)), 1);
}

@Test
public void periodAndDurationTest() {
CalendarInterval interval = new CalendarInterval(120, -40, 123456);
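As the Javadoc on compareTo notes, the ordering exists so identical intervals can be collapsed for aggregation (SPARK-46536), not for ORDER BY. A small illustrative sketch, assuming only the public (months, days, microseconds) constructor used in the tests above:

import java.util.TreeMap;
import org.apache.spark.unsafe.types.CalendarInterval;

public class CalendarIntervalGroupingExample {
  public static void main(String[] args) {
    // Comparable lets intervals serve as keys in an ordered container, so equal
    // (months, days, microseconds) triples collapse into a single group.
    TreeMap<CalendarInterval, Integer> counts = new TreeMap<>();
    CalendarInterval[] values = {
      new CalendarInterval(1, 2, 3L),
      new CalendarInterval(1, 2, 3L),
      new CalendarInterval(0, 0, 1L)
    };
    for (CalendarInterval v : values) {
      counts.merge(v, 1, Integer::sum);
    }
    System.out.println(counts.size()); // 2
  }
}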
11 changes: 6 additions & 5 deletions common/utils/src/main/resources/error/error-classes.json
@@ -324,6 +324,12 @@
],
"sqlState" : "0AKD0"
},
"CANNOT_RESOLVE_DATAFRAME_COLUMN" : {
"message" : [
"Cannot resolve dataframe column <name>. It's probably because of illegal references like `df1.select(df2.col(\"a\"))`."
],
"sqlState" : "42704"
},
"CANNOT_RESOLVE_STAR_EXPAND" : {
"message" : [
"Cannot resolve <targetString>.* given input columns <columns>. Please check that the specified table or struct exists and is accessible in the input columns."
@@ -6843,11 +6849,6 @@
"Cannot modify the value of a static config: <k>"
]
},
"_LEGACY_ERROR_TEMP_3051" : {
"message" : [
"When resolving <u>, fail to find subplan with plan_id=<planId> in <q>"
]
},
"_LEGACY_ERROR_TEMP_3052" : {
"message" : [
"Unexpected resolved action: <other>"
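For context on the new CANNOT_RESOLVE_DATAFRAME_COLUMN class, a hypothetical Java Dataset snippet of the illegal cross-DataFrame reference its message describes (session setup and column names are illustrative, and the exact error surfaced can depend on whether the query runs over Spark Connect):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class CrossDataFrameReferenceExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().master("local[1]").getOrCreate();
    Dataset<Row> df1 = spark.range(3).toDF("a");
    Dataset<Row> df2 = spark.range(3).toDF("a");
    // df2.col("a") belongs to df2's plan, not df1's, so the analyzer cannot resolve it
    // against df1 and is expected to fail, now reported via CANNOT_RESOLVE_DATAFRAME_COLUMN
    // rather than the removed _LEGACY_ERROR_TEMP_3051 template.
    df1.select(df2.col("a")).show();
    spark.stop();
  }
}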
@@ -182,16 +182,19 @@ private[sql] object AvroUtils extends Logging {

def hasNextRow: Boolean = {
while (!completed && currentRow.isEmpty) {
val r = fileReader.hasNext && !fileReader.pastSync(stopPosition)
if (!r) {
if (fileReader.pastSync(stopPosition)) {
fileReader.close()
completed = true
currentRow = None
} else {
} else if (fileReader.hasNext()) {
val record = fileReader.next()
// the row must be deserialized in hasNextRow, because AvroDeserializer#deserialize
// potentially filters rows
currentRow = deserializer.deserialize(record).asInstanceOf[Option[InternalRow]]
} else {
// In this case, `fileReader.hasNext()` returns false but we are not past sync point yet.
// This means we hit empty blocks; continue reading the file in case there are
// non-empty blocks ahead or until we move past the sync point.
}
}
currentRow.isDefined
5 changes: 5 additions & 0 deletions connector/avro/src/test/resources/empty_blocks.avro

Large diffs are not rendered by default.

@@ -2716,6 +2716,19 @@ abstract class AvroSuite
assert(AvroOptions.isValidOption("datetimeRebaseMode"))
assert(AvroOptions.isValidOption("enableStableIdentifiersForUnionType"))
}

test("SPARK-46633: read file with empty blocks") {
for (maxPartitionBytes <- Seq(100, 100000, 100000000)) {
withSQLConf(SQLConf.FILES_MAX_PARTITION_BYTES.key -> s"$maxPartitionBytes") {
val file = getResourceAvroFilePath("empty_blocks.avro")
val df = spark.read.format("avro").load(file)
val count = df.count()
val records = df.collect()
assert(count == 58)
assert(count == records.length)
}
}
}
}

class AvroV1Suite extends AvroSuite {
@@ -894,7 +894,7 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateM
// df1("i") is not ambiguous, but it's not valid in the projected df.
df1.select((df1("i") + 1).as("plus")).select(df1("i")).collect()
}
assert(e1.getMessage.contains("MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT"))
assert(e1.getMessage.contains("UNRESOLVED_COLUMN.WITH_SUGGESTION"))

checkSameResult(
Seq(Row(1, "a")),
@@ -191,7 +191,7 @@ class SparkConnectService(debug: Boolean) extends AsyncService with BindableServ
new SparkConnectReleaseExecuteHandler(responseObserver).handle(request)
} catch
ErrorUtils.handleError(
"reattachExecute",
"releaseExecute",
observer = responseObserver,
userId = request.getUserContext.getUserId,
sessionId = request.getSessionId)
@@ -223,7 +223,7 @@ function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) {
.attr("cx", function(d) { return x(d.x); })
.attr("cy", function(d) { return y(d.y); })
.attr("r", function(d) { return isFailedBatch(d.x) ? "2" : "3";})
.on('mouseover', function(d) {
.on('mouseover', function(event, d) {
var tip = yValueFormat(d.y) + " " + unitY + " at " + timeTipStrings[d.x];
showBootstrapTooltip(d3.select(this), tip);
// show the point