diff --git a/core/pom.xml b/core/pom.xml index 99647ecef43b6..51567116ccbcd 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -35,10 +35,6 @@ - - com.thoughtworks.paranamer - paranamer - org.apache.avro avro @@ -46,7 +42,6 @@ org.apache.avro avro-mapred - ${avro.mapred.classifier} com.google.guava @@ -165,6 +160,10 @@ javax.servlet-api ${javaxservlet.version} + + commons-codec + commons-codec + org.apache.commons commons-lang3 diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 33be899b6b438..765ee035855d6 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -20,7 +20,7 @@ import java.util.Comparator; import java.util.LinkedList; -import org.apache.avro.reflect.Nullable; +import javax.annotation.Nullable; import org.apache.spark.TaskContext; import org.apache.spark.memory.MemoryConsumer; diff --git a/dev/deps/spark-deps-hadoop-palantir b/dev/deps/spark-deps-hadoop-palantir index d26545e74fd19..b804267c83e2d 100644 --- a/dev/deps/spark-deps-hadoop-palantir +++ b/dev/deps/spark-deps-hadoop-palantir @@ -16,9 +16,9 @@ arrow-format/0.15.1//arrow-format-0.15.1.jar arrow-memory/0.15.1//arrow-memory-0.15.1.jar arrow-vector/0.15.1//arrow-vector-0.15.1.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar -avro-ipc/1.8.2//avro-ipc-1.8.2.jar -avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar -avro/1.8.2//avro-1.8.2.jar +avro-ipc/1.10.1//avro-ipc-1.10.1.jar +avro-mapred/1.10.1//avro-mapred-1.10.1.jar +avro/1.10.1//avro-1.10.1.jar breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar @@ -80,7 +80,6 @@ jackson-core/2.12.1//jackson-core-2.12.1.jar 
jackson-databind/2.12.1//jackson-databind-2.12.1.jar jackson-dataformat-yaml/2.12.1//jackson-dataformat-yaml-2.12.1.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar -jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-scala_2.12/2.12.1//jackson-module-scala_2.12-2.12.1.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar @@ -172,6 +171,6 @@ univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xmlenc/0.52//xmlenc-0.52.jar -xz/1.5//xz-1.5.jar +xz/1.8//xz-1.8.jar zookeeper/3.4.14//zookeeper-3.4.14.jar zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index 69b165ed28bae..ac1c26d39f5a7 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -309,7 +309,7 @@ applications. Read the [Advanced Dependency Management](https://spark.apache Submission Guide for more details. ## Supported types for Avro -> Spark SQL conversion -Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.8.2/spec.html#schema_primitive) and [complex types](https://avro.apache.org/docs/1.8.2/spec.html#schema_complex) under records of Avro. +Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.10.1/spec.html#schema_primitive) and [complex types](https://avro.apache.org/docs/1.10.1/spec.html#schema_complex) under records of Avro. @@ -373,7 +373,7 @@ In addition to the types listed above, it supports reading `union` types. The fo 3. `union(something, null)`, where something is any supported Avro type. This will be mapped to the same Spark SQL type as that of something, with nullable set to true. All other union types are considered complex. They will be mapped to StructType where field names are member0, member1, etc., in accordance with members of the union. 
This is consistent with the behavior when converting between Avro and Parquet. -It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.8.2/spec.html#Logical+Types): +It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.10.1/spec.html#Logical+Types):
| Avro type | Spark SQL type |
diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 256143717317f..57ab1e8114a9d 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -70,6 +70,10 @@ org.apache.sparkspark-tags_${scala.binary.version} + + org.tukaani + xz + target/scala-${scala.binary.version}/classes diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index 8972b0553c522..a2228f2564dd0 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -51,14 +51,14 @@ private[sql] class AvroOptions( /** * Top level record name in write result, which is required in Avro spec. - * See https://avro.apache.org/docs/1.8.2/spec.html#schema_record . + * See https://avro.apache.org/docs/1.10.1/spec.html#schema_record . * Default value is "topLevelRecord" */ val recordName: String = parameters.getOrElse("recordName", "topLevelRecord") /** * Record namespace in write result. Default value is "". - * See Avro spec for details: https://avro.apache.org/docs/1.8.2/spec.html#schema_record . + * See Avro spec for details: https://avro.apache.org/docs/1.10.1/spec.html#schema_record . 
*/ val recordNamespace: String = parameters.getOrElse("recordNamespace", "") diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 15905a7c11122..188ae602e96a0 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1010,7 +1010,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { .save(s"$tempDir/${UUID.randomUUID()}") }.getCause.getMessage assert(message.contains("Caused by: java.lang.NullPointerException: " + - "in test_schema in string null of string in field Name")) + "null of string in string in field Name of test_schema in test_schema")) } } diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index fae3a0a41fc0e..dedf2a7f8b3c5 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -77,7 +77,6 @@ org.apache.avro avro-mapred - ${avro.mapred.classifier} provided diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6ea9c8762f92e..6de0b56f00fa4 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -94,15 +94,9 @@ hadoop-client provided - - org.apache.avro - avro-ipc - provided - org.apache.avro avro-mapred - ${avro.mapred.classifier} provided diff --git a/pom.xml b/pom.xml index 445aadd1500a9..f4d62be122825 100644 --- a/pom.xml +++ b/pom.xml @@ -155,8 +155,7 @@ the link to metrics.dropwizard.io in docs/monitoring.md. 
--> 4.1.1 - 1.8.2 - hadoop2 + 1.10.1 1.12.0 1.11.655 @@ -201,10 +200,6 @@ 1.1 2.52.0 2.22 - - 2.8 1.8 1.1.0 - - org.apache.avro - avro-ipc - tests - ${avro.version} - test - org.apache.avro avro-mapred ${avro.version} - ${avro.mapred.classifier} ${hive.deps.scope} + + org.apache.avro + avro-ipc-jetty + io.netty netty @@ -1243,10 +1206,19 @@ org.apache.velocity - velocity + velocity-engine-core + + + javax.annotation + javax.annotation-api + + org.tukaani + xz + 1.8 + org.apache.avro avro-mapred - ${avro.mapred.classifier} commons-httpclient
| Avro logical type | Avro type | Spark SQL type |