diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 index afbdae055453..d6c4214a6ba7 100644 --- a/dev/deps/spark-deps-hadoop-2.2 +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -129,13 +129,13 @@ opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar +parquet-column-1.9.0.jar +parquet-common-1.9.0.jar +parquet-encoding-1.9.0.jar +parquet-format-2.3.1.jar +parquet-hadoop-1.9.0.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar +parquet-jackson-1.9.0.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index adf3863f6718..7a34b45c37bb 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -136,13 +136,13 @@ opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar +parquet-column-1.9.0.jar +parquet-common-1.9.0.jar +parquet-encoding-1.9.0.jar +parquet-format-2.3.1.jar +parquet-hadoop-1.9.0.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar +parquet-jackson-1.9.0.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index 88e6b3fca08a..53f66cf45211 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -136,13 +136,13 @@ opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar +parquet-column-1.9.0.jar +parquet-common-1.9.0.jar +parquet-encoding-1.9.0.jar +parquet-format-2.3.1.jar +parquet-hadoop-1.9.0.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar +parquet-jackson-1.9.0.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 15c5d9f205f2..383b5f02787d 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -144,13 +144,13 @@ opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar +parquet-column-1.9.0.jar +parquet-common-1.9.0.jar +parquet-encoding-1.9.0.jar +parquet-format-2.3.1.jar +parquet-hadoop-1.9.0.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar +parquet-jackson-1.9.0.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 77fb5370d98b..895203dfbbe2 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -145,13 +145,13 @@ opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.3.jar -parquet-column-1.8.1.jar -parquet-common-1.8.1.jar -parquet-encoding-1.8.1.jar -parquet-format-2.3.0-incubating.jar -parquet-hadoop-1.8.1.jar +parquet-column-1.9.0.jar +parquet-common-1.9.0.jar +parquet-encoding-1.9.0.jar +parquet-format-2.3.1.jar +parquet-hadoop-1.9.0.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.1.jar +parquet-jackson-1.9.0.jar pmml-model-1.2.15.jar pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar diff --git a/pom.xml b/pom.xml index 4f12085d044f..aafb0113a0f8 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,7 @@ 1.2.1 10.12.1.1 - 1.8.1 + 1.9.0 1.6.0 9.2.16.v20160414 3.1.0 diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java index 98018b7f48bd..0fa1f118fb5a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java @@ -41,6 +41,11 @@ public class VectorizedPlainValuesReader extends ValuesReader implements Vectori public VectorizedPlainValuesReader() { } + @Override + public void initFromPage(int valueCount, ByteBuffer page, int offset) { + throw new UnsupportedOperationException(); + } + @Override public void initFromPage(int valueCount, byte[] bytes, int offset) throws IOException { this.buffer = bytes; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java index 62157389013b..865785bd35f5 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java @@ -27,6 +27,8 @@ import org.apache.spark.sql.execution.vectorized.ColumnVector; +import java.nio.ByteBuffer; + /** * A values reader for Parquet's run-length encoded data. This is based off of the version in * parquet-mr with these changes: @@ -80,6 +82,11 @@ public VectorizedRleValuesReader(int bitWidth) { init(bitWidth); } + @Override + public void initFromPage(int valueCount, ByteBuffer page, int offset) { + throw new UnsupportedOperationException(); + } + @Override public void initFromPage(int valueCount, byte[] page, int start) { this.offset = start; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index b4f36ce3752c..fe1849484780 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -546,21 +546,9 @@ private[parquet] class ParquetSchemaConverter( private[parquet] object ParquetSchemaConverter { val SPARK_PARQUET_SCHEMA_NAME = "spark_schema" - // !! HACK ALERT !! - // - // PARQUET-363 & PARQUET-278: parquet-mr 1.8.1 doesn't allow constructing empty GroupType, - // which prevents us to avoid selecting any columns for queries like `SELECT COUNT(*) FROM t`. - // This issue has been fixed in parquet-mr 1.8.2-SNAPSHOT. - // - // To workaround this problem, here we first construct a `MessageType` with a single dummy - // field, and then remove the field to obtain an empty `MessageType`. - // - // TODO Reverts this change after upgrading parquet-mr to 1.8.2+ val EMPTY_MESSAGE = Types .buildMessage() - .required(PrimitiveType.PrimitiveTypeName.INT32).named("dummy") .named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME) - EMPTY_MESSAGE.getFields.clear() def checkFieldName(name: String): Unit = { // ,;{}()\n\t= and space are special characters in Parquet schema