diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index afbdae055453..d6c4214a6ba7 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -129,13 +129,13 @@ opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.3.jar
-parquet-column-1.8.1.jar
-parquet-common-1.8.1.jar
-parquet-encoding-1.8.1.jar
-parquet-format-2.3.0-incubating.jar
-parquet-hadoop-1.8.1.jar
+parquet-column-1.9.0.jar
+parquet-common-1.9.0.jar
+parquet-encoding-1.9.0.jar
+parquet-format-2.3.1.jar
+parquet-hadoop-1.9.0.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.1.jar
+parquet-jackson-1.9.0.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index adf3863f6718..7a34b45c37bb 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -136,13 +136,13 @@ opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.3.jar
-parquet-column-1.8.1.jar
-parquet-common-1.8.1.jar
-parquet-encoding-1.8.1.jar
-parquet-format-2.3.0-incubating.jar
-parquet-hadoop-1.8.1.jar
+parquet-column-1.9.0.jar
+parquet-common-1.9.0.jar
+parquet-encoding-1.9.0.jar
+parquet-format-2.3.1.jar
+parquet-hadoop-1.9.0.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.1.jar
+parquet-jackson-1.9.0.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index 88e6b3fca08a..53f66cf45211 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -136,13 +136,13 @@ opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.3.jar
-parquet-column-1.8.1.jar
-parquet-common-1.8.1.jar
-parquet-encoding-1.8.1.jar
-parquet-format-2.3.0-incubating.jar
-parquet-hadoop-1.8.1.jar
+parquet-column-1.9.0.jar
+parquet-common-1.9.0.jar
+parquet-encoding-1.9.0.jar
+parquet-format-2.3.1.jar
+parquet-hadoop-1.9.0.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.1.jar
+parquet-jackson-1.9.0.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 15c5d9f205f2..383b5f02787d 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -144,13 +144,13 @@ opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.3.jar
-parquet-column-1.8.1.jar
-parquet-common-1.8.1.jar
-parquet-encoding-1.8.1.jar
-parquet-format-2.3.0-incubating.jar
-parquet-hadoop-1.8.1.jar
+parquet-column-1.9.0.jar
+parquet-common-1.9.0.jar
+parquet-encoding-1.9.0.jar
+parquet-format-2.3.1.jar
+parquet-hadoop-1.9.0.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.1.jar
+parquet-jackson-1.9.0.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 77fb5370d98b..895203dfbbe2 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -145,13 +145,13 @@ opencsv-2.3.jar
oro-2.0.8.jar
osgi-resource-locator-1.0.1.jar
paranamer-2.3.jar
-parquet-column-1.8.1.jar
-parquet-common-1.8.1.jar
-parquet-encoding-1.8.1.jar
-parquet-format-2.3.0-incubating.jar
-parquet-hadoop-1.8.1.jar
+parquet-column-1.9.0.jar
+parquet-common-1.9.0.jar
+parquet-encoding-1.9.0.jar
+parquet-format-2.3.1.jar
+parquet-hadoop-1.9.0.jar
parquet-hadoop-bundle-1.6.0.jar
-parquet-jackson-1.8.1.jar
+parquet-jackson-1.9.0.jar
pmml-model-1.2.15.jar
pmml-schema-1.2.15.jar
protobuf-java-2.5.0.jar
diff --git a/pom.xml b/pom.xml
index 4f12085d044f..aafb0113a0f8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -134,7 +134,7 @@
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.12.1.1</derby.version>
-<parquet.version>1.8.1</parquet.version>
+<parquet.version>1.9.0</parquet.version>
<hive.parquet.version>1.6.0</hive.parquet.version>
<jetty.version>9.2.16.v20160414</jetty.version>
<javaxservlet.version>3.1.0</javaxservlet.version>
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java
index 98018b7f48bd..0fa1f118fb5a 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java
@@ -41,6 +41,11 @@ public class VectorizedPlainValuesReader extends ValuesReader implements VectorizedValuesReader {
public VectorizedPlainValuesReader() {
}
+ @Override
+ public void initFromPage(int valueCount, ByteBuffer page, int offset) {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public void initFromPage(int valueCount, byte[] bytes, int offset) throws IOException {
this.buffer = bytes;
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java
index 62157389013b..865785bd35f5 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java
@@ -27,6 +27,8 @@
import org.apache.spark.sql.execution.vectorized.ColumnVector;
+import java.nio.ByteBuffer;
+
/**
* A values reader for Parquet's run-length encoded data. This is based off of the version in
* parquet-mr with these changes:
@@ -80,6 +82,11 @@ public VectorizedRleValuesReader(int bitWidth) {
init(bitWidth);
}
+ @Override
+ public void initFromPage(int valueCount, ByteBuffer page, int offset) {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public void initFromPage(int valueCount, byte[] page, int start) {
this.offset = start;
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
index b4f36ce3752c..fe1849484780 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
@@ -546,21 +546,9 @@ private[parquet] class ParquetSchemaConverter(
private[parquet] object ParquetSchemaConverter {
val SPARK_PARQUET_SCHEMA_NAME = "spark_schema"
- // !! HACK ALERT !!
- //
- // PARQUET-363 & PARQUET-278: parquet-mr 1.8.1 doesn't allow constructing empty GroupType,
- // which prevents us to avoid selecting any columns for queries like `SELECT COUNT(*) FROM t`.
- // This issue has been fixed in parquet-mr 1.8.2-SNAPSHOT.
- //
- // To workaround this problem, here we first construct a `MessageType` with a single dummy
- // field, and then remove the field to obtain an empty `MessageType`.
- //
- // TODO Reverts this change after upgrading parquet-mr to 1.8.2+
val EMPTY_MESSAGE = Types
.buildMessage()
- .required(PrimitiveType.PrimitiveTypeName.INT32).named("dummy")
.named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME)
- EMPTY_MESSAGE.getFields.clear()
def checkFieldName(name: String): Unit = {
// ,;{}()\n\t= and space are special characters in Parquet schema