Skip to content

Commit d947a4c

Browse files
committed
[SPARK-18860][SQL] Update Parquet to 1.9.0
1 parent 70ffff2 commit d947a4c

File tree

10 files changed

+39
-39
lines changed

10 files changed

+39
-39
lines changed

dev/deps/spark-deps-hadoop-2.2

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,13 +129,13 @@ opencsv-2.3.jar
129129
oro-2.0.8.jar
130130
osgi-resource-locator-1.0.1.jar
131131
paranamer-2.3.jar
132-
parquet-column-1.8.1.jar
133-
parquet-common-1.8.1.jar
134-
parquet-encoding-1.8.1.jar
132+
parquet-column-1.9.0.jar
133+
parquet-common-1.9.0.jar
134+
parquet-encoding-1.9.0.jar
135135
parquet-format-2.3.0-incubating.jar
136-
parquet-hadoop-1.8.1.jar
136+
parquet-hadoop-1.9.0.jar
137137
parquet-hadoop-bundle-1.6.0.jar
138-
parquet-jackson-1.8.1.jar
138+
parquet-jackson-1.9.0.jar
139139
pmml-model-1.2.15.jar
140140
pmml-schema-1.2.15.jar
141141
protobuf-java-2.5.0.jar

dev/deps/spark-deps-hadoop-2.3

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,13 @@ opencsv-2.3.jar
136136
oro-2.0.8.jar
137137
osgi-resource-locator-1.0.1.jar
138138
paranamer-2.3.jar
139-
parquet-column-1.8.1.jar
140-
parquet-common-1.8.1.jar
141-
parquet-encoding-1.8.1.jar
139+
parquet-column-1.9.0.jar
140+
parquet-common-1.9.0.jar
141+
parquet-encoding-1.9.0.jar
142142
parquet-format-2.3.0-incubating.jar
143-
parquet-hadoop-1.8.1.jar
143+
parquet-hadoop-1.9.0.jar
144144
parquet-hadoop-bundle-1.6.0.jar
145-
parquet-jackson-1.8.1.jar
145+
parquet-jackson-1.9.0.jar
146146
pmml-model-1.2.15.jar
147147
pmml-schema-1.2.15.jar
148148
protobuf-java-2.5.0.jar

dev/deps/spark-deps-hadoop-2.4

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,13 @@ opencsv-2.3.jar
136136
oro-2.0.8.jar
137137
osgi-resource-locator-1.0.1.jar
138138
paranamer-2.3.jar
139-
parquet-column-1.8.1.jar
140-
parquet-common-1.8.1.jar
141-
parquet-encoding-1.8.1.jar
139+
parquet-column-1.9.0.jar
140+
parquet-common-1.9.0.jar
141+
parquet-encoding-1.9.0.jar
142142
parquet-format-2.3.0-incubating.jar
143-
parquet-hadoop-1.8.1.jar
143+
parquet-hadoop-1.9.0.jar
144144
parquet-hadoop-bundle-1.6.0.jar
145-
parquet-jackson-1.8.1.jar
145+
parquet-jackson-1.9.0.jar
146146
pmml-model-1.2.15.jar
147147
pmml-schema-1.2.15.jar
148148
protobuf-java-2.5.0.jar

dev/deps/spark-deps-hadoop-2.6

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,13 @@ opencsv-2.3.jar
144144
oro-2.0.8.jar
145145
osgi-resource-locator-1.0.1.jar
146146
paranamer-2.3.jar
147-
parquet-column-1.8.1.jar
148-
parquet-common-1.8.1.jar
149-
parquet-encoding-1.8.1.jar
147+
parquet-column-1.9.0.jar
148+
parquet-common-1.9.0.jar
149+
parquet-encoding-1.9.0.jar
150150
parquet-format-2.3.0-incubating.jar
151-
parquet-hadoop-1.8.1.jar
151+
parquet-hadoop-1.9.0.jar
152152
parquet-hadoop-bundle-1.6.0.jar
153-
parquet-jackson-1.8.1.jar
153+
parquet-jackson-1.9.0.jar
154154
pmml-model-1.2.15.jar
155155
pmml-schema-1.2.15.jar
156156
protobuf-java-2.5.0.jar

dev/deps/spark-deps-hadoop-2.7

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,13 @@ opencsv-2.3.jar
145145
oro-2.0.8.jar
146146
osgi-resource-locator-1.0.1.jar
147147
paranamer-2.3.jar
148-
parquet-column-1.8.1.jar
149-
parquet-common-1.8.1.jar
150-
parquet-encoding-1.8.1.jar
148+
parquet-column-1.9.0.jar
149+
parquet-common-1.9.0.jar
150+
parquet-encoding-1.9.0.jar
151151
parquet-format-2.3.0-incubating.jar
152-
parquet-hadoop-1.8.1.jar
152+
parquet-hadoop-1.9.0.jar
153153
parquet-hadoop-bundle-1.6.0.jar
154-
parquet-jackson-1.8.1.jar
154+
parquet-jackson-1.9.0.jar
155155
pmml-model-1.2.15.jar
156156
pmml-schema-1.2.15.jar
157157
protobuf-java-2.5.0.jar

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@
134134
<!-- Version used for internal directory structure -->
135135
<hive.version.short>1.2.1</hive.version.short>
136136
<derby.version>10.12.1.1</derby.version>
137-
<parquet.version>1.8.1</parquet.version>
137+
<parquet.version>1.9.0</parquet.version>
138138
<hive.parquet.version>1.6.0</hive.parquet.version>
139139
<jetty.version>9.2.16.v20160414</jetty.version>
140140
<javaxservlet.version>3.1.0</javaxservlet.version>

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ public class VectorizedPlainValuesReader extends ValuesReader implements Vectori
4141
public VectorizedPlainValuesReader() {
4242
}
4343

44+
@Override
45+
public void initFromPage(int valueCount, ByteBuffer page, int offset) {
46+
throw new UnsupportedOperationException();
47+
}
48+
4449
@Override
4550
public void initFromPage(int valueCount, byte[] bytes, int offset) throws IOException {
4651
this.buffer = bytes;

sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
import org.apache.spark.sql.execution.vectorized.ColumnVector;
2929

30+
import java.nio.ByteBuffer;
31+
3032
/**
3133
* A values reader for Parquet's run-length encoded data. This is based off of the version in
3234
* parquet-mr with these changes:
@@ -80,6 +82,11 @@ public VectorizedRleValuesReader(int bitWidth) {
8082
init(bitWidth);
8183
}
8284

85+
@Override
86+
public void initFromPage(int valueCount, ByteBuffer page, int offset) {
87+
throw new UnsupportedOperationException();
88+
}
89+
8390
@Override
8491
public void initFromPage(int valueCount, byte[] page, int start) {
8592
this.offset = start;

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -546,21 +546,9 @@ private[parquet] class ParquetSchemaConverter(
546546
private[parquet] object ParquetSchemaConverter {
547547
val SPARK_PARQUET_SCHEMA_NAME = "spark_schema"
548548

549-
// !! HACK ALERT !!
550-
//
551-
// PARQUET-363 & PARQUET-278: parquet-mr 1.8.1 doesn't allow constructing empty GroupType,
552-
// which prevents us from selecting no columns for queries like `SELECT COUNT(*) FROM t`.
553-
// This issue has been fixed in parquet-mr 1.8.2-SNAPSHOT.
554-
//
555-
// To workaround this problem, here we first construct a `MessageType` with a single dummy
556-
// field, and then remove the field to obtain an empty `MessageType`.
557-
//
558-
// TODO Reverts this change after upgrading parquet-mr to 1.8.2+
559549
val EMPTY_MESSAGE = Types
560550
.buildMessage()
561-
.required(PrimitiveType.PrimitiveTypeName.INT32).named("dummy")
562551
.named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME)
563-
EMPTY_MESSAGE.getFields.clear()
564552

565553
def checkFieldName(name: String): Unit = {
566554
// ,;{}()\n\t= and space are special characters in Parquet schema

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
14241424

14251425
catalystSchema = new StructType(),
14261426

1427-
expectedSchema = ParquetSchemaConverter.EMPTY_MESSAGE)
1427+
expectedSchema = "message root {}")
14281428

14291429
testSchemaClipping(
14301430
"disjoint field sets",

0 commit comments

Comments (0)