12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -202,12 +202,12 @@ orc-shims/1.6.7//orc-shims-1.6.7.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
-parquet-column/1.10.1//parquet-column-1.10.1.jar
-parquet-common/1.10.1//parquet-common-1.10.1.jar
-parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
-parquet-format/2.4.0//parquet-format-2.4.0.jar
-parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
-parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
+parquet-column/1.11.1//parquet-column-1.11.1.jar
+parquet-common/1.11.1//parquet-common-1.11.1.jar
+parquet-encoding/1.11.1//parquet-encoding-1.11.1.jar
+parquet-format-structures/1.11.1//parquet-format-structures-1.11.1.jar
+parquet-hadoop/1.11.1//parquet-hadoop-1.11.1.jar
+parquet-jackson/1.11.1//parquet-jackson-1.11.1.jar
protobuf-java/2.5.0//protobuf-java-2.5.0.jar
py4j/0.10.9.1//py4j-0.10.9.1.jar
pyrolite/4.30//pyrolite-4.30.jar
12 changes: 6 additions & 6 deletions dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -172,12 +172,12 @@ orc-shims/1.6.7//orc-shims-1.6.7.jar
oro/2.0.8//oro-2.0.8.jar
osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
paranamer/2.8//paranamer-2.8.jar
-parquet-column/1.10.1//parquet-column-1.10.1.jar
-parquet-common/1.10.1//parquet-common-1.10.1.jar
-parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar
-parquet-format/2.4.0//parquet-format-2.4.0.jar
-parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar
-parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar
+parquet-column/1.11.1//parquet-column-1.11.1.jar
+parquet-common/1.11.1//parquet-common-1.11.1.jar
+parquet-encoding/1.11.1//parquet-encoding-1.11.1.jar
+parquet-format-structures/1.11.1//parquet-format-structures-1.11.1.jar
+parquet-hadoop/1.11.1//parquet-hadoop-1.11.1.jar
+parquet-jackson/1.11.1//parquet-jackson-1.11.1.jar
protobuf-java/2.5.0//protobuf-java-2.5.0.jar
py4j/0.10.9.1//py4j-0.10.9.1.jar
pyrolite/4.30//pyrolite-4.30.jar
6 changes: 5 additions & 1 deletion pom.xml
@@ -136,7 +136,7 @@
<kafka.version>2.7.0</kafka.version>
<!-- After 10.15.1.3, the minimum required version is JDK9 -->
<derby.version>10.14.2.0</derby.version>
-<parquet.version>1.10.1</parquet.version>
+<parquet.version>1.11.1</parquet.version>
<orc.version>1.6.7</orc.version>
<jetty.version>9.4.34.v20201102</jetty.version>
<jakartaservlet.version>4.0.3</jakartaservlet.version>
@@ -2290,6 +2290,10 @@
<groupId>commons-pool</groupId>
<artifactId>commons-pool</artifactId>
</exclusion>
+<exclusion>
+  <groupId>javax.annotation</groupId>
+  <artifactId>javax.annotation-api</artifactId>
+</exclusion>
Comment on lines +2293 to +2296

Member Author (@wangyum): We do not need this, please see PARQUET-1497 for more details.
</exclusions>
</dependency>
<dependency>
ParquetSchemaSuite.scala
@@ -251,7 +251,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest
"""
|message root {
| optional group _1 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required int32 key;
| optional binary value (UTF8);
| }
@@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest
"""
|message root {
| optional group _1 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required group key {
| optional binary _1 (UTF8);
| optional binary _2 (UTF8);
@@ -300,7 +300,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest
"""
|message root {
| optional group _1 (MAP_KEY_VALUE) {
-| repeated group map {
+| repeated group key_value {
| required int32 key;
| optional group value {
| optional binary _1 (UTF8);
@@ -740,7 +740,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
nullable = true))),
"""message root {
| optional group f1 (MAP_KEY_VALUE) {
-| repeated group map {
+| repeated group key_value {
| required int32 num;
| required binary str (UTF8);
| }
@@ -759,7 +759,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
nullable = true))),
"""message root {
| optional group f1 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
Member (@srowen): Would this be a possibly breaking change to files written as Parquet? May be a dumb question.

Member Author (@wangyum):
1. key_value was introduced by this fix: https://issues.apache.org/jira/browse/PARQUET-1879
2. I also used Parquet 1.11.1 to read this file: https://issues.apache.org/jira/browse/SPARK-32639
   [screenshot omitted]

Member: So, are you saying that there is no breaking change, @wangyum? @srowen's question is asking the reason why we need this change, isn't it?

Member: Looking at the original PR, I think the change should be backward-compatible (the map annotation can still be handled on the read path).

Member: Seems better to test both.
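A minimal sketch of that "test both" round trip, for illustration only (not part of this PR): the output paths, column name, and the legacy fixture location are hypothetical. The point is that the 1.11.1 writer emits a repeated group named key_value (PARQUET-1879), while the read path must still resolve files whose repeated group carries the legacy name map.

    // Hypothetical round-trip check, assuming Spark with Parquet 1.11.1 on the classpath.
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.types.MapType

    val spark = SparkSession.builder().master("local[1]").getOrCreate()

    // Write a map column with the upgraded writer; the repeated group is "key_value".
    val df = spark.range(3).selectExpr("map(id, cast(id as string)) as m")
    df.write.mode("overwrite").parquet("/tmp/map-new")
    assert(spark.read.parquet("/tmp/map-new").count() == 3)

    // Read a fixture written by Parquet <= 1.10, whose repeated group is still
    // named "map"; the schema converter should resolve it to the same MapType.
    // "legacy-map-parquet" is a hypothetical checked-in test resource.
    val legacy = spark.read.parquet("src/test/resources/test-data/legacy-map-parquet")
    assert(legacy.schema("m").dataType.isInstanceOf[MapType])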

| required int32 key;
| required binary value (UTF8);
| }
@@ -797,7 +797,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
nullable = true))),
"""message root {
| optional group f1 (MAP_KEY_VALUE) {
-| repeated group map {
+| repeated group key_value {
| required int32 num;
| optional binary str (UTF8);
| }
@@ -816,7 +816,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
nullable = true))),
"""message root {
| optional group f1 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required int32 key;
| optional binary value (UTF8);
| }
@@ -857,7 +857,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
nullable = true))),
"""message root {
| optional group f1 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required int32 key;
| required binary value (UTF8);
| }
@@ -893,7 +893,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
nullable = true))),
"""message root {
| optional group f1 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required int32 key;
| optional binary value (UTF8);
| }
@@ -1447,7 +1447,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
parquetSchema =
"""message root {
| required group f0 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required int32 key;
| required group value {
| required int32 value_f0;
@@ -1472,7 +1472,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest
expectedSchema =
"""message root {
| required group f0 (MAP) {
-| repeated group map (MAP_KEY_VALUE) {
+| repeated group key_value (MAP_KEY_VALUE) {
| required int32 key;
| required group value {
| required int64 value_f1;
StreamSuite.scala
@@ -214,7 +214,9 @@ class StreamSuite extends StreamTest
.start(outputDir.getAbsolutePath)
try {
query.processAllAvailable()
-val outputDf = spark.read.parquet(outputDir.getAbsolutePath).as[Long]
+// Parquet's page-level CRC checksums change the file size and can affect
+// the data order when reading these files. See PARQUET-1746 for details.
+val outputDf = spark.read.parquet(outputDir.getAbsolutePath).sort('a).as[Long]
checkDataset[Long](outputDf, (0L to 10L).toArray: _*)
} finally {
query.stop()
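For context on the comment added above: parquet-mr 1.11 writes page-level CRC checksums by default (PARQUET-1746). If the pre-1.11 behavior is needed, they can be disabled through the writer's Hadoop configuration. A small sketch; the property name comes from parquet-mr's ParquetOutputFormat and should be verified against the version in use:

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[1]").getOrCreate()
    // Disable Parquet page-level CRC checksums for writes (assumption: this is
    // the parquet-mr property name; check ParquetOutputFormat in your version).
    spark.sparkContext.hadoopConfiguration
      .set("parquet.page.write-checksum.enabled", "false")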
StatisticsSuite.scala
@@ -1528,7 +1528,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton
Seq(tbl, ext_tbl).foreach { tblName =>
sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')")

-val expectedSize = 601
+val expectedSize = 651
// analyze table
sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN")
var tableStats = getTableStats(tblName)
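The 601 -> 651 bump reflects the larger files Parquet 1.11.1 produces, plausibly from the default page-level CRC checksums and the extra index structures written to the footer. A sketch of a less brittle alternative that measures the written files instead of hardcoding a size; the helper is hypothetical and not part of this PR:

    import org.apache.hadoop.fs.Path
    import org.apache.spark.sql.SparkSession

    // Hypothetical helper: total size of the data files under a table location,
    // which could replace a hardcoded expectedSize in tests like this one.
    def dataSize(spark: SparkSession, location: String): Long = {
      val path = new Path(location)
      val fs = path.getFileSystem(spark.sessionState.newHadoopConf())
      fs.listStatus(path)
        .filterNot(_.getPath.getName.startsWith("_")) // skip _SUCCESS and the like
        .map(_.getLen)
        .sum
    }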