36 commits
4cc5baf
Prepare next development iteration
gszadovszky Jan 22, 2020
76f3594
PARQUET-2027: Fix calculating directory offset for merge (#896)
gszadovszky Apr 23, 2021
49f40a3
PARQUET-2064: Make Range public accessible in RowRanges (#921)
shangxinli Aug 16, 2021
e8c9acd
PARQUET-2022: ZstdDecompressorStream should close `zstdInputStream` (…
dongjoon-hyun Apr 19, 2021
7597c74
PARQUET-2052: Integer overflow when writing huge binary using diction…
sunchao May 26, 2021
aa132b3
PARQUET-1633: Fix integer overflow (#902)
eadwright Jun 11, 2021
387e138
PARQUET-2054: fix TCP leaking when calling ParquetFileWriter.appendFi…
vectorijk Jun 22, 2021
413d1dd
PARQUET-2072: Do Not Determine Both Min/Max for Binary Stats (#920)
belugabehr Aug 9, 2021
6e72dd4
PARQUET-2073: Fix estimate remaining row count in ColumnWriteStoreBas…
Ted-Jiang Aug 16, 2021
615d769
PARQUET-2078: Failed to read parquet file after writing with the same…
loudongfeng Sep 9, 2021
0d6a49e
Update CHANGES.md for 1.12.1 (#931)
shangxinli Sep 10, 2021
f5595fe
PARQUET-2043: Fail for undeclared dependencies (#916)
gszadovszky Aug 16, 2021
261e320
Remove unused dependency & Fix warning message
shangxinli Sep 11, 2021
d1dccf6
[maven-release-plugin] prepare release apache-parquet-1.12.1-rc0
shangxinli Sep 11, 2021
9455f48
[maven-release-plugin] prepare for next development iteration
shangxinli Sep 11, 2021
afd4904
Revert "Remove unused dependency & Fix warning message"
shangxinli Sep 13, 2021
64544c1
Revert "PARQUET-2043: Fail for undeclared dependencies (#916)"
shangxinli Sep 13, 2021
2a5c06c
[maven-release-plugin] prepare release apache-parquet-1.12.1-rc1
shangxinli Sep 13, 2021
16d7608
[maven-release-plugin] prepare for next development iteration
shangxinli Sep 13, 2021
ef6274f
[maven-release-plugin] prepare for next development iteration
shangxinli Sep 15, 2021
afb64b8
PARQUET-2094: Handle negative values in page headers (#933)
gszadovszky Sep 30, 2021
a113189
Update CHANGES.md for 1.12.2
gszadovszky Sep 30, 2021
77e30c8
[maven-release-plugin] prepare release apache-parquet-1.12.2-rc0
gszadovszky Sep 30, 2021
c17fc98
reduce ByteBuffer allocation
Aaaaaaron May 14, 2019
bf398e3
add single record size check
7mming7 Aug 20, 2019
d9333f6
optimize snappy decompress
hn5092 Nov 8, 2019
19aec9d
maven maven-source-plugin
hn5092 Nov 8, 2019
d9a2d1c
update version to 1.12.2-kylin-r1
xifeng Dec 22, 2021
bd9e6fe
Revert "mior, add local nexus" (#15)
Dec 10, 2019
c7154bd
KE-40948 Add RowGroup filters info (#66)
yabola Mar 3, 2023
26fc72e
PARQUET-2254 Support building dynamic bloom filter that adapts to the…
yabola Apr 20, 2023
894234b
KE-40433 add page index filter log and release 1.12.2-kylin-r5 (#68)
ygjia Apr 28, 2023
1f2ac54
change distributionManagement (#70)
ygjia May 9, 2023
6e2172c
KE-41399 Avoid parquet footer reads twice in vectorized reader
yabola May 18, 2023
b659614
KE-41399 Avoid parquet footer reads twice in vectorized reader
yabola May 18, 2023
8f6047b
KE-41399 Avoid parquet footer reads twice in vectorized reader
yabola May 22, 2023
81 changes: 54 additions & 27 deletions CHANGES.md
@@ -19,6 +19,33 @@
 
 # Parquet #
 
+### Version 1.12.2 ###
+
+Release Notes - Parquet - Version 1.12.2
+
+#### Bug
+
+* [PARQUET-2094](https://issues.apache.org/jira/browse/PARQUET-2094) - Handle negative values in page headers
+
+### Version 1.12.1 ###
+
+Release Notes - Parquet - Version 1.12.1
+
+#### Bug
+
+* [PARQUET-1633](https://issues.apache.org/jira/browse/PARQUET-1633) - Fix integer overflow
+* [PARQUET-2022](https://issues.apache.org/jira/browse/PARQUET-2022) - ZstdDecompressorStream should close zstdInputStream
+* [PARQUET-2027](https://issues.apache.org/jira/browse/PARQUET-2027) - Fix calculating directory offset for merge
+* [PARQUET-2052](https://issues.apache.org/jira/browse/PARQUET-2052) - Integer overflow when writing huge binary using dictionary encoding
+* [PARQUET-2054](https://issues.apache.org/jira/browse/PARQUET-2054) - fix TCP leaking when calling ParquetFileWriter.appendFile
+* [PARQUET-2072](https://issues.apache.org/jira/browse/PARQUET-2072) - Do Not Determine Both Min/Max for Binary Stats
+* [PARQUET-2073](https://issues.apache.org/jira/browse/PARQUET-2073) - Fix estimate remaining row count in ColumnWriteStoreBase.
+* [PARQUET-2078](https://issues.apache.org/jira/browse/PARQUET-2078) - Failed to read parquet file after writing with the same parquet version
+
+#### Improvement
+
+* [PARQUET-2064](https://issues.apache.org/jira/browse/PARQUET-2064) - Make Range public accessible in RowRanges
+
 ### Version 1.12.0 ###
 
 Release Notes - Parquet - Version 1.12.0
@@ -739,7 +766,7 @@ Release Notes - Parquet - Version 1.10.0
 * ISSUE [346](https://github.com/Parquet/parquet-mr/pull/346): stop using strings and b64 for compressed input splits
 * ISSUE [345](https://github.com/Parquet/parquet-mr/pull/345): set cascading version to 2.5.3
 * ISSUE [342](https://github.com/Parquet/parquet-mr/pull/342): compress kv pairs in ParquetInputSplits
-
+
 ### Version 1.4.0 ###
 * ISSUE [333](https://github.com/Parquet/parquet-mr/pull/333): Compress schemas in split
 * ISSUE [329](https://github.com/Parquet/parquet-mr/pull/329): fix filesystem resolution
@@ -879,37 +906,37 @@ Release Notes - Parquet - Version 1.10.0
 * ISSUE 159: Counter for mapred
 * ISSUE 156: Fix site
 * ISSUE 153: Fix projection required field
 
 ### Version 1.1.1 ###
 * ISSUE 150: add thrift validation on read
 
 ### Version 1.1.0 ###
-* ISSUE 149: changing default block size to 128mb
-* ISSUE 146: Fix and add unit tests for Hive nested types
-* ISSUE 145: add getStatistics method to parquetloader
-* ISSUE 144: Map key fields should allow other types than strings
-* ISSUE 143: Fix empty encoding col metadata
-* ISSUE 142: Fix total size row group
-* ISSUE 141: add parquet counters for benchmark
-* ISSUE 140: Implemented partial schema for GroupReadSupport
-* ISSUE 138: fix bug of wrong column metadata size
-* ISSUE 137: ParquetMetadataConverter bug
-* ISSUE 133: Update plugin versions for maven aether migration - fixes #125
-* ISSUE 130: Schema validation should not validate the root element's name
-* ISSUE 127: Adding dictionary encoding for non string types.. #99
-* ISSUE 125: Unable to build
-* ISSUE 124: Fix Short and Byte types in Hive SerDe.
-* ISSUE 123: Fix Snappy compressor in parquet-hadoop.
-* ISSUE 120: Fix RLE bug with partial literal groups at end of stream.
-* ISSUE 118: Refactor column reader
-* ISSUE 115: Map key fields should allow other types than strings
-* ISSUE 103: Map key fields should allow other types than strings
-* ISSUE 99: Dictionary encoding for non string types (float double int long boolean)
-* ISSUE 47: Add tests for parquet-scrooge and parquet-cascading
+* ISSUE 149: changing default block size to 128mb
+* ISSUE 146: Fix and add unit tests for Hive nested types
+* ISSUE 145: add getStatistics method to parquetloader
+* ISSUE 144: Map key fields should allow other types than strings
+* ISSUE 143: Fix empty encoding col metadata
+* ISSUE 142: Fix total size row group
+* ISSUE 141: add parquet counters for benchmark
+* ISSUE 140: Implemented partial schema for GroupReadSupport
+* ISSUE 138: fix bug of wrong column metadata size
+* ISSUE 137: ParquetMetadataConverter bug
+* ISSUE 133: Update plugin versions for maven aether migration - fixes #125
+* ISSUE 130: Schema validation should not validate the root element's name
+* ISSUE 127: Adding dictionary encoding for non string types.. #99
+* ISSUE 125: Unable to build
+* ISSUE 124: Fix Short and Byte types in Hive SerDe.
+* ISSUE 123: Fix Snappy compressor in parquet-hadoop.
+* ISSUE 120: Fix RLE bug with partial literal groups at end of stream.
+* ISSUE 118: Refactor column reader
+* ISSUE 115: Map key fields should allow other types than strings
+* ISSUE 103: Map key fields should allow other types than strings
+* ISSUE 99: Dictionary encoding for non string types (float double int long boolean)
+* ISSUE 47: Add tests for parquet-scrooge and parquet-cascading
 
 ### Version 1.0.1 ###
-* ISSUE 126: Unit tests for parquet cascading
-* ISSUE 121: fix wrong RecordConverter for ParquetTBaseScheme
-* ISSUE 119: fix compatibility with thrift remove unused dependency
+* ISSUE 126: Unit tests for parquet cascading
+* ISSUE 121: fix wrong RecordConverter for ParquetTBaseScheme
+* ISSUE 119: fix compatibility with thrift remove unused dependency
 
 ### Version 1.0.0 ###
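Note on the 1.12.1 entries above: PARQUET-1633 and PARQUET-2052 are both 32-bit overflow bugs, where a byte count accumulated in an `int` wraps silently once it passes `Integer.MAX_VALUE`. A minimal, self-contained sketch of the failure mode — the accumulator names are hypothetical, not parquet-mr code:

```java
public class OverflowSketch {
  public static void main(String[] args) {
    // Track the total size of 4096 binary values of 1 MiB each (4 GiB total).
    int intTotal = 0;    // 32-bit accumulator: the buggy pattern
    long longTotal = 0L; // 64-bit accumulator: the fixed pattern
    final int valueSize = 1 << 20; // 1 MiB per value
    for (int i = 0; i < 4096; i++) {
      intTotal += valueSize;  // wraps past Integer.MAX_VALUE after ~2 GiB
      longTotal += valueSize; // valueSize is widened to long before adding
    }
    System.out.println(intTotal);  // prints 0: the count wrapped all the way around
    System.out.println(longTotal); // prints 4294967296
  }
}
```

Widening the accumulation to `long` (or bounding the inputs before they can wrap) is the general remedy for this class of bug.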
2 changes: 1 addition & 1 deletion parquet-arrow/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
-    <version>1.12.0</version>
+    <version>1.12.2-kylin-r6</version>
   </parent>
 
   <modelVersion>4.0.0</modelVersion>
2 changes: 1 addition & 1 deletion parquet-avro/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
-    <version>1.12.0</version>
+    <version>1.12.2-kylin-r6</version>
   </parent>
 
   <modelVersion>4.0.0</modelVersion>
2 changes: 1 addition & 1 deletion parquet-benchmarks/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
-    <version>1.12.0</version>
+    <version>1.12.2-kylin-r6</version>
  </parent>
 
   <modelVersion>4.0.0</modelVersion>
2 changes: 1 addition & 1 deletion parquet-cascading-deprecated/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
-    <version>1.12.0</version>
+    <version>1.12.2-kylin-r6</version>
   </parent>
 
   <modelVersion>4.0.0</modelVersion>
2 changes: 1 addition & 1 deletion parquet-cascading3-deprecated/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
-    <version>1.12.0</version>
+    <version>1.12.2-kylin-r6</version>
   </parent>
 
   <modelVersion>4.0.0</modelVersion>
2 changes: 1 addition & 1 deletion parquet-cli/pom.xml
@@ -21,7 +21,7 @@
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
     <relativePath>../pom.xml</relativePath>
-    <version>1.12.0</version>
+    <version>1.12.2-kylin-r6</version>
   </parent>
 
   <modelVersion>4.0.0</modelVersion>
@@ -108,10 +108,8 @@ public boolean apply(@Nullable ColumnDescriptor input) {
         }));
 
     // now check to see if the data is actually corrupt
-    ParquetFileReader reader = new ParquetFileReader(getConf(),
-        fakeMeta, path, footer.getBlocks(), columns);
-
-    try {
+    try (ParquetFileReader reader = new ParquetFileReader(getConf(),
+        fakeMeta, path, footer.getBlocks(), columns)) {
       PageStatsValidator validator = new PageStatsValidator();
       for (PageReadStore pages = reader.readNextRowGroup(); pages != null;
           pages = reader.readNextRowGroup()) {
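This diff and the three that follow all apply the same refactor: `ParquetFileReader` implements `Closeable`, and a reader that is never closed leaks its underlying input stream — for remote filesystems, an open TCP connection, the leak class tracked by PARQUET-2054 above. Wrapping the reader in try-with-resources closes it on every exit path, including exceptions. A minimal sketch of the pattern, assuming a local or HDFS path passed on the command line (the class name and printed output are illustrative, not taken from this PR):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class PrintSchema {
  public static void main(String[] args) throws Exception {
    Path path = new Path(args[0]); // e.g. file:///tmp/data.parquet
    Configuration conf = new Configuration();
    // try-with-resources closes the reader (and its input stream)
    // whether the block exits normally or via an exception.
    try (ParquetFileReader reader =
        ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
      System.out.println(reader.getFileMetaData().getSchema());
    }
  }
}
```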
@@ -119,9 +119,10 @@ private String getParquetSchema(String source) throws IOException {
 
     switch (format) {
       case PARQUET:
-        return new ParquetFileReader(
-            getConf(), qualifiedPath(source), ParquetMetadataConverter.NO_FILTER)
-            .getFileMetaData().getSchema().toString();
+        try (ParquetFileReader reader = new ParquetFileReader(
+            getConf(), qualifiedPath(source), ParquetMetadataConverter.NO_FILTER)) {
+          return reader.getFileMetaData().getSchema().toString();
+        }
       default:
         throw new IllegalArgumentException(String.format(
             "Could not get a Parquet schema for format %s: %s", format, source));
@@ -64,56 +64,57 @@ public int run() throws IOException {
 
     String source = targets.get(0);
 
-    ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source));
-    MessageType schema = reader.getFileMetaData().getSchema();
-    ColumnDescriptor descriptor = Util.descriptor(column, schema);
-    PrimitiveType type = Util.primitive(column, schema);
-    Preconditions.checkNotNull(type);
-
-    DictionaryPageReadStore dictionaryReader;
-    int rowGroup = 0;
-    while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
-      DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);
-
-      Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
-
-      console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column, page.getCompressedSize());
-      for (int i = 0; i <= dict.getMaxId(); i += 1) {
-        switch(type.getPrimitiveTypeName()) {
-          case BINARY:
-            if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
-              console.info("{}: {}", String.format("%6d", i),
-                  Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
-            } else {
-              console.info("{}: {}", String.format("%6d", i),
-                  Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
-            }
-            break;
-          case INT32:
-            console.info("{}: {}", String.format("%6d", i),
-                dict.decodeToInt(i));
-            break;
-          case INT64:
-            console.info("{}: {}", String.format("%6d", i),
-                dict.decodeToLong(i));
-            break;
-          case FLOAT:
-            console.info("{}: {}", String.format("%6d", i),
-                dict.decodeToFloat(i));
-            break;
-          case DOUBLE:
-            console.info("{}: {}", String.format("%6d", i),
-                dict.decodeToDouble(i));
-            break;
-          default:
-            throw new IllegalArgumentException(
-                "Unknown dictionary type: " + type.getPrimitiveTypeName());
+    try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
+      MessageType schema = reader.getFileMetaData().getSchema();
+      ColumnDescriptor descriptor = Util.descriptor(column, schema);
+      PrimitiveType type = Util.primitive(column, schema);
+      Preconditions.checkNotNull(type);
+
+      DictionaryPageReadStore dictionaryReader;
+      int rowGroup = 0;
+      while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
+        DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);
+
+        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);
+
+        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column, page.getCompressedSize());
+        for (int i = 0; i <= dict.getMaxId(); i += 1) {
+          switch(type.getPrimitiveTypeName()) {
+            case BINARY:
+              if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
+                console.info("{}: {}", String.format("%6d", i),
+                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
+              } else {
+                console.info("{}: {}", String.format("%6d", i),
+                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
+              }
+              break;
+            case INT32:
+              console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToInt(i));
+              break;
+            case INT64:
+              console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToLong(i));
+              break;
+            case FLOAT:
+              console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToFloat(i));
+              break;
+            case DOUBLE:
+              console.info("{}: {}", String.format("%6d", i),
+                  dict.decodeToDouble(i));
+              break;
+            default:
+              throw new IllegalArgumentException(
+                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
+          }
         }
-      }
 
-      reader.skipNextRowGroup();
+        reader.skipNextRowGroup();
 
-      rowGroup += 1;
+        rowGroup += 1;
+      }
     }
 
     console.info("");
@@ -75,57 +75,57 @@
         "Cannot process multiple Parquet files.");
 
     String source = targets.get(0);
-    ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source));
-
-    MessageType schema = reader.getFileMetaData().getSchema();
-    Map<ColumnDescriptor, PrimitiveType> columns = Maps.newLinkedHashMap();
-    if (this.columns == null || this.columns.isEmpty()) {
-      for (ColumnDescriptor descriptor : schema.getColumns()) {
-        columns.put(descriptor, primitive(schema, descriptor.getPath()));
-      }
-    } else {
-      for (String column : this.columns) {
-        columns.put(descriptor(column, schema), primitive(column, schema));
-      }
-    }
-
-    CompressionCodecName codec = reader.getRowGroups().get(0).getColumns().get(0).getCodec();
-    // accumulate formatted lines to print by column
-    Map<String, List<String>> formatted = Maps.newLinkedHashMap();
-    PageFormatter formatter = new PageFormatter();
-    PageReadStore pageStore;
-    int rowGroupNum = 0;
-    while ((pageStore = reader.readNextRowGroup()) != null) {
-      for (ColumnDescriptor descriptor : columns.keySet()) {
-        List<String> lines = formatted.get(columnName(descriptor));
-        if (lines == null) {
-          lines = Lists.newArrayList();
-          formatted.put(columnName(descriptor), lines);
-        }
-
-        formatter.setContext(rowGroupNum, columns.get(descriptor), codec);
-        PageReader pages = pageStore.getPageReader(descriptor);
-
-        DictionaryPage dict = pages.readDictionaryPage();
-        if (dict != null) {
-          lines.add(formatter.format(dict));
-        }
-        DataPage page;
-        while ((page = pages.readPage()) != null) {
-          lines.add(formatter.format(page));
-        }
-      }
-      rowGroupNum += 1;
-    }
-
-    // TODO: Show total column size and overall size per value in the column summary line
-    for (String columnName : formatted.keySet()) {
-      console.info(String.format("\nColumn: %s\n%s", columnName, new TextStringBuilder(80).appendPadding(80, '-')));
-      console.info(formatter.getHeader());
-      for (String line : formatted.get(columnName)) {
-        console.info(line);
-      }
-      console.info("");
-    }
+    try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
+      MessageType schema = reader.getFileMetaData().getSchema();
+      Map<ColumnDescriptor, PrimitiveType> columns = Maps.newLinkedHashMap();
+      if (this.columns == null || this.columns.isEmpty()) {
+        for (ColumnDescriptor descriptor : schema.getColumns()) {
+          columns.put(descriptor, primitive(schema, descriptor.getPath()));
+        }
+      } else {
+        for (String column : this.columns) {
+          columns.put(descriptor(column, schema), primitive(column, schema));
+        }
+      }
+
+      CompressionCodecName codec = reader.getRowGroups().get(0).getColumns().get(0).getCodec();
+      // accumulate formatted lines to print by column
+      Map<String, List<String>> formatted = Maps.newLinkedHashMap();
+      PageFormatter formatter = new PageFormatter();
+      PageReadStore pageStore;
+      int rowGroupNum = 0;
+      while ((pageStore = reader.readNextRowGroup()) != null) {
+        for (ColumnDescriptor descriptor : columns.keySet()) {
+          List<String> lines = formatted.get(columnName(descriptor));
+          if (lines == null) {
+            lines = Lists.newArrayList();
+            formatted.put(columnName(descriptor), lines);
+          }
+
+          formatter.setContext(rowGroupNum, columns.get(descriptor), codec);
+          PageReader pages = pageStore.getPageReader(descriptor);
+
+          DictionaryPage dict = pages.readDictionaryPage();
+          if (dict != null) {
+            lines.add(formatter.format(dict));
+          }
+          DataPage page;
+          while ((page = pages.readPage()) != null) {
+            lines.add(formatter.format(page));
+          }
+        }
+        rowGroupNum += 1;
+      }
+
+      // TODO: Show total column size and overall size per value in the column summary line
+      for (String columnName : formatted.keySet()) {
+        console.info(String.format("\nColumn: %s\n%s", columnName, new TextStringBuilder(80).appendPadding(80, '-')));
+        console.info(formatter.getHeader());
+        for (String line : formatted.get(columnName)) {
+          console.info(line);
+        }
+        console.info("");
+      }
+    }
 
     return 0;