apache · gszadovszky · May 26, 2021 · May 21, 2021 · May 21, 2021 · gszadovszky
diff --git a/...umn/src/main/java/org/apache/parquet/column/values/dictionary/DictionaryValuesWriter.java b/...umn/src/main/java/org/apache/parquet/column/values/dictionary/DictionaryValuesWriter.java
@@ -81,7 +81,7 @@ public abstract class DictionaryValuesWriter extends ValuesWriter implements Req
   protected boolean dictionaryTooBig;
 
   /* current size in bytes the dictionary will take once serialized */
-  protected int dictionaryByteSize;
+  protected long dictionaryByteSize;
 
   /* size in bytes of the dictionary at the end of last dictionary encoded page (in case the current page falls back to PLAIN) */
   protected int lastUsedDictionaryByteSize;
@@ -173,7 +173,7 @@ public BytesInput getBytes() {
       BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
       // remember size of dictionary when we last wrote a page
       lastUsedDictionarySize = getDictionarySize();
-      lastUsedDictionaryByteSize = dictionaryByteSize;
+      lastUsedDictionaryByteSize = (int) dictionaryByteSize;
       return bytes;
     } catch (IOException e) {
       throw new ParquetEncodingException("could not encode the values", e);
@@ -249,7 +249,7 @@ public void writeBytes(Binary v) {
         id = binaryDictionaryContent.size();
         binaryDictionaryContent.put(v.copy(), id);
         // length as int (4 bytes) + actual bytes
-        dictionaryByteSize += 4 + v.length();
+        dictionaryByteSize += 4L + v.length();
       }
       encodedValues.add(id);
     }

diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
@@ -53,6 +53,7 @@
 import org.apache.parquet.column.values.plain.PlainValuesWriter;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.mockito.Mockito;
 
 public class TestDictionary {
 
@@ -171,6 +172,20 @@ public void testBinaryDictionaryFallBack() throws IOException {
     assertEquals(0, cw.getBufferedSize());
   }
 
+  @Test
+  public void testBinaryDictionaryIntegerOverflow() {
+    Binary mock = Mockito.mock(Binary.class);
+    Mockito.when(mock.length()).thenReturn(Integer.MAX_VALUE - 1); // 4 + length() exceeds the int range
+    // writeBytes() uses v.copy() as the dictionary key; stub it so the key is a real Binary, not Mockito's default null
+    Mockito.when(mock.copy()).thenReturn(Binary.fromString(" world"));
+
+    final ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(100, 100);
+    cw.writeBytes(Binary.fromString("hello"));
+    cw.writeBytes(mock);
+
+    assertEquals(PLAIN, cw.getEncoding());
+  }
+
   @Test
   public void testBinaryDictionaryChangedValues() throws IOException {
     int COUNT = 100;