diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index ac148a25c7c..a5b550ac783 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -1100,7 +1100,7 @@ public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { assert index >= 0; final int dataLength = end - start; fillEmpties(index); - handleSafe(index, end); + handleSafe(index, dataLength); BitVectorHelper.setValidityBit(validityBuffer, index, isSet); final int startOffset = offsetBuffer.getInt(index * OFFSET_WIDTH); offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 30fe23cae4a..f0cc4c4b882 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -1394,6 +1394,51 @@ public void testFillEmptiesNotOverfill() { } } + @Test + public void testSetSafeWithArrowBufNoExcessAllocs() { + final int numValues = BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION * 2; + final byte[] valueBytes = "hello world".getBytes(); + final int valueBytesLength = valueBytes.length; + final int isSet = 1; + + try ( + final VarCharVector fromVector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, + MinorType.VARCHAR, allocator); + final VarCharVector toVector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, + MinorType.VARCHAR, allocator)) { + /* + * Populate the from vector with 'numValues' with byte-arrays, each of size 'valueBytesLength'. + */ + fromVector.setInitialCapacity(numValues); + fromVector.allocateNew(); + for (int i = 0; i < numValues; ++i) { + fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength); + } + fromVector.setValueCount(numValues); + ArrowBuf fromDataBuffer = fromVector.getDataBuffer(); + assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity()); + + /* + * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with + * ArrowBuf API (instead of setSafe with byte-array). + */ + toVector.setInitialCapacity(numValues); + toVector.allocateNew(); + for (int i = 0; i < numValues; i++) { + int start = fromVector.getstartOffset(i); + int end = fromVector.getstartOffset(i + 1); + toVector.setSafe(i, isSet, start, end, fromDataBuffer); + } + + /* + * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated + * with the same varchar elements, the allocations and hence, the final capacity should be + * the same. + */ + assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity()); + } + } + @Test public void testCopyFromWithNulls() { try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);