From 2ac1ad28dda72d60413eb7a22b324aa9d8ad64c7 Mon Sep 17 00:00:00 2001 From: Tim Hurski Date: Mon, 3 Nov 2025 13:19:50 -0500 Subject: [PATCH] GH-721: Allow using 1GB+ data buffers in variable width vectors Backport of https://github.com/apache/arrow-java/pull/722 --- java/pom.xml | 4 ++-- .../apache/arrow/vector/BaseVariableWidthVector.java | 7 +++++-- .../org/apache/arrow/vector/TestValueVector.java | 2 +- .../org/apache/arrow/vector/TestVectorReAlloc.java | 12 ++++++++++++ .../apache/arrow/vector/util/TestVectorAppender.java | 11 ++++++++++- 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/java/pom.xml b/java/pom.xml index 378d9069535..234479fffbb 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -316,8 +316,8 @@ under the License. true UTC - 1048576 + which in turn can cause OOM. Using 2MB - 1byte to simulate the defaul limit of 2^31 - 1 bytes. --> + 2097151 diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 5d761ffbee9..17d6811c2d9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -571,10 +571,13 @@ public void reallocDataBuffer(long desiredAllocSize) { return; } - final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + final long newAllocationSize = + Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE); assert newAllocationSize >= 1; - checkDataBufferSize(newAllocationSize); + if (newAllocationSize < desiredAllocSize) { + checkDataBufferSize(desiredAllocSize); + } final ArrowBuf newBuf = allocator.buffer(newAllocationSize); newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 40e55fce9bf..8c291bc51ad 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -91,7 +91,7 @@ public void init() { private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset); private static final int MAX_VALUE_COUNT = - (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); + (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 9); private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2); @AfterEach diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index f5ec42c71c2..bc471503760 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -24,6 +24,7 @@ import java.nio.charset.StandardCharsets; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.ListVector; @@ -222,6 +223,17 @@ public void testVariableAllocateAfterReAlloc() throws Exception { } } + @Test + public void testVariableReAllocAbove1GB() throws Exception { + try (final VarCharVector vector = new VarCharVector("", allocator)) { + long desiredSizeAboveLastPowerOf2 = + CommonUtil.nextPowerOfTwo(BaseVariableWidthVector.MAX_ALLOCATION_SIZE) / 2 + 1; + vector.reallocDataBuffer(desiredSizeAboveLastPowerOf2); + + assertTrue(vector.getDataBuffer().capacity() >= desiredSizeAboveLastPowerOf2); + } + } + @Test public void testLargeVariableAllocateAfterReAlloc() throws Exception { try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index 19eafd1b201..4fb37a8de4b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -26,6 +26,7 @@ import java.util.List; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -192,7 +193,15 @@ public void testAppendEmptyVariableWidthVector() { @Test public void testAppendLargeAndSmallVariableVectorsWithinLimit() { - int sixteenthOfMaxAllocation = Math.toIntExact(BaseValueVector.MAX_ALLOCATION_SIZE / 16); + // Using the max power of 2 allocation size to avoid hitting the max limit at round ups + long maxPowerOfTwoAllocationSize = + CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE); + if (maxPowerOfTwoAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) { + maxPowerOfTwoAllocationSize = + CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE / 2); + } + + int sixteenthOfMaxAllocation = Math.toIntExact(maxPowerOfTwoAllocationSize / 16); try (VarCharVector target = makeVarCharVec(1, sixteenthOfMaxAllocation); VarCharVector delta = makeVarCharVec(sixteenthOfMaxAllocation, 1)) { new VectorAppender(delta).visit(target, null);