Skip to content

Commit b5a096c

Browse files
DX-103340: Allow using 1GB+ data buffers in variable width vectors (#98)
Backport of apache/arrow-java#722
1 parent 0bc7698 commit b5a096c

File tree

5 files changed

+30
-6
lines changed

5 files changed

+30
-6
lines changed

java/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,8 @@
478478
<io.netty.tryReflectionSetAccessible>true</io.netty.tryReflectionSetAccessible>
479479
<user.timezone>UTC</user.timezone>
480480
<!-- Note: changing the below configuration might increase the max allocation size for a vector
481-
which in turn can cause OOM. -->
482-
<arrow.vector.max_allocation_bytes>1048576</arrow.vector.max_allocation_bytes>
481+
which in turn can cause OOM. Using 2MB - 1 byte to simulate the default limit of 2^31 - 1 bytes. -->
482+
<arrow.vector.max_allocation_bytes>2097151</arrow.vector.max_allocation_bytes>
483483
</systemPropertyVariables>
484484
</configuration>
485485
</plugin>

java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -568,10 +568,13 @@ public void reallocDataBuffer(long desiredAllocSize) {
568568
return;
569569
}
570570

571-
final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
571+
final long newAllocationSize =
572+
Math.min(CommonUtil.nextPowerOfTwo(desiredAllocSize), MAX_BUFFER_SIZE);
572573
assert newAllocationSize >= 1;
573574

574-
checkDataBufferSize(newAllocationSize);
575+
if (newAllocationSize < desiredAllocSize) {
576+
checkDataBufferSize(desiredAllocSize);
577+
}
575578

576579
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
577580
newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity());

java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ public void init() {
9292
private static final byte[] STR5 = "EEE5".getBytes(utf8Charset);
9393
private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset);
9494
private static final int MAX_VALUE_COUNT =
95-
(int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7);
95+
(int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 9);
9696
private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2);
9797

9898
@After

java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
import org.apache.arrow.memory.BufferAllocator;
2525
import org.apache.arrow.memory.RootAllocator;
26+
import org.apache.arrow.memory.util.CommonUtil;
2627
import org.apache.arrow.vector.complex.DenseUnionVector;
2728
import org.apache.arrow.vector.complex.FixedSizeListVector;
2829
import org.apache.arrow.vector.complex.ListVector;
@@ -223,6 +224,17 @@ public void testVariableAllocateAfterReAlloc() throws Exception {
223224
}
224225
}
225226

227+
@Test
228+
public void testVariableReAllocAbove1GB() throws Exception {
229+
try (final VarCharVector vector = new VarCharVector("", allocator)) {
230+
long desiredSizeAboveLastPowerOf2 =
231+
CommonUtil.nextPowerOfTwo(BaseVariableWidthVector.MAX_ALLOCATION_SIZE) / 2 + 1;
232+
vector.reallocDataBuffer(desiredSizeAboveLastPowerOf2);
233+
234+
assertTrue(vector.getDataBuffer().capacity() >= desiredSizeAboveLastPowerOf2);
235+
}
236+
}
237+
226238
@Test
227239
public void testLargeVariableAllocateAfterReAlloc() throws Exception {
228240
try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {

java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import org.apache.arrow.memory.BufferAllocator;
3030
import org.apache.arrow.memory.RootAllocator;
31+
import org.apache.arrow.memory.util.CommonUtil;
3132
import org.apache.arrow.vector.BaseValueVector;
3233
import org.apache.arrow.vector.BigIntVector;
3334
import org.apache.arrow.vector.BitVector;
@@ -191,7 +192,15 @@ public void testAppendEmptyVariableWidthVector() {
191192

192193
@Test
193194
public void testAppendLargeAndSmallVariableVectorsWithinLimit() {
194-
int sixteenthOfMaxAllocation = Math.toIntExact(BaseValueVector.MAX_ALLOCATION_SIZE / 16);
195+
// Use the largest power-of-two allocation size so that rounding up does not hit the max limit
196+
long maxPowerOfTwoAllocationSize =
197+
CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE);
198+
if (maxPowerOfTwoAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
199+
maxPowerOfTwoAllocationSize =
200+
CommonUtil.nextPowerOfTwo(BaseValueVector.MAX_ALLOCATION_SIZE / 2);
201+
}
202+
203+
int sixteenthOfMaxAllocation = Math.toIntExact(maxPowerOfTwoAllocationSize / 16);
195204
try (VarCharVector target = makeVarCharVec(1, sixteenthOfMaxAllocation);
196205
VarCharVector delta = makeVarCharVec(sixteenthOfMaxAllocation, 1)) {
197206
new VectorAppender(delta).visit(target, null);

0 commit comments

Comments
 (0)