From cc3bdb918c0c98428de10098f756d4ecf499e634 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 12 Aug 2024 07:36:07 +0530 Subject: [PATCH 1/5] feat: adding initial largelistview splitAndTransfer --- .../BaseLargeRepeatedValueViewVector.java | 2 +- .../vector/complex/LargeListViewVector.java | 167 ++++++- .../arrow/vector/TestLargeListViewVector.java | 456 ++++++++++++++++++ 3 files changed, 618 insertions(+), 7 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java index 26079cbee95..f643306cfdc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseLargeRepeatedValueViewVector.java @@ -102,7 +102,7 @@ private void allocateBuffers() { sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); } - private ArrowBuf allocateBuffers(final long size) { + protected ArrowBuf allocateBuffers(final long size) { final int curSize = (int) size; ArrowBuf buffer = allocator.buffer(curSize); buffer.readerIndex(0); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 17ccdbf0eae..2faa4ae6508 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -39,6 +39,7 @@ import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueIterableVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.UnionLargeListViewReader; import 
org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; @@ -361,20 +362,17 @@ public TransferPair getTransferPair(Field field, BufferAllocator allocator) { @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support getTransferPair(String, BufferAllocator, CallBack) yet"); + return new TransferImpl(ref, allocator, callBack); } @Override public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support getTransferPair(Field, BufferAllocator, CallBack) yet"); + return new TransferImpl(field, allocator, callBack); } @Override public TransferPair makeTransferPair(ValueVector target) { - throw new UnsupportedOperationException( - "LargeListViewVector does not support makeTransferPair(ValueVector) yet"); + return new TransferImpl((LargeListViewVector) target); } @Override @@ -452,6 +450,163 @@ public OUT accept(VectorVisitor visitor, IN value) { return visitor.visit(this, value); } + private class TransferImpl implements TransferPair { + + LargeListViewVector to; + TransferPair dataTransferPair; + + public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { + this(new LargeListViewVector(name, allocator, field.getFieldType(), callBack)); + } + + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + this(new LargeListViewVector(field, allocator, callBack)); + } + + public TransferImpl(LargeListViewVector to) { + this.to = to; + to.addOrGetVector(vector.getField().getFieldType()); + if (to.getDataVector() instanceof ZeroVector) { + to.addOrGetVector(vector.getField().getFieldType()); + } + dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); + } + + @Override + public void transfer() { + to.clear(); + dataTransferPair.transfer(); + to.validityBuffer = 
transferBuffer(validityBuffer, to.allocator); + to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); + to.sizeBuffer = transferBuffer(sizeBuffer, to.allocator); + if (valueCount > 0) { + to.setValueCount(valueCount); + } + clear(); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + Preconditions.checkArgument( + startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", + startIndex, + length, + valueCount); + to.clear(); + if (length > 0) { + // we have to scan by index since there are out-of-order offsets + to.offsetBuffer = to.allocateBuffers((long) length * OFFSET_WIDTH); + to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH); + + /* splitAndTransfer the size buffer */ + int maxOffsetAndSizeSum = -1; + int minOffsetValue = -1; + for (int i = 0; i < length; i++) { + final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); + final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); + to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue); + if (maxOffsetAndSizeSum < offsetValue + sizeValue) { + maxOffsetAndSizeSum = offsetValue + sizeValue; + } + if (minOffsetValue == -1 || minOffsetValue > offsetValue) { + minOffsetValue = offsetValue; + } + } + + /* splitAndTransfer the offset buffer */ + for (int i = 0; i < length; i++) { + final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); + final int relativeOffset = offsetValue - minOffsetValue; + to.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeOffset); + } + + /* splitAndTransfer the validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + + /* splitAndTransfer the data buffer */ + final int childSliceLength = maxOffsetAndSizeSum - minOffsetValue; + dataTransferPair.splitAndTransfer(minOffsetValue, childSliceLength); + to.setValueCount(length); + } + } + + /* + * transfer the validity. 
+ */ + private void splitAndTransferValidityBuffer( + int startIndex, int length, LargeListViewVector target) { + int firstByteSource = BitVectorHelper.byteIndex(startIndex); + int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + int byteSizeTarget = getValidityBufferSizeFromCount(length); + int offset = startIndex % 8; + + if (length > 0) { + if (offset == 0) { + // slice + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer.getReferenceManager().retain(1); + } else { + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from src BitVector. + * Each byte in the target is composed by a part in i-th byte, + * another part in (i+1)-th byte. + */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); + byte b2 = + BitVectorHelper.getBitsFromNextByte( + validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. 
+ */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte( + validityBuffer, firstByteSource + byteSizeTarget - 1, offset); + byte b2 = + BitVectorHelper.getBitsFromNextByte( + validityBuffer, firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = + BitVectorHelper.getBitsFromCurrentByte( + validityBuffer, firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } + } + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + this.to.copyFrom(from, to, LargeListViewVector.this); + } + } + @Override protected FieldReader getReaderImpl() { throw new UnsupportedOperationException( diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java index 563ac811c4f..2ed8d4d7005 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListViewVector.java @@ -18,6 +18,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; @@ -32,6 +33,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1639,6 +1641,460 @@ public void testOutOfOrderOffset1() { } } + private int validateSizeBufferAndCalculateMinOffset( + int start, + int 
splitLength, + ArrowBuf fromOffsetBuffer, + ArrowBuf fromSizeBuffer, + ArrowBuf toSizeBuffer) { + int minOffset = fromOffsetBuffer.getInt((long) start * LargeListViewVector.OFFSET_WIDTH); + int fromDataLength; + int toDataLength; + + for (int i = 0; i < splitLength; i++) { + fromDataLength = fromSizeBuffer.getInt((long) (start + i) * LargeListViewVector.SIZE_WIDTH); + toDataLength = toSizeBuffer.getInt((long) (i) * LargeListViewVector.SIZE_WIDTH); + + /* validate size */ + assertEquals( + fromDataLength, + toDataLength, + "Different data lengths at index: " + i + " and start: " + start); + + /* calculate minimum offset */ + int currentOffset = + fromOffsetBuffer.getInt((long) (start + i) * LargeListViewVector.OFFSET_WIDTH); + if (currentOffset < minOffset) { + minOffset = currentOffset; + } + } + + return minOffset; + } + + private void validateOffsetBuffer( + int start, + int splitLength, + ArrowBuf fromOffsetBuffer, + ArrowBuf toOffsetBuffer, + int minOffset) { + int offset1; + int offset2; + + for (int i = 0; i < splitLength; i++) { + offset1 = fromOffsetBuffer.getInt((long) (start + i) * LargeListViewVector.OFFSET_WIDTH); + offset2 = toOffsetBuffer.getInt((long) (i) * LargeListViewVector.OFFSET_WIDTH); + assertEquals( + offset1 - minOffset, + offset2, + "Different offset values at index: " + i + " and start: " + start); + } + } + + private void validateDataBuffer( + int start, + int splitLength, + ArrowBuf fromOffsetBuffer, + ArrowBuf fromSizeBuffer, + BigIntVector fromDataVector, + ArrowBuf toOffsetBuffer, + BigIntVector toDataVector) { + int dataLength; + Long fromValue; + for (int i = 0; i < splitLength; i++) { + dataLength = fromSizeBuffer.getInt((long) (start + i) * LargeListViewVector.SIZE_WIDTH); + for (int j = 0; j < dataLength; j++) { + fromValue = + fromDataVector.getObject( + (fromOffsetBuffer.getInt((long) (start + i) * LargeListViewVector.OFFSET_WIDTH) + + j)); + Long toValue = + toDataVector.getObject( + (toOffsetBuffer.getInt((long) i * 
LargeListViewVector.OFFSET_WIDTH) + j)); + assertEquals( + fromValue, toValue, "Different data values at index: " + i + " and start: " + start); + } + } + } + + /** + * Validate split and transfer of data from fromVector to toVector. Note that this method assumes + * that the child vector is BigIntVector. + * + * @param start start index + * @param splitLength length of data to split and transfer + * @param fromVector fromVector + * @param toVector toVector + */ + private void validateSplitAndTransfer( + TransferPair transferPair, + int start, + int splitLength, + LargeListViewVector fromVector, + LargeListViewVector toVector) { + + transferPair.splitAndTransfer(start, splitLength); + + /* get offsetBuffer of toVector */ + final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); + + /* get sizeBuffer of toVector */ + final ArrowBuf toSizeBuffer = toVector.getSizeBuffer(); + + /* get dataVector of toVector */ + BigIntVector toDataVector = (BigIntVector) toVector.getDataVector(); + + /* get offsetBuffer of fromVector */ + final ArrowBuf fromOffsetBuffer = fromVector.getOffsetBuffer(); + + /* get sizeBuffer of fromVector */ + final ArrowBuf fromSizeBuffer = fromVector.getSizeBuffer(); + + /* get dataVector of fromVector */ + BigIntVector fromDataVector = (BigIntVector) fromVector.getDataVector(); + + /* validate size buffers */ + int minOffset = + validateSizeBufferAndCalculateMinOffset( + start, splitLength, fromOffsetBuffer, fromSizeBuffer, toSizeBuffer); + /* validate offset buffers */ + validateOffsetBuffer(start, splitLength, fromOffsetBuffer, toOffsetBuffer, minOffset); + /* validate data */ + validateDataBuffer( + start, + splitLength, + fromOffsetBuffer, + fromSizeBuffer, + fromDataVector, + toOffsetBuffer, + toDataVector); + } + + @Test + public void testSplitAndTransfer() throws Exception { + try (LargeListViewVector fromVector = LargeListViewVector.empty("sourceVector", allocator)) { + + /* Explicitly add the dataVector */ + MinorType type = MinorType.BIGINT; 
+ fromVector.addOrGetVector(FieldType.nullable(type.getType())); + + UnionLargeListViewWriter listViewWriter = fromVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* populate data */ + listViewWriter.setPosition(0); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(10); + listViewWriter.bigInt().writeBigInt(11); + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.endListView(); + + listViewWriter.setPosition(1); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(13); + listViewWriter.bigInt().writeBigInt(14); + listViewWriter.endListView(); + + listViewWriter.setPosition(2); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(15); + listViewWriter.bigInt().writeBigInt(16); + listViewWriter.bigInt().writeBigInt(17); + listViewWriter.bigInt().writeBigInt(18); + listViewWriter.endListView(); + + listViewWriter.setPosition(3); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(19); + listViewWriter.endListView(); + + listViewWriter.setPosition(4); + listViewWriter.startListView(); + listViewWriter.bigInt().writeBigInt(20); + listViewWriter.bigInt().writeBigInt(21); + listViewWriter.bigInt().writeBigInt(22); + listViewWriter.bigInt().writeBigInt(23); + listViewWriter.endListView(); + + fromVector.setValueCount(5); + + /* get offset buffer */ + final ArrowBuf offsetBuffer = fromVector.getOffsetBuffer(); + + /* get size buffer */ + final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); + + /* get dataVector */ + BigIntVector dataVector = (BigIntVector) fromVector.getDataVector(); + + /* check the vector output */ + + int index = 0; + int offset; + int size = 0; + Long actual; + + /* index 0 */ + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(0), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(10), 
actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(11), actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(12), actual); + assertEquals( + Integer.toString(3), + Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); + + /* index 1 */ + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(3), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(13), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(14), actual); + size++; + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); + + /* index 2 */ + size = 0; + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(5), Integer.toString(offset)); + size++; + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(15), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(16), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(17), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(18), actual); + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); + + /* index 3 */ + size = 0; + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(9), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(19), actual); + size++; + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * 
LargeListViewVector.SIZE_WIDTH))); + + /* index 4 */ + size = 0; + index++; + assertFalse(fromVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListViewVector.OFFSET_WIDTH); + assertEquals(Integer.toString(10), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(20), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(21), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(22), actual); + offset++; + size++; + actual = dataVector.getObject(offset); + assertEquals(Long.valueOf(23), actual); + size++; + assertEquals( + Integer.toString(size), + Integer.toString(sizeBuffer.getInt(index * LargeListViewVector.SIZE_WIDTH))); + + /* do split and transfer */ + try (LargeListViewVector toVector = LargeListViewVector.empty("toVector", allocator)) { + int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; + TransferPair transferPair = fromVector.makeTransferPair(toVector); + + for (final int[] transferLength : transferLengths) { + int start = transferLength[0]; + int splitLength = transferLength[1]; + validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); + } + } + } + } + + @Test + public void testGetTransferPairWithField() throws Exception { + try (final LargeListViewVector fromVector = LargeListViewVector.empty("listview", allocator)) { + + UnionLargeListViewWriter writer = fromVector.getWriter(); + writer.allocate(); + + // set some values + writer.startListView(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endListView(); + fromVector.setValueCount(2); + + final TransferPair transferPair = + fromVector.getTransferPair(fromVector.getField(), allocator); + final LargeListViewVector toVector = (LargeListViewVector) transferPair.getTo(); + // Field inside a new vector created by reusing a field should be the same in memory as the + // original field. 
+ assertSame(toVector.getField(), fromVector.getField()); + } + } + + @Test + public void testOutOfOrderOffsetSplitAndTransfer() { + // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + try (LargeListViewVector fromVector = LargeListViewVector.empty("fromVector", allocator)) { + // Allocate buffers in LargeListViewVector by calling `allocateNew` method. + fromVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), null, null); + Field field = new Field("child-vector", fieldType, null); + fromVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = fromVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + fromVector.setValidity(0, 1); + fromVector.setValidity(1, 0); + fromVector.setValidity(2, 1); + fromVector.setValidity(3, 1); + fromVector.setValidity(4, 1); + + fromVector.setOffset(0, 4); + fromVector.setOffset(1, 7); + fromVector.setOffset(2, 0); + fromVector.setOffset(3, 0); + fromVector.setOffset(4, 3); + + fromVector.setSize(0, 3); + fromVector.setSize(1, 0); + fromVector.setSize(2, 4); + fromVector.setSize(3, 0); + fromVector.setSize(4, 2); + + // Set value count using `setValueCount` method. 
+ fromVector.setValueCount(5); + + final ArrowBuf offSetBuffer = fromVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = fromVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseLargeRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((BigIntVector) fromVector.getDataVector()).get(0)); + assertEquals(-127, ((BigIntVector) fromVector.getDataVector()).get(1)); + assertEquals(127, ((BigIntVector) fromVector.getDataVector()).get(2)); + assertEquals(50, ((BigIntVector) fromVector.getDataVector()).get(3)); + assertEquals(12, ((BigIntVector) fromVector.getDataVector()).get(4)); + assertEquals(-7, ((BigIntVector) fromVector.getDataVector()).get(5)); + assertEquals(25, ((BigIntVector) fromVector.getDataVector()).get(6)); + + // check values + Object result = fromVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(12), resultSet.get(0)); + assertEquals(Long.valueOf(-7), resultSet.get(1)); + assertEquals(Long.valueOf(25), resultSet.get(2)); + + assertTrue(fromVector.isNull(1)); + + result = 
fromVector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(4, resultSet.size()); + assertEquals(Long.valueOf(0), resultSet.get(0)); + assertEquals(Long.valueOf(-127), resultSet.get(1)); + assertEquals(Long.valueOf(127), resultSet.get(2)); + assertEquals(Long.valueOf(50), resultSet.get(3)); + + assertTrue(fromVector.isEmpty(3)); + + result = fromVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(12), resultSet.get(1)); + + fromVector.validate(); + + /* do split and transfer */ + try (LargeListViewVector toVector = LargeListViewVector.empty("toVector", allocator)) { + int[][] transferLengths = {{2, 3}, {0, 1}, {0, 3}}; + TransferPair transferPair = fromVector.makeTransferPair(toVector); + + for (final int[] transferLength : transferLengths) { + int start = transferLength[0]; + int splitLength = transferLength[1]; + validateSplitAndTransfer(transferPair, start, splitLength, fromVector, toVector); + } + } + } + } + private void writeIntValues(UnionLargeListViewWriter writer, int[] values) { writer.startListView(); for (int v : values) { From 699101ecfdfbf7ee9710124948f5760374c5f9aa Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Mon, 12 Aug 2024 07:54:39 +0530 Subject: [PATCH 2/5] feat: adding more test cases --- .../arrow/vector/TestSplitAndTransfer.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index d20dc3348b1..a3f25bc5207 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -29,6 +29,7 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.complex.DenseUnionVector; import 
org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; @@ -852,6 +853,25 @@ public void testListVectorZeroStartIndexAndLength() { } } + @Test + public void testLargeListViewVectorZeroStartIndexAndLength() { + try (final LargeListViewVector listVector = + LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector newListVector = LargeListViewVector.empty("newList", allocator)) { + + listVector.allocateNew(); + final int valueCount = 0; + listVector.setValueCount(valueCount); + + final TransferPair tp = listVector.makeTransferPair(newListVector); + + tp.splitAndTransfer(0, 0); + assertEquals(valueCount, newListVector.getValueCount()); + + newListVector.clear(); + } + } + @Test public void testStructVectorZeroStartIndexAndLength() { Map metadata = new HashMap<>(); From 456dbec27ce335a2a6ce3f85c476e45f95ad0fc0 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Tue, 13 Aug 2024 16:00:22 +0530 Subject: [PATCH 3/5] fix: addressing reviews --- .../org/apache/arrow/vector/complex/LargeListViewVector.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 2faa4ae6508..23223697394 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -500,8 +500,8 @@ public void splitAndTransfer(int startIndex, int length) { to.sizeBuffer = to.allocateBuffers((long) length * SIZE_WIDTH); /* splitAndTransfer the size buffer */ - int maxOffsetAndSizeSum = -1; - int minOffsetValue = -1; + int maxOffsetAndSizeSum = Integer.MIN_VALUE; + int 
minOffsetValue = Integer.MAX_VALUE; for (int i = 0; i < length; i++) { final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); From 093b6bf3330670173db48fd0620397e389b60e41 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 14 Aug 2024 06:04:43 +0530 Subject: [PATCH 4/5] fix: addressing reviews v3 --- .../org/apache/arrow/vector/complex/LargeListViewVector.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 23223697394..dbdc317b8c2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -509,9 +509,7 @@ public void splitAndTransfer(int startIndex, int length) { if (maxOffsetAndSizeSum < offsetValue + sizeValue) { maxOffsetAndSizeSum = offsetValue + sizeValue; } - if (minOffsetValue == -1 || minOffsetValue > offsetValue) { - minOffsetValue = offsetValue; - } + minOffsetValue = Math.min(minOffsetValue, offsetValue); } /* splitAndTransfer the offset buffer */ From e94318b0e60003a4b179d80604f2766aa588c10b Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 14 Aug 2024 07:14:24 +0530 Subject: [PATCH 5/5] fix: addressing reviews v3 --- .../org/apache/arrow/vector/complex/LargeListViewVector.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index dbdc317b8c2..1a4218978bc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -506,9 +506,7 @@ public void splitAndTransfer(int startIndex, int length) { final int offsetValue = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH); final int sizeValue = sizeBuffer.getInt((long) (startIndex + i) * SIZE_WIDTH); to.sizeBuffer.setInt((long) i * SIZE_WIDTH, sizeValue); - if (maxOffsetAndSizeSum < offsetValue + sizeValue) { - maxOffsetAndSizeSum = offsetValue + sizeValue; - } + maxOffsetAndSizeSum = Math.max(maxOffsetAndSizeSum, offsetValue + sizeValue); minOffsetValue = Math.min(minOffsetValue, offsetValue); }