diff --git a/.gitignore b/.gitignore index dd69b6cec9c..e6dfe19bb98 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,5 @@ MANIFEST cpp/.idea/ python/.eggs/ -.vscode \ No newline at end of file +.vscode +.idea/ diff --git a/java/vector/src/main/codegen/templates/NullableValueVectors.java b/java/vector/src/main/codegen/templates/NullableValueVectors.java index 092097bb2bd..1decd0b3138 100644 --- a/java/vector/src/main/codegen/templates/NullableValueVectors.java +++ b/java/vector/src/main/codegen/templates/NullableValueVectors.java @@ -699,6 +699,22 @@ public void reset(){ setCount = 0; <#if type.major = "VarLen">lastSet = -1; } + + public void setLastSet(int value) { + <#if type.major = "VarLen"> + lastSet = value; + <#else> + throw new UnsupportedOperationException(); + + } + + public int getLastSet() { + <#if type.major != "VarLen"> + throw new UnsupportedOperationException(); + <#else> + return lastSet; + + } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 4ab624f3694..63572945660 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -393,6 +393,12 @@ public void setValueCount(int valueCount) { vector.getMutator().setValueCount(childValueCount); bits.getMutator().setValueCount(valueCount); } + + public void setLastSet(int value) { + lastSet = value; + } + + public int getLastSet() { return lastSet; } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 11be3298f75..29ea7628f45 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -17,15 +17,26 @@ */ package org.apache.arrow.vector; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.holders.NullableBigIntHolder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.Types.*; +import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.util.List; + public class TestListVector { private BufferAllocator allocator; @@ -80,4 +91,147 @@ public void testCopyFrom() throws Exception { Assert.assertTrue("shouldn't be null", reader.isSet()); } } + + @Test + public void testSetLastSetUsage() throws Exception { + try (ListVector listVector = ListVector.empty("input", allocator)) { + + /* Explicitly add the dataVector */ + MinorType type = MinorType.BIGINT; + listVector.addOrGetVector(FieldType.nullable(type.getType())); + + /* allocate memory */ + listVector.allocateNew(); + + /* get inner vectors; bitVector and offsetVector */ + List innerVectors = listVector.getFieldInnerVectors(); + BitVector bitVector = (BitVector)innerVectors.get(0); + UInt4Vector offsetVector = (UInt4Vector)innerVectors.get(1); + + /* get the underlying data vector -- NullableBigIntVector */ + NullableBigIntVector dataVector = (NullableBigIntVector)listVector.getDataVector(); + + /* check current lastSet */ + assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet())); + + int index = 0; + int offset = 0; + + /* write [10, 11, 12] to the list vector at index */ + bitVector.getMutator().setSafe(index, 1); + dataVector.getMutator().setSafe(0, 1, 10); + dataVector.getMutator().setSafe(1, 1, 11); + dataVector.getMutator().setSafe(2, 1, 12); + offsetVector.getMutator().setSafe(index + 1, 3); + + index += 1; + + /* write [13, 14] to the list vector at index 1 */ + bitVector.getMutator().setSafe(index, 1); + dataVector.getMutator().setSafe(3, 1, 13); + dataVector.getMutator().setSafe(4, 1, 14); + offsetVector.getMutator().setSafe(index + 1, 5); + + index += 1; + + /* write [15, 16, 17] to the list vector at index 2 */ + bitVector.getMutator().setSafe(index, 1); + dataVector.getMutator().setSafe(5, 1, 15); + dataVector.getMutator().setSafe(6, 1, 16); + dataVector.getMutator().setSafe(7, 1, 17); + offsetVector.getMutator().setSafe(index + 1, 8); + + /* check current lastSet */ + assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet())); + + /* set lastset and arbitrary valuecount for list vector. + * + * NOTE: if we don't execute setLastSet() before setLastValueCount(), then + * the latter will corrupt the offsetVector and thus the accessor will not + * retrieve the correct values from underlying dataVector. Run the test + * by commenting out next line and we should see failures from 5th assert + * onwards. This is why doing setLastSet() is important before setValueCount() + * once the vector has been loaded. + * + * Another important thing to remember is the value of lastSet itself. + * Even though the listVector has elements till index 2 only, the lastSet should + * be set as 3. This is because the offsetVector has valid offsets filled till index 3. + * If we do setLastSet(2), the offsetVector at index 3 will contain incorrect value + * after execution of setValueCount(). + * + * correct state of the listVector + * bitvector {1, 1, 1, 0, 0.... } + * offsetvector {0, 3, 5, 8, 8, 8.....} + * datavector { [10, 11, 12], + * [13, 14], + * [15, 16, 17] + * } + * + * if we don't do setLastSet() before setValueCount --> incorrect state + * bitvector {1, 1, 1, 0, 0.... } + * offsetvector {0, 0, 0, 0, 0, 0.....} + * datavector { [10, 11, 12], + * [13, 14], + * [15, 16, 17] + * } + * + * if we do setLastSet(2) before setValueCount --> incorrect state + * bitvector {1, 1, 1, 0, 0.... } + * offsetvector {0, 3, 5, 5, 5, 5.....} + * datavector { [10, 11, 12], + * [13, 14], + * [15, 16, 17] + * } + */ + listVector.getMutator().setLastSet(3); + listVector.getMutator().setValueCount(10); + + /* check the vector output */ + final UInt4Vector.Accessor offsetAccessor = offsetVector.getAccessor(); + final ValueVector.Accessor valueAccessor = dataVector.getAccessor(); + + index = 0; + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(0), Integer.toString(offset)); + + Object actual = valueAccessor.getObject(offset); + assertEquals(new Long(10), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(11), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(12), (Long)actual); + + index++; + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(3), Integer.toString(offset)); + + actual = valueAccessor.getObject(offset); + assertEquals(new Long(13), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(14), (Long)actual); + + index++; + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(5), Integer.toString(offset)); + + actual = valueAccessor.getObject(offsetAccessor.get(index)); + assertEquals(new Long(15), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(16), (Long)actual); + offset++; + actual = valueAccessor.getObject(offset); + assertEquals(new Long(17), (Long)actual); + + index++; + offset = offsetAccessor.get(index); + assertEquals(Integer.toString(8), Integer.toString(offset)); + + actual = valueAccessor.getObject(offsetAccessor.get(index)); + assertNull(actual); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 63543b09329..f5508aab2ce 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -26,11 +26,15 @@ import java.nio.charset.Charset; import java.util.List; +import java.util.ArrayList; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; + +import org.apache.arrow.vector.schema.ArrowRecordBatch; import org.apache.arrow.vector.schema.TypeLayout; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.junit.After; @@ -56,6 +60,9 @@ public void init() { private final static byte[] STR1 = "AAAAA1".getBytes(utf8Charset); private final static byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); private final static byte[] STR3 = "CCCC3".getBytes(utf8Charset); + private final static byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset); + private final static byte[] STR5 = "EEE5".getBytes(utf8Charset); + private final static byte[] STR6 = "FFFFF6".getBytes(utf8Charset); @After public void terminate() throws Exception { @@ -509,11 +516,134 @@ public void testCopyFromWithNulls() { } else { assertEquals(Integer.toString(i), vector2.getAccessor().getObject(i).toString()); } - } + } + } + + @Test + public void testSetLastSetUsage() { + try (final NullableVarCharVector vector = new NullableVarCharVector("myvector", allocator)) { + final NullableVarCharVector.Mutator mutator = vector.getMutator(); + + vector.allocateNew(1024 * 10, 1024); + + setBytes(0, STR1, vector); + setBytes(1, STR2, vector); + setBytes(2, STR3, vector); + setBytes(3, STR4, vector); + setBytes(4, STR5, vector); + setBytes(5, STR6, vector); + + /* Check current lastSet */ + assertEquals(Integer.toString(-1), Integer.toString(mutator.getLastSet())); + + /* Check the vector output */ + final NullableVarCharVector.Accessor accessor = vector.getAccessor(); + assertArrayEquals(STR1, accessor.get(0)); + assertArrayEquals(STR2, accessor.get(1)); + assertArrayEquals(STR3, accessor.get(2)); + assertArrayEquals(STR4, accessor.get(3)); + assertArrayEquals(STR5, accessor.get(4)); + assertArrayEquals(STR6, accessor.get(5)); + + /* + * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt + * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays. + * Run the test by commenting out next line and we should see incorrect vector output. + */ + mutator.setLastSet(5); + mutator.setValueCount(20); + + /* Check the vector output again */ + assertArrayEquals(STR1, accessor.get(0)); + assertArrayEquals(STR2, accessor.get(1)); + assertArrayEquals(STR3, accessor.get(2)); + assertArrayEquals(STR4, accessor.get(3)); + assertArrayEquals(STR5, accessor.get(4)); + assertArrayEquals(STR6, accessor.get(5)); + } + } + @Test + public void testVectorLoadUnload() { + + try (final NullableVarCharVector vector1 = new NullableVarCharVector("myvector", allocator)) { + + final NullableVarCharVector.Mutator mutator1 = vector1.getMutator(); + + vector1.allocateNew(1024 * 10, 1024); + + mutator1.set(0, STR1); + mutator1.set(1, STR2); + mutator1.set(2, STR3); + mutator1.set(3, STR4); + mutator1.set(4, STR5); + mutator1.set(5, STR6); + assertEquals(Integer.toString(5), Integer.toString(mutator1.getLastSet())); + mutator1.setValueCount(15); + assertEquals(Integer.toString(14), Integer.toString(mutator1.getLastSet())); + + /* Check the vector output */ + final NullableVarCharVector.Accessor accessor1 = vector1.getAccessor(); + assertArrayEquals(STR1, accessor1.get(0)); + assertArrayEquals(STR2, accessor1.get(1)); + assertArrayEquals(STR3, accessor1.get(2)); + assertArrayEquals(STR4, accessor1.get(3)); + assertArrayEquals(STR5, accessor1.get(4)); + assertArrayEquals(STR6, accessor1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList(); + List fieldVectors = new ArrayList(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, accessor1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + NullableVarCharVector vector2 = (NullableVarCharVector)schemaRoot2.getVector(fieldName); + NullableVarCharVector.Mutator mutator2 = vector2.getMutator(); + + /* + * lastSet would have internally been set by VectorLoader.load() when it invokes + * loadFieldBuffers. + */ + assertEquals(Integer.toString(14), Integer.toString(mutator2.getLastSet())); + mutator2.setValueCount(25); + assertEquals(Integer.toString(24), Integer.toString(mutator2.getLastSet())); + + /* Check the vector output */ + final NullableVarCharVector.Accessor accessor2 = vector2.getAccessor(); + assertArrayEquals(STR1, accessor2.get(0)); + assertArrayEquals(STR2, accessor2.get(1)); + assertArrayEquals(STR3, accessor2.get(2)); + assertArrayEquals(STR4, accessor2.get(3)); + assertArrayEquals(STR5, accessor2.get(4)); + assertArrayEquals(STR6, accessor2.get(5)); + } } } + public static void setBytes(int index, byte[] bytes, NullableVarCharVector vector) { + final int currentOffset = vector.values.offsetVector.getAccessor().get(index); + + vector.bits.getMutator().setToOne(index); + vector.values.offsetVector.getMutator().set(index + 1, currentOffset + bytes.length); + vector.values.data.setBytes(currentOffset, bytes, 0, bytes.length); + } }