Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ MANIFEST

cpp/.idea/
python/.eggs/
.vscode
.vscode
.idea/
16 changes: 16 additions & 0 deletions java/vector/src/main/codegen/templates/NullableValueVectors.java
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,22 @@ public void reset(){
setCount = 0;
<#if type.major = "VarLen">lastSet = -1;</#if>
}

public void setLastSet(int value) {
<#if type.major = "VarLen">
lastSet = value;
<#else>
throw new UnsupportedOperationException();
</#if>
}

public int getLastSet() {
<#if type.major != "VarLen">
throw new UnsupportedOperationException();
<#else>
return lastSet;
</#if>
}
}
}
</#list>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ public void setValueCount(int valueCount) {
vector.getMutator().setValueCount(childValueCount);
bits.getMutator().setValueCount(valueCount);
}

public void setLastSet(int value) {
lastSet = value;
}

public int getLastSet() { return lastSet; }
}

}
154 changes: 154 additions & 0 deletions java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,26 @@
*/
package org.apache.arrow.vector;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;
import org.apache.arrow.vector.complex.impl.UnionListReader;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.complex.writer.FieldWriter;
import org.apache.arrow.vector.holders.NullableBigIntHolder;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.*;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.List;

public class TestListVector {

private BufferAllocator allocator;
Expand Down Expand Up @@ -80,4 +91,147 @@ public void testCopyFrom() throws Exception {
Assert.assertTrue("shouldn't be null", reader.isSet());
}
}

@Test
public void testSetLastSetUsage() throws Exception {
try (ListVector listVector = ListVector.empty("input", allocator)) {

/* Explicitly add the dataVector */
MinorType type = MinorType.BIGINT;
listVector.addOrGetVector(FieldType.nullable(type.getType()));

/* allocate memory */
listVector.allocateNew();

/* get inner vectors; bitVector and offsetVector */
List<BufferBacked> innerVectors = listVector.getFieldInnerVectors();
BitVector bitVector = (BitVector)innerVectors.get(0);
UInt4Vector offsetVector = (UInt4Vector)innerVectors.get(1);

/* get the underlying data vector -- NullableBigIntVector */
NullableBigIntVector dataVector = (NullableBigIntVector)listVector.getDataVector();

/* check current lastSet */
assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet()));

int index = 0;
int offset = 0;

/* write [10, 11, 12] to the list vector at index */
bitVector.getMutator().setSafe(index, 1);
dataVector.getMutator().setSafe(0, 1, 10);
dataVector.getMutator().setSafe(1, 1, 11);
dataVector.getMutator().setSafe(2, 1, 12);
offsetVector.getMutator().setSafe(index + 1, 3);

index += 1;

/* write [13, 14] to the list vector at index 1 */
bitVector.getMutator().setSafe(index, 1);
dataVector.getMutator().setSafe(3, 1, 13);
dataVector.getMutator().setSafe(4, 1, 14);
offsetVector.getMutator().setSafe(index + 1, 5);

index += 1;

/* write [15, 16, 17] to the list vector at index 2 */
bitVector.getMutator().setSafe(index, 1);
dataVector.getMutator().setSafe(5, 1, 15);
dataVector.getMutator().setSafe(6, 1, 16);
dataVector.getMutator().setSafe(7, 1, 17);
offsetVector.getMutator().setSafe(index + 1, 8);

/* check current lastSet */
assertEquals(Integer.toString(0), Integer.toString(listVector.getMutator().getLastSet()));

/* set lastset and arbitrary valuecount for list vector.
*
* NOTE: if we don't execute setLastSet() before setLastValueCount(), then
* the latter will corrupt the offsetVector and thus the accessor will not
* retrieve the correct values from underlying dataVector. Run the test
* by commenting out next line and we should see failures from 5th assert
* onwards. This is why doing setLastSet() is important before setValueCount()
* once the vector has been loaded.
*
* Another important thing to remember is the value of lastSet itself.
* Even though the listVector has elements till index 2 only, the lastSet should
* be set as 3. This is because the offsetVector has valid offsets filled till index 3.
* If we do setLastSet(2), the offsetVector at index 3 will contain incorrect value
* after execution of setValueCount().
*
* correct state of the listVector
* bitvector {1, 1, 1, 0, 0.... }
* offsetvector {0, 3, 5, 8, 8, 8.....}
* datavector { [10, 11, 12],
* [13, 14],
* [15, 16, 17]
* }
*
* if we don't do setLastSet() before setValueCount --> incorrect state
* bitvector {1, 1, 1, 0, 0.... }
* offsetvector {0, 0, 0, 0, 0, 0.....}
* datavector { [10, 11, 12],
* [13, 14],
* [15, 16, 17]
* }
*
* if we do setLastSet(2) before setValueCount --> incorrect state
* bitvector {1, 1, 1, 0, 0.... }
* offsetvector {0, 3, 5, 5, 5, 5.....}
* datavector { [10, 11, 12],
* [13, 14],
* [15, 16, 17]
* }
*/
listVector.getMutator().setLastSet(3);
listVector.getMutator().setValueCount(10);

/* check the vector output */
final UInt4Vector.Accessor offsetAccessor = offsetVector.getAccessor();
final ValueVector.Accessor valueAccessor = dataVector.getAccessor();

index = 0;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(0), Integer.toString(offset));

Object actual = valueAccessor.getObject(offset);
assertEquals(new Long(10), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(11), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(12), (Long)actual);

index++;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(3), Integer.toString(offset));

actual = valueAccessor.getObject(offset);
assertEquals(new Long(13), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(14), (Long)actual);

index++;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(5), Integer.toString(offset));

actual = valueAccessor.getObject(offsetAccessor.get(index));
assertEquals(new Long(15), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(16), (Long)actual);
offset++;
actual = valueAccessor.getObject(offset);
assertEquals(new Long(17), (Long)actual);

index++;
offset = offsetAccessor.get(index);
assertEquals(Integer.toString(8), Integer.toString(offset));

actual = valueAccessor.getObject(offsetAccessor.get(index));
assertNull(actual);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@

import java.nio.charset.Charset;
import java.util.List;
import java.util.ArrayList;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

import org.apache.arrow.vector.schema.ArrowRecordBatch;
import org.apache.arrow.vector.schema.TypeLayout;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.junit.After;
Expand All @@ -56,6 +60,9 @@ public void init() {
private final static byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
private final static byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
private final static byte[] STR3 = "CCCC3".getBytes(utf8Charset);
private final static byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset);
private final static byte[] STR5 = "EEE5".getBytes(utf8Charset);
private final static byte[] STR6 = "FFFFF6".getBytes(utf8Charset);

@After
public void terminate() throws Exception {
Expand Down Expand Up @@ -509,11 +516,134 @@ public void testCopyFromWithNulls() {
} else {
assertEquals(Integer.toString(i), vector2.getAccessor().getObject(i).toString());
}

}
}
}

@Test
public void testSetLastSetUsage() {
try (final NullableVarCharVector vector = new NullableVarCharVector("myvector", allocator)) {

final NullableVarCharVector.Mutator mutator = vector.getMutator();

vector.allocateNew(1024 * 10, 1024);

setBytes(0, STR1, vector);
setBytes(1, STR2, vector);
setBytes(2, STR3, vector);
setBytes(3, STR4, vector);
setBytes(4, STR5, vector);
setBytes(5, STR6, vector);

/* Check current lastSet */
assertEquals(Integer.toString(-1), Integer.toString(mutator.getLastSet()));

/* Check the vector output */
final NullableVarCharVector.Accessor accessor = vector.getAccessor();
assertArrayEquals(STR1, accessor.get(0));
assertArrayEquals(STR2, accessor.get(1));
assertArrayEquals(STR3, accessor.get(2));
assertArrayEquals(STR4, accessor.get(3));
assertArrayEquals(STR5, accessor.get(4));
assertArrayEquals(STR6, accessor.get(5));

/*
* If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
* the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
* Run the test by commenting out next line and we should see incorrect vector output.
*/
mutator.setLastSet(5);
mutator.setValueCount(20);

/* Check the vector output again */
assertArrayEquals(STR1, accessor.get(0));
assertArrayEquals(STR2, accessor.get(1));
assertArrayEquals(STR3, accessor.get(2));
assertArrayEquals(STR4, accessor.get(3));
assertArrayEquals(STR5, accessor.get(4));
assertArrayEquals(STR6, accessor.get(5));
}
}

@Test
public void testVectorLoadUnload() {

try (final NullableVarCharVector vector1 = new NullableVarCharVector("myvector", allocator)) {

final NullableVarCharVector.Mutator mutator1 = vector1.getMutator();

vector1.allocateNew(1024 * 10, 1024);

mutator1.set(0, STR1);
mutator1.set(1, STR2);
mutator1.set(2, STR3);
mutator1.set(3, STR4);
mutator1.set(4, STR5);
mutator1.set(5, STR6);
assertEquals(Integer.toString(5), Integer.toString(mutator1.getLastSet()));
mutator1.setValueCount(15);
assertEquals(Integer.toString(14), Integer.toString(mutator1.getLastSet()));

/* Check the vector output */
final NullableVarCharVector.Accessor accessor1 = vector1.getAccessor();
assertArrayEquals(STR1, accessor1.get(0));
assertArrayEquals(STR2, accessor1.get(1));
assertArrayEquals(STR3, accessor1.get(2));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the main issue of holes/blanks in the items set, I suggest you modify these tests to set the valuecount to something higher (e.g. 15). Then verify that everything looks good.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added additional test and slightly modified existing ones.

assertArrayEquals(STR4, accessor1.get(3));
assertArrayEquals(STR5, accessor1.get(4));
assertArrayEquals(STR6, accessor1.get(5));

Field field = vector1.getField();
String fieldName = field.getName();

List<Field> fields = new ArrayList<Field>();
List<FieldVector> fieldVectors = new ArrayList<FieldVector>();

fields.add(field);
fieldVectors.add(vector1);

Schema schema = new Schema(fields);

VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, accessor1.getValueCount());
VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);

try (
ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE);
VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator);
) {

VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
vectorLoader.load(recordBatch);

NullableVarCharVector vector2 = (NullableVarCharVector)schemaRoot2.getVector(fieldName);
NullableVarCharVector.Mutator mutator2 = vector2.getMutator();

/*
* lastSet would have internally been set by VectorLoader.load() when it invokes
* loadFieldBuffers.
*/
assertEquals(Integer.toString(14), Integer.toString(mutator2.getLastSet()));
mutator2.setValueCount(25);
assertEquals(Integer.toString(24), Integer.toString(mutator2.getLastSet()));

/* Check the vector output */
final NullableVarCharVector.Accessor accessor2 = vector2.getAccessor();
assertArrayEquals(STR1, accessor2.get(0));
assertArrayEquals(STR2, accessor2.get(1));
assertArrayEquals(STR3, accessor2.get(2));
assertArrayEquals(STR4, accessor2.get(3));
assertArrayEquals(STR5, accessor2.get(4));
assertArrayEquals(STR6, accessor2.get(5));
}
}
}

public static void setBytes(int index, byte[] bytes, NullableVarCharVector vector) {
final int currentOffset = vector.values.offsetVector.getAccessor().get(index);

vector.bits.getMutator().setToOne(index);
vector.values.offsetVector.getMutator().set(index + 1, currentOffset + bytes.length);
vector.values.data.setBytes(currentOffset, bytes, 0, bytes.length);
}
}