From d103436bcfca13b04893ea1fcd56b94473edb3fd Mon Sep 17 00:00:00 2001 From: siddharth Date: Wed, 20 Dec 2017 16:43:53 -0800 Subject: [PATCH 1/3] ARROW-1943: handle setInitialCapacity for deeply nested lists --- .../complex/BaseRepeatedValueVector.java | 13 ++++---- .../apache/arrow/vector/TestListVector.java | 31 +++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index a9221f2f6ea..d044120adeb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -23,12 +23,7 @@ import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.AddOrGetResult; -import org.apache.arrow.vector.BaseValueVector; -import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.UInt4Vector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.*; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; @@ -134,7 +129,11 @@ public FieldVector getDataVector() { @Override public void setInitialCapacity(int numRecords) { offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; - vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 1acce7e0b66..67be2554cba 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -559,6 +559,37 @@ public void testNestedListVector() throws Exception { } } + @Test + public void testNestedListVector1() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LIST; + MinorType scalarType = MinorType.BIGINT; + + listVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList1 = (ListVector)listVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList2 = (ListVector)innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList3 = (ListVector)innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList4 = (ListVector)innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList5 = (ListVector)innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList6 = (ListVector)innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listVector.setInitialCapacity(128); + } + } + @Test public void testGetBufferAddress() throws Exception { try (ListVector listVector = ListVector.empty("vector", allocator)) { From e2f21a8b60b6032fe3cfef756f7e80afed0fd09d Mon Sep 17 00:00:00 2001 From: siddharth Date: Wed, 20 Dec 2017 16:59:00 -0800 Subject: [PATCH 2/3] fix imports --- .../arrow/vector/complex/BaseRepeatedValueVector.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index d044120adeb..9a23fd8c356 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -23,7 +23,14 @@ import org.apache.arrow.memory.BaseAllocator; import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.*; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; From d0adbadee1cce54cc1e62887dd149a39515357fb Mon Sep 17 00:00:00 2001 From: siddharth Date: Wed, 20 Dec 2017 17:55:12 -0800 Subject: [PATCH 3/3] unit tests --- .../apache/arrow/vector/TestListVector.java | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 67be2554cba..e2023f44618 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -590,6 +590,103 @@ public void testNestedListVector1() throws Exception { } } + @Test + public void testNestedListVector2() throws Exception { + try (ListVector listVector = ListVector.empty("sourceVector", allocator)) { + listVector.setInitialCapacity(1); + UnionListWriter listWriter = 
listVector.getWriter(); + /* allocate memory */ + listWriter.allocate(); + + /* write one or more inner lists at index 0 */ + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(50); + listWriter.list().bigInt().writeBigInt(100); + listWriter.list().bigInt().writeBigInt(200); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(75); + listWriter.list().bigInt().writeBigInt(125); + listWriter.list().endList(); + + listWriter.endList(); + + /* write one or more inner lists at index 1 */ + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(15); + listWriter.list().bigInt().writeBigInt(20); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(25); + listWriter.list().bigInt().writeBigInt(30); + listWriter.list().bigInt().writeBigInt(35); + listWriter.list().endList(); + + listWriter.endList(); + + assertEquals(2, listVector.getLastSet()); + + listVector.setValueCount(2); + + assertEquals(2, listVector.getValueCount()); + + /* get listVector value at index 0 -- the value itself is a listvector */ + Object result = listVector.getObject(0); + ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result; + ArrayList<Long> list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(50), list.get(0)); + assertEquals(new Long(100), list.get(1)); + assertEquals(new Long(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(new Long(75), list.get(0)); + assertEquals(new Long(125), list.get(1)); + + /* get listVector value at index 1 -- the value itself is a listvector */ + result = listVector.getObject(1); + resultSet 
= (ArrayList<ArrayList<Long>>) result; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 1 */ + assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(15), list.get(0)); + assertEquals(new Long(20), list.get(1)); + + list = resultSet.get(1); + assertEquals(new Long(25), list.get(0)); + assertEquals(new Long(30), list.get(1)); + assertEquals(new Long(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listVector.isNull(0)); + assertFalse(listVector.isNull(1)); + + /* check underlying offsets */ + final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* listVector has 2 lists at index 0 and 2 lists at index 1 */ + assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH)); + } + } + @Test public void testGetBufferAddress() throws Exception { try (ListVector listVector = ListVector.empty("vector", allocator)) {