diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 6587cde633e..47daeee7f20 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -909,52 +909,6 @@ protected final void handleSafe(int index, int dataLength) { } } - - /****************************************************************** - * * - * helper methods currently * - * used by JsonFileReader and * - * JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Method used by Json Reader to explicitly set the data of the variable - * width vector elements. The method takes care of allocating the memory - * for the vector if caller hasn't done so. - * - * This method should not be used externally. - * - * @param data ArrowBuf for storing variable width elements in the vector - * @param offset offset of the element - * @param allocator memory allocator - * @param index position of the element in the vector - * @param value array of bytes for the element - * @param valueCount number of elements in the vector - * @return buffer holding the variable width data. - */ - public static ArrowBuf set(ArrowBuf data, ArrowBuf offset, - BufferAllocator allocator, int index, byte[] value, - int valueCount) { - if (data == null) { - data = allocator.buffer(INITIAL_BYTE_COUNT); - } - final int currentBufferCapacity = data.capacity(); - final int currentStartOffset = offset.getInt(index * OFFSET_WIDTH); - while (currentBufferCapacity < currentStartOffset + value.length) { - final ArrowBuf newBuf = allocator.buffer(currentBufferCapacity * 2); - newBuf.setBytes(0, data, 0, currentBufferCapacity); - data.release(); - data = newBuf; - } - data.setBytes(currentStartOffset, value, 0, value.length); - if (index == (valueCount - 1)) { - data.writerIndex(offset.getInt(valueCount * OFFSET_WIDTH)); - } - return data; - } - /** * Method used by Json Writer to read a variable width element from * the variable width vector and write to Json. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java index ee40d708cc1..253427333a2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableBigIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableBigIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 8; + public static final byte TYPE_WIDTH = 8; private final FieldReader reader; /** @@ -290,41 +290,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java index 949287ecafe..e6b5b590f3f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateDayVector.java @@ -292,40 +292,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java index a0bdccedea4..8e15100f708 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDateMilliVector.java @@ -296,40 +296,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java index 8320f90830a..2a611c6a2d5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableDecimalVector.java @@ -37,7 +37,7 @@ * maintained to track which elements in the vector are null. */ public class NullableDecimalVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 16; + public static final byte TYPE_WIDTH = 16; private final FieldReader reader; private final int precision; @@ -355,41 +355,6 @@ public void setSafe(int index, int isSet, int start, ArrowBuf buffer) { } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value as array of bytes - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, byte[] value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - DecimalUtility.writeByteArrayToArrowBuf(value, buffer, index); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - - /****************************************************************** * * * vector transfer * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java index 5b28065c402..3ba5cfcea52 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat4Vector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableFloat4Vector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 4; + public static final byte TYPE_WIDTH = 4; private final FieldReader reader; /** @@ -291,41 +291,6 @@ public void setSafe(int index, int isSet, float value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, float value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setFloat(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java index 624abf2f272..2fb96a44fe7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableFloat8Vector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableFloat8Vector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 8; + public static final byte TYPE_WIDTH = 8; private final FieldReader reader; /** @@ -291,41 +291,6 @@ public void setSafe(int index, int isSet, double value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, double value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setDouble(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java index 6311daf4f5c..93deacbdfdc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 4; + public static final byte TYPE_WIDTH = 4; private final FieldReader reader; /** @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java index c45a8d5f5bc..ed337188b5b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableSmallIntVector.java @@ -34,7 +34,7 @@ * maintained to track which elements in the vector are null. */ public class NullableSmallIntVector extends BaseNullableFixedWidthVector { - private static final byte TYPE_WIDTH = 2; + public static final byte TYPE_WIDTH = 2; private final FieldReader reader; /** @@ -319,41 +319,6 @@ public void setSafe(int index, int isSet, short value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, short value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setShort(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java index 454a4ac41a6..d7ea3222397 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMicroVector.java @@ -292,40 +292,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java index 8540d169242..07d8abba4e5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeMilliVector.java @@ -294,40 +294,6 @@ public void setSafe(int index, int isSet, int value) { } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java index 015226da223..947b2392f7a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeNanoVector.java @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java index 2b2375e92f1..0a3cfaac047 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeSecVector.java @@ -291,40 +291,6 @@ public void setSafe(int index, int isSet, int value) { set(index, isSet, value); } - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, int value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setInt(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java index b2a58bd4568..1bf2abc384d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTimeStampVector.java @@ -165,42 +165,6 @@ public void setSafe(int index, int isSet, long value) { set(index, isSet, value); } - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and * - * JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, long value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setLong(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java index 15100530d0e..ccbfa32b85f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullableTinyIntVector.java @@ -318,42 +318,6 @@ public void setSafe(int index, int isSet, byte value) { set(index, isSet, value); } - - - /****************************************************************** - * * - * helper routines currently * - * used in JsonFileReader and JsonFileWriter * - * * - ******************************************************************/ - - - /** - * Given a data buffer, this method sets the element value at a particular - * position. Reallocates the buffer if needed. - * - * This method should not be used externally. - * - * @param buffer data buffer - * @param allocator allocator - * @param valueCount number of elements in the vector - * @param index position of the new element - * @param value element value - * @return data buffer - */ - public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, - int valueCount, int index, byte value) { - if (buffer == null) { - buffer = allocator.buffer(valueCount * TYPE_WIDTH); - } - buffer.setByte(index * TYPE_WIDTH, value); - if (index == (valueCount - 1)) { - buffer.writerIndex(valueCount * TYPE_WIDTH); - } - - return buffer; - } - /** * Given a data buffer, get the value stored at a particular position * in the vector. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java index c6a82510e60..560b0b9c58a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/file/json/JsonFileReader.java @@ -27,15 +27,17 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; import java.util.*; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.*; -import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.file.InvalidArrowFileException; import org.apache.arrow.vector.schema.ArrowFieldNode; import org.apache.arrow.vector.schema.ArrowVectorType; import org.apache.arrow.vector.types.Types; @@ -183,6 +185,282 @@ public VectorSchemaRoot read() throws IOException { } } + private abstract class BufferReader { + abstract protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException; + + final ArrowBuf readBuffer(BufferAllocator allocator, int count) throws IOException { + readToken(START_ARRAY); + ArrowBuf buf = read(allocator, count); + readToken(END_ARRAY); + return buf; + } + } + + private class BufferHelper { + BufferReader BIT = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + final int bufferSize = BitVectorHelper.getValidityBufferSize(count); + ArrowBuf buf = allocator.buffer(bufferSize); + + // C++ integration test fails without this. + buf.setZero(0, bufferSize); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + BitVectorHelper.setValidityBit(buf, i, parser.readValueAs(Boolean.class) ? 1 : 0); + } + + return buf; + } + }; + + BufferReader INT1 = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableTinyIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeByte(parser.getByteValue()); + } + + return buf; + } + }; + + BufferReader INT2 = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableSmallIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeShort(parser.getShortValue()); + } + + return buf; + } + }; + + BufferReader INT4 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeInt(parser.getIntValue()); + } + + return buf; + } + }; + + BufferReader INT8 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableBigIntVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeLong(parser.getLongValue()); + } + + return buf; + } + }; + + BufferReader FLOAT4 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableFloat4Vector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeFloat(parser.getFloatValue()); + } + + return buf; + } + }; + + BufferReader FLOAT8 = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableFloat8Vector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + buf.writeDouble(parser.getDoubleValue()); + } + + return buf; + } + }; + + BufferReader DECIMAL = new BufferReader() { + + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrowBuf buf = allocator.buffer(count * NullableDecimalVector.TYPE_WIDTH); + + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = decodeHexSafe(parser.getValueAsString()); + DecimalUtility.writeByteArrayToArrowBuf(value, buf, i); + } + + return buf; + } + }; + + BufferReader VARCHAR = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrayList values = Lists.newArrayList(); + int bufferSize = 0; + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = parser.getValueAsString().getBytes(UTF_8); + values.add(value); + bufferSize += value.length; + + } + + ArrowBuf buf = allocator.buffer(bufferSize); + + for (byte[] value : values) { + buf.writeBytes(value); + } + + return buf; + } + }; + + BufferReader VARBINARY = new BufferReader() { + @Override + protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException { + ArrayList values = Lists.newArrayList(); + int bufferSize = 0; + for (int i = 0; i < count; i++) { + parser.nextToken(); + final byte[] value = decodeHexSafe(parser.readValueAs(String.class)); + values.add(value); + bufferSize += value.length; + + } + + ArrowBuf buf = allocator.buffer(bufferSize); + + for (byte[] value : values) { + buf.writeBytes(value); + } + + return buf; + } + }; + + } + + private ArrowBuf readBuffer(BufferAllocator allocator, ArrowVectorType bufferType, Types.MinorType type, int count) throws IOException { + ArrowBuf buf; + + BufferHelper helper = new BufferHelper(); + + BufferReader reader = null; + + if (bufferType.equals(VALIDITY)) { + reader = helper.BIT; + } else if (bufferType.equals(OFFSET)) { + reader = helper.INT4; + } else if (bufferType.equals(TYPE)) { + reader = helper.INT1; + } else if (bufferType.equals(DATA)) { + switch (type) { + case BIT: + reader = helper.BIT; + break; + case TINYINT: + reader = helper.INT1; + break; + case SMALLINT: + reader = helper.INT2; + break; + case INT: + reader = helper.INT4; + break; + case BIGINT: + reader = helper.INT8; + break; + case UINT1: + reader = helper.INT1; + break; + case UINT2: + reader = helper.INT2; + break; + case UINT4: + reader = helper.INT4; + break; + case UINT8: + reader = helper.INT8; + break; + case FLOAT4: + reader = helper.FLOAT4; + break; + case FLOAT8: + reader = helper.FLOAT8; + break; + case DECIMAL: + reader = helper.DECIMAL; + break; + case VARCHAR: + reader = helper.VARCHAR; + break; + case VARBINARY: + reader = helper.VARBINARY; + break; + case DATEDAY: + reader = helper.INT4; + break; + case DATEMILLI: + reader = helper.INT8; + break; + case TIMESEC: + case TIMEMILLI: + reader = helper.INT4; + break; + case TIMEMICRO: + case TIMENANO: + reader = helper.INT8; + break; + case TIMESTAMPNANO: + case TIMESTAMPMICRO: + case TIMESTAMPMILLI: + case TIMESTAMPSEC: + case TIMESTAMPNANOTZ: + case TIMESTAMPMICROTZ: + case TIMESTAMPMILLITZ: + case TIMESTAMPSECTZ: + reader = helper.INT8; + break; + default: + throw new UnsupportedOperationException("Cannot read array of type " + type); + } + } else { + throw new InvalidArrowFileException("Unrecognized buffer type " + bufferType); + } + + buf = reader.readBuffer(allocator, count); + assert buf != null; + return buf; + } + private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { List vectorTypes = field.getTypeLayout().getVectorTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; @@ -209,7 +487,7 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name); } - /* Initialize the vector with required capacity but don't allocate since we would + /* Initialize the vector with required capacity but don't allocateNew since we would * be doing loadFieldBuffers. */ int valueCount = readNextField("count", Integer.class); @@ -218,29 +496,13 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json for (int v = 0; v < vectorTypes.size(); v++) { ArrowVectorType vectorType = vectorTypes.get(v); nextFieldIs(vectorType.getName()); - readToken(START_ARRAY); int innerBufferValueCount = valueCount; if (vectorType.equals(OFFSET)) { /* offset buffer has 1 additional value capacity */ innerBufferValueCount = valueCount + 1; } - for (int i = 0; i < innerBufferValueCount; i++) { - /* write data to the buffer */ - parser.nextToken(); - /* for variable width vectors, value count doesn't help pre-determining the capacity of - * the underlying data buffer. So we need to pass down the offset buffer (which was already - * populated in the previous iteration of this loop). - */ - if (vectorType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR - || vector.getMinorType() == Types.MinorType.VARBINARY)) { - vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], - vectorBuffers[v-1], i, innerBufferValueCount); - } else { - vectorBuffers[v] = setValueFromParser(vectorType, vector, vectorBuffers[v], - null, i, innerBufferValueCount); - } - } - readToken(END_ARRAY); + + vectorBuffers[v] = readBuffer(allocator, vectorType, vector.getMinorType(), innerBufferValueCount); } vector.loadFieldBuffers(new ArrowFieldNode(valueCount, 0), Arrays.asList(vectorBuffers)); @@ -250,7 +512,8 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json if (!fields.isEmpty()) { List vectorChildren = vector.getChildrenFromFields(); if (fields.size() != vectorChildren.size()) { - throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size()); + throw new IllegalArgumentException( + "fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size()); } nextFieldIs("children"); readToken(START_ARRAY); @@ -277,125 +540,6 @@ private byte[] decodeHexSafe(String hexString) throws IOException { } } - private ArrowBuf setValueFromParser(ArrowVectorType bufferType, FieldVector vector, - ArrowBuf buffer, ArrowBuf offsetBuffer, int index, int valueCount) throws IOException { - if (bufferType.equals(TYPE)) { - buffer = NullableTinyIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Byte.class)); - } else if (bufferType.equals(OFFSET)) { - buffer = BaseNullableVariableWidthVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - } else if (bufferType.equals(VALIDITY)) { - buffer = BitVectorHelper.setValidityBit(buffer, allocator, - valueCount, index, parser.readValueAs(Boolean.class) ? 1 : 0); - } else if (bufferType.equals(DATA)) { - switch (vector.getMinorType()) { - case BIT: - buffer = BitVectorHelper.setValidityBit(buffer, allocator, - valueCount, index, parser.readValueAs(Boolean.class) ? 1 : 0); - break; - case TINYINT: - buffer = NullableTinyIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Byte.class)); - break; - case SMALLINT: - buffer = NullableSmallIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Short.class)); - break; - case INT: - buffer = NullableIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case BIGINT: - buffer = NullableBigIntVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case FLOAT4: - buffer = NullableFloat4Vector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Float.class)); - break; - case FLOAT8: - buffer = NullableFloat8Vector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Double.class)); - break; - case DECIMAL: - buffer = NullableDecimalVector.set(buffer, allocator, - valueCount, index, decodeHexSafe(parser.readValueAs(String.class))); - break; - case VARBINARY: - assert (offsetBuffer != null); - buffer = BaseNullableVariableWidthVector.set(buffer, offsetBuffer, allocator, index, - decodeHexSafe(parser.readValueAs(String.class)), valueCount); - break; - case VARCHAR: - assert (offsetBuffer != null); - buffer = BaseNullableVariableWidthVector.set(buffer, offsetBuffer, allocator, index, - parser.readValueAs(String.class).getBytes(UTF_8), valueCount); - break; - case DATEDAY: - buffer = NullableDateDayVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case DATEMILLI: - buffer = NullableDateMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESEC: - buffer = NullableTimeSecVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case TIMEMILLI: - buffer = NullableTimeMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Integer.class)); - break; - case TIMEMICRO: - buffer = NullableTimeMicroVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMENANO: - buffer = NullableTimeNanoVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPSEC: - buffer = NullableTimeStampSecVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMILLI: - buffer = NullableTimeStampMilliVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMICRO: - buffer = NullableTimeStampMicroVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPNANO: - buffer = NullableTimeStampNanoVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPSECTZ: - buffer = NullableTimeStampSecTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMILLITZ: - buffer = NullableTimeStampMilliTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPMICROTZ: - buffer = NullableTimeStampMicroTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - case TIMESTAMPNANOTZ: - buffer = NullableTimeStampNanoTZVector.set(buffer, allocator, - valueCount, index, parser.readValueAs(Long.class)); - break; - default: - throw new UnsupportedOperationException("minor type: " + vector.getMinorType()); - } - } - - return buffer; - } - @Override public void close() throws IOException { parser.close();