diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index bfb3d282d87..11a6956ebfd 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -43,22 +43,6 @@ namespace ipc { namespace json { namespace internal { -static std::string GetBufferTypeName(BufferType type) { - switch (type) { - case BufferType::DATA: - return "DATA"; - case BufferType::OFFSET: - return "OFFSET"; - case BufferType::TYPE: - return "TYPE"; - case BufferType::VALIDITY: - return "VALIDITY"; - default: - break; - } - return "UNKNOWN"; -} - static std::string GetFloatingPrecisionName(FloatingPoint::Precision precision) { switch (precision) { case FloatingPoint::HALF: @@ -174,12 +158,9 @@ class SchemaWriter { RETURN_NOT_OK(WriteDictionaryMetadata(dict_type)); const DataType& dictionary_type = *dict_type.dictionary()->type(); - const DataType& index_type = *dict_type.index_type(); RETURN_NOT_OK(WriteChildren(dictionary_type.children())); - WriteBufferLayout(index_type.GetBufferLayout()); } else { RETURN_NOT_OK(WriteChildren(type.children())); - WriteBufferLayout(type.GetBufferLayout()); } writer_->EndObject(); @@ -301,26 +282,6 @@ class SchemaWriter { return Status::OK(); } - void WriteBufferLayout(const std::vector& buffer_layout) { - writer_->Key("typeLayout"); - writer_->StartObject(); - writer_->Key("vectors"); - writer_->StartArray(); - - for (const BufferDescr& buffer : buffer_layout) { - writer_->StartObject(); - writer_->Key("type"); - writer_->String(GetBufferTypeName(buffer.type())); - - writer_->Key("typeBitWidth"); - writer_->Int(buffer.bit_width()); - - writer_->EndObject(); - } - writer_->EndArray(); - writer_->EndObject(); - } - Status WriteChildren(const std::vector>& children) { writer_->Key("children"); writer_->StartArray(); diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 05202ea9372..af1d6c85158 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -54,7 +54,6 @@ using DictionaryOffset = flatbuffers::Offset; using FieldOffset = flatbuffers::Offset; using KeyValueOffset = flatbuffers::Offset; using RecordBatchOffset = flatbuffers::Offset; -using VectorLayoutOffset = flatbuffers::Offset; using Offset = flatbuffers::Offset; using FBString = flatbuffers::Offset; @@ -341,34 +340,8 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, // TODO(wesm): Convert this to visitor pattern static Status TypeToFlatbuffer(FBB& fbb, const DataType& type, std::vector* children, - std::vector* layout, flatbuf::Type* out_type, DictionaryMemo* dictionary_memo, Offset* offset) { - std::vector buffer_layout = type.GetBufferLayout(); - for (const BufferDescr& descr : buffer_layout) { - flatbuf::VectorType vector_type; - switch (descr.type()) { - case BufferType::OFFSET: - vector_type = flatbuf::VectorType_OFFSET; - break; - case BufferType::DATA: - vector_type = flatbuf::VectorType_DATA; - break; - case BufferType::VALIDITY: - vector_type = flatbuf::VectorType_VALIDITY; - break; - case BufferType::TYPE: - vector_type = flatbuf::VectorType_TYPE; - break; - default: - vector_type = flatbuf::VectorType_DATA; - break; - } - auto offset = flatbuf::CreateVectorLayout( - fbb, static_cast(descr.bit_width()), vector_type); - layout->push_back(offset); - } - const DataType* value_type = &type; if (type.id() == Type::DICTIONARY) { @@ -543,14 +516,11 @@ static Status FieldToFlatbuffer(FBB& fbb, const Field& field, flatbuf::Type type_enum; Offset type_offset; - Offset type_layout; std::vector children; - std::vector layout; - RETURN_NOT_OK(TypeToFlatbuffer(fbb, *field.type(), &children, &layout, &type_enum, + RETURN_NOT_OK(TypeToFlatbuffer(fbb, *field.type(), &children, &type_enum, dictionary_memo, &type_offset)); auto fb_children = fbb.CreateVector(children); - auto fb_layout = fbb.CreateVector(layout); DictionaryOffset dictionary = 0; if (field.type()->id() == Type::DICTIONARY) { @@ -560,7 +530,7 @@ static Status FieldToFlatbuffer(FBB& fbb, const Field& field, // TODO: produce the list of VectorTypes *offset = flatbuf::CreateField(fbb, fb_name, field.nullable(), type_enum, type_offset, - dictionary, fb_children, fb_layout); + dictionary, fb_children); return Status::OK(); } diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index ae171183234..31ad5345811 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -474,43 +474,6 @@ std::shared_ptr decimal(int32_t precision, int32_t scale) { return std::make_shared(precision, scale); } -static const BufferDescr kValidityBuffer(BufferType::VALIDITY, 1); -static const BufferDescr kOffsetBuffer(BufferType::OFFSET, 32); -static const BufferDescr kTypeBuffer(BufferType::TYPE, 32); -static const BufferDescr kBooleanBuffer(BufferType::DATA, 1); -static const BufferDescr kValues64(BufferType::DATA, 64); -static const BufferDescr kValues32(BufferType::DATA, 32); -static const BufferDescr kValues16(BufferType::DATA, 16); -static const BufferDescr kValues8(BufferType::DATA, 8); - -std::vector FixedWidthType::GetBufferLayout() const { - return {kValidityBuffer, BufferDescr(BufferType::DATA, bit_width())}; -} - -std::vector NullType::GetBufferLayout() const { return {}; } - -std::vector BinaryType::GetBufferLayout() const { - return {kValidityBuffer, kOffsetBuffer, kValues8}; -} - -std::vector FixedSizeBinaryType::GetBufferLayout() const { - return {kValidityBuffer, BufferDescr(BufferType::DATA, bit_width())}; -} - -std::vector ListType::GetBufferLayout() const { - return {kValidityBuffer, kOffsetBuffer}; -} - -std::vector StructType::GetBufferLayout() const { return {kValidityBuffer}; } - -std::vector UnionType::GetBufferLayout() const { - if (mode_ == UnionMode::SPARSE) { - return {kValidityBuffer, kTypeBuffer}; - } else { - return {kValidityBuffer, kTypeBuffer, kOffsetBuffer}; - } -} - std::string Decimal128Type::ToString() const { std::stringstream s; s << "decimal(" << precision_ << ", " << scale_ << ")"; diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 8dcc1592da0..009e07db077 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -133,20 +133,6 @@ struct Type { }; }; -enum class BufferType : char { DATA, OFFSET, TYPE, VALIDITY }; - -class BufferDescr { - public: - BufferDescr(BufferType type, int bit_width) : type_(type), bit_width_(bit_width) {} - - BufferType type() const { return type_; } - int bit_width() const { return bit_width_; } - - private: - BufferType type_; - int bit_width_; -}; - class ARROW_EXPORT DataType { public: explicit DataType(Type::type id) : id_(id) {} @@ -176,8 +162,6 @@ class ARROW_EXPORT DataType { /// \since 0.7.0 virtual std::string name() const = 0; - virtual std::vector GetBufferLayout() const = 0; - Type::type id() const { return id_; } protected: @@ -201,8 +185,6 @@ class ARROW_EXPORT FixedWidthType : public DataType { using DataType::DataType; virtual int bit_width() const = 0; - - std::vector GetBufferLayout() const override; }; class ARROW_EXPORT PrimitiveCType : public FixedWidthType { @@ -319,8 +301,6 @@ class ARROW_EXPORT NullType : public DataType, public NoExtraMeta { std::string ToString() const override; std::string name() const override { return "null"; } - - std::vector GetBufferLayout() const override; }; class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta { @@ -425,8 +405,6 @@ class ARROW_EXPORT ListType : public NestedType { std::string ToString() const override; std::string name() const override { return "list"; } - - std::vector GetBufferLayout() const override; }; // BinaryType type is represents lists of 1-byte values. @@ -440,8 +418,6 @@ class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta { std::string ToString() const override; std::string name() const override { return "binary"; } - std::vector GetBufferLayout() const override; - protected: // Allow subclasses to change the logical type. explicit BinaryType(Type::type logical_type) : DataType(logical_type) {} @@ -461,8 +437,6 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public Parametri std::string ToString() const override; std::string name() const override { return "fixed_size_binary"; } - std::vector GetBufferLayout() const override; - int32_t byte_width() const { return byte_width_; } int bit_width() const override; @@ -494,8 +468,6 @@ class ARROW_EXPORT StructType : public NestedType { Status Accept(TypeVisitor* visitor) const override; std::string ToString() const override; std::string name() const override { return "struct"; } - - std::vector GetBufferLayout() const override; }; class ARROW_EXPORT DecimalType : public FixedSizeBinaryType { @@ -541,8 +513,6 @@ class ARROW_EXPORT UnionType : public NestedType { std::string name() const override { return "union"; } Status Accept(TypeVisitor* visitor) const override; - std::vector GetBufferLayout() const override; - const std::vector& type_codes() const { return type_codes_; } UnionMode::type mode() const { return mode_; } diff --git a/format/Schema.fbs b/format/Schema.fbs index 6021e92b847..3d739342b83 100644 --- a/format/Schema.fbs +++ b/format/Schema.fbs @@ -211,32 +211,6 @@ union Type { Map } -/// ---------------------------------------------------------------------- -/// The possible types of a vector - -enum VectorType: short { - /// used in List type, Dense Union and variable length primitive types (String, Binary) - OFFSET, - /// actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector - DATA, - /// Bit vector indicating if each value is null - VALIDITY, - /// Type vector used in Union type - TYPE -} - -/// ---------------------------------------------------------------------- -/// represents the physical layout of a buffer -/// buffers have fixed width slots of a given type - -table VectorLayout { - /// the width of a slot in the buffer (typically 1, 8, 16, 32 or 64) - bit_width: short; - /// the purpose of the vector - type: VectorType; -} - - /// ---------------------------------------------------------------------- /// user defined key value pairs to add custom metadata to arrow /// key namespacing is the responsibility of the user @@ -285,10 +259,7 @@ table Field { // children apply only to Nested data types like Struct, List and Union children: [Field]; - /// layout of buffers produced for this type (as derived from the Type) - /// does not include children - /// each recordbatch will return instances of those Buffers. - layout: [ VectorLayout ]; + // User-defined metadata custom_metadata: [ KeyValue ]; } diff --git a/integration/data/simple.json b/integration/data/simple.json index bc8949912a4..6634729193b 100644 --- a/integration/data/simple.json +++ b/integration/data/simple.json @@ -4,36 +4,20 @@ { "name": "foo", "type": {"name": "int", "isSigned": true, "bitWidth": 32}, - "nullable": true, "children": [], - "typeLayout": { - "vectors": [ - {"type": "VALIDITY", "typeBitWidth": 1}, - {"type": "DATA", "typeBitWidth": 32} - ] - } + "nullable": true, + "children": [] }, { "name": "bar", "type": {"name": "floatingpoint", "precision": "DOUBLE"}, - "nullable": true, "children": [], - "typeLayout": { - "vectors": [ - {"type": "VALIDITY", "typeBitWidth": 1}, - {"type": "DATA", "typeBitWidth": 64} - ] - } + "nullable": true, + "children": [] }, { "name": "baz", "type": {"name": "utf8"}, - "nullable": true, "children": [], - "typeLayout": { - "vectors": [ - {"type": "VALIDITY", "typeBitWidth": 1}, - {"type": "OFFSET", "typeBitWidth": 32}, - {"type": "DATA", "typeBitWidth": 8} - ] - } + "nullable": true, + "children": [] } ] }, diff --git a/integration/data/struct_example.json b/integration/data/struct_example.json index 3ea062db7ba..4e6cc774e31 100644 --- a/integration/data/struct_example.json +++ b/integration/data/struct_example.json @@ -16,19 +16,7 @@ "bitWidth": 32 }, "nullable": true, - "children": [], - "typeLayout": { - "vectors": [ - { - "type": "VALIDITY", - "typeBitWidth": 1 - }, - { - "type": "DATA", - "typeBitWidth": 32 - } - ] - } + "children": [] }, { "name": "f2", @@ -36,33 +24,9 @@ "name": "utf8" }, "nullable": true, - "children": [], - "typeLayout": { - "vectors": [ - { - "type": "VALIDITY", - "typeBitWidth": 1 - }, - { - "type": "OFFSET", - "typeBitWidth": 32 - }, - { - "type": "DATA", - "typeBitWidth": 8 - } - ] - } + "children": [] } - ], - "typeLayout": { - "vectors": [ - { - "type": "VALIDITY", - "typeBitWidth": 1 - } - ] - } + ] } ] }, @@ -234,4 +198,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/integration/integration_test.py b/integration/integration_test.py index a063fb36f49..5468ab19de8 100644 --- a/integration/integration_test.py +++ b/integration/integration_test.py @@ -111,8 +111,7 @@ def get_json(self): ('name', self.name), ('type', self._get_type()), ('nullable', self.nullable), - ('children', self._get_children()), - ('typeLayout', self._get_type_layout()) + ('children', self._get_children()) ]) def _make_is_valid(self, size): @@ -158,14 +157,6 @@ class PrimitiveType(DataType): def _get_children(self): return [] - def _get_type_layout(self): - return OrderedDict([ - ('vectors', - [OrderedDict([('type', 'VALIDITY'), - ('typeBitWidth', 1)]), - OrderedDict([('type', 'DATA'), - ('typeBitWidth', self.bit_width)])])]) - class PrimitiveColumn(Column): @@ -402,14 +393,6 @@ def _get_type(self): ('scale', self.scale), ]) - def _get_type_layout(self): - return OrderedDict([ - ('vectors', - [OrderedDict([('type', 'VALIDITY'), - ('typeBitWidth', 1)]), - OrderedDict([('type', 'DATA'), - ('typeBitWidth', self.bit_width)])])]) - def generate_column(self, size, name=None): min_value, max_value = decimal_range_from_precision(self.precision) values = [random.randint(min_value, max_value) for _ in range(size)] @@ -461,16 +444,6 @@ def column_class(self): def _get_type(self): return OrderedDict([('name', 'binary')]) - def _get_type_layout(self): - return OrderedDict([ - ('vectors', - [OrderedDict([('type', 'VALIDITY'), - ('typeBitWidth', 1)]), - OrderedDict([('type', 'OFFSET'), - ('typeBitWidth', 32)]), - OrderedDict([('type', 'DATA'), - ('typeBitWidth', 8)])])]) - def generate_column(self, size, name=None): K = 7 is_valid = self._make_is_valid(size) @@ -572,14 +545,6 @@ def _get_type(self): def _get_children(self): return [self.value_type.get_json()] - def _get_type_layout(self): - return OrderedDict([ - ('vectors', - [OrderedDict([('type', 'VALIDITY'), - ('typeBitWidth', 1)]), - OrderedDict([('type', 'OFFSET'), - ('typeBitWidth', 32)])])]) - def generate_column(self, size, name=None): MAX_LIST_SIZE = 4 @@ -633,12 +598,6 @@ def _get_type(self): def _get_children(self): return [type_.get_json() for type_ in self.field_types] - def _get_type_layout(self): - return OrderedDict([ - ('vectors', - [OrderedDict([('type', 'VALIDITY'), - ('typeBitWidth', 1)])])]) - def generate_column(self, size, name=None): is_valid = self._make_is_valid(size) @@ -689,13 +648,9 @@ def get_json(self): ('id', self.dictionary.id_), ('indexType', self.index_type._get_type()), ('isOrdered', self.dictionary.ordered) - ])), - ('typeLayout', self.index_type._get_type_layout()) + ])) ]) - def _get_type_layout(self): - return self.index_type._get_type_layout() - def generate_column(self, size, name=None): if name is None: name = self.name diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/VectorLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java similarity index 50% rename from java/vector/src/main/java/org/apache/arrow/vector/ipc/message/VectorLayout.java rename to java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java index e4f2f98fde3..f6529d8e55b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/VectorLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java @@ -16,33 +16,48 @@ * limitations under the License. */ -package org.apache.arrow.vector.ipc.message; +package org.apache.arrow.vector; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; -import com.google.flatbuffers.FlatBufferBuilder; -public class VectorLayout implements FBSerializable { +public class BufferLayout { - private static final VectorLayout VALIDITY_VECTOR = new VectorLayout(ArrowVectorType.VALIDITY, 1); - private static final VectorLayout OFFSET_VECTOR = new VectorLayout(ArrowVectorType.OFFSET, 32); - private static final VectorLayout TYPE_VECTOR = new VectorLayout(ArrowVectorType.TYPE, 32); - private static final VectorLayout BOOLEAN_VECTOR = new VectorLayout(ArrowVectorType.DATA, 1); - private static final VectorLayout VALUES_64 = new VectorLayout(ArrowVectorType.DATA, 64); - private static final VectorLayout VALUES_32 = new VectorLayout(ArrowVectorType.DATA, 32); - private static final VectorLayout VALUES_16 = new VectorLayout(ArrowVectorType.DATA, 16); - private static final VectorLayout VALUES_8 = new VectorLayout(ArrowVectorType.DATA, 8); + public enum BufferType { + DATA("DATA"), + OFFSET("OFFSET"), + VALIDITY("VALIDITY"), + TYPE("TYPE"); - public static VectorLayout typeVector() { - return TYPE_VECTOR; + final private String name; + + BufferType(String name) { + this.name = name; + } + + public String getName() { + return name; + } } - public static VectorLayout offsetVector() { - return OFFSET_VECTOR; + private static final BufferLayout VALIDITY_BUFFER = new BufferLayout(BufferType.VALIDITY, 1); + private static final BufferLayout OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 32); + private static final BufferLayout TYPE_BUFFER = new BufferLayout(BufferType.TYPE, 32); + private static final BufferLayout BIT_BUFFER = new BufferLayout(BufferType.DATA, 1); + private static final BufferLayout VALUES_128 = new BufferLayout(BufferType.DATA, 128); + private static final BufferLayout VALUES_64 = new BufferLayout(BufferType.DATA, 64); + private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32); + private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16); + private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8); + + public static BufferLayout typeBuffer() { + return TYPE_BUFFER; } - public static VectorLayout dataVector(int typeBitWidth) { + public static BufferLayout offsetBuffer() { + return OFFSET_BUFFER; + } + + public static BufferLayout dataBuffer(int typeBitWidth) { switch (typeBitWidth) { case 8: return VALUES_8; @@ -52,29 +67,30 @@ public static VectorLayout dataVector(int typeBitWidth) { return VALUES_32; case 64: return VALUES_64; + case 128: + return VALUES_128; default: throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported"); } } - public static VectorLayout booleanVector() { - return BOOLEAN_VECTOR; + public static BufferLayout booleanVector() { + return BIT_BUFFER; } - public static VectorLayout validityVector() { - return VALIDITY_VECTOR; + public static BufferLayout validityVector() { + return VALIDITY_BUFFER; } - public static VectorLayout byteVector() { - return dataVector(8); + public static BufferLayout byteVector() { + return dataBuffer(8); } private final short typeBitWidth; - private final ArrowVectorType type; + private final BufferType type; - @JsonCreator - private VectorLayout(@JsonProperty("type") ArrowVectorType type, @JsonProperty("typeBitWidth") int typeBitWidth) { + private BufferLayout(BufferType type, int typeBitWidth) { super(); this.type = Preconditions.checkNotNull(type); this.typeBitWidth = (short) typeBitWidth; @@ -83,15 +99,11 @@ private VectorLayout(@JsonProperty("type") ArrowVectorType type, @JsonProperty(" } } - public VectorLayout(org.apache.arrow.flatbuf.VectorLayout layout) { - this(new ArrowVectorType(layout.type()), layout.bitWidth()); - } - public int getTypeBitWidth() { return typeBitWidth; } - public ArrowVectorType getType() { + public BufferType getType() { return type; } @@ -116,15 +128,7 @@ public boolean equals(Object obj) { if (getClass() != obj.getClass()) { return false; } - VectorLayout other = (VectorLayout) obj; + BufferLayout other = (BufferLayout) obj; return type.equals(other.type) && (typeBitWidth == other.typeBitWidth); } - - @Override - public int writeTo(FlatBufferBuilder builder) { - ; - return org.apache.arrow.flatbuf.VectorLayout.createVectorLayout(builder, typeBitWidth, type.getType()); - } - - } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java similarity index 66% rename from java/vector/src/main/java/org/apache/arrow/vector/ipc/message/TypeLayout.java rename to java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 06fe9481686..d6f32b4b4b1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.arrow.vector.ipc.message; +package org.apache.arrow.vector; import static java.util.Arrays.asList; @@ -24,6 +24,7 @@ import java.util.Collections; import java.util.List; +import org.apache.arrow.vector.BufferLayout.BufferType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; @@ -41,14 +42,11 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; /** - * The layout of vectors for a given type - * It defines its own vectors followed by the vectors for the children + * The buffer layout of vectors for a given type + * It defines its own buffers followed by the buffers for the children * if it is a nested type (Struct_, List, Union) */ public class TypeLayout { @@ -58,24 +56,24 @@ public static TypeLayout getTypeLayout(final ArrowType arrowType) { @Override public TypeLayout visit(Int type) { - return newFixedWidthTypeLayout(VectorLayout.dataVector(type.getBitWidth())); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); } @Override public TypeLayout visit(Union type) { - List vectors; + List vectors; switch (type.getMode()) { case Dense: vectors = asList( // TODO: validate this - VectorLayout.validityVector(), - VectorLayout.typeVector(), - VectorLayout.offsetVector() // offset to find the vector + BufferLayout.validityVector(), + BufferLayout.typeBuffer(), + BufferLayout.offsetBuffer() // offset to find the vector ); break; case Sparse: vectors = asList( - VectorLayout.typeVector() // type of the value at the index or 0 if null + BufferLayout.typeBuffer() // type of the value at the index or 0 if null ); break; default: @@ -86,30 +84,30 @@ public TypeLayout visit(Union type) { @Override public TypeLayout visit(Struct type) { - List vectors = asList( - VectorLayout.validityVector() + List vectors = asList( + BufferLayout.validityVector() ); return new TypeLayout(vectors); } @Override public TypeLayout visit(Timestamp type) { - return newFixedWidthTypeLayout(VectorLayout.dataVector(64)); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); } @Override public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { - List vectors = asList( - VectorLayout.validityVector(), - VectorLayout.offsetVector() + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer() ); return new TypeLayout(vectors); } @Override public TypeLayout visit(FixedSizeList type) { - List vectors = asList( - VectorLayout.validityVector() + List vectors = asList( + BufferLayout.validityVector() ); return new TypeLayout(vectors); } @@ -130,18 +128,17 @@ public TypeLayout visit(FloatingPoint type) { default: throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision()); } - return newFixedWidthTypeLayout(VectorLayout.dataVector(bitWidth)); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(bitWidth)); } @Override public TypeLayout visit(Decimal type) { - // TODO: check size - return newFixedWidthTypeLayout(VectorLayout.dataVector(64)); // actually depends on the type fields + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(128)); } @Override public TypeLayout visit(Bool type) { - return newFixedWidthTypeLayout(VectorLayout.booleanVector()); + return newFixedWidthTypeLayout(BufferLayout.booleanVector()); } @Override @@ -155,39 +152,46 @@ public TypeLayout visit(Utf8 type) { } private TypeLayout newVariableWidthTypeLayout() { - return newPrimitiveTypeLayout(VectorLayout.validityVector(), VectorLayout.offsetVector(), VectorLayout.byteVector()); + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.offsetBuffer(), BufferLayout.byteVector()); } - private TypeLayout newPrimitiveTypeLayout(VectorLayout... vectors) { + private TypeLayout newPrimitiveTypeLayout(BufferLayout... vectors) { return new TypeLayout(asList(vectors)); } - public TypeLayout newFixedWidthTypeLayout(VectorLayout dataVector) { - return newPrimitiveTypeLayout(VectorLayout.validityVector(), dataVector); + public TypeLayout newFixedWidthTypeLayout(BufferLayout dataVector) { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), dataVector); } @Override public TypeLayout visit(Null type) { - return new TypeLayout(Collections.emptyList()); + return new TypeLayout(Collections.emptyList()); } @Override public TypeLayout visit(Date type) { - return newFixedWidthTypeLayout(VectorLayout.dataVector(64)); + switch (type.getUnit()) { + case DAY: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); + case MILLISECOND: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); + default: + throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); + } } @Override public TypeLayout visit(Time type) { - return newFixedWidthTypeLayout(VectorLayout.dataVector(type.getBitWidth())); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); } @Override - public TypeLayout visit(Interval type) { // TODO: check size + public TypeLayout visit(Interval type) { switch (type.getUnit()) { case DAY_TIME: - return newFixedWidthTypeLayout(VectorLayout.dataVector(64)); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); case YEAR_MONTH: - return newFixedWidthTypeLayout(VectorLayout.dataVector(64)); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); default: throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); } @@ -197,39 +201,37 @@ public TypeLayout visit(Interval type) { // TODO: check size return layout; } - private final List vectors; + private final List bufferLayouts; - @JsonCreator - public TypeLayout(@JsonProperty("vectors") List vectors) { + public TypeLayout(List bufferLayouts) { super(); - this.vectors = Preconditions.checkNotNull(vectors); + this.bufferLayouts = Preconditions.checkNotNull(bufferLayouts); } - public TypeLayout(VectorLayout... vectors) { - this(asList(vectors)); + public TypeLayout(BufferLayout... bufferLayouts) { + this(asList(bufferLayouts)); } - public List getVectors() { - return vectors; + public List getBufferLayouts() { + return bufferLayouts; } - @JsonIgnore - public List getVectorTypes() { - List types = new ArrayList<>(vectors.size()); - for (VectorLayout vector : vectors) { + public List getBufferTypes() { + List types = new ArrayList<>(bufferLayouts.size()); + for (BufferLayout vector : bufferLayouts) { types.add(vector.getType()); } return types; } public String toString() { - return vectors.toString(); + return bufferLayouts.toString(); } @Override public int hashCode() { - return vectors.hashCode(); + return bufferLayouts.hashCode(); } @Override @@ -244,7 +246,7 @@ public boolean equals(Object obj) { return false; } TypeLayout other = (TypeLayout) obj; - return vectors.equals(other.vectors); + return bufferLayouts.equals(other.bufferLayouts); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 2cd4099c669..c933d149f8d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -26,7 +26,6 @@ import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.VectorLayout; import org.apache.arrow.vector.types.pojo.Field; import com.google.common.collect.Iterators; @@ -71,9 +70,9 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf checkArgument(nodes.hasNext(), "no more field nodes for for field " + field + " and vector " + vector); ArrowFieldNode fieldNode = nodes.next(); - List typeLayout = field.getTypeLayout().getVectors(); - List ownBuffers = new ArrayList<>(typeLayout.size()); - for (int j = 0; j < typeLayout.size(); j++) { + List bufferLayouts = TypeLayout.getTypeLayout(field.getType()).getBufferLayouts(); + List ownBuffers = new ArrayList<>(bufferLayouts.size()); + for (int j = 0; j < bufferLayouts.size(); j++) { ownBuffers.add(buffers.next()); } try { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 2b034894ab1..94981ef02f6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -22,9 +22,9 @@ import java.util.List; import io.netty.buffer.ArrowBuf; +import org.apache.arrow.vector.BufferLayout.BufferType; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.ArrowVectorType; public class VectorUnloader { @@ -54,7 +54,7 @@ public ArrowRecordBatch getRecordBatch() { private void appendNodes(FieldVector vector, List nodes, List buffers) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - List expectedBuffers = vector.getField().getTypeLayout().getVectorTypes(); + List expectedBuffers = TypeLayout.getTypeLayout(vector.getField().getType()).getBufferTypes(); if (fieldBuffers.size() != expectedBuffers.size()) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index f274b748e55..d0a9b9e18b8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -23,7 +23,7 @@ import static com.fasterxml.jackson.core.JsonToken.START_ARRAY; import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.arrow.vector.ipc.message.ArrowVectorType.*; +import static org.apache.arrow.vector.BufferLayout.BufferType.*; import java.io.File; import java.io.IOException; @@ -42,7 +42,8 @@ import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.apache.arrow.vector.ipc.message.ArrowVectorType; +import org.apache.arrow.vector.BufferLayout.BufferType; +import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -375,7 +376,7 @@ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException }; } - private ArrowBuf readIntoBuffer(BufferAllocator allocator, ArrowVectorType bufferType, + private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType, Types.MinorType type, int count) throws IOException { ArrowBuf buf; @@ -471,7 +472,8 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, ArrowVectorType buffe } private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { - List vectorTypes = field.getTypeLayout().getVectorTypes(); + TypeLayout typeLayout = TypeLayout.getTypeLayout(field.getType()); + List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; /* * The order of inner buffers is : @@ -503,15 +505,15 @@ private void readFromJsonIntoVector(Field field, FieldVector vector) throws Json vector.setInitialCapacity(valueCount); for (int v = 0; v < vectorTypes.size(); v++) { - ArrowVectorType vectorType = vectorTypes.get(v); - nextFieldIs(vectorType.getName()); + BufferType bufferType = vectorTypes.get(v); + nextFieldIs(bufferType.getName()); int innerBufferValueCount = valueCount; - if (vectorType.equals(OFFSET)) { + if (bufferType.equals(OFFSET)) { /* offset buffer has 1 additional value capacity */ innerBufferValueCount = valueCount + 1; } - vectorBuffers[v] = readIntoBuffer(allocator, vectorType, vector.getMinorType(), innerBufferValueCount); + vectorBuffers[v] = readIntoBuffer(allocator, bufferType, vector.getMinorType(), innerBufferValueCount); } final int nullCount = BitVectorHelper.getNullCount(vectorBuffers[0], valueCount); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index 1c9e1d38095..6eb76a7a147 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector.ipc; -import static org.apache.arrow.vector.ipc.message.ArrowVectorType.*; +import static org.apache.arrow.vector.BufferLayout.BufferType.*; import java.io.File; import java.io.IOException; @@ -33,7 +33,8 @@ import org.apache.arrow.vector.*; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryProvider; -import org.apache.arrow.vector.ipc.message.ArrowVectorType; +import org.apache.arrow.vector.BufferLayout.BufferType; +import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -152,7 +153,7 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { } private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { - List vectorTypes = field.getTypeLayout().getVectorTypes(); + List vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " + vectorTypes.size() + " != " + vectorBuffers.size()); @@ -165,16 +166,16 @@ private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOE final int scale = (vector instanceof DecimalVector) ? ((DecimalVector) vector).getScale() : 0; for (int v = 0; v < vectorTypes.size(); v++) { - ArrowVectorType vectorType = vectorTypes.get(v); + BufferType bufferType = vectorTypes.get(v); ArrowBuf vectorBuffer = vectorBuffers.get(v); - generator.writeArrayFieldStart(vectorType.getName()); - final int bufferValueCount = (vectorType.equals(OFFSET)) ? valueCount + 1 : valueCount; + generator.writeArrayFieldStart(bufferType.getName()); + final int bufferValueCount = (bufferType.equals(OFFSET)) ? valueCount + 1 : valueCount; for (int i = 0; i < bufferValueCount; i++) { - if (vectorType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR || + if (bufferType.equals(DATA) && (vector.getMinorType() == Types.MinorType.VARCHAR || vector.getMinorType() == Types.MinorType.VARBINARY)) { - writeValueToGenerator(vectorType, vectorBuffer, vectorBuffers.get(v-1), vector, i, scale); + writeValueToGenerator(bufferType, vectorBuffer, vectorBuffers.get(v-1), vector, i, scale); } else { - writeValueToGenerator(vectorType, vectorBuffer, null, vector, i, scale); + writeValueToGenerator(bufferType, vectorBuffer, null, vector, i, scale); } } generator.writeEndArray(); @@ -197,7 +198,7 @@ private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOE generator.writeEndObject(); } - private void writeValueToGenerator(ArrowVectorType bufferType, ArrowBuf buffer, + private void writeValueToGenerator(BufferType bufferType, ArrowBuf buffer, ArrowBuf offsetBuffer, FieldVector vector, final int index, final int scale) throws IOException { if (bufferType.equals(TYPE)) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowVectorType.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowVectorType.java deleted file mode 100644 index 3342652bedd..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowVectorType.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector.ipc.message; - -import java.util.Map; - -import org.apache.arrow.flatbuf.VectorType; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableMap.Builder; - -public class ArrowVectorType { - - public static final ArrowVectorType DATA = new ArrowVectorType(VectorType.DATA); - public static final ArrowVectorType OFFSET = new ArrowVectorType(VectorType.OFFSET); - public static final ArrowVectorType VALIDITY = new ArrowVectorType(VectorType.VALIDITY); - public static final ArrowVectorType TYPE = new ArrowVectorType(VectorType.TYPE); - - private static final Map typeByName; - - static { - ArrowVectorType[] types = {DATA, OFFSET, VALIDITY, TYPE}; - Builder builder = ImmutableMap.builder(); - for (ArrowVectorType type : types) { - builder.put(type.getName(), type); - } - typeByName = builder.build(); - } - - public static ArrowVectorType fromName(String name) { - ArrowVectorType type = typeByName.get(name); - if (type == null) { - throw new IllegalArgumentException("Unknown type " + name); - } - return type; - } - - private final short type; - - public ArrowVectorType(short type) { - this.type = type; - // validate that the type is valid - getName(); - } - - @JsonCreator - private ArrowVectorType(String name) { - this.type = fromName(name).type; - } - - public short getType() { - return type; - } - - @JsonValue - public String getName() { - try { - return VectorType.name(type); - } catch (ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException("Unknown type " + type); - } - } - - @Override - public String toString() { - return getName(); - } - - @Override - public int hashCode() { - return type; - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof ArrowVectorType) { - ArrowVectorType other = (ArrowVectorType) obj; - return type == other.type; - } - return false; - } - -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 574612833cd..b1f036a34a9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -43,8 +43,7 @@ import org.apache.arrow.flatbuf.Type; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ipc.message.VectorLayout; -import org.apache.arrow.vector.ipc.message.TypeLayout; +import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.types.pojo.ArrowType.Int; public class Field { @@ -60,7 +59,6 @@ public static Field nullable(String name, ArrowType type) { private final String name; private final FieldType fieldType; private final List children; - private final TypeLayout typeLayout; @JsonCreator private Field( @@ -69,16 +67,14 @@ private Field( @JsonProperty("type") ArrowType type, @JsonProperty("dictionary") DictionaryEncoding dictionary, @JsonProperty("children") List children, - @JsonProperty("typeLayout") TypeLayout typeLayout, @JsonProperty("metadata") Map metadata) { - this(name, new FieldType(nullable, type, dictionary, metadata), children, typeLayout); + this(name, new FieldType(nullable, type, dictionary, metadata), children); } private Field(String name, FieldType fieldType, List children, TypeLayout typeLayout) { this.name = name; this.fieldType = checkNotNull(fieldType); this.children = children == null ? ImmutableList.of() : ImmutableList.copyOf(children); - this.typeLayout = checkNotNull(typeLayout); } // deprecated, use FieldType or static constructor instead @@ -117,10 +113,6 @@ public static Field convertField(org.apache.arrow.flatbuf.Field field) { } dictionary = new DictionaryEncoding(dictionaryFB.id(), dictionaryFB.isOrdered(), indexType); } - ImmutableList.Builder layout = ImmutableList.builder(); - for (int i = 0; i < field.layoutLength(); ++i) { - layout.add(new VectorLayout(field.layout(i))); - } ImmutableList.Builder childrenBuilder = ImmutableList.builder(); for (int i = 0; i < field.childrenLength(); i++) { Field childField = convertField(field.children(i)); @@ -135,7 +127,7 @@ public static Field convertField(org.apache.arrow.flatbuf.Field field) { metadataBuilder.put(key == null ? "" : key, value == null ? "" : value); } Map metadata = metadataBuilder.build(); - return new Field(name, nullable, type, dictionary, children, new TypeLayout(layout.build()), metadata); + return new Field(name, nullable, type, dictionary, children, metadata); } /** @@ -153,19 +145,11 @@ private static Field mutateOriginalNameIfNeeded(org.apache.arrow.flatbuf.Field f originalChildField.getType(), originalChildField.getDictionary(), originalChildField.getChildren(), - originalChildField.getTypeLayout(), originalChildField.getMetadata()); } return originalChildField; } - public void validate() { - TypeLayout expectedLayout = TypeLayout.getTypeLayout(getType()); - if (!expectedLayout.equals(typeLayout)) { - throw new IllegalArgumentException("Deserialized field does not match expected vectors. expected: " + expectedLayout + " got " + typeLayout); - } - } - public int getField(FlatBufferBuilder builder) { int nameOffset = name == null ? -1 : builder.createString(name); int typeOffset = getType().getType(builder); @@ -184,12 +168,6 @@ public int getField(FlatBufferBuilder builder) { childrenData[i] = children.get(i).getField(builder); } int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData); - int[] buffersData = new int[typeLayout.getVectors().size()]; - for (int i = 0; i < buffersData.length; i++) { - VectorLayout vectorLayout = typeLayout.getVectors().get(i); - buffersData[i] = vectorLayout.writeTo(builder); - } - int layoutOffset = org.apache.arrow.flatbuf.Field.createLayoutVector(builder, buffersData); int[] metadataOffsets = new int[getMetadata().size()]; Iterator> metadataIterator = getMetadata().entrySet().iterator(); for (int i = 0; i < metadataOffsets.length; i++) { @@ -210,7 +188,6 @@ public int getField(FlatBufferBuilder builder) { org.apache.arrow.flatbuf.Field.addTypeType(builder, getType().getTypeID().getFlatbufID()); org.apache.arrow.flatbuf.Field.addType(builder, typeOffset); org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset); - org.apache.arrow.flatbuf.Field.addLayout(builder, layoutOffset); org.apache.arrow.flatbuf.Field.addCustomMetadata(builder, metadataOffset); if (dictionary != null) { org.apache.arrow.flatbuf.Field.addDictionary(builder, dictionaryOffset); @@ -244,10 +221,6 @@ public List getChildren() { return children; } - public TypeLayout getTypeLayout() { - return typeLayout; - } - @JsonInclude(Include.NON_EMPTY) public Map getMetadata() { return fieldType.getMetadata(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 1cff7fa886e..1acce7e0b66 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -170,16 +170,16 @@ public void testSetLastSetUsage() throws Exception { /* set lastset and arbitrary valuecount for list vector. * * NOTE: if we don't execute setLastSet() before setLastValueCount(), then - * the latter will corrupt the offsetVector and thus the accessor will not - * retrieve the correct values from underlying dataVector. Run the test + * the latter will corrupt the offsetBuffer and thus the accessor will not + * retrieve the correct values from underlying dataBuffer. Run the test * by commenting out next line and we should see failures from 5th assert * onwards. This is why doing setLastSet() is important before setValueCount() * once the vector has been loaded. * * Another important thing to remember is the value of lastSet itself. * Even though the listVector has elements till index 2 only, the lastSet should - * be set as 3. This is because the offsetVector has valid offsets filled till index 3. - * If we do setLastSet(2), the offsetVector at index 3 will contain incorrect value + * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3. + * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value * after execution of setValueCount(). * * correct state of the listVector @@ -414,7 +414,7 @@ public void testSplitAndTransfer() throws Exception { transferPair.splitAndTransfer(start, splitLength); - /* get offsetVector of toVector */ + /* get offsetBuffer of toVector */ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); /* get dataVector of toVector */ diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 336ae1c7a01..601b2062ff6 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -38,7 +38,6 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; -import org.apache.arrow.vector.ipc.message.TypeLayout; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -711,11 +710,9 @@ public void testNullableFixedType3() { vector.setValueCount(1024); Field field = vector.getField(); - TypeLayout typeLayout = field.getTypeLayout(); List buffers = vector.getFieldBuffers(); - assertEquals(2, typeLayout.getVectors().size()); assertEquals(2, buffers.size()); ArrowBuf validityVectorBuf = buffers.get(0); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java index 49e194b51b8..bf42fbb83c8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java @@ -105,7 +105,6 @@ public void test() throws IOException { ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { Schema readSchema = reader.getVectorSchemaRoot().getSchema(); assertEquals(schema, readSchema); - assertTrue(readSchema.getFields().get(0).getTypeLayout().getVectorTypes().toString(), readSchema.getFields().get(0).getTypeLayout().getVectors().size() > 0); // TODO: dictionaries List recordBatches = reader.getRecordBlocks(); assertEquals(1, recordBatches.size()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java index bd1ec94c8fd..9ec9a078f7e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java @@ -124,9 +124,6 @@ public boolean loadNextBatch() throws IOException { public void run() { try { assertEquals(schema, reader.getVectorSchemaRoot().getSchema()); - assertTrue( - reader.getVectorSchemaRoot().getSchema().getFields().get(0).getTypeLayout().getVectorTypes().toString(), - reader.getVectorSchemaRoot().getSchema().getFields().get(0).getTypeLayout().getVectors().size() > 0); while (!done) { assertTrue(reader.loadNextBatch() != done); }