Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 0 additions & 39 deletions cpp/src/arrow/ipc/json-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,6 @@ namespace ipc {
namespace json {
namespace internal {

static std::string GetBufferTypeName(BufferType type) {
switch (type) {
case BufferType::DATA:
return "DATA";
case BufferType::OFFSET:
return "OFFSET";
case BufferType::TYPE:
return "TYPE";
case BufferType::VALIDITY:
return "VALIDITY";
default:
break;
}
return "UNKNOWN";
}

static std::string GetFloatingPrecisionName(FloatingPoint::Precision precision) {
switch (precision) {
case FloatingPoint::HALF:
Expand Down Expand Up @@ -174,12 +158,9 @@ class SchemaWriter {
RETURN_NOT_OK(WriteDictionaryMetadata(dict_type));

const DataType& dictionary_type = *dict_type.dictionary()->type();
const DataType& index_type = *dict_type.index_type();
RETURN_NOT_OK(WriteChildren(dictionary_type.children()));
WriteBufferLayout(index_type.GetBufferLayout());
} else {
RETURN_NOT_OK(WriteChildren(type.children()));
WriteBufferLayout(type.GetBufferLayout());
}

writer_->EndObject();
Expand Down Expand Up @@ -301,26 +282,6 @@ class SchemaWriter {
return Status::OK();
}

void WriteBufferLayout(const std::vector<BufferDescr>& buffer_layout) {
writer_->Key("typeLayout");
writer_->StartObject();
writer_->Key("vectors");
writer_->StartArray();

for (const BufferDescr& buffer : buffer_layout) {
writer_->StartObject();
writer_->Key("type");
writer_->String(GetBufferTypeName(buffer.type()));

writer_->Key("typeBitWidth");
writer_->Int(buffer.bit_width());

writer_->EndObject();
}
writer_->EndArray();
writer_->EndObject();
}

Status WriteChildren(const std::vector<std::shared_ptr<Field>>& children) {
writer_->Key("children");
writer_->StartArray();
Expand Down
34 changes: 2 additions & 32 deletions cpp/src/arrow/ipc/metadata-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
using FieldOffset = flatbuffers::Offset<flatbuf::Field>;
using KeyValueOffset = flatbuffers::Offset<flatbuf::KeyValue>;
using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>;
using Offset = flatbuffers::Offset<void>;
using FBString = flatbuffers::Offset<flatbuffers::String>;

Expand Down Expand Up @@ -341,34 +340,8 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
// TODO(wesm): Convert this to visitor pattern
static Status TypeToFlatbuffer(FBB& fbb, const DataType& type,
std::vector<FieldOffset>* children,
std::vector<VectorLayoutOffset>* layout,
flatbuf::Type* out_type, DictionaryMemo* dictionary_memo,
Offset* offset) {
std::vector<BufferDescr> buffer_layout = type.GetBufferLayout();
for (const BufferDescr& descr : buffer_layout) {
flatbuf::VectorType vector_type;
switch (descr.type()) {
case BufferType::OFFSET:
vector_type = flatbuf::VectorType_OFFSET;
break;
case BufferType::DATA:
vector_type = flatbuf::VectorType_DATA;
break;
case BufferType::VALIDITY:
vector_type = flatbuf::VectorType_VALIDITY;
break;
case BufferType::TYPE:
vector_type = flatbuf::VectorType_TYPE;
break;
default:
vector_type = flatbuf::VectorType_DATA;
break;
}
auto offset = flatbuf::CreateVectorLayout(
fbb, static_cast<int16_t>(descr.bit_width()), vector_type);
layout->push_back(offset);
}

const DataType* value_type = &type;

if (type.id() == Type::DICTIONARY) {
Expand Down Expand Up @@ -543,14 +516,11 @@ static Status FieldToFlatbuffer(FBB& fbb, const Field& field,

flatbuf::Type type_enum;
Offset type_offset;
Offset type_layout;
std::vector<FieldOffset> children;
std::vector<VectorLayoutOffset> layout;

RETURN_NOT_OK(TypeToFlatbuffer(fbb, *field.type(), &children, &layout, &type_enum,
RETURN_NOT_OK(TypeToFlatbuffer(fbb, *field.type(), &children, &type_enum,
dictionary_memo, &type_offset));
auto fb_children = fbb.CreateVector(children);
auto fb_layout = fbb.CreateVector(layout);

DictionaryOffset dictionary = 0;
if (field.type()->id() == Type::DICTIONARY) {
Expand All @@ -560,7 +530,7 @@ static Status FieldToFlatbuffer(FBB& fbb, const Field& field,

// TODO: produce the list of VectorTypes
*offset = flatbuf::CreateField(fbb, fb_name, field.nullable(), type_enum, type_offset,
dictionary, fb_children, fb_layout);
dictionary, fb_children);

return Status::OK();
}
Expand Down
37 changes: 0 additions & 37 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -474,43 +474,6 @@ std::shared_ptr<DataType> decimal(int32_t precision, int32_t scale) {
return std::make_shared<Decimal128Type>(precision, scale);
}

static const BufferDescr kValidityBuffer(BufferType::VALIDITY, 1);
static const BufferDescr kOffsetBuffer(BufferType::OFFSET, 32);
static const BufferDescr kTypeBuffer(BufferType::TYPE, 32);
static const BufferDescr kBooleanBuffer(BufferType::DATA, 1);
static const BufferDescr kValues64(BufferType::DATA, 64);
static const BufferDescr kValues32(BufferType::DATA, 32);
static const BufferDescr kValues16(BufferType::DATA, 16);
static const BufferDescr kValues8(BufferType::DATA, 8);

std::vector<BufferDescr> FixedWidthType::GetBufferLayout() const {
return {kValidityBuffer, BufferDescr(BufferType::DATA, bit_width())};
}

std::vector<BufferDescr> NullType::GetBufferLayout() const { return {}; }

std::vector<BufferDescr> BinaryType::GetBufferLayout() const {
return {kValidityBuffer, kOffsetBuffer, kValues8};
}

std::vector<BufferDescr> FixedSizeBinaryType::GetBufferLayout() const {
return {kValidityBuffer, BufferDescr(BufferType::DATA, bit_width())};
}

std::vector<BufferDescr> ListType::GetBufferLayout() const {
return {kValidityBuffer, kOffsetBuffer};
}

std::vector<BufferDescr> StructType::GetBufferLayout() const { return {kValidityBuffer}; }

std::vector<BufferDescr> UnionType::GetBufferLayout() const {
if (mode_ == UnionMode::SPARSE) {
return {kValidityBuffer, kTypeBuffer};
} else {
return {kValidityBuffer, kTypeBuffer, kOffsetBuffer};
}
}

std::string Decimal128Type::ToString() const {
std::stringstream s;
s << "decimal(" << precision_ << ", " << scale_ << ")";
Expand Down
30 changes: 0 additions & 30 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,20 +133,6 @@ struct Type {
};
};

enum class BufferType : char { DATA, OFFSET, TYPE, VALIDITY };

class BufferDescr {
public:
BufferDescr(BufferType type, int bit_width) : type_(type), bit_width_(bit_width) {}

BufferType type() const { return type_; }
int bit_width() const { return bit_width_; }

private:
BufferType type_;
int bit_width_;
};

class ARROW_EXPORT DataType {
public:
explicit DataType(Type::type id) : id_(id) {}
Expand Down Expand Up @@ -176,8 +162,6 @@ class ARROW_EXPORT DataType {
/// \since 0.7.0
virtual std::string name() const = 0;

virtual std::vector<BufferDescr> GetBufferLayout() const = 0;

Type::type id() const { return id_; }

protected:
Expand All @@ -201,8 +185,6 @@ class ARROW_EXPORT FixedWidthType : public DataType {
using DataType::DataType;

virtual int bit_width() const = 0;

std::vector<BufferDescr> GetBufferLayout() const override;
};

class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
Expand Down Expand Up @@ -319,8 +301,6 @@ class ARROW_EXPORT NullType : public DataType, public NoExtraMeta {
std::string ToString() const override;

std::string name() const override { return "null"; }

std::vector<BufferDescr> GetBufferLayout() const override;
};

class ARROW_EXPORT BooleanType : public FixedWidthType, public NoExtraMeta {
Expand Down Expand Up @@ -425,8 +405,6 @@ class ARROW_EXPORT ListType : public NestedType {
std::string ToString() const override;

std::string name() const override { return "list"; }

std::vector<BufferDescr> GetBufferLayout() const override;
};

// BinaryType type is represents lists of 1-byte values.
Expand All @@ -440,8 +418,6 @@ class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
std::string ToString() const override;
std::string name() const override { return "binary"; }

std::vector<BufferDescr> GetBufferLayout() const override;

protected:
// Allow subclasses to change the logical type.
explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
Expand All @@ -461,8 +437,6 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public Parametri
std::string ToString() const override;
std::string name() const override { return "fixed_size_binary"; }

std::vector<BufferDescr> GetBufferLayout() const override;

int32_t byte_width() const { return byte_width_; }
int bit_width() const override;

Expand Down Expand Up @@ -494,8 +468,6 @@ class ARROW_EXPORT StructType : public NestedType {
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
std::string name() const override { return "struct"; }

std::vector<BufferDescr> GetBufferLayout() const override;
};

class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
Expand Down Expand Up @@ -541,8 +513,6 @@ class ARROW_EXPORT UnionType : public NestedType {
std::string name() const override { return "union"; }
Status Accept(TypeVisitor* visitor) const override;

std::vector<BufferDescr> GetBufferLayout() const override;

const std::vector<uint8_t>& type_codes() const { return type_codes_; }

UnionMode::type mode() const { return mode_; }
Expand Down
31 changes: 1 addition & 30 deletions format/Schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -211,32 +211,6 @@ union Type {
Map
}

/// ----------------------------------------------------------------------
/// The possible types of a vector

enum VectorType: short {
/// used in List type, Dense Union and variable length primitive types (String, Binary)
OFFSET,
/// actual data, either wixed width primitive types in slots or variable width delimited by an OFFSET vector
DATA,
/// Bit vector indicating if each value is null
VALIDITY,
/// Type vector used in Union type
TYPE
}

/// ----------------------------------------------------------------------
/// represents the physical layout of a buffer
/// buffers have fixed width slots of a given type

table VectorLayout {
/// the width of a slot in the buffer (typically 1, 8, 16, 32 or 64)
bit_width: short;
/// the purpose of the vector
type: VectorType;
}


/// ----------------------------------------------------------------------
/// user defined key value pairs to add custom metadata to arrow
/// key namespacing is the responsibility of the user
Expand Down Expand Up @@ -285,10 +259,7 @@ table Field {

// children apply only to Nested data types like Struct, List and Union
children: [Field];
/// layout of buffers produced for this type (as derived from the Type)
/// does not include children
/// each recordbatch will return instances of those Buffers.
layout: [ VectorLayout ];

// User-defined metadata
custom_metadata: [ KeyValue ];
}
Expand Down
28 changes: 6 additions & 22 deletions integration/data/simple.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,20 @@
{
"name": "foo",
"type": {"name": "int", "isSigned": true, "bitWidth": 32},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 32}
]
}
"nullable": true,
"children": []
},
{
"name": "bar",
"type": {"name": "floatingpoint", "precision": "DOUBLE"},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 64}
]
}
"nullable": true,
"children": []
},
{
"name": "baz",
"type": {"name": "utf8"},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "OFFSET", "typeBitWidth": 32},
{"type": "DATA", "typeBitWidth": 8}
]
}
"nullable": true,
"children": []
}
]
},
Expand Down
Loading