diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ecc48811e46..a49e58afbdb 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -30,6 +30,7 @@ #include #include "arrow/array.h" +#include "arrow/array/array_binary.h" #include "arrow/array/builder_binary.h" #include "arrow/array/builder_dict.h" #include "arrow/array/builder_primitive.h" @@ -2040,23 +2041,29 @@ class TypedRecordReader : public TypedColumnReaderImpl, LevelInfo leaf_info_; }; -class FLBARecordReader : public TypedRecordReader, - virtual public BinaryRecordReader { +class FLBARecordReader final : public TypedRecordReader, + virtual public BinaryRecordReader { public: FLBARecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, ::arrow::MemoryPool* pool, bool read_dense_for_nullable) : TypedRecordReader(descr, leaf_info, pool, read_dense_for_nullable), - builder_(nullptr) { + byte_width_(descr_->type_length()), + empty_(byte_width_, 0), + type_(::arrow::fixed_size_binary(byte_width_)), + null_bitmap_builder_(pool), + data_builder_(pool) { ARROW_DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY); - int byte_width = descr_->type_length(); - std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width); - builder_ = std::make_unique<::arrow::FixedSizeBinaryBuilder>(type, this->pool_); } ::arrow::ArrayVector GetBuilderChunks() override { - std::shared_ptr<::arrow::Array> chunk; - PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); - return ::arrow::ArrayVector({chunk}); + const int64_t null_count = null_bitmap_builder_.false_count(); + const int64_t length = null_bitmap_builder_.length(); + ARROW_DCHECK_EQ(length * byte_width_, data_builder_.length()); + PARQUET_ASSIGN_OR_THROW(auto data_buffer, data_builder_.Finish()); + PARQUET_ASSIGN_OR_THROW(auto null_bitmap, null_bitmap_builder_.Finish()); + auto chunk = std::make_shared<::arrow::FixedSizeBinaryArray>( + type_, length, data_buffer, null_bitmap, null_count); + return ::arrow::ArrayVector({std::move(chunk)}); } void ReadValuesDense(int64_t values_to_read) override { @@ -2065,9 +2072,9 @@ class FLBARecordReader : public TypedRecordReader, this->current_decoder_->Decode(values, static_cast(values_to_read)); CheckNumberDecoded(num_decoded, values_to_read); - for (int64_t i = 0; i < num_decoded; i++) { - PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); - } + PARQUET_THROW_NOT_OK(null_bitmap_builder_.Reserve(num_decoded)); + PARQUET_THROW_NOT_OK(data_builder_.Reserve(num_decoded * byte_width_)); + UnsafeAppendDense(values, num_decoded); ResetValues(); } @@ -2081,22 +2088,45 @@ class FLBARecordReader : public TypedRecordReader, valid_bits, valid_bits_offset); ARROW_DCHECK_EQ(num_decoded, values_to_read); + PARQUET_THROW_NOT_OK(null_bitmap_builder_.Reserve(num_decoded)); + PARQUET_THROW_NOT_OK(data_builder_.Reserve(num_decoded * byte_width_)); + if (null_count == 0) { + UnsafeAppendDense(values, num_decoded); + } else { + UnsafeAppendSpaced(values, num_decoded, valid_bits, valid_bits_offset); + } + ResetValues(); + } + + void UnsafeAppendDense(const FLBA* values, int64_t num_decoded) { + null_bitmap_builder_.UnsafeAppend(num_decoded, /*value=*/true); + for (int64_t i = 0; i < num_decoded; i++) { + data_builder_.UnsafeAppend(values[i].ptr, byte_width_); + } + } + + void UnsafeAppendSpaced(const FLBA* values, int64_t num_decoded, + const uint8_t* valid_bits, int64_t valid_bits_offset) { + null_bitmap_builder_.UnsafeAppend(valid_bits, valid_bits_offset, num_decoded); for (int64_t i = 0; i < num_decoded; i++) { if (::arrow::bit_util::GetBit(valid_bits, valid_bits_offset + i)) { - PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); + data_builder_.UnsafeAppend(values[i].ptr, byte_width_); } else { - PARQUET_THROW_NOT_OK(builder_->AppendNull()); + data_builder_.UnsafeAppend(empty_.data(), byte_width_); } } - ResetValues(); } private: - std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_; + const int byte_width_; + const std::vector empty_; + std::shared_ptr<::arrow::DataType> type_; + ::arrow::TypedBufferBuilder null_bitmap_builder_; + ::arrow::BufferBuilder data_builder_; }; -class ByteArrayChunkedRecordReader : public TypedRecordReader, - virtual public BinaryRecordReader { +class ByteArrayChunkedRecordReader final : public TypedRecordReader, + virtual public BinaryRecordReader { public: ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, ::arrow::MemoryPool* pool, bool read_dense_for_nullable) @@ -2137,8 +2167,8 @@ class ByteArrayChunkedRecordReader : public TypedRecordReader, typename EncodingTraits::Accumulator accumulator_; }; -class ByteArrayDictionaryRecordReader : public TypedRecordReader, - virtual public DictionaryRecordReader { +class ByteArrayDictionaryRecordReader final : public TypedRecordReader, + virtual public DictionaryRecordReader { public: ByteArrayDictionaryRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, ::arrow::MemoryPool* pool, bool read_dense_for_nullable)