Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2186,7 +2186,7 @@ void DeltaBitPackEncoder<DType>::Put(const T* src, int num_values) {
while (idx < num_values) {
UT value = static_cast<UT>(src[idx]);
// Calculate deltas. The possible overflow is handled by use of unsigned integers
// making subtraction operations well defined and correct even in case of overflow.
// making subtraction operations well-defined and correct even in case of overflow.
// Encoded integers will wrap back around on decoding.
// See http://en.wikipedia.org/wiki/Modular_arithmetic#Integers_modulo_n
deltas_[values_current_block_] = value - current_value_;
Expand Down Expand Up @@ -2282,6 +2282,11 @@ std::shared_ptr<Buffer> DeltaBitPackEncoder<DType>::FlushValues() {
std::memcpy(buffer->mutable_data() + offset_bytes, header_buffer_,
header_writer.bytes_written());

// Reset counter of cached values
total_value_count_ = 0;
// Reserve enough space at the beginning of the buffer for the largest possible header.
PARQUET_THROW_NOT_OK(sink_.Advance(kMaxPageHeaderWriterSize));

// Excess bytes at the beginning are sliced off and ignored.
return SliceBuffer(buffer, offset_bytes);
}
Expand Down
37 changes: 21 additions & 16 deletions cpp/src/parquet/encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,7 @@ class TestDeltaBitPackEncoding : public TestEncodingBase<Type> {
public:
using c_type = typename Type::c_type;
static constexpr int TYPE = Type::type_num;
static constexpr size_t ROUND_TRIP_TIMES = 3;

void InitBoundData(int nvalues, int repeats, c_type half_range) {
num_values_ = nvalues * repeats;
Expand Down Expand Up @@ -1328,14 +1329,16 @@ class TestDeltaBitPackEncoding : public TestEncodingBase<Type> {
MakeTypedEncoder<Type>(Encoding::DELTA_BINARY_PACKED, false, descr_.get());
auto decoder = MakeTypedDecoder<Type>(Encoding::DELTA_BINARY_PACKED, descr_.get());

encoder->Put(draws_, num_values_);
encode_buffer_ = encoder->FlushValues();
for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) {
encoder->Put(draws_, num_values_);
encode_buffer_ = encoder->FlushValues();

decoder->SetData(num_values_, encode_buffer_->data(),
static_cast<int>(encode_buffer_->size()));
int values_decoded = decoder->Decode(decode_buf_, num_values_);
ASSERT_EQ(num_values_, values_decoded);
ASSERT_NO_FATAL_FAILURE(VerifyResults<c_type>(decode_buf_, draws_, num_values_));
decoder->SetData(num_values_, encode_buffer_->data(),
static_cast<int>(encode_buffer_->size()));
int values_decoded = decoder->Decode(decode_buf_, num_values_);
ASSERT_EQ(num_values_, values_decoded);
ASSERT_NO_FATAL_FAILURE(VerifyResults<c_type>(decode_buf_, draws_, num_values_));
}
}

void CheckRoundtripSpaced(const uint8_t* valid_bits,
Expand All @@ -1350,15 +1353,17 @@ class TestDeltaBitPackEncoding : public TestEncodingBase<Type> {
}
}

encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset);
encode_buffer_ = encoder->FlushValues();
decoder->SetData(num_values_ - null_count, encode_buffer_->data(),
static_cast<int>(encode_buffer_->size()));
auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count,
valid_bits, valid_bits_offset);
ASSERT_EQ(num_values_, values_decoded);
ASSERT_NO_FATAL_FAILURE(VerifyResultsSpaced<c_type>(decode_buf_, draws_, num_values_,
valid_bits, valid_bits_offset));
for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) {
encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset);
encode_buffer_ = encoder->FlushValues();
decoder->SetData(num_values_ - null_count, encode_buffer_->data(),
static_cast<int>(encode_buffer_->size()));
auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count,
valid_bits, valid_bits_offset);
ASSERT_EQ(num_values_, values_decoded);
ASSERT_NO_FATAL_FAILURE(VerifyResultsSpaced<c_type>(
decode_buf_, draws_, num_values_, valid_bits, valid_bits_offset));
}
}

protected:
Expand Down