diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 4923870e9e6..9761dfd3013 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2186,7 +2186,7 @@ void DeltaBitPackEncoder::Put(const T* src, int num_values) { while (idx < num_values) { UT value = static_cast(src[idx]); // Calculate deltas. The possible overflow is handled by use of unsigned integers - // making subtraction operations well defined and correct even in case of overflow. + // making subtraction operations well-defined and correct even in case of overflow. // Encoded integers will wrap back around on decoding. // See http://en.wikipedia.org/wiki/Modular_arithmetic#Integers_modulo_n deltas_[values_current_block_] = value - current_value_; @@ -2282,6 +2282,11 @@ std::shared_ptr DeltaBitPackEncoder::FlushValues() { std::memcpy(buffer->mutable_data() + offset_bytes, header_buffer_, header_writer.bytes_written()); + // Reset counter of cached values + total_value_count_ = 0; + // Reserve enough space at the beginning of the buffer for largest possible header. + PARQUET_THROW_NOT_OK(sink_.Advance(kMaxPageHeaderWriterSize)); + // Excess bytes at the beginning are sliced off and ignored. return SliceBuffer(buffer, offset_bytes); } diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index f0a5f32c413..3b4cafab829 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -1290,6 +1290,7 @@ class TestDeltaBitPackEncoding : public TestEncodingBase { public: using c_type = typename Type::c_type; static constexpr int TYPE = Type::type_num; + static constexpr size_t ROUND_TRIP_TIMES = 3; void InitBoundData(int nvalues, int repeats, c_type half_range) { num_values_ = nvalues * repeats; @@ -1328,14 +1329,16 @@ class TestDeltaBitPackEncoding : public TestEncodingBase { MakeTypedEncoder(Encoding::DELTA_BINARY_PACKED, false, descr_.get()); auto decoder = MakeTypedDecoder(Encoding::DELTA_BINARY_PACKED, descr_.get()); - encoder->Put(draws_, num_values_); - encode_buffer_ = encoder->FlushValues(); + for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) { + encoder->Put(draws_, num_values_); + encode_buffer_ = encoder->FlushValues(); - decoder->SetData(num_values_, encode_buffer_->data(), - static_cast(encode_buffer_->size())); - int values_decoded = decoder->Decode(decode_buf_, num_values_); - ASSERT_EQ(num_values_, values_decoded); - ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); + decoder->SetData(num_values_, encode_buffer_->data(), + static_cast(encode_buffer_->size())); + int values_decoded = decoder->Decode(decode_buf_, num_values_); + ASSERT_EQ(num_values_, values_decoded); + ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); + } } void CheckRoundtripSpaced(const uint8_t* valid_bits, @@ -1350,15 +1353,17 @@ class TestDeltaBitPackEncoding : public TestEncodingBase { } } - encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); - encode_buffer_ = encoder->FlushValues(); - decoder->SetData(num_values_ - null_count, encode_buffer_->data(), - static_cast(encode_buffer_->size())); - auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count, - valid_bits, valid_bits_offset); - ASSERT_EQ(num_values_, values_decoded); - ASSERT_NO_FATAL_FAILURE(VerifyResultsSpaced(decode_buf_, draws_, num_values_, - valid_bits, valid_bits_offset)); + for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) { + encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); + encode_buffer_ = encoder->FlushValues(); + decoder->SetData(num_values_ - null_count, encode_buffer_->data(), + static_cast(encode_buffer_->size())); + auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count, + valid_bits, valid_bits_offset); + ASSERT_EQ(num_values_, values_decoded); + ASSERT_NO_FATAL_FAILURE(VerifyResultsSpaced( + decode_buf_, draws_, num_values_, valid_bits, valid_bits_offset)); + } } protected: