From 5a8c9e5f9f8e2f4bf710499ddb73e250aaf2079e Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 17 Nov 2021 15:19:59 +0100 Subject: [PATCH] ARROW-14704: [C++] Fix Valgrind failure in parquet-arrow-test Error log of Valgrind failure: ``` [----------] 3 tests from TestArrowReadDeltaEncoding [ RUN ] TestArrowReadDeltaEncoding.DeltaBinaryPacked [ OK ] TestArrowReadDeltaEncoding.DeltaBinaryPacked (812 ms) [ RUN ] TestArrowReadDeltaEncoding.DeltaByteArray ==12587== Conditional jump or move depends on uninitialised value(s) ==12587== at 0x4F12C57: Advance (bit_stream_utils.h:426) ==12587== by 0x4F12C57: parquet::(anonymous namespace)::DeltaBitPackDecoder >::GetInternal(int*, int) (encoding.cc:2216) ==12587== by 0x4F13823: Decode (encoding.cc:2091) ==12587== by 0x4F13823: parquet::(anonymous namespace)::DeltaByteArrayDecoder::SetData(int, unsigned char const*, int) (encoding.cc:2360) ==12587== by 0x4E89EF5: parquet::(anonymous namespace)::ColumnReaderImplBase >::InitializeDataDecoder(parquet::DataPage const&, long) (column_reader.cc:797) ==12587== by 0x4E9AE63: ReadNewPage (column_reader.cc:614) ==12587== by 0x4E9AE63: HasNextInternal (column_reader.cc:576) ==12587== by 0x4E9AE63: parquet::internal::(anonymous namespace)::TypedRecordReader >::ReadRecords(long) (column_reader.cc:1228) ==12587== by 0x4DFB19F: parquet::arrow::(anonymous namespace)::LeafReader::LoadBatch(long) (reader.cc:467) ==12587== by 0x4DF513C: parquet::arrow::ColumnReaderImpl::NextBatch(long, std::shared_ptr*) (reader.cc:108) ==12587== by 0x4DFB74D: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadColumn(int, std::vector > const&, parquet::arrow::ColumnReader*, std::shared_ptr*) (reader.cc:273) ==12587== by 0x4E11FDA: operator() (reader.cc:1180) ==12587== by 0x4E11FDA: arrow::Future, std::allocator > > arrow::internal::OptionalParallelForAsync, std::vector > const&, std::vector > const&, arrow::internal::Executor*)::{lambda(unsigned long, std::shared_ptr)#1}&, std::shared_ptr, std::shared_ptr >(bool, std::vector, std::allocator, std::allocator > > > >, parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr, std::vector > const&, std::vector > const&, arrow::internal::Executor*)::{lambda(unsigned long, std::shared_ptr)#1}&, arrow::internal::Executor*) (parallel.h:95) ==12587== by 0x4E126A9: parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr, std::vector > const&, std::vector > const&, arrow::internal::Executor*) (reader.cc:1198) ==12587== by 0x4E12F50: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadRowGroups(std::vector > const&, std::vector > const&, std::shared_ptr*) (reader.cc:1160) ==12587== by 0x4DFA2BC: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadTable(std::vector > const&, std::shared_ptr*) (reader.cc:198) ==12587== by 0x4DFA392: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadTable(std::shared_ptr*) (reader.cc:289) ==12587== by 0x1DCE62: parquet::arrow::TestArrowReadDeltaEncoding::ReadTableFromParquetFile(std::__cxx11::basic_string, std::allocator > const&, std::shared_ptr*) (arrow_reader_writer_test.cc:4174) ==12587== by 0x2266D2: parquet::arrow::TestArrowReadDeltaEncoding_DeltaByteArray_Test::TestBody() (arrow_reader_writer_test.cc:4209) ==12587== by 0x4AD2C9B: void testing::internal::HandleSehExceptionsInMethodIfSupported(testing::Test*, void (testing::Test::*)(), char const*) (gtest.cc:2607) ==12587== by 0x4AC9DD1: void testing::internal::HandleExceptionsInMethodIfSupported(testing::Test*, void (testing::Test::*)(), char const*) (gtest.cc:2643) ==12587== by 0x4AA4C02: testing::Test::Run() (gtest.cc:2682) ==12587== by 0x4AA563A: testing::TestInfo::Run() (gtest.cc:2861) ==12587== by 0x4AA600F: testing::TestSuite::Run() (gtest.cc:3015) ==12587== by 0x4AB631B: testing::internal::UnitTestImpl::RunAllTests() (gtest.cc:5855) ==12587== by 0x4AD3CE7: bool testing::internal::HandleSehExceptionsInMethodIfSupported(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) (gtest.cc:2607) ==12587== by 0x4ACB063: bool testing::internal::HandleExceptionsInMethodIfSupported(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) (gtest.cc:2643) ==12587== by 0x4AB47B6: testing::UnitTest::Run() (gtest.cc:5438) ==12587== by 0x4218918: RUN_ALL_TESTS() (gtest.h:2490) ==12587== by 0x421895B: main (gtest_main.cc:52) ``` --- cpp/src/parquet/encoding.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 4c980dfe938..783e8680e8e 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2175,6 +2175,10 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecodernum_values_); + if (max_values == 0) { + return 0; + } + DCHECK_LE(static_cast(max_values), total_value_count_); int i = 0; while (i < max_values) { @@ -2272,6 +2276,9 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, // Decode up to `max_values` strings into an internal buffer // and reference them into `buffer`. max_values = std::min(max_values, num_valid_values_); + if (max_values == 0) { + return 0; + } int32_t data_size = 0; const int32_t* length_ptr = @@ -2406,6 +2413,10 @@ class DeltaByteArrayDecoder : public DecoderImpl, // Decode up to `max_values` strings into an internal buffer // and reference them into `buffer`. max_values = std::min(max_values, num_valid_values_); + if (max_values == 0) { + return max_values; + } + suffix_decoder_.Decode(buffer, max_values); int64_t data_size = 0;