-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-17798: [C++][Parquet] Add DELTA_BINARY_PACKED encoder to Parquet writer #14191
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ed65e98
4b6c562
327e014
43fac72
1fd8212
c3e5179
cc1d74e
50e0b6f
4e11f41
8064b7f
b29ab91
09b8a32
a8c3d28
66b04ff
a7f1cea
068ea81
87eb662
ed68e54
586c324
c25ae52
62ae225
90999e1
e1000af
702b7a8
d46251c
01271e7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -173,6 +173,40 @@ TEST(BitArray, TestMixed) { | |||||
| } | ||||||
| } | ||||||
|
|
||||||
| // Writes up to 'num_vals' values with width 'bit_width' and reads them back. | ||||||
| static void TestPutValue(int bit_width, uint64_t num_vals) { | ||||||
| // The max value representable in `bit_width` bits. | ||||||
| const uint64_t max = std::numeric_limits<uint64_t>::max() >> (64 - bit_width); | ||||||
| num_vals = std::min(num_vals, max); | ||||||
| int len = static_cast<int>(bit_util::BytesForBits(bit_width * num_vals)); | ||||||
| EXPECT_GT(len, 0); | ||||||
|
|
||||||
| std::vector<uint8_t> buffer(len); | ||||||
| bit_util::BitWriter writer(buffer.data(), len); | ||||||
| for (uint64_t i = max - num_vals; i < max; i++) { | ||||||
| bool result = writer.PutValue(i, bit_width); | ||||||
| EXPECT_TRUE(result); | ||||||
| } | ||||||
| writer.Flush(); | ||||||
| EXPECT_EQ(writer.bytes_written(), len); | ||||||
|
|
||||||
| bit_util::BitReader reader(buffer.data(), len); | ||||||
| for (uint64_t i = max - num_vals; i < max; i++) { | ||||||
| int64_t val = 0; | ||||||
| bool result = reader.GetValue(bit_width, &val); | ||||||
| EXPECT_TRUE(result); | ||||||
| EXPECT_EQ(val, i); | ||||||
| } | ||||||
| EXPECT_EQ(reader.bytes_left(), 0); | ||||||
| } | ||||||
|
|
||||||
| TEST(BitUtil, RoundTripIntValues) { | ||||||
| for (int width = 1; width < 64; width++) { | ||||||
|
||||||
| for (int width = 1; width < 64; width++) { | |
| for (int width = 1; width <= 64; width++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I reverted this because it causes an overflow to 0, and I don't think testing for bit_width == 0 makes sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you mean? Testing that we are able to write 64-bit values makes sense.
Perhaps you want something like:
// The max value representable in `bit_width` bits.
const uint64_t max = std::numeric_limits<uint64_t>::max() >> (64 - bit_width);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changed to your proposal. It runs ok locally.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
> What do you mean? Testing that we are able to write 64-bit values makes sense.
What I meant was that 2 << 63 overflowed, setting max (and hence num_vals) to 0, which then tripped the EXPECT_GT(len, 0) check. I expect this is no longer an issue.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -400,7 +400,8 @@ typedef ::testing::Types<Int32Type, Int64Type, Int96Type, FloatType, DoubleType, | |
|
|
||
| TYPED_TEST_SUITE(TestPrimitiveWriter, TestTypes); | ||
|
|
||
| using TestNullValuesWriter = TestPrimitiveWriter<Int32Type>; | ||
| using TestValuesWriterInt32Type = TestPrimitiveWriter<Int32Type>; | ||
| using TestValuesWriterInt64Type = TestPrimitiveWriter<Int64Type>; | ||
|
|
||
| TYPED_TEST(TestPrimitiveWriter, RequiredPlain) { | ||
| this->TestRequiredWithEncoding(Encoding::PLAIN); | ||
|
|
@@ -418,23 +419,29 @@ TYPED_TEST(TestPrimitiveWriter, RequiredRLE) { | |
| TYPED_TEST(TestPrimitiveWriter, RequiredBitPacked) { | ||
| this->TestRequiredWithEncoding(Encoding::BIT_PACKED); | ||
| } | ||
| */ | ||
|
|
||
| TEST_F(TestValuesWriterInt32Type, RequiredDeltaBinaryPacked) { | ||
| this->TestRequiredWithEncoding(Encoding::DELTA_BINARY_PACKED); | ||
| } | ||
|
|
||
| TYPED_TEST(TestPrimitiveWriter, RequiredDeltaBinaryPacked) { | ||
| TEST_F(TestValuesWriterInt64Type, RequiredDeltaBinaryPacked) { | ||
| this->TestRequiredWithEncoding(Encoding::DELTA_BINARY_PACKED); | ||
| } | ||
|
||
|
|
||
| /* | ||
| TYPED_TEST(TestPrimitiveWriter, RequiredDeltaLengthByteArray) { | ||
| this->TestRequiredWithEncoding(Encoding::DELTA_LENGTH_BYTE_ARRAY); | ||
| } | ||
|
|
||
| TYPED_TEST(TestPrimitiveWriter, RequiredDeltaByteArray) { | ||
| this->TestRequiredWithEncoding(Encoding::DELTA_BYTE_ARRAY); | ||
| } | ||
| */ | ||
|
|
||
| TYPED_TEST(TestPrimitiveWriter, RequiredRLEDictionary) { | ||
| this->TestRequiredWithEncoding(Encoding::RLE_DICTIONARY); | ||
| } | ||
| */ | ||
|
|
||
| TYPED_TEST(TestPrimitiveWriter, RequiredPlainWithStats) { | ||
| this->TestRequiredWithSettings(Encoding::PLAIN, Compression::UNCOMPRESSED, false, true, | ||
|
|
@@ -647,7 +654,7 @@ TEST(TestWriter, NullValuesBuffer) { | |
|
|
||
| // PARQUET-719 | ||
| // Test case for NULL values | ||
| TEST_F(TestNullValuesWriter, OptionalNullValueChunk) { | ||
| TEST_F(TestValuesWriterInt32Type, OptionalNullValueChunk) { | ||
| this->SetUpSchema(Repetition::OPTIONAL); | ||
|
|
||
| this->GenerateData(LARGE_SIZE); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.