Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion velox/dwio/parquet/reader/PageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ void PageReader::prepareDataPageV2(const PageHeader& pageHeader, int64_t row) {
}

encodedDataSize_ = pageHeader.uncompressed_page_size - levelsSize;
numNulls_ = pageHeader.data_page_header_v2.num_nulls;
encoding_ = pageHeader.data_page_header_v2.encoding;
if (numRowsInPage_ == kRowsUnknown) {
readPageDefLevels();
Expand Down Expand Up @@ -543,8 +544,10 @@ void PageReader::preloadRepDefs() {
auto begin = definitionLevels_.size();
auto numLevels = definitionLevels_.size() + numRepDefsInPage_;
definitionLevels_.resize(numLevels);
wideDefineDecoder_->GetBatch(
if (wideDefineDecoder_) {
wideDefineDecoder_->GetBatch(
definitionLevels_.data() + begin, numRepDefsInPage_);
}
if (repeatDecoder_) {
repetitionLevels_.resize(numLevels);

Expand Down Expand Up @@ -699,6 +702,15 @@ void PageReader::makeDecoder() {
"DELTA_BINARY_PACKED decoder only supports INT32 and INT64");
}
break;
case Encoding::RLE:
switch (parquetType) {
case thrift::Type::BOOLEAN:
rleBooleanDecoder_ = std::make_unique<RleBooleanDecoder>(pageData_, pageData_ + encodedDataSize_, decompressedData_, repetitionLevels_.data(), encodedDataSize_, numNulls_);
break;
default:
VELOX_UNSUPPORTED("RLE decoder only supports boolean");
}
break;
default:
VELOX_UNSUPPORTED("Encoding not supported yet: {}", encoding_);
}
Expand Down Expand Up @@ -739,6 +751,8 @@ void PageReader::skip(int64_t numRows) {
booleanDecoder_->skip(toSkip);
} else if (deltaBpDecoder_) {
deltaBpDecoder_->skip(toSkip);
} else if (rleBooleanDecoder_) {
rleBooleanDecoder_->skip(toSkip);
} else {
VELOX_FAIL("No decoder to skip");
}
Expand Down
22 changes: 20 additions & 2 deletions velox/dwio/parquet/reader/PageReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "velox/dwio/parquet/reader/BooleanDecoder.h"
#include "velox/dwio/parquet/reader/DeltaBpDecoder.h"
#include "velox/dwio/parquet/reader/ParquetTypeWithId.h"
#include "velox/dwio/parquet/reader/RleBooleanDecoder.h"
#include "velox/dwio/parquet/reader/RleBpDataDecoder.h"
#include "velox/dwio/parquet/reader/StringDecoder.h"

Expand Down Expand Up @@ -329,9 +330,23 @@ class PageReader {
VELOX_CHECK(!isDictionary(), "BOOLEAN types are never dictionary-encoded");
if (nulls) {
nullsFromFastPath = false;
booleanDecoder_->readWithVisitor<true>(nulls, visitor);
switch (encoding_)
{
case thrift::Encoding::RLE:
rleBooleanDecoder_->readWithVisitor<true>(nulls, visitor);
break;
default:
booleanDecoder_->readWithVisitor<true>(nulls, visitor);
}
} else {
booleanDecoder_->readWithVisitor<false>(nulls, visitor);
switch (encoding_)
{
case thrift::Encoding::RLE:
rleBooleanDecoder_->readWithVisitor<false>(nulls, visitor);
break;
default:
booleanDecoder_->readWithVisitor<false>(nulls, visitor);
}
}
}

Expand Down Expand Up @@ -439,6 +454,8 @@ class PageReader {
// Number of bytes starting at pageData_ for current encoded data.
int32_t encodedDataSize_{0};

// Null count for the current page. In the code visible here it is assigned
// from data_page_header_v2.num_nulls in prepareDataPageV2() and passed to the
// RleBooleanDecoder constructor in makeDecoder().
// NOTE(review): no assignment for V1 data pages is visible in this diff -
// confirm the value cannot be stale when a V1 page follows a V2 page.
int32_t numNulls_{0};

// The members below keep state between calls to readWithVisitor().

// Original rows in Visitor.
Expand Down Expand Up @@ -489,6 +506,7 @@ class PageReader {
std::unique_ptr<StringDecoder> stringDecoder_;
std::unique_ptr<BooleanDecoder> booleanDecoder_;
std::unique_ptr<DeltaBpDecoder> deltaBpDecoder_;
std::unique_ptr<RleBooleanDecoder> rleBooleanDecoder_;
// Add decoders for other encodings here.
};

Expand Down
Loading