-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[Parquet] Support skipping pages with mask based evaluation #9118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
sdf-jkl
wants to merge
14
commits into
apache:main
Choose a base branch
from
sdf-jkl:bitmask-skip-page
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+119
−71
Open
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
321a8d3
Finding where to start
sdf-jkl 8acfb4e
Seems to work
sdf-jkl ed2a182
Fix
sdf-jkl 145ecec
Merge branch 'main' into bitmask-skip-page
sdf-jkl 5395dbf
Fix async err?
sdf-jkl df9a493
Fix complexity from O(n^2) to O(logn)
sdf-jkl 014dbc9
Merge branch 'bitmask-skip-page' of https://github.com/sdf-jkl/arrow-…
sdf-jkl 55e0126
Pass pagelocation instead offsetindexmetadata
sdf-jkl 6919196
Fix clippy
sdf-jkl 83dfb46
Merge branch 'main' of https://github.com/apache/arrow-rs into bitmas…
sdf-jkl 87e21d9
Only add page_offsets if the policy is bitmask
sdf-jkl b41a94b
Add assert row values to end to end tests
sdf-jkl 700d550
Add cursor page awarness test
sdf-jkl 48d93af
Merge branch 'main' into bitmask-skip-page
Dandandan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,9 +22,9 @@ use crate::DecodeResult; | |
| use crate::arrow::ProjectionMask; | ||
| use crate::arrow::array_reader::{ArrayReaderBuilder, RowGroupCache}; | ||
| use crate::arrow::arrow_reader::metrics::ArrowReaderMetrics; | ||
| use crate::arrow::arrow_reader::selection::RowSelectionStrategy; | ||
| use crate::arrow::arrow_reader::{ | ||
| ParquetRecordBatchReader, ReadPlanBuilder, RowFilter, RowSelection, RowSelectionPolicy, | ||
| selection::RowSelectionStrategy, | ||
| }; | ||
| use crate::arrow::in_memory_row_group::ColumnChunkData; | ||
| use crate::arrow::push_decoder::reader_builder::data::DataRequestBuilder; | ||
|
|
@@ -536,12 +536,6 @@ impl RowGroupReaderBuilder { | |
|
|
||
| plan_builder = plan_builder.with_row_selection_policy(self.row_selection_policy); | ||
|
|
||
| plan_builder = override_selector_strategy_if_needed( | ||
| plan_builder, | ||
| &self.projection, | ||
| self.row_group_offset_index(row_group_idx), | ||
| ); | ||
|
|
||
| let row_group_info = RowGroupInfo { | ||
| row_group_idx, | ||
| row_count, | ||
|
|
@@ -588,6 +582,23 @@ impl RowGroupReaderBuilder { | |
| &mut self.buffers, | ||
| )?; | ||
|
|
||
| // before plan is build below | ||
| // check if plan is bitmask and if it is, put it in a variable | ||
| let page_offsets = if plan_builder.resolve_selection_strategy() | ||
| == RowSelectionStrategy::Mask | ||
| && plan_builder.selection().is_some_and(|selection| { | ||
| selection.requires_page_aware_mask( | ||
| &self.projection, | ||
| self.row_group_offset_index(row_group_idx), | ||
| ) | ||
| }) { | ||
| self.row_group_offset_index(row_group_idx) | ||
| .and_then(|columns| columns.first()) | ||
| .map(|column| column.page_locations()) | ||
| } else { | ||
| None | ||
| }; | ||
|
|
||
| let plan = plan_builder.build(); | ||
|
|
||
| // if we have any cached results, connect them up | ||
|
|
@@ -603,7 +614,8 @@ impl RowGroupReaderBuilder { | |
| .build_array_reader(self.fields.as_deref(), &self.projection) | ||
| }?; | ||
|
|
||
| let reader = ParquetRecordBatchReader::new(array_reader, plan); | ||
| let reader = | ||
| ParquetRecordBatchReader::new(array_reader, plan, page_offsets.cloned()); | ||
| NextState::result(RowGroupDecoderState::Finished, DecodeResult::Data(reader)) | ||
| } | ||
| RowGroupDecoderState::Finished => { | ||
|
|
@@ -654,57 +666,6 @@ impl RowGroupReaderBuilder { | |
| } | ||
| } | ||
|
|
||
| /// Override the selection strategy if needed. | ||
| /// | ||
| /// Some pages can be skipped during row-group construction if they are not read | ||
| /// by the selections. This means that the data pages for those rows are never | ||
| /// loaded and definition/repetition levels are never read. When using | ||
| /// `RowSelections` selection works because `skip_records()` handles this | ||
| /// case and skips the page accordingly. | ||
| /// | ||
| /// However, with the current mask design, all values must be read and decoded | ||
| /// and then a mask filter is applied. Thus if any pages are skipped during | ||
| /// row-group construction, the data pages are missing and cannot be decoded. | ||
| /// | ||
| /// A simple example: | ||
| /// * the page size is 2, the mask is 100001, row selection should be read(1) skip(4) read(1) | ||
| /// * the `ColumnChunkData` would be page1(10), page2(skipped), page3(01) | ||
| /// | ||
| /// Using the row selection to skip(4), page2 won't be read at all, so in this | ||
| /// case we can't decode all the rows and apply a mask. To correctly apply the | ||
| /// bit mask, we need all 6 values be read, but page2 is not in memory. | ||
| fn override_selector_strategy_if_needed( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice -- the idea is to avoid this function 👍 |
||
| plan_builder: ReadPlanBuilder, | ||
| projection_mask: &ProjectionMask, | ||
| offset_index: Option<&[OffsetIndexMetaData]>, | ||
| ) -> ReadPlanBuilder { | ||
| // override only applies to Auto policy, If the policy is already Mask or Selectors, respect that | ||
| let RowSelectionPolicy::Auto { .. } = plan_builder.row_selection_policy() else { | ||
| return plan_builder; | ||
| }; | ||
|
|
||
| let preferred_strategy = plan_builder.resolve_selection_strategy(); | ||
|
|
||
| let force_selectors = matches!(preferred_strategy, RowSelectionStrategy::Mask) | ||
| && plan_builder.selection().is_some_and(|selection| { | ||
| selection.should_force_selectors(projection_mask, offset_index) | ||
| }); | ||
|
|
||
| let resolved_strategy = if force_selectors { | ||
| RowSelectionStrategy::Selectors | ||
| } else { | ||
| preferred_strategy | ||
| }; | ||
|
|
||
| // override the plan builder strategy with the resolved one | ||
| let new_policy = match resolved_strategy { | ||
| RowSelectionStrategy::Mask => RowSelectionPolicy::Mask, | ||
| RowSelectionStrategy::Selectors => RowSelectionPolicy::Selectors, | ||
| }; | ||
|
|
||
| plan_builder.with_row_selection_policy(new_policy) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clonedmay cause extra expense here, can we useArc<[PageLocation]>to avoid that?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a big api change to make
PageLocationorOffsetIndexMetadataDataan Arc insideParquetMetaData.If we'd want to make that change, I can open an issue and work up a PR.