Skip to content

Commit

Permalink
update API
Browse files (browse the repository at this point in the history)
  • Loading branch information
alamb committed Jun 11, 2024
1 parent ac38bc4 commit ade44df
Showing 1 changed file with 20 additions and 15 deletions.
35 changes: 20 additions & 15 deletions datafusion-examples/examples/advanced_parquet_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use async_trait::async_trait;
use bytes::Bytes;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::physical_plan::parquet::{
ParquetAccessPlan, RequestedStatistics, RowGroupAccess, StatisticsConverter,
ParquetAccessPlan, RowGroupAccess, StatisticsConverter,
};
use datafusion::datasource::physical_plan::{
parquet::ParquetFileReaderFactory, FileMeta, FileScanConfig, ParquetExec,
Expand Down Expand Up @@ -712,20 +712,25 @@ impl ParquetRowGroupMetadataIndexBuilder {
let num_row_groups = metadata.num_row_groups();

// Extract the min/max values for each row group from the statistics
// TODO make an API that permits appending a row group at a time
let row_counts = StatisticsConverter::row_counts(metadata)?;
let value_column_mins =
StatisticsConverter::try_new("value", RequestedStatistics::Min, schema)?
.extract(metadata)?;
let value_column_maxes =
StatisticsConverter::try_new("value", RequestedStatistics::Max, schema)?
.extract(metadata)?;
let tag_column_mins =
StatisticsConverter::try_new("tag", RequestedStatistics::Min, schema)?
.extract(metadata)?;
let tag_column_maxes =
StatisticsConverter::try_new("tag", RequestedStatistics::Max, schema)?
.extract(metadata)?;
let row_group_meta = metadata.row_groups();

let row_counts = StatisticsConverter::row_group_row_counts(row_group_meta)?;

let value_converter = StatisticsConverter::try_new(
"value",
schema,
metadata.file_metadata().schema_descr(),
)?;
let value_column_mins = value_converter.row_group_mins(row_group_meta)?;
let value_column_maxes = value_converter.row_group_maxes(row_group_meta)?;

let tag_converter = StatisticsConverter::try_new(
"tag",
schema,
metadata.file_metadata().schema_descr(),
)?;
let tag_column_mins = tag_converter.row_group_mins(row_group_meta)?;
let tag_column_maxes = tag_converter.row_group_maxes(row_group_meta)?;

// sanity check the statistics
assert_eq!(row_counts.len(), num_row_groups);
Expand Down

0 comments on commit ade44df

Please sign in to comment.