File tree 3 files changed +15
-0
lines changed
3 files changed +15
-0
lines changed Original file line number Diff line number Diff line change @@ -73,6 +73,17 @@ pub struct DeltaTableMeta {
73
73
partition_columns : Vec < String > ,
74
74
}
75
75
76
+ /// In a delta table, partition columns are not stored in parquet file.
77
+ /// So some extra work is needed to make pushdown work:
78
+ /// - context:
79
+ /// - The table stores partition column names in meta.engine_options.
80
+ /// - Each partition carries all partition column values in the same order.
81
+ /// - With this order, we can look up the needed info with a PartitionIndex.
82
+ /// - pushdown:
83
+ /// - projections (mask): partition columns are excluded when reading the parquet file and inserted at the end.
84
+ /// - filter passed to the parquet reader: all partition columns are appended to the filter input columns.
85
+ /// - pruner: ColumnRefs of partition columns in the filter expr are replaced with const scalars.
86
+ /// Partition columns can only have simple primitive types.
76
87
impl DeltaTable {
77
88
#[ async_backtrace:: framed]
78
89
pub fn try_create ( info : TableInfo ) -> Result < Box < dyn Table > > {
Original file line number Diff line number Diff line change @@ -68,6 +68,7 @@ pub struct ParquetRSFullReader {
68
68
}
69
69
70
70
impl ParquetRSFullReader {
71
+ // `partition_fields` is only used for the Delta table engine.
71
72
pub async fn prepare_data_stream (
72
73
& self ,
73
74
loc : & str ,
Original file line number Diff line number Diff line change @@ -114,6 +114,8 @@ impl ParquetRSPruner {
114
114
/// Return the selected row groups' indices in the meta and omit filter flags.
115
115
///
116
116
/// If `stats` is not [None], we use these statistics to prune instead of collecting them again.
117
+ ///
118
+ /// `partition_values` is used only for Delta table engine.
117
119
pub fn prune_row_groups (
118
120
& self ,
119
121
meta : & ParquetMetaData ,
@@ -178,6 +180,7 @@ impl ParquetRSPruner {
178
180
/// Prune pages of a parquet file.
179
181
///
180
182
/// Return a vector of [`RowSelection`] to represent rows to read.
183
+ /// `partition_values` is used only for Delta table engine.
181
184
pub fn prune_pages (
182
185
& self ,
183
186
meta : & ParquetMetaData ,
You can’t perform that action at this time.
0 commit comments