1
1
//! Functionality to create and execute scans (reads) over data stored in a delta table
2
2
3
+ use std:: collections:: HashMap ;
3
4
use std:: sync:: Arc ;
4
5
5
6
use itertools:: Itertools ;
6
7
use tracing:: debug;
7
8
use url:: Url ;
8
9
9
- use self :: log_replay:: { log_replay_iter , scan_action_iter} ;
10
+ use self :: log_replay:: scan_action_iter;
10
11
use self :: state:: GlobalScanState ;
11
12
use crate :: actions:: deletion_vector:: { split_vector, treemap_to_bools, DeletionVectorDescriptor } ;
12
- use crate :: actions:: { get_log_schema, Add , ADD_NAME , REMOVE_NAME } ;
13
+ use crate :: actions:: { get_log_schema, ADD_NAME , REMOVE_NAME } ;
13
14
use crate :: column_mapping:: ColumnMappingMode ;
14
15
use crate :: expressions:: { Expression , Scalar } ;
16
+ use crate :: scan:: state:: { DvInfo , Stats } ;
15
17
use crate :: schema:: { DataType , Schema , SchemaRef , StructField , StructType } ;
16
18
use crate :: snapshot:: Snapshot ;
17
19
use crate :: { DeltaResult , Engine , EngineData , Error , FileMeta } ;
@@ -177,30 +179,6 @@ impl Scan {
177
179
& self . predicate
178
180
}
179
181
180
- /// Get an iterator of Add actions that should be included in scan for a query. This handles
181
- /// log-replay, reconciling Add and Remove actions, and applying data skipping (if possible)
182
- pub ( crate ) fn files (
183
- & self ,
184
- engine : & dyn Engine ,
185
- ) -> DeltaResult < impl Iterator < Item = DeltaResult < Add > > + Send > {
186
- let commit_read_schema = get_log_schema ( ) . project ( & [ ADD_NAME , REMOVE_NAME ] ) ?;
187
- let checkpoint_read_schema = get_log_schema ( ) . project ( & [ ADD_NAME ] ) ?;
188
-
189
- let log_iter = self . snapshot . log_segment . replay (
190
- engine,
191
- commit_read_schema,
192
- checkpoint_read_schema,
193
- self . predicate . clone ( ) ,
194
- ) ?;
195
-
196
- Ok ( log_replay_iter (
197
- engine,
198
- log_iter,
199
- & self . logical_schema ,
200
- & self . predicate ,
201
- ) )
202
- }
203
-
204
182
/// Get an iterator of [`EngineData`]s that should be included in scan for a query. This handles
205
183
/// log-replay, reconciling Add and Remove actions, and applying data skipping (if
206
184
/// possible). Each item in the returned iterator is a tuple of:
@@ -256,99 +234,87 @@ impl Scan {
256
234
// This calls [`Scan::files`] to get a set of `Add` actions for the scan, and then uses the
257
235
// `engine`'s [`crate::ParquetHandler`] to read the actual table data.
258
236
pub fn execute ( & self , engine : & dyn Engine ) -> DeltaResult < Vec < ScanResult > > {
237
+ struct ScanFile {
238
+ path : String ,
239
+ size : i64 ,
240
+ dv_info : DvInfo ,
241
+ partition_values : HashMap < String , String > ,
242
+ }
243
+ fn scan_data_callback (
244
+ batches : & mut Vec < ScanFile > ,
245
+ path : & str ,
246
+ size : i64 ,
247
+ _: Option < Stats > ,
248
+ dv_info : DvInfo ,
249
+ partition_values : HashMap < String , String > ,
250
+ ) {
251
+ batches. push ( ScanFile {
252
+ path : path. to_string ( ) ,
253
+ size,
254
+ dv_info,
255
+ partition_values,
256
+ } ) ;
257
+ }
258
+
259
259
debug ! (
260
260
"Executing scan with logical schema {:#?} and physical schema {:#?}" ,
261
261
self . logical_schema, self . physical_schema
262
262
) ;
263
- let output_schema = DataType :: from ( self . schema ( ) . clone ( ) ) ;
264
- let parquet_handler = engine. get_parquet_handler ( ) ;
265
-
266
- let mut results: Vec < ScanResult > = vec ! [ ] ;
267
- let files = self . files ( engine) ?;
268
- for add_result in files {
269
- let add = add_result?;
270
- let meta = FileMeta {
271
- last_modified : add. modification_time ,
272
- size : add. size as usize ,
273
- location : self . snapshot . table_root . join ( & add. path ) ?,
274
- } ;
275
-
276
- let read_results =
277
- parquet_handler. read_parquet_files ( & [ meta] , self . physical_schema . clone ( ) , None ) ?;
278
-
279
- let read_expression = if self . have_partition_cols
280
- || self . snapshot . column_mapping_mode != ColumnMappingMode :: None
281
- {
282
- // Loop over all fields and create the correct expressions for them
283
- let all_fields = self
284
- . all_fields
285
- . iter ( )
286
- . map ( |field| match field {
287
- ColumnType :: Partition ( field_idx) => {
288
- let field = self . logical_schema . fields . get_index ( * field_idx) . ok_or_else ( || {
289
- Error :: generic ( "logical schema did not contain expected field, can't execute scan" )
290
- } ) ?. 1 ;
291
- let name = field. physical_name ( self . snapshot . column_mapping_mode ) ?;
292
- let value_expression = parse_partition_value (
293
- add. partition_values . get ( name) ,
294
- field. data_type ( ) ,
295
- ) ?;
296
- Ok :: < Expression , Error > ( Expression :: Literal ( value_expression) )
297
- }
298
- ColumnType :: Selected ( field_name) => Ok ( Expression :: column ( field_name) ) ,
299
- } )
300
- . try_collect ( ) ?;
301
- Some ( Expression :: Struct ( all_fields) )
302
- } else {
303
- None
304
- } ;
305
- debug ! ( "Final expression for read: {read_expression:?}" ) ;
306
-
307
- let dv_treemap = add
308
- . deletion_vector
309
- . as_ref ( )
310
- . map ( |dv_descriptor| {
311
- let fs_client = engine. get_file_system_client ( ) ;
312
- dv_descriptor. read ( fs_client, & self . snapshot . table_root )
313
- } )
314
- . transpose ( ) ?;
315
-
316
- let mut dv_mask = dv_treemap. map ( treemap_to_bools) ;
317
-
318
- for read_result in read_results {
319
- let len = if let Ok ( ref res) = read_result {
320
- res. length ( )
321
- } else {
322
- 0
323
- } ;
324
-
325
- let read_result = match read_expression {
326
- Some ( ref read_expression) => engine
327
- . get_expression_handler ( )
328
- . get_evaluator (
329
- self . physical_schema . clone ( ) ,
330
- read_expression. clone ( ) ,
331
- output_schema. clone ( ) ,
332
- )
333
- . evaluate ( read_result?. as_ref ( ) ) ,
334
- None => {
335
- // if we don't have partition columns, the result is just what we read
336
- read_result
337
- }
338
- } ;
339
263
340
- // need to split the dv_mask. what's left in dv_mask covers this result, and rest
341
- // will cover the following results
342
- let rest = split_vector ( dv_mask. as_mut ( ) , len, None ) ;
343
- let scan_result = ScanResult {
344
- raw_data : read_result,
345
- mask : dv_mask,
346
- } ;
347
- dv_mask = rest;
348
- results. push ( scan_result) ;
349
- }
264
+ let global_state = Arc :: new ( self . global_scan_state ( ) ) ;
265
+ let scan_data = self . scan_data ( engine) ?;
266
+ let mut scan_files = vec ! [ ] ;
267
+ for data in scan_data {
268
+ let ( data, vec) = data?;
269
+ scan_files =
270
+ state:: visit_scan_files ( data. as_ref ( ) , & vec, scan_files, scan_data_callback) ?;
350
271
}
351
- Ok ( results)
272
+ scan_files
273
+ . into_iter ( )
274
+ . map ( |scan_file| -> DeltaResult < _ > {
275
+ let file_path = self . snapshot . table_root . join ( & scan_file. path ) ?;
276
+ let mut selection_vector = scan_file
277
+ . dv_info
278
+ . get_selection_vector ( engine, & self . snapshot . table_root ) ?;
279
+ let meta = FileMeta {
280
+ last_modified : 0 ,
281
+ size : scan_file. size as usize ,
282
+ location : file_path,
283
+ } ;
284
+ let read_result_iter = engine. get_parquet_handler ( ) . read_parquet_files (
285
+ & [ meta] ,
286
+ global_state. read_schema . clone ( ) ,
287
+ None ,
288
+ ) ?;
289
+ let gs = global_state. clone ( ) ; // Arc clone
290
+ Ok ( read_result_iter. into_iter ( ) . map ( move |read_result| {
291
+ let read_result = read_result?;
292
+ // to transform the physical data into the correct logical form
293
+ let logical = transform_to_logical_internal (
294
+ engine,
295
+ read_result,
296
+ & gs,
297
+ & scan_file. partition_values ,
298
+ & self . all_fields ,
299
+ self . have_partition_cols ,
300
+ ) ;
301
+ let len = logical. as_ref ( ) . map_or ( 0 , |res| res. length ( ) ) ;
302
+ // need to split the dv_mask. what's left in dv_mask covers this result, and rest
303
+ // will cover the following results. we `take()` out of `selection_vector` to avoid
304
+ // trying to return a captured variable. We're going to reassign `selection_vector`
305
+ // to `rest` in a moment anyway
306
+ let mut sv = selection_vector. take ( ) ;
307
+ let rest = split_vector ( sv. as_mut ( ) , len, None ) ;
308
+ let result = ScanResult {
309
+ raw_data : logical,
310
+ mask : sv,
311
+ } ;
312
+ selection_vector = rest;
313
+ Ok ( result)
314
+ } ) )
315
+ } )
316
+ . flatten_ok ( )
317
+ . try_collect ( ) ?
352
318
}
353
319
}
354
320
@@ -438,17 +404,39 @@ pub fn selection_vector(
438
404
Ok ( treemap_to_bools ( dv_treemap) )
439
405
}
440
406
407
+ /// Transform the raw data read from parquet into the correct logical form, based on the provided
408
+ /// global scan state and partition values
441
409
pub fn transform_to_logical (
442
410
engine : & dyn Engine ,
443
411
data : Box < dyn EngineData > ,
444
412
global_state : & GlobalScanState ,
445
- partition_values : & std :: collections :: HashMap < String , String > ,
413
+ partition_values : & HashMap < String , String > ,
446
414
) -> DeltaResult < Box < dyn EngineData > > {
447
415
let ( all_fields, _read_fields, have_partition_cols) = get_state_info (
448
416
& global_state. logical_schema ,
449
417
& global_state. partition_columns ,
450
418
global_state. column_mapping_mode ,
451
419
) ?;
420
+ transform_to_logical_internal (
421
+ engine,
422
+ data,
423
+ global_state,
424
+ partition_values,
425
+ & all_fields,
426
+ have_partition_cols,
427
+ )
428
+ }
429
+
430
+ // We have this function because `execute` can save `all_fields` and `have_partition_cols` in the
431
+ // scan, and then reuse them for each batch transform
432
+ fn transform_to_logical_internal (
433
+ engine : & dyn Engine ,
434
+ data : Box < dyn EngineData > ,
435
+ global_state : & GlobalScanState ,
436
+ partition_values : & std:: collections:: HashMap < String , String > ,
437
+ all_fields : & [ ColumnType ] ,
438
+ have_partition_cols : bool ,
439
+ ) -> DeltaResult < Box < dyn EngineData > > {
452
440
let read_schema = global_state. read_schema . clone ( ) ;
453
441
if have_partition_cols || global_state. column_mapping_mode != ColumnMappingMode :: None {
454
442
// need to add back partition cols and/or fix-up mapped columns
@@ -596,8 +584,29 @@ mod tests {
596
584
use crate :: schema:: PrimitiveType ;
597
585
use crate :: Table ;
598
586
587
+ fn get_files_for_scan ( scan : Scan , engine : & dyn Engine ) -> DeltaResult < Vec < String > > {
588
+ let scan_data = scan. scan_data ( engine) ?;
589
+ fn scan_data_callback (
590
+ paths : & mut Vec < String > ,
591
+ path : & str ,
592
+ _size : i64 ,
593
+ _: Option < Stats > ,
594
+ dv_info : DvInfo ,
595
+ _partition_values : HashMap < String , String > ,
596
+ ) {
597
+ paths. push ( path. to_string ( ) ) ;
598
+ assert ! ( dv_info. deletion_vector. is_none( ) ) ;
599
+ }
600
+ let mut files = vec ! [ ] ;
601
+ for data in scan_data {
602
+ let ( data, vec) = data?;
603
+ files = state:: visit_scan_files ( data. as_ref ( ) , & vec, files, scan_data_callback) ?;
604
+ }
605
+ Ok ( files)
606
+ }
607
+
599
608
#[ test]
600
- fn test_scan_files ( ) {
609
+ fn test_scan_data_paths ( ) {
601
610
let path =
602
611
std:: fs:: canonicalize ( PathBuf :: from ( "./tests/data/table-without-dv-small/" ) ) . unwrap ( ) ;
603
612
let url = url:: Url :: from_directory_path ( path) . unwrap ( ) ;
@@ -606,14 +615,12 @@ mod tests {
606
615
let table = Table :: new ( url) ;
607
616
let snapshot = table. snapshot ( & engine, None ) . unwrap ( ) ;
608
617
let scan = snapshot. into_scan_builder ( ) . build ( ) . unwrap ( ) ;
609
- let files: Vec < Add > = scan. files ( & engine) . unwrap ( ) . try_collect ( ) . unwrap ( ) ;
610
-
618
+ let files = get_files_for_scan ( scan, & engine) . unwrap ( ) ;
611
619
assert_eq ! ( files. len( ) , 1 ) ;
612
620
assert_eq ! (
613
- & files[ 0 ] . path ,
621
+ files[ 0 ] ,
614
622
"part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet"
615
623
) ;
616
- assert ! ( & files[ 0 ] . deletion_vector. is_none( ) ) ;
617
624
}
618
625
619
626
#[ test]
@@ -689,8 +696,7 @@ mod tests {
689
696
let table = Table :: new ( url) ;
690
697
let snapshot = table. snapshot ( & engine, None ) ?;
691
698
let scan = snapshot. into_scan_builder ( ) . build ( ) ?;
692
- let files: Vec < DeltaResult < Add > > = scan. files ( & engine) ?. collect ( ) ;
693
-
699
+ let files = get_files_for_scan ( scan, & engine) ?;
694
700
// test case:
695
701
//
696
702
// commit0: P and M, no add/remove
@@ -701,10 +707,7 @@ mod tests {
701
707
//
702
708
// thus replay should produce only file-70b
703
709
assert_eq ! (
704
- files
705
- . into_iter( )
706
- . map( |file| file. unwrap( ) . path)
707
- . collect:: <Vec <_>>( ) ,
710
+ files,
708
711
vec![ "part-00000-70b1dcdf-0236-4f63-a072-124cdbafd8a0-c000.snappy.parquet" ]
709
712
) ;
710
713
Ok ( ( ) )
0 commit comments