From 1fadadaf70aeefaad0501780b1b5f7cadc3f7193 Mon Sep 17 00:00:00 2001 From: ShashidharM0118 Date: Fri, 19 Dec 2025 10:00:46 +0530 Subject: [PATCH 1/2] fix upgrade guide API examples for FileScanConfigBuilder and ParquetSource --- docs/source/library-user-guide/upgrading.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index bd9fb3f55a47e..86258666d199c 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -1866,12 +1866,15 @@ if let Some(predicate) = logical_filter { # */ ``` -New code should use `FileScanConfig` to build the appropriate `DataSourceExec`: +New code should use `FileScanConfigBuilder` to build the appropriate `DataSourceExec`: ```rust # /* comment to avoid running -let mut file_source = ParquetSource::new(parquet_options) - .with_schema_adapter_factory(Arc::new(DeltaSchemaAdapterFactory {})); +// Create table schema with file schema and partition columns +let table_schema = TableSchema::new(file_schema, table_partition_cols); + +let mut file_source = ParquetSource::new(table_schema) + .with_table_parquet_options(parquet_options); // Add filter if let Some(predicate) = logical_filter { @@ -1880,18 +1883,18 @@ if let Some(predicate) = logical_filter { } }; -let file_scan_config = FileScanConfig::new( +let file_scan_config = FileScanConfigBuilder::new( self.log_store.object_store_url(), - file_schema, Arc::new(file_source), ) .with_statistics(stats) -.with_projection(self.projection.cloned()) +.with_projection_indices(self.projection.cloned()) +.expect("Failed to push down projection") .with_limit(self.limit) -.with_table_partition_cols(table_partition_cols); +.build(); // Build the actual scan like this -parquet_scan: file_scan_config.build(), +let parquet_scan = DataSourceExec::from_data_source(file_scan_config); # */ ``` From 11712b53d74dbffc20c40284a08283faaca7341f Mon Sep 17 00:00:00 2001 From: ShashidharM0118 Date: Sun, 4 Jan 2026 08:35:23 +0530 Subject: [PATCH 2/2] docs: fix 51.0.0 upgrade guide examples for FileScanConfigBuilder and ParquetSource --- docs/source/library-user-guide/upgrading.md | 47 ++++++++------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 8da93645678c3..1a3df1241d76e 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -769,31 +769,25 @@ The way schemas are passed to file sources and scan configurations has been sign ```diff - let source = ParquetSource::default(); - + let source = ParquetSource::new(table_schema); + + let source = ParquetSource::new(TableParquetOptions::default()); ``` -2. **FileScanConfigBuilder no longer takes schema as a parameter**: The schema is now passed via the FileSource: - - ```diff - - FileScanConfigBuilder::new(url, schema, source) - + FileScanConfigBuilder::new(url, source) - ``` - -3. **Partition columns are now part of TableSchema**: The `with_table_partition_cols()` method has been removed from `FileScanConfigBuilder`. Partition columns are now passed as part of the `TableSchema` to the FileSource constructor: +2. **Partition columns are now part of TableSchema**: The `with_table_partition_cols()` method has been removed from `FileScanConfigBuilder`. Partition columns are now passed as part of the `TableSchema` to the FileSource constructor: ```diff + let table_schema = TableSchema::new( + file_schema, + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); - + let source = ParquetSource::new(table_schema); - let config = FileScanConfigBuilder::new(url, source) + + let source = ParquetSource::new(TableParquetOptions::default()) + + .with_schema(table_schema); + let config = FileScanConfigBuilder::new(url, schema, source) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) .with_file(partitioned_file) .build(); ``` -4. **FileFormat::file_source() now takes TableSchema parameter**: Custom `FileFormat` implementations must be updated: +3. **FileFormat::file_source() now takes TableSchema parameter**: Custom `FileFormat` implementations must be updated: ```diff impl FileFormat for MyFileFormat { - fn file_source(&self) -> Arc { @@ -810,10 +804,9 @@ For Parquet files: ```diff - let source = Arc::new(ParquetSource::default()); -- let config = FileScanConfigBuilder::new(url, schema, source) + let table_schema = TableSchema::new(schema, vec![]); -+ let source = Arc::new(ParquetSource::new(table_schema)); -+ let config = FileScanConfigBuilder::new(url, source) ++ let source = Arc::new(ParquetSource::new(TableParquetOptions::default()).with_schema(table_schema)); + let config = FileScanConfigBuilder::new(url, schema, source) .with_file(partitioned_file) .build(); ``` @@ -822,8 +815,6 @@ For CSV files with partition columns: ```diff - let source = Arc::new(CsvSource::new(true, b',', b'"')); -- let config = FileScanConfigBuilder::new(url, file_schema, source) -- .with_table_partition_cols(vec![Field::new("year", DataType::Int32, false)]) + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', @@ -835,7 +826,8 @@ For CSV files with partition columns: + vec![Arc::new(Field::new("year", DataType::Int32, false))], + ); + let source = Arc::new(CsvSource::new(table_schema).with_csv_options(options)); -+ let config = FileScanConfigBuilder::new(url, source) + let config = FileScanConfigBuilder::new(url, file_schema, source) +- .with_table_partition_cols(vec![Field::new("year", DataType::Int32, false)]) .build(); ``` @@ -1929,15 +1921,12 @@ if let Some(predicate) = logical_filter { # */ ``` -New code should use `FileScanConfigBuilder` to build the appropriate `DataSourceExec`: +New code should use `FileScanConfig` to build the appropriate `DataSourceExec`: ```rust # /* comment to avoid running -// Create table schema with file schema and partition columns -let table_schema = TableSchema::new(file_schema, table_partition_cols); - -let mut file_source = ParquetSource::new(table_schema) - .with_table_parquet_options(parquet_options); +let mut file_source = ParquetSource::new(parquet_options) + .with_schema_adapter_factory(Arc::new(DeltaSchemaAdapterFactory {})); // Add filter if let Some(predicate) = logical_filter { @@ -1946,18 +1935,18 @@ if let Some(predicate) = logical_filter { } }; -let file_scan_config = FileScanConfigBuilder::new( +let file_scan_config = FileScanConfig::new( self.log_store.object_store_url(), + file_schema, Arc::new(file_source), ) .with_statistics(stats) -.with_projection_indices(self.projection.cloned()) -.expect("Failed to push down projection") +.with_projection(self.projection.cloned()) .with_limit(self.limit) -.build(); +.with_table_partition_cols(table_partition_cols); // Build the actual scan like this -let parquet_scan = DataSourceExec::from_data_source(file_scan_config); +parquet_scan: file_scan_config.build(), # */ ```