Skip to content

Commit

Permalink
[DataFrame] Read files in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Jul 16, 2023
1 parent 413eba1 commit e9d6a0e
Showing 1 changed file with 9 additions and 5 deletions.
14 changes: 9 additions & 5 deletions datafusion/core/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use datafusion_common::alias::AliasGenerator;
use datafusion_execution::registry::SerializerRegistry;
use datafusion_expr::{
logical_plan::{DdlStatement, Statement},
- DescribeTable, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
+ DescribeTable, Partitioning, StringifiedPlan, UserDefinedLogicalNode, WindowUDF,
};
pub use datafusion_physical_expr::execution_props::ExecutionProps;
use datafusion_physical_expr::var_provider::is_system_variables;
Expand Down Expand Up @@ -917,11 +917,15 @@ impl SessionContext {
/// Creates a [`DataFrame`] for a [`TableProvider`] such as a
/// [`ListingTable`] or a custom user defined provider.
pub fn read_table(&self, provider: Arc<dyn TableProvider>) -> Result<DataFrame> {
- Ok(DataFrame::new(
- self.state(),
+ let state = self.state();
+ let builder =
  LogicalPlanBuilder::scan(UNNAMED_TABLE, provider_as_source(provider), None)?
- .build()?,
- ))
+ // Keep the data in the target number of partitions
+ .repartition(Partitioning::RoundRobinBatch(
+ state.config.target_partitions(),
+ ))?;
+
+ Ok(DataFrame::new(state, builder.build()?))
}

/// Creates a [`DataFrame`] for reading a [`RecordBatch`]
Expand Down

0 comments on commit e9d6a0e

Please sign in to comment.