Skip to content

Commit

Permalink
squash, rebase, and point at arrow-rs master
Browse files Browse the repository at this point in the history
  • Loading branch information
devinjdangelo committed Oct 5, 2023
1 parent 0408c2b commit a70c095
Show file tree
Hide file tree
Showing 5 changed files with 495 additions and 210 deletions.
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ rust-version = "1.70"
version = "31.0.0"

[workspace.dependencies]
arrow = { version = "47.0.0", features = ["prettyprint"] }
arrow-array = { version = "47.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "47.0.0", default-features = false }
arrow-flight = { version = "47.0.0", features = ["flight-sql-experimental"] }
arrow-schema = { version = "47.0.0", default-features = false }
parquet = { version = "47.0.0", features = ["arrow", "async", "object_store"] }
arrow = { path = "../arrow-rs/arrow", features = ["prettyprint"] }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", default-features = false }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", features = ["flight-sql-experimental"] }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", default-features = false }
parquet = { git = "https://github.com/apache/arrow-rs.git", features = ["arrow", "async", "object_store"] }
sqlparser = { version = "0.38.0", features = ["visitor"] }
chrono = { version = "0.4.31", default-features = false }

Expand Down
24 changes: 18 additions & 6 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,12 +358,24 @@ config_namespace! {
pub bloom_filter_ndv: Option<u64>, default = None

/// Controls whether DataFusion will attempt to speed up writing
/// large parquet files by first writing multiple smaller files
/// and then stitching them together into a single large file.
/// This will result in faster write speeds, but higher memory usage.
/// Also currently unsupported are bloom filters and column indexes
/// when single_file_parallelism is enabled.
pub allow_single_file_parallelism: bool, default = false
/// parquet files by serializing them in parallel. Each column
/// in each row group in each output file is serialized in parallel,
/// leveraging a maximum possible core count of n_files*n_row_groups*n_columns.
pub allow_single_file_parallelism: bool, default = true

/// If allow_single_file_parallelism=true, this setting applies
/// backpressure to limit how many row groups are worked on in
/// parallel, preventing OOM errors when memory is limited or I/O
/// is slow. Lowering this number limits memory growth at the cost
/// of potentially slower write speeds.
pub maximum_parallel_row_group_writers: usize, default = 16

/// If allow_single_file_parallelism=true, this setting applies
/// backpressure to prevent too many RecordBatches from building
/// up in memory when the parallel writers cannot consume them fast
/// enough. Lowering this number limits memory growth at the cost
/// of potentially slower write speeds.
pub maximum_buffered_record_batches_per_stream: usize, default = 200

}
}
Expand Down
Loading

0 comments on commit a70c095

Please sign in to comment.