diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index fc7d1a2617cf6..bc2bb0ee02753 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -64,7 +64,15 @@ impl Command { let command_batch = all_commands_info(); let schema = command_batch.schema(); let num_rows = command_batch.num_rows(); - print_options.print_batches(schema, &[command_batch], now, num_rows) + let task_ctx = ctx.task_ctx(); + let config = &task_ctx.session_config().options().format; + print_options.print_batches( + schema, + &[command_batch], + now, + num_rows, + config, + ) } Self::ListTables => { exec_and_print(ctx, print_options, "SHOW TABLES".into()).await diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 0f4d70c1cca97..fa4cd9c5fd3be 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -216,7 +216,8 @@ pub(super) async fn exec_and_print( ) -> Result<()> { let now = Instant::now(); let task_ctx = ctx.task_ctx(); - let dialect = &task_ctx.session_config().options().sql_parser.dialect; + let options = task_ctx.session_config().options(); + let dialect = &options.sql_parser.dialect; let dialect = dialect_from_str(dialect).ok_or_else(|| { plan_datafusion_err!( "Unsupported SQL dialect: {dialect}. Available dialects: \ @@ -250,7 +251,9 @@ pub(super) async fn exec_and_print( // As the input stream comes, we can generate results. // However, memory safety is not guaranteed. 
let stream = execute_stream(physical_plan, task_ctx.clone())?; - print_options.print_stream(stream, now).await?; + print_options + .print_stream(stream, now, &options.format) + .await?; } else { // Bounded stream; collected results size is limited by the maxrows option let schema = physical_plan.schema(); @@ -273,9 +276,13 @@ pub(super) async fn exec_and_print( } row_count += curr_num_rows; } - adjusted - .into_inner() - .print_batches(schema, &results, now, row_count)?; + adjusted.into_inner().print_batches( + schema, + &results, + now, + row_count, + &options.format, + )?; reservation.free(); } } diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index dad2d15f01a11..fd1b11126230c 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -265,6 +265,11 @@ fn get_session_config(args: &Args) -> Result { config_options.explain.format = String::from("tree"); } + // in the CLI, we want to show NULL values rather than empty strings + if env::var_os("DATAFUSION_FORMAT_NULL").is_none() { + config_options.format.null = String::from("NULL"); + } + let session_config = SessionConfig::from(config_options).with_information_schema(true); Ok(session_config) diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 1fc949593512b..1d4889c4bab16 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -26,7 +26,7 @@ use arrow::datatypes::SchemaRef; use arrow::json::{ArrayWriter, LineDelimitedWriter}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches_with_options; -use datafusion::common::format::DEFAULT_CLI_FORMAT_OPTIONS; +use datafusion::config::FormatOptions; use datafusion::error::Result; /// Allow records to be printed in different formats @@ -110,7 +110,10 @@ fn format_batches_with_maxrows( writer: &mut W, batches: &[RecordBatch], maxrows: MaxRows, + format_options: &FormatOptions, ) -> Result<()> { + let options: 
arrow::util::display::FormatOptions = format_options.try_into()?; + match maxrows { MaxRows::Limited(maxrows) => { // Filter batches to meet the maxrows condition @@ -131,10 +134,8 @@ fn format_batches_with_maxrows( } } - let formatted = pretty_format_batches_with_options( - &filtered_batches, - &DEFAULT_CLI_FORMAT_OPTIONS, - )?; + let formatted = + pretty_format_batches_with_options(&filtered_batches, &options)?; if over_limit { let mut formatted_str = format!("{}", formatted); formatted_str = keep_only_maxrows(&formatted_str, maxrows); @@ -144,8 +145,7 @@ fn format_batches_with_maxrows( } } MaxRows::Unlimited => { - let formatted = - pretty_format_batches_with_options(batches, &DEFAULT_CLI_FORMAT_OPTIONS)?; + let formatted = pretty_format_batches_with_options(batches, &options)?; writeln!(writer, "{}", formatted)?; } } @@ -162,6 +162,7 @@ impl PrintFormat { batches: &[RecordBatch], maxrows: MaxRows, with_header: bool, + format_options: &FormatOptions, ) -> Result<()> { // filter out any empty batches let batches: Vec<_> = batches @@ -170,7 +171,7 @@ impl PrintFormat { .cloned() .collect(); if batches.is_empty() { - return self.print_empty(writer, schema); + return self.print_empty(writer, schema, format_options); } match self { @@ -182,7 +183,7 @@ impl PrintFormat { if maxrows == MaxRows::Limited(0) { return Ok(()); } - format_batches_with_maxrows(writer, &batches, maxrows) + format_batches_with_maxrows(writer, &batches, maxrows, format_options) } Self::Json => batches_to_json!(ArrayWriter, writer, &batches), Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, &batches), @@ -194,15 +195,17 @@ impl PrintFormat { &self, writer: &mut W, schema: SchemaRef, + format_options: &FormatOptions, ) -> Result<()> { match self { // Print column headers for Table format Self::Table if !schema.fields().is_empty() => { + let format_options: arrow::util::display::FormatOptions = + format_options.try_into()?; + let empty_batch = RecordBatch::new_empty(schema); - let 
formatted = pretty_format_batches_with_options( - &[empty_batch], - &DEFAULT_CLI_FORMAT_OPTIONS, - )?; + let formatted = + pretty_format_batches_with_options(&[empty_batch], &format_options)?; writeln!(writer, "{}", formatted)?; } _ => {} @@ -644,6 +647,7 @@ mod tests { &self.batches, self.maxrows, with_header, + &FormatOptions::default(), ) .unwrap(); String::from_utf8(buffer).unwrap() diff --git a/datafusion-cli/src/print_options.rs b/datafusion-cli/src/print_options.rs index 9557e783e8a7c..6ca75bd226668 100644 --- a/datafusion-cli/src/print_options.rs +++ b/datafusion-cli/src/print_options.rs @@ -29,6 +29,7 @@ use datafusion::common::DataFusionError; use datafusion::error::Result; use datafusion::physical_plan::RecordBatchStream; +use datafusion::config::FormatOptions; use futures::StreamExt; #[derive(Debug, Clone, PartialEq, Copy)] @@ -103,12 +104,19 @@ impl PrintOptions { batches: &[RecordBatch], query_start_time: Instant, row_count: usize, + format_options: &FormatOptions, ) -> Result<()> { let stdout = std::io::stdout(); let mut writer = stdout.lock(); - self.format - .print_batches(&mut writer, schema, batches, self.maxrows, true)?; + self.format.print_batches( + &mut writer, + schema, + batches, + self.maxrows, + true, + format_options, + )?; let formatted_exec_details = get_execution_details_formatted( row_count, @@ -132,6 +140,7 @@ impl PrintOptions { &self, mut stream: Pin>, query_start_time: Instant, + format_options: &FormatOptions, ) -> Result<()> { if self.format == PrintFormat::Table { return Err(DataFusionError::External( @@ -154,6 +163,7 @@ impl PrintOptions { &[batch], MaxRows::Unlimited, with_header, + format_options, )?; with_header = false; } diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 9ac09955512b8..69344f4e5e60f 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -69,6 +69,10 @@ fn init() { // can choose the old explain format too 
["--command", "EXPLAIN FORMAT indent SELECT 123"], )] +#[case::change_format_version( + "change_format_version", + ["--file", "tests/sql/types_format.sql", "-q"], +)] #[test] fn cli_quick_test<'a>( #[case] snapshot_name: &'a str, diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@change_format_version.snap b/datafusion-cli/tests/snapshots/cli_quick_test@change_format_version.snap new file mode 100644 index 0000000000000..74059b2a6103c --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@change_format_version.snap @@ -0,0 +1,20 @@ +--- +source: datafusion-cli/tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--file" + - tests/sql/types_format.sql + - "-q" +--- +success: true +exit_code: 0 +----- stdout ----- ++-----------+ +| Int64(54) | +| Int64 | ++-----------+ +| 54 | ++-----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/sql/types_format.sql b/datafusion-cli/tests/sql/types_format.sql new file mode 100644 index 0000000000000..637929c980a15 --- /dev/null +++ b/datafusion-cli/tests/sql/types_format.sql @@ -0,0 +1,3 @@ +set datafusion.format.types_info to true; + +select 54 \ No newline at end of file diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 25f555cd8634f..e3f88994d9cad 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -17,17 +17,16 @@ //! 
Runtime configuration, via [`ConfigOptions`] +use crate::error::_config_err; +use crate::parsers::CompressionTypeVariant; +use crate::utils::get_available_parallelism; +use crate::{DataFusionError, Result}; use std::any::Any; use std::collections::{BTreeMap, HashMap}; use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; -use crate::error::_config_err; -use crate::parsers::CompressionTypeVariant; -use crate::utils::get_available_parallelism; -use crate::{DataFusionError, Result}; - /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used /// in the [`ConfigOptions`] configuration tree. @@ -759,6 +758,59 @@ impl ExecutionOptions { } } +config_namespace! { + /// Options controlling the format of output when printing record batches + /// Copies [`arrow::util::display::FormatOptions`] + pub struct FormatOptions { + /// If set to `true` any formatting errors will be written to the output + /// instead of being converted into a [`std::fmt::Error`] + pub safe: bool, default = true + /// Format string for nulls + pub null: String, default = "".into() + /// Date format for date arrays + pub date_format: Option, default = Some("%Y-%m-%d".to_string()) + /// Format for DateTime arrays + pub datetime_format: Option, default = Some("%Y-%m-%dT%H:%M:%S%.f".to_string()) + /// Timestamp format for timestamp arrays + pub timestamp_format: Option, default = Some("%Y-%m-%dT%H:%M:%S%.f".to_string()) + /// Timestamp format for timestamp with timezone arrays. When `None`, ISO 8601 format is used. + pub timestamp_tz_format: Option, default = None + /// Time format for time arrays + pub time_format: Option, default = Some("%H:%M:%S%.f".to_string()) + /// Duration format. 
Can be either `"pretty"` or `"ISO8601"` + pub duration_format: String, transform = str::to_lowercase, default = "pretty".into() + /// Show types in visual representation batches + pub types_info: bool, default = false + } +} + +impl<'a> TryInto> for &'a FormatOptions { + type Error = DataFusionError; + fn try_into(self) -> Result> { + let duration_format = match self.duration_format.as_str() { + "pretty" => arrow::util::display::DurationFormat::Pretty, + "iso8601" => arrow::util::display::DurationFormat::ISO8601, + _ => { + return _config_err!( + "Invalid duration format: {}. Valid values are pretty or iso8601", + self.duration_format + ) + } + }; + + Ok(arrow::util::display::FormatOptions::new() + .with_display_error(self.safe) + .with_null(&self.null) + .with_date_format(self.date_format.as_deref()) + .with_datetime_format(self.datetime_format.as_deref()) + .with_timestamp_format(self.timestamp_format.as_deref()) + .with_timestamp_tz_format(self.timestamp_tz_format.as_deref()) + .with_time_format(self.time_format.as_deref()) + .with_duration_format(duration_format) + .with_types_info(self.types_info)) + } +} + /// A key value pair, with a corresponding description #[derive(Debug)] pub struct ConfigEntry { @@ -788,6 +840,8 @@ pub struct ConfigOptions { pub explain: ExplainOptions, /// Optional extensions registered using [`Extensions::insert`] pub extensions: Extensions, + /// Formatting options when printing batches + pub format: FormatOptions, } impl ConfigField for ConfigOptions { @@ -800,6 +854,7 @@ impl ConfigField for ConfigOptions { "optimizer" => self.optimizer.set(rem, value), "explain" => self.explain.set(rem, value), "sql_parser" => self.sql_parser.set(rem, value), + "format" => self.format.set(rem, value), _ => _config_err!("Config value \"{key}\" not found on ConfigOptions"), } } @@ -810,6 +865,7 @@ impl ConfigField for ConfigOptions { self.optimizer.visit(v, "datafusion.optimizer", ""); self.explain.visit(v, "datafusion.explain", ""); 
self.sql_parser.visit(v, "datafusion.sql_parser", ""); + self.format.visit(v, "datafusion.format", ""); } } @@ -2004,11 +2060,11 @@ config_namespace! { } } -pub trait FormatOptionsExt: Display {} +pub trait OutputFormatExt: Display {} #[derive(Debug, Clone, PartialEq)] #[allow(clippy::large_enum_variant)] -pub enum FormatOptions { +pub enum OutputFormat { CSV(CsvOptions), JSON(JsonOptions), #[cfg(feature = "parquet")] @@ -2017,15 +2073,15 @@ pub enum FormatOptions { ARROW, } -impl Display for FormatOptions { +impl Display for OutputFormat { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let out = match self { - FormatOptions::CSV(_) => "csv", - FormatOptions::JSON(_) => "json", + OutputFormat::CSV(_) => "csv", + OutputFormat::JSON(_) => "json", #[cfg(feature = "parquet")] - FormatOptions::PARQUET(_) => "parquet", - FormatOptions::AVRO => "avro", - FormatOptions::ARROW => "arrow", + OutputFormat::PARQUET(_) => "parquet", + OutputFormat::AVRO => "avro", + OutputFormat::ARROW => "arrow", }; write!(f, "{}", out) } diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs index 23cfb72314a3c..a4ebd17539996 100644 --- a/datafusion/common/src/format.rs +++ b/datafusion/common/src/format.rs @@ -19,6 +19,7 @@ use arrow::compute::CastOptions; use arrow::util::display::{DurationFormat, FormatOptions}; /// The default [`FormatOptions`] to use within DataFusion +/// Also see [`crate::config::FormatOptions`] pub const DEFAULT_FORMAT_OPTIONS: FormatOptions<'static> = FormatOptions::new().with_duration_format(DurationFormat::Pretty); @@ -27,7 +28,3 @@ pub const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions { safe: false, format_options: DEFAULT_FORMAT_OPTIONS, }; - -pub const DEFAULT_CLI_FORMAT_OPTIONS: FormatOptions<'static> = FormatOptions::new() - .with_duration_format(DurationFormat::Pretty) - .with_null("NULL"); diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index b801c452af2c9..ff6320d17a05e 
100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -18,10 +18,20 @@ //! Utility functions to make testing DataFusion based crates easier use crate::arrow::util::pretty::pretty_format_batches_with_options; -use crate::format::DEFAULT_FORMAT_OPTIONS; use arrow::array::RecordBatch; +use arrow::error::ArrowError; +use std::fmt::Display; use std::{error::Error, path::PathBuf}; +pub fn format_batches(results: &[RecordBatch]) -> Result { + let datafusion_format_options = crate::config::FormatOptions::default(); + + let arrow_format_options: arrow::util::display::FormatOptions = + (&datafusion_format_options).try_into().unwrap(); + + pretty_format_batches_with_options(results, &arrow_format_options) +} + /// Compares formatted output of a record batch with an expected /// vector of strings, with the result of pretty formatting record /// batches. This is a macro so errors appear on the correct line @@ -59,12 +69,9 @@ macro_rules! assert_batches_eq { let expected_lines: Vec = $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( - $CHUNKS, - &$crate::format::DEFAULT_FORMAT_OPTIONS, - ) - .unwrap() - .to_string(); + let formatted = $crate::test_util::format_batches($CHUNKS) + .unwrap() + .to_string(); let actual_lines: Vec<&str> = formatted.trim().lines().collect(); @@ -77,18 +84,13 @@ macro_rules! 
assert_batches_eq { } pub fn batches_to_string(batches: &[RecordBatch]) -> String { - let actual = pretty_format_batches_with_options(batches, &DEFAULT_FORMAT_OPTIONS) - .unwrap() - .to_string(); + let actual = format_batches(batches).unwrap().to_string(); actual.trim().to_string() } pub fn batches_to_sort_string(batches: &[RecordBatch]) -> String { - let actual_lines = - pretty_format_batches_with_options(batches, &DEFAULT_FORMAT_OPTIONS) - .unwrap() - .to_string(); + let actual_lines = format_batches(batches).unwrap().to_string(); let mut actual_lines: Vec<&str> = actual_lines.trim().lines().collect(); @@ -122,12 +124,9 @@ macro_rules! assert_batches_sorted_eq { expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() } - let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( - $CHUNKS, - &$crate::format::DEFAULT_FORMAT_OPTIONS, - ) - .unwrap() - .to_string(); + let formatted = $crate::test_util::format_batches($CHUNKS) + .unwrap() + .to_string(); // fix for windows: \r\n --> let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 9a70f8f43fb61..e385125692bd1 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1366,8 +1366,47 @@ impl DataFrame { /// # } /// ``` pub async fn show(self) -> Result<()> { + println!("{}", self.to_string().await?); + Ok(()) + } + + /// Execute the `DataFrame` and return a string representation of the results. 
+ /// + /// # Example + /// ``` + /// # use datafusion::prelude::*; + /// # use datafusion::error::Result; + /// # use datafusion::execution::SessionStateBuilder; + /// + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// let cfg = SessionConfig::new() + /// .set_str("datafusion.format.null", "no-value"); + /// let session_state = SessionStateBuilder::new() + /// .with_config(cfg) + /// .with_default_features() + /// .build(); + /// let ctx = SessionContext::new_with_state(session_state); + /// let df = ctx.sql("select null as 'null-column'").await?; + /// let result = df.to_string().await?; + /// assert_eq!(result, + /// "+-------------+ + /// | null-column | + /// +-------------+ + /// | no-value | + /// +-------------+" + /// ); + /// # Ok(()) + /// # } + pub async fn to_string(self) -> Result { + let options = self.session_state.config().options().format.clone(); + let arrow_options: arrow::util::display::FormatOptions = (&options).try_into()?; + let results = self.collect().await?; - Ok(pretty::print_batches(&results)?) + Ok( + pretty::pretty_format_batches_with_options(&results, &arrow_options)? 
+ .to_string(), + ) } /// Execute the `DataFrame` and print only the first `num` rows of the diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index 85bbd421827ee..b3396bbd06d83 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -22,8 +22,8 @@ use arrow::array::{Array, AsArray}; use arrow::datatypes::Fields; use arrow::util::display::ArrayFormatter; use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch}; -use datafusion::common::format::DEFAULT_CLI_FORMAT_OPTIONS; use datafusion::common::DataFusionError; +use datafusion::config::ConfigField; use std::path::PathBuf; use std::sync::LazyLock; @@ -243,9 +243,17 @@ pub fn cell_to_string(col: &ArrayRef, row: usize, is_spark_path: bool) -> Result Ok(cell_to_string(dict.values(), key, is_spark_path)?) } _ => { - let f = - ArrayFormatter::try_new(col.as_ref(), &DEFAULT_CLI_FORMAT_OPTIONS); - Ok(f?.value(row).to_string()) + let mut datafusion_format_options = + datafusion::config::FormatOptions::default(); + + datafusion_format_options.set("null", "NULL").unwrap(); + + let arrow_format_options: arrow::util::display::FormatOptions = + (&datafusion_format_options).try_into().unwrap(); + + let f = ArrayFormatter::try_new(col.as_ref(), &arrow_format_options)?; + + Ok(f.value(row).to_string()) } } .map_err(DFSqlLogicTestError::Arrow) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 6f6ef1270cf81..4f0a46c7251a5 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -273,6 +273,15 @@ datafusion.explain.physical_plan_only false datafusion.explain.show_schema false datafusion.explain.show_sizes true datafusion.explain.show_statistics 
false +datafusion.format.date_format %Y-%m-%d +datafusion.format.datetime_format %Y-%m-%dT%H:%M:%S%.f +datafusion.format.duration_format pretty +datafusion.format.null (empty) +datafusion.format.safe true +datafusion.format.time_format %H:%M:%S%.f +datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f +datafusion.format.timestamp_tz_format NULL +datafusion.format.types_info false datafusion.optimizer.allow_symmetric_joins_without_pruning true datafusion.optimizer.default_filter_selectivity 20 datafusion.optimizer.enable_distinct_aggregation_soft_limit true @@ -374,6 +383,15 @@ datafusion.explain.physical_plan_only false When set to true, the explain statem datafusion.explain.show_schema false When set to true, the explain statement will print schema information datafusion.explain.show_sizes true When set to true, the explain statement will print the partition sizes datafusion.explain.show_statistics false When set to true, the explain statement will print operator statistics for physical plans +datafusion.format.date_format %Y-%m-%d Date format for date arrays +datafusion.format.datetime_format %Y-%m-%dT%H:%M:%S%.f Format for DateTime arrays +datafusion.format.duration_format pretty Duration format. Can be either `"pretty"` or `"ISO8601"` +datafusion.format.null (empty) Format string for nulls +datafusion.format.safe true If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] +datafusion.format.time_format %H:%M:%S%.f Time format for time arrays +datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f Timestamp format for timestamp arrays +datafusion.format.timestamp_tz_format NULL Timestamp format for timestamp with timezone arrays. When `None`, ISO 8601 format is used. 
+datafusion.format.types_info false Show types in visual representation batches datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 63f8c711bdb29..ea174943c1381 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -133,3 +133,12 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.map_varchar_to_utf8view | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. 
| | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | +| datafusion.format.safe | true | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] | +| datafusion.format.null | | Format string for nulls | +| datafusion.format.date_format | %Y-%m-%d | Date format for date arrays | +| datafusion.format.datetime_format | %Y-%m-%dT%H:%M:%S%.f | Format for DateTime arrays | +| datafusion.format.timestamp_format | %Y-%m-%dT%H:%M:%S%.f | Timestamp format for timestamp arrays | +| datafusion.format.timestamp_tz_format | NULL | Timestamp format for timestamp with timezone arrays. When `None`, ISO 8601 format is used. | +| datafusion.format.time_format | %H:%M:%S%.f | Time format for time arrays | +| datafusion.format.duration_format | pretty | Duration format. Can be either `"pretty"` or `"ISO8601"` | +| datafusion.format.types_info | false | Show types in visual representation batches |