Skip to content

Commit

Permalink
Better large output display for CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
2010YOUY01 committed Sep 21, 2023
1 parent bcdda39 commit b5e4e5c
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 22 deletions.
30 changes: 30 additions & 0 deletions datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,13 @@ struct Args {
help = "Specify the memory pool type 'greedy' or 'fair', default to 'greedy'"
)]
mem_pool_type: Option<PoolType>,

#[clap(
long,
help = "The max number of rows to display for 'Table' format\n[default: 40] [possible values: numbers(0/10/...), inf(no limit)]",
validator(is_valid_maxrows)
)]
maxrows: Option<String>,
}

#[tokio::main]
Expand Down Expand Up @@ -179,6 +186,10 @@ pub async fn main() -> Result<()> {
let mut print_options = PrintOptions {
format: args.format,
quiet: args.quiet,
maxrows: match args.maxrows {
Some(maxrows_str) => extract_maxrows(&maxrows_str).unwrap(),
None => Some(40), // set default value
},
};

let commands = args.command;
Expand Down Expand Up @@ -253,6 +264,25 @@ fn is_valid_memory_pool_size(size: &str) -> Result<(), String> {
}
}

/// Command-line validator for `--maxrows`.
///
/// Succeeds when `extract_maxrows` can interpret the argument and otherwise
/// forwards its error message; the parsed value itself is discarded.
fn is_valid_maxrows(maxrows: &str) -> Result<(), String> {
    extract_maxrows(maxrows)?;
    Ok(())
}

/// Parses the `--maxrows` argument.
///
/// Returns `Ok(None)` (no limit on the rows to display) for `inf`,
/// `infinite` or `none` in any letter case, `Ok(Some(n))` when the input
/// parses as a `usize`, and an error message for anything else.
fn extract_maxrows(maxrows: &str) -> Result<Option<usize>, String> {
    let normalized = maxrows.to_lowercase();
    if matches!(normalized.as_str(), "inf" | "infinite" | "none") {
        return Ok(None);
    }

    maxrows
        .parse::<usize>()
        .map(Some)
        .map_err(|_| format!("Invalid maxrows {}. Valid inputs are natural numbers or \'inf\' for no limit.", maxrows))
}

#[derive(Debug, Clone, Copy)]
enum ByteUnit {
Byte,
Expand Down
142 changes: 137 additions & 5 deletions datafusion-cli/src/print_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,87 @@ fn print_batches_with_sep(batches: &[RecordBatch], delimiter: u8) -> Result<Stri
Ok(formatted)
}

/// Truncates a pretty-printed table to its first `maxrows` data rows.
///
/// `s` is expected to be laid out as: top border, header line, separator,
/// one line per data row, bottom border. The result keeps the three leading
/// lines plus `maxrows` data rows, then three `| .   |` filler lines, then
/// the original bottom border, joined with `\n`.
///
/// If `s` has fewer than `maxrows + 4` lines (including when that sum would
/// overflow `usize`), `s` is returned unchanged instead of panicking.
fn keep_only_maxrows(s: &str, maxrows: usize) -> String {
    // Top border, column names and separator precede the data rows.
    const HEADER_LINES: usize = 3;
    // Number of "| .  |" filler lines standing in for the omitted rows.
    const ELLIPSIS_LINES: usize = 3;

    let lines: Vec<&str> = s.lines().collect();

    // Number of leading lines to keep. Requiring `kept < lines.len()` is the
    // same condition as `lines.len() >= maxrows + 4`, without the overflow.
    let kept = match maxrows.checked_add(HEADER_LINES) {
        Some(kept) if kept < lines.len() => kept,
        _ => return s.to_owned(),
    };

    let last_line = lines[lines.len() - 1]; // bottom border line

    // Pad the filler line to the width of the bottom border. The width is
    // taken from the byte length, which equals the visible width as long as
    // the border is ASCII.
    let spaces = last_line.len().saturating_sub(4);
    let dotted_line = format!("| .{:<spaces$}|", "", spaces = spaces);

    let mut result: Vec<&str> = Vec::with_capacity(kept + ELLIPSIS_LINES + 1);
    result.extend_from_slice(&lines[..kept]);
    result.extend(std::iter::repeat(dotted_line.as_str()).take(ELLIPSIS_LINES));
    result.push(last_line);

    result.join("\n")
}

/// Formats `batches` as a table, showing at most `maxrows_opt` data rows.
///
/// * `maxrows_opt == None`, or a limit that is not smaller than the total
///   row count: every row is formatted.
/// * Otherwise only the shortest prefix of `batches` whose cumulative row
///   count exceeds the limit is formatted, and the rendered table is then
///   cut down with `keep_only_maxrows`.
///
/// Errors from `pretty_format_batches_with_options` are propagated.
fn format_batches_with_maxrows(
    batches: &[RecordBatch],
    maxrows_opt: Option<usize>,
) -> Result<String> {
    let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();

    match maxrows_opt {
        Some(maxrows) if row_count > maxrows => {
            // Only format enough batches for maxrows: find the first batch at
            // which the running total passes the limit and keep everything up
            // to and including it. Slicing avoids cloning the batches.
            let mut accumulated_rows = 0usize;
            let needed = batches
                .iter()
                .position(|batch| {
                    accumulated_rows += batch.num_rows();
                    accumulated_rows > maxrows
                })
                .map_or(batches.len(), |idx| idx + 1);

            let formatted = pretty_format_batches_with_options(
                &batches[..needed],
                &DEFAULT_FORMAT_OPTIONS,
            )?
            .to_string();

            Ok(keep_only_maxrows(&formatted, maxrows))
        }
        // No limit given, or everything fits: print all rows.
        _ => Ok(
            pretty_format_batches_with_options(batches, &DEFAULT_FORMAT_OPTIONS)?
                .to_string(),
        ),
    }
}

impl PrintFormat {
/// print the batches to stdout using the specified format
pub fn print_batches(&self, batches: &[RecordBatch]) -> Result<()> {
/// `maxrows` option is only used for `Table` format:
/// If `maxrows` is Some(n), then at most n rows will be displayed
/// If `maxrows` is None, then every row will be displayed
pub fn print_batches(
&self,
batches: &[RecordBatch],
maxrows: Option<usize>,
) -> Result<()> {
if batches.is_empty() {
return Ok(());
}

match self {
Self::Csv => println!("{}", print_batches_with_sep(batches, b',')?),
Self::Tsv => println!("{}", print_batches_with_sep(batches, b'\t')?),
Self::Table => {
println!(
"{}",
pretty_format_batches_with_options(batches, &DEFAULT_FORMAT_OPTIONS)?
)
if maxrows == Some(0) {
return Ok(());
}
println!("{}", format_batches_with_maxrows(batches, maxrows)?,)
}
Self::Json => println!("{}", batches_to_json!(ArrayWriter, batches)),
Self::NdJson => {
Expand Down Expand Up @@ -157,4 +227,66 @@ mod tests {
assert_eq!("{\"a\":1,\"b\":4,\"c\":7}\n{\"a\":2,\"b\":5,\"c\":8}\n{\"a\":3,\"b\":6,\"c\":9}\n", r);
Ok(())
}

#[test]
fn test_format_batches_with_maxrows() -> Result<()> {
    // One batch with a single non-nullable Int32 column `a` holding 1, 2, 3.
    let fields = vec![Field::new("a", DataType::Int32, false)];
    let schema = Arc::new(Schema::new(fields));
    let values = Int32Array::from(vec![1, 2, 3]);
    let batch = RecordBatch::try_new(schema, vec![Arc::new(values)]).unwrap();

    // Full table: nothing is cut off.
    #[rustfmt::skip]
    let all_rows_expected = [
        "+---+",
        "| a |",
        "+---+",
        "| 1 |",
        "| 2 |",
        "| 3 |",
        "+---+",
    ].join("\n");

    // Limit of one row: a single data row followed by the dotted lines.
    #[rustfmt::skip]
    let one_row_expected = [
        "+---+",
        "| a |",
        "+---+",
        "| 1 |",
        "| . |",
        "| . |",
        "| . |",
        "+---+",
    ].join("\n");

    // Three batches (nine rows) with a limit of five rows.
    #[rustfmt::skip]
    let multi_batches_expected = [
        "+---+",
        "| a |",
        "+---+",
        "| 1 |",
        "| 2 |",
        "| 3 |",
        "| 1 |",
        "| 2 |",
        "| . |",
        "| . |",
        "| . |",
        "+---+",
    ].join("\n");

    let single = [batch.clone()];
    let triple = [batch.clone(), batch.clone(), batch.clone()];

    // (case name, input batches, maxrows, expected rendering)
    let cases = [
        ("no_limit", single.as_slice(), None, all_rows_expected.as_str()),
        ("maxrows_less_than_actual", single.as_slice(), Some(1), one_row_expected.as_str()),
        ("maxrows_more_than_actual", single.as_slice(), Some(5), all_rows_expected.as_str()),
        ("maxrows_equals_actual", single.as_slice(), Some(3), all_rows_expected.as_str()),
        ("multi_batches", triple.as_slice(), Some(5), multi_batches_expected.as_str()),
    ];

    for &(case, input, maxrows, expected) in cases.iter() {
        let actual = format_batches_with_maxrows(input, maxrows)?;
        assert_eq!(expected, actual, "case: {}", case);
    }

    Ok(())
}
}
54 changes: 37 additions & 17 deletions datafusion-cli/src/print_options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,51 @@ use std::time::Instant;
/// Settings that control how query results are written to stdout.
pub struct PrintOptions {
/// Output format used to render the record batches.
pub format: PrintFormat,
/// When `true`, the row-count / timing summary line is not printed.
pub quiet: bool,
/// Row limit handed to `PrintFormat::print_batches`; `None` means no limit.
pub maxrows: Option<usize>,
}

fn print_timing_info(row_count: usize, now: Instant) {
println!(
"{} {} in set. Query took {:.3} seconds.\n",
fn get_timing_info_str(
row_count: usize,
maxrows_opt: Option<usize>,
query_start_time: Instant,
) -> String {
let row_word = if row_count == 1 { "row" } else { "rows" };
let maxrows_shown_msg = maxrows_opt
.map(|maxrows| {
if maxrows < row_count {
format!(" ({} shown)", maxrows)
} else {
String::new()
}
})
.unwrap_or_default();

format!(
"{} {} in set{}. Query took {:.3} seconds.\n",
row_count,
if row_count == 1 { "row" } else { "rows" },
now.elapsed().as_secs_f64()
);
row_word,
maxrows_shown_msg,
query_start_time.elapsed().as_secs_f64()
)
}

impl PrintOptions {
/// print the batches to stdout using the specified format
pub fn print_batches(&self, batches: &[RecordBatch], now: Instant) -> Result<()> {
if batches.is_empty() {
if !self.quiet {
print_timing_info(0, now);
}
} else {
self.format.print_batches(batches)?;
if !self.quiet {
let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
print_timing_info(row_count, now);
}
pub fn print_batches(
&self,
batches: &[RecordBatch],
query_start_time: Instant,
) -> Result<()> {
let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
// Elapsed time should not count time for printing batches
let timing_info = get_timing_info_str(row_count, self.maxrows, query_start_time);

self.format.print_batches(batches, self.maxrows)?;

if !self.quiet {
println!("{timing_info}");
}

Ok(())
}
}

0 comments on commit b5e4e5c

Please sign in to comment.