New command: latte list for tabular listing of run results

This command is intended for quickly comparing multiple benchmark runs or for finding the right report. It accepts basic filtering by workload, function, or tags. Additionally, the speed of loading reports has been significantly improved. This feature will be enhanced in the future.
pkolaczk · Jul 5, 2024 · 3158474 · 3158474
1 parent 84988ee
commit 3158474
Show file tree

Hide file tree

Showing 6 changed files with 389 additions and 30 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -54,6 +54,7 @@ tracing = "0.1"
 tracing-subscriber = "0.3"
 try-lock = "0.2.3"
 uuid = { version = "1.1", features = ["v4"] }
+walkdir = "2"
 
 [dev-dependencies]
 tokio = { version = "1", features = ["rt", "test-util", "macros"] }

diff --git a/src/config.rs b/src/config.rs
@@ -465,6 +465,29 @@ impl RunCommand {
     }
 }
 
+// Command-line arguments of the `list` subcommand.
+// (A plain comment is used on purpose: a doc comment here would be picked up
+// by clap as help text.)
+#[derive(Parser, Debug)]
+pub struct ListCommand {
+    /// Lists only the runs whose workload path contains the given text.
+    #[clap()]
+    pub workload: Option<String>,
+
+    /// Lists only the runs of the given function.
+    #[clap(long, short('f'))]
+    pub function: Option<String>,
+
+    /// Lists only the runs having at least one of the specified tags.
+    /// Can be given multiple times.
+    #[clap(long("tag"), number_of_values = 1)]
+    pub tags: Vec<String>,
+
+    /// Path to a directory the JSON reports were written to.
+    /// Can be given multiple times.
+    #[clap(long, short('o'), default_value = ".", number_of_values = 1)]
+    pub output: Vec<PathBuf>,
+
+    /// Descends into subdirectories recursively.
+    #[clap(short('r'), long)]
+    pub recursive: bool,
+}
+
 #[derive(Parser, Debug)]
 pub struct ShowCommand {
     /// Path to the JSON report file
@@ -545,6 +568,10 @@ pub enum Command {
     /// Additionally dumps all data into a JSON report file.
     Run(RunCommand),
 
+    /// Lists benchmark reports saved in the current or specified directory
+    /// with summaries of their results.
+    List(ListCommand),
+
     /// Displays the report(s) of previously executed benchmark(s).
     ///
     /// Can compare two runs.

diff --git a/src/main.rs b/src/main.rs
@@ -1,23 +1,27 @@
 use std::env;
+use std::ffi::OsStr;
 use std::fs::File;
 use std::io::{stdout, Write};
 use std::path::{Path, PathBuf};
 use std::process::exit;
 use std::time::Duration;
 
 use clap::Parser;
+use config::RunCommand;
+use futures::stream::FuturesUnordered;
+use futures::StreamExt;
 use hdrhistogram::serialization::interval_log::Tag;
 use hdrhistogram::serialization::{interval_log, V2DeflateSerializer};
 use itertools::Itertools;
 use rune::Source;
 use search_path::SearchPath;
 use tokio::runtime::{Builder, Runtime};
-
-use config::RunCommand;
+use tokio::task::spawn_blocking;
+use walkdir::WalkDir;
 
 use crate::config::{
-    AppConfig, Command, ConnectionConf, EditCommand, HdrCommand, Interval, LoadCommand,
-    SchemaCommand, ShowCommand,
+    AppConfig, Command, ConnectionConf, EditCommand, HdrCommand, Interval, ListCommand,
+    LoadCommand, SchemaCommand, ShowCommand,
 };
 use crate::context::*;
 use crate::context::{CassError, CassErrorKind, Context, SessionStats};
@@ -26,9 +30,10 @@ use crate::error::{LatteError, Result};
 use crate::exec::{par_execute, ExecutionOptions};
 use crate::plot::plot_graph;
 use crate::progress::Progress;
-use crate::report::{Report, RunConfigCmp};
+use crate::report::{PathAndSummary, Report, RunConfigCmp};
 use crate::sampler::Sampler;
 use crate::stats::{BenchmarkCmp, BenchmarkStats, Recorder};
+use crate::table::{Alignment, Table};
 use crate::workload::{FnRef, Program, Workload, WorkloadStats, LOAD_FN};
 
 mod config;
@@ -42,6 +47,7 @@ mod progress;
 mod report;
 mod sampler;
 mod stats;
+mod table;
 mod workload;
 
 const VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -301,6 +307,74 @@ async fn run(conf: RunCommand) -> Result<()> {
     Ok(())
 }
 
+/// Prints a table summarizing the benchmark reports found under `conf.output`.
+///
+/// Every regular file with a `json` extension found by walking the given
+/// paths (descending into subdirectories only when `conf.recursive` is set)
+/// is loaded as a report. Files that fail to load are reported on stderr and
+/// skipped. Reports rejected by `should_list` are left out. The remaining
+/// reports are sorted by workload, function and timestamp. Nothing is printed
+/// when no report remains.
+async fn list(conf: ListCommand) -> Result<()> {
+    let max_depth = if conf.recursive { usize::MAX } else { 1 };
+
+    // Loading reports is a bit slow, so we do it in parallel:
+    let mut report_futures = FuturesUnordered::new();
+    for path in &conf.output {
+        let walk = WalkDir::new(path).max_depth(max_depth);
+        // `flatten` drops the entries for which the directory walk returned an error.
+        for entry in walk.into_iter().flatten() {
+            if !entry.file_type().is_file() {
+                continue;
+            }
+            if entry.path().extension() != Some(OsStr::new("json")) {
+                continue;
+            }
+
+            let path = entry.path().to_path_buf();
+            report_futures.push(spawn_blocking(move || {
+                // Load first, then move the path into the result; no clone needed.
+                let report = Report::load(&path);
+                (path, report)
+            }));
+        }
+    }
+
+    let mut reports = Vec::new();
+    while let Some(report) = report_futures.next().await {
+        // `unwrap` propagates a failure of the loading task itself (e.g. a panic);
+        // ordinary load errors arrive as the `Err` in the tuple and are handled below.
+        match report.unwrap() {
+            (path, Ok(report)) if should_list(&report, &conf) => {
+                reports.push(PathAndSummary(path, report.summary()))
+            }
+            (path, Err(e)) => eprintln!("Failed to load report {}: {}", path.display(), e),
+            _ => {}
+        };
+    }
+
+    if reports.is_empty() {
+        return Ok(());
+    }
+
+    // Compare borrowed keys so that sorting does not clone paths and strings.
+    reports.sort_unstable_by(|a, b| {
+        (&a.1.workload, &a.1.function, a.1.timestamp).cmp(&(
+            &b.1.workload,
+            &b.1.function,
+            b.1.timestamp,
+        ))
+    });
+
+    let mut table = Table::new(PathAndSummary::COLUMNS);
+    // Entries 7-9 of `PathAndSummary::COLUMNS` are throughput and the two
+    // latencies (assuming `align` takes a zero-based column index).
+    table.align(7, Alignment::Right);
+    table.align(8, Alignment::Right);
+    table.align(9, Alignment::Right);
+    for r in reports {
+        table.push(r);
+    }
+    println!("{}", table);
+    Ok(())
+}
+
+/// Decides whether `report` passes the filters given in `conf`.
+///
+/// A report is kept only if all of the following hold:
+/// - no workload filter is set, or the report's workload path contains it,
+/// - no function filter is set, or the report's function equals it,
+/// - no tag filter is set, or the report carries at least one of the tags.
+fn should_list(report: &Report, conf: &ListCommand) -> bool {
+    let run = &report.conf;
+
+    let workload_matches = conf.workload.as_ref().map_or(true, |pattern| {
+        run.workload.to_string_lossy().contains(pattern.as_str())
+    });
+    let function_matches = conf
+        .function
+        .as_ref()
+        .map_or(true, |wanted| run.function == *wanted);
+    let tags_match = conf.tags.is_empty() || conf.tags.iter().any(|tag| run.tags.contains(tag));
+
+    workload_matches && function_matches && tags_match
+}
+
 async fn show(conf: ShowCommand) -> Result<()> {
     let report1 = load_report_or_abort(&conf.report);
     let report2 = conf.baseline.map(|p| load_report_or_abort(&p));
@@ -376,6 +450,7 @@ async fn async_main(command: Command) -> Result<()> {
         Command::Schema(config) => schema(config).await?,
         Command::Load(config) => load(config).await?,
         Command::Run(config) => run(config).await?,
+        Command::List(config) => list(config).await?,
         Command::Show(config) => show(config).await?,
         Command::Hdr(config) => export_hdr_log(config).await?,
         Command::Plot(config) => plot_graph(config).await?,

diff --git a/src/report.rs b/src/report.rs
@@ -1,23 +1,23 @@
-use core::fmt;
-use std::collections::BTreeSet;
-use std::fmt::{Display, Formatter};
-use std::num::NonZeroUsize;
-use std::path::Path;
-use std::{fs, io};
-
-use chrono::{Local, TimeZone};
+use crate::config::{RunCommand, PRINT_RETRY_ERROR_LIMIT};
+use crate::stats::{
+    BenchmarkCmp, BenchmarkStats, Bucket, Mean, Percentile, Sample, Significance, TimeDistribution,
+};
+use crate::table::Row;
+use chrono::{DateTime, Local, TimeZone};
 use console::{pad_str, style, Alignment};
+use core::fmt;
 use err_derive::*;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use statrs::statistics::Statistics;
+use std::collections::BTreeSet;
+use std::fmt::{Display, Formatter};
+use std::io::{BufReader, BufWriter};
+use std::num::NonZeroUsize;
+use std::path::{Path, PathBuf};
+use std::{fs, io};
 use strum::IntoEnumIterator;
 
-use crate::config::{RunCommand, PRINT_RETRY_ERROR_LIMIT};
-use crate::stats::{
-    BenchmarkCmp, BenchmarkStats, Bucket, Mean, Percentile, Sample, Significance, TimeDistribution,
-};
-
 /// A standard error is multiplied by this factor to get the error margin.
 /// For a normally distributed random variable,
 /// this should give us 0.999 confidence the expected value is within the (result +- error) range.
@@ -53,16 +53,47 @@ impl Report {
     /// Loads benchmark results from a JSON file
     pub fn load(path: &Path) -> Result<Report, ReportLoadError> {
         let file = fs::File::open(path)?;
-        let report = serde_json::from_reader(file)?;
+        // `serde_json::from_reader` does not buffer its input, so wrap the file.
+        let reader = BufReader::new(file);
+        let report = serde_json::from_reader(reader)?;
         Ok(report)
     }
 
     /// Saves benchmark results to a JSON file
+    ///
+    /// Returns an error if the file cannot be created, if serialization or
+    /// writing fails, or if the final flush of the buffered data fails.
     pub fn save(&self, path: &Path) -> io::Result<()> {
         let f = fs::File::create(path)?;
-        serde_json::to_writer_pretty(f, &self)?;
+        let mut writer = BufWriter::new(f);
+        serde_json::to_writer_pretty(&mut writer, &self)?;
+        // Flush explicitly: dropping a `BufWriter` ignores any error raised
+        // while writing out the bytes still held in its buffer.
+        io::Write::flush(&mut writer)?;
         Ok(())
     }
+
+    /// Extracts the key figures of this report for use in tabular listings.
+    ///
+    /// # Panics
+    /// Panics if the cycle time percentiles have no entry at the index of
+    /// `Percentile::P50` or `Percentile::P99`.
+    pub fn summary(&self) -> Summary {
+        let conf = &self.conf;
+        let percentiles = &self.result.cycle_time_ms.percentiles;
+        // Reads the value stored for a single percentile.
+        let latency = |p: Percentile| percentiles.get(p as usize).unwrap().value;
+
+        Summary {
+            workload: conf.workload.clone(),
+            function: conf.function.clone(),
+            timestamp: conf
+                .timestamp
+                .and_then(|secs| Local.timestamp_opt(secs, 0).latest()),
+            tags: conf.tags.clone(),
+            params: conf.params.clone(),
+            rate: conf.rate,
+            throughput: self.result.cycle_throughput.value,
+            latency_p50: latency(Percentile::P50),
+            latency_p99: latency(Percentile::P99),
+        }
+    }
 }
 
 /// A displayable, optional value with an optional error.
@@ -436,14 +467,7 @@ impl RunConfigCmp<'_> {
     }
 
     fn format_time(&self, conf: &RunCommand, format: &str) -> String {
-        conf.timestamp
-            .and_then(|ts| {
-                Local
-                    .timestamp_opt(ts, 0)
-                    .latest()
-                    .map(|l| l.format(format).to_string())
-            })
-            .unwrap_or_default()
+        // Not a recursive call: the unqualified name resolves to the free
+        // function `format_time` of this module, not to this method.
+        format_time(conf.timestamp, format)
     }
 
     /// Returns the set union of custom user parameters in both configurations.
@@ -787,3 +811,79 @@ impl<'a> Display for BenchmarkCmp<'a> {
         Ok(())
     }
 }
+
+/// A report file path (field 0) paired with the summary of the report
+/// loaded from that file (field 1).
+#[derive(Debug)]
+pub struct PathAndSummary(pub PathBuf, pub Summary);
+
+/// Key figures of a single benchmark report, as produced by `Report::summary`.
+#[derive(Debug)]
+pub struct Summary {
+    /// Workload path, copied from the run configuration.
+    pub workload: PathBuf,
+    /// Function name, copied from the run configuration.
+    pub function: String,
+    /// Run timestamp converted to local time; `None` when the run
+    /// configuration has no timestamp or it does not map to a local time.
+    pub timestamp: Option<DateTime<Local>>,
+    /// Tags, copied from the run configuration.
+    pub tags: Vec<String>,
+    /// Key-value parameters, copied from the run configuration.
+    pub params: Vec<(String, String)>,
+    /// Rate setting of the run configuration, if any.
+    pub rate: Option<f64>,
+    /// Value of the result's `cycle_throughput`.
+    pub throughput: f64,
+    /// Value of the `Percentile::P50` entry of the result's `cycle_time_ms` percentiles.
+    pub latency_p50: f64,
+    /// Value of the `Percentile::P99` entry of the result's `cycle_time_ms` percentiles.
+    pub latency_p99: f64,
+}
+
+impl PathAndSummary {
+    /// Column names of the report listing. Each name is also a key
+    /// recognized by the `Row::cell_value` implementation of this type.
+    pub const COLUMNS: &'static [&'static str] = &[
+        "File",
+        "Workload",
+        "Function",
+        "Timestamp",
+        "Tags",
+        "Params",
+        "Rate",
+        "Thrpt. [req/s]",
+        "P50 [ms]",
+        "P99 [ms]",
+    ];
+}
+
+impl Row for PathAndSummary {
+    /// Renders the cell of this row that belongs to the column named `column`.
+    ///
+    /// Returns `None` for an unrecognized column name, for the timestamp of a
+    /// run without one, and for the rate of a run without one.
+    fn cell_value(&self, column: &str) -> Option<String> {
+        let PathAndSummary(path, summary) = self;
+        let text = match column {
+            "File" => path.display().to_string(),
+            // Only the last path component of the workload is shown.
+            "Workload" => summary
+                .workload
+                .file_name()
+                .unwrap_or_default()
+                .to_string_lossy()
+                .into_owned(),
+            "Function" => summary.function.clone(),
+            "Timestamp" => {
+                return summary
+                    .timestamp
+                    .map(|ts| ts.format("%Y-%m-%d %H:%M:%S").to_string())
+            }
+            "Tags" => summary.tags.join(", "),
+            "Params" => summary
+                .params
+                .iter()
+                .map(|(k, v)| format!("{k} = {v}"))
+                .join(", "),
+            "Rate" => return summary.rate.map(|r| r.to_string()),
+            "Thrpt. [req/s]" => format!("{:.0}", summary.throughput),
+            // NOTE(review): the latencies are read from `cycle_time_ms`, yet they
+            // are multiplied by 1000 under an `[ms]` header; confirm the unit.
+            "P50 [ms]" => format!("{:.1}", summary.latency_p50 * 1000.0),
+            "P99 [ms]" => format!("{:.1}", summary.latency_p99 * 1000.0),
+            _ => return None,
+        };
+        Some(text)
+    }
+}
+
+/// Formats `timestamp` (seconds passed to `Local.timestamp_opt`) as local
+/// time, using the `chrono` format string `format`.
+///
+/// Returns an empty string when `timestamp` is `None` or does not map to a
+/// local time.
+fn format_time(timestamp: Option<i64>, format: &str) -> String {
+    let local_time = timestamp.and_then(|secs| Local.timestamp_opt(secs, 0).latest());
+    match local_time {
+        Some(time) => time.format(format).to_string(),
+        None => String::new(),
+    }
+}