diff --git a/ci/eval/compare/cmp-stats.py b/ci/eval/compare/cmp-stats.py
deleted file mode 100644
index ffc9026ca72e7..0000000000000
--- a/ci/eval/compare/cmp-stats.py
+++ /dev/null
@@ -1,141 +0,0 @@
-import json
-import os
-from scipy.stats import ttest_rel
-import pandas as pd
-import numpy as np
-from pathlib import Path
-
-# Define metrics of interest (can be expanded as needed)
-METRIC_PREFIXES = ("nr", "gc")
-
-def flatten_data(json_data: dict) -> dict:
-    """
-    Extracts and flattens metrics from JSON data.
-    This is needed because the JSON data can be nested.
-    For example, the JSON data entry might look like this:
-
-    "gc":{"cycles":13,"heapSize":5404549120,"totalBytes":9545876464}
-
-    Flattened:
-
-    "gc.cycles": 13
-    "gc.heapSize": 5404549120
-    ...
-
-    Args:
-        json_data (dict): JSON data containing metrics.
-    Returns:
-        dict: Flattened metrics with keys as metric names.
-    """
-    flat_metrics = {}
-    for k, v in json_data.items():
-        if isinstance(v, (int, float)):
-            flat_metrics[k] = v
-        elif isinstance(v, dict):
-            for sub_k, sub_v in v.items():
-                flat_metrics[f"{k}.{sub_k}"] = sub_v
-    return flat_metrics
-
-
-
-
-def load_all_metrics(directory: Path) -> dict:
-    """
-    Loads all stats JSON files in the specified directory and extracts metrics.
-
-    Args:
-        directory (Path): Directory containing JSON files.
-    Returns:
-        dict: Dictionary with filenames as keys and extracted metrics as values.
-    """
-    metrics = {}
-    for system_dir in directory.iterdir():
-        assert system_dir.is_dir()
-
-        for chunk_output in system_dir.iterdir():
-            with chunk_output.open() as f:
-                data = json.load(f)
-                metrics[f"{system_dir.name}/${chunk_output.name}"] = flatten_data(data)
-
-    return metrics
-
-def dataframe_to_markdown(df: pd.DataFrame) -> str:
-    markdown_lines = []
-
-    # Header (get column names and format them)
-    header = '\n| ' + ' | '.join(df.columns) + ' |'
-    markdown_lines.append(header)
-    markdown_lines.append("| - " * (len(df.columns)) + "|")  # Separator line
-
-    # Iterate over rows to build Markdown rows
-    for _, row in df.iterrows():
-        # TODO: define threshold for highlighting
-        highlight = False
-
-        fmt = lambda x: f"**{x}**" if highlight else f"{x}"
-
-        # Check for no change and NaN in p_value/t_stat
-        row_values = []
-        for val in row:
-            if isinstance(val, float) and np.isnan(val):  # For NaN values in p-value or t-stat
-                row_values.append("-")  # Custom symbol for NaN
-            elif isinstance(val, float) and val == 0:  # For no change (mean_diff == 0)
-                row_values.append("-")  # Custom symbol for no change
-            else:
-                row_values.append(fmt(f"{val:.4f}" if isinstance(val, float) else str(val)))
-
-        markdown_lines.append('| ' + ' | '.join(row_values) + ' |')
-
-    return '\n'.join(markdown_lines)
-
-
-def perform_pairwise_tests(before_metrics: dict, after_metrics: dict) -> pd.DataFrame:
-    common_files = sorted(set(before_metrics) & set(after_metrics))
-    all_keys = sorted({ metric_keys for file_metrics in before_metrics.values() for metric_keys in file_metrics.keys() })
-
-    results = []
-
-    for key in all_keys:
-        before_vals, after_vals = [], []
-
-        for fname in common_files:
-            if key in before_metrics[fname] and key in after_metrics[fname]:
-                before_vals.append(before_metrics[fname][key])
-                after_vals.append(after_metrics[fname][key])
-
-        if len(before_vals) >= 2:
-            before_arr = np.array(before_vals)
-            after_arr = np.array(after_vals)
-
-            diff = after_arr - before_arr
-            pct_change = 100 * diff / before_arr
-            t_stat, p_val = ttest_rel(after_arr, before_arr)
-
-            results.append({
-                "metric": key,
-                "mean_before": np.mean(before_arr),
-                "mean_after": np.mean(after_arr),
-                "mean_diff": np.mean(diff),
-                "mean_%_change": np.mean(pct_change),
-                "p_value": p_val,
-                "t_stat": t_stat
-            })
-
-    df = pd.DataFrame(results).sort_values("p_value")
-    return df
-
-
-if __name__ == "__main__":
-    before_dir = os.environ.get("BEFORE_DIR")
-    after_dir = os.environ.get("AFTER_DIR")
-
-    if not before_dir or not after_dir:
-        print("Error: Environment variables 'BEFORE_DIR' and 'AFTER_DIR' must be set.")
-        exit(1)
-
-    before_metrics = load_all_metrics(Path(before_dir) / "stats")
-    after_metrics = load_all_metrics(Path(after_dir) / "stats")
-
-    df1 = perform_pairwise_tests(before_metrics, after_metrics)
-    markdown_table = dataframe_to_markdown(df1)
-    print(markdown_table)
diff --git a/ci/eval/compare/default.nix b/ci/eval/compare/default.nix
index 7b677c6d01f3c..04676476f4b2a 100644
--- a/ci/eval/compare/default.nix
+++ b/ci/eval/compare/default.nix
@@ -3,7 +3,6 @@
   jq,
   runCommand,
   writeText,
-  python3,
   ...
 }:
 {
@@ -126,59 +125,18 @@ let
 in
 runCommand "compare"
   {
-    nativeBuildInputs = [
-      jq
-      (python3.withPackages (
-        ps: with ps; [
-          numpy
-          pandas
-          scipy
-        ]
-      ))
-
-    ];
+    nativeBuildInputs = [ jq ];
     maintainers = builtins.toJSON maintainers;
     passAsFile = [ "maintainers" ];
-    env = {
-      BEFORE_DIR = "${beforeResultDir}";
-      AFTER_DIR = "${afterResultDir}";
-    };
   }
   ''
     mkdir $out
 
     cp ${changed-paths} $out/changed-paths.json
-
-    if jq -e '(.attrdiff.added | length == 0) and (.attrdiff.removed | length == 0)' "${changed-paths}" > /dev/null; then
-      # Chunks have changed between revisions
-      # We cannot generate a performance comparison
-      {
-        echo
-        echo "# Performance comparison"
-        echo
-        echo "This compares the performance of this branch against its pull request base branch (e.g., 'master')"
-        echo
-        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
-        echo
-      } >> $out/step-summary.md
-
-      python3 ${./cmp-stats.py} >> $out/step-summary.md
-
-    else
-      # Package chunks are the same in both revisions
-      # We can use the to generate a performance comparison
-      {
-        echo
-        echo "# Performance Comparison"
-        echo
-        echo "Performance stats were skipped because the package sets differ between the two revisions."
-        echo
-        echo "For further help please refer to: [ci/README.md](https://github.com/NixOS/nixpkgs/blob/master/ci/README.md)"
-      } >> $out/step-summary.md
-    fi
-
-    jq -r -f ${./generate-step-summary.jq} < ${changed-paths} >> $out/step-summary.md
+    jq -r -f ${./generate-step-summary.jq} < ${changed-paths} > $out/step-summary.md
 
     cp "$maintainersPath" "$out/maintainers.json"
+
+    # TODO: Compare eval stats
   ''
diff --git a/ci/eval/default.nix b/ci/eval/default.nix
index 639e75ec4211f..8537084b1bd5e 100644
--- a/ci/eval/default.nix
+++ b/ci/eval/default.nix
@@ -9,7 +9,6 @@
   nixVersions,
   jq,
   sta,
-  python3,
 }:
 
 let
@@ -271,7 +270,6 @@ let
       runCommand
       writeText
       supportedSystems
-      python3
       ;
   };
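
For reference, the statistical core of the removed cmp-stats.py is a paired t-test
per metric (scipy.stats.ttest_rel) across before/after eval runs. A minimal runnable
sketch of that step follows; the metric name and sample values are hypothetical
stand-ins, not real eval stats:

    import numpy as np
    from scipy.stats import ttest_rel

    # Hypothetical "gc.totalBytes" readings for the same three eval chunks,
    # measured once on the base branch ("before") and once on the PR ("after").
    before = np.array([9_545_876_464.0, 9_401_223_100.0, 9_602_554_380.0])
    after = np.array([9_530_112_000.0, 9_395_000_512.0, 9_588_431_744.0])

    # Paired test: each chunk is compared against itself across the two runs,
    # which controls for per-chunk differences in workload size.
    t_stat, p_val = ttest_rel(after, before)
    pct_change = 100 * (after - before) / before

    print(f"mean % change: {pct_change.mean():.4f}")
    print(f"t = {t_stat:.4f}, p = {p_val:.4f}")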