From 7795593c48a11e839ee749754550242aaa88a929 Mon Sep 17 00:00:00 2001
From: leo-blc
Date: Fri, 15 Nov 2024 17:24:19 +0100
Subject: [PATCH] adding archive_plot file

---
 .../archive-update/archive_plot.py            | 175 ++++++++++++++++++
 .../python/archive_load_data.py               |   5 +-
 2 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100644 code-preprocessing/archive-update/archive_plot.py

diff --git a/code-preprocessing/archive-update/archive_plot.py b/code-preprocessing/archive-update/archive_plot.py
new file mode 100644
index 000000000..ae4063eb6
--- /dev/null
+++ b/code-preprocessing/archive-update/archive_plot.py
@@ -0,0 +1,175 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+from collections import defaultdict
+
+try:
+    # Location suggested in the original note; TODO confirm the package name.
+    from cocoprep.archive_load_data import get_file_name_list, create_path, parse_archive_file_name
+except ImportError:
+    # The cocoprep module could not be located, so fall back to importing the
+    # module directly (its directory must then be on sys.path).
+    from archive_load_data import get_file_name_list, create_path, parse_archive_file_name
+
+
+def read_adat_file(file_path):
+    """
+    Reads a .adat file and returns the function evaluations and archive sizes.
+
+    Lines starting with '%' and blank lines are skipped. Every remaining line
+    is counted as one new non-dominated point, and its first
+    whitespace-separated field is read as the function evaluation count.
+
+    Parameters:
+        file_path (str): Path to the .adat file.
+
+    Returns:
+        tuple: (function_evals, archive_sizes), where:
+            - function_evals (list): Function evaluation count at each logging point.
+            - archive_sizes (list): Archive size (number of non-dominated points) at each logging point.
+
+    Raises:
+        ValueError: If the first field of a data line is not an integer.
+    """
+    function_evals = []
+
+    with open(file_path, 'r') as file:
+        for line in file:
+            data = line.split()
+            # Blank lines used to raise an IndexError on data[0]; skip them
+            # together with the '%' comment lines.
+            if not data or line.startswith('%'):
+                continue
+            function_evals.append(int(data[0]))
+
+    # Each data line adds exactly one point, so the archive sizes are 1..n.
+    archive_sizes = list(range(1, len(function_evals) + 1))
+    return function_evals, archive_sizes
+
+
+def _collect_archive_data():
+    """
+    Reads all .adat files reported by get_file_name_list for the current directory.
+
+    Files whose names parse_archive_file_name rejects are reported and skipped.
+
+    Returns:
+        dict: Nested mapping, keyed by dimension and then by function, to a list
+        of (function_evals, archive_sizes) tuples, one tuple per file read.
+    """
+    data_by_dimension = defaultdict(lambda: defaultdict(list))
+
+    for file_path in get_file_name_list(".", ending=".adat"):
+        try:
+            _, function, _, dimension = parse_archive_file_name(file_path)
+        except Exception as e:
+            print(f"Skipping file {file_path}: {e}")
+            continue
+        data_by_dimension[dimension][function].append(read_adat_file(file_path))
+
+    return data_by_dimension
+
+
+def _average_archive_size(instances):
+    """
+    Averages the archive size over instances at every evaluation count.
+
+    Between two logging points an instance keeps its last logged archive size;
+    before its first logging point the size is 0. Instances without any logged
+    point are left out of the average.
+
+    Parameters:
+        instances (list): (function_evals, archive_sizes) tuples, each with
+            function_evals in ascending order.
+
+    Returns:
+        tuple: (eval_points, average_sizes), two arrays covering the evaluation
+        counts 1..max; both are empty if no instance has a logged point.
+    """
+    runs = [(np.asarray(evals), np.asarray(sizes)) for evals, sizes in instances if len(evals) > 0]
+    if not runs:
+        # max() over nothing used to raise a ValueError here.
+        return np.array([], dtype=int), np.array([])
+
+    max_eval_count = max(int(evals.max()) for evals, _ in runs)
+    eval_points = np.arange(1, max_eval_count + 1)
+    archive_sizes_sum = np.zeros(max_eval_count)
+
+    for evals, sizes in runs:
+        # Number of logging points reached at each evaluation count; side="right"
+        # also copes with repeated counts, which stalled the former index walk.
+        logged = np.searchsorted(evals, eval_points, side="right")
+        # Prepend 0 so that "no logging point yet" maps to an empty archive.
+        archive_sizes_sum += np.concatenate(([0], sizes))[logged]
+
+    return eval_points, archive_sizes_sum / len(runs)
+
+
+def _finish_plot(output_dir, plot_filename):
+    """Adds legend and grid to the current figure, saves it to output_dir and closes it."""
+    plt.legend(title="Function", loc="center left", bbox_to_anchor=(1.0, 0.5), fontsize="small", framealpha=0.5)
+    plt.grid(True, which="both", linestyle="--", linewidth=0.5)
+    plt.savefig(os.path.join(output_dir, plot_filename), bbox_inches="tight")
+    plt.close()
+
+
+def plot_archive_size_by_dimension(output_dir="plots"):
+    """
+    Generates separate plots of archive size over time for each dimension,
+    combining all functions and instances for that dimension.
+
+    Parameters:
+        output_dir (str): Directory to save the plots.
+    """
+    create_path(output_dir)
+
+    for dimension, functions in _collect_archive_data().items():
+        plt.figure(figsize=(12, 8))
+        plt.title(f"Archive Size vs Function Evaluations for Dimension {dimension}")
+        plt.xlabel("Function Evaluations")
+        plt.ylabel("Archive Size")
+
+        colors = plt.cm.tab20(np.linspace(0, 1, len(functions)))
+
+        for color, (function_name, instances) in zip(colors, functions.items()):
+            for evals, sizes in instances:
+                plt.plot(evals, sizes, color=color, linewidth=0.7, alpha=0.5)
+
+            # Empty line: one legend entry per function instead of one per instance.
+            plt.plot([], [], color=color, label=f"{function_name} instances")
+
+        plot_filename = f"archive_size_{dimension}.png"
+        _finish_plot(output_dir, plot_filename)
+        print(f"Plot for dimension {dimension} saved as {plot_filename}")
+
+
+def plot_average_archive_size_by_dimension(output_dir="plots"):
+    """
+    Generates separate plots of average archive size over time for each dimension,
+    combining all functions and averaging over instances for that function.
+
+    Parameters:
+        output_dir (str): Directory to save the plots.
+    """
+    create_path(output_dir)
+
+    for dimension, functions in _collect_archive_data().items():
+        plt.figure(figsize=(12, 8))
+        plt.title(f"Average Archive Size vs Function Evaluations for Dimension {dimension}")
+        plt.xlabel("Function Evaluations")
+        plt.ylabel("Average Archive Size")
+
+        colors = plt.cm.tab20(np.linspace(0, 1, len(functions)))
+
+        for color, (function_name, instances) in zip(colors, functions.items()):
+            eval_points, average_archive_size = _average_archive_size(instances)
+            plt.plot(eval_points, average_archive_size, color=color, label=function_name)
+
+        plot_filename = f"average_archive_size_{dimension}.png"
+        _finish_plot(output_dir, plot_filename)
+        print(f"Average plot for dimension {dimension} saved as {plot_filename}")
+
+
+if __name__ == "__main__":
+    plot_archive_size_by_dimension()
+    plot_average_archive_size_by_dimension()
diff --git a/code-preprocessing/archive-update/python/archive_load_data.py b/code-preprocessing/archive-update/python/archive_load_data.py
index 537745cab..c9077c15b 100755
--- a/code-preprocessing/archive-update/python/archive_load_data.py
+++ b/code-preprocessing/archive-update/python/archive_load_data.py
@@ -11,7 +11,10 @@
 from itertools import groupby
 from operator import itemgetter
 
-from .archive_exceptions import PreprocessingWarning, PreprocessingException
+try:
+    from .archive_exceptions import PreprocessingWarning, PreprocessingException
+except (ImportError, ValueError):  # not imported as part of a package
+    from archive_exceptions import PreprocessingWarning, PreprocessingException
 
 
 def get_file_name_list(paths, ending=None):