diff --git a/.vscode/settings.json b/.vscode/settings.json
index 0af75e918..cda3677b1 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -285,6 +285,7 @@
         "statsmodels",
         "STFT",
         "subintervals",
+        "supremum",
         "suptitle",
         "supxlabel",
         "supylabel",
diff --git a/test_mrs.ipynb b/docs/notebooks/test_mrs.ipynb
similarity index 62%
rename from test_mrs.ipynb
rename to docs/notebooks/test_mrs.ipynb
index 09015b197..d25e7c9a2 100644
--- a/test_mrs.ipynb
+++ b/docs/notebooks/test_mrs.ipynb
@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
@@ -29,11 +29,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
-    "from rocketpy.simulation.multivariate_rejection_sampler import MultivariateRejectionSampler\n",
+    "from rocketpy.simulation.multivariate_rejection_sampler import (\n",
+    "    MultivariateRejectionSampler,\n",
+    ")\n",
     "from rocketpy import MonteCarlo\n",
     "from scipy.stats import norm\n",
     "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
-    "montecarlo_filepath = \"docs/notebooks/monte_carlo_analysis/monte_carlo_analysis_outputs/monte_carlo_class_example\"\n",
-    "mrs_filepath = \"mrs\"\n",
+    "monte_carlo_filepath = (\n",
+    "    \"monte_carlo_analysis/monte_carlo_analysis_outputs/monte_carlo_class_example\"\n",
+    ")\n",
+    "mrs_filepath = \"monte_carlo_analysis/monte_carlo_analysis_outputs/mrs\"\n",
     "old_mass_pdf = norm(15.426, 0.5).pdf\n",
-    "new_mass_pdf = norm(15, 0.5) .pdf\n",
+    "new_mass_pdf = norm(15, 0.5).pdf\n",
     "distribution_dict = {\n",
     "    \"mass\": (old_mass_pdf, new_mass_pdf),\n",
     "}\n",
     "mrs = MultivariateRejectionSampler(\n",
-    "    montecarlo_filepath=montecarlo_filepath,\n",
+    "    monte_carlo_filepath=monte_carlo_filepath,\n",
     "    mrs_filepath=mrs_filepath,\n",
-    "    distribution_dict=distribution_dict,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 89,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "107.0"
-      ]
-     },
-     "execution_count": 89,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mrs.expected_sample_size"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 90,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
-    "mrs.sample()"
+    "mrs.sample(distribution_dict=distribution_dict)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The following input file was imported: mrs.inputs.txt\n",
-      "A total of 109 simulations results were loaded from the following output file: mrs.outputs.txt\n",
+      "The following input file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.inputs.txt\n",
+      "A total of 116 simulations results were loaded from the following output file: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.outputs.txt\n",
       "\n",
-      "The following error file was imported: mrs.errors.txt\n"
+      "The following error file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.errors.txt\n"
      ]
     }
    ],
@@ -110,17 +93,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "A total of 109 simulations results were loaded from the following output file: mrs.outputs.txt\n",
+      "A total of 116 simulations results were loaded from the following output file: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.outputs.txt\n",
       "\n",
-      "The following input file was imported: mrs.inputs.txt\n",
-      "The following error file was imported: mrs.errors.txt\n"
+      "The following input file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.inputs.txt\n",
+      "The following error file was imported: monte_carlo_analysis/monte_carlo_analysis_outputs/mrs.errors.txt\n"
      ]
     }
    ],
@@ -130,15 +113,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "MRS mass mean after resample: 15.029610376989238\n",
-      "MRS mass std after resample: 0.5213162519453568\n"
+      "MRS mass mean after resample: 15.041934326472004\n",
+      "MRS mass std after resample: 0.48924085702427966\n"
      ]
     }
    ],
@@ -150,11 +133,38 @@
     "print(f\"MRS mass mean after resample: {np.mean(mrs_mass_list)}\")\n",
     "print(f\"MRS mass std after resample: {np.std(mrs_mass_list)}\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "107.0"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mrs.expected_sample_size"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
 "metadata": {
  "kernelspec": {
-   "display_name": "testnotebook",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
@@ -168,7 +178,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
+   "version": "3.10.12"
  }
 },
 "nbformat": 4,
diff --git a/rocketpy/simulation/multivariate_rejection_sampler.py b/rocketpy/simulation/multivariate_rejection_sampler.py
index 160ba78cf..e1e63893e 100644
--- a/rocketpy/simulation/multivariate_rejection_sampler.py
+++ b/rocketpy/simulation/multivariate_rejection_sampler.py
@@ -9,118 +9,109 @@
 """

 import json
+from dataclasses import dataclass
 from random import random

 from rocketpy._encoders import RocketPyEncoder


+@dataclass
+class SampleInformation:
+    """Sample information used in the MRS."""
+
+    inputs_json: dict = None
+    outputs_json: dict = None
+    probability_ratio: float = None
+    acceptance_probability: float = None
+
+
 class MultivariateRejectionSampler:
     """Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo
-    results.
+    results. The class currently assumes that all input variables are independent
+    of each other when performing the resampling.
+
+    Attributes
+    ----------
+    monte_carlo_filepath : str
+        Filepath prefix of the MonteCarlo files used as resampling input.
+    mrs_filepath : str
+        Filepath prefix of the files written with the accepted sub-samples.
+    expected_sample_size : float
+        Expected number of accepted samples, set once `sample` is called.
     """

     def __init__(
         self,
-        montecarlo_filepath,
+        monte_carlo_filepath,
         mrs_filepath,
-        distribution_dict,
     ):
         """Initializes Multivariate Rejection Sampler (MRS) class

         Parameters
         ----------
-        montecarlo_filepath : str
+        monte_carlo_filepath : str
             Filepath prefixes to the files created from a MonteCarlo simulation
-            results.
+            results and used as input for resampling.
         mrs_filepath : str
             Filepath prefix to MRS obtained samples. The files created follow the same
-            structure as those created by the MonteCarlo class.
-        distribution : dict
-            Dictionary whose keys contain the name whose distribution changed. The values
-            are tuples or lists with two entries. The first entry is a probability
-            density (mass) function for the old distribution, while the second entry
-            is the probability density function for the new distribution.
+            structure as those created by the MonteCarlo class, but now contain
+            only the selected sub-samples.

         Returns
         -------
         None
         """
-        self.montecarlo_filepath = montecarlo_filepath
+        self.monte_carlo_filepath = monte_carlo_filepath
         self.mrs_filepath = mrs_filepath
-        self.distribution_dict = distribution_dict
+        self.distribution_dict = None
         self.original_sample_size = 0
         self.sup_ratio = 1
         self.expected_sample_size = None
         self.final_sample_size = None
-        # TODO: is there a better way to construct input/output_list?
-        # Iterating and appending over lists is costly. However, the
-        # alternative, reading the file twice to get the number of lines,
-        # also does not seem to be a good option.
-        self.output_list = []
-        self.input_list = []
+        self.input_variables_names = set()
+        self.output_variables_names = set()
+        self.all_sample_list = []
+        self.accepted_sample_list = []
         self.__setup_input()
         self.__load_output()

     def __setup_input(self):
-        """Loads, validate and compute information from monte carlo
-        input with a single read from the file.
-
-        This function does three things:
-        1) Load: Loads the input data from MonteCarlo into python
-        objects so the sampling process does not require reading from
-        disk;
-        2) Validate: Validates that the keys in 'distribution_dict' exist in
-        the input json created by the monte carlo;
-        3) Compute: Computes the supremum of the probability ratios, used in the
-        sample function.
-
-        While these three tasks could be disentangled to get clearer
-        code, the implementation as done here only requires a single
-        read from disk.
+        """Loads the information from the Monte Carlo input file into
+        SampleInformation objects.
         """
-        input_filename = f"{self.montecarlo_filepath}.inputs.txt"
+        input_filename = f"{self.monte_carlo_filepath}.inputs.txt"

         try:
             input_file = open(input_filename, "r+", encoding="utf-8")
         except FileNotFoundError as e:
             raise FileNotFoundError(
-                f"Input file from monte carlo {input_filename} " "not found!"
+                f"Input file from monte carlo {input_filename} not found!"
             ) from e

-        for line in input_file.readlines():
-            try:
-                # loads data
+        try:
+            for line in input_file.readlines():
+                sample_info = SampleInformation()
                 line_json = json.loads(line)
-                self.input_list.append(line_json)
-                self.original_sample_size += 1
+                sample_info.inputs_json = line_json
+                self.all_sample_list.append(sample_info)

-                prob_ratio = 1
-                for parameter in self.distribution_dict.keys():
-                    # checks dictionary keys
-                    if parameter not in line_json.keys():
+                # sets and validates the input variable names
+                if not self.input_variables_names:
+                    self.input_variables_names = set(line_json.keys())
+                else:
+                    if self.input_variables_names != set(line_json.keys()):
                         raise ValueError(
-                            f"Parameter key {parameter} from 'distribution_dict' "
-                            "not found in input file!"
+                            "Input file from Monte Carlo contains different "
+                            "variables for different lines"
                         )
-                    parameter_value = line_json[parameter]
-
-                    prob_ratio *= self.__compute_probability_ratio(
-                        parameter, parameter_value
-                    )
-                # updates the supremum of the ratio
-                self.sup_ratio = max(self.sup_ratio, prob_ratio)
-            except Exception as e:
-                raise ValueError(
-                    "An error occurred while reading "
-                    f"the monte carlo input file {input_filename}!"
-                ) from e
+                self.original_sample_size += 1
+        except Exception as e:
+            raise ValueError(
+                "An error occurred while reading "
+                f"the monte carlo input file {input_filename}!"
+            ) from e

-        self.expected_sample_size = self.original_sample_size // self.sup_ratio
         input_file.close()

     def __load_output(self):
-        """Load data from monte carlo outputs."""
-        output_filename = f"{self.montecarlo_filepath}.outputs.txt"
+        """Loads the information from the Monte Carlo output file into the
+        corresponding SampleInformation objects.
+        """
+        output_filename = f"{self.monte_carlo_filepath}.outputs.txt"
         sample_size_output = 0  # sanity check

         try:
@@ -130,91 +121,139 @@ def __load_output(self):
             f"Output file from monte carlo {output_filename} " "not found!"
         ) from e

-        for line in output_file.readlines():
-            try:
+        try:
+            for line in output_file.readlines():
+                if sample_size_output > self.original_sample_size:
+                    raise ValueError(
+                        "Monte carlo output has more lines than the input file!"
+                    )
                 line_json = json.loads(line)
-                self.output_list.append(line_json)
+                self.all_sample_list[sample_size_output].outputs_json = line_json
                 sample_size_output += 1
-            except Exception as e:
-                raise ValueError(
-                    "An error occurred while reading "
-                    f"the monte carlo output file {output_filename}!"
-                ) from e
-
-        if self.original_sample_size != sample_size_output:
+        except Exception as e:
+            raise ValueError(
+                "An error occurred while reading "
+                f"the monte carlo output file {output_filename}!"
+            ) from e
+        if self.original_sample_size > sample_size_output:
             raise ValueError(
-                "Monte carlo input and output files have a different "
-                "number of samples!"
+                "Monte carlo output file has fewer lines than the input file!"
             )

         output_file.close()

-    def sample(self):
+    def __validate_distribution_dict(self, distribution_dict):
+        """Checks that every variable named in the distribution dictionary is
+        present in the Monte Carlo input file.
+        """
+        input_variables_names = set(distribution_dict.keys())
+        for variable in input_variables_names:
+            if variable not in self.input_variables_names:
+                raise ValueError(
+                    f"Variable key {variable} from 'distribution_dict' "
+                    "not found in input file!"
+                )
+
+    def sample(self, distribution_dict):
         """Performs rejection sampling and saves data

+        Parameters
+        ----------
+        distribution_dict : dict
+            Dictionary whose keys are the names of the input variables whose
+            distribution changed. The values are tuples or lists with two
+            entries. The first entry is a probability density (mass) function
+            for the old distribution, while the second entry is the probability
+            density function for the new distribution.
+
         Returns
         -------
         None
         """
+        self.__validate_distribution_dict(distribution_dict)
+
         mrs_input_file = open(f"{self.mrs_filepath}.inputs.txt", "w+", encoding="utf-8")
         mrs_output_file = open(
             f"{self.mrs_filepath}.outputs.txt", "w+", encoding="utf-8"
         )
         mrs_error_file = open(f"{self.mrs_filepath}.errors.txt", "w+", encoding="utf-8")

-        # compute sup ratio
-        for line_input_json, line_output_json in zip(self.input_list, self.output_list):
-            acceptance_prob = 1 / self.sup_ratio  # probability the sample is accepted
-            for parameter in self.distribution_dict.keys():
-                parameter_value = line_input_json[parameter]
-                acceptance_prob *= self.__compute_probability_ratio(
-                    parameter,
-                    parameter_value,
-                )
-            # sample is accepted, write output
-            if random() < acceptance_prob:
-                mrs_input_file.write(
-                    json.dumps(line_input_json, cls=RocketPyEncoder) + "\n"
-                )
-                mrs_output_file.write(
-                    json.dumps(line_output_json, cls=RocketPyEncoder) + "\n"
-                )
+        self.__setup_probabilities(distribution_dict)
+
+        try:
+            for sample in self.all_sample_list:
+                if random() < sample.acceptance_probability:
+                    mrs_input_file.write(
+                        json.dumps(sample.inputs_json, cls=RocketPyEncoder) + "\n"
+                    )
+                    mrs_output_file.write(
+                        json.dumps(sample.outputs_json, cls=RocketPyEncoder) + "\n"
+                    )
+        except Exception as e:
+            raise ValueError(
+                "An error occurred while writing the selected sample to the "
+                "output files"
+            ) from e

         mrs_input_file.close()
         mrs_output_file.close()
         mrs_error_file.close()

-    def __compute_probability_ratio(self, parameter, parameter_value):
-        """Computes the ratio of the new probability to the old probability
+    def __setup_probabilities(self, distribution_dict):
+        """Computes the probability ratio, the supremum of the probability
+        ratios, and the acceptance probability for each sample.

         Parameters
         ----------
-        parameter : str
-            Name of the parameter to evaluate the probability.
-        parameter_value : any
-            Value of the parameter to be passed to the density functions.
+        distribution_dict : dict
+            Dictionary whose keys are the names of the input variables whose
+            distribution changed. The values are tuples or lists with two
+            entries. The first entry is a probability density (mass) function
+            for the old distribution, while the second entry is the probability
+            density function for the new distribution.
+        """
+        self.sup_ratio = 1
+        for sample in self.all_sample_list:
+            sample.probability_ratio = self.__compute_probability_ratio(
+                sample, distribution_dict
+            )
+            self.sup_ratio = max(self.sup_ratio, sample.probability_ratio)

-        Returns
-        -------
-        float
-            The ratio of the new probability density function (numerator)
-            to the old one (denominator).
+        for sample in self.all_sample_list:
+            sample.acceptance_probability = sample.probability_ratio / self.sup_ratio
+        self.expected_sample_size = self.original_sample_size // self.sup_ratio
+
+    def __compute_probability_ratio(self, sample, distribution_dict):
+        """Computes the ratio of the new probability to the old probability
+        for the given sample.
+
+        Parameters
+        ----------
+        sample : SampleInformation
+            Sample information used to extract the values at which the
+            distributions' pdfs are evaluated.
+        distribution_dict : dict
+            Dictionary whose keys are the names of the input variables whose
+            distribution changed. The values are tuples or lists with two
+            entries. The first entry is a probability density (mass) function
+            for the old distribution, while the second entry is the probability
+            density function for the new distribution.

         Raises
         ------
         ValueError
             Raises exception if an error occurs when computing the ratio.
        """
+        probability_ratio = 1
         try:
-            old_pdf = self.distribution_dict[parameter][0]
-            new_pdf = self.distribution_dict[parameter][1]
-            probability_ratio = new_pdf(parameter_value) / old_pdf(parameter_value)
+            for variable in distribution_dict.keys():
+                value = sample.inputs_json[variable]
+                old_pdf = distribution_dict[variable][0]
+                new_pdf = distribution_dict[variable][1]
+                probability_ratio *= new_pdf(value) / old_pdf(value)
         except Exception as e:
             raise ValueError(
-                "An error occurred while evaluating the "
-                "ratio for 'distribution_dict' probability "
-                f"parameter key {parameter}!"
+                "An error occurred while evaluating the probability ratio"
             ) from e
         return probability_ratio