feat: improve run summary (#57)

Improve run summary content and generation:

- Added new content to the run summary: Starling version and commit, run date, config content, and output files with their information.
- The run summary file is now always generated; the 'generate_summary' option has been removed from the parameters.
- The run summary file is now named "run_summary.json" instead of "{scenario}_summary.json".
tellae · Jun 22, 2022 · d048a21 · d048a21
1 parent 04be3a5
commit d048a21
Show file tree

Hide file tree

Showing 11 changed files with 164 additions and 56 deletions.
diff --git a/docs/run/io.rst b/docs/run/io.rst
@@ -122,6 +122,13 @@ The main outputs of the simulation are the visualisation file and the KPI tables
 The specification of what they exactly contain is made by the model developer in the class
 extending :class:`~starling_sim.basemodel.output.output_factory.OutputFactory`.
 
+Run summary
+-----------
+
+The run summary file is a .json file (``run_summary.json``) generated at the end of a
+successful simulation. It contains information about the run (date, Starling version,
+commit), the simulation parameters and config, the output files and run statistics.
+
 Visualisation file
 ------------------
 

diff --git a/starling_sim/basemodel/output/geojson_output.py b/starling_sim/basemodel/output/geojson_output.py
@@ -177,9 +177,6 @@ def generate_geojson(self):
         # build file path
         path = self.folder + self.filename
 
-        # log geojson generation
-        logging.info("Generating geojson output in file " + path)
-
         # check gz extension
         if path.endswith(".gz"):
             to_gz = True
@@ -191,8 +188,15 @@ def generate_geojson(self):
         json_dump(feature_collection, path)
 
         # compress to gz if necessary
+        mimetype = "application/json"
         if to_gz:
-            gz_compression(path)
+            path = gz_compression(path)
+            mimetype = "application/gzip"
+
+        # signal new file to output factory
+        self.sim.outputFactory.new_output_file(
+            path, mimetype, compressed_mimetype="application/json", content="visualisation"
+        )
 
 
 # Classes for the generation of geojson output

diff --git a/starling_sim/basemodel/output/kpi_output.py b/starling_sim/basemodel/output/kpi_output.py
@@ -7,6 +7,9 @@
 class KpiOutput:
     def __init__(self, population_names, kpi_list, kpi_name=None):
 
+        # simulation model access
+        self.sim = None
+
         # name of the kpi, will compose the kpi filename : <kpi_name>.csv
         if kpi_name is None:
             if isinstance(population_names, list):
@@ -39,6 +42,7 @@ def setup(self, filename, folder, simulation_model):
         :return:
         """
 
+        self.sim = simulation_model
         self.filename = filename
         self.folder = folder
 
@@ -109,13 +113,22 @@ def write_kpi_table(self):
         if kpi_table.empty:
             return
 
-        # generate kpi output
-
-        logging.info("Generating KPI output in file " + path)
-
         try:
             # write the dataframe into a csv file
             kpi_table.to_csv(path, sep=";", index=False, columns=header_list)
+
+            # signal new file to output factory
+            mimetype = "text/csv"
+            if path.endswith("gz"):
+                mimetype = "application/gzip"
+            self.sim.outputFactory.new_output_file(
+                path,
+                mimetype,
+                compressed_mimetype="text/csv",
+                content="kpi",
+                subject=self.name,
+            )
+
         except KeyError as e:
             logging.warning(
                 "Could not generate kpi output {}, " "error occurred : {}".format(path, e)

diff --git a/starling_sim/basemodel/output/output_factory.py b/starling_sim/basemodel/output/output_factory.py
@@ -1,6 +1,7 @@
 from starling_sim.basemodel.output.geojson_output import new_geojson_output
 from starling_sim.utils.utils import json_pretty_dump
 from starling_sim.utils.config import config
+from starling_sim.utils.constants import RUN_SUMMARY_FILENAME
 
 import logging
 import os
@@ -14,11 +15,16 @@ class OutputFactory:
     e.g. writing a json containing all the simulation data
     """
 
+    GENERATION_ERROR_FORMAT = "Error while generating {} output"
+
     def __init__(self):
         """
         The constructor must be extended for the needs of the generation method
         """
 
+        # list of output files and associated information
+        self.output_files = []
+
         # list of KpiOutput objects, each will generate one kpi file
         self.kpi_outputs = None
 
@@ -89,47 +95,71 @@ def setup_geojson_output(self):
 
         self.geojson_output = new_geojson_output()
 
+    def new_output_file(
+        self,
+        filepath: str,
+        mimetype: str,
+        compressed_mimetype: str = None,
+        content: str = None,
+        subject: str = None,
+    ):
+        """
+        Add a new file and its information to the list of output files.
+
+        This method should be called after generating an output file.
+
+        :param filepath: output file path
+        :param mimetype: file mimetype
+        :param compressed_mimetype: compressed mimetype (defaults to file mimetype)
+        :param content: content metadata (mandatory)
+        :param subject: subject
+        """
+
+        if content is None:
+            raise ValueError("'content' metadata was not provided for output {}".format(filepath))
+
+        if compressed_mimetype is None:
+            compressed_mimetype = mimetype
+
+        metadata = {"compressed-mimetype": compressed_mimetype, "content": content}
+
+        if subject is not None:
+            metadata["subject"] = subject
+
+        logging.info("Generated {} output in file {}".format(metadata["content"], filepath))
+
+        self.output_files.append(
+            {"filename": os.path.basename(filepath), "mimetype": mimetype, "metadata": metadata}
+        )
+
     def extract_simulation(self, simulation_model):
         """
         This method will be called for the output generation.
 
         It must be extended to generate the output using specific methods.
         """
 
-        if (
-            "traces_output" in simulation_model.parameters
-            and simulation_model.parameters["traces_output"]
-        ):
-            self.generate_trace_output(simulation_model)
-
-        if (
-            "generate_summary" in simulation_model.parameters
-            and simulation_model.parameters["generate_summary"]
-        ):
-            self.generate_run_summary(simulation_model)
+        # traces output
+        if simulation_model.parameters["traces_output"]:
+            try:
+                self.generate_trace_output(simulation_model)
+            except:
+                logging.warning(self.GENERATION_ERROR_FORMAT.format("traces"))
 
         # kpi output
         if simulation_model.parameters["kpi_output"]:
             self.generate_kpi_output(simulation_model)
 
         # geojson output
-        if simulation_model.parameters["visualisation_output"]:
-            self.generate_geojson_output(simulation_model)
 
-    def generate_run_summary(self, simulation_model):
-        """
-        Generate a summary file of the simulation run.
+        if simulation_model.parameters["visualisation_output"]:
+            try:
+                self.generate_geojson_output(simulation_model)
+            except:
+                logging.warning(self.GENERATION_ERROR_FORMAT.format("visualisation"))
 
-        :param simulation_model:
-        """
-        filepath = (
-            simulation_model.parameters["output_folder"]
-            + "/"
-            + simulation_model.parameters["scenario"]
-            + "_summary.json"
-        )
-
-        json_pretty_dump(simulation_model.runSummary, filepath)
+        # run summary output
+        self.generate_run_summary(simulation_model)
 
     def generate_geojson_output(self, simulation_model):
         """
@@ -150,10 +180,12 @@ def generate_kpi_output(self, simulation_model):
         """
 
         for kpi_output in self.kpi_outputs:
+            try:
+                kpi_output.write_kpi_table()
+            except:
+                logging.warning(self.GENERATION_ERROR_FORMAT.format(kpi_output.name + " kpi"))
 
-            kpi_output.write_kpi_table()
-
-    def generate_trace_output(simulation_model):
+    def generate_trace_output(self, simulation_model):
         """
         Generate a text file containing the event traces of the agents.
 
@@ -166,8 +198,6 @@ def generate_trace_output(simulation_model):
         output_folder = simulation_model.parameters["output_folder"]
         filepath = output_folder + config["traces_format"].format(scenario=scenario)
 
-        logging.info("Generating traces output in file {}".format(filepath))
-
         # open the trace file in write mode
         with open(filepath, "w") as outfile:
 
@@ -192,4 +222,19 @@ def generate_trace_output(simulation_model):
                     outfile.write("\n")
                     outfile.write(str(event))
 
-    generate_trace_output = staticmethod(generate_trace_output)
+        self.sim.outputFactory.new_output_file(filepath, "text/plain", content="traces")
+
+    def generate_run_summary(self, simulation_model):
+        """
+        Generate a summary file of the simulation run.
+
+        :param simulation_model:
+        """
+        filepath = simulation_model.parameters["output_folder"] + RUN_SUMMARY_FILENAME
+
+        # add run summary to output files
+        self.sim.outputFactory.new_output_file(filepath, "application/json", content="run_summary")
+
+        # set output files in run summary and dump it in a file
+        simulation_model.runSummary["output_files"] = self.output_files
+        json_pretty_dump(simulation_model.runSummary, filepath)
diff --git a/starling_sim/basemodel/simulation_model.py b/starling_sim/basemodel/simulation_model.py
@@ -1,11 +1,13 @@
 import random
 import logging
 import numpy
+import datetime
 
 from starling_sim.basemodel.trace.trace import trace_simulation_end
-from starling_sim.utils.utils import import_gtfs_feed
+from starling_sim.utils.utils import import_gtfs_feed, get_git_revision_hash
 from starling_sim.utils.constants import BASE_LEAVING_CODES
 from starling_sim.utils.config import config
+from starling_sim.version import __version__
 
 
 class SimulationModel:
@@ -42,7 +44,7 @@ def __init__(self, parameters):
         self.parameters = parameters
 
         # run_summary
-        self.runSummary = parameters.copy_dict()
+        self.runSummary = self.init_run_summary()
 
         # add the base leaving codes
         self.add_base_leaving_codes()
@@ -156,6 +158,36 @@ def setup_gtfs(self):
         restrict_transfers = config["transfer_restriction"]
         self.gtfs = import_gtfs_feed(self.parameters["gtfs_timetables"], restrict_transfers)
 
+    def init_run_summary(self):
+        """
+        Initialise the run summary.
+        """
+
+        summary = dict()
+
+        # get run date
+        summary["date"] = str(datetime.datetime.today())
+
+        # get starling version
+        summary["starling_version"] = __version__
+
+        # get current commit
+        summary["commit"] = get_git_revision_hash()
+
+        # copy scenario parameters
+        summary["parameters"] = self.parameters.copy_dict()
+
+        # copy config
+        summary["config"] = config.copy()
+
+        # scenario output files
+        summary["output_files"] = dict()
+
+        # run statistics
+        summary["stats"] = dict()
+
+        return summary
+
     @classmethod
     def get_agent_type_schemas(cls):
 

diff --git a/starling_sim/model_simulator.py b/starling_sim/model_simulator.py
@@ -148,21 +148,21 @@ def launch_simulation(parameters_path, pkg):
     simulator.setup_simulation()
     duration = time.time() - start
     logging.info("End of setup. Elapsed time : {:.2f} seconds\n".format(duration))
-    simulator.simulationModel.runSummary["setup_time"] = duration
+    simulator.simulationModel.runSummary["stats"]["setup_time"] = duration
 
     # run the simulation
     logging.info("Starting the simulation\n")
     start = time.time()
     simulator.run_simulation()
     duration = time.time() - start
     logging.info("End of simulation run. Elapsed time : {:.2f} seconds\n".format(duration))
-    simulator.simulationModel.runSummary["execution_time"] = duration
+    simulator.simulationModel.runSummary["stats"]["execution_time"] = duration
 
     shortest_path_count = 0
     for topology in simulator.simulationModel.environment.topologies.values():
         shortest_path_count += topology.shortest_path_count
     logging.info("Number of shortest_path computed : {}".format(shortest_path_count))
-    simulator.simulationModel.runSummary["shortest_paths"] = shortest_path_count
+    simulator.simulationModel.runSummary["stats"]["shortest_paths"] = shortest_path_count
 
     # generate simulation output
     logging.info("Generating outputs of the simulation\n")

diff --git a/starling_sim/schemas/parameters.schema.json b/starling_sim/schemas/parameters.schema.json
@@ -88,13 +88,6 @@
       "description": "Log simulation time every 3600 seconds (used for run monitoring)",
       "default": false
     },
-    "generate_summary": {
-      "advanced": true,
-      "title": "Generate run summary",
-      "description": "Generate a simulation run summary",
-      "type": "boolean",
-      "default": false
-    },
     "early_dynamic_input": {
       "advanced":  true,
       "type": "integer",

diff --git a/starling_sim/utils/constants.py b/starling_sim/utils/constants.py
@@ -37,6 +37,9 @@
 
 # filename formats
 
+#: filename of the run summary file
+RUN_SUMMARY_FILENAME = "run_summary.json"
+
 #: format of the ALIENS operators usage diagram
 OPERATORS_USAGE_FORMAT = "{prefix}_operators.png"
 

diff --git a/starling_sim/utils/paths.py b/starling_sim/utils/paths.py
@@ -66,8 +66,6 @@
         |   └── scenario_2      # scenario_2 data
         └── ...
 
-The path to the data repository and the names of the folders can be changed.
-
 The path to the data repository can be changed using the --data-folder option of main.py
 
 .. code-block:: bash