Commit

fix decay_case bug for SIREN use, updated Examples 4 and 5, made pyhepmc installation optional
mhostert committed Oct 3, 2024
1 parent 7de7507 commit 2da44b1
Showing 22 changed files with 2,163 additions and 1,277 deletions.
12 changes: 10 additions & 2 deletions README.md
@@ -44,15 +44,21 @@ The following dependencies (if missing) will be automatically installed during t…

- [scipy](https://scipy.org/)
- [pandas](https://pandas.pydata.org/) 1.0 or above
- [pyarrow](https://arrow.apache.org/docs/python/index.html)
- [Cython](https://cython.org/)
- [vegas](https://pypi.org/project/vegas/) 5.1.1 or above
- [Particle](https://pypi.org/project/particle/)
- [pyhepmc](https://github.com/scikit-hep/pyhepmc)
- [pyparsing](https://github.com/pyparsing/pyparsing/)
- [dill](https://pypi.org/project/dill/)
- [matplotlib](https://matplotlib.org/)

Additional optional dependencies for extras:

DarkNews[parquet]
- [pyarrow](https://arrow.apache.org/docs/python/index.html)

DarkNews[pyhepmc]
- [pyhepmc](https://github.com/scikit-hep/pyhepmc)

---

## Installation
@@ -97,6 +103,8 @@ to install it in developer mode (similar to editable mode above).

If you would like to output events to `.parquet` files, you can install the `parquet` extra with ```pip install DarkNews[parquet]``` or ```pip install "DarkNews[parquet]"```.

Similarly, to output events in the `hepmc2`, `hepmc3`, or `hepevt` formats, you can install both extras via ```pip install DarkNews[parquet,pyhepmc]``` or ```pip install "DarkNews[parquet,pyhepmc]"```.
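
For orientation, here is a minimal sketch of how the optional outputs can be gated at runtime using the `HAS_PYARROW`/`HAS_PYHEPMC3` flags from `DarkNews/__init__.py`; the `parquet` and `hepmc3` keyword names are assumptions inferred from the GenLauncher attributes touched in this commit:

```python
import DarkNews as dn
from DarkNews.GenLauncher import GenLauncher

kwargs = {}
if dn.HAS_PYARROW:
    kwargs["parquet"] = True  # hypothetical flag name for .parquet output
if dn.HAS_PYHEPMC3:
    kwargs["hepmc3"] = True  # hypothetical flag name for HepMC3 output

# Model and experiment arguments follow the usual GenLauncher interface.
gen = GenLauncher(mzprime=1.25, m4=0.140, neval=1000, experiment="miniboone_fhc", **kwargs)
gen.run()
```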

---

## Usage
11 changes: 3 additions & 8 deletions examples/Example_2_TMM.ipynb
@@ -331,7 +331,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.12 ('base')",
"display_name": "darknews",
"language": "python",
"name": "python3"
},
@@ -345,14 +345,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.11.8"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "3067ead486e059ec00ffe7555bdb889e6e264a24dc711bf108106cc7baee8d5d"
}
}
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
109 changes: 82 additions & 27 deletions examples/Example_3_geometries.ipynb

Large diffs are not rendered by default.

1,114 changes: 1,114 additions & 0 deletions examples/Example_4_MB_analysis.ipynb

Large diffs are not rendered by default.

850 changes: 0 additions & 850 deletions examples/Example_4_fastbnb.ipynb

This file was deleted.

388 changes: 388 additions & 0 deletions examples/Example_5_MB_spectra.ipynb

Large diffs are not rendered by default.

Binary file added examples/achilles_example_HC.pdf
Binary file not shown.
10 changes: 8 additions & 2 deletions setup.cfg
@@ -45,7 +45,6 @@ install_requires =
Particle
pyparsing
matplotlib
pyhepmc>=2.7.1

setup_requires =
cython
@@ -68,13 +67,20 @@ DarkNews = py.typed
parquet =
pyarrow

pyhepmc =
pyhepmc>=2.7.1

testing =
pytest>=6.0
pytest-cov>=2.0
tox>=3.24
pyarrow
pyhepmc>=2.7.1

[options.entry_points]
console_scripts =
dn_gen = DarkNews.scripts:dn_gen
dn_get_examples = DarkNews.scripts:dn_get_examples

[flake8]
max-line-length = 180
310 changes: 155 additions & 155 deletions src/DarkNews/Cfourvec.c

Large diffs are not rendered by default.

50 changes: 33 additions & 17 deletions src/DarkNews/GenLauncher.py
@@ -3,15 +3,15 @@
from pathlib import Path
from particle import literals as lp

import DarkNews as dn
from DarkNews import configure_loggers
from DarkNews.AssignmentParser import AssignmentParser

import logging

logger = logging.getLogger("logger.DarkNews")
prettyprinter = logging.getLogger("prettyprinter.DarkNews")

import DarkNews as dn
from DarkNews import configure_loggers

from DarkNews.AssignmentParser import AssignmentParser

GENERATOR_ARGS = [
# scope
@@ -179,12 +179,14 @@


class GenLauncher:
banner = r""" ______ _ _ _
banner = r"""
______ _ _ _
| _ \ | | | \ | |
| | | |__ _ _ __| | __ | \| | _____ _____
| | | / _ | ___| |/ / | . |/ _ \ \ /\ / / __|
| |/ / (_| | | | < | |\ | __/\ V V /\__ \
|___/ \__,_|_| |_|\_\ \_| \_/\___| \_/\_/ |___/ """
|___/ \__,_|_| |_|\_\ \_| \_/\___| \_/\_/ |___/
"""

# handle parameters that can assume only certain values
_choices = {
Expand Down Expand Up @@ -281,6 +283,10 @@ def __init__(self, param_file=None, **kwargs):
logger.error("Error: pyarrow is not installed.")
raise ModuleNotFoundError("pyarrow is not installed.")

if (self.hepmc2 or self.hepmc3 or self.hepevt) and not dn.HAS_PYHEPMC3:
logger.error("Error: pyhepmc is not installed.")
raise ModuleNotFoundError("pyhepmc is not installed.")

####################################################
# Choose the model to be used in this generation
self.bsm_model = self._model_class(**self.model_args_dict)
@@ -342,18 +348,18 @@ def __init__(self, param_file=None, **kwargs):

_top_path = time.asctime().replace(" ", "_")

## final path
# final path
self.data_path = Path(f"{self.path}/data/{_exp_path_part}/3plus{len(_mass_strings)}/{_top_path}/")

####################################################
## Determine scope of upscattering given the heavy nu spectrum
# Determine scope of upscattering given the heavy nu spectrum
# 3+1
if len(_mass_strings) == 1:
self.upscattered_nus = [dn.pdg.neutrino4]
self.outgoing_nus = [dn.pdg.nulight]
# 3+2
elif len(_mass_strings) == 2:
## FIXING 3+2 process chain to be nualpha --> N5 --> N4
# FIXING 3+2 process chain to be nualpha --> N5 --> N4
self.upscattered_nus = [dn.pdg.neutrino5]
self.outgoing_nus = [dn.pdg.neutrino4]
# 3+3
@@ -368,7 +374,11 @@ def __init__(self, param_file=None, **kwargs):
# Miscellaneous checks
if self.hep_unweight:
logger.warning(
f"Unweighted events requested. This feature requires a large number of weighted events with respect to the requested number of hep-formatted events. Currently: n_unweighted/n_eval = {self.unweighted_hep_events/self.neval*100}%."
f"""
Unweighted events requested.
This feature requires a large number of weighted events with respect to the requested number of hep-formatted events.
Currently: n_unweighted/n_eval = {self.unweighted_hep_events/self.neval*100}%.
"""
)

####################################################
@@ -416,7 +426,10 @@ def _load_file(self, file, user_input_dict={}):
for key in parser.parameters:
if key in user_input_dict:
logger.warning(
f"Warning! The keyword argument '{key} = {user_input_dict[key]}' was passed to GenLauncher but also appears in input file ('{key} = {parser.parameters[key]}'). Overridding file with keyword argument."
f"""Warning! The keyword argument '{key} = {user_input_dict[key]}' was passed to GenLauncher
but also appears in input file ('{key} = {parser.parameters[key]}').
Overridding file with keyword argument.
"""
)
continue
user_input_dict[key] = parser.parameters[key]
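
As an illustration of this precedence (the parameter file name and contents below are hypothetical, and the sketch assumes parsed parameters end up as GenLauncher attributes):

```python
from DarkNews.GenLauncher import GenLauncher

# Suppose my_parameters.txt (hypothetical) contains the line:
#   m4 = 0.100
# The keyword argument below wins: _load_file logs the warning above and
# keeps m4 = 0.150, discarding the value from the file.
gen = GenLauncher(param_file="my_parameters.txt", m4=0.150)
```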
Expand Down Expand Up @@ -564,7 +577,11 @@ def _drop_zero_weight_samples(self):
zero_entries = self.df["w_event_rate"] == 0
if zero_entries.sum() / len(self.df.index) > 0.01:
logger.warning(
f"Warning: number of entries with w_event_rate = 0 surpasses 1% of number of samples. Found: {zero_entries.sum()/len(self.df.index)*100:.2f}%. Sampling is likely not convering or integrand is too sparse."
f"""
Warning: number of entries with w_event_rate = 0 surpasses 1% of number of samples.
Found: {zero_entries.sum()/len(self.df.index)*100:.2f}%.
Sampling is likely not convering or integrand is too sparse.
"""
)
self.df = self.df.drop(self.df[zero_entries].index).reset_index(drop=True)
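
The same filtering, reduced to a self-contained sketch on a toy dataframe:

```python
import pandas as pd

df = pd.DataFrame({"w_event_rate": [0.0, 1.2, 0.0, 3.4]})

zero_entries = df["w_event_rate"] == 0
if zero_entries.sum() / len(df.index) > 0.01:
    # mirrors the warning above (here 50% of the entries have zero weight)
    print(f"{zero_entries.sum() / len(df.index) * 100:.2f}% zero-weight samples")

df = df.drop(df[zero_entries].index).reset_index(drop=True)
print(len(df))  # 2 rows left, re-indexed from 0
```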

@@ -619,9 +636,8 @@ def run(self, loglevel=None, verbose=None, logfile=None, overwrite_path=None):

prettyprinter.info("Generating Events using the neutrino-nucleus upscattering engine")

self.df = self.gen_cases[0].get_MC_events()
for mc in self.gen_cases[1:]:
self.df = dn.MC.get_merged_MC_output(self.df, mc.get_MC_events())
# Merge the simulation dataframes from each MC case
self.df = dn.MC.get_merged_MC_output([mc.get_MC_events() for mc in self.gen_cases])

# scramble events for minimum bias
self._drop_zero_weight_samples()
Expand Down Expand Up @@ -660,12 +676,12 @@ def run(self, loglevel=None, verbose=None, logfile=None, overwrite_path=None):
logger.info("Making summary plots of the kinematics of the process...")
try:
import matplotlib
except ImportError as e:
except ImportError:
logger.warning("Warning! Could not find matplotlib -- stopping the making of summary plots.")
else:
self.path_to_summary_plots = Path(self.data_path) / "summary_plots/"
dn.plot_tools.batch_plot(self.df, self.path_to_summary_plots, title=rf"{self.name}")
logger.info(f"Plots saved in {self.path_to_summary_plots}.")
logger.info(f"Plots saved in {self.path_to_summary_plots}.")

# restore overwritten path
if overwrite_path:
89 changes: 69 additions & 20 deletions src/DarkNews/MC.py
@@ -2,11 +2,6 @@
import pandas as pd
import vegas as vg

import logging

logger = logging.getLogger("logger." + __name__)
prettyprinter = logging.getLogger("prettyprinter." + __name__)

from collections import defaultdict
from functools import partial

@@ -16,6 +11,11 @@
from DarkNews import pdg
from DarkNews import geom

import logging

logger = logging.getLogger("logger." + __name__)
prettyprinter = logging.getLogger("prettyprinter." + __name__)

NINT = 10
NEVAL = 1000
NINT_warmup = 10
@@ -390,31 +390,80 @@ def get_MC_events(self):


# merge all generation cases into one dataframe
def get_merged_MC_output(df1, df2):
# def get_merged_MC_output(df1, df2):
# """
# take two pandas dataframes with events and combine them.
# Resetting index to go from (0,n_1+n_2) where n_i is the number of events in dfi
# """
# if df1.attrs["model"] != df2.attrs["model"]:
# logger.warning("Beware! Merging generation cases with different df.attrs['model']! Discarding the second (newest) case.")
# if df1.attrs["experiment"] != df2.attrs["experiment"]:
# logger.warning("Beware! Merging generation cases with different df.attrs['experiment']! Discarding the second (newest) case.")

# df = pd.concat([df1, df2], axis=0, copy=False).reset_index(drop=True)

# # for older versions of pandas, concat does not keep the attributes
# # -- if they disappear, force first dataframe.
# if not df.attrs:
# logger.debug("DEBUG: Forcing the storage of the df.attrs using the first dataframe. This is done automatically for newer versions of pandas.")
# df.attrs = df1.attrs

# # Now we merge lifetimes
# for i in range(4, 7):
# this_ctau0 = f"N{i}_ctau0"
# if this_ctau0 in df1.attrs.keys() and this_ctau0 in df2.attrs.keys():
# # take the average
# df.attrs[this_ctau0] = 0.5 * (df1.attrs[this_ctau0] + df2.attrs[this_ctau0])

# # Explicitly delete the references to the original dataframes to save memory
# del df1
# del df2


# return df
def get_merged_MC_output(dfs):
"""
take two pandas dataframes with events and combine them.
Resetting index to go from (0,n_1+n_2) where n_i is the number of events in dfi
Take multiple pandas dataframes with events and combine them.
Resetting the index to go from 0 to the total number of events.
"""
if df1.attrs["model"] != df2.attrs["model"]:
logger.warning("Beware! Merging generation cases with different df.attrs['model']! Discarding the second (newest) case.")
if df1.attrs["experiment"] != df2.attrs["experiment"]:
logger.warning("Beware! Merging generation cases with different df.attrs['experiment']! Discarding the second (newest) case.")
if not dfs:
raise ValueError("At least one DataFrame must be provided")

dfs = list(dfs)  # Ensure dfs is a list in case it was passed as another iterable type

# Check for model and experiment consistency
base_model = dfs[0].attrs.get("model")
base_experiment = dfs[0].attrs.get("experiment")

df = pd.concat([df1, df2], axis=0).reset_index(drop=True)
for other in dfs[1:]:
if other.attrs.get("model") != base_model:
logger.warning("Beware! Merging generation cases with different df.attrs['model']! Discarding the mismatched cases.")
if other.attrs.get("experiment") != base_experiment:
logger.warning("Beware! Merging generation cases with different df.attrs['experiment']! Discarding the mismatched cases.")

# for older versions of pandas, concat does not keep the attributes
# -- if they disappear, force first dataframe.
# Concatenate all dataframes
df = pd.concat(dfs, axis=0, copy=False).reset_index(drop=True)

# For older versions of pandas, concat does not keep the attributes
if not df.attrs:
logger.debug("DEBUG: Forcing the storage of the df.attrs using the first dataframe. This is done automatically for newer versions of pandas.")
df.attrs = df1.attrs
logger.debug("DEBUG: Forcing the storage of the df.attrs using the first dataframe. " "This is done automatically for newer versions of pandas.")
df.attrs = dfs[0].attrs

# Now we merge lifetimes
for i in range(4, 7):
this_ctau0 = f"N{i}_ctau0"
if this_ctau0 in df1.attrs.keys() and this_ctau0 in df2.attrs.keys():
# take the average
df.attrs[this_ctau0] = 0.5 * (df1.attrs[this_ctau0] + df2.attrs[this_ctau0])

# Sum the ctau0 values and calculate the average
ctau0_sum = sum(d.attrs.get(this_ctau0, 0) for d in dfs if this_ctau0 in d.attrs)
count = sum(1 for d in dfs if this_ctau0 in d.attrs)

if count > 0:
df.attrs[this_ctau0] = ctau0_sum / count

# Drop the local reference to the input list (the caller may still hold the originals)
del dfs

# Return the merged dataframe
return df
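
A usage sketch for the new list-based interface, with toy dataframes (real event dataframes carry many more columns and attributes):

```python
import pandas as pd
from DarkNews import MC

df_a = pd.DataFrame({"w_event_rate": [1.0, 2.0]})
df_a.attrs = {"model": "3+1", "experiment": "miniboone_fhc", "N4_ctau0": 10.0}

df_b = pd.DataFrame({"w_event_rate": [3.0]})
df_b.attrs = {"model": "3+1", "experiment": "miniboone_fhc", "N4_ctau0": 20.0}

merged = MC.get_merged_MC_output([df_a, df_b])
print(len(merged))               # 3 rows, re-indexed 0..2
print(merged.attrs["N4_ctau0"])  # 15.0, the average of the per-case lifetimes
```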


9 changes: 9 additions & 0 deletions src/DarkNews/__init__.py
@@ -113,6 +113,15 @@ def configure_loggers(loglevel="WARNING", logfile=None, verbose=False):
except ImportError:
HAS_PYARROW = False

# Check if user has pyhepmc3 installed -- if not, no HepMC output is available
try:
import pyhepmc as hep

HAS_PYHEPMC3 = True
except ImportError:
HAS_PYHEPMC3 = False


"""
And now making it easier to import the main DarkNews classes.
It allows DarkNews.XXXX instead of DarkNews.YYYY.XXXX
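
Downstream code can branch on the new flag the same way the existing `HAS_PYARROW` flag is used; a minimal sketch:

```python
import DarkNews as dn

if dn.HAS_PYHEPMC3:
    import pyhepmc  # guaranteed to succeed when the flag is True
    print("HepMC2/3 and HEPEVT event output is available.")
else:
    print("pyhepmc missing; enable it with: pip install 'DarkNews[pyhepmc]'")
```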
