Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions dijet/scripts/convert_to_root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/python3.6
# coding: utf-8

"""
Script to convert output of dijet.JERtoRoot to a TGraphAsymmErrors.
Keep an eye on uproot PR https://github.com/scikit-hep/uproot5/pull/1144
to directly implement it in the task itself.

Root is not in Sandboxes from CF.
Needs to be run with python 3.6 instead.

Structure of arguments is based on CF output. Run via
./convert_to_root.py --version <v> --sample <dataset> --<option> <arg> ...
"""

import sys
import argparse
import os
import pickle
import ROOT

# Filter sys.path to remove Python 3.9 specific paths
# Conflicts of 3.9 to 3.6.
# Second option is to sort sys.path and move paths from 3.9 to bottom
sys.path = [p for p in sys.path if "python3.9" not in p]

# Set up argument parser
parser = argparse.ArgumentParser(description="Read a pickle file with JERs and convert them to root objects.")
parser.add_argument("--config", default="config_2017", help="Configuration name (default: config_2017)")
parser.add_argument("--shift", default="nominal", help="Shift type (default: nominal)")
parser.add_argument("--calibrator", default="skip_jecunc", help="Calibrator type (default: skip_jecunc)")
parser.add_argument("--selector", default="default", help="Selector type (default: default)")
parser.add_argument("--producer", default="default", help="Producer type (default: default)")
parser.add_argument("--version", required=True, help="Version, must be specified.")
parser.add_argument("--sample", required=True, help="Sample name, must be specified.")

# Parse the command-line arguments
args = parser.parse_args()

# Print all options and the arguments given
for arg in vars(args):
    print(f"{arg}: {getattr(args, arg)}")

# Construct the file path; fail early with a clear message if the
# CF store location is not configured (os.path.join would otherwise
# raise a confusing TypeError on None)
base_path = os.getenv("CF_STORE_LOCAL")
if base_path is None:
    raise RuntimeError("environment variable CF_STORE_LOCAL is not set")
full_path = os.path.join(
    base_path,
    "analysis_dijet",
    "dijet.JERtoRoot",
    f"{args.config}",
    f"{args.shift}",
    f"calib__{args.calibrator}",
    f"sel__{args.selector}",
    f"prod__{args.producer}",
    f"{args.version}",
    f"{args.sample}",
    "jers.pickle",
)

# Check if the file exists
if not os.path.exists(full_path):
    raise FileNotFoundError(f"The file {full_path} does not exist.")

# Load and display the contents of the pickle file
with open(full_path, "rb") as file:
    data = pickle.load(file)

# Store root file in output directory from analysis
root_path = full_path.replace("dijet.JERtoRoot", "rootfiles").replace(".pickle", ".root")

# Ensure the directory exists
os.makedirs(os.path.dirname(root_path), exist_ok=True)
root_file = ROOT.TFile(root_path, "RECREATE")

for key, value in data.items():
    n = len(value["fX"])
    x = value["fX"]
    y = value["fY"]
    xerrup = value["fXerrUp"]
    xerrdown = value["fXerrDown"]
    yerrup = value["fYerrUp"]
    yerrdown = value["fYerrDown"]

    # Create TGraphAsymmErrors object.
    # ROOT signature: (n, x, y, exl, exh, eyl, eyh).
    # NOTE(review): x and y (and their error arrays) are swapped here, so the
    # pt values end up on the ordinate — confirm this is intentional.
    graph = ROOT.TGraphAsymmErrors(n, y, x, yerrdown, yerrup, xerrdown, xerrup)

    # BUG FIX: the graph was previously created but never written, leaving
    # the output file empty. Name it after its dict key and persist it.
    graph.SetName(key)
    graph.Write(key)

root_file.Close()
print(f"All TGraphAsymmErrors objects have been stored in {root_path}")
9 changes: 6 additions & 3 deletions dijet/tasks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class HistogramsBaseTask(
# Add nested sibling directories to output path
output_collection_cls = law.NestedSiblingFileCollection

# Category ID for methods
LOOKUP_CATEGORY_ID = {"sm": 1, "fe": 2}

def get_datasets(self) -> tuple[list[str], bool]:
"""
Select datasets belonging to the `process` of the current branch task.
Expand All @@ -58,7 +61,7 @@ def get_datasets(self) -> tuple[list[str], bool]:
if not dataset_insts_from_process:
raise RuntimeError(
"no single dataset found in config matching "
f"process `{self.branch_data.process}`"
f"process `{self.branch_data.process}`",
)

# filter to contain only user-supplied datasets
Expand All @@ -69,7 +72,7 @@ def get_datasets(self) -> tuple[list[str], bool]:
if not datasets_filtered:
raise RuntimeError(
"no single user-supplied dataset matched "
f"process `{self.branch_data.process}`"
f"process `{self.branch_data.process}`",
)

# set MC flag if any of the filtered datasets
Expand All @@ -79,7 +82,7 @@ def get_datasets(self) -> tuple[list[str], bool]:
)
if len(datasets_filtered_is_mc) > 1:
raise RuntimeError(
"filtered datasets have mismatched `is_mc` flags"
"filtered datasets have mismatched `is_mc` flags",
)

# return filtered datasets and is_mc flag
Expand Down
11 changes: 5 additions & 6 deletions dijet/tasks/jer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,8 @@ def run(self):

# get index on `category` axis corresponding to
# the two computation methods
category_id = {"sm": 1, "fe": 2}
categories = list(h_widths.axes["category"])
index_methods = {m: categories.index(category_id[m]) for m in category_id}
index_methods = {m: categories.index(self.LOOKUP_CATEGORY_ID[m]) for m in self.LOOKUP_CATEGORY_ID}

# calcuate JER for standard method
jer_sm_val = h_widths[index_methods["sm"], :, :].values() * np.sqrt(2)
Expand All @@ -94,10 +93,10 @@ def run(self):
v_jer = h_jer.view()

# write JER values to output histogram
v_jer[index_methods["sm"], :, :].value = jer_sm_val
v_jer[index_methods["sm"], :, :].variance = jer_sm_err**2
v_jer[index_methods["fe"], :, :].value = jer_fe_val
v_jer[index_methods["fe"], :, :].variance = jer_fe_err**2
v_jer[index_methods["sm"], :, :].value = np.nan_to_num(jer_sm_val, nan=0.0)
v_jer[index_methods["sm"], :, :].variance = np.nan_to_num(jer_sm_err**2, nan=0.0)
v_jer[index_methods["fe"], :, :].value = np.nan_to_num(jer_fe_val, nan=0.0)
v_jer[index_methods["fe"], :, :].variance = np.nan_to_num(jer_fe_err**2, nan=0.0)

results_jers = {
"jer": h_jer,
Expand Down
104 changes: 104 additions & 0 deletions dijet/tasks/root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# coding: utf-8

import law

from dijet.tasks.base import HistogramsBaseTask
from columnflow.util import maybe_import
from columnflow.tasks.framework.base import Requirements

from dijet.tasks.jer import JER

hist = maybe_import("hist")


class JERtoRoot(HistogramsBaseTask):
    """
    Task to convert JER output to rootfiles.
    It is not yet possible to write TGraphs via uproot.
    Efforts by the uproot Team ongoing. Related PR:
    https://github.com/scikit-hep/uproot5/pull/1144
    """

    output_collection_cls = law.NestedSiblingFileCollection

    # upstream requirements
    reqs = Requirements(
        JER=JER,
    )

    # TODO: write base task for root conversion
    # keep HistogramBaseTask as long as no conversion possible
    def requires(self):
        # require all branches of the upstream JER workflow
        return self.reqs.JER.req(
            self,
            branch=-1,
            _exclude={"branches"},
        )

    def workflow_requires(self):
        reqs = super().workflow_requires()
        reqs["key"] = self.as_branch().requires()
        return reqs

    def load_jer(self):
        """Load the pickled results dict produced by the upstream JER task."""
        results = self.input().collection[0]["jers"].load(formatter="pickle")
        return results

    def output(self) -> dict[str, law.FileSystemTarget]:
        """Declare one pickle output per sample holding the graph input arrays."""
        sample = self.extract_sample()
        target = self.target(f"{sample}", dir=True)
        outp = {
            "jers": target.child("jers.pickle", type="f"),
        }
        return outp

    def run(self):
        results_jer = self.load_jer()
        h_jer = results_jer["jer"]

        # get pt bins (errors symmetrical for now)
        pt_bins = h_jer.axes["dijets_pt_avg"].edges
        pt_centers = h_jer.axes["dijets_pt_avg"].centers
        pt_error_lo = pt_centers - pt_bins[:-1]
        pt_error_hi = pt_bins[1:] - pt_centers

        # compute and store values for building `TGraphAsymmError`
        # in external script
        results_jers = {}
        abseta_bins = h_jer.axes["probejet_abseta"].edges
        for method in self.LOOKUP_CATEGORY_ID:
            # loop over (lo, hi) edge pairs of the |eta| binning
            for abseta_lo, abseta_hi in zip(
                abseta_bins[:-1],
                abseta_bins[1:],
            ):

                abseta_lo_str = f"{abseta_lo:.3f}".replace(".", "p")
                abseta_hi_str = f"{abseta_hi:.3f}".replace(".", "p")
                abseta_str = f"abseta_{abseta_lo_str}_{abseta_hi_str}"

                # select the histogram slice for this method and |eta| bin
                abseta_center = (abseta_lo + abseta_hi) / 2
                h_jer_category_abseta = h_jer[
                    {
                        "category": hist.loc(self.LOOKUP_CATEGORY_ID[method]),
                        "probejet_abseta": hist.loc(abseta_center),
                    }
                ]

                jers = h_jer_category_abseta.values()
                # BUG FIX: the JER task fills the histogram variance with
                # err**2, but the conversion script feeds fYerrUp/fYerrDown
                # directly to TGraphAsymmErrors, which expects *errors* —
                # so take the square root of the variances here.
                jers_errors = h_jer_category_abseta.variances() ** 0.5

                # sanity check
                assert len(jers) == len(pt_centers), f"check number of bins for {abseta_str!r}"

                # store results
                results_jers[f"jer_{abseta_str}_{method}"] = {
                    "fY": jers,
                    "fYerrUp": jers_errors,
                    "fYerrDown": jers_errors,
                    "fX": pt_centers,
                    "fXerrUp": pt_error_hi,
                    "fXerrDown": pt_error_lo,
                }

        # store results
        self.output()["jers"].dump(results_jers, formatter="pickle")
4 changes: 3 additions & 1 deletion law.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ inherit: $CF_BASE/law.cfg

columnflow.tasks.cms.inference
columnflow.tasks.cms.external
dijet.tasks.{alpha,jer}
dijet.tasks.{alpha,jer,root}


[logging]
Expand Down Expand Up @@ -102,6 +102,8 @@ cf.PlotShiftedVariables: local
dijet.AlphaExtrapolation: local
dijet.JER: local

dijet.JERtoRoot: local


[job]

Expand Down