Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions dijet/scripts/convert_to_root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/python3.6
# coding: utf-8

"""
Script to convert output of dijet.JERtoRoot to a TGraphAsymmErrors.
Keep an eye on uproot PR https://github.com/scikit-hep/uproot5/pull/1144
to directly implement it in the task itself.

Root is not in Sandboxes from CF.
Needs to be run with python 3.6 instead.

Structure of arguments is based on CF output. Run via
./convert_to_root.py --version <v> --sample <dataset> --<option> <arg> ...
"""

import sys
import argparse
import os
import pickle
import ROOT

# Filter sys.path to remove Python 3.9 specific paths
# Conflicts of 3.9 to 3.6.
# Second option is to sort sys.path and move paths from 3.9 to bottom
sys.path = [p for p in sys.path if "python3.9" not in p]

# Set up argument parser
parser = argparse.ArgumentParser(description="Read a pickle file with JERs and convert them to root objects.")
parser.add_argument("--config", default="config_2017", help="Configuration name (default: config_2017)")
parser.add_argument("--shift", default="nominal", help="Shift type (default: nominal)")
parser.add_argument("--calibrator", default="skip_jecunc", help="Calibrator type (default: skip_jecunc)")
parser.add_argument("--selector", default="default", help="Selector type (default: default)")
parser.add_argument("--producer", default="default", help="Producer type (default: default)")
parser.add_argument("--version", required=True, help="Version, must be specified.")
parser.add_argument("--sample", required=True, help="Sample name, must be specified.")

# Parse the command-line arguments
args = parser.parse_args()

# Print all options and the arguments given
for arg in vars(args):
    print(f"{arg}: {getattr(args, arg)}")

# Construct the file path; fail early with a clear message if the
# CF store location is not configured (os.path.join would otherwise
# raise a confusing TypeError on None)
base_path = os.getenv("CF_STORE_LOCAL")
if base_path is None:
    raise RuntimeError("environment variable CF_STORE_LOCAL is not set")
full_path = os.path.join(
    base_path,
    "analysis_dijet",
    "dijet.JERtoRoot",
    f"{args.config}",
    f"{args.shift}",
    f"calib__{args.calibrator}",
    f"sel__{args.selector}",
    f"prod__{args.producer}",
    f"{args.version}",
    f"{args.sample}",
    "jers.pickle",
)

# Check if the file exists
if not os.path.exists(full_path):
    raise FileNotFoundError(f"The file {full_path} does not exist.")

# Load and display the contents of the pickle file
with open(full_path, "rb") as file:
    data = pickle.load(file)

# Store root file in output directory from analysis
root_path = full_path.replace("dijet.JERtoRoot", "rootfiles").replace(".pickle", ".root")

# Ensure the directory exists
os.makedirs(os.path.dirname(root_path), exist_ok=True)
root_file = ROOT.TFile(root_path, "RECREATE")

for key, value in data.items():
    n = len(value["fX"])
    x = value["fX"]
    y = value["fY"]
    xerrup = value["fXerrUp"]
    xerrdown = value["fXerrDown"]
    yerrup = value["fYerrUp"]
    yerrdown = value["fYerrDown"]

    # Create TGraphAsymmErrors object.
    # ROOT signature: (n, x, y, exl, exh, eyl, eyh).
    # NOTE(review): x and y (and their error arrays) are swapped here, so the
    # pt values end up on the ordinate — confirm this is intentional.
    graph = ROOT.TGraphAsymmErrors(n, y, x, yerrdown, yerrup, xerrdown, xerrup)

    # BUG FIX: the graph was previously created but never written, leaving
    # the output file empty. Name it after its dict key and persist it.
    graph.SetName(key)
    graph.Write(key)

root_file.Close()
print(f"All TGraphAsymmErrors objects have been stored in {root_path}")
9 changes: 6 additions & 3 deletions dijet/tasks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class HistogramsBaseTask(
# Add nested sibling directories to output path
output_collection_cls = law.NestedSiblingFileCollection

# Category ID for methods
LOOKUP_CATEGORY_ID = {"sm": 1, "fe": 2}

def get_datasets(self) -> tuple[list[str], bool]:
"""
Select datasets belonging to the `process` of the current branch task.
Expand All @@ -58,7 +61,7 @@ def get_datasets(self) -> tuple[list[str], bool]:
if not dataset_insts_from_process:
raise RuntimeError(
"no single dataset found in config matching "
f"process `{self.branch_data.process}`"
f"process `{self.branch_data.process}`",
)

# filter to contain only user-supplied datasets
Expand All @@ -69,7 +72,7 @@ def get_datasets(self) -> tuple[list[str], bool]:
if not datasets_filtered:
raise RuntimeError(
"no single user-supplied dataset matched "
f"process `{self.branch_data.process}`"
f"process `{self.branch_data.process}`",
)

# set MC flag if any of the filtered datasets
Expand All @@ -79,7 +82,7 @@ def get_datasets(self) -> tuple[list[str], bool]:
)
if len(datasets_filtered_is_mc) > 1:
raise RuntimeError(
"filtered datasets have mismatched `is_mc` flags"
"filtered datasets have mismatched `is_mc` flags",
)

# return filtered datasets and is_mc flag
Expand Down
11 changes: 5 additions & 6 deletions dijet/tasks/jer.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,8 @@ def run(self):

# get index on `category` axis corresponding to
# the two computation methods
category_id = {"sm": 1, "fe": 2}
categories = list(h_widths.axes["category"])
index_methods = {m: categories.index(category_id[m]) for m in category_id}
index_methods = {m: categories.index(self.LOOKUP_CATEGORY_ID[m]) for m in self.LOOKUP_CATEGORY_ID}

# calcuate JER for standard method
jer_sm_val = h_widths[index_methods["sm"], :, :].values() * np.sqrt(2)
Expand All @@ -94,10 +93,10 @@ def run(self):
v_jer = h_jer.view()

# write JER values to output histogram
v_jer[index_methods["sm"], :, :].value = jer_sm_val
v_jer[index_methods["sm"], :, :].variance = jer_sm_err**2
v_jer[index_methods["fe"], :, :].value = jer_fe_val
v_jer[index_methods["fe"], :, :].variance = jer_fe_err**2
v_jer[index_methods["sm"], :, :].value = np.nan_to_num(jer_sm_val, nan=0.0)
v_jer[index_methods["sm"], :, :].variance = np.nan_to_num(jer_sm_err**2, nan=0.0)
v_jer[index_methods["fe"], :, :].value = np.nan_to_num(jer_fe_val, nan=0.0)
v_jer[index_methods["fe"], :, :].variance = np.nan_to_num(jer_fe_err**2, nan=0.0)

results_jers = {
"jer": h_jer,
Expand Down
104 changes: 104 additions & 0 deletions dijet/tasks/root.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# coding: utf-8

import law

from dijet.tasks.base import HistogramsBaseTask
from columnflow.util import maybe_import
from columnflow.tasks.framework.base import Requirements

from dijet.tasks.jer import JER

hist = maybe_import("hist")


class JERtoRoot(HistogramsBaseTask):
    """
    Task to convert JER output to rootfiles.
    It is not yet possible to write TGraphs via uproot.
    Efforts by the uproot Team ongoing. Related PR:
    https://github.com/scikit-hep/uproot5/pull/1144
    """

    output_collection_cls = law.NestedSiblingFileCollection

    # upstream requirements
    reqs = Requirements(
        JER=JER,
    )

    # TODO: write base task for root conversion
    # keep HistogramBaseTask as long as no conversion possible
    def requires(self):
        # require all branches of the upstream JER workflow
        return self.reqs.JER.req(
            self,
            branch=-1,
            _exclude={"branches"},
        )

    def workflow_requires(self):
        reqs = super().workflow_requires()
        reqs["key"] = self.as_branch().requires()
        return reqs

    def load_jer(self):
        """Load the pickled results dict produced by the upstream JER task."""
        results = self.input().collection[0]["jers"].load(formatter="pickle")
        return results

    def output(self) -> dict[str, law.FileSystemTarget]:
        """Declare one pickle output per sample holding the graph input arrays."""
        sample = self.extract_sample()
        target = self.target(f"{sample}", dir=True)
        outp = {
            "jers": target.child("jers.pickle", type="f"),
        }
        return outp

    def run(self):
        results_jer = self.load_jer()
        h_jer = results_jer["jer"]

        # get pt bins (errors symmetrical for now)
        pt_bins = h_jer.axes["dijets_pt_avg"].edges
        pt_centers = h_jer.axes["dijets_pt_avg"].centers
        pt_error_lo = pt_centers - pt_bins[:-1]
        pt_error_hi = pt_bins[1:] - pt_centers

        # compute and store values for building `TGraphAsymmError`
        # in external script
        results_jers = {}
        abseta_bins = h_jer.axes["probejet_abseta"].edges
        for method in self.LOOKUP_CATEGORY_ID:
            # loop over (lo, hi) edge pairs of the |eta| binning
            for abseta_lo, abseta_hi in zip(
                abseta_bins[:-1],
                abseta_bins[1:],
            ):

                abseta_lo_str = f"{abseta_lo:.3f}".replace(".", "p")
                abseta_hi_str = f"{abseta_hi:.3f}".replace(".", "p")
                abseta_str = f"abseta_{abseta_lo_str}_{abseta_hi_str}"

                # select the histogram slice for this method and |eta| bin
                abseta_center = (abseta_lo + abseta_hi) / 2
                h_jer_category_abseta = h_jer[
                    {
                        "category": hist.loc(self.LOOKUP_CATEGORY_ID[method]),
                        "probejet_abseta": hist.loc(abseta_center),
                    }
                ]

                jers = h_jer_category_abseta.values()
                # BUG FIX: the JER task fills the histogram variance with
                # err**2, but the conversion script feeds fYerrUp/fYerrDown
                # directly to TGraphAsymmErrors, which expects *errors* —
                # so take the square root of the variances here.
                jers_errors = h_jer_category_abseta.variances() ** 0.5

                # sanity check
                assert len(jers) == len(pt_centers), f"check number of bins for {abseta_str!r}"

                # store results
                results_jers[f"jer_{abseta_str}_{method}"] = {
                    "fY": jers,
                    "fYerrUp": jers_errors,
                    "fYerrDown": jers_errors,
                    "fX": pt_centers,
                    "fXerrUp": pt_error_hi,
                    "fXerrDown": pt_error_lo,
                }

        # store results
        self.output()["jers"].dump(results_jers, formatter="pickle")
4 changes: 3 additions & 1 deletion law.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ inherit: $CF_BASE/law.cfg

columnflow.tasks.cms.inference
columnflow.tasks.cms.external
dijet.tasks.{alpha,jer}
dijet.tasks.{alpha,jer,root}


[logging]
Expand Down Expand Up @@ -102,6 +102,8 @@ cf.PlotShiftedVariables: local
dijet.AlphaExtrapolation: local
dijet.JER: local

dijet.JERtoRoot: local


[job]

Expand Down