Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve yuma config #237

Merged
merged 7 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Changelog

### [Latest]
- Autoinstall deps and streamline yuma config[!236](https://github.com/umami-hep/puma/pull/236)
- More improvements to Yuma configuration [!237](https://github.com/umami-hep/puma/pull/237)
- Autoinstall deps and streamline yuma config [!236](https://github.com/umami-hep/puma/pull/236)


### [v0.3.2] (2024/02/13)
Expand Down
3 changes: 1 addition & 2 deletions docs/source/examples/yuma.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ Additional arguments can be included:

## taggers.yaml

The ```taggers.yaml``` file contains required information for taggers we wish to load. A section 'tagger_defaults' can be used to include default values, such as f_c, f_b, or cuts. For all taggers that are loaded, these values will be used by default, unless overwritten by the tagger.

The ```taggers.yaml``` file contains required information for taggers we wish to load.
Under 'taggers' are the defined taggers. Each should be assigned a tag, which is used as the 'name' of the tagger and its settings. Within each tagger, arguments that can be passed to the Tagger high-level class can be used.

## plot_cfg.yaml
Expand Down
22 changes: 12 additions & 10 deletions examples/plt_cfg.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
plot_dir: /home/output_dir
timestamp: False

sample_path: dummy/path/1

results_config:
sample: ttbar
atlas_first_tag: Simulation Internal
atlas_second_tag: $\sqrt{s} = 13.6$ TeV, MC23
remove_nan: True # for plotting eff profiles for variables with NaNs (e.g, Lxy)
atlas_third_tag: $t\overline{t}$ 20 < $p_T$ < 250 GeV, $|\eta| < 2.5$
num_jets: 100_000

taggers_config: !include taggers.yaml
taggers: [dummy1, dummy2, dummy3]

sample:
sample: ttbar
remove_nan: True # for plotting eff profiles for variables with NaNs (e.g, Lxy)
global_cuts:
- "pt > 20000"
- "pt < 250000"
- "pt > 20_000"
- "pt < 250_000"
- "eta < 2.5"
- "eta > -2.5"
tag: $t\overline{t}$ 20 < $p_T$ < 250 GeV, $|\eta| < 2.5$
- "n_truth_promptLepton == 0"

taggers_config: !include taggers.yaml
taggers: [dummy1, dummy2, dummy3]

roc_plots:
- args:
Expand Down
32 changes: 13 additions & 19 deletions examples/taggers.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,14 @@
tagger_defaults:
dummy1: # Uses default sample path
label: Dummy default
f_c: 0.2
f_b: 0.05
reference: True
dummy2: # Uses default sample path
label: Dummy $f_c$ = 0.2
f_c: 0.2
f_b: 0.05
dummy3: # Uses updated sample path
sample_path: /dummy/path
label: Dummy 3
f_c: 0.2
f_b: 0.05
f_c: 0.1
sample_path: dummy/path/1
cuts:
- "n_truth_promptLepton == 0"

taggers:
# Uses default sample path, and default f_c
dummy1:
label: Dummy default
reference: True
# Uses default sample path, and updated f_c
dummy2:
f_c: 0.2
label: Dummy $f_c$ = 0.2
# Uses updated sample path, and default f_c
dummy3:
sample_path: /dummy/path
label: Dummy 3
62 changes: 20 additions & 42 deletions puma/hlplots/configs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from dataclasses import dataclass
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path

Expand All @@ -17,23 +17,18 @@
config_path: Path
plot_dir: Path

taggers_config: dict
sample: dict

results_config: dict[str, dict[str, str]]
taggers_config: dict
taggers: list[str] | list[Tagger] | None = None
timestamp: bool = True

roc_plots: dict[str, dict] = None
fracscan_plots: dict[str, dict] = None
disc_plots: dict[str, dict] = None
prob_plots: dict[str, dict] = None
eff_vs_var_plots: dict[str, dict] = None

signal: str = None
timestamp: bool = True
sample_path: Path = None

results: Results = None
default_second_atlas_tag: str = None
roc_plots: dict[str, dict] = field(default_factory=dict)
fracscan_plots: dict[str, dict] = field(default_factory=dict)
disc_plots: dict[str, dict] = field(default_factory=dict)
prob_plots: dict[str, dict] = field(default_factory=dict)
eff_vs_var_plots: dict[str, dict] = field(default_factory=dict)

def __post_init__(self):
# Define a plot directory based on the plot config file name, and a date time
Expand All @@ -43,27 +38,8 @@
plot_dir_name += "_" + date_time_file
self.plot_dir_final = Path(self.plot_dir) / plot_dir_name

tagger_defaults = self.taggers_config.get("tagger_defaults", {})
taggers = self.taggers_config.get("taggers", {})

if self.taggers is None:
self.taggers = list(taggers.keys())

self.taggers = {
k: {
**tagger_defaults,
**t,
"yaml_name": k,
}
for k, t in taggers.items()
if k in self.taggers
}

self.roc_plots = self.roc_plots or {}
self.fracscan_plots = self.fracscan_plots or {}
self.disc_plots = self.disc_plots or {}
self.prob_plots = self.prob_plots or {}
self.eff_vs_var_plots = self.eff_vs_var_plots or {}
for k, kwargs in self.taggers_config.items():
kwargs["yaml_name"] = k

@classmethod
def load_config(cls, path: Path) -> PlotConfig:
Expand All @@ -81,24 +57,26 @@
file, and adds them.
"""
kwargs = self.results_config
tag = kwargs.get("atlas_second_tag", "")
kwargs["atlas_second_tag"] = tag + "\n" + self.sample.pop("tag", "")
kwargs.update(self.sample)
kwargs["signal"] = self.signal
kwargs["perf_vars"] = list(
{plot["args"].get("perf_var", "pt") for plot in self.eff_vs_var_plots}
)

# Store default tag incase other plots need to temporarily modify it
self.default_second_atlas_tag = kwargs["atlas_second_tag"]

# Instantiate the results object
results = Results(**kwargs)

# Store default tag in case other plots need to temporarily modify it
results.default_atlas_second_tag = results.atlas_second_tag

good_colours = get_good_colours()
col_idx = 0
# Add taggers to results, then bulk load
for key, t in self.taggers.items():
for key, t in self.taggers_config.items():
# if a sample is not defined for the tagger, use the default sample
if not self.sample_path and not t.get("sample_path", None):
raise ValueError(f"No sample path defined for tagger {key}")

Check warning on line 77 in puma/hlplots/configs.py

View check run for this annotation

Codecov / codecov/patch

puma/hlplots/configs.py#L77

Added line #L77 was not covered by tests
if self.sample_path and not t.get("sample_path", None):
t["sample_path"] = self.sample_path

Check warning on line 79 in puma/hlplots/configs.py

View check run for this annotation

Codecov / codecov/patch

puma/hlplots/configs.py#L79

Added line #L79 was not covered by tests
# Allows automatic selection of tagger name in eval files
t["name"] = get_tagger_name(
t.get("name", None), t["sample_path"], key, results.flavours
Expand Down
6 changes: 4 additions & 2 deletions puma/hlplots/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class Results:
backgrounds: list = field(init=False)
atlas_first_tag: str = "Simulation Internal"
atlas_second_tag: str = None
atlas_third_tag: str = None
taggers: dict = field(default_factory=dict)
perf_vars: str | tuple | list = "pt"
output_dir: str | Path = "."
Expand All @@ -47,6 +48,8 @@ def __post_init__(self):
self.output_dir = Path(self.output_dir)
if isinstance(self.perf_vars, str):
self.perf_vars = [self.perf_vars]
if self.atlas_second_tag is not None and self.atlas_third_tag is not None:
self.atlas_second_tag = f"{self.atlas_second_tag}\n{self.atlas_third_tag}"

self.plot_funcs = {
"probs": self.plot_probs,
Expand Down Expand Up @@ -82,7 +85,7 @@ def flavours(self):
"""
return self.backgrounds + [self.signal]

def add(self, tagger):
def add(self, tagger: Tagger):
"""Add tagger to class.

Parameters
Expand Down Expand Up @@ -127,7 +130,6 @@ def add_taggers_from_file(
"""Load one or more taggers from a common file, and adds them to this
results class


Parameters
----------
@self.load_taggers_from_file
Expand Down
2 changes: 0 additions & 2 deletions puma/hlplots/tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@ class Tagger:
)
disc_cut: float = None
working_point: float = None
f_c: float = None
f_b: float = None

# Used only by YUMA
yaml_name: str = None
Expand Down
9 changes: 5 additions & 4 deletions puma/hlplots/yuma.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@
plt_cfg.results, eff_vs_var
)
plot_kwargs = get_plot_kwargs(eff_vs_var, suffix=[inc_str, perf_var])
# move this logic into the results class
if not (bins := eff_vs_var["args"].get("bins", None)):
if plt_cfg.sample["sample"] == "ttbar":
if plt_cfg.results.sample == "ttbar":
bins = [20, 30, 40, 60, 85, 110, 140, 175, 250]
elif plt_cfg.sample["sample"] == "zprime":
elif plt_cfg.results.sample == "zprime":

Check warning on line 61 in puma/hlplots/yuma.py

View check run for this annotation

Codecov / codecov/patch

puma/hlplots/yuma.py#L61

Added line #L61 was not covered by tests
bins = [250, 500, 750, 1000, 1500, 2000, 3000, 4000, 5500]
else:
raise ValueError(
Expand Down Expand Up @@ -129,7 +130,7 @@
info_str = f"$f_{frac_flav}$ scan" if frac_flav != "tau" else "$f_{\\tau}$ scan"
# info_str += f" {round(efficiency*100)}% {plt_cfg.results.signal.label} WP"
plt_cfg.results.atlas_second_tag = (
plt_cfg.default_second_atlas_tag + "\n" + info_str
plt_cfg.results.default_atlas_second_tag + "\n" + info_str
)

eff_str = str(round(efficiency * 100, 3)).replace(".", "p")
Expand All @@ -146,7 +147,7 @@
plt_cfg.results.plot_fraction_scans(efficiency=efficiency, **plot_kwargs)
plt_cfg.results.taggers = all_taggers
plt_cfg.results.backgrounds = tmp_backgrounds
plt_cfg.results.atlas_second_tag = plt_cfg.default_second_atlas_tag
plt_cfg.results.atlas_second_tag = plt_cfg.results.default_atlas_second_tag


def make_roc_plots(plt_cfg):
Expand Down
16 changes: 9 additions & 7 deletions puma/tests/test_yuma.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ def testGetIncludeTaggers(self):

with tempfile.TemporaryDirectory() as tmp_file:
fpath1, file = get_mock_file(fname=(Path(tmp_file) / "file1.h5").as_posix())
taggers["tagger_defaults"]["sample_path"] = fpath1
taggers["taggers"]["dummy3"]["sample_path"] = fpath1
taggers["dummy1"]["sample_path"] = fpath1
taggers["dummy2"]["sample_path"] = fpath1
taggers["dummy3"]["sample_path"] = fpath1
updated_plt_cfg = Path(tmp_file) / "plt_cfg.yaml"

plt_cfg["roc_plots"][0]["reference"] = "dummyNot"
Expand Down Expand Up @@ -108,9 +109,9 @@ def testAllPlots(self):
with tempfile.TemporaryDirectory() as tmp_file:
fpath1, file = get_mock_file(fname=(Path(tmp_file) / "file1.h5").as_posix())
fpath2, file = get_mock_file(fname=(Path(tmp_file) / "file2.h5").as_posix())

taggers["tagger_defaults"]["sample_path"] = fpath1
taggers["taggers"]["dummy3"]["sample_path"] = fpath2
taggers["dummy1"]["sample_path"] = fpath1
taggers["dummy2"]["sample_path"] = fpath1
taggers["dummy3"]["sample_path"] = fpath2
updated_plt_cfg = Path(tmp_file) / "plt_cfg.yaml"
plt_cfg["plot_dir"] = tmp_file + "/plots"
plt_cfg["taggers_config"] = taggers
Expand Down Expand Up @@ -161,8 +162,9 @@ def testNoPlots(self):
fpath1, file = get_mock_file(fname=(Path(tmp_file) / "file1.h5").as_posix())
fpath2, file = get_mock_file(fname=(Path(tmp_file) / "file2.h5").as_posix())

taggers["tagger_defaults"]["sample_path"] = fpath1
taggers["taggers"]["dummy3"]["sample_path"] = fpath2
taggers["dummy1"]["sample_path"] = fpath1
taggers["dummy2"]["sample_path"] = fpath1
taggers["dummy3"]["sample_path"] = fpath2

updated_plt_cfg = Path(tmp_file) / "plt_cfg.yaml"
plt_cfg["taggers_config"] = taggers
Expand Down
Loading