Update Tagger and Results (#155)

Update Tagger and Results
umami-hep · Feb 20, 2023 · 4cf40b3 · 4cf40b3
1 parent 90a453f
commit 4cf40b3
Show file tree

Hide file tree

Showing 23 changed files with 313 additions and 352 deletions.
diff --git a/.gitignore b/.gitignore
@@ -23,7 +23,7 @@ parts/
 sdist/
 var/
 plots/
-tests/
+outputs/
 wheels/
 pip-wheel-metadata/
 share/python-wheels/
@@ -47,6 +47,7 @@ pip-delete-this-directory.txt
 htmlcov/
 .tox/
 .nox/
+*diff.png
 .coverage
 .coverage.*
 .cache

diff --git a/changelog.md b/changelog.md
@@ -2,6 +2,7 @@
 
 ### [Latest]
 
+- Improvements to the high level API [!155](https://github.com/umami-hep/puma/pull/155)
 - Fixate the python container version [!153](https://github.com/umami-hep/puma/pull/153)
 - Improve ROC format [#146](https://github.com/umami-hep/puma/pull/149)
 - Fix for CI [!152](https://github.com/umami-hep/puma/pull/152)

diff --git a/docs/source/examples/high_level_api.md b/docs/source/examples/high_level_api.md
@@ -13,9 +13,13 @@ performance plots.
 
 ## Initialising the taggers
 
+The `Results` object is initialised with the signal class. By default this is `bjets`, but it can be changed to `cjets`
+to produce the c-tagging plots.
+
 ```py
 §§§examples/high_level_plots.py:1:55§§§
 ```
+
 WARNING: when using 2 different data frames, you cannot just use one `tagger_args`; you need
 as many as you have data frames, defining the flavour classes and performance variables.
 
@@ -24,47 +28,34 @@ as many as you have data frames defining the flavour classes and performance var
 To plot the discriminant, you can now simply call one function and everything else is handled automatically,
 here for the _b_-jet discriminant
 ```py
-§§§examples/high_level_plots.py:56:58§§§
+§§§examples/high_level_plots.py:57:59§§§
 ```
 
 <img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_disc_b.png width=500>
 
-and similar for the _c_-jet discriminant
-```py
-§§§examples/high_level_plots.py:59§§§
-```
-
-<img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_disc_c.png width=500>
-
 
 ## ROC plots
 
 In the same manner you can plot ROC curves, here for the _b_-tagging performance
 ```py
-§§§examples/high_level_plots.py:62:64§§§
+§§§examples/high_level_plots.py:61:63§§§
 ```
 <img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_roc_b.png width=500>
 
-and similar for the _c_-tagging performance
-```py
-§§§examples/high_level_plots.py:65§§§
-```
-
-<img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_roc_c.png width=500>
 
 
 ## Performance vs a variable
 In this case we plot the performance as a function of the jet pT with the same syntax as above for an inclusive working point of 70%
 ```py
-§§§examples/high_level_plots.py:69:82§§§
+§§§examples/high_level_plots.py:66:79§§§
 ```
 <img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_dummy_tagger_pt_b_eff.png width=500>
 <img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_dummy_tagger_pt_c_rej.png width=500>
 <img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_dummy_tagger_pt_light_rej.png width=500>
 
 and similar for a fixed b-efficiency per bin.
 ```py
-§§§examples/high_level_plots.py:84:94§§§
+§§§examples/high_level_plots.py:81:91§§§
 ```
 
 <img src=https://github.com/umami-hep/puma/raw/examples-material/hlplots_dummy_tagger_fixed_per_bin_pt_b_eff.png width=500>

diff --git a/examples/high_level_plots.py b/examples/high_level_plots.py
@@ -20,34 +20,35 @@
 # `is_c` and `is_b` for each data frame separately and thus you cannot use these
 # args for each tagger. The same applies to the `perf_var`.
 tagger_args = {
-    "perf_var": df["pt"] / 1e3,
-    "is_light": df["HadronConeExclTruthLabelID"] == 0,
-    "is_c": df["HadronConeExclTruthLabelID"] == 4,
-    "is_b": df["HadronConeExclTruthLabelID"] == 5,
+    "is_flav": {
+        "bjets": df["HadronConeExclTruthLabelID"] == 5,
+        "cjets": df["HadronConeExclTruthLabelID"] == 4,
+        "ujets": df["HadronConeExclTruthLabelID"] == 0,
+    }
 }
 
-
-dips = Tagger("dips", template=tagger_args)
+dips = Tagger("dips", **tagger_args)
+dips.perf_var = df["pt"] / 1e3
 dips.label = "dummy DIPS ($f_{c}=0.005$)"
 dips.f_c = 0.005
 dips.f_b = 0.04
 dips.colour = "#AA3377"
 dips.extract_tagger_scores(df)
 
-rnnip = Tagger("rnnip", template=tagger_args)
+rnnip = Tagger("rnnip", **tagger_args)
+rnnip.perf_var = df["pt"] / 1e3
 rnnip.label = "dummy RNNIP ($f_{c}=0.07$)"
 rnnip.f_c = 0.07
 rnnip.f_b = 0.04
 rnnip.colour = "#4477AA"
 rnnip.reference = True
 rnnip.extract_tagger_scores(df)
 
-
-results = Results()
+# create the Results object; for c-jet signal plots use `signal="cjets"`
+results = Results(signal="bjets")
 results.add(dips)
 results.add(rnnip)
 
-
 results.sig_eff = np.linspace(0.6, 0.95, 20)
 results.atlas_second_tag = (
     "$\\sqrt{s}=13$ TeV, dummy jets \n$t\\bar{t}$, $20$ GeV $< p_{T} <250$ GeV"
@@ -56,14 +57,10 @@
 # tagger discriminant plots
 logger.info("Plotting tagger discriminant plots.")
 results.plot_discs("hlplots_disc_b.png")
-results.plot_discs("hlplots_disc_c.png", signal_class="cjets")
-
 
+# ROC curves
 logger.info("Plotting ROC curves.")
-# ROC curves as a function of the b-jet efficiency
 results.plot_rocs("hlplots_roc_b.png")
-# ROC curves as a function of the c-jet efficiency
-results.plot_rocs("hlplots_roc_c.png", signal_class="cjets")
 
 
 logger.info("Plotting efficiency/rejection vs pT curves.")

diff --git a/examples/plot_pie.py b/examples/plot_pie.py
@@ -39,7 +39,7 @@
         "startangle": 90,
         "textprops": {"fontsize": 10},
         "radius": 1,
-        "wedgeprops": dict(width=0.4, edgecolor="w"),
+        "wedgeprops": {"width": 0.4, "edgecolor": "w"},
         "pctdistance": 0.4,
     },
     # kwargs passed to puma.PlotObject