Fix hitrate (#1159)

* add movielens * First hitrate metric version * metrge with upstream * fix topk_map * fix error in map * tests for map * top_k ndcg * edit changelog * edit the tabs * check codestyle * check the intent * check the intent * remove trailing whitespace * fixed hitrate * fixed the docs * hitrate * Additive margin softmax (#1131) * Added Additive-margin softmax * update changelog.md * Readme update2 (#1142) * readme * batch overfit fix for reproducibility * batch overfit fix for reproducibility * readme * readme update * extra Config/Hydra API fixes * codestyle * updated dl_cpu(workflows)- For passing CI-Tests (#1135) * updated dl_cpu * Updated-requirements-ml.txt * corrected the mistake * Update dl_cpu.yml * Update requirements-ml.txt * Kept dl_cpu 20.04-3.6 minimal as requested * Corrected the mistakes * Add requirements parameter * Update dl_cpu.yml * Update dl_cpu.yml Co-authored-by: Sergey Kolesnikov <[email protected]> * fix: `_key_value` for schedulers in case of multiple optimizers fixed (#1146) * Github CI fixes (#1143) * verbose * Utils callbacks (#1127) * quantization.py * onnx and quantization * tracing.py * docs * fix * fix * quantization test * fix test * fix comments * fix comments * Update catalyst/callbacks/__init__.py Co-authored-by: Sergey Kolesnikov <[email protected]> * tests * tests * tests * tests * tests * pycharm tricks * extra tests * Utils callbacks (#1145) * quantization.py * onnx and quantization * tracing.py * docs * fix * fix * quantization test * fix test * fix comments * fix comments * quantization example * examples * CHANGELOG.md * fix * fix * Apply suggestions from code review Co-authored-by: Sergey Kolesnikov <[email protected]> * extra tests 2 * extra tests 2 * extra tests 2 * extra tests 2 * extra tests 2 * extra tests 2 * extra tests 2 * extra tests 2 Co-authored-by: Nikita Balagansky <[email protected]> * Engine docs (#1141) * typings & few docs * typing fix; disabled `dist.barrier()` in optimizer step for ddp * docs * docs: 
fixed long lines with docs * docs fixes * optimizer args * removed empty line Co-authored-by: Dmytro Doroshenko <[email protected]> * v21.03.1 * v21.03: minimal version fix (#1147) * minimal version fix * docs * fix: nested dicts in loaders_params/samplers_params overriding (#1150) * docs (#1152) * Github CI fix (#1148) * fix * fix * Niftireader (#1151) * changes for NiftiReader * adding import statement that was needed and fixing codestyle * updated CHANGELOG * removing unnecesary documentation * fixing codestyle * updated docs and requirements * adding a space * not importing nibabel for docs * fixing typo * adding step for contrib pip caching * fixed codestyle and added a test for reader that uncovered a mistake :) * hopefully fixing imports * adding sergey suggested fix * fixing deploy_push.yml * updating workflows Co-authored-by: Kevin Wang <[email protected]> Co-authored-by: Kevin Wang <[email protected]> * add changelog * solved runtime error * hitrate calculation Co-authored-by: Daniel Chepenko <[email protected]> Co-authored-by: denyhoof <[email protected]> Co-authored-by: Даниил <[email protected]> Co-authored-by: Atharva Phatak <[email protected]> Co-authored-by: Sergey Kolesnikov <[email protected]> Co-authored-by: Yauheni Kachan <[email protected]> Co-authored-by: Nikita Balagansky <[email protected]> Co-authored-by: Dmytro Doroshenko <[email protected]> Co-authored-by: Kevin Wang <[email protected]> Co-authored-by: Kevin Wang <[email protected]> Co-authored-by: Kevin Wang <[email protected]>
catalyst-team · Apr 9, 2021 · 9e381c8 · 9e381c8
1 parent 7de603f
commit 9e381c8
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - minimal requirements issue ([#1147](https://github.com/catalyst-team/catalyst/issues/1147))
 - nested dicts in `loaders_params`/`samplers_params` overriding fixed ([#1150](https://github.com/catalyst-team/catalyst/pull/1150))
+- fixed hitrate calculation issue ([#1155](https://github.com/catalyst-team/catalyst/issues/1155))
 
 ## [21.03.1] - 2021-03-28
 

diff --git a/catalyst/metrics/functional/_hitrate.py b/catalyst/metrics/functional/_hitrate.py
@@ -5,9 +5,12 @@
 from catalyst.metrics.functional._misc import process_recsys_components
 
 
-def hitrate(outputs: torch.Tensor, targets: torch.Tensor, topk: List[int]) -> List[torch.Tensor]:
+def hitrate(
+    outputs: torch.Tensor, targets: torch.Tensor, topk: List[int], zero_division: int = 0
+) -> List[torch.Tensor]:
     """
-    Calculate the hit rate score given model outputs and targets.
+    Calculate the hit rate (aka recall) score given
+    model outputs and targets.
     Hit-rate is a metric for evaluating ranking systems.
     Generate top-N recommendations and if one of the recommendation is
     actually what user has rated, you consider that a hit.
@@ -30,6 +33,9 @@ def hitrate(outputs: torch.Tensor, targets: torch.Tensor, topk: List[int]) -> Li
             ground truth, labels
         topk (List[int]):
             Parameter fro evaluation on top-k items
+        zero_division (int):
+            value to return in the case of division by zero;
+            should be one of 0 or 1
 
     Returns:
         hitrate_at_k (List[torch.Tensor]): the hitrate score
@@ -39,7 +45,8 @@ def hitrate(outputs: torch.Tensor, targets: torch.Tensor, topk: List[int]) -> Li
     targets_sort_by_outputs = process_recsys_components(outputs, targets)
     for k in topk:
         k = min(outputs.size(1), k)
-        hits_score = torch.sum(targets_sort_by_outputs[:, :k], dim=1) / k
+        hits_score = torch.sum(targets_sort_by_outputs[:, :k], dim=1) / targets.sum(dim=1)
+        hits_score = hits_score.nan_to_num(zero_division)
         results.append(torch.mean(hits_score))
 
     return results

diff --git a/catalyst/metrics/functional/tests/test_hitrate.py b/catalyst/metrics/functional/tests/test_hitrate.py
@@ -9,13 +9,16 @@ def test_hitrate():
     """
     Tests for catalyst.metrics.hitrate metric.
     """
-    y_pred = [0.5, 0.2]
-    y_true = [1.0, 0.0]
-    k = [1, 2]
+    y_pred = [0.5, 0.2, 0.1]
+    y_true = [1.0, 0.0, 1.0]
+    k = [1, 2, 3]
 
-    hitrate_at1, hitrate_at2 = hitrate(torch.Tensor([y_pred]), torch.Tensor([y_true]), k)
-    assert hitrate_at1 == 1.0
+    hitrate_at1, hitrate_at2, hitrate_at3 = hitrate(
+        torch.Tensor([y_pred]), torch.Tensor([y_true]), k
+    )
+    assert hitrate_at1 == 0.5
     assert hitrate_at2 == 0.5
+    assert hitrate_at3 == 1.0
 
     # check 1 simple case
     y_pred = [0.5, 0.2]
@@ -24,3 +27,20 @@ def test_hitrate():
 
     hitrate_at2 = hitrate(torch.Tensor([y_pred]), torch.Tensor([y_true]), k)[0]
     assert hitrate_at2 == 0.0
+
+    # check batch case
+    y_pred1 = [4.0, 2.0, 3.0, 1.0]
+    y_pred2 = [1.0, 2.0, 3.0, 4.0]
+    y_true1 = [0, 0, 1.0, 1.0]
+    y_true2 = [0, 0, 0.0, 0.0]
+    k = [1, 2, 3, 4]
+
+    y_pred_torch = torch.Tensor([y_pred1, y_pred2])
+    y_true_torch = torch.Tensor([y_true1, y_true2])
+
+    hitrate_at1, hitrate_at2, hitrate_at3, hitrate_at4 = hitrate(y_pred_torch, y_true_torch, k)
+
+    assert hitrate_at1 == 0.0
+    assert hitrate_at2 == 0.25
+    assert hitrate_at3 == 0.25
+    assert hitrate_at4 == 0.5