From 38bbda667822c5722805c4a4d0d730f7025b19db Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 1 Aug 2022 21:26:11 +0200
Subject: [PATCH 1/2] Remove hook `torch.nan_to_num(x)`

Observed erratic training behavior (green line) with the nan_to_num hook in classifier branch. I'm going to remove it from master.
---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 6ada2a2f121b..7bb7bd61de3a 100644
--- a/train.py
+++ b/train.py
@@ -131,7 +131,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio
     freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
     for k, v in model.named_parameters():
         v.requires_grad = True # train all layers
-        v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0.0
+        # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0.0
         if any(x in k for x in freeze):
             LOGGER.info(f'freezing {k}')
             v.requires_grad = False

From c95445d62c4f3a3199d3eecbaccaa0c4db34c94b Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 1 Aug 2022 21:28:14 +0200
Subject: [PATCH 2/2] Update train.py

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index 7bb7bd61de3a..20fef265110c 100644
--- a/train.py
+++ b/train.py
@@ -131,7 +131,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio
     freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
     for k, v in model.named_parameters():
         v.requires_grad = True # train all layers
-        # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0.0
+        # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
         if any(x in k for x in freeze):
             LOGGER.info(f'freezing {k}')
             v.requires_grad = False