From b66f5dcadb1a8327d28333b11ea881310ef98c8d Mon Sep 17 00:00:00 2001 From: Etherll <61019402+Etherll@users.noreply.github.com> Date: Wed, 15 Apr 2026 10:53:31 +0200 Subject: [PATCH] Fix logit softcapping calculation in rl_replacements.py --- unsloth_zoo/rl_replacements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth_zoo/rl_replacements.py b/unsloth_zoo/rl_replacements.py index a120aff2a..791d4d9c1 100644 --- a/unsloth_zoo/rl_replacements.py +++ b/unsloth_zoo/rl_replacements.py @@ -94,7 +94,7 @@ def chunked_hidden_states_selective_log_softmax( if logit_scale_divide != 0.0: chunk_logits = chunk_logits / logit_scale_divide if logit_softcapping != 0.0: - chunk_logits = chunk_logits * torch.tanh(chunk_logits / logit_softcapping) + chunk_logits = logit_softcapping * torch.tanh(chunk_logits / logit_softcapping) chunk_logits = chunk_logits.to(torch.float32)