diff --git a/unsloth_zoo/rl_replacements.py b/unsloth_zoo/rl_replacements.py index a120aff2a..791d4d9c1 100644 --- a/unsloth_zoo/rl_replacements.py +++ b/unsloth_zoo/rl_replacements.py @@ -94,7 +94,7 @@ def chunked_hidden_states_selective_log_softmax( if logit_scale_divide != 0.0: chunk_logits = chunk_logits / logit_scale_divide if logit_softcapping != 0.0: - chunk_logits = chunk_logits * torch.tanh(chunk_logits / logit_softcapping) + chunk_logits = logit_softcapping * torch.tanh(chunk_logits / logit_softcapping) chunk_logits = chunk_logits.to(torch.float32)