huggingface · qgallouedec · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 12, 2024
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -283,3 +283,18 @@ The deprecation and removal schedule is based on each feature's usage and impact
 - **Widely-Used Components**: For a feature with high usage, we aim for a more gradual transition period of approximately **5 months**, generally scheduling deprecation around **5 minor releases** after the initial warning.
 
 These examples represent the two ends of a continuum. The specific timeline for each feature will be determined individually, balancing innovation with user stability needs.
+
+### Working with Warnings
+
+When working with warnings in the codebase, please follow these principles:
+
+1. **Warnings must be actionable**
+   Every warning raised should be actionable and provide clear guidance on how to address or resolve the underlying issue. For example, a deprecation warning should include an alternative method or function that can be used.
+
+2. **Warnings should not indicate normal behavior**
+   Warnings should not be triggered for issues that do not affect functionality. They must not appear for the expected, intended operation of the software. Warnings should highlight potential problems, not reflect normal behavior.
+
+3. **Use the appropriate warning type**
+   Use the warning type that matches the situation (e.g., `DeprecationWarning` for features that are being phased out, `UserWarning` for behaviors that users should address), rather than relying on the default category.
+
+By following these guidelines, we ensure that warnings remain meaningful, actionable, and contribute to the long-term health of the project.
diff --git a/examples/scripts/reward_modeling.py b/examples/scripts/reward_modeling.py
@@ -99,7 +99,8 @@
     if model_config.use_peft and model_config.lora_task_type != "SEQ_CLS":
         warnings.warn(
             "You are using a `task_type` that is different than `SEQ_CLS` for PEFT. This will lead to silent bugs"
-            " Make sure to pass --lora_task_type SEQ_CLS when using this script with PEFT."
+            " Make sure to pass --lora_task_type SEQ_CLS when using this script with PEFT.",
+            UserWarning,
         )
 
     ##############

diff --git a/trl/core.py b/trl/core.py
@@ -296,7 +296,8 @@ def randn_tensor(
                 warnings.warn(
                     f"The passed generator was created on 'cpu' even though a tensor on {device} was expected."
                     f" Tensors will be created on 'cpu' and then moved to {device}. Note that one can probably"
-                    f" slighly speed up this function by passing a generator that was created on the {device} device."
+                    f" slighly speed up this function by passing a generator that was created on the {device} device.",
+                    UserWarning,
                 )
         elif gen_device_type != device.type and gen_device_type == "cuda":
             raise ValueError(f"Cannot generate a {device} tensor from a generator of type {gen_device_type}.")

diff --git a/trl/trainer/bco_trainer.py b/trl/trainer/bco_trainer.py
@@ -394,17 +394,9 @@ def __init__(
                 ref_model_init_kwargs["torch_dtype"] = torch_dtype
 
         if isinstance(model, str):
-            warnings.warn(
-                "You passed a model_id to the BCOTrainer. This will automatically create an "
-                "`AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you."
-            )
             model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs)
 
         if isinstance(ref_model, str):
-            warnings.warn(
-                "You passed a ref model_id to the BCOTrainer. This will automatically create an "
-                "`AutoModelForCausalLM`"
-            )
             ref_model = AutoModelForCausalLM.from_pretrained(ref_model, **ref_model_init_kwargs)
 
         # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16`

diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py
@@ -759,9 +759,12 @@ def compute_accuracy(eval_pred) -> Dict[str, float]:
     predictions, labels = eval_pred
     # Here, predictions is rewards_chosen and rewards_rejected.
     # We want to see how much of the time rewards_chosen > rewards_rejected.
-    if np.array(predictions[:, 0] == predictions[:, 1], dtype=float).sum() > 0:
+    equal_predictions_count = np.array(predictions[:, 0] == predictions[:, 1], dtype=float).sum()
+    if equal_predictions_count > 0:
         warnings.warn(
-            f"There are {np.array(predictions[:, 0] == predictions[:, 1]).sum()} out of {len(predictions[:, 0])} instances where the predictions for both options are equal. As a consequence the accuracy can be misleading."
+            f"There are {equal_predictions_count} out of {len(predictions[:, 0])} instances where the predictions for "
+            "both options are equal. As a consequence the accuracy can be misleading.",
+            UserWarning,
         )
     predictions = np.argmax(predictions, axis=1)