diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml
index 44e2491227..34367dee20 100755
--- a/examples/configs/dpo.yaml
+++ b/examples/configs/dpo.yaml
@@ -55,8 +55,6 @@ policy:
   # makes the training sequence length divisible by the tensor parallel size
   # this is useful for sequence parallel training
   make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size}
-  ## NOTE: there is a known issue with gradient clipping when using Dtensor
-  ## if using dtensor, set max_grad_norm to NULL
   max_grad_norm: 1.0
 
   optimizer: