diff --git a/neural_compressor/experimental/common/criterion.py b/neural_compressor/experimental/common/criterion.py
index 4382e827225..0cbc1e3ac31 100644
--- a/neural_compressor/experimental/common/criterion.py
+++ b/neural_compressor/experimental/common/criterion.py
@@ -1252,14 +1252,17 @@ def __call__(self, **kwargs):
 class SelfKnowledgeDistillationLoss(KnowledgeDistillationFramework):
     """SelfKnowledge Distillation Loss."""
 
-    def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0,add_origin_loss=False, student_model=None, teacher_model=None):
+    def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0, add_origin_loss=False,
+                 student_model=None, teacher_model=None):
         """Initialize SelfKnowledge Distillation Loss class.
 
         Args:
             layer_mappings (list): layers of distillation.Format like
-                [[[student1_layer_name1, teacher_layer_name1],[student2_layer_name1, teacher_layer_name1]],[[student1_layer_name2, teacher_layer_name2],[student2_layer_name2, teacher_layer_name2]]]
+                [[[student1_layer_name1, teacher_layer_name1],[student2_layer_name1, teacher_layer_name1]],
+                 [[student1_layer_name2, teacher_layer_name2],[student2_layer_name2, teacher_layer_name2]]]
             loss_types (list, optional): loss types. Defaults to ['CE'] * len(layer_mappings).
-            loss_weights (list, optional): loss weights. Defaults to [1.0 / len(layer_mappings)] * len(layer_mappings).temperature (float, optional): use to calculate the soft label CE.
+            loss_weights (list, optional): loss weights. Defaults to
+                [1.0 / len(layer_mappings)] * len(layer_mappings).
             temperature (optional): temperature. Defaults to 1.0.
             add_origin_loss (bool, optional): whether to add origin loss for hard label loss.
             student_model (optional): student model. Defaults to None.
@@ -1342,14 +1345,17 @@ class PyTorchSelfKnowledgeDistillationLoss(
     SelfKnowledgeDistillationLoss
 ):
     """PyTorch SelfKnowledge Distillation Loss."""
-    def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0,add_origin_loss=False, student_model=None, teacher_model=None):
+    def __init__(self, layer_mappings=[], loss_types=None, loss_weights=None, temperature=1.0, add_origin_loss=False,
+                 student_model=None, teacher_model=None):
         """Initialize PyTorch SelfKnowledge Distillation Loss class.
 
         Args:
             layer_mappings (list): layers of distillation.Format like
-                [[[student1_layer_name1, teacher_layer_name1],[student2_layer_name1, teacher_layer_name1]],[[student1_layer_name2, teacher_layer_name2],[student2_layer_name2, teacher_layer_name2]]]
+                [[[student1_layer_name1, teacher_layer_name1],[student2_layer_name1, teacher_layer_name1]],
+                 [[student1_layer_name2, teacher_layer_name2],[student2_layer_name2, teacher_layer_name2]]]
             loss_types (list, optional): loss types. Defaults to ['CE'] * len(layer_mappings).
-            loss_weights (list, optional): loss weights. Defaults to [1.0 / len(layer_mappings)] * len(layer_mappings).temperature (float, optional): use to calculate the soft label CE.
+            loss_weights (list, optional): loss weights. Defaults to
+                [1.0 / len(layer_mappings)] * len(layer_mappings).
             temperature (optional): temperature. Defaults to 1.0.
             add_origin_loss (bool, optional): whether to add origin loss for hard label loss.
             student_model (optional): student model. Defaults to None.
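
For readers parsing the nested `layer_mappings` format described in the docstrings above, here is a minimal sketch of how the constructor arguments might be assembled. The layer names are hypothetical placeholders, and the commented-out constructor call assumes `neural_compressor` is installed and a PyTorch `student_model` is available; only the argument layout and the documented defaults come from the docstring itself.

```python
# Minimal sketch of the constructor arguments documented above.
# The layer names below are hypothetical placeholders.
# Each outer entry is one distillation point; each inner pair is
# [student_layer_name, teacher_layer_name].
layer_mappings = [
    [["student.block1.output", "teacher.block4.output"],
     ["student.block2.output", "teacher.block4.output"]],
    [["student.block1.fc", "teacher.fc"],
     ["student.block2.fc", "teacher.fc"]],
]

# Defaults spelled out in the docstring: one loss type per mapping group,
# equal weights summing to 1.0.
loss_types = ["CE"] * len(layer_mappings)
loss_weights = [1.0 / len(layer_mappings)] * len(layer_mappings)

print(loss_types)    # ['CE', 'CE']
print(loss_weights)  # [0.5, 0.5]

# Hypothetical usage (assumes neural_compressor is installed and
# student_model is a torch.nn.Module):
# from neural_compressor.experimental.common.criterion import (
#     PyTorchSelfKnowledgeDistillationLoss,
# )
# criterion = PyTorchSelfKnowledgeDistillationLoss(
#     layer_mappings=layer_mappings,
#     loss_types=loss_types,
#     loss_weights=loss_weights,
#     temperature=1.0,
#     add_origin_loss=False,
#     student_model=student_model,
# )
```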