This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

[#3507 follow up] update doc #3688

Merged: 8 commits, May 27, 2021 (changes shown are from 5 commits).
6 changes: 3 additions & 3 deletions docs/en_US/Compression/DependencyAware.rst
@@ -54,11 +54,11 @@ To enable the dependency-aware mode for ``L1FilterPruner``\ :
# for FPGMPruner
# pruner = FPGMPruner(model, config_list, dependency_aware=True, dummy_input=dummy_input)
# for ActivationAPoZRankFilterPruner
# pruner = ActivationAPoZRankFilterPruner(model, config_list, statistics_batch_num=1, , dependency_aware=True, dummy_input=dummy_input)
# pruner = ActivationAPoZRankFilterPruner(model, config_list, optimizer, trainer, criterion, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
# for ActivationMeanRankFilterPruner
# pruner = ActivationMeanRankFilterPruner(model, config_list, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
# pruner = ActivationMeanRankFilterPruner(model, config_list, optimizer, trainer, criterion, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
# for TaylorFOWeightFilterPruner
# pruner = TaylorFOWeightFilterPruner(model, config_list, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)
# pruner = TaylorFOWeightFilterPruner(model, config_list, optimizer, trainer, criterion, statistics_batch_num=1, dependency_aware=True, dummy_input=dummy_input)

pruner.compress()
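The dependency-aware mode exists because layers whose outputs are merged element-wise (for example, across a residual connection) must keep the same output channels. The idea can be sketched in plain Python (illustrative only; the helper names are assumptions, not NNI code):

```python
# Illustrative sketch (not NNI code): why dependency-aware pruning aligns
# pruned channels across layers whose outputs are element-wise added.

def l1_norms(filters):
    """Per-filter L1 norm; each filter is a flat list of weights."""
    return [sum(abs(w) for w in f) for f in filters]

def joint_prune_indices(layer_a, layer_b, num_prune):
    """Pick a common set of channel indices to prune for two layers that
    feed the same element-wise addition (e.g., a residual sum). Channels
    whose summed importance across both layers is smallest are pruned in
    *both* layers, keeping their output shapes compatible."""
    importance = [a + b for a, b in zip(l1_norms(layer_a), l1_norms(layer_b))]
    order = sorted(range(len(importance)), key=importance.__getitem__)
    return sorted(order[:num_prune])

# Two toy "conv layers" with 4 output channels each.
layer_a = [[0.1, -0.1], [2.0, 1.0], [0.05, 0.0], [1.5, -1.5]]
layer_b = [[0.2, 0.1], [1.0, 1.0], [0.1, 0.05], [2.0, 2.0]]

# Channels 0 and 2 are weak in both layers, so both layers prune {0, 2}.
print(joint_prune_indices(layer_a, layer_b, num_prune=2))  # → [0, 2]
```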

6 changes: 3 additions & 3 deletions docs/en_US/Compression/Framework.rst
@@ -29,8 +29,7 @@ Compressor is the base class for pruner and quantizer; it provides a unified interface
'op_types': ['Conv2d', 'Linear'],
}]

optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
pruner = LevelPruner(model, configure_list, optimizer)
pruner = LevelPruner(model, configure_list)
model = pruner.compress()

# model is ready for pruning, now start finetune the model,
@@ -103,7 +102,8 @@ Users can also remove this collector like this:
Pruner
------

A pruner receives ``model``\ , ``config_list`` and ``optimizer`` as arguments. It prunes the model per the ``config_list`` during training loop by adding a hook on ``optimizer.step()``.
A pruner receives ``model`` and ``config_list`` as arguments.
Contributor: remove ``\``

Author: fixed

Some pruners, such as ``TaylorFOWeightFilterPruner``, prune the model per the ``config_list`` during the training loop by adding a hook on ``optimizer.step()``.
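The hooking pattern described above can be sketched in plain Python (illustrative only; ``ToyOptimizer`` and this ``patch_optimizer`` are assumed names, not NNI internals):

```python
# Sketch of the pattern the docs describe: wrap optimizer.step() so a
# mask-update hook runs after every original step.

class ToyOptimizer:
    def __init__(self, params):
        self.params = params

    def step(self):
        # stand-in for a real gradient update
        self.params = [p - 0.1 for p in self.params]

def patch_optimizer(optimizer, hook):
    """Replace optimizer.step with a wrapper that also runs `hook`."""
    original_step = optimizer.step
    def patched_step():
        original_step()
        hook(optimizer)
    optimizer.step = patched_step

calls = []
opt = ToyOptimizer([1.0, 2.0])
patch_optimizer(opt, lambda o: calls.append(list(o.params)))

opt.step()
opt.step()
print(len(calls))  # the hook ran once per step → 2
```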

The Pruner class is a subclass of Compressor, so it contains everything in the Compressor class plus some additional components used only for pruning. It contains:

20 changes: 6 additions & 14 deletions docs/en_US/Compression/Pruner.rst
@@ -71,7 +71,7 @@ PyTorch code

from nni.algorithms.compression.pytorch.pruning import SlimPruner
config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
pruner = SlimPruner(model, config_list)
pruner = SlimPruner(model, config_list, optimizer, trainer, criterion)
pruner.compress()

User configuration for Slim Pruner
@@ -269,7 +269,7 @@ PyTorch code
'sparsity': 0.5,
'op_types': ['Conv2d']
}]
pruner = ActivationAPoZRankFilterPruner(model, config_list, statistics_batch_num=1)
pruner = ActivationAPoZRankFilterPruner(model, config_list, optimizer, trainer, criterion, statistics_batch_num=1)
pruner.compress()

Note: ActivationAPoZRankFilterPruner is used to prune convolutional layers within deep neural networks, therefore the ``op_types`` field supports only convolutional layers.
@@ -304,7 +304,7 @@ PyTorch code
'sparsity': 0.5,
'op_types': ['Conv2d']
}]
pruner = ActivationMeanRankFilterPruner(model, config_list, statistics_batch_num=1)
pruner = ActivationMeanRankFilterPruner(model, config_list, optimizer, trainer, criterion, statistics_batch_num=1)
pruner.compress()

Note: ActivationMeanRankFilterPruner is used to prune convolutional layers within deep neural networks, therefore the ``op_types`` field supports only convolutional layers.
@@ -344,7 +344,7 @@ PyTorch code
'sparsity': 0.5,
'op_types': ['Conv2d']
}]
pruner = TaylorFOWeightFilterPruner(model, config_list, statistics_batch_num=1)
pruner = TaylorFOWeightFilterPruner(model, config_list, optimizer, trainer, criterion, statistics_batch_num=1)
pruner.compress()

User configuration for TaylorFOWeightFilter Pruner
@@ -389,7 +389,7 @@ PyTorch code
# optimizer.step(), so an optimizer is required to prune the model.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)

pruner = AGPPruner(model, config_list, optimizer, pruning_algorithm='level')
pruner = AGPPruner(model, config_list, optimizer, trainer, criterion, pruning_algorithm='level')
pruner.compress()

AGP pruner uses ``LevelPruner`` algorithms to prune the weight by default, however you can set ``pruning_algorithm`` parameter to other values to use other pruning algorithms:
@@ -404,14 +404,6 @@
* ``apoz``\ : ActivationAPoZRankFilterPruner
* ``mean_activation``\ : ActivationMeanRankFilterPruner
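AGPPruner follows the automated gradual pruning schedule of Zhu and Gupta ("To prune, or not to prune"), ramping sparsity from an initial value to the target along a cubic curve. A sketch of that schedule (illustrative, not the NNI implementation):

```python
# Sketch of the automated gradual pruning (AGP) sparsity schedule:
# s_t = s_f + (s_i - s_f) * (1 - t/n)^3, so sparsity ramps from the
# initial value s_i to the final target s_f over n pruning steps.

def agp_sparsity(step, initial_sparsity, final_sparsity, total_steps):
    """Target sparsity at pruning step `step` (clamped at `total_steps`)."""
    frac = min(step / total_steps, 1.0)
    return final_sparsity + (initial_sparsity - final_sparsity) * (1 - frac) ** 3

# Ramp from 0% to 50% sparsity over 10 steps; most pruning happens early.
schedule = [round(agp_sparsity(t, 0.0, 0.5, 10), 3) for t in (0, 4, 10)]
print(schedule)  # starts at 0.0 and ends at the 0.5 target
```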

You should add code below to update epoch number when you finish one epoch in your training code.

PyTorch code

.. code-block:: python

pruner.update_epoch(epoch)


User configuration for AGP Pruner
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -620,7 +612,7 @@ PyTorch code
'op_types': ['Conv2d'],
'op_names': ['conv2']
}]
pruner = ADMMPruner(model, config_list, trainer=trainer, num_iterations=30, epochs=5)
pruner = ADMMPruner(model, config_list, trainer, num_iterations=30, epochs_per_iteration=5)
pruner.compress()

You can view :githublink:`example <examples/model_compress/pruning/auto_pruners_torch.py>` for more information.
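ADMM-based pruning alternates normal training steps with a projection of the weights onto the sparsity constraint, where the projection keeps the largest-magnitude entries. A plain-Python sketch of that projection step (illustrative, not the ADMMPruner implementation):

```python
# Sketch of the projection used in ADMM-style pruning: project weights onto
# the sparsity constraint by zeroing the smallest-magnitude entries.

def project_to_sparsity(weights, sparsity):
    """Zero the smallest-magnitude fraction `sparsity` of `weights`."""
    num_prune = int(len(weights) * sparsity)
    order = sorted(range(len(weights)), key=lambda i: abs(weights[i]))
    pruned = set(order[:num_prune])
    return [0.0 if i in pruned else w for i, w in enumerate(weights)]

w = [0.9, -0.05, 0.4, -1.3, 0.1, 0.02]
# 50% sparsity: the three smallest-magnitude weights are zeroed.
print(project_to_sparsity(w, 0.5))  # → [0.9, 0.0, 0.4, -1.3, 0.0, 0.0]
```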
7 changes: 3 additions & 4 deletions docs/en_US/Compression/QuickStart.rst
@@ -31,17 +31,16 @@ The specification of configuration can be found `here <./Tutorial.rst#specify-th
Step2. Choose a pruner and compress the model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

First instantiate the chosen pruner with your model and configuration as arguments, then invoke ``compress()`` to compress your model. Note that, some algorithms may check gradients for compressing, so we also define an optimizer and pass it to the pruner.
First instantiate the chosen pruner with your model and configuration as arguments, then invoke ``compress()`` to compress your model. Note that some algorithms check gradients during compression, so for those pruners we may also need to define an optimizer and pass it to the pruner.

.. code-block:: python

from nni.algorithms.compression.pytorch.pruning import LevelPruner

optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.01)
pruner = LevelPruner(model, config_list, optimizer_finetune)
pruner = LevelPruner(model, config_list)
model = pruner.compress()

Then, you can train your model using traditional training approach (e.g., SGD), pruning is applied transparently during the training. Some pruners (e.g., L1FilterPruner, FPGMPruner) prune once at the beginning, the following training can be seen as fine-tune. Some pruners (e.g., AGPPruner) prune your model iteratively, the masks are adjusted epoch by epoch during training.
Some pruners (e.g., L1FilterPruner, FPGMPruner) prune once, while others (e.g., AGPPruner) prune the model iteratively, adjusting the masks epoch by epoch during training.

Note that ``pruner.compress`` simply adds masks to the model weights; it does not include fine-tuning logic. If users want to fine-tune the compressed model, they need to write the fine-tuning logic themselves after calling ``pruner.compress``.
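What a mask means in practice can be sketched in plain Python (a toy example with assumed shapes, not NNI code): the pruner zeroes the masked weights, and fine-tuning must re-apply the mask so that pruned weights stay zero.

```python
# Toy sketch: pruning masks zero out weights, and the mask is re-applied
# after each fine-tuning update so pruned weights remain zero.

def apply_mask(weights, mask):
    return [w * m for w, m in zip(weights, mask)]

weights = [0.5, -1.2, 0.03, 2.0]
mask = [1, 1, 0, 1]                    # produced by the pruner: index 2 is pruned

weights = apply_mask(weights, mask)    # "compress": zero the masked weights
weights = [w + 0.01 for w in weights]  # one toy fine-tuning update
weights = apply_mask(weights, mask)    # keep the pruned weight at zero

print(weights[2])  # → 0.0
```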

@@ -34,6 +34,8 @@ class AutoCompressPruner(Pruner):
Function used for the first subproblem of ADMM Pruner.
Users should write this function as a normal function to train the Pytorch model
and include `model, optimizer, criterion, epoch` as function arguments.
criterion: function
Function used to calculate the loss between the target and the output. By default, we use CrossEntropyLoss.
QuanluZhang marked this conversation as resolved.
evaluator : function
function to evaluate the pruned model.
This function should include `model` as the only parameter, and returns a scalar value.
@@ -80,7 +82,7 @@ def evaluator(model):
PATH to store temporary experiment data.
"""

def __init__(self, model, config_list, trainer, criterion, evaluator, dummy_input,
def __init__(self, model, config_list, trainer, evaluator, dummy_input, criterion=torch.nn.CrossEntropyLoss(),
num_iterations=3, optimize_mode='maximize', base_algo='l1',
# SimulatedAnnealing related
start_temperature=100, stop_temperature=20, cool_down_rate=0.9, perturbation_magnitude=0.35,
26 changes: 17 additions & 9 deletions nni/algorithms/compression/pytorch/pruning/iterative_pruner.py
@@ -461,6 +461,8 @@ class TaylorFOWeightFilterPruner(IterativePruner):
Function used to calculate the loss between the target and the output.
sparsity_training_epochs: int
The number of epochs to collect the contributions.
statistics_batch_num: int
The number of batches used to collect activation statistics.
dependency_aware: bool
If prune the model in a dependency-aware way. If it is `True`, this pruner will
prune the model according to the l2-norm of weights and the channel-dependency or
@@ -472,14 +474,14 @@ class TaylorFOWeightFilterPruner(IterativePruner):
dummy_input : torch.Tensor
The dummy input to analyze the topology constraints. Note that, the dummy_input
should on the same device with the model.

"""

def __init__(self, model, config_list, optimizer, trainer, criterion, sparsity_training_epochs=1, dependency_aware=False,
dummy_input=None):
def __init__(self, model, config_list, optimizer, trainer, criterion, sparsity_training_epochs=1,
statistics_batch_num=1, dependency_aware=False, dummy_input=None):
super().__init__(model, config_list, optimizer=optimizer, pruning_algorithm='taylorfo', trainer=trainer,
criterion=criterion, num_iterations=1, epochs_per_iteration=sparsity_training_epochs,
dependency_aware=dependency_aware, dummy_input=dummy_input)
criterion=criterion, statistics_batch_num=statistics_batch_num, num_iterations=1,
epochs_per_iteration=sparsity_training_epochs, dependency_aware=dependency_aware,
dummy_input=dummy_input)

def _supported_dependency_aware(self):
return True
@@ -507,6 +509,8 @@ class ActivationAPoZRankFilterPruner(IterativePruner):
The activation type.
sparsity_training_epochs: int
The number of epochs used to collect activation statistics.
statistics_batch_num: int
The number of batches used to collect activation statistics.
dependency_aware: bool
If prune the model in a dependency-aware way. If it is `True`, this pruner will
prune the model according to the l2-norm of weights and the channel-dependency or
@@ -522,10 +526,11 @@ class ActivationAPoZRankFilterPruner(IterativePruner):
"""

def __init__(self, model, config_list, optimizer, trainer, criterion, activation='relu',
sparsity_training_epochs=1, dependency_aware=False, dummy_input=None):
sparsity_training_epochs=1, statistics_batch_num=1, dependency_aware=False, dummy_input=None):
super().__init__(model, config_list, pruning_algorithm='apoz', optimizer=optimizer, trainer=trainer,
criterion=criterion, dependency_aware=dependency_aware, dummy_input=dummy_input,
activation=activation, num_iterations=1, epochs_per_iteration=sparsity_training_epochs)
activation=activation, statistics_batch_num=statistics_batch_num, num_iterations=1,
epochs_per_iteration=sparsity_training_epochs)
self.patch_optimizer(self.update_mask)

def _supported_dependency_aware(self):
@@ -554,6 +559,8 @@ class ActivationMeanRankFilterPruner(IterativePruner):
The activation type.
sparsity_training_epochs: int
The number of epochs used to collect activation statistics.
statistics_batch_num: int
The number of batches used to collect activation statistics.

Contributor: why there is no such parameter before?

Author: workaround by using sparsifying_training_batches
dependency_aware: bool
If prune the model in a dependency-aware way. If it is `True`, this pruner will
prune the model according to the l2-norm of weights and the channel-dependency or
@@ -568,10 +575,11 @@ class ActivationMeanRankFilterPruner(IterativePruner):
"""

def __init__(self, model, config_list, optimizer, trainer, criterion, activation='relu',
sparsity_training_epochs=1, dependency_aware=False, dummy_input=None):
sparsity_training_epochs=1, statistics_batch_num=1, dependency_aware=False, dummy_input=None):
super().__init__(model, config_list, pruning_algorithm='mean_activation', optimizer=optimizer, trainer=trainer,
criterion=criterion, dependency_aware=dependency_aware, dummy_input=dummy_input,
activation=activation, num_iterations=1, epochs_per_iteration=sparsity_training_epochs)
activation=activation, statistics_batch_num=statistics_batch_num, num_iterations=1,
epochs_per_iteration=sparsity_training_epochs)
self.patch_optimizer(self.update_mask)

def _supported_dependency_aware(self):
return True