Commit 636d21d

mannatsingh authored and facebook-github-bot committed
Add Squeeze and Excitation to DenseNets (facebookresearch#427)
Summary:
Pull Request resolved: facebookresearch#427

Plugged the Squeeze and Excitation layer into DenseNets.

Differential Revision: D20358700

fbshipit-source-id: 2ef6df1b7257c85d97ec78a7c842cd9824ab253d
1 parent 2732a5e commit 636d21d
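
The SqueezeAndExcitationLayer wired in below is imported from classy_vision/models/common.py; its implementation is not part of this diff. As a rough, illustrative sketch of what such a layer typically does (the class and parameter names here are assumptions, not the actual Classy Vision code): each channel is globally average-pooled, passed through a small bottleneck, and the resulting sigmoid gates rescale the channels of the input feature map.

import torch
import torch.nn as nn


class SqueezeExcitationSketch(nn.Module):
    """Illustrative channel-wise squeeze-and-excitation gate (Hu et al., 2018)."""

    def __init__(self, in_planes: int, reduction_ratio: int = 16):
        super().__init__()
        reduced_planes = max(1, in_planes // reduction_ratio)
        # "Squeeze": global average pool collapses each channel to a single value.
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        # "Excitation": bottleneck of 1x1 convolutions produces per-channel gates.
        self.excitation = nn.Sequential(
            nn.Conv2d(in_planes, reduced_planes, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduced_planes, in_planes, kernel_size=1),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        scale = self.excitation(self.squeeze(x))  # shape (N, C, 1, 1)
        return x * scale  # rescale each channel of the input feature map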

File tree: 2 files changed (+63 -13 lines)


classy_vision/models/densenet.py (+42 -12)
@@ -16,19 +16,24 @@
 
 from . import register_model
 from .classy_model import ClassyModel
+from .common import SqueezeAndExcitationLayer
 
 
 # global setting for in-place ReLU:
 INPLACE = True
 
 
 class _DenseLayer(nn.Sequential):
-    """
-    Single layer of a DenseNet.
-    """
-
-    def __init__(self, in_planes, growth_rate=32, expansion=4):
+    """Single layer of a DenseNet."""
 
+    def __init__(
+        self,
+        in_planes,
+        growth_rate=32,
+        expansion=4,
+        use_se=False,
+        se_reduction_ratio=16,
+    ):
         # assertions:
         assert is_pos_int(in_planes)
         assert is_pos_int(growth_rate)
@@ -56,6 +61,13 @@ def __init__(self, in_planes, growth_rate=32, expansion=4):
                 bias=False,
             ),
         )
+        if use_se:
+            self.add_module(
+                "se",
+                SqueezeAndExcitationLayer(
+                    growth_rate, reduction_ratio=se_reduction_ratio
+                ),
+            )
 
     def forward(self, x):
         new_features = super(_DenseLayer, self).forward(x)
@@ -98,22 +110,27 @@ def __init__(
         expansion,
         small_input,
         final_bn_relu,
+        use_se=False,
+        se_reduction_ratio=16,
     ):
         """
         Implementation of a standard densely connected network (DenseNet).
 
-        Set `small_input` to `True` for 32x32 sized image inputs.
-
-        Set `final_bn_relu` to `False` to exclude the final batchnorm and ReLU
-        layers. These settings are useful when
-        training Siamese networks.
-
         Contains the following attachable blocks:
             block{block_idx}-{idx}: This is the output of each dense block,
                 indexed by the block index and the index of the dense layer
            transition-{idx}: This is the output of the transition layers
            trunk_output: The final output of the `DenseNet`. This is
                where a `fully_connected` head is normally attached.
+
+        Args:
+            small_input: set to `True` for 32x32 sized image inputs.
+            final_bn_relu: set to `False` to exclude the final batchnorm and
+                ReLU layers. These settings are useful when training Siamese
+                networks.
+            use_se: Enable squeeze and excitation
+            se_reduction_ratio: The reduction ratio to apply in the excitation
+                stage. Only used if `use_se` is `True`.
         """
         super().__init__()
 
@@ -158,6 +175,8 @@ def __init__(
                 idx,
                 growth_rate=growth_rate,
                 expansion=expansion,
+                use_se=use_se,
+                se_reduction_ratio=se_reduction_ratio,
             )
             blocks.append(block)
             num_planes = num_planes + num_layers * growth_rate
@@ -192,7 +211,14 @@ def _make_trunk_output_block(self, num_planes, final_bn_relu):
         return self.build_attachable_block("trunk_output", layers)
 
     def _make_dense_block(
-        self, num_layers, in_planes, block_idx, growth_rate=32, expansion=4
+        self,
+        num_layers,
+        in_planes,
+        block_idx,
+        growth_rate=32,
+        expansion=4,
+        use_se=False,
+        se_reduction_ratio=16,
     ):
         assert is_pos_int(in_planes)
         assert is_pos_int(growth_rate)
@@ -208,6 +234,8 @@ def _make_dense_block(
                         in_planes + idx * growth_rate,
                         growth_rate=growth_rate,
                         expansion=expansion,
+                        use_se=use_se,
+                        se_reduction_ratio=se_reduction_ratio,
                     ),
                 )
             )
@@ -233,6 +261,8 @@ def from_config(cls, config: Dict[str, Any]) -> "DenseNet":
             "expansion": config.get("expansion", 4),
             "small_input": config.get("small_input", False),
             "final_bn_relu": config.get("final_bn_relu", True),
+            "use_se": config.get("use_se", False),
+            "se_reduction_ratio": config.get("se_reduction_ratio", 16),
         }
         return cls(**config)
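With the keys read in from_config above, enabling squeeze and excitation becomes a config-only change. A minimal usage sketch, assuming the set of required keys mirrors the test config below (illustrative, not taken from the repository documentation):

from classy_vision.models.densenet import DenseNet

config = {
    "num_blocks": [1, 1, 1, 1],
    "init_planes": 4,
    "growth_rate": 32,
    "expansion": 4,
    "small_input": True,
    "final_bn_relu": True,
    "use_se": True,              # new flag added in this commit
    "se_reduction_ratio": 16,    # only consulted when use_se is True
}
model = DenseNet.from_config(config)
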
test/models_densenet_test.py (+21 -1)
@@ -30,7 +30,27 @@
                 "zero_init_bias": True,
             }
         ],
-    }
+    },
+    "small_densenet_se": {
+        "name": "densenet",
+        "num_blocks": [1, 1, 1, 1],
+        "init_planes": 4,
+        "growth_rate": 32,
+        "expansion": 4,
+        "final_bn_relu": True,
+        "small_input": True,
+        "use_se": True,
+        "heads": [
+            {
+                "name": "fully_connected",
+                "unique_id": "default_head",
+                "num_classes": 1000,
+                "fork_block": "trunk_output",
+                "in_plane": 60,
+                "zero_init_bias": True,
+            }
+        ],
+    },
 }
 
 
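A quick, hypothetical sanity check on the new config (not part of the test file; the name of the config dict, MODELS, and the baseline key "small_densenet" are assumed from context): the SE variant should build successfully and carry more parameters than the baseline, the difference being the squeeze-and-excitation weights.

from classy_vision.models.densenet import DenseNet

baseline = DenseNet.from_config(MODELS["small_densenet"])     # assumed baseline entry
se_model = DenseNet.from_config(MODELS["small_densenet_se"])  # entry added in this commit

n_baseline = sum(p.numel() for p in baseline.parameters())
n_se = sum(p.numel() for p in se_model.parameters())
assert n_se > n_baseline  # extra parameters come from the SE bottleneck layers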