EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. Unlike conventional practice that arbitrarily scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients.
EfficientNet is a state-of-the-art algorithm for image classification. EfficientNet-B7 achieves state-of-the-art 84.3% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster at inference than the best existing ConvNet (GPipe).
Original Paper: EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks
Official Blog: Google AI Blog: EfficientNet: Improving Accuracy and Efficiency through AutoML and Model Scaling
Blog: Coming Soon
class EfficientNet(nn.Module):
    """EfficientNet classifier built from a compound-scaled base architecture.

    The network is a feature extractor (stem ``CNNBlock``, a stack of
    ``InvertedResidualBlock`` stages described by ``base_model``, and a final
    1x1 ``CNNBlock``), followed by global average pooling and a
    dropout + linear classification head.

    Args:
        version: Key into the module-level ``phi_values`` table; selects the
            compound coefficient and dropout rate. An unknown key propagates
            the lookup's ``KeyError``.
        num_classes: Number of output features of the final linear layer.
    """

    def __init__(self, version, num_classes):
        super().__init__()
        width_factor, depth_factor, dropout_rate = self.calculate_factors(version)
        last_channels = ceil(1280 * width_factor)

        # Keep this construction order: it fixes both the parameter
        # initialisation order and the state_dict key order.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.features = self.create_features(width_factor, depth_factor, last_channels)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(last_channels, num_classes),
        )

    def calculate_factors(self, version, alpha=1.2, beta=1.1):
        """Return ``(width_factor, depth_factor, drop_rate)`` for ``version``.

        Args:
            version: Key into ``phi_values``. Each entry unpacks to three
                values: the compound coefficient ``phi``, a second value that
                is not used here (presumably the input resolution), and the
                dropout rate.
            alpha: Base of the depth multiplier (``alpha ** phi``).
            beta: Base of the width multiplier (``beta ** phi``).
        """
        phi, _, drop_rate = phi_values[version]
        depth_factor = alpha ** phi
        width_factor = beta ** phi
        return width_factor, depth_factor, drop_rate

    def create_features(self, width_factor, depth_factor, last_channels):
        """Build the feature extractor as an ``nn.Sequential``.

        Args:
            width_factor: Multiplier applied to every stage's channel count.
            depth_factor: Multiplier applied to every stage's repeat count.
            last_channels: Output channels of the closing 1x1 ``CNNBlock``.

        Returns:
            ``nn.Sequential`` of the stem, all scaled stages, and the final
            1x1 block, in that order.
        """
        stem_channels = int(32 * width_factor)
        features = [CNNBlock(3, stem_channels, 3, stride=2, padding=1)]
        in_channels = stem_channels

        for expand_ratio, base_channels, repeats, stride, kernel_size in base_model:
            # Truncate the scaled width, then round up to a multiple of 4.
            out_channels = 4 * ceil(int(base_channels * width_factor) / 4)
            layers_repeats = ceil(repeats * depth_factor)

            for layer in range(layers_repeats):
                features.append(
                    InvertedResidualBlock(
                        in_channels,
                        out_channels,
                        expand_ratio=expand_ratio,
                        # Only the first block of a stage applies the stage stride.
                        stride=stride if layer == 0 else 1,
                        kernel_size=kernel_size,
                        padding=kernel_size // 2,  # k=1: pad=0, k=3: pad=1, k=5: pad=2
                    )
                )
                in_channels = out_channels

        features.append(
            CNNBlock(in_channels, last_channels, kernel_size=1, stride=1, padding=0)
        )
        return nn.Sequential(*features)

    def forward(self, x):
        """Run features -> global average pool -> classifier on ``x``.

        Returns the classifier output with one row per batch element.
        """
        x = self.pool(self.features(x))
        # flatten(1) matches view(batch, -1) for non-empty batches and, unlike
        # view with -1, stays well-defined when the batch dimension is 0.
        return self.classifier(x.flatten(1))
Check out the notebook for more info: EfficientNet.ipynb