From 3fd33d55824d9be8c2f46972c68d5009db492a20 Mon Sep 17 00:00:00 2001
From: Liu Yi
Date: Fri, 15 Apr 2022 14:15:01 +0800
Subject: [PATCH] [Feature] Add GloRe (#1951)

---
 configs/glore/README.md                       |  21 ++
 ...e_resnet50_os8_cityscapes_1024x512_80k.yml |  23 ++
 ...lore_resnet50_os8_voc12aug_512x512_40k.yml |  17 ++
 paddleseg/models/__init__.py                  |   1 +
 paddleseg/models/glore.py                     | 245 ++++++++++++++++++
 5 files changed, 307 insertions(+)
 create mode 100644 configs/glore/README.md
 create mode 100644 configs/glore/glore_resnet50_os8_cityscapes_1024x512_80k.yml
 create mode 100644 configs/glore/glore_resnet50_os8_voc12aug_512x512_40k.yml
 create mode 100644 paddleseg/models/glore.py

diff --git a/configs/glore/README.md b/configs/glore/README.md
new file mode 100644
index 0000000000..0cfa55882c
--- /dev/null
+++ b/configs/glore/README.md
@@ -0,0 +1,21 @@
+# Graph-Based Global Reasoning Networks
+
+## Reference
+
+> Chen, Yunpeng, Marcus Rohrbach, Zhicheng Yan, Yan Shuicheng, Jiashi Feng, and Yannis Kalantidis. "Graph-based global reasoning networks." In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, pp. 433-442. 2019.
+
+
+## Performance
+
+### Cityscapes
+
+| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
+|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+|GloRe|ResNet50_OS8|1024x512|80000|78.26%|78.61%|78.72%|[model](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/glore_resnet50_os8_cityscapes_1024x512_80k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/cityscapes/glore_resnet50_os8_cityscapes_1024x512_80k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=de754e39ac9de4d2e951915c2334d6ec) |
+
+
+### Pascal VOC 2012 + Aug
+
+| Model | Backbone | Resolution | Training Iters | mIoU | mIoU (flip) | mIoU (ms+flip) | Links |
+|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+|GloRe|ResNet50_OS8|512x512|40000|80.16%|80.35%|80.40%|[model](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/glore_resnet50_os8_voc12aug_512x512_40k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/pascal_voc12/glore_resnet50_os8_voc12aug_512x512_40k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=e40c1dd8d4fcbf2dcda01242dec9d9b5) |
diff --git a/configs/glore/glore_resnet50_os8_cityscapes_1024x512_80k.yml b/configs/glore/glore_resnet50_os8_cityscapes_1024x512_80k.yml
new file mode 100644
index 0000000000..96d1919c2f
--- /dev/null
+++ b/configs/glore/glore_resnet50_os8_cityscapes_1024x512_80k.yml
@@ -0,0 +1,23 @@
+_base_: '../_base_/cityscapes.yml'
+
+batch_size: 2
+iters: 80000
+
+learning_rate:
+  decay:
+    end_lr: 1.0e-5
+
+loss:
+  types:
+    - type: CrossEntropyLoss
+  coef: [1, 0.4]
+
+model:
+  type: GloRe
+  backbone:
+    type: ResNet50_vd
+    output_stride: 8
+    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
+  enable_auxiliary_loss: True
+  align_corners: False
+  pretrained: null
diff --git a/configs/glore/glore_resnet50_os8_voc12aug_512x512_40k.yml b/configs/glore/glore_resnet50_os8_voc12aug_512x512_40k.yml
new file mode 100644
index 0000000000..02f7a89a1a
--- /dev/null
+++ b/configs/glore/glore_resnet50_os8_voc12aug_512x512_40k.yml
@@ -0,0 +1,17 @@
+_base_: '../_base_/pascal_voc12aug.yml'
+
+
+model:
+  type: GloRe
+  backbone:
+    type: ResNet50_vd
+    output_stride: 8
+    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
+  enable_auxiliary_loss: True
+  align_corners: False
+  pretrained: null
+
+loss:
+  types:
+    - type: CrossEntropyLoss
+  coef: [1, 0.4]
diff --git a/paddleseg/models/__init__.py b/paddleseg/models/__init__.py
index b4f503973a..1e275d6491 100644
--- a/paddleseg/models/__init__.py
+++ b/paddleseg/models/__init__.py
@@ -56,3 +56,4 @@
 from .bisenetv1 import BiseNetV1
 from .fastfcn import FastFCN
 from .pfpnnet import PFPNNet
+from .glore import GloRe
diff --git a/paddleseg/models/glore.py b/paddleseg/models/glore.py
new file mode 100644
index 0000000000..87aa73e228
--- /dev/null
+++ b/paddleseg/models/glore.py
@@ -0,0 +1,245 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddleseg.cvlibs import manager
+from paddleseg.models import layers
+from paddleseg.utils import utils
+
+
+@manager.MODELS.add_component
+class GloRe(nn.Layer):
+    """
+    The GloRe implementation based on PaddlePaddle.
+
+    The original article refers to:
+    Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
+    (https://arxiv.org/pdf/1811.12814.pdf)
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone (Paddle.nn.Layer): Backbone network, currently support Resnet50/101.
+        backbone_indices (tuple, optional): Two values in the tuple indicate the indices of output of backbone.
+            The first one is used by the auxiliary layer and the second one by the GloRe head. Default: (2, 3).
+        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
+        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
+        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
+        enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss. Default: True.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone,
+                 backbone_indices=(2, 3),
+                 gru_channels=512,
+                 gru_num_state=128,
+                 gru_num_node=64,
+                 enable_auxiliary_loss=True,
+                 align_corners=False,
+                 pretrained=None):
+        super().__init__()
+
+        self.backbone = backbone
+        backbone_channels = [
+            backbone.feat_channels[i] for i in backbone_indices
+        ]
+
+        self.head = GloReHead(num_classes, backbone_indices, backbone_channels,
+                              gru_channels, gru_num_state, gru_num_node,
+                              enable_auxiliary_loss)
+        self.align_corners = align_corners
+        self.pretrained = pretrained
+        self.init_weight()
+
+    def forward(self, x):
+        """Return the head logits, each bilinearly resized to `x.shape[2:]`."""
+        feat_list = self.backbone(x)
+        logit_list = self.head(feat_list)
+        return [
+            F.interpolate(
+                logit,
+                x.shape[2:],
+                mode='bilinear',
+                align_corners=self.align_corners) for logit in logit_list
+        ]
+
+    def init_weight(self):
+        """Call `utils.load_entire_model` with `self.pretrained` when it is not None."""
+        if self.pretrained is not None:
+            utils.load_entire_model(self, self.pretrained)
+
+
+class GloReHead(nn.Layer):
+    """
+    The segmentation head of GloRe.
+
+    The main branch applies a ConvBNReLU, the GloRe unit, dropout and a 1x1 classifier
+    to one backbone feature; an auxiliary layer on another feature is optional.
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        backbone_indices (tuple): Two indices into the backbone outputs. The first selects the
+            feature of the auxiliary layer and the second the feature of the main branch.
+        backbone_channels (list|tuple): The channels of the two selected features, in the same
+            order as `backbone_indices`.
+        gru_channels (int, optional): The number of input channels in GloRe Unit. Default: 512.
+        gru_num_state (int, optional): The number of states in GloRe Unit. Default: 128.
+        gru_num_node (int, optional): The number of nodes in GloRe Unit. Default: 64.
+        enable_auxiliary_loss (bool, optional): Whether to also return the auxiliary logit. Default: True.
+    """
+
+    def __init__(self,
+                 num_classes,
+                 backbone_indices,
+                 backbone_channels,
+                 gru_channels=512,
+                 gru_num_state=128,
+                 gru_num_node=64,
+                 enable_auxiliary_loss=True):
+        super().__init__()
+
+        in_channels = backbone_channels[1]
+        self.conv_bn_relu = layers.ConvBNReLU(
+            in_channels, gru_channels, 1, bias_attr=False)
+        self.gru_module = GruModule(
+            num_input=gru_channels,
+            num_state=gru_num_state,
+            num_node=gru_num_node)
+
+        self.dropout = nn.Dropout(0.1)
+        # GruModule returns `gru_channels` channels (input + residual branch), so the
+        # classifier input must follow `gru_channels` instead of a hard-coded 512.
+        self.classifier = nn.Conv2D(gru_channels, num_classes, kernel_size=1)
+        self.auxlayer = layers.AuxLayer(
+            in_channels=backbone_channels[0],
+            inter_channels=backbone_channels[0] // 4,
+            out_channels=num_classes)
+
+        self.backbone_indices = backbone_indices
+        self.enable_auxiliary_loss = enable_auxiliary_loss
+
+    def forward(self, feat_list):
+        """Return `[logit]`, plus the auxiliary logit when `enable_auxiliary_loss` is set."""
+        logit_list = []
+        x = feat_list[self.backbone_indices[1]]
+
+        feature = self.conv_bn_relu(x)
+        gru_output = self.gru_module(feature)
+        output = self.dropout(gru_output)
+        logit = self.classifier(output)
+        logit_list.append(logit)
+
+        if self.enable_auxiliary_loss:
+            low_level_feat = feat_list[self.backbone_indices[0]]
+            auxiliary_logit = self.auxlayer(low_level_feat)
+            logit_list.append(auxiliary_logit)
+
+        return logit_list
+
+
+class GCN(nn.Layer):
+    """
+    Graph convolution used by the GloRe unit.
+
+    For an input of shape (B, num_state, num_node), a 1x1 Conv1D first mixes the features
+    across nodes, the input is added back as a residual, and a second 1x1 Conv1D followed
+    by ReLU mixes the features across states.
+
+    Args:
+        num_state (int): The number of channels of each node feature.
+        num_node (int): The number of graph nodes.
+        bias (bool, optional): The `bias_attr` of the second convolution. Default: False.
+    """
+
+    def __init__(self, num_state, num_node, bias=False):
+        super(GCN, self).__init__()
+        self.conv1 = nn.Conv1D(num_node, num_node, kernel_size=1)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv1D(
+            num_state, num_state, kernel_size=1, bias_attr=bias)
+
+    def forward(self, x):
+        h = self.conv1(paddle.transpose(x, perm=(0, 2, 1)))
+        h = paddle.transpose(h, perm=(0, 2, 1))
+        h = h + x
+        h = self.relu(self.conv2(h))
+        return h
+
+
+class GruModule(nn.Layer):
+    """
+    The GloRe unit. It projects the feature map onto graph nodes, applies GCN and projects back.
+
+    Args:
+        num_input (int, optional): The number of input and output channels. Default: 512.
+        num_state (int, optional): The number of channels of each node feature. Default: 128.
+        num_node (int, optional): The number of graph nodes. Default: 64.
+        normalize (bool, optional): Whether to scale the node features by 1 / (H * W). Default: False.
+    """
+
+    def __init__(self,
+                 num_input=512,
+                 num_state=128,
+                 num_node=64,
+                 normalize=False):
+        super(GruModule, self).__init__()
+        self.normalize = normalize
+        self.num_state = num_state
+        self.num_node = num_node
+        self.reduction_dim = nn.Conv2D(num_input, num_state, kernel_size=1)
+        self.projection_mat = nn.Conv2D(num_input, num_node, kernel_size=1)
+        self.gcn = GCN(num_state=self.num_state, num_node=self.num_node)
+        self.extend_dim = nn.Conv2D(
+            self.num_state, num_input, kernel_size=1, bias_attr=False)
+        self.extend_bn = nn.SyncBatchNorm(num_input, epsilon=1e-4)
+
+    def forward(self, input):
+        # In the shape comments below, C is num_state and N is num_node.
+        n, _, h, w = input.shape
+        # B, C, H, W
+        reduction_dim = self.reduction_dim(input)
+        # B, N, H, W
+        mat_B = self.projection_mat(input)
+        # B, C, H*W
+        reshaped_reduction = paddle.reshape(
+            reduction_dim, shape=[n, self.num_state, h * w])
+        # B, N, H*W
+        reshaped_B = paddle.reshape(mat_B, shape=[n, self.num_node, h * w])
+        # B, N, H*W
+        reproject = reshaped_B
+        # B, C, N
+        node_state_V = paddle.matmul(
+            reshaped_reduction, paddle.transpose(
+                reshaped_B, perm=[0, 2, 1]))
+
+        if self.normalize:
+            node_state_V = node_state_V * (1. / reshaped_reduction.shape[2])
+
+        # B, C, N
+        gcn_out = self.gcn(node_state_V)
+        # B, C, H*W
+        Y = paddle.matmul(gcn_out, reproject)
+        # B, C, H, W
+        Y = paddle.reshape(Y, shape=[n, self.num_state, h, w])
+        Y_extend = self.extend_dim(Y)
+        Y_extend = self.extend_bn(Y_extend)
+
+        out = input + Y_extend
+        return out