NetVLAD VPR system
vnmsklnk committed Nov 4, 2023
1 parent 8817fda commit 62a3ede
Showing 4 changed files with 282 additions and 0 deletions.
14 changes: 14 additions & 0 deletions aero_vloc/vpr_systems/netvlad/__init__.py
@@ -0,0 +1,14 @@
# Copyright (c) 2023, Ivan Moskalenko, Anastasiia Kornilova
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from aero_vloc.vpr_systems.netvlad.netvlad import NetVLAD
124 changes: 124 additions & 0 deletions aero_vloc/vpr_systems/netvlad/model/layers.py
@@ -0,0 +1,124 @@
# Copyright (c) 2023, Stephen Hausler, Sourav Garg, Ming Xu, Michael Milford, Tobias Fischer,
# Ivan Moskalenko, Anastasiia Kornilova
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# A significant part of our code is based on the Patch-NetVLAD repository
# (https://github.com/QVPR/Patch-NetVLAD)
import faiss
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.neighbors import NearestNeighbors


class NetVLADModule(nn.Module):
"""NetVLAD layer implementation"""

def __init__(
self,
num_clusters=64,
dim=128,
normalize_input=True,
vladv2=False,
use_faiss=True,
):
"""
Args:
num_clusters : int
The number of clusters
dim : int
Dimension of descriptors
normalize_input : bool
If true, descriptor-wise L2 normalization is applied to input.
            vladv2 : bool
                If true, use the VLADv2 initialization, otherwise use VLADv1
            use_faiss : bool
                If true, use faiss for the nearest-neighbour search in init_params,
                otherwise use scikit-learn
        """
super().__init__()
self.num_clusters = num_clusters
self.dim = dim
self.alpha = 0
self.vladv2 = vladv2
self.normalize_input = normalize_input
self.conv = nn.Conv2d(dim, num_clusters, kernel_size=(1, 1), bias=vladv2)
self.centroids = nn.Parameter(torch.rand(num_clusters, dim))
self.use_faiss = use_faiss

def init_params(self, clsts, traindescs):
if not self.vladv2:
clstsAssign = clsts / np.linalg.norm(clsts, axis=1, keepdims=True)
dots = np.dot(clstsAssign, traindescs.T)
dots.sort(0)
dots = dots[::-1, :] # sort, descending

self.alpha = (-np.log(0.01) / np.mean(dots[0, :] - dots[1, :])).item()
self.centroids = nn.Parameter(torch.from_numpy(clsts))
self.conv.weight = nn.Parameter(
torch.from_numpy(self.alpha * clstsAssign).unsqueeze(2).unsqueeze(3)
)
self.conv.bias = None
else:
if not self.use_faiss:
knn = NearestNeighbors(n_jobs=-1)
knn.fit(traindescs)
del traindescs
ds_sq = np.square(knn.kneighbors(clsts, 2)[1])
del knn
else:
index = faiss.IndexFlatL2(traindescs.shape[1])
index.add(traindescs)
del traindescs
ds_sq = np.square(index.search(clsts, 2)[1])
del index

self.alpha = (-np.log(0.01) / np.mean(ds_sq[:, 1] - ds_sq[:, 0])).item()
self.centroids = nn.Parameter(torch.from_numpy(clsts))
del clsts, ds_sq

self.conv.weight = nn.Parameter(
(2.0 * self.alpha * self.centroids).unsqueeze(-1).unsqueeze(-1)
)
self.conv.bias = nn.Parameter(-self.alpha * self.centroids.norm(dim=1))

def forward(self, x):
N, C = x.shape[:2]

if self.normalize_input:
x = F.normalize(x, p=2, dim=1) # across descriptor dim

# soft-assignment
soft_assign = self.conv(x).view(N, self.num_clusters, -1)
soft_assign = F.softmax(soft_assign, dim=1)

x_flatten = x.view(N, C, -1)

        # calculate residuals to each cluster
        vlad = torch.zeros(
            [N, self.num_clusters, C], dtype=x.dtype, layout=x.layout, device=x.device
        )
        # loop over clusters: slower than a vectorized version, but lower memory usage
        for cluster in range(self.num_clusters):
            residual = x_flatten.unsqueeze(0).permute(1, 0, 2, 3) - self.centroids[
                cluster : cluster + 1, :
            ].expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
            residual *= soft_assign[:, cluster : cluster + 1, :].unsqueeze(2)
            vlad[:, cluster : cluster + 1, :] = residual.sum(dim=-1)

vlad = F.normalize(vlad, p=2, dim=2) # intra-normalization
vlad = vlad.view(x.size(0), -1) # flatten
vlad = F.normalize(vlad, p=2, dim=1) # L2 normalize

return vlad
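
For reference, the forward pass above implements soft-assignment VLAD aggregation: every local descriptor is softly assigned to the num_clusters centroids and its residuals are accumulated into a num_clusters * dim global vector. A minimal sketch of running the module on a random feature map (batch size, channel count and spatial size are illustrative, not values used by the repository):

import torch

from aero_vloc.vpr_systems.netvlad.model.layers import NetVLADModule

# Dummy VGG-like feature map: batch of 2, 128 channels, 30x40 spatial grid
features = torch.rand(2, 128, 30, 40)

vlad_layer = NetVLADModule(num_clusters=64, dim=128)
with torch.no_grad():
    descriptor = vlad_layer(features)

# One L2-normalized vector of size num_clusters * dim per image
print(descriptor.shape)  # torch.Size([2, 8192])

Without calling init_params with real cluster centres, the centroids stay randomly initialized, so the output is only useful for checking shapes, not for retrieval.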
76 changes: 76 additions & 0 deletions aero_vloc/vpr_systems/netvlad/model/models_generic.py
@@ -0,0 +1,76 @@
# Copyright (c) 2023, Stephen Hausler, Sourav Garg, Ming Xu, Michael Milford, Tobias Fischer,
# Ivan Moskalenko, Anastasiia Kornilova
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# A significant part of our code is based on the Patch-NetVLAD repository
# (https://github.com/QVPR/Patch-NetVLAD)
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

from aero_vloc.vpr_systems.netvlad.model.layers import NetVLADModule

class Flatten(nn.Module):
def forward(self, input_data):
return input_data.view(input_data.size(0), -1)


class L2Norm(nn.Module):
def __init__(self, dim=1):
super().__init__()
self.dim = dim

def forward(self, input_data):
return F.normalize(input_data, p=2, dim=self.dim)

def get_backend():
enc_dim = 512
enc = models.vgg16(weights="IMAGENET1K_V1")
layers = list(enc.features.children())[:-2]
    # only train conv5_1, conv5_2 and conv5_3; keep the remaining layers frozen
    # at their ImageNet-trained weights
for layer in layers[:-5]:
for p in layer.parameters():
p.requires_grad = False
enc = nn.Sequential(*layers)
return enc_dim, enc

def get_pca_encoding(model, vlad_encoding):
pca_encoding = model.WPCA(vlad_encoding.unsqueeze(-1).unsqueeze(-1))
return pca_encoding

def get_model(
encoder,
encoder_dim,
num_clusters,
use_vladv2=False,
append_pca_layer=False,
num_pcs=8192,
):
nn_model = nn.Module()
nn_model.add_module("encoder", encoder)

net_vlad = NetVLADModule(
num_clusters=num_clusters, dim=encoder_dim, vladv2=use_vladv2
)
nn_model.add_module("pool", net_vlad)
if append_pca_layer:
netvlad_output_dim = encoder_dim
netvlad_output_dim *= num_clusters
pca_conv = nn.Conv2d(
netvlad_output_dim, num_pcs, kernel_size=(1, 1), stride=1, padding=0
)
nn_model.add_module(
"WPCA", nn.Sequential(*[pca_conv, Flatten(), L2Norm(dim=-1)])
)
return nn_model
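
A minimal sketch of how the helpers above fit together. Note that get_backend downloads the ImageNet VGG-16 weights on first use, and the num_clusters and num_pcs values below are illustrative; in practice they are read from a checkpoint, as netvlad.py below does. Without loading trained weights the outputs are not meaningful descriptors, so the sketch only shows the shapes flowing through the model:

import torch

from aero_vloc.vpr_systems.netvlad.model.models_generic import (
    get_backend,
    get_model,
    get_pca_encoding,
)

encoder_dim, encoder = get_backend()  # VGG-16 features, 512-dim output
model = get_model(
    encoder, encoder_dim, num_clusters=64, append_pca_layer=True, num_pcs=4096
)

image = torch.rand(1, 3, 480, 640)  # dummy RGB batch
with torch.no_grad():
    feature_map = model.encoder(image)          # (1, 512, 30, 40): /16 downsampling
    vlad = model.pool(feature_map)              # (1, 64 * 512) global VLAD vector
    compressed = get_pca_encoding(model, vlad)  # (1, 4096) after the WPCA layer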
68 changes: 68 additions & 0 deletions aero_vloc/vpr_systems/netvlad/netvlad.py
@@ -0,0 +1,68 @@
# Copyright (c) 2023, Ivan Moskalenko, Anastasiia Kornilova
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch

from pathlib import Path
from PIL import Image

from aero_vloc.utils import transform_image
from aero_vloc.vpr_systems.netvlad.model.models_generic import (
    get_backend,
    get_model,
    get_pca_encoding,
)
from aero_vloc.vpr_systems.vpr_system import VPRSystem


class NetVLAD(VPRSystem):
"""
Implementation of [NetVLAD](https://github.com/QVPR/Patch-NetVLAD) global localization method.
"""

    def __init__(self, path_to_weights: str, resize: int = 800):
        """
        Args:
            path_to_weights : str
                Path to the pre-trained NetVLAD checkpoint; its state_dict must
                contain "pool.centroids" and "WPCA.0.bias"
            resize : int
                Size passed to transform_image when preprocessing input images
        """
        super().__init__()
self.resize = resize
encoder_dim, encoder = get_backend()

checkpoint = torch.load(
path_to_weights, map_location=lambda storage, loc: storage
)
num_clusters = checkpoint["state_dict"]["pool.centroids"].shape[0]
num_pcs = checkpoint["state_dict"]["WPCA.0.bias"].shape[0]
self.model = get_model(
encoder,
encoder_dim,
num_clusters,
append_pca_layer=True,
num_pcs=num_pcs,
)
self.model.load_state_dict(checkpoint["state_dict"])
self.model = self.model.to(self.device)
self.model.eval()

def get_image_descriptor(
self,
image_path: Path,
):
image = Image.open(image_path).convert("RGB")
image = transform_image(image, self.resize)[None, :].to(self.device)

with torch.no_grad():
image_encoding = self.model.encoder(image)
vlad_global = self.model.pool(image_encoding)
vlad_global_pca = get_pca_encoding(self.model, vlad_global)
desc = vlad_global_pca.detach().cpu().numpy()[0]
return desc

get_image_descriptor.__doc__ = VPRSystem.get_image_descriptor.__doc__
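
A usage sketch of the wrapper class above. The checkpoint and image paths are hypothetical placeholders; any Patch-NetVLAD-style checkpoint whose state_dict contains "pool.centroids" and "WPCA.0.bias" should load:

from pathlib import Path

from aero_vloc.vpr_systems.netvlad.netvlad import NetVLAD

# Placeholder paths for illustration only
vpr_system = NetVLAD(path_to_weights="weights/netvlad_wpca.pth.tar", resize=800)
descriptor = vpr_system.get_image_descriptor(Path("queries/frame_0001.jpg"))
print(descriptor.shape)  # (num_pcs,), where num_pcs is taken from the checkpoint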
