From 17d50e881b6e9cd1e9d4a36da17d21206caade52 Mon Sep 17 00:00:00 2001 From: Steven Bucaille Date: Thu, 29 Aug 2024 14:55:58 +0000 Subject: [PATCH 01/32] feat: Added int conversion and unwrapping --- .../superpoint/image_processing_superpoint.py | 30 ++++++++++++++++++- .../models/superpoint/modeling_superpoint.py | 3 ++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index fbbb717570cb..b0ea901c4252 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -17,7 +17,7 @@ import numpy as np -from ... import is_vision_available +from ... import is_torch_available, is_vision_available from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict from ...image_transforms import resize, to_channel_dimension_format from ...image_utils import ( @@ -32,6 +32,9 @@ from ...utils import TensorType, logging, requires_backends +if is_torch_available(): + import torch + if is_vision_available(): import PIL @@ -270,3 +273,28 @@ def preprocess( data = {"pixel_values": images} return BatchFeature(data=data, tensor_type=return_tensors) + + def post_process_keypoint_detection(self, outputs, target_sizes, unwrap_batch_dim=True): + if len(outputs.mask) != len(target_sizes): + raise ValueError("Make sure that you pass in as many target sizes as the batch dimension of the logits") + if target_sizes.shape[1] != 2: + raise ValueError("Each element of target_sizes must contain the size (h, w) of each image of the batch") + + for keypoints, target_size in zip(outputs.keypoints, target_sizes): + keypoints[:, 0] = keypoints[:, 0] * target_size[1] + keypoints[:, 1] = keypoints[:, 1] * target_size[0] + + # Convert masked_keypoints to int + masked_keypoints = outputs.keypoints.to(torch.int32) + + outputs.keypoints = masked_keypoints + + results = [] + for i, image_mask in enumerate(outputs.mask): + indices = torch.nonzero(image_mask).squeeze(1) + keypoints = outputs.keypoints[i][indices] + scores = outputs.scores[i][indices] + descriptors = outputs.descriptors[i][indices] + results.append({"keypoints": keypoints, "scores": scores, "descriptors": descriptors}) + + return results diff --git a/src/transformers/models/superpoint/modeling_superpoint.py b/src/transformers/models/superpoint/modeling_superpoint.py index cfd3dfd86e8e..b77a90367d4a 100644 --- a/src/transformers/models/superpoint/modeling_superpoint.py +++ b/src/transformers/models/superpoint/modeling_superpoint.py @@ -258,6 +258,9 @@ def _extract_keypoints(self, scores: torch.Tensor) -> Tuple[torch.Tensor, torch. # Convert (y, x) to (x, y) keypoints = torch.flip(keypoints, [1]).float() + # Convert to relative coordinates + keypoints = keypoints / torch.tensor([width, height], device=keypoints.device) + return keypoints, scores From 285c465bd484aa01b38bee1caa9086ba0f96bdb9 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 11:46:48 +0200 Subject: [PATCH 02/32] test: added tests for post_process_keypoint_detection of SuperPointImageProcessor --- docs/source/en/model_doc/superpoint.md | 1 + .../superpoint/image_processing_superpoint.py | 37 ++++++++++++++----- .../test_image_processing_superpoint.py | 29 ++++++++++++++- 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index b9aab2f1b929..13eec5ffe1b2 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -123,6 +123,7 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h [[autodoc]] SuperPointImageProcessor - preprocess +- post_process_keypoint_detection ## SuperPointForKeypointDetection diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index b0ea901c4252..4ab8a895275d 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -30,6 +30,7 @@ valid_images, ) from ...utils import TensorType, logging, requires_backends +from .modeling_superpoint import SuperPointKeypointDescriptionOutput if is_torch_available(): @@ -274,27 +275,45 @@ def preprocess( return BatchFeature(data=data, tensor_type=return_tensors) - def post_process_keypoint_detection(self, outputs, target_sizes, unwrap_batch_dim=True): + def post_process_keypoint_detection( + self, outputs: SuperPointKeypointDescriptionOutput, target_sizes: torch.Tensor + ): + """ + Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors + with coordinates absolute to the original image sizes. + + Args: + outputs ([`SuperPointKeypointDescriptionOutput`]): + Raw outputs of the model. + target_sizes (`torch.Tensor` of shape `(batch_size, 2)`): + Tensor containing the size (h, w) of each image of the batch. This must be the original + image size (before any processing). + Returns: + `List[Dict]`: A list of dictionaries, each dictionary containing the keypoints, scores and descriptors for + an image in the batch as predicted by the model. + """ if len(outputs.mask) != len(target_sizes): raise ValueError("Make sure that you pass in as many target sizes as the batch dimension of the logits") if target_sizes.shape[1] != 2: raise ValueError("Each element of target_sizes must contain the size (h, w) of each image of the batch") - for keypoints, target_size in zip(outputs.keypoints, target_sizes): + masked_keypoints = outputs.keypoints.clone() + + for keypoints, target_size in zip(masked_keypoints, target_sizes): keypoints[:, 0] = keypoints[:, 0] * target_size[1] keypoints[:, 1] = keypoints[:, 1] * target_size[0] # Convert masked_keypoints to int - masked_keypoints = outputs.keypoints.to(torch.int32) - - outputs.keypoints = masked_keypoints + masked_keypoints = masked_keypoints.to(torch.int32) results = [] - for i, image_mask in enumerate(outputs.mask): + for image_mask, keypoints, scores, descriptors in zip( + outputs.mask, masked_keypoints, outputs.scores, outputs.descriptors + ): indices = torch.nonzero(image_mask).squeeze(1) - keypoints = outputs.keypoints[i][indices] - scores = outputs.scores[i][indices] - descriptors = outputs.descriptors[i][indices] + keypoints = keypoints[indices] + scores = scores[indices] + descriptors = descriptors[indices] results.append({"keypoints": keypoints, "scores": scores, "descriptors": descriptors}) return results diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 90bbf82d1ed8..332ac0847157 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -15,7 +15,7 @@ import numpy as np -from transformers.testing_utils import require_torch, require_vision +from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_vision_available from ...test_image_processing_common import ( @@ -25,7 +25,10 @@ if is_vision_available(): - from transformers import SuperPointImageProcessor + from transformers import SuperPointForKeypointDetection, SuperPointImageProcessor, is_torch_available + +if is_torch_available(): + import torch class SuperPointImageProcessingTester(unittest.TestCase): @@ -110,3 +113,25 @@ def test_input_image_properly_converted_to_grayscale(self): pre_processed_images = image_processor.preprocess(image_inputs) for image in pre_processed_images["pixel_values"]: self.assertTrue(np.all(image[0, ...] == image[1, ...]) and np.all(image[1, ...] == image[2, ...])) + + @slow + def test_post_processing_keypoint_detection(self): + image_processor = self.image_processing_class.from_dict(self.image_processor_dict) + model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint") + image_inputs = self.image_processor_tester.prepare_image_inputs() + pre_processed_images = image_processor.preprocess(image_inputs, return_tensors="pt") + outputs = model(**pre_processed_images) + image_sizes = torch.tensor([image.size for image in image_inputs]) + post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, image_sizes) + + self.assertTrue(len(post_processed_outputs) == self.image_processor_tester.batch_size) + for post_processed_output, image_size in zip(post_processed_outputs, image_sizes): + self.assertTrue("keypoints" in post_processed_output) + self.assertTrue("descriptors" in post_processed_output) + self.assertTrue("scores" in post_processed_output) + keypoints = post_processed_output["keypoints"] + all_below_image_size = torch.all(keypoints[:, 0] <= image_size[1]) and torch.all( + keypoints[:, 1] <= image_size[0] + ) + all_above_zero = torch.all(keypoints[:, 0] >= 0) and torch.all(keypoints[:, 1] >= 0) + self.assertTrue(all_below_image_size and all_above_zero) From 2efe61b5fa1bcd1ba3617ab8735b1d043f93c704 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 16:40:20 +0200 Subject: [PATCH 03/32] docs: changed docs to include post_process_keypoint_detection method and switched from opencv to matplotlib --- docs/source/en/model_doc/superpoint.md | 30 +++++++++++-------- .../test_image_processing_superpoint.py | 10 +++---- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 13eec5ffe1b2..4b053e2d88ed 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -86,23 +86,27 @@ model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/sup inputs = processor(images, return_tensors="pt") outputs = model(**inputs) +image_sizes = torch.tensor([image.size for image in images]).flip(1) +outputs = processor.post_process_keypoint_detection(outputs, image_sizes) -for i in range(len(images)): - image_mask = outputs.mask[i] - image_indices = torch.nonzero(image_mask).squeeze() - image_keypoints = outputs.keypoints[i][image_indices] - image_scores = outputs.scores[i][image_indices] - image_descriptors = outputs.descriptors[i][image_indices] +for output in outputs: + keypoints = output["keypoints"] + scores = output["scores"] + descriptors = output["descriptors"] ``` -You can then print the keypoints on the image to visualize the result : +You can then print the keypoints on the image of your choice to visualize the result : ```python -import cv2 -for keypoint, score in zip(image_keypoints, image_scores): - keypoint_x, keypoint_y = int(keypoint[0].item()), int(keypoint[1].item()) - color = tuple([score.item() * 255] * 3) - image = cv2.circle(image, (keypoint_x, keypoint_y), 2, color) -cv2.imwrite("output_image.png", image) +import matplotlib.pyplot as plt +plt.axis("off") +plt.imshow(image) +plt.scatter( + keypoints[:, 0], + keypoints[:, 1], + c=scores * 100, + s=scores * 20 +) +plt.savefig(f"output_image.png") ``` This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille). diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 332ac0847157..18aa0496f893 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -16,7 +16,7 @@ import numpy as np from transformers.testing_utils import require_torch, require_vision, slow -from transformers.utils import is_vision_available +from transformers.utils import is_torch_available, is_vision_available from ...test_image_processing_common import ( ImageProcessingTestMixin, @@ -24,12 +24,12 @@ ) -if is_vision_available(): - from transformers import SuperPointForKeypointDetection, SuperPointImageProcessor, is_torch_available - if is_torch_available(): import torch +if is_vision_available(): + from transformers import SuperPointForKeypointDetection, SuperPointImageProcessor + class SuperPointImageProcessingTester(unittest.TestCase): def __init__( @@ -121,7 +121,7 @@ def test_post_processing_keypoint_detection(self): image_inputs = self.image_processor_tester.prepare_image_inputs() pre_processed_images = image_processor.preprocess(image_inputs, return_tensors="pt") outputs = model(**pre_processed_images) - image_sizes = torch.tensor([image.size for image in image_inputs]) + image_sizes = torch.tensor([image.size for image in image_inputs]).flip(1) post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, image_sizes) self.assertTrue(len(post_processed_outputs) == self.image_processor_tester.batch_size) From a77b87055462b19f0c36d051ab42fb1e2d7f1876 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 22:42:28 +0200 Subject: [PATCH 04/32] test: changed test to not depend on SuperPointModel forward --- .../superpoint/image_processing_superpoint.py | 2 +- .../test_image_processing_superpoint.py | 23 ++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 4ab8a895275d..aa0ffa9ae673 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -293,7 +293,7 @@ def post_process_keypoint_detection( an image in the batch as predicted by the model. """ if len(outputs.mask) != len(target_sizes): - raise ValueError("Make sure that you pass in as many target sizes as the batch dimension of the logits") + raise ValueError("Make sure that you pass in as many target sizes as the batch dimension of the mask") if target_sizes.shape[1] != 2: raise ValueError("Each element of target_sizes must contain the size (h, w) of each image of the batch") diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 18aa0496f893..bf9cfcc24903 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -15,6 +15,7 @@ import numpy as np +from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available @@ -28,7 +29,7 @@ import torch if is_vision_available(): - from transformers import SuperPointForKeypointDetection, SuperPointImageProcessor + from transformers import SuperPointImageProcessor class SuperPointImageProcessingTester(unittest.TestCase): @@ -73,6 +74,23 @@ def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=F torchify=torchify, ) + def prepare_keypoint_detection_output(self, pixel_values): + max_number_keypoints = 50 + batch_size = len(pixel_values) + mask = torch.zeros((batch_size, max_number_keypoints)) + keypoints = torch.zeros((batch_size, max_number_keypoints, 2)) + scores = torch.zeros((batch_size, max_number_keypoints)) + descriptors = torch.zeros((batch_size, max_number_keypoints, 16)) + for i in range(batch_size): + random_number_keypoints = np.random.randint(0, max_number_keypoints) + mask[i, :random_number_keypoints] = 1 + keypoints[i, :random_number_keypoints] = torch.rand((random_number_keypoints, 2)) + scores[i, :random_number_keypoints] = torch.rand((random_number_keypoints,)) + descriptors[i, :random_number_keypoints] = torch.rand((random_number_keypoints, 16)) + return SuperPointKeypointDescriptionOutput( + loss=None, keypoints=keypoints, scores=scores, descriptors=descriptors, mask=mask, hidden_states=None + ) + @require_torch @require_vision @@ -117,10 +135,9 @@ def test_input_image_properly_converted_to_grayscale(self): @slow def test_post_processing_keypoint_detection(self): image_processor = self.image_processing_class.from_dict(self.image_processor_dict) - model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint") image_inputs = self.image_processor_tester.prepare_image_inputs() pre_processed_images = image_processor.preprocess(image_inputs, return_tensors="pt") - outputs = model(**pre_processed_images) + outputs = self.image_processor_tester.prepare_keypoint_detection_output(**pre_processed_images) image_sizes = torch.tensor([image.size for image in image_inputs]).flip(1) post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, image_sizes) From 2ab79cdccc65e09251aed61ef508fcbb4b672c86 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 22:59:15 +0200 Subject: [PATCH 05/32] test: added missing require_torch decorator --- tests/models/superpoint/test_image_processing_superpoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index bf9cfcc24903..8acd2f7e3746 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -133,6 +133,7 @@ def test_input_image_properly_converted_to_grayscale(self): self.assertTrue(np.all(image[0, ...] == image[1, ...]) and np.all(image[1, ...] == image[2, ...])) @slow + @require_torch def test_post_processing_keypoint_detection(self): image_processor = self.image_processing_class.from_dict(self.image_processor_dict) image_inputs = self.image_processor_tester.prepare_image_inputs() From 419ae5dcf2ed40f58d2a9c419e17cded3bcf0e01 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 22:59:51 +0200 Subject: [PATCH 06/32] docs: changed pyplot parameters for the keypoints to be more visible in the example --- docs/source/en/model_doc/superpoint.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 4b053e2d88ed..18f3d76819fd 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -104,7 +104,8 @@ plt.scatter( keypoints[:, 0], keypoints[:, 1], c=scores * 100, - s=scores * 20 + s=scores * 50, + alpha=0.8 ) plt.savefig(f"output_image.png") ``` From 39b32a2f69500bc7af01715fc7beae2260549afe Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 23:03:59 +0200 Subject: [PATCH 07/32] tests: changed import torch location to make test_flax and test_tf --- tests/models/superpoint/test_image_processing_superpoint.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 8acd2f7e3746..36df033c3db0 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -25,9 +25,6 @@ ) -if is_torch_available(): - import torch - if is_vision_available(): from transformers import SuperPointImageProcessor @@ -75,6 +72,7 @@ def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=F ) def prepare_keypoint_detection_output(self, pixel_values): + import torch max_number_keypoints = 50 batch_size = len(pixel_values) mask = torch.zeros((batch_size, max_number_keypoints)) From 144e09a2a8917649fffe5d309a46fc5e033a417e Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 23:19:15 +0200 Subject: [PATCH 08/32] Revert "tests: changed import torch location to make test_flax and test_tf" This reverts commit 39b32a2f69500bc7af01715fc7beae2260549afe. --- tests/models/superpoint/test_image_processing_superpoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 36df033c3db0..8acd2f7e3746 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -25,6 +25,9 @@ ) +if is_torch_available(): + import torch + if is_vision_available(): from transformers import SuperPointImageProcessor @@ -72,7 +75,6 @@ def prepare_image_inputs(self, equal_resolution=False, numpify=False, torchify=F ) def prepare_keypoint_detection_output(self, pixel_values): - import torch max_number_keypoints = 50 batch_size = len(pixel_values) mask = torch.zeros((batch_size, max_number_keypoints)) From 21dbdfc0341f55bd1a32007aaca2d12fa47ad551 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Aug 2024 23:31:21 +0200 Subject: [PATCH 09/32] tests: fixed import --- tests/models/superpoint/test_image_processing_superpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 8acd2f7e3746..3f0a625d8bf8 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -15,7 +15,6 @@ import numpy as np -from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput from transformers.testing_utils import require_torch, require_vision, slow from transformers.utils import is_torch_available, is_vision_available @@ -30,6 +29,7 @@ if is_vision_available(): from transformers import SuperPointImageProcessor + from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput class SuperPointImageProcessingTester(unittest.TestCase): From 389b154da0910ed093ec45abbac89d0f77177c42 Mon Sep 17 00:00:00 2001 From: StevenBucaille Date: Sun, 1 Sep 2024 14:59:16 +0200 Subject: [PATCH 10/32] chore: applied suggestions from code review Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com> --- docs/source/en/model_doc/superpoint.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 18f3d76819fd..c49734f9216c 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -95,9 +95,10 @@ for output in outputs: descriptors = output["descriptors"] ``` -You can then print the keypoints on the image of your choice to visualize the result : +You can then print the keypoints on the image of your choice to visualize the result: ```python import matplotlib.pyplot as plt + plt.axis("off") plt.imshow(image) plt.scatter( From b7d672e979c0331281131d1302be038ddec5a053 Mon Sep 17 00:00:00 2001 From: steven Date: Sun, 1 Sep 2024 14:59:57 +0200 Subject: [PATCH 11/32] tests: fixed import --- tests/models/superpoint/test_image_processing_superpoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 3f0a625d8bf8..64112aadda09 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -27,9 +27,11 @@ if is_torch_available(): import torch + from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput + if is_vision_available(): from transformers import SuperPointImageProcessor - from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput + class SuperPointImageProcessingTester(unittest.TestCase): From f5d731181cbb48a7a4ad8fbf078a6a2f1b8010a0 Mon Sep 17 00:00:00 2001 From: steven Date: Sun, 1 Sep 2024 15:05:39 +0200 Subject: [PATCH 12/32] tests: fixed import (bis) --- .../models/superpoint/image_processing_superpoint.py | 4 +++- tests/models/superpoint/test_image_processing_superpoint.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index aa0ffa9ae673..96a18bd130ea 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -30,15 +30,17 @@ valid_images, ) from ...utils import TensorType, logging, requires_backends -from .modeling_superpoint import SuperPointKeypointDescriptionOutput if is_torch_available(): import torch + if is_vision_available(): import PIL + from .modeling_superpoint import SuperPointKeypointDescriptionOutput + logger = logging.get_logger(__name__) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 64112aadda09..3f0a625d8bf8 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -27,11 +27,9 @@ if is_torch_available(): import torch - from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput - if is_vision_available(): from transformers import SuperPointImageProcessor - + from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput class SuperPointImageProcessingTester(unittest.TestCase): From d89d38541716c02d5ccd937f5d17c88b7a9c7e24 Mon Sep 17 00:00:00 2001 From: steven Date: Sun, 1 Sep 2024 15:09:02 +0200 Subject: [PATCH 13/32] tests: fixed import (ter) --- .../models/superpoint/image_processing_superpoint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 96a18bd130ea..948cef817d3f 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -35,11 +35,12 @@ if is_torch_available(): import torch + from .modeling_superpoint import SuperPointKeypointDescriptionOutput + if is_vision_available(): import PIL - from .modeling_superpoint import SuperPointKeypointDescriptionOutput logger = logging.get_logger(__name__) From f9e1141256ab5a726dd3fc7c1fcfe9b5a71f7ac3 Mon Sep 17 00:00:00 2001 From: steven Date: Sun, 1 Sep 2024 15:33:49 +0200 Subject: [PATCH 14/32] feat: added choice of type for target_size and changed tests accordingly --- .../superpoint/image_processing_superpoint.py | 26 +++++++++----- .../test_image_processing_superpoint.py | 36 +++++++++++-------- 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 948cef817d3f..5be079992db7 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -13,7 +13,7 @@ # limitations under the License. """Image processor class for SuperPoint.""" -from typing import Dict, Optional, Union +from typing import Dict, List, Optional, Tuple, Union import numpy as np @@ -279,7 +279,7 @@ def preprocess( return BatchFeature(data=data, tensor_type=return_tensors) def post_process_keypoint_detection( - self, outputs: SuperPointKeypointDescriptionOutput, target_sizes: torch.Tensor + self, outputs: SuperPointKeypointDescriptionOutput, target_sizes: Union[TensorType, List[Tuple]] ): """ Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors @@ -288,8 +288,9 @@ def post_process_keypoint_detection( Args: outputs ([`SuperPointKeypointDescriptionOutput`]): Raw outputs of the model. - target_sizes (`torch.Tensor` of shape `(batch_size, 2)`): - Tensor containing the size (h, w) of each image of the batch. This must be the original + target_sizes (`torch.Tensor` or `List[Tuple[int, int]]`, *optional*): + Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size + `(height, width)` of each image in the batch. This must be the original image size (before any processing). Returns: `List[Dict]`: A list of dictionaries, each dictionary containing the keypoints, scores and descriptors for @@ -297,14 +298,21 @@ def post_process_keypoint_detection( """ if len(outputs.mask) != len(target_sizes): raise ValueError("Make sure that you pass in as many target sizes as the batch dimension of the mask") - if target_sizes.shape[1] != 2: - raise ValueError("Each element of target_sizes must contain the size (h, w) of each image of the batch") + + if isinstance(target_sizes, List): + image_sizes = torch.tensor(target_sizes) + else: + if target_sizes.shape[1] != 2: + raise ValueError( + "Each element of target_sizes must contain the size (h, w) of each image of the batch" + ) + image_sizes = target_sizes masked_keypoints = outputs.keypoints.clone() - for keypoints, target_size in zip(masked_keypoints, target_sizes): - keypoints[:, 0] = keypoints[:, 0] * target_size[1] - keypoints[:, 1] = keypoints[:, 1] * target_size[0] + for keypoints, image_size in zip(masked_keypoints, image_sizes): + keypoints[:, 0] = keypoints[:, 0] * image_size[1] + keypoints[:, 1] = keypoints[:, 1] * image_size[0] # Convert masked_keypoints to int masked_keypoints = masked_keypoints.to(torch.int32) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 3f0a625d8bf8..c06682a20d8d 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -139,17 +139,25 @@ def test_post_processing_keypoint_detection(self): image_inputs = self.image_processor_tester.prepare_image_inputs() pre_processed_images = image_processor.preprocess(image_inputs, return_tensors="pt") outputs = self.image_processor_tester.prepare_keypoint_detection_output(**pre_processed_images) - image_sizes = torch.tensor([image.size for image in image_inputs]).flip(1) - post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, image_sizes) - - self.assertTrue(len(post_processed_outputs) == self.image_processor_tester.batch_size) - for post_processed_output, image_size in zip(post_processed_outputs, image_sizes): - self.assertTrue("keypoints" in post_processed_output) - self.assertTrue("descriptors" in post_processed_output) - self.assertTrue("scores" in post_processed_output) - keypoints = post_processed_output["keypoints"] - all_below_image_size = torch.all(keypoints[:, 0] <= image_size[1]) and torch.all( - keypoints[:, 1] <= image_size[0] - ) - all_above_zero = torch.all(keypoints[:, 0] >= 0) and torch.all(keypoints[:, 1] >= 0) - self.assertTrue(all_below_image_size and all_above_zero) + + def check_post_processed_output(post_processed_output, image_size): + for post_processed_output, image_size in zip(post_processed_output, image_size): + self.assertTrue("keypoints" in post_processed_output) + self.assertTrue("descriptors" in post_processed_output) + self.assertTrue("scores" in post_processed_output) + keypoints = post_processed_output["keypoints"] + all_below_image_size = torch.all(keypoints[:, 0] <= image_size[1]) and torch.all( + keypoints[:, 1] <= image_size[0] + ) + all_above_zero = torch.all(keypoints[:, 0] >= 0) and torch.all(keypoints[:, 1] >= 0) + self.assertTrue(all_below_image_size and all_above_zero) + + tuple_image_sizes = [(image.size[0], image.size[1]) for image in image_inputs] + tuple_post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, tuple_image_sizes) + + check_post_processed_output(tuple_post_processed_outputs, tuple_image_sizes) + + tensor_image_sizes = torch.tensor([image.size for image in image_inputs]).flip(1) + tensor_post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, tensor_image_sizes) + + check_post_processed_output(tensor_post_processed_outputs, tensor_image_sizes) From 32a2e96acfccbe3d23ffca6195f5e6d0c6505ef6 Mon Sep 17 00:00:00 2001 From: steven Date: Sun, 1 Sep 2024 15:36:27 +0200 Subject: [PATCH 15/32] docs: updated code snippet to reflect the addition of target size type choice in post process method --- docs/source/en/model_doc/superpoint.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index c49734f9216c..0663030c529d 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -86,7 +86,7 @@ model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/sup inputs = processor(images, return_tensors="pt") outputs = model(**inputs) -image_sizes = torch.tensor([image.size for image in images]).flip(1) +image_sizes = [(image[1], image[0]) for image in images] outputs = processor.post_process_keypoint_detection(outputs, image_sizes) for output in outputs: From 560194e8619ee3cfdb64ef0e19e51f5e7f79c9cc Mon Sep 17 00:00:00 2001 From: Steven Bucaille Date: Mon, 2 Sep 2024 08:27:41 +0000 Subject: [PATCH 16/32] tests: fixed imports (...) --- tests/models/superpoint/test_image_processing_superpoint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index c06682a20d8d..80da823cde16 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -27,9 +27,10 @@ if is_torch_available(): import torch + from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput + if is_vision_available(): from transformers import SuperPointImageProcessor - from transformers.models.superpoint.modeling_superpoint import SuperPointKeypointDescriptionOutput class SuperPointImageProcessingTester(unittest.TestCase): From 2d28aba57f513f4a7e706c3590d42904ae315bca Mon Sep 17 00:00:00 2001 From: Steven Bucaille Date: Mon, 2 Sep 2024 08:33:07 +0000 Subject: [PATCH 17/32] tests: fixed imports (...) --- .../models/superpoint/image_processing_superpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 5be079992db7..a6666c18a853 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -279,7 +279,7 @@ def preprocess( return BatchFeature(data=data, tensor_type=return_tensors) def post_process_keypoint_detection( - self, outputs: SuperPointKeypointDescriptionOutput, target_sizes: Union[TensorType, List[Tuple]] + self, outputs: 'SuperPointKeypointDescriptionOutput', target_sizes: Union[TensorType, List[Tuple]] ): """ Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors From bd23baa6c7a966f1b68ecf0dfa7e74a26c67613e Mon Sep 17 00:00:00 2001 From: Steven Bucaille Date: Mon, 2 Sep 2024 08:36:26 +0000 Subject: [PATCH 18/32] style: formatting file --- .../models/superpoint/image_processing_superpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index a6666c18a853..ada2ca1c5b8c 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -279,7 +279,7 @@ def preprocess( return BatchFeature(data=data, tensor_type=return_tensors) def post_process_keypoint_detection( - self, outputs: 'SuperPointKeypointDescriptionOutput', target_sizes: Union[TensorType, List[Tuple]] + self, outputs: "SuperPointKeypointDescriptionOutput", target_sizes: Union[TensorType, List[Tuple]] ): """ Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors From 5bb0baf0d5313800661f08e6af16fbbdd5a5bf30 Mon Sep 17 00:00:00 2001 From: steven Date: Mon, 2 Sep 2024 21:03:23 +0200 Subject: [PATCH 19/32] docs: fixed typo from image[0] to image.size[0] --- docs/source/en/model_doc/superpoint.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 0663030c529d..b83685f12997 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -86,7 +86,7 @@ model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/sup inputs = processor(images, return_tensors="pt") outputs = model(**inputs) -image_sizes = [(image[1], image[0]) for image in images] +image_sizes = [(image.size[1], image.size[0]) for image in images] outputs = processor.post_process_keypoint_detection(outputs, image_sizes) for output in outputs: From ed28314b975eb0b1ee105f41aa9b2f5dd50362f4 Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 5 Sep 2024 21:46:03 +0200 Subject: [PATCH 20/32] docs: added output image and fixed some tests --- docs/source/en/model_doc/superpoint.md | 1 + tests/models/superpoint/test_image_processing_superpoint.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index b83685f12997..94ac3d3ec56e 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -110,6 +110,7 @@ plt.scatter( ) plt.savefig(f"output_image.png") ``` +![image/png](https://cdn-uploads.huggingface.co/production/uploads/632885ba1558dac67c440aa8/ZtFmphEhx8tcbEQqOolyE.png) This model was contributed by [stevenbucaille](https://huggingface.co/stevenbucaille). The original code can be found [here](https://github.com/magicleap/SuperPointPretrainedNetwork). diff --git a/tests/models/superpoint/test_image_processing_superpoint.py b/tests/models/superpoint/test_image_processing_superpoint.py index 80da823cde16..c2eae872004c 100644 --- a/tests/models/superpoint/test_image_processing_superpoint.py +++ b/tests/models/superpoint/test_image_processing_superpoint.py @@ -15,7 +15,7 @@ import numpy as np -from transformers.testing_utils import require_torch, require_vision, slow +from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available from ...test_image_processing_common import ( @@ -133,7 +133,6 @@ def test_input_image_properly_converted_to_grayscale(self): for image in pre_processed_images["pixel_values"]: self.assertTrue(np.all(image[0, ...] == image[1, ...]) and np.all(image[1, ...] == image[2, ...])) - @slow @require_torch def test_post_processing_keypoint_detection(self): image_processor = self.image_processing_class.from_dict(self.image_processor_dict) @@ -151,7 +150,8 @@ def check_post_processed_output(post_processed_output, image_size): keypoints[:, 1] <= image_size[0] ) all_above_zero = torch.all(keypoints[:, 0] >= 0) and torch.all(keypoints[:, 1] >= 0) - self.assertTrue(all_below_image_size and all_above_zero) + self.assertTrue(all_below_image_size) + self.assertTrue(all_above_zero) tuple_image_sizes = [(image.size[0], image.size[1]) for image in image_inputs] tuple_post_processed_outputs = image_processor.post_process_keypoint_detection(outputs, tuple_image_sizes) From 192448d0a2affbca6b8cd21cfb1be327b6d0a129 Mon Sep 17 00:00:00 2001 From: StevenBucaille Date: Wed, 2 Oct 2024 21:53:14 +0200 Subject: [PATCH 21/32] Update docs/source/en/model_doc/superpoint.md Co-authored-by: Pavel Iakubovskii --- docs/source/en/model_doc/superpoint.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 94ac3d3ec56e..d8bb7e95f185 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -86,7 +86,7 @@ model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/sup inputs = processor(images, return_tensors="pt") outputs = model(**inputs) -image_sizes = [(image.size[1], image.size[0]) for image in images] +image_sizes = [(image.height, image.width) for image in images] outputs = processor.post_process_keypoint_detection(outputs, image_sizes) for output in outputs: From e89af7f80420e9768722df0c22cd6d3cc9e922e1 Mon Sep 17 00:00:00 2001 From: steven Date: Wed, 2 Oct 2024 22:27:20 +0200 Subject: [PATCH 22/32] fix: included SuperPointKeypointDescriptionOutput in TYPE_CHECKING if statement and changed tests results to reflect changes to SuperPoint from absolute keypoints coordinates to relative --- .../models/superpoint/image_processing_superpoint.py | 5 ++--- tests/models/superpoint/test_modeling_superpoint.py | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index ada2ca1c5b8c..c023a40a5164 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -13,7 +13,7 @@ # limitations under the License. """Image processor class for SuperPoint.""" -from typing import Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import numpy as np @@ -35,13 +35,12 @@ if is_torch_available(): import torch +if TYPE_CHECKING: from .modeling_superpoint import SuperPointKeypointDescriptionOutput - if is_vision_available(): import PIL - logger = logging.get_logger(__name__) diff --git a/tests/models/superpoint/test_modeling_superpoint.py b/tests/models/superpoint/test_modeling_superpoint.py index 25c384a79557..d00a1b928470 100644 --- a/tests/models/superpoint/test_modeling_superpoint.py +++ b/tests/models/superpoint/test_modeling_superpoint.py @@ -260,7 +260,7 @@ def test_inference(self): inputs = preprocessor(images=images, return_tensors="pt").to(torch_device) with torch.no_grad(): outputs = model(**inputs) - expected_number_keypoints_image0 = 567 + expected_number_keypoints_image0 = 568 expected_number_keypoints_image1 = 830 expected_max_number_keypoints = max(expected_number_keypoints_image0, expected_number_keypoints_image1) expected_keypoints_shape = torch.Size((len(images), expected_max_number_keypoints, 2)) @@ -275,11 +275,11 @@ def test_inference(self): self.assertEqual(outputs.keypoints.shape, expected_keypoints_shape) self.assertEqual(outputs.scores.shape, expected_scores_shape) self.assertEqual(outputs.descriptors.shape, expected_descriptors_shape) - expected_keypoints_image0_values = torch.tensor([[480.0, 9.0], [494.0, 9.0], [489.0, 16.0]]).to(torch_device) + expected_keypoints_image0_values = torch.tensor([[0.75, 0.0188],[0.7719, 0.0188], [0.7641, 0.0333]]).to(torch_device) expected_scores_image0_values = torch.tensor( - [0.0064, 0.0137, 0.0589, 0.0723, 0.5166, 0.0174, 0.1515, 0.2054, 0.0334] + [0.0064, 0.0139, 0.0591, 0.0727, 0.5170, 0.0175, 0.1526, 0.2057, 0.0335] ).to(torch_device) - expected_descriptors_image0_value = torch.tensor(-0.1096).to(torch_device) + expected_descriptors_image0_value = torch.tensor(0.0449).to(torch_device) predicted_keypoints_image0_values = outputs.keypoints[0, :3] predicted_scores_image0_values = outputs.scores[0, :9] predicted_descriptors_image0_value = outputs.descriptors[0, 0, 0] From 4e77a4fe09ef72d1fc5210356b66d89296e585e0 Mon Sep 17 00:00:00 2001 From: steven Date: Wed, 2 Oct 2024 22:29:48 +0200 Subject: [PATCH 23/32] docs: changed SuperPoint's docs to print output instead of just accessing --- docs/source/en/model_doc/superpoint.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index d8bb7e95f185..3abd1ff03af0 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -90,9 +90,10 @@ image_sizes = [(image.height, image.width) for image in images] outputs = processor.post_process_keypoint_detection(outputs, image_sizes) for output in outputs: - keypoints = output["keypoints"] - scores = output["scores"] - descriptors = output["descriptors"] + for keypoints, scores, descriptors in zip(output["keypoints"], output["scores"], output["descriptors"]): + print(f"Keypoints: {keypoints}") + print(f"Scores: {scores}") + print(f"Descriptors: {descriptors}") ``` You can then print the keypoints on the image of your choice to visualize the result: From e9b642abe9baef789055b48e788ed55d5fb2eb01 Mon Sep 17 00:00:00 2001 From: steven Date: Wed, 2 Oct 2024 22:31:04 +0200 Subject: [PATCH 24/32] style: applied make style --- tests/models/superpoint/test_modeling_superpoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/models/superpoint/test_modeling_superpoint.py b/tests/models/superpoint/test_modeling_superpoint.py index d00a1b928470..94a2542a62c5 100644 --- a/tests/models/superpoint/test_modeling_superpoint.py +++ b/tests/models/superpoint/test_modeling_superpoint.py @@ -275,7 +275,9 @@ def test_inference(self): self.assertEqual(outputs.keypoints.shape, expected_keypoints_shape) self.assertEqual(outputs.scores.shape, expected_scores_shape) self.assertEqual(outputs.descriptors.shape, expected_descriptors_shape) - expected_keypoints_image0_values = torch.tensor([[0.75, 0.0188],[0.7719, 0.0188], [0.7641, 0.0333]]).to(torch_device) + expected_keypoints_image0_values = torch.tensor([[0.75, 0.0188], [0.7719, 0.0188], [0.7641, 0.0333]]).to( + torch_device + ) expected_scores_image0_values = torch.tensor( [0.0064, 0.0139, 0.0591, 0.0727, 0.5170, 0.0175, 0.1526, 0.2057, 0.0335] ).to(torch_device) From e08586115c780d77aadd4917553d127b2a2dcf0a Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 3 Oct 2024 23:12:27 +0200 Subject: [PATCH 25/32] docs: added missing output type and precision in docstring of post_process_keypoint_detection --- .../models/superpoint/image_processing_superpoint.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index c023a40a5164..11c130dffd42 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -279,21 +279,21 @@ def preprocess( def post_process_keypoint_detection( self, outputs: "SuperPointKeypointDescriptionOutput", target_sizes: Union[TensorType, List[Tuple]] - ): + ) -> List[Dict[str, torch.Tensor]]: """ Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors with coordinates absolute to the original image sizes. Args: outputs ([`SuperPointKeypointDescriptionOutput`]): - Raw outputs of the model. - target_sizes (`torch.Tensor` or `List[Tuple[int, int]]`, *optional*): + Raw outputs of the model containing keypoints in a relative (x, y) format, with scores and descriptors. + target_sizes (`torch.Tensor` or `List[Tuple[int, int]]`): Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size `(height, width)` of each image in the batch. This must be the original image size (before any processing). Returns: - `List[Dict]`: A list of dictionaries, each dictionary containing the keypoints, scores and descriptors for - an image in the batch as predicted by the model. + `List[Dict]`: A list of dictionaries, each dictionary containing the keypoints in absolute format according + to target_sizes, scores and descriptors for an image in the batch as predicted by the model. """ if len(outputs.mask) != len(target_sizes): raise ValueError("Make sure that you pass in as many target sizes as the batch dimension of the mask") From 91275455a0cdea8d5745f4e2d50010e605499409 Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 3 Oct 2024 23:27:15 +0200 Subject: [PATCH 26/32] perf: deleted loop to perform keypoint conversion in one statement --- .../models/superpoint/image_processing_superpoint.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 11c130dffd42..820ee7abf9ed 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -307,11 +307,9 @@ def post_process_keypoint_detection( ) image_sizes = target_sizes - masked_keypoints = outputs.keypoints.clone() - - for keypoints, image_size in zip(masked_keypoints, image_sizes): - keypoints[:, 0] = keypoints[:, 0] * image_size[1] - keypoints[:, 1] = keypoints[:, 1] * image_size[0] + # Flip the image sizes to (width, height) and convert keypoints to absolute coordinates + image_sizes = torch.flip(image_sizes, [1]) + masked_keypoints = outputs.keypoints * image_sizes[:, None] # Convert masked_keypoints to int masked_keypoints = masked_keypoints.to(torch.int32) From 1ffa4659aa409a47ed79b20facf6a766288e5977 Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 3 Oct 2024 23:40:18 +0200 Subject: [PATCH 27/32] fix: moved keypoint conversion at the end of model forward --- src/transformers/models/superpoint/modeling_superpoint.py | 8 ++++---- tests/models/superpoint/test_modeling_superpoint.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/superpoint/modeling_superpoint.py b/src/transformers/models/superpoint/modeling_superpoint.py index b77a90367d4a..163147d4d19b 100644 --- a/src/transformers/models/superpoint/modeling_superpoint.py +++ b/src/transformers/models/superpoint/modeling_superpoint.py @@ -258,9 +258,6 @@ def _extract_keypoints(self, scores: torch.Tensor) -> Tuple[torch.Tensor, torch. # Convert (y, x) to (x, y) keypoints = torch.flip(keypoints, [1]).float() - # Convert to relative coordinates - keypoints = keypoints / torch.tensor([width, height], device=keypoints.device) - return keypoints, scores @@ -450,7 +447,7 @@ def forward( pixel_values = self.extract_one_channel_pixel_values(pixel_values) - batch_size = pixel_values.shape[0] + batch_size, _, height, width = pixel_values.shape encoder_outputs = self.encoder( pixel_values, @@ -488,6 +485,9 @@ def forward( descriptors[i, : _descriptors.shape[0]] = _descriptors mask[i, : _scores.shape[0]] = 1 + # Convert to relative coordinates + keypoints[:, :] = keypoints[:, :] / torch.tensor([width, height], device=keypoints.device) + hidden_states = encoder_outputs[1] if output_hidden_states else None if not return_dict: return tuple(v for v in [loss, keypoints, scores, descriptors, mask, hidden_states] if v is not None) diff --git a/tests/models/superpoint/test_modeling_superpoint.py b/tests/models/superpoint/test_modeling_superpoint.py index 94a2542a62c5..8db435502ca5 100644 --- a/tests/models/superpoint/test_modeling_superpoint.py +++ b/tests/models/superpoint/test_modeling_superpoint.py @@ -281,7 +281,7 @@ def test_inference(self): expected_scores_image0_values = torch.tensor( [0.0064, 0.0139, 0.0591, 0.0727, 0.5170, 0.0175, 0.1526, 0.2057, 0.0335] ).to(torch_device) - expected_descriptors_image0_value = torch.tensor(0.0449).to(torch_device) + expected_descriptors_image0_value = torch.tensor(-0.1095).to(torch_device) predicted_keypoints_image0_values = outputs.keypoints[0, :3] predicted_scores_image0_values = outputs.scores[0, :9] predicted_descriptors_image0_value = outputs.descriptors[0, 0, 0] From b0d25a35f47b27c8942551724617e60c433c8c36 Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 3 Oct 2024 23:46:30 +0200 Subject: [PATCH 28/32] docs: changed SuperPointInterestPointDecoder to SuperPointKeypointDecoder class name and added relative (x, y) coordinates information to its method --- .../models/superpoint/modeling_superpoint.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/superpoint/modeling_superpoint.py b/src/transformers/models/superpoint/modeling_superpoint.py index 163147d4d19b..34981938d5a2 100644 --- a/src/transformers/models/superpoint/modeling_superpoint.py +++ b/src/transformers/models/superpoint/modeling_superpoint.py @@ -192,7 +192,7 @@ def forward( ) -class SuperPointInterestPointDecoder(nn.Module): +class SuperPointKeypointDecoder(nn.Module): """ The SuperPointInterestPointDecoder uses the output of the SuperPointEncoder to compute the keypoint with scores. The scores are first computed by a convolutional layer, then a softmax is applied to get a probability distribution @@ -239,7 +239,10 @@ def _get_pixel_scores(self, encoded: torch.Tensor) -> torch.Tensor: return scores def _extract_keypoints(self, scores: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Based on their scores, extract the pixels that represent the keypoints that will be used for descriptors computation""" + """ + Based on their scores, extract the pixels that represent the keypoints that will be used for descriptors computation. + The keypoints are in the form of relative (x, y) coordinates. + """ _, height, width = scores.shape # Threshold keypoints by score value @@ -405,7 +408,7 @@ def __init__(self, config: SuperPointConfig) -> None: self.config = config self.encoder = SuperPointEncoder(config) - self.keypoint_decoder = SuperPointInterestPointDecoder(config) + self.keypoint_decoder = SuperPointKeypointDecoder(config) self.descriptor_decoder = SuperPointDescriptorDecoder(config) self.post_init() From 1fb5705dd82b6dc17e57502df22c28fde72c729c Mon Sep 17 00:00:00 2001 From: steven Date: Thu, 3 Oct 2024 23:53:34 +0200 Subject: [PATCH 29/32] fix: changed type hint --- .../models/superpoint/image_processing_superpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index 820ee7abf9ed..65309b1c1826 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -279,7 +279,7 @@ def preprocess( def post_process_keypoint_detection( self, outputs: "SuperPointKeypointDescriptionOutput", target_sizes: Union[TensorType, List[Tuple]] - ) -> List[Dict[str, torch.Tensor]]: + ) -> List[Dict[str, "torch.Tensor"]]: """ Converts the raw output of [`SuperPointForKeypointDetection`] into lists of keypoints, scores and descriptors with coordinates absolute to the original image sizes. From 13cb7e5be54eb8a15462473160f6df013049c7b2 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 4 Oct 2024 13:44:42 +0200 Subject: [PATCH 30/32] refactor: removed unnecessary brackets --- src/transformers/models/superpoint/modeling_superpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/superpoint/modeling_superpoint.py b/src/transformers/models/superpoint/modeling_superpoint.py index 34981938d5a2..7ddd57d37c49 100644 --- a/src/transformers/models/superpoint/modeling_superpoint.py +++ b/src/transformers/models/superpoint/modeling_superpoint.py @@ -489,7 +489,7 @@ def forward( mask[i, : _scores.shape[0]] = 1 # Convert to relative coordinates - keypoints[:, :] = keypoints[:, :] / torch.tensor([width, height], device=keypoints.device) + keypoints = keypoints / torch.tensor([width, height], device=keypoints.device) hidden_states = encoder_outputs[1] if output_hidden_states else None if not return_dict: From eb6a5aad563b00c80650a92bbbab8eac277a8f2c Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 4 Oct 2024 13:48:38 +0200 Subject: [PATCH 31/32] revert: SuperPointKeypointDecoder to SuperPointInterestPointDecoder --- src/transformers/models/superpoint/modeling_superpoint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/superpoint/modeling_superpoint.py b/src/transformers/models/superpoint/modeling_superpoint.py index 7ddd57d37c49..1075de299a9f 100644 --- a/src/transformers/models/superpoint/modeling_superpoint.py +++ b/src/transformers/models/superpoint/modeling_superpoint.py @@ -192,7 +192,7 @@ def forward( ) -class SuperPointKeypointDecoder(nn.Module): +class SuperPointInterestPointDecoder(nn.Module): """ The SuperPointInterestPointDecoder uses the output of the SuperPointEncoder to compute the keypoint with scores. The scores are first computed by a convolutional layer, then a softmax is applied to get a probability distribution @@ -408,7 +408,7 @@ def __init__(self, config: SuperPointConfig) -> None: self.config = config self.encoder = SuperPointEncoder(config) - self.keypoint_decoder = SuperPointKeypointDecoder(config) + self.keypoint_decoder = SuperPointInterestPointDecoder(config) self.descriptor_decoder = SuperPointDescriptorDecoder(config) self.post_init() From 4c34d752402cdb3a518038a6452e32e02e6c05d6 Mon Sep 17 00:00:00 2001 From: StevenBucaille Date: Fri, 4 Oct 2024 20:16:37 +0200 Subject: [PATCH 32/32] Update docs/source/en/model_doc/superpoint.md Co-authored-by: Pavel Iakubovskii --- docs/source/en/model_doc/superpoint.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/en/model_doc/superpoint.md b/docs/source/en/model_doc/superpoint.md index 3abd1ff03af0..59e451adceb8 100644 --- a/docs/source/en/model_doc/superpoint.md +++ b/docs/source/en/model_doc/superpoint.md @@ -101,12 +101,12 @@ You can then print the keypoints on the image of your choice to visualize the re import matplotlib.pyplot as plt plt.axis("off") -plt.imshow(image) +plt.imshow(image_1) plt.scatter( - keypoints[:, 0], - keypoints[:, 1], - c=scores * 100, - s=scores * 50, + outputs[0]["keypoints"][:, 0], + outputs[0]["keypoints"][:, 1], + c=outputs[0]["scores"] * 100, + s=outputs[0]["scores"] * 50, alpha=0.8 ) plt.savefig(f"output_image.png")