2 changes: 1 addition & 1 deletion README_ko.md
@@ -414,4 +414,4 @@ Flax, PyTorch, TensorFlow 설치 페이지에서 이들을 conda로 설치하는
url = "https://www.aclweb.org/anthology/2020.emnlp-demos.6",
pages = "38--45"
}
```
```
src/transformers/models/deformable_detr/configuration_deformable_detr.py
@@ -116,6 +116,8 @@ class DeformableDetrConfig(PretrainedConfig):
based on the predictions from the previous layer.
focal_alpha (`float`, *optional*, defaults to 0.25):
Alpha parameter in the focal loss.
use_custom_kernel (`bool`, *optional*, defaults to `False`):
Whether to use a custom CUDA kernel to speed up inference and training on GPU.
Comment on lines +119 to +120
Collaborator

As said in the thread, let's not use a config parameter for this, but just try to use the fast version and fall back to the slow one if the fast one fails. If a user sets it to True and then pushes their model, anyone without a GPU, or with a PyTorch install that has no CUDA support, will get a failure when using this model.

Config parameters are usually a bad fit for flags that depend on the hardware or setup.
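A minimal sketch of that fallback pattern, reusing the helpers already defined in modeling_deformable_detr.py (the wrapper function name here is just for illustration, not part of the PR):

```python
# Sketch only: try the fused CUDA kernel first and quietly fall back to the
# pure-PyTorch implementation when it is unavailable (CPU-only machine,
# PyTorch built without CUDA, or the kernel failed to compile).
# MultiScaleDeformableAttentionFunction and ms_deform_attn_core_pytorch are
# the existing helpers in modeling_deformable_detr.py; no config flag involved.
def multi_scale_deformable_attention(
    value, spatial_shapes, level_start_index, sampling_locations, attention_weights, im2col_step
):
    try:
        # Fast path: custom CUDA kernel loaded via load_cuda_kernels().
        return MultiScaleDeformableAttentionFunction.apply(
            value, spatial_shapes, level_start_index, sampling_locations, attention_weights, im2col_step
        )
    except Exception:
        # Slow path: works everywhere.
        return ms_deform_attn_core_pytorch(value, spatial_shapes, sampling_locations, attention_weights)
```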

Contributor Author

OK, I don't think any changes to the main branch are needed then. Should I go ahead and close the PR?

Collaborator

If it's all good on main, then yes :-)


Examples:

@@ -177,6 +179,7 @@ def __init__(
giou_loss_coefficient=2,
eos_coefficient=0.1,
focal_alpha=0.25,
use_custom_kernel=False,
**kwargs
):
self.num_queries = num_queries
@@ -220,6 +223,7 @@ def __init__(
self.giou_loss_coefficient = giou_loss_coefficient
self.eos_coefficient = eos_coefficient
self.focal_alpha = focal_alpha
self.use_custom_kernel = use_custom_kernel
super().__init__(is_encoder_decoder=is_encoder_decoder, **kwargs)

@property
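For illustration, how the flag added in this diff would be used from the config side — note that `use_custom_kernel` exists only on this PR's branch, not on main, where the fast kernel is simply tried first:

```python
from transformers import DeformableDetrConfig, DeformableDetrModel

# Assumes this PR's branch: on main there is no `use_custom_kernel` field and
# the custom CUDA kernel is attempted automatically when available.
config = DeformableDetrConfig(use_custom_kernel=True)  # opt in to the fused CUDA kernel
model = DeformableDetrModel(config)  # each deformable attention layer reads the flag
```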
70 changes: 38 additions & 32 deletions src/transformers/models/deformable_detr/modeling_deformable_detr.py
@@ -47,6 +47,15 @@
from .load_custom import load_cuda_kernels


if is_vision_available():
from transformers.image_transforms import center_to_corners_format

if is_scipy_available():
from scipy.optimize import linear_sum_assignment

if is_timm_available():
from timm import create_model

logger = logging.get_logger(__name__)

# Move this to not compile only when importing, this needs to happen later, like in __init__.
@@ -60,8 +69,14 @@
else:
MultiScaleDeformableAttention = None

if is_vision_available():
from transformers.image_transforms import center_to_corners_format

_CONFIG_FOR_DOC = "DeformableDetrConfig"
_CHECKPOINT_FOR_DOC = "sensetime/deformable-detr"

DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = [
"sensetime/deformable-detr",
# See all Deformable DETR models at https://huggingface.co/models?filter=deformable-detr
]


class MultiScaleDeformableAttentionFunction(Function):
@@ -112,23 +127,6 @@ def backward(context, grad_output):
return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None


if is_scipy_available():
from scipy.optimize import linear_sum_assignment

if is_timm_available():
from timm import create_model

logger = logging.get_logger(__name__)

_CONFIG_FOR_DOC = "DeformableDetrConfig"
_CHECKPOINT_FOR_DOC = "sensetime/deformable-detr"

DEFORMABLE_DETR_PRETRAINED_MODEL_ARCHIVE_LIST = [
"sensetime/deformable-detr",
# See all Deformable DETR models at https://huggingface.co/models?filter=deformable-detr
]


@dataclass
class DeformableDetrDecoderOutput(ModelOutput):
"""
@@ -561,7 +559,7 @@ class DeformableDetrMultiscaleDeformableAttention(nn.Module):
Multiscale deformable attention as proposed in Deformable DETR.
"""

def __init__(self, embed_dim: int, num_heads: int, n_levels: int, n_points: int):
def __init__(self, embed_dim: int, num_heads: int, n_levels: int, n_points: int, use_custom_kernel: bool):
super().__init__()
if embed_dim % num_heads != 0:
raise ValueError(
@@ -582,6 +580,7 @@ def __init__(self, embed_dim: int, num_heads: int, n_levels: int, n_points: int)
self.n_levels = n_levels
self.n_heads = num_heads
self.n_points = n_points
self.use_custom_kernel = use_custom_kernel

self.sampling_offsets = nn.Linear(embed_dim, num_heads * n_levels * n_points * 2)
self.attention_weights = nn.Linear(embed_dim, num_heads * n_levels * n_points)
@@ -664,19 +663,24 @@ def forward(
)
else:
raise ValueError(f"Last dim of reference_points must be 2 or 4, but got {reference_points.shape[-1]}")
try:
# GPU
output = MultiScaleDeformableAttentionFunction.apply(
value,
spatial_shapes,
level_start_index,
sampling_locations,
attention_weights,
self.im2col_step,
)
except Exception:
# CPU

# Use the custom CUDA kernel to speed up inference and training on GPU
if self.use_custom_kernel:
try:
output = MultiScaleDeformableAttentionFunction.apply(
value,
spatial_shapes,
level_start_index,
sampling_locations,
attention_weights,
self.im2col_step,
)
# Fall back to PyTorch implementation
except Exception:
output = ms_deform_attn_core_pytorch(value, spatial_shapes, sampling_locations, attention_weights)
else:
output = ms_deform_attn_core_pytorch(value, spatial_shapes, sampling_locations, attention_weights)

output = self.output_proj(output)

return output, attention_weights
@@ -808,6 +812,7 @@ def __init__(self, config: DeformableDetrConfig):
num_heads=config.encoder_attention_heads,
n_levels=config.num_feature_levels,
n_points=config.encoder_n_points,
use_custom_kernel=config.use_custom_kernel,
)
self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim)
self.dropout = config.dropout
@@ -909,6 +914,7 @@ def __init__(self, config: DeformableDetrConfig):
num_heads=config.decoder_attention_heads,
n_levels=config.num_feature_levels,
n_points=config.decoder_n_points,
use_custom_kernel=config.use_custom_kernel,
)
self.encoder_attn_layer_norm = nn.LayerNorm(self.embed_dim)
# feedforward neural networks