diff --git a/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py b/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py
index a5b1d09e19b8..049325e3d690 100644
--- a/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/configuration_layoutlmv2.py
@@ -145,34 +145,7 @@ def __init__(
         has_relative_attention_bias=True,
         has_spatial_attention_bias=True,
         has_visual_segment_embedding=False,
-        detectron2_config_args={
-            "MODEL.MASK_ON": True,
-            "MODEL.PIXEL_STD": [57.375, 57.120, 58.395],
-            "MODEL.BACKBONE.NAME": "build_resnet_fpn_backbone",
-            "MODEL.FPN.IN_FEATURES": ["res2", "res3", "res4", "res5"],
-            "MODEL.ANCHOR_GENERATOR.SIZES": [[32], [64], [128], [256], [512]],
-            "MODEL.RPN.IN_FEATURES": ["p2", "p3", "p4", "p5", "p6"],
-            "MODEL.RPN.PRE_NMS_TOPK_TRAIN": 2000,
-            "MODEL.RPN.PRE_NMS_TOPK_TEST": 1000,
-            "MODEL.RPN.POST_NMS_TOPK_TRAIN": 1000,
-            "MODEL.POST_NMS_TOPK_TEST": 1000,
-            "MODEL.ROI_HEADS.NAME": "StandardROIHeads",
-            "MODEL.ROI_HEADS.NUM_CLASSES": 5,
-            "MODEL.ROI_HEADS.IN_FEATURES": ["p2", "p3", "p4", "p5"],
-            "MODEL.ROI_BOX_HEAD.NAME": "FastRCNNConvFCHead",
-            "MODEL.ROI_BOX_HEAD.NUM_FC": 2,
-            "MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION": 14,
-            "MODEL.ROI_MASK_HEAD.NAME": "MaskRCNNConvUpsampleHead",
-            "MODEL.ROI_MASK_HEAD.NUM_CONV": 4,
-            "MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION": 7,
-            "MODEL.RESNETS.DEPTH": 101,
-            "MODEL.RESNETS.SIZES": [[32], [64], [128], [256], [512]],
-            "MODEL.RESNETS.ASPECT_RATIOS": [[0.5, 1.0, 2.0]],
-            "MODEL.RESNETS.OUT_FEATURES": ["res2", "res3", "res4", "res5"],
-            "MODEL.RESNETS.NUM_GROUPS": 32,
-            "MODEL.RESNETS.WIDTH_PER_GROUP": 8,
-            "MODEL.RESNETS.STRIDE_IN_1X1": False,
-        },
+        detectron2_config_args=None,
         **kwargs
     ):
         super().__init__(
@@ -204,7 +177,49 @@ def __init__(
         self.has_relative_attention_bias = has_relative_attention_bias
         self.has_spatial_attention_bias = has_spatial_attention_bias
         self.has_visual_segment_embedding = has_visual_segment_embedding
-        self.detectron2_config_args = detectron2_config_args
+        self.detectron2_config_args = (
+            detectron2_config_args if detectron2_config_args is not None else self.get_default_detectron2_config()
+        )
+
+    @classmethod
+    def get_default_detectron2_config(cls):
+        """
+        Return the default Detectron2 config overrides used when `detectron2_config_args` is `None`.
+
+        A fresh dictionary is built on every call, so callers may mutate the result without affecting other
+        configuration instances.
+
+        Returns:
+            `dict`: dotted config keys (such as `"MODEL.MASK_ON"`) mapped to their default values.
+        """
+        return {
+            "MODEL.MASK_ON": True,
+            "MODEL.PIXEL_STD": [57.375, 57.120, 58.395],
+            "MODEL.BACKBONE.NAME": "build_resnet_fpn_backbone",
+            "MODEL.FPN.IN_FEATURES": ["res2", "res3", "res4", "res5"],
+            "MODEL.ANCHOR_GENERATOR.SIZES": [[32], [64], [128], [256], [512]],
+            "MODEL.RPN.IN_FEATURES": ["p2", "p3", "p4", "p5", "p6"],
+            "MODEL.RPN.PRE_NMS_TOPK_TRAIN": 2000,
+            "MODEL.RPN.PRE_NMS_TOPK_TEST": 1000,
+            "MODEL.RPN.POST_NMS_TOPK_TRAIN": 1000,
+            "MODEL.POST_NMS_TOPK_TEST": 1000,
+            "MODEL.ROI_HEADS.NAME": "StandardROIHeads",
+            "MODEL.ROI_HEADS.NUM_CLASSES": 5,
+            "MODEL.ROI_HEADS.IN_FEATURES": ["p2", "p3", "p4", "p5"],
+            "MODEL.ROI_BOX_HEAD.NAME": "FastRCNNConvFCHead",
+            "MODEL.ROI_BOX_HEAD.NUM_FC": 2,
+            "MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION": 14,
+            "MODEL.ROI_MASK_HEAD.NAME": "MaskRCNNConvUpsampleHead",
+            "MODEL.ROI_MASK_HEAD.NUM_CONV": 4,
+            "MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION": 7,
+            "MODEL.RESNETS.DEPTH": 101,
+            "MODEL.RESNETS.SIZES": [[32], [64], [128], [256], [512]],
+            "MODEL.RESNETS.ASPECT_RATIOS": [[0.5, 1.0, 2.0]],
+            "MODEL.RESNETS.OUT_FEATURES": ["res2", "res3", "res4", "res5"],
+            "MODEL.RESNETS.NUM_GROUPS": 32,
+            "MODEL.RESNETS.WIDTH_PER_GROUP": 8,
+            "MODEL.RESNETS.STRIDE_IN_1X1": False,
+        }
 
     def get_detectron2_config(self):
         detectron2_config = detectron2.config.get_cfg()