Commit 53baebd

Add files via upload
1 parent 9311dfc commit 53baebd

100 files changed: +8353 −0 lines changed


configs/ocr/Base-RCNN-FPN.yaml

```yaml
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_resnet_fpn_backbone"
  RESNETS:
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
  FPN:
    IN_FEATURES: ["res2", "res3", "res4", "res5"]
  ANCHOR_GENERATOR:
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each FPN feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch
    # (see "modeling/rpn/rpn_outputs.py" for details of this legacy issue),
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 1000
    POST_NMS_TOPK_TEST: 1000
  ROI_HEADS:
    NAME: "StandardROIHeads"
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
    NUM_CLASSES: 63
  ROI_BOX_HEAD:
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
```
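The base config pairs five anchor sizes (one per FPN level) with three shared aspect ratios, giving three anchors per location at each level. A minimal sketch of how size and ratio translate into anchor width/height, assuming the area-preserving convention detectron2's default anchor generator uses (h/w equals the ratio while w*h equals size squared):

```python
import math

def anchor_wh(size, ratio):
    """Width/height of an anchor with area size**2 and h/w == ratio
    (the convention used by detectron2's default anchor generator)."""
    w = size / math.sqrt(ratio)
    h = size * math.sqrt(ratio)
    return w, h

sizes = [32, 64, 128, 256, 512]   # one per FPN level, p2..p6
ratios = [0.5, 1.0, 2.0]          # shared across levels

# Each (size, ratio) pair preserves the anchor's area.
for size in sizes:
    for ratio in ratios:
        w, h = anchor_wh(size, ratio)
        assert abs(w * h - size * size) < 1e-6
```

A ratio of 1.0 gives a square anchor; 0.5 gives a wide one and 2.0 a tall one, all at the same area.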

configs/ocr/ctw1500_101_FPN.yaml

```yaml
_BASE_: "./Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  TEXTFUSENET_MUTIL_PATH_FUSE_ON: True
  WEIGHTS: "./out_dir_r101/ctw1500_model/model_ctw_r101.pth"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NMS_THRESH_TEST: 0.4
  TEXTFUSENET_SEG_HEAD:
    FPN_FEATURES_FUSED_LEVEL: 1
    POOLER_SCALES: (0.125,)

DATASETS:
  TRAIN: ("ctw1500",)
  TEST: ("ctw1500",)
SOLVER:
  IMS_PER_BATCH: 4
  BASE_LR: 0.001
  STEPS: (40000, 80000)
  MAX_ITER: 120000
  CHECKPOINT_PERIOD: 2500

INPUT:
  MIN_SIZE_TRAIN: (800, 1000, 1200)
  MAX_SIZE_TRAIN: 1500
  MIN_SIZE_TEST: 950
  MAX_SIZE_TEST: 1500

OUTPUT_DIR: "./out_dir_r101/ctw1500_model/"
```
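The `_BASE_:` key tells the config loader to start from `Base-RCNN-FPN.yaml` and overlay this file's keys on top of it, so only the deltas need to be written here. A minimal sketch of that overlay semantics (a plain recursive dict merge; detectron2's actual loader is built on yacs `CfgNode`, so this is an illustration, not the real implementation):

```python
def merge(base, child):
    """Recursively overlay `child` onto `base`, as a _BASE_-style
    config loader would; child values win on conflicts."""
    out = dict(base)
    for key, value in child.items():
        if isinstance(value, dict) and isinstance(out.get(key), dict):
            out[key] = merge(out[key], value)  # descend into nested sections
        else:
            out[key] = value                   # scalar or new key: child wins
    return out

base = {"MODEL": {"META_ARCHITECTURE": "GeneralizedRCNN",
                  "ROI_HEADS": {"NAME": "StandardROIHeads"}}}
child = {"MODEL": {"MASK_ON": True,
                   "ROI_HEADS": {"NMS_THRESH_TEST": 0.4}}}

cfg = merge(base, child)
# Keys from both files survive; nested child overrides replace only their own leaves.
```

This is why `ROI_HEADS` here only sets `NMS_THRESH_TEST`: the head's `NAME` still comes from the base file.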

configs/ocr/icdar2013_101_FPN.yaml

```yaml
_BASE_: "./Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  TEXTFUSENET_MUTIL_PATH_FUSE_ON: True
  WEIGHTS: "./out_dir_r101/icdar2013_model/model_ic13_r101.pth"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NMS_THRESH_TEST: 0.3
  TEXTFUSENET_SEG_HEAD:
    FPN_FEATURES_FUSED_LEVEL: 2
    POOLER_SCALES: (0.0625,)

DATASETS:
  TRAIN: ("icdar2013",)
  TEST: ("icdar2013",)
SOLVER:
  IMS_PER_BATCH: 4
  BASE_LR: 0.001
  STEPS: (40000, 80000)
  MAX_ITER: 120000
  CHECKPOINT_PERIOD: 2500

INPUT:
  MIN_SIZE_TRAIN: (800, 1000, 1200)
  MAX_SIZE_TRAIN: 1500
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1500

OUTPUT_DIR: "./out_dir_r101/icdar2013_model/"
```
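Across these configs the seg-head pooler scale tracks the fused FPN level: level 1 pairs with 0.125 (stride 8) and level 2 with 0.0625 (stride 16). A sketch of that apparent relation, assuming scale is the reciprocal of the feature stride and level L fuses at stride 2^(L+2), with p2 at stride 4 (this is an inference from the configs, not documented TextFuseNet behavior):

```python
def pooler_scale(fused_level):
    """Pooler scale (1 / feature stride) implied by the fused FPN level,
    assuming level L corresponds to stride 2 ** (L + 2), i.e. p2 = stride 4."""
    return 1.0 / 2 ** (fused_level + 2)

assert pooler_scale(1) == 0.125    # ctw1500 / totaltext configs
assert pooler_scale(2) == 0.0625   # icdar2013 / icdar2015 configs
```

If you change `FPN_FEATURES_FUSED_LEVEL`, `POOLER_SCALES` would presumably need to move with it to keep RoI pooling aligned with the fused feature map.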

configs/ocr/icdar2015_101_FPN.yaml

```yaml
_BASE_: "./Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  TEXTFUSENET_MUTIL_PATH_FUSE_ON: True
  WEIGHTS: "./out_dir_r101/icdar2015_model/model_ic15_r101.pth"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NMS_THRESH_TEST: 0.35
  TEXTFUSENET_SEG_HEAD:
    FPN_FEATURES_FUSED_LEVEL: 2
    POOLER_SCALES: (0.0625,)

DATASETS:
  TRAIN: ("icdar2015",)
  TEST: ("icdar2015",)
SOLVER:
  IMS_PER_BATCH: 4
  BASE_LR: 0.001
  STEPS: (40000, 80000)
  MAX_ITER: 120000
  CHECKPOINT_PERIOD: 2500

INPUT:
  MIN_SIZE_TRAIN: (800, 1000, 1200)
  MAX_SIZE_TRAIN: 1500
  MIN_SIZE_TEST: 1500
  MAX_SIZE_TEST: 3000

OUTPUT_DIR: "./out_dir_r101/icdar2015_model/"
```
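All four solver sections use the same schedule: base LR 0.001, dropped at iterations 40000 and 80000, over 120000 iterations. A sketch of the resulting learning-rate curve, assuming detectron2's default multi-step behavior with a decay factor (gamma) of 0.1; gamma is not set in these files, so the 0.1 is the framework default and an assumption here:

```python
import bisect

def lr_at(iteration, base_lr=0.001, steps=(40000, 80000), gamma=0.1):
    """Piecewise-constant LR: multiply by gamma once per milestone passed."""
    return base_lr * gamma ** bisect.bisect_right(steps, iteration)

assert lr_at(0) == 0.001                        # before the first drop
assert abs(lr_at(50000) - 1e-4) < 1e-12          # after the first drop
assert abs(lr_at(100000) - 1e-5) < 1e-12         # after the second drop
```

Detectron2 also applies a warmup ramp over the first few hundred iterations, which this sketch omits.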

configs/ocr/totaltext_101_FPN.yaml

```yaml
_BASE_: "./Base-RCNN-FPN.yaml"
MODEL:
  MASK_ON: True
  TEXTFUSENET_MUTIL_PATH_FUSE_ON: True
  WEIGHTS: "./out_dir_r101/totaltext_model/model_tt_r101.pth"
  PIXEL_STD: [57.375, 57.120, 58.395]
  RESNETS:
    STRIDE_IN_1X1: False  # this is a C2 model
    NUM_GROUPS: 32
    WIDTH_PER_GROUP: 8
    DEPTH: 101
  ROI_HEADS:
    NMS_THRESH_TEST: 0.4
  TEXTFUSENET_SEG_HEAD:
    FPN_FEATURES_FUSED_LEVEL: 1
    POOLER_SCALES: (0.125,)

DATASETS:
  TRAIN: ("totaltext",)
  TEST: ("totaltext",)
SOLVER:
  IMS_PER_BATCH: 4
  BASE_LR: 0.001
  STEPS: (40000, 80000)
  MAX_ITER: 120000
  CHECKPOINT_PERIOD: 2500

INPUT:
  MIN_SIZE_TRAIN: (800, 1000, 1200)
  MAX_SIZE_TRAIN: 1500
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333

OUTPUT_DIR: "./out_dir_r101/totaltext_model/"
```
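The `INPUT` sections control detectron2's shortest-edge resizing: images are scaled so the shorter edge reaches `MIN_SIZE_*`, unless that would push the longer edge past `MAX_SIZE_*`, in which case the longer edge is capped instead. A sketch of that rule (mirroring the logic of detectron2's `ResizeShortestEdge` transform as an illustration):

```python
def resize_shortest_edge(h, w, min_size, max_size):
    """Scale so min(h, w) == min_size, then rescale down if the
    longer edge would exceed max_size (detectron2-style resize)."""
    scale = min_size / min(h, w)
    if max(h, w) * scale > max_size:
        scale = max_size / max(h, w)
    return round(h * scale), round(w * scale)

# totaltext test-time settings: MIN_SIZE_TEST 800, MAX_SIZE_TEST 1333
assert resize_shortest_edge(600, 800, 800, 1333) == (800, 1067)
# For very wide images the longer-edge cap takes over:
assert resize_shortest_edge(500, 1400, 800, 1333) == (476, 1333)
```

This is why the icdar2015 config can pair `MIN_SIZE_TEST: 1500` with `MAX_SIZE_TEST: 3000`: the larger cap leaves room for elongated scene-text images to keep their shorter edge at 1500.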

datasets/README.md

For a few datasets that detectron2 natively supports, the datasets are assumed to exist in a directory called "datasets/", under the directory where you launch the program. They need to have the following directory structure:

## Expected dataset structure for COCO instance/keypoint detection:

```
coco/
  annotations/
    instances_{train,val}2017.json
    person_keypoints_{train,val}2017.json
  {train,val}2017/
    # image files that are mentioned in the corresponding json
```

You can use the 2014 version of the dataset as well.

Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset, which you can download with `./prepare_for_tests.sh`.

## Expected dataset structure for PanopticFPN:

```
coco/
  annotations/
    panoptic_{train,val}2017.json
  panoptic_{train,val}2017/
    # png annotations
  panoptic_stuff_{train,val}2017/  # generated by the script mentioned below
```

Install panopticapi by:
```
pip install git+https://github.com/cocodataset/panopticapi.git
```
Then run `python prepare_panoptic_fpn.py` to extract semantic annotations from the panoptic annotations.

## Expected dataset structure for LVIS instance segmentation:
```
coco/
  {train,val,test}2017/
lvis/
  lvis_v0.5_{train,val}.json
  lvis_v0.5_image_info_test.json
```

Install lvis-api by:
```
pip install git+https://github.com/lvis-dataset/lvis-api.git
```

## Expected dataset structure for cityscapes:
```
cityscapes/
  gtFine/
    train/
      aachen/
        color.png, instanceIds.png, labelIds.png, polygons.json,
        labelTrainIds.png
      ...
    val/
    test/
  leftImg8bit/
    train/
    val/
    test/
```
Install cityscapes scripts by:
```
pip install git+https://github.com/mcordts/cityscapesScripts.git
```

Note: the `labelTrainIds.png` files are created by `cityscapesscripts/preparation/createTrainIdLabelImgs.py`. They are not needed for instance segmentation.

## Expected dataset structure for Pascal VOC:
```
VOC20{07,12}/
  Annotations/
  ImageSets/
  JPEGImages/
```
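A quick way to sanity-check the layout above before launching training is to walk the expected paths. This is a hypothetical helper written for this README, not part of detectron2:

```python
import os

def check_coco_layout(root):
    """Return the paths from the COCO layout above that are missing under root."""
    expected = [
        "annotations/instances_train2017.json",
        "annotations/instances_val2017.json",
        "train2017",
        "val2017",
    ]
    return [p for p in expected if not os.path.exists(os.path.join(root, p))]

missing = check_coco_layout("datasets/coco")
if missing:
    print("missing:", missing)
```

The same pattern extends to the panoptic, LVIS, cityscapes, and VOC layouts by swapping the `expected` list.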

datasets/prepare_for_tests.sh

```bash
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

# Download some files needed for running tests.

cd "${0%/*}"

BASE=https://dl.fbaipublicfiles.com/detectron2
mkdir -p coco/annotations

for anno in instances_val2017_100 \
  person_keypoints_val2017_100 \
  instances_minival2014_100 \
  person_keypoints_minival2014_100; do

  dest=coco/annotations/$anno.json
  [[ -s $dest ]] && {
    echo "$dest exists. Skipping ..."
  } || {
    wget $BASE/annotations/coco/$anno.json -O $dest
  }
done
```
