YunYang1994 · anandRIyer · Aug 28, 2019 · Aug 28, 2019 · Aug 28, 2019 · Aug 28, 2019
diff --git a/README.md b/README.md
@@ -1,8 +1,4 @@
-# [TensorFlow2.0-Examples/4-Object_Detection/YOLOV3](https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/master/4-Object_Detection/YOLOV3)
-
-## Please install tensorflow-gpu 1.11.0 !  Since Tensorflow is fucking ridiculous !
-
-## part 1. Introduction [[代码剖析]](https://github.com/YunYang1994/CodeFun/blob/master/002-deep_learning/YOLOv3.md)
+## part 1. Introduction
 
 Implementation of the YOLO v3 object detector in TensorFlow. The full details are in [this paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf).  In this project we cover several segments as follows:<br>
 - [x] [YOLO v3 architecture](https://github.com/YunYang1994/tensorflow-yolov3/blob/master/core/yolov3.py)

diff --git a/checkpoint/checkpoint b/checkpoint/checkpoint
@@ -1,2 +0,0 @@
-model_checkpoint_path: "yolov3_test_loss=2530.1914.ckpt-1"
-all_model_checkpoint_paths: "yolov3_test_loss=2530.1914.ckpt-1"

diff --git a/core/config.py b/core/config.py
@@ -7,13 +7,15 @@
 #   File name   : config.py
 #   Author      : YunYang1994
 #   Created date: 2019-02-28 13:06:54
-#   Description :
+#   Description : configurations
 #
 #================================================================
 
 from easydict import EasyDict as edict
-
-
+import os
+import settings
+settings.RDT_GIT_ROOT
+YOLO_MODEL_VER=1
 __C                             = edict()
 # Consumers can get config by: from config import cfg
 
@@ -23,46 +25,56 @@
 __C.YOLO                        = edict()
 
 # Set the class name
-__C.YOLO.CLASSES                = "./data/classes/coco.names"
-__C.YOLO.ANCHORS                = "./data/anchors/basline_anchors.txt"
+if YOLO_MODEL_VER==1:
+    __C.YOLO.CLASSES                = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/classes/rdt.names"
+    __C.YOLO.ANCHORS                = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/anchors/rdt_anchors.txt"
+elif YOLO_MODEL_VER>=2:
+    __C.YOLO.CLASSES                = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/classes/rdt_rot.names"
+    __C.YOLO.ANCHORS                = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/anchors/rdt_anchors_rot.txt"
+
 __C.YOLO.MOVING_AVE_DECAY       = 0.9995
 __C.YOLO.STRIDES                = [8, 16, 32]
 __C.YOLO.ANCHOR_PER_SCALE       = 3
-__C.YOLO.IOU_LOSS_THRESH        = 0.5
+__C.YOLO.IOU_LOSS_THRESH        = 0.7
 __C.YOLO.UPSAMPLE_METHOD        = "resize"
-__C.YOLO.ORIGINAL_WEIGHT        = "./checkpoint/yolov3_coco.ckpt"
-__C.YOLO.DEMO_WEIGHT            = "./checkpoint/yolov3_coco_demo.ckpt"
+__C.YOLO.ORIGINAL_WEIGHT        = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/yolov3_coco.ckpt"
+__C.YOLO.DEMO_WEIGHT            = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/yolov3_coco_demo.ckpt"
 
 # Train options
 __C.TRAIN                       = edict()
 
-__C.TRAIN.ANNOT_PATH            = "./data/dataset/voc_train.txt"
-__C.TRAIN.BATCH_SIZE            = 6
+__C.TRAIN.ANNOT_PATH            = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/dataset/rdt_train.txt"
+__C.TRAIN.BATCH_SIZE            = 1
 __C.TRAIN.INPUT_SIZE            = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
 __C.TRAIN.DATA_AUG              = True
-__C.TRAIN.LEARN_RATE_INIT       = 1e-4
-__C.TRAIN.LEARN_RATE_END        = 1e-6
+__C.TRAIN.LEARN_RATE_INIT       = 1e-8
+__C.TRAIN.LEARN_RATE_END        = 1e-10
 __C.TRAIN.WARMUP_EPOCHS         = 2
-__C.TRAIN.FISRT_STAGE_EPOCHS    = 20
-__C.TRAIN.SECOND_STAGE_EPOCHS   = 30
-__C.TRAIN.INITIAL_WEIGHT        = "./checkpoint/yolov3_coco_demo.ckpt"
+__C.TRAIN.FISRT_STAGE_EPOCHS    = 1
+__C.TRAIN.SECOND_STAGE_EPOCHS   = 10
+__C.TRAIN.INITIAL_WEIGHT        = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/rdt_model"
 
 
 
 # TEST options
 __C.TEST                        = edict()
 
-__C.TEST.ANNOT_PATH             = "./data/dataset/voc_test.txt"
+__C.TEST.ANNOT_PATH             = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/dataset/rdt_train.txt"
 __C.TEST.BATCH_SIZE             = 2
-__C.TEST.INPUT_SIZE             = 544
+__C.TEST.INPUT_SIZE             = 512
 __C.TEST.DATA_AUG               = False
 __C.TEST.WRITE_IMAGE            = True
-__C.TEST.WRITE_IMAGE_PATH       = "./data/detection/"
+__C.TEST.WRITE_IMAGE_PATH       = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/detection"
 __C.TEST.WRITE_IMAGE_SHOW_LABEL = True
-__C.TEST.WEIGHT_FILE            = "./checkpoint/yolov3_test_loss=9.2099.ckpt-5"
+__C.TEST.WEIGHT_FILE            = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/rdt_model"
 __C.TEST.SHOW_LABEL             = True
-__C.TEST.SCORE_THRESHOLD        = 0.3
-__C.TEST.IOU_THRESHOLD          = 0.45
+__C.TEST.SCORE_THRESHOLD        = 0.1
+__C.TEST.IOU_THRESHOLD          = 0.1
+
+
+
+
+
 
 
 

diff --git a/core/dataset.py b/core/dataset.py
@@ -18,7 +18,7 @@
 import tensorflow as tf
 import core.utils as utils
 from core.config import cfg
-
+import math
 
 
 class Dataset(object):
@@ -156,17 +156,114 @@ def parse_annotation(self, annotation):
         line = annotation.split()
         image_path = line[0]
         if not os.path.exists(image_path):
-            raise KeyError("%s does not exist ... " %image_path)
+            image_path = image_path.replace("jpg","jpeg")
         image = np.array(cv2.imread(image_path))
-        bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])
+        bboxes = np.array([list(map(float, box.split(','))) for box in line[1:]],dtype=int)
 
         if self.data_aug:
             image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes))
             image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
             image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))
-
+            fraction = 0.20  # must be < 1.0
+            img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)  # hue, sat, val
+            S = img_hsv[:, :, 1].astype(np.float32)  # saturation
+            V = img_hsv[:, :, 2].astype(np.float32)  # value
+
+            a = (random.random() * 2 - 1) * fraction + 1
+            b = (random.random() * 2 - 1) * fraction + 1
+            S *= a
+            V *= b
+
+            img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
+            img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
+            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=image)
+
         image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes))
-        return image, bboxes
+#         print(bboxes)
+#         labels = bboxes.copy()
+#         labels[:, 0] = (bboxes[:, 0] - bboxes[:, 2] / 2)
+#         labels[:, 1] = (bboxes[:, 1] - bboxes[:, 3] / 2)
+#         labels[:, 2] = (bboxes[:, 0] + bboxes[:, 2] / 2)
+#         labels[:, 3] = (bboxes[:, 1] + bboxes[:, 3] / 2)
+
+        imw,labs = self.random_affine(image, bboxes, degrees=(-180, 180), translate=(0.10, 0.10), scale=(0.80, 1.20))
+#         print(labs)
+
+#         print(labels)
+
+        return imw, labs
+
+
+
+    def random_affine(self,img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
+                  borderValue=(127.5, 127.5, 127.5)):
+        # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
+
+        if targets is None:
+            targets = []
+        border = 0  # width of added border (optional)
+        height = img.shape[0] + border * 2
+        width = img.shape[1] + border * 2
+
+        # Rotation and Scale
+        R = np.eye(3)
+        a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
+#         a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
+        s = random.random() * (scale[1] - scale[0]) + scale[0]
+        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)
+
+        # Translation
+        T = np.eye(3)
+        T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
+        T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)
+
+        # Shear
+        S = np.eye(3)
+        S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
+        S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)
+
+        M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
+        imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
+                                  borderValue=borderValue)  # BGR order borderValue
+
+        # Return warped points also
+        if len(targets) > 0:
+            n = targets.shape[0]
+            points = targets[:, 0:4].copy()
+            area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])
+
+            # warp points
+            xy = np.ones((n * 4, 3))
+            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
+            xy = (xy @ M.T)[:, :2].reshape(n, 8)
+
+            # create new boxes
+            x = xy[:, [0, 2, 4, 6]]
+            y = xy[:, [1, 3, 5, 7]]
+            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+            # # apply angle-based reduction of bounding boxes
+            # radians = a * math.pi / 180
+            # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
+            # x = (xy[:, 2] + xy[:, 0]) / 2
+            # y = (xy[:, 3] + xy[:, 1]) / 2
+            # w = (xy[:, 2] - xy[:, 0]) * reduction
+            # h = (xy[:, 3] - xy[:, 1]) * reduction
+            # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
+
+            # clip warped boxes to the image, then keep only boxes with w and h > 4 px, more than 10% of their pre-warp area, and aspect ratio < 10
+            xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
+            xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
+            w = xy[:, 2] - xy[:, 0]
+            h = xy[:, 3] - xy[:, 1]
+            area = w * h
+            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
+            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)
+
+            targets = targets[i]
+            targets[:, 0:4] = xy[i]
+        return imw, targets
 
     def bbox_iou(self, boxes1, boxes2):
 
@@ -200,7 +297,6 @@ def preprocess_true_boxes(self, bboxes):
         for bbox in bboxes:
             bbox_coor = bbox[:4]
             bbox_class_ind = bbox[4]
-
             onehot = np.zeros(self.num_classes, dtype=np.float)
             onehot[bbox_class_ind] = 1.0
             uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)

diff --git a/data/anchors/rdt_anchors.txt b/data/anchors/rdt_anchors.txt
@@ -0,0 +1 @@
+15.9,7.6, 7.4,17.0, 22.3,10.4, 29.1,12.8, 38.8,16.2, 9.8,130.3, 124.4,11.2, 182.7,14.6, 235.8,20.9
diff --git a/data/anchors/rdt_anchors_rot.txt b/data/anchors/rdt_anchors_rot.txt
@@ -0,0 +1 @@
+140,60, 120,77, 138,138, 148,107, 173,123, 126,95, 160,160, 120,120, 112,58
diff --git a/data/classes/rdt.names b/data/classes/rdt.names
@@ -0,0 +1,4 @@
+top_pattern
+test_area
+arrow
+rdt
diff --git a/data/classes/rdt_rot.names b/data/classes/rdt_rot.names
@@ -0,0 +1,30 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
diff --git a/data/dataset/rdt_test.txt b/data/dataset/rdt_test.txt
@@ -0,0 +1,2 @@
+D:/source/repos/audere/dataset/images_yolo/ef547ec0.jpg 548.816,1284.896,807.080,1407.322,0.000 1501.450,1293.965,1810.689,1416.391,1.000 1899.045,1301.899,2185.631,1403.920,2.000 216.919,1281.496,2380.461,1445.862,3.000
+D:/source/repos/audere/dataset/images_yolo/0a506c22.jpg 88.566,627.493,427.327,791.283,0.000 1237.693,639.426,1574.241,775.564,1.000 1656.164,623.068,1937.357,764.724,2.000 -0.001,612.000,2090.132,773.577,3.000
Original file line number	Diff line number	Diff line change
		@@ -1,2 +0,0 @@
		model_checkpoint_path: "yolov3_test_loss=2530.1914.ckpt-1"
		all_model_checkpoint_paths: "yolov3_test_loss=2530.1914.ckpt-1"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		15.9,7.6, 7.4,17.0, 22.3,10.4, 29.1,12.8, 38.8,16.2, 9.8,130.3, 124.4,11.2, 182.7,14.6, 235.8,20.9
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		140,60, 120,77, 138,138, 148,107, 173,123, 126,95, 160,160, 120,120, 112,58
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,30 @@
		0
		1
		2
		3
		4
		5
		6
		7
		8
		9
		10
		11
		12
		13
		14
		15
		16
		17
		18
		19
		20
		21
		22
		23
		24
		25
		26
		27
		28
		29
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		D:/source/repos/audere/dataset/images_yolo/ef547ec0.jpg 548.816,1284.896,807.080,1407.322,0.000 1501.450,1293.965,1810.689,1416.391,1.000 1899.045,1301.899,2185.631,1403.920,2.000 216.919,1281.496,2380.461,1445.862,3.000
		D:/source/repos/audere/dataset/images_yolo/0a506c22.jpg 88.566,627.493,427.327,791.283,0.000 1237.693,639.426,1574.241,775.564,1.000 1656.164,623.068,1937.357,764.724,2.000 -0.001,612.000,2090.132,773.577,3.000