Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
# [TensorFlow2.0-Examples/4-Object_Detection/YOLOV3](https://github.com/YunYang1994/TensorFlow2.0-Examples/tree/master/4-Object_Detection/YOLOV3)

## Please install tensorflow-gpu 1.11.0 ! Since Tensorflow is fucking ridiculous !

## part 1. Introduction [[代码剖析]](https://github.com/YunYang1994/CodeFun/blob/master/002-deep_learning/YOLOv3.md)
## part 1. Introduction

An implementation of the YOLO v3 object detector in TensorFlow. The full details are in [this paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf). This project covers the following components:<br>
- [x] [YOLO v3 architecture](https://github.com/YunYang1994/tensorflow-yolov3/blob/master/core/yolov3.py)
Expand Down
2 changes: 0 additions & 2 deletions checkpoint/checkpoint
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
model_checkpoint_path: "yolov3_test_loss=2530.1914.ckpt-1"
all_model_checkpoint_paths: "yolov3_test_loss=2530.1914.ckpt-1"
54 changes: 33 additions & 21 deletions core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
# File name : config.py
# Author : YunYang1994
# Created date: 2019-02-28 13:06:54
# Description :
# Description : configurations
#
#================================================================

from easydict import EasyDict as edict


import os
import settings
settings.RDT_GIT_ROOT
YOLO_MODEL_VER=1
__C = edict()
# Consumers can get config by: from config import cfg

Expand All @@ -23,46 +25,56 @@
__C.YOLO = edict()

# Set the class name
__C.YOLO.CLASSES = "./data/classes/coco.names"
__C.YOLO.ANCHORS = "./data/anchors/basline_anchors.txt"
# Choose the class-name list and anchor file that belong to the selected
# model version. Version 1 uses the four named RDT classes; version 2 and
# above use the "rot" variants. Any other value matches neither branch, so
# CLASSES / ANCHORS keep whatever was assigned to them before this point.
if YOLO_MODEL_VER == 1:
    __C.YOLO.CLASSES = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/classes/rdt.names"
    __C.YOLO.ANCHORS = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/anchors/rdt_anchors.txt"
elif YOLO_MODEL_VER >= 2:
    __C.YOLO.CLASSES = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/classes/rdt_rot.names"
    __C.YOLO.ANCHORS = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/anchors/rdt_anchors_rot.txt"

# Remaining YOLO model options. Statements run top to bottom, so a key that
# is assigned twice below ends up holding the value of the later assignment.
__C.YOLO.MOVING_AVE_DECAY = 0.9995
# presumably one stride per detection scale; with ANCHOR_PER_SCALE = 3 that
# would require 9 width,height pairs in the anchors file — TODO confirm
# against the model code.
__C.YOLO.STRIDES = [8, 16, 32]
__C.YOLO.ANCHOR_PER_SCALE = 3
# IOU_LOSS_THRESH: 0.5 is immediately replaced by 0.7.
__C.YOLO.IOU_LOSS_THRESH = 0.5
__C.YOLO.IOU_LOSS_THRESH = 0.7
__C.YOLO.UPSAMPLE_METHOD = "resize"
# Weight paths: the relative "./checkpoint/..." values are replaced by paths
# built from settings.RDT_GIT_ROOT, so they no longer depend on the current
# working directory.
__C.YOLO.ORIGINAL_WEIGHT = "./checkpoint/yolov3_coco.ckpt"
__C.YOLO.DEMO_WEIGHT = "./checkpoint/yolov3_coco_demo.ckpt"
__C.YOLO.ORIGINAL_WEIGHT = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/yolov3_coco.ckpt"
__C.YOLO.DEMO_WEIGHT = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/yolov3_coco_demo.ckpt"

# Train options
# Statements run top to bottom, so a key that is assigned twice below ends up
# holding the value of the later assignment.
__C.TRAIN = edict()

# Annotation file and batch size: the VOC path / batch size 6 are replaced by
# the RDT training list under settings.RDT_GIT_ROOT and batch size 1.
__C.TRAIN.ANNOT_PATH = "./data/dataset/voc_train.txt"
__C.TRAIN.BATCH_SIZE = 6
__C.TRAIN.ANNOT_PATH = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/dataset/rdt_train.txt"
__C.TRAIN.BATCH_SIZE = 1
# presumably the candidate square input sizes used for multi-scale training —
# TODO confirm against the dataset loader.
__C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
__C.TRAIN.DATA_AUG = True
# Learning-rate range: 1e-4 .. 1e-6 is replaced by 1e-8 .. 1e-10.
# NOTE(review): the effective values are four orders of magnitude smaller
# than the ones they replace — confirm this is intentional.
__C.TRAIN.LEARN_RATE_INIT = 1e-4
__C.TRAIN.LEARN_RATE_END = 1e-6
__C.TRAIN.LEARN_RATE_INIT = 1e-8
__C.TRAIN.LEARN_RATE_END = 1e-10
__C.TRAIN.WARMUP_EPOCHS = 2
# NOTE(review): "FISRT" is a misspelling of "FIRST", but the key is part of
# the config interface; presumably consumers read it under this exact name,
# so it must not be renamed here alone — verify against the training script.
# Stage lengths: 20 / 30 epochs are replaced by 1 / 10.
__C.TRAIN.FISRT_STAGE_EPOCHS = 20
__C.TRAIN.SECOND_STAGE_EPOCHS = 30
# Initial weights: the COCO demo checkpoint is replaced by the "rdt_model"
# checkpoint under settings.RDT_GIT_ROOT.
__C.TRAIN.INITIAL_WEIGHT = "./checkpoint/yolov3_coco_demo.ckpt"
__C.TRAIN.FISRT_STAGE_EPOCHS = 1
__C.TRAIN.SECOND_STAGE_EPOCHS = 10
__C.TRAIN.INITIAL_WEIGHT = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/rdt_model"



# TEST options
# Statements run top to bottom, so a key that is assigned twice below ends up
# holding the value of the later assignment.
__C.TEST = edict()

# NOTE(review): the effective ANNOT_PATH ends in "rdt_train.txt", i.e. it
# names a training annotation list rather than a test list, so evaluation
# would run on training data — confirm this is intended.
__C.TEST.ANNOT_PATH = "./data/dataset/voc_test.txt"
__C.TEST.ANNOT_PATH = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/dataset/rdt_train.txt"
__C.TEST.BATCH_SIZE = 2
# INPUT_SIZE: 544 is replaced by 512.
__C.TEST.INPUT_SIZE = 544
__C.TEST.INPUT_SIZE = 512
__C.TEST.DATA_AUG = False
__C.TEST.WRITE_IMAGE = True
# NOTE(review): the replaced value ended with "/" and the effective one does
# not. If a consumer builds file names by plain string concatenation the
# output would land next to, not inside, the "detection" directory — verify
# how this path is used.
__C.TEST.WRITE_IMAGE_PATH = "./data/detection/"
__C.TEST.WRITE_IMAGE_PATH = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/data/detection"
__C.TEST.WRITE_IMAGE_SHOW_LABEL = True
# Weights to evaluate: a specific COCO-era checkpoint is replaced by the
# "rdt_model" checkpoint under settings.RDT_GIT_ROOT.
__C.TEST.WEIGHT_FILE = "./checkpoint/yolov3_test_loss=9.2099.ckpt-5"
__C.TEST.WEIGHT_FILE = settings.RDT_GIT_ROOT+"/tensorflow-yolov3/checkpoint/rdt_model"
__C.TEST.SHOW_LABEL = True
# Thresholds: 0.3 / 0.45 are replaced by 0.1 / 0.1.
# presumably SCORE_THRESHOLD filters detections by confidence and
# IOU_THRESHOLD is the NMS overlap limit — TODO confirm against the
# evaluation code.
__C.TEST.SCORE_THRESHOLD = 0.3
__C.TEST.IOU_THRESHOLD = 0.45
__C.TEST.SCORE_THRESHOLD = 0.1
__C.TEST.IOU_THRESHOLD = 0.1








Expand Down
108 changes: 102 additions & 6 deletions core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import tensorflow as tf
import core.utils as utils
from core.config import cfg

import math


class Dataset(object):
Expand Down Expand Up @@ -156,17 +156,114 @@ def parse_annotation(self, annotation):
line = annotation.split()
image_path = line[0]
if not os.path.exists(image_path):
raise KeyError("%s does not exist ... " %image_path)
image_path = image_path.replace("jpg","jpeg")
image = np.array(cv2.imread(image_path))
bboxes = np.array([list(map(int, box.split(','))) for box in line[1:]])
bboxes = np.array([list(map(float, box.split(','))) for box in line[1:]],dtype=int)

if self.data_aug:
image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes))
image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))

fraction = 0.20 # must be < 1.0
img_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) # hue, sat, val
S = img_hsv[:, :, 1].astype(np.float32) # saturation
V = img_hsv[:, :, 2].astype(np.float32) # value

a = (random.random() * 2 - 1) * fraction + 1
b = (random.random() * 2 - 1) * fraction + 1
S *= a
V *= b

img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255)
img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=image)

image, bboxes = utils.image_preporcess(np.copy(image), [self.train_input_size, self.train_input_size], np.copy(bboxes))
return image, bboxes
# print(bboxes)
# labels = bboxes.copy()
# labels[:, 0] = (bboxes[:, 0] - bboxes[:, 2] / 2)
# labels[:, 1] = (bboxes[:, 1] - bboxes[:, 3] / 2)
# labels[:, 2] = (bboxes[:, 0] + bboxes[:, 2] / 2)
# labels[:, 3] = (bboxes[:, 1] + bboxes[:, 3] / 2)

imw,labs = self.random_affine(image, bboxes, degrees=(-180, 180), translate=(0.10, 0.10), scale=(0.80, 1.20))
# print(labs)

# print(labels)

return imw, labs



def random_affine(self, img, targets=(), degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    """Warp an image with one random affine transform and move its boxes along.

    A rotation+scale about the image centre, a translation and a shear are
    each drawn uniformly from the given ranges, composed into a single 3x3
    matrix (``shear @ translation @ rotation``) and applied with
    ``cv2.warpPerspective``. The output image has the same size as the
    input; pixels not covered by the warped input are filled with
    ``borderValue``.

    Args:
        img: Image array. ``img.shape[0]`` is used as the height and
            ``img.shape[1]`` as the width.
        targets: ``None``, an empty sequence, or a 2-D NumPy array whose
            first four columns are ``x1, y1, x2, y2`` box corners. Any
            further columns are carried through unchanged.
        degrees: ``(min, max)`` rotation angle in degrees.
        translate: ``(x_fraction, y_fraction)``. The x shift is drawn from
            ``+/- x_fraction * width`` and the y shift from
            ``+/- y_fraction * height``.
        scale: ``(min, max)`` isotropic scale factor.
        shear: ``(min, max)`` angle in degrees; drawn independently for the
            x and y shear terms.
        borderValue: Fill value passed to ``cv2.warpPerspective``.

    Returns:
        ``(warped_image, targets)``. When boxes were supplied, each one is
        replaced by the axis-aligned box enclosing its four warped corners,
        clipped to the image. Boxes that end up 4 px or smaller on a side,
        keep 10% or less of their original area, or have an aspect ratio of
        10 or more are dropped. The returned array is a new array; the
        caller's ``targets`` is not modified. New coordinates are written
        into an array of the input's dtype, so an integer ``targets`` array
        yields integer (truncated) coordinates.
    """
    if targets is None:
        targets = []
    border = 0  # width of added border (optional)
    height = img.shape[0] + border * 2
    width = img.shape[1] + border * 2

    # Rotation and scale about the image centre.
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation. The x shift is a fraction of the WIDTH (shape[1]) and the
    # y shift a fraction of the HEIGHT (shape[0]); using them the other way
    # round only happens to work for square images.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[1] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[0] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    # Matrix products do not commute, so the composition order matters:
    # rotate/scale first, then translate, then shear.
    M = S @ T @ R
    imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Move the boxes with the image.
    if len(targets) > 0:
        n = targets.shape[0]
        points = targets[:, 0:4].copy()
        area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

        # Warp all four corners of every box in homogeneous coordinates.
        # M's last row is (0, 0, 1), so no perspective divide is needed.
        corners = np.ones((n * 4, 3))
        corners[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        corners = (corners @ M.T)[:, :2].reshape(n, 8)

        # Axis-aligned box enclosing the warped corners.
        xs = corners[:, [0, 2, 4, 6]]
        ys = corners[:, [1, 3, 5, 7]]
        xy = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)

        # Clip to the image, then reject boxes that became degenerate.
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
        w = xy[:, 2] - xy[:, 0]
        h = xy[:, 3] - xy[:, 1]
        area = w * h
        ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))  # 1e-16 guards against division by zero
        keep = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

        # Boolean indexing copies, so the caller's array is left untouched.
        targets = targets[keep]
        targets[:, 0:4] = xy[keep]
    return imw, targets

def bbox_iou(self, boxes1, boxes2):

Expand Down Expand Up @@ -200,7 +297,6 @@ def preprocess_true_boxes(self, bboxes):
for bbox in bboxes:
bbox_coor = bbox[:4]
bbox_class_ind = bbox[4]

onehot = np.zeros(self.num_classes, dtype=np.float)
onehot[bbox_class_ind] = 1.0
uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)
Expand Down
1 change: 1 addition & 0 deletions data/anchors/rdt_anchors.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
15.9,7.6, 7.4,17.0, 22.3,10.4, 29.1,12.8, 38.8,16.2, 9.8,130.3, 124.4,11.2, 182.7,14.6, 235.8,20.9
1 change: 1 addition & 0 deletions data/anchors/rdt_anchors_rot.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
140,60, 120,77, 138,138, 148,107, 173,123, 126,95, 160,160, 120,120, 112,58
4 changes: 4 additions & 0 deletions data/classes/rdt.names
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
top_pattern
test_area
arrow
rdt
30 changes: 30 additions & 0 deletions data/classes/rdt_rot.names
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
2 changes: 2 additions & 0 deletions data/dataset/rdt_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
D:/source/repos/audere/dataset/images_yolo/ef547ec0.jpg 548.816,1284.896,807.080,1407.322,0.000 1501.450,1293.965,1810.689,1416.391,1.000 1899.045,1301.899,2185.631,1403.920,2.000 216.919,1281.496,2380.461,1445.862,3.000
D:/source/repos/audere/dataset/images_yolo/0a506c22.jpg 88.566,627.493,427.327,791.283,0.000 1237.693,639.426,1574.241,775.564,1.000 1656.164,623.068,1937.357,764.724,2.000 -0.001,612.000,2090.132,773.577,3.000
Loading