Skip to content

Commit

Permalink
resnet_yolo?
Browse files Browse the repository at this point in the history
  • Loading branch information
xiongzihua committed Jul 3, 2018
1 parent 5bc2fff commit cfdb6dd
Show file tree
Hide file tree
Showing 12 changed files with 27,537 additions and 22,279 deletions.
38 changes: 29 additions & 9 deletions dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import torchvision.transforms as transforms

import cv2
import matplotlib.pyplot as plt

class yoloDataset(data.Dataset):
image_size = 448
Expand Down Expand Up @@ -73,7 +74,19 @@ def __getitem__(self,idx):
img = self.RandomHue(img)
img = self.RandomSaturation(img)
img,boxes,labels = self.randomShift(img,boxes,labels)

img,boxes,labels = self.randomCrop(img,boxes,labels)
# #debug
# box_show = boxes.numpy().reshape(-1)
# print(box_show)
# img_show = self.BGR2RGB(img)
# pt1=(int(box_show[0]),int(box_show[1])); pt2=(int(box_show[2]),int(box_show[3]))
# cv2.rectangle(img_show,pt1=pt1,pt2=pt2,color=(0,255,0),thickness=1)
# plt.figure()

# # cv2.rectangle(img,pt1=(10,10),pt2=(100,100),color=(0,255,0),thickness=1)
# plt.imshow(img_show)
# plt.show()
# #debug
h,w,_ = img.shape
boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)
img = self.BGR2RGB(img) #because pytorch pretrained model use RGB
Expand All @@ -93,8 +106,9 @@ def encoder(self,boxes,labels):
labels (tensor) [...]
return grid_num x grid_num x 30 (14x14x30 with grid_num = 14)
'''
target = torch.zeros((7,7,30))
cell_size = 1./7
grid_num = 14
target = torch.zeros((grid_num,grid_num,30))
cell_size = 1./grid_num
wh = boxes[:,2:]-boxes[:,:2]
cxcy = (boxes[:,2:]+boxes[:,:2])/2
for i in range(cxcy.size()[0]):
Expand Down Expand Up @@ -188,9 +202,9 @@ def randomShift(self,bgr,boxes,labels):
return bgr,boxes,labels

def randomScale(self,bgr,boxes):
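#Keep the height fixed and stretch the width by a factor of 0.6-1.4 to deform the image
#Keep the height fixed and stretch the width by a factor of 0.8-1.2 to deform the image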
if random.random() < 0.5:
scale = random.uniform(0.6,1.4)
scale = random.uniform(0.8,1.2)
height,width,c = bgr.shape
bgr = cv2.resize(bgr,(int(width*scale),height))
scale_tensor = torch.FloatTensor([[scale,1,scale,1]]).expand_as(boxes)
Expand Down Expand Up @@ -219,6 +233,11 @@ def randomCrop(self,bgr,boxes,labels):
box_shift = torch.FloatTensor([[x,y,x,y]]).expand_as(boxes_in)

boxes_in = boxes_in - box_shift
boxes_in[:,0]=boxes_in[:,0].clamp_(min=0,max=w)
boxes_in[:,2]=boxes_in[:,2].clamp_(min=0,max=w)
boxes_in[:,1]=boxes_in[:,1].clamp_(min=0,max=h)
boxes_in[:,3]=boxes_in[:,3].clamp_(min=0,max=h)

labels_in = labels[mask.view(-1)]
img_croped = bgr[y:y+h,x:x+w,:]
return img_croped,boxes_in,labels_in
Expand Down Expand Up @@ -252,12 +271,13 @@ def random_bright(self, im, delta=16):
def main():
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
file_root = '/media/xiong/449C8E929C8E7DE4/codedata/voc2007/VOCdevkit_train/VOC2007/JPEGImages/'
train_dataset = yoloDataset(root=file_root,list_file='voc2007train.txt',train=True,transform = [transforms.ToTensor()] )
file_root = '/home/xzh/data/VOCdevkit/VOC2012/allimgs/'
train_dataset = yoloDataset(root=file_root,list_file='voc12_trainval.txt',train=True,transform = [transforms.ToTensor()] )
train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=0)
train_iter = iter(train_loader)
img,target = next(train_iter)
print(img,target)
for i in range(100):
img,target = next(train_iter)
print(img,target)


if __name__ == '__main__':
Expand Down
Binary file added dog.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 11 additions & 23 deletions eval_voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#created by xiongzihua
#
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import numpy as np
VOC_CLASSES = ( # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
Expand Down Expand Up @@ -142,48 +142,33 @@ def test_eval():
preds = defaultdict(list)
image_list = [] #image path list

# f = open('voc2007test.txt')
f = open('voc07_test.txt')
f = open('voc2007test.txt')
lines = f.readlines()
file_list = []
for line in lines:
splited = line.strip().split()
file_list.append(splited)
f.close()

f_diff = open('voc07_test_difficult.txt')
lines = f_diff.readlines()
difficult_list = []
for line in lines:
splited = line.strip().split()
# print(splited)
difficult_list.append(splited)
f_diff.close()
print('---prepare target---')
for index,image_file in enumerate(file_list):
image_diff = difficult_list[index]
image_id = image_file[0]
assert image_id == image_diff[0]

image_list.append(image_id)
num_obj = (len(image_file) - 1) // 5
for i in range(num_obj):
difficult = image_diff[i+1]
x1 = int(image_file[1+5*i])
y1 = int(image_file[2+5*i])
x2 = int(image_file[3+5*i])
y2 = int(image_file[4+5*i])
c = int(image_file[5+5*i])
class_name = VOC_CLASSES[c]
if difficult=='1':
continue
else:
target[(image_id,class_name)].append([x1,y1,x2,y2])
target[(image_id,class_name)].append([x1,y1,x2,y2])
#
#start test
#
print('---start test---')
model = vgg16_bn(pretrained=False)
# model = vgg16_bn(pretrained=False)
model = resnet50()
# model.classifier = nn.Sequential(
# nn.Linear(512 * 7 * 7, 4096),
# nn.ReLU(True),
Expand All @@ -206,12 +191,15 @@ def test_eval():
# for left_up,right_bottom,class_name,_,prob in result:
# color = Color[VOC_CLASSES.index(class_name)]
# cv2.rectangle(image,left_up,right_bottom,color,2)
# cv2.putText(image,class_name+str(round(prob,2)),left_up,cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,0,0),1,cv2.LINE_AA)
# #print(prob)
# label = class_name+str(round(prob,2))
# text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
# p1 = (left_up[0], left_up[1]- text_size[1])
# cv2.rectangle(image, (p1[0] - 2//2, p1[1] - 2 - baseline), (p1[0] + text_size[0], p1[1] + text_size[1]), color, -1)
# cv2.putText(image, label, (p1[0], p1[1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1, 8)

# cv2.imwrite('testimg/'+image_path,image)
# count += 1
# if count == 300:
# if count == 100:
# break

print('---start evaluate---')
Expand Down
170 changes: 50 additions & 120 deletions log.txt
Original file line number Diff line number Diff line change
@@ -1,120 +1,50 @@
0 6.5415048876116355
1 6.100706057394705
2 5.776972419984879
3 5.554384683793591
4 5.3748845561858145
5 5.226670012935515
6 5.129751951463761
7 5.028328594084709
8 4.927295440243136
9 4.900093263195407
10 4.782809845093758
11 4.747922060566564
12 4.7022608941601165
13 4.593218278884888
14 4.5595668500469575
15 4.5256145277330955
16 4.487990423940843
17 4.437278278412357
18 4.405658186635663
19 4.345585892277379
20 4.351668170190627
21 4.281654350219235
22 4.271116087513585
23 4.218694491540232
24 4.217058497090494
25 4.18591207842673
26 4.1442133134411225
27 4.136817063054731
28 4.138740881027714
29 4.141887569427491
30 4.0587586833584695
31 4.073217805739372
32 4.03728237613555
33 3.9956122844449937
34 3.9974620434545702
35 4.023998829626268
36 4.003252243226575
37 3.96282632120194
38 3.972557355511573
39 3.9570388470926594
40 3.9371616824980706
41 3.9387538525366015
42 3.9152787362375565
43 3.9202156805223036
44 3.9222787534036945
45 3.9003318632802655
46 3.8846307400734195
47 3.8660433077043104
48 3.8885516658906014
49 3.8739484986951274
50 3.8828188911561043
51 3.879196426945348
52 3.8579012778497512
53 3.8586445162373204
54 3.8485875283518145
55 3.9004847126622355
56 3.856565999984741
57 3.8792484175774358
58 3.8369168358464396
59 3.8326780919105774
60 3.8230616754101168
61 3.8407709090940414
62 3.863103226692446
63 3.8657945602170884
64 3.8421273939071163
65 3.7986517229387835
66 3.82103298864057
67 3.8075707174116564
68 3.822302801378312
69 3.8013139247894285
70 3.805841762788834
71 3.794689299983363
72 3.827355009509671
73 3.8235756274192565
74 3.8244812150155343
75 3.7931045101534937
76 3.8153305530548094
77 3.811681870491274
78 3.803573694536763
79 3.797684003460792
80 3.724775042072419
81 3.7086529916332616
82 3.69718047418902
83 3.7072122058560772
84 3.697758961492969
85 3.695369718151708
86 3.695834325205895
87 3.6924753996633712
88 3.6935014640131305
89 3.6894948267167615
90 3.684856927779413
91 3.6949271040578044
92 3.6909136433755196
93 3.684668857820572
94 3.6938365920897454
95 3.6776908213092434
96 3.68652562479819
97 3.6927355427895825
98 3.682749411367601
99 3.68042238989184
100 3.6780288465561406
101 3.6768690047725556
102 3.675544171948587
103 3.678370831089635
104 3.6778974602299352
105 3.67521142344321
106 3.6865312660894087
107 3.6777405446575533
108 3.680706839407644
109 3.6763052263567526
110 3.6846513532823133
111 3.6738818607022687
112 3.677423651756779
113 3.6801375196826074
114 3.67694855043965
115 3.684765911102295
116 3.6786774143095937
117 3.681097923555682
118 3.6744024461315523
119 3.679993252600393
0 5.797461136527684
1 5.030571444598949
2 4.556520286965485
3 4.215520957817778
4 4.010316675987797
5 3.880362969089821
6 3.816462703373121
7 3.692494175860271
8 3.6367522645111823
9 3.653451669619279
10 3.6062179107021017
11 3.4957861629661156
12 3.479828889819159
13 3.4403432498231603
14 3.569326110508131
15 3.4177859134720143
16 3.4005619149277178
17 3.354262824219782
18 3.3333750905622033
19 3.330571729203929
20 3.330898431764133
21 3.345849901005842
22 3.2702495539245975
23 3.286139108132625
24 3.3057921491383353
25 3.2670130712398584
26 3.2556667172390483
27 3.281399481538413
28 3.2334959725826833
29 3.2227006891499395
30 3.122259281683659
31 3.1010478239704447
32 3.107929205548936
33 3.0857788859934048
34 3.0843215345760475
35 3.0651772505995156
36 3.0820578549795106
37 3.06958488277767
38 3.0720797805970417
39 3.0617620495782383
40 3.058031329210254
41 3.061978076391174
42 3.0600602747737495
43 3.0567766186119854
44 3.0753259716402503
45 3.0577328608231844
46 3.0421694407716466
47 3.053754838768411
48 3.0488021085803636
49 3.049299760717125
Binary file added person.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit cfdb6dd

Please sign in to comment.