resnet_yolo?

abeardear · Jul 3, 2018 · cfdb6dd · cfdb6dd
1 parent 5bc2fff
commit cfdb6dd
Show file tree

Hide file tree

Showing 12 changed files with 27,537 additions and 22,279 deletions.
diff --git a/dataset.py b/dataset.py
@@ -17,6 +17,7 @@
 import torchvision.transforms as transforms
 
 import cv2
+import matplotlib.pyplot as plt
 
 class yoloDataset(data.Dataset):
     image_size = 448
@@ -73,7 +74,19 @@ def __getitem__(self,idx):
             img = self.RandomHue(img)
             img = self.RandomSaturation(img)
             img,boxes,labels = self.randomShift(img,boxes,labels)
-
+            img,boxes,labels = self.randomCrop(img,boxes,labels)
+        # #debug
+        # box_show = boxes.numpy().reshape(-1)
+        # print(box_show)
+        # img_show = self.BGR2RGB(img)
+        # pt1=(int(box_show[0]),int(box_show[1])); pt2=(int(box_show[2]),int(box_show[3]))
+        # cv2.rectangle(img_show,pt1=pt1,pt2=pt2,color=(0,255,0),thickness=1)
+        # plt.figure()
+
+        # # cv2.rectangle(img,pt1=(10,10),pt2=(100,100),color=(0,255,0),thickness=1)
+        # plt.imshow(img_show)
+        # plt.show()
+        # #debug
         h,w,_ = img.shape
         boxes /= torch.Tensor([w,h,w,h]).expand_as(boxes)
         img = self.BGR2RGB(img) #because pytorch pretrained model use RGB
@@ -93,8 +106,9 @@ def encoder(self,boxes,labels):
         labels (tensor) [...]
         return grid_num x grid_num x 30 (14x14x30)
         '''
-        target = torch.zeros((7,7,30))
-        cell_size = 1./7
+        grid_num = 14
+        target = torch.zeros((grid_num,grid_num,30))
+        cell_size = 1./grid_num
         wh = boxes[:,2:]-boxes[:,:2]
         cxcy = (boxes[:,2:]+boxes[:,:2])/2
         for i in range(cxcy.size()[0]):
@@ -188,9 +202,9 @@ def randomShift(self,bgr,boxes,labels):
         return bgr,boxes,labels
 
     def randomScale(self,bgr,boxes):
-        #固定住高度，以0.6-1.4伸缩宽度，做图像形变
+        #固定住高度，以0.8-1.2伸缩宽度，做图像形变
         if random.random() < 0.5:
-            scale = random.uniform(0.6,1.4)
+            scale = random.uniform(0.8,1.2)
             height,width,c = bgr.shape
             bgr = cv2.resize(bgr,(int(width*scale),height))
             scale_tensor = torch.FloatTensor([[scale,1,scale,1]]).expand_as(boxes)
@@ -219,6 +233,11 @@ def randomCrop(self,bgr,boxes,labels):
             box_shift = torch.FloatTensor([[x,y,x,y]]).expand_as(boxes_in)
 
             boxes_in = boxes_in - box_shift
+            boxes_in[:,0]=boxes_in[:,0].clamp_(min=0,max=w)
+            boxes_in[:,2]=boxes_in[:,2].clamp_(min=0,max=w)
+            boxes_in[:,1]=boxes_in[:,1].clamp_(min=0,max=h)
+            boxes_in[:,3]=boxes_in[:,3].clamp_(min=0,max=h)
+
             labels_in = labels[mask.view(-1)]
             img_croped = bgr[y:y+h,x:x+w,:]
             return img_croped,boxes_in,labels_in
@@ -252,12 +271,13 @@ def random_bright(self, im, delta=16):
 def main():
     from torch.utils.data import DataLoader
     import torchvision.transforms as transforms
-    file_root = '/media/xiong/449C8E929C8E7DE4/codedata/voc2007/VOCdevkit_train/VOC2007/JPEGImages/'
-    train_dataset = yoloDataset(root=file_root,list_file='voc2007train.txt',train=True,transform = [transforms.ToTensor()] )
+    file_root = '/home/xzh/data/VOCdevkit/VOC2012/allimgs/'
+    train_dataset = yoloDataset(root=file_root,list_file='voc12_trainval.txt',train=True,transform = [transforms.ToTensor()] )
     train_loader = DataLoader(train_dataset,batch_size=1,shuffle=False,num_workers=0)
     train_iter = iter(train_loader)
-    img,target = next(train_iter)
-    print(img,target)
+    for i in range(100):
+        img,target = next(train_iter)
+        print(img,target)
 
 
 if __name__ == '__main__':

diff --git a/dog.jpg b/dog.jpg
diff --git a/eval_voc.py b/eval_voc.py
@@ -3,7 +3,7 @@
 #created by xiongzihua
 #
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 import numpy as np
 VOC_CLASSES = (    # always index 0
     'aeroplane', 'bicycle', 'bird', 'boat',
@@ -142,48 +142,33 @@ def test_eval():
     preds = defaultdict(list)
     image_list = [] #image path list
 
-    # f = open('voc2007test.txt')
-    f = open('voc07_test.txt')
+    f = open('voc2007test.txt')
     lines = f.readlines()
     file_list = []
     for line in lines:
         splited = line.strip().split()
         file_list.append(splited)
     f.close()
-
-    f_diff = open('voc07_test_difficult.txt')
-    lines = f_diff.readlines()
-    difficult_list = []
-    for line in lines:
-        splited = line.strip().split()
-        # print(splited)
-        difficult_list.append(splited)
-    f_diff.close()
     print('---prepare target---')
     for index,image_file in enumerate(file_list):
-        image_diff = difficult_list[index]
         image_id = image_file[0]
-        assert image_id == image_diff[0]
 
         image_list.append(image_id)
         num_obj = (len(image_file) - 1) // 5
         for i in range(num_obj):
-            difficult = image_diff[i+1]
             x1 = int(image_file[1+5*i])
             y1 = int(image_file[2+5*i])
             x2 = int(image_file[3+5*i])
             y2 = int(image_file[4+5*i])
             c = int(image_file[5+5*i])
             class_name = VOC_CLASSES[c]
-            if difficult=='1':
-                continue
-            else:
-                target[(image_id,class_name)].append([x1,y1,x2,y2])
+            target[(image_id,class_name)].append([x1,y1,x2,y2])
     #
     #start test
     #
     print('---start test---')
-    model = vgg16_bn(pretrained=False)
+    # model = vgg16_bn(pretrained=False)
+    model = resnet50()
     # model.classifier = nn.Sequential(
     #             nn.Linear(512 * 7 * 7, 4096),
     #             nn.ReLU(True),
@@ -206,12 +191,15 @@ def test_eval():
         # for left_up,right_bottom,class_name,_,prob in result:
         #     color = Color[VOC_CLASSES.index(class_name)]
         #     cv2.rectangle(image,left_up,right_bottom,color,2)
-        #     cv2.putText(image,class_name+str(round(prob,2)),left_up,cv2.FONT_HERSHEY_SIMPLEX,0.6,(0,0,0),1,cv2.LINE_AA)
-        #     #print(prob)
+        #     label = class_name+str(round(prob,2))
+        #     text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
+        #     p1 = (left_up[0], left_up[1]- text_size[1])
+        #     cv2.rectangle(image, (p1[0] - 2//2, p1[1] - 2 - baseline), (p1[0] + text_size[0], p1[1] + text_size[1]), color, -1)
+        #     cv2.putText(image, label, (p1[0], p1[1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1, 8)
 
         # cv2.imwrite('testimg/'+image_path,image)
         # count += 1
-        # if count == 300:
+        # if count == 100:
         #     break
 
     print('---start evaluate---')

diff --git a/log.txt b/log.txt
@@ -1,120 +1,50 @@
-0	6.5415048876116355
-1	6.100706057394705
-2	5.776972419984879
-3	5.554384683793591
-4	5.3748845561858145
-5	5.226670012935515
-6	5.129751951463761
-7	5.028328594084709
-8	4.927295440243136
-9	4.900093263195407
-10	4.782809845093758
-11	4.747922060566564
-12	4.7022608941601165
-13	4.593218278884888
-14	4.5595668500469575
-15	4.5256145277330955
-16	4.487990423940843
-17	4.437278278412357
-18	4.405658186635663
-19	4.345585892277379
-20	4.351668170190627
-21	4.281654350219235
-22	4.271116087513585
-23	4.218694491540232
-24	4.217058497090494
-25	4.18591207842673
-26	4.1442133134411225
-27	4.136817063054731
-28	4.138740881027714
-29	4.141887569427491
-30	4.0587586833584695
-31	4.073217805739372
-32	4.03728237613555
-33	3.9956122844449937
-34	3.9974620434545702
-35	4.023998829626268
-36	4.003252243226575
-37	3.96282632120194
-38	3.972557355511573
-39	3.9570388470926594
-40	3.9371616824980706
-41	3.9387538525366015
-42	3.9152787362375565
-43	3.9202156805223036
-44	3.9222787534036945
-45	3.9003318632802655
-46	3.8846307400734195
-47	3.8660433077043104
-48	3.8885516658906014
-49	3.8739484986951274
-50	3.8828188911561043
-51	3.879196426945348
-52	3.8579012778497512
-53	3.8586445162373204
-54	3.8485875283518145
-55	3.9004847126622355
-56	3.856565999984741
-57	3.8792484175774358
-58	3.8369168358464396
-59	3.8326780919105774
-60	3.8230616754101168
-61	3.8407709090940414
-62	3.863103226692446
-63	3.8657945602170884
-64	3.8421273939071163
-65	3.7986517229387835
-66	3.82103298864057
-67	3.8075707174116564
-68	3.822302801378312
-69	3.8013139247894285
-70	3.805841762788834
-71	3.794689299983363
-72	3.827355009509671
-73	3.8235756274192565
-74	3.8244812150155343
-75	3.7931045101534937
-76	3.8153305530548094
-77	3.811681870491274
-78	3.803573694536763
-79	3.797684003460792
-80	3.724775042072419
-81	3.7086529916332616
-82	3.69718047418902
-83	3.7072122058560772
-84	3.697758961492969
-85	3.695369718151708
-86	3.695834325205895
-87	3.6924753996633712
-88	3.6935014640131305
-89	3.6894948267167615
-90	3.684856927779413
-91	3.6949271040578044
-92	3.6909136433755196
-93	3.684668857820572
-94	3.6938365920897454
-95	3.6776908213092434
-96	3.68652562479819
-97	3.6927355427895825
-98	3.682749411367601
-99	3.68042238989184
-100	3.6780288465561406
-101	3.6768690047725556
-102	3.675544171948587
-103	3.678370831089635
-104	3.6778974602299352
-105	3.67521142344321
-106	3.6865312660894087
-107	3.6777405446575533
-108	3.680706839407644
-109	3.6763052263567526
-110	3.6846513532823133
-111	3.6738818607022687
-112	3.677423651756779
-113	3.6801375196826074
-114	3.67694855043965
-115	3.684765911102295
-116	3.6786774143095937
-117	3.681097923555682
-118	3.6744024461315523
-119	3.679993252600393
+0	5.797461136527684
+1	5.030571444598949
+2	4.556520286965485
+3	4.215520957817778
+4	4.010316675987797
+5	3.880362969089821
+6	3.816462703373121
+7	3.692494175860271
+8	3.6367522645111823
+9	3.653451669619279
+10	3.6062179107021017
+11	3.4957861629661156
+12	3.479828889819159
+13	3.4403432498231603
+14	3.569326110508131
+15	3.4177859134720143
+16	3.4005619149277178
+17	3.354262824219782
+18	3.3333750905622033
+19	3.330571729203929
+20	3.330898431764133
+21	3.345849901005842
+22	3.2702495539245975
+23	3.286139108132625
+24	3.3057921491383353
+25	3.2670130712398584
+26	3.2556667172390483
+27	3.281399481538413
+28	3.2334959725826833
+29	3.2227006891499395
+30	3.122259281683659
+31	3.1010478239704447
+32	3.107929205548936
+33	3.0857788859934048
+34	3.0843215345760475
+35	3.0651772505995156
+36	3.0820578549795106
+37	3.06958488277767
+38	3.0720797805970417
+39	3.0617620495782383
+40	3.058031329210254
+41	3.061978076391174
+42	3.0600602747737495
+43	3.0567766186119854
+44	3.0753259716402503
+45	3.0577328608231844
+46	3.0421694407716466
+47	3.053754838768411
+48	3.0488021085803636
+49	3.049299760717125
diff --git a/person.jpg b/person.jpg