2020-722-CV-YOLOV4 day3-yolo_v4

model.py

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math

class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       layers= []
       batch_norm = False
       in_channels = 3
       for v in cfg:
           if v == 'M':
               layers += [nn.MaxPool2d(kernel_size=2,stride = 2)]
           else:
               conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding = 1)
               if batch_norm:
                   layers += [conv2d,nn.Batchnorm2d(v),nn.ReLU(inplace=True)]
               else:
                   layers += [conv2d,nn.ReLU(inplace=True)]
               in_channels = v
       # use the vgg layers to get the feature
       self.features = nn.Sequential(*layers)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x = self.features(x)
         x_fea = x
         x = self.avgpool(x)
         x_avg = x
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_fea,x_avg
    def extractor(self,x):
         x = self.features(x)
         return x

class YOLOV1(nn.Module):
    def __init__(self):
       super(YOLOV1,self).__init__()
       vgg = VGG()
       self.extractor = vgg.extractor
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：检测层
       self.detector = nn.Sequential(
          nn.Linear(512*7*7,4096),
          nn.ReLU(True),
          nn.Dropout(),
          nn.Linear(4096,1470),
       )
       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)
    def forward(self,x):
        x = self.extractor(x)
        import pdb
        pdb.set_trace()
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.detector(x)
        b,_ = x.shape
        x = x.view(b,7,7,30)
        return x
     
if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    feature,x_fea,x_avg = vgg(x)
    print(feature.shape)
    print(x_fea.shape)
    print(x_avg.shape)
 
    yolov1 = YOLOV1()
    feature = yolov1(x)
    # feature_size b*7*7*30
    print(feature.shape)

model_yolov1_xiangxi.py

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math
import cv2




class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       # use the vgg layers to get the feature
       #import pdb
       #pdb.set_trace()
       self.conv2d_1 = nn.Conv2d(3,64,3,1)
       self.bn_1 = nn.BatchNorm2d(64)
       self.relu_1 = nn.ReLU(True)
       
       self.conv2d_2 = nn.Conv2d(64,64,3,1)
       self.bn_2 = nn.BatchNorm2d(64)
       self.relu_2 = nn.ReLU(True)
       
       self.pool_2 = nn.MaxPool2d(2,2)
       self.conv2d_3 = nn.Conv2d(64,128,3,1)
       self.bn_3 = nn.BatchNorm2d(128)
       self.relu_3 = nn.ReLU(True)
       
       self.conv2d_4 = nn.Conv2d(128,128,3,1)
       self.bn_4 = nn.BatchNorm2d(128)
       self.relu_4 = nn.ReLU(True)
       
       self.pool_4 = nn.MaxPool2d(2,2)
       self.conv2d_5 = nn.Conv2d(128,256,3,1)
       self.bn_5 = nn.BatchNorm2d(256)
       self.relu_5 = nn.ReLU(True)
       
       self.conv2d_6 = nn.Conv2d(256,256,3,1)
       self.bn_6 = nn.BatchNorm2d(256)
       self.relu_6 = nn.ReLU(True)
       
       self.conv2d_7 = nn.Conv2d(256,256,3,1)
       self.bn_7 = nn.BatchNorm2d(256)
       self.relu_7 = nn.ReLU(True)
       
       self.pool_7 = nn.MaxPool2d(2,2)
       self.conv2d_8 = nn.Conv2d(256,512,3,1)
       self.bn_8 = nn.BatchNorm2d(512)
       self.relu_8 = nn.ReLU(True)
       
       self.conv2d_9 = nn.Conv2d(512,512,3,1)
       self.bn_9 = nn.BatchNorm2d(512)
       self.relu_9 = nn.ReLU(True)
       
       self.conv2d_10 = nn.Conv2d(512,512,3,1)
       self.bn_10 = nn.BatchNorm2d(512)
       self.relu_10 = nn.ReLU(True)
       
       self.pool_10 = nn.MaxPool2d(2,2)
       self.conv2d_11 = nn.Conv2d(512,512,3,1)
       self.bn_11 = nn.BatchNorm2d(512)
       self.relu_11 = nn.ReLU(True)
       
       self.conv2d_12 = nn.Conv2d(512,512,3,1)
       self.bn_12 = nn.BatchNorm2d(512)
       self.relu_12 = nn.ReLU(True)
       
       self.conv2d_13 = nn.Conv2d(512,512,3,1)
       self.bn_13 = nn.BatchNorm2d(512)
       self.relu_13 = nn.ReLU(True)
       
       self.pool_13 = nn.MaxPool2d(2,2)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x_records=[]
         #x = self.features(x)
         x = self.conv2d_1(x)
         x_records.append(x)
         x = self.bn_1(x)
         x = self.relu_1(x)
         x_records.append(x)
         x = self.conv2d_2(x)
         x = self.bn_2(x)
         x = self.relu_2(x)
         x_records.append(x)
         x = self.pool_2(x)
         x = self.conv2d_3(x)
         x = self.bn_3(x)
         x = self.relu_3(x)
         x_records.append(x)
         x = self.conv2d_4(x)
         x = self.bn_4(x)
         x = self.relu_4(x)
         x_records.append(x)

         x = self.pool_4(x)
         x = self.conv2d_5(x)
         x = self.bn_5(x)
         x = self.relu_5(x)
         x_records.append(x)
         x = self.conv2d_6(x)
         x = self.bn_6(x)
         x = self.relu_6(x)
         x_records.append(x)
         x = self.conv2d_7(x)
         x = self.bn_7(x)
         x = self.relu_7(x)
         x_records.append(x)
         x = self.pool_7(x)
         x = self.conv2d_8(x)
         x = self.bn_8(x)
         x = self.relu_8(x)
         x_records.append(x)
         x = self.conv2d_9(x)
         x = self.bn_9(x)
         x = self.relu_9(x)
         x_records.append(x)
         x = self.conv2d_10(x)
         x = self.bn_10(x)
         x = self.relu_10(x)
         x = self.pool_10(x)
         x = self.conv2d_11(x)
         x = self.bn_11(x)
         x = self.relu_11(x)
         x = self.conv2d_12(x)
         x = self.bn_12(x)
         x = self.relu_12(x)
         x = self.conv2d_13(x)
         x = self.bn_13(x)
         x = self.relu_13(x)
         x = self.pool_13(x)
         x_records.append(x)
         x = self.avgpool(x)
         x_records.append(x)
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_records


if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    #img=torch.tensor(cv2.imread("../../model_test.png"))
    img=torch.tensor(cv2.imread("../../model_test2.png"))
    w,h,c = img.shape
    x = img.view(1,w,h,c).permute(0,3,1,2).contiguous()
    x = x/255.
    import pdb
    pdb.set_trace()
    feature,x_records = vgg(x)
    print(feature.shape)
    from show_featuremap import show_featuremap
    for fea in x_records:
        print(fea.shape)
        show_featuremap(fea.detach())

PennFudanDataset_main.py

import os
import numpy as np
import torch
from PIL import Image


class PennFudanDataset(object):
    def __init__(self, root, transforms):
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask = Image.open(mask_path)
        # convert the PIL Image into a numpy array
        mask = np.array(mask)
        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.imgs)
        import transforms as T
import transforms as T
def get_transform(train):
    transforms = []
    transforms.append(T.ToTensor())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

show_featuremap.py

import cv2
import torch
def show_featuremap(feature):
    b,c,w,h = feature.shape
    feature_show=torch.zeros(b*w,c*h)
    for bi in range(0,b):
        for ci in range(0,c):
            #import pdb
            #pdb.set_trace()
            feature_show[bi*w:(bi+1)*w,ci*h:(ci+1)*h]=feature[bi,ci,:,:]
    print(feature_show.shape)
    cv2.imwrite("feature_bw_ch.png",255*feature_show.float().numpy())
    import os
    os.system("open feature_bw_ch.png")
    return feature_show

if __name__=="__main__":
    feature = torch.randn(1,10,100,100)
    feature_show = show_featuremap(feature)
    print(feature_show.shape)
    cv2.imwrite("feature_bw_ch.png",255*feature_show.float().numpy())
    import os
    os.system("open feature_bw_ch.png")

transforms.py

import random
import torch

from torchvision.transforms import functional as F


def _flip_coco_person_keypoints(kps, width):
    flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
    flipped_data = kps[:, flip_inds]
    flipped_data[..., 0] = width - flipped_data[..., 0]
    # Maintain COCO convention that if visibility == 0, then x, y = 0
    inds = flipped_data[..., 2] == 0
    flipped_data[inds] = 0
    return flipped_data


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target


class RandomHorizontalFlip(object):
    def __init__(self, prob):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            height, width = image.shape[-2:]
            image = image.flip(-1)
            bbox = target["boxes"]
            bbox[:, [0, 2]] = width - bbox[:, [2, 0]]
            target["boxes"] = bbox
            if "masks" in target:
                target["masks"] = target["masks"].flip(-1)
            if "keypoints" in target:
                keypoints = target["keypoints"]
                keypoints = _flip_coco_person_keypoints(keypoints, width)
                target["keypoints"] = keypoints
        return image, target


class ToTensor(object):
    def __call__(self, image, target):
        image = F.to_tensor(image)
        return image, target

v0yolo_model.py

#coding:utf-8
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math

class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       layers= []
       batch_norm = False
       in_channels = 3
       for v in cfg:
           if v == 'M':
               layers += [nn.MaxPool2d(kernel_size=2,stride = 2)]
           else:
               conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding = 1)
               if batch_norm:
                   layers += [conv2d,nn.Batchnorm2d(v),nn.ReLU(inplace=True)]
               else:
                   layers += [conv2d,nn.ReLU(inplace=True)]
               in_channels = v
       # use the vgg layers to get the feature
       self.features = nn.Sequential(*layers)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x = self.features(x)
         x_fea = x
         x = self.avgpool(x)
         x_avg = x
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_fea,x_avg
    def extractor(self,x):
         x = self.features(x)
         return x

class YOLOV0(nn.Module):
    def __init__(self):
       super(YOLOV0,self).__init__()
       vgg = VGG()
       self.extractor = vgg.extractor
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：检测层
       self.detector = nn.Sequential(
          nn.Linear(512*7*7,4096),
          nn.ReLU(True),
          nn.Dropout(),
          #nn.Linear(4096,1470),
          nn.Linear(4096,5),
       )
       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)
    def forward(self,x):
        x = self.extractor(x)
        #import pdb
        #pdb.set_trace()
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.detector(x)
        b,_ = x.shape
        #x = x.view(b,7,7,30)
        x = x.view(b,1,1,5)
        return x
     
if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    feature,x_fea,x_avg = vgg(x)
    print(feature.shape)
    print(x_fea.shape)
    print(x_avg.shape)
 
    yolov1 = YOLOV1()
    feature = yolov1(x)
    # feature_size b*7*7*30
    print(feature.shape)

v0yolotrain.py

#coding:utf-8
from PennFudanDataset_main import *

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from v0yolo_model import *
import cv2
import numpy as np
import time
import sys
import os


## 数据处理
#服务器上的地址 /data/2020-722-YOLOV4-Practical-datasets/PenFudanPed
# dataset地址：/Users/zhaomignming/Documents/mmteacher/datasets
#datapath='/Users/zhaomignming/Documents/mmteacher/datasets/PennFudanPed'
datapath='/Users/zhaomingming/data_sets/PennFudanPed'
dataset = PennFudanDataset(datapath, get_transform(train=False))
dataset_test = PennFudanDataset(datapath, get_transform(train=False))

indices = torch.randperm(len(dataset)).tolist()
#dataset = torch.utils.data.Subset(dataset, indices[:-50])
#dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
#import pdb
#pdb.set_trace()
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
dataset = torch.utils.data.Subset(dataset, [0])
dataset_test = torch.utils.data.Subset(dataset_test, indices[0:2])

def collate_fn(batch):
    return tuple(zip(*batch))
# define training and validation data loaders
train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=1,
        collate_fn=collate_fn)

val_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=collate_fn)


def input_process(batch):
    #import pdb
    #pdb.set_trace()
    batch_size=len(batch[0])
    input_batch= torch.zeros(batch_size,3,448,448)
    for i in range(batch_size):
        inputs_tmp = Variable(batch[0][i])
        inputs_tmp1=cv2.resize(inputs_tmp.permute([1,2,0]).numpy(),(448,448))
        inputs_tmp2=torch.tensor(inputs_tmp1).permute([2,0,1])
        input_batch[i:i+1,:,:,:]= torch.unsqueeze(inputs_tmp2,0)
    return input_batch 

#batch[1][0]['boxes'][0]
def target_process(batch):
    batch_size=len(batch[0])
    target_batch= torch.zeros(batch_size,1,1,5)
    #import pdb
    #pdb.set_trace()
    for i in range(batch_size):
        #只处理batch中的第一张图片
        # batch[1]表示label
        # batch[0]表示image
        bbox=batch[1][i]['boxes'][0]
        _,hi,wi = batch[0][i].numpy().shape
        bbox = bbox/ torch.tensor([wi,hi,wi,hi])
        cbbox =  torch.cat([torch.ones(1),bbox])
        target_batch[i:i+1,:,:,:] = torch.unsqueeze(cbbox,0)
    return target_batch
    

num_classes = 2
n_class    = 2
batch_size = 6
epochs     = 500
lr         = 1e-3
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5

# 定义模型
yolov0_model = YOLOV0()
import pdb
pdb.set_trace()
# 定义优化算法为sdg:随机梯度下降
optimizer = optim.SGD(yolov0_model.detector.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)

# 定义学习率变化策略
# 每30个epoch 学习率乘以0.5
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 30 epochs

# 矩阵形式写法，写法简单，但是可读性不强
def lossfunc(outputs,labels):
    #import  pdb
    #pdb.set_trace()
    tmp = (outputs-labels)**2
    return torch.sum(tmp,0).view(1,5).mm(torch.tensor([10,0.0001,0.0001,0.0001,0.0001]).view(5,1))

# 定义直接拟合的学习率，可读性强
def lossfunc_details(outputs,labels):
    # 判断维度
    assert ( outputs.shape == labels.shape),"outputs shape[%s] not equal labels shape[%s]"%(outputs.shape,labels.shape)
    b,w,h,c = outputs.shape
    loss = 0
    for bi in range(b):
        for wi in range(w):
            for hi in range(h):
                #import pdb
                #pdb.set_trace()
                # detect_vector=[confidence,x,y,w,h]
                detect_vector = outputs[bi,wi,hi]
                gt_dv = labels[bi,wi,hi]
                conf_pred = detect_vector[0]
                conf_gt = gt_dv[0]
                x_pred = detect_vector[1]
                x_gt = gt_dv[1]
                y_pred = detect_vector[2]
                y_gt = gt_dv[2]
                w_pred = detect_vector[3]
                w_gt = gt_dv[3]
                h_pred = detect_vector[4]
                h_gt = gt_dv[4]
                loss_confidence = (conf_pred-conf_gt)**2 
                #loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred**0.5-w_gt**0.5)**2 + (h_pred**0.5-h_gt**0.5)**2
                loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred-w_gt)**2 + (h_pred-h_gt)**2
                loss_tmp = loss_confidence + 0.3*loss_geo
                #print("loss[%s,%s] = %s,%s"%(wi,hi,loss_confidence.item(),loss_geo.item()))
                loss += loss_tmp
    return loss
# train
def train():
    for epoch in range(epochs):
        ts = time.time()
        for iter, batch in enumerate(train_loader):
            optimizer.zero_grad()
            # 取图片
            inputs = input_process(batch)
            # 取标注
            labels = target_process(batch)
            
            # 获取得到输出
            outputs = yolov0_model(inputs)
            #import pdb
            #pdb.set_trace()
            #loss = criterion(outputs, labels)
            loss = lossfunc_details(outputs,labels)
            loss.backward()
            optimizer.step()
            #print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
            if iter % 10 == 0:
            #    print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
                print("epoch{}, iter{}, loss: {}, lr: {}".format(epoch, iter, loss.data.item(),optimizer.state_dict()['param_groups'][0]['lr']))
        
        #print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        #print("*"*30)
        #val(epoch)
        scheduler.step()
# inference
def val(epoch):
    yolov0_model.eval()
    total_ious = []
    pixel_accs = []
    for iter, batch in enumerate(val_loader):
        inputs = input_process(batch)
        target,label= target_process(batch)

        output = yolov1_model(inputs)
        output = output.data.cpu().numpy()

        N, _, h, w = output.shape
        pred = output.transpose(0, 2, 3, 1).reshape(-1, n_class).argmax(axis=1).reshape(N, h, w)
        

if __name__ == "__main__":
    train()

v1yolomodel.py

#coding:utf-8
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
import math

class VGG(nn.Module):
    def __init__(self):
       super(VGG,self).__init__()
       # the vgg's layers
       #self.features = features
       cfg = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M']
       layers= []
       batch_norm = False
       in_channels = 3
       for v in cfg:
           if v == 'M':
               layers += [nn.MaxPool2d(kernel_size=2,stride = 2)]
           else:
               conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding = 1)
               if batch_norm:
                   layers += [conv2d,nn.Batchnorm2d(v),nn.ReLU(inplace=True)]
               else:
                   layers += [conv2d,nn.ReLU(inplace=True)]
               in_channels = v
       # use the vgg layers to get the feature
       self.features = nn.Sequential(*layers)
       # 全局池化
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：分类层
       self.classifier = nn.Sequential(
           nn.Linear(512*7*7,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,4096),
           nn.ReLU(True),
           nn.Dropout(),
           nn.Linear(4096,1000),
       )

       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)

    def forward(self,x):
         x = self.features(x)
         x_fea = x
         x = self.avgpool(x)
         x_avg = x
         x = x.view(x.size(0),-1)
         x = self.classifier(x)
         return x,x_fea,x_avg
    def extractor(self,x):
         x = self.features(x)
         return x

class YOLOV1(nn.Module):
    def __init__(self):
       super(YOLOV1,self).__init__()
       vgg = VGG()
       self.extractor = vgg.extractor
       self.avgpool = nn.AdaptiveAvgPool2d((7,7))
       # 决策层：检测层
       self.detector = nn.Sequential(
          nn.Linear(512*7*7,4096),
          nn.ReLU(True),
          nn.Dropout(),
          #nn.Linear(4096,1470),
          nn.Linear(4096,245),
          #nn.Linear(4096,5),
       )
       for m in self.modules():
           if isinstance(m,nn.Conv2d):
               nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
               if m.bias is not None: 
                   nn.init.constant_(m.bias,0)
           elif isinstance(m,nn.BatchNorm2d):
               nn.init.constant_(m.weight,1)
               nn.init.constant_(m.bias,1)
           elif isinstance(m,nn.Linear):
               nn.init.normal_(m.weight,0,0.01)
               nn.init.constant_(m.bias,0)
    def forward(self,x):
        x = self.extractor(x)
        #import pdb
        #pdb.set_trace()
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x = self.detector(x)
        b,_ = x.shape
        #x = x.view(b,7,7,30)
        x = x.view(b,7,7,5)
        
        #x = x.view(b,1,1,5)
        return x
     
if __name__ == '__main__':
    vgg = VGG()
    x  = torch.randn(1,3,512,512)
    feature,x_fea,x_avg = vgg(x)
    print(feature.shape)
    print(x_fea.shape)
    print(x_avg.shape)
 
    yolov1 = YOLOV1()
    feature = yolov1(x)
    # feature_size b*7*7*30
    print(feature.shape)

v1yolotrain.py

#coding:utf-8
from PennFudanDataset_main import *

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from v1yolomodel import *
import cv2
import numpy as np
import time
import sys
import os


## 数据处理
#服务器上的地址 /data/2020-722-YOLOV4-Practical-datasets/PenFudanPed
# dataset地址：/Users/zhaomignming/Documents/mmteacher/datasets
#datapath='/Users/zhaomignming/Documents/mmteacher/datasets/PennFudanPed'
datapath='/Users/zhaomingming/data_sets/PennFudanPed'
dataset = PennFudanDataset(datapath, get_transform(train=False))
dataset_test = PennFudanDataset(datapath, get_transform(train=False))

indices = torch.randperm(len(dataset)).tolist()
#dataset = torch.utils.data.Subset(dataset, indices[:-50])
#dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
#import pdb
#pdb.set_trace()
#dataset = torch.utils.data.Subset(dataset, indices[0:1])
dataset = torch.utils.data.Subset(dataset, [0])
dataset_test = torch.utils.data.Subset(dataset_test, indices[0:2])

def collate_fn(batch):
    return tuple(zip(*batch))
# define training and validation data loaders
train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False, num_workers=1,
        collate_fn=collate_fn)

val_loader = torch.utils.data.DataLoader(
        dataset_test, batch_size=2, shuffle=False, num_workers=4,
        collate_fn=collate_fn)


def input_process(batch):
    #import pdb
    #pdb.set_trace()
    batch_size=len(batch[0])
    input_batch= torch.zeros(batch_size,3,448,448)
    for i in range(batch_size):
        inputs_tmp = Variable(batch[0][i])
        inputs_tmp1=cv2.resize(inputs_tmp.permute([1,2,0]).numpy(),(448,448))
        inputs_tmp2=torch.tensor(inputs_tmp1).permute([2,0,1])
        input_batch[i:i+1,:,:,:]= torch.unsqueeze(inputs_tmp2,0)
    return input_batch 

#batch[1][0]['boxes'][0]
def target_process(batch,grid_number=7):
    # batch[1]表示label
    # batch[0]表示image
    batch_size=len(batch[0])
    target_batch= torch.zeros(batch_size,grid_number,grid_number,5)
    #import pdb
    #pdb.set_trace()
    for i in range(batch_size):
        labels = batch[1]
        batch_labels = labels[i]
        #import pdb
        #pdb.set_trace()
        number_box = len(batch_labels['boxes'])
        for wi in range(grid_number):
            for hi in range(grid_number):
                # 便利每个标注的框
                for bi in range(number_box):
                    bbox=batch_labels['boxes'][bi]
                    _,himg,wimg = batch[0][i].numpy().shape
                    bbox = bbox/ torch.tensor([wimg,himg,wimg,himg])
                    #import pdb
                    #pdb.set_trace()
                    center_x= (bbox[0]+bbox[2])*0.5
                    center_y= (bbox[1]+bbox[3])*0.5
                    #print("[%s,%s,%s],[%s,%s,%s]"%(wi/grid_number,center_x,(wi+1)/grid_number,hi/grid_number,center_y,(hi+1)/grid_number))
                    if center_x<=(wi+1)/grid_number and center_x>=wi/grid_number and center_y<=(hi+1)/grid_number and center_y>= hi/grid_number:
                        #pdb.set_trace()
                        cbbox =  torch.cat([torch.ones(1),bbox])
                        # 中心点落在grid内，
                        target_batch[i:i+1,wi:wi+1,hi:hi+1,:] = torch.unsqueeze(cbbox,0)
                    #else:
                        #cbbox =  torch.cat([torch.zeros(1),bbox])
                #import pdb
                #pdb.set_trace()
                #rint(target_batch[i:i+1,wi:wi+1,hi:hi+1,:])
                #target_batch[i:i+1,wi:wi+1,hi:hi+1,:] = torch.unsqueeze(cbbox,0)
    return target_batch
    

num_classes = 2
n_class    = 2
batch_size = 6
epochs     = 500
lr         = 1e-3
momentum   = 0
w_decay    = 1e-5
step_size  = 50
gamma      = 0.5

# 定义模型
yolov1_model = YOLOV1()
import pdb
pdb.set_trace()
# 定义优化算法为sdg:随机梯度下降
optimizer = optim.SGD(yolov1_model.detector.parameters(), lr=lr, momentum=momentum, weight_decay=w_decay)

# 定义学习率变化策略
# 每30个epoch 学习率乘以0.5
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)  # decay LR by a factor of 0.5 every 30 epochs

# 矩阵形式写法，写法简单，但是可读性不强
def lossfunc(outputs,labels):
    #import  pdb
    #pdb.set_trace()
    tmp = (outputs-labels)**2
    return torch.sum(tmp,0).view(1,5).mm(torch.tensor([10,0.0001,0.0001,0.0001,0.0001]).view(5,1))

# 定义直接拟合的学习率，可读性强
def lossfunc_details(outputs,labels):
    # 判断维度
    assert ( outputs.shape == labels.shape),"outputs shape[%s] not equal labels shape[%s]"%(outputs.shape,labels.shape)
    #import pdb
    #pdb.set_trace()
    b,w,h,c = outputs.shape
    loss = 0
    #import pdb
    #pdb.set_trace()
    conf_loss_matrix = torch.zeros(b,w,h)
    geo_loss_matrix = torch.zeros(b,w,h)
    loss_matrix = torch.zeros(b,w,h)
    
    for bi in range(b):
        for wi in range(w):
            for hi in range(h):
                #import pdb
                #pdb.set_trace()
                # detect_vector=[confidence,x,y,w,h]
                detect_vector = outputs[bi,wi,hi]
                gt_dv = labels[bi,wi,hi]
                conf_pred = detect_vector[0]
                conf_gt = gt_dv[0]
                x_pred = detect_vector[1]
                x_gt = gt_dv[1]
                y_pred = detect_vector[2]
                y_gt = gt_dv[2]
                w_pred = detect_vector[3]
                w_gt = gt_dv[3]
                h_pred = detect_vector[4]
                h_gt = gt_dv[4]
                loss_confidence = (conf_pred-conf_gt)**2 
                #loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred**0.5-w_gt**0.5)**2 + (h_pred**0.5-h_gt**0.5)**2
            
                loss_geo = (x_pred-x_gt)**2 + (y_pred-y_gt)**2 + (w_pred-w_gt)**2 + (h_pred-h_gt)**2
                loss_geo = conf_gt*loss_geo
                loss_tmp = loss_confidence + 0.3*loss_geo
                #print("loss[%s,%s] = %s,%s"%(wi,hi,loss_confidence.item(),loss_geo.item()))
                loss += loss_tmp
                conf_loss_matrix[bi,wi,hi]=loss_confidence
                geo_loss_matrix[bi,wi,hi]=loss_geo
                loss_matrix[bi,wi,hi]=loss_tmp
    #打印出batch中每张片的位置loss,和置信度输出
    print(geo_loss_matrix)
    print(outputs[0,:,:,0]>0.5)
    return loss,loss_matrix,geo_loss_matrix,conf_loss_matrix
# train
def train():
    for epoch in range(epochs):
        ts = time.time()
        for iter, batch in enumerate(train_loader):
            optimizer.zero_grad()
            # 取图片
            inputs = input_process(batch)
            # 取标注
            labels = target_process(batch)
            
            # 获取得到输出
            outputs = yolov1_model(inputs)
            #import pdb
            #pdb.set_trace()
            #loss = criterion(outputs, labels)
            loss,lm,glm,clm = lossfunc_details(outputs,labels)
            loss.backward()
            optimizer.step()
            #print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
            if iter % 10 == 0:
            #    print(torch.cat([outputs.detach().view(1,5),labels.view(1,5)],0).view(2,5))
                print("epoch{}, iter{}, loss: {}, lr: {}".format(epoch, iter, loss.data.item(),optimizer.state_dict()['param_groups'][0]['lr']))
        
        #print("Finish epoch {}, time elapsed {}".format(epoch, time.time() - ts))
        #print("*"*30)
        #val(epoch)
        scheduler.step()
# inference
def val(epoch):
    yolov1_model.eval()
    total_ious = []
    pixel_accs = []
    for iter, batch in enumerate(val_loader):
        inputs = input_process(batch)
        target,label= target_process(batch)

        output = yolov1_model(inputs)
        output = output.data.cpu().numpy()

        N, _, h, w = output.shape
        pred = output.transpose(0, 2, 3, 1).reshape(-1, n_class).argmax(axis=1).reshape(N, h, w)
        

if __name__ == "__main__":
    train()

数据集的路径：/data/2020-722-YOLOV4-Practical-datasets

2020-722-CV-YOLOV4 day3-yolo_v4

猜你喜欢