说明:
1、已安装环境:CUDA9.0+CUDNN7.05+OpenCv3.4.5
2、参考:1 2 3
一、配置YOLO3
参考:
https://www.cnblogs.com/taotingz/p/11316467.html
https://blog.csdn.net/syysyf99/article/details/93207020
1、下载编译darknet(YOLO3原版代码)
git clone https://github.com/pjreddie/darknet
cd darknet
make
2、下载预训练权重文件
wget https://pjreddie.com/media/files/yolov3.weights
3、环境配置
1、修改Makefile:
cd darknet
gedit Makefile
在打开的文件中修改如下,表示使用GPU CUDNN OPENCV:
GPU=1
CUDNN=1
OPENCV=1
重新make以生效:
make
注:每次修改都要重新make以生效!
2、修改网络配置文件:
在darknet中:
cd cfg
gedit yolov3.cfg
打开的文本中修改如下,test模式:
#Testing
batch=1
subdivisions=1
#Training
#batch=64
#subdivisions=16
重新make:
make
4、测试
1、测试单张图片,在darknet目录下执行:
./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
2、打开电脑摄像头,测试实时图像:
./darknet detector demo cfg/coco.data cfg/yolov3.cfg yolov3.weights
二、数据集准备
参考:
https://blog.csdn.net/weixin_38106878/article/details/88684280
https://blog.csdn.net/weixin_43653815/article/details/95514857
https://blog.csdn.net/w5688414/article/details/78931910
1、数据集下载
UA-DETRAC数据集主要拍摄于北京和天津的道路过街天桥,多为摄像头俯视视角;
主要用到的部分:
DETRAC-Train-Images (5.22GB, 60 sequences)
DETRAC-Test-Images (3.94GB, 40 sequences)
DETRAC-Train-Annotations-XML
2、格式转化
将UA-DETRAC数据集转化成YOLO3可训练的VOC数据集格式:
a、VOC格式目录
在存放数据集的位置建立如下目录:
VOC2007的目录结构为:
├── Annotations xml文件
├── ImageSets txt文件
└── JPEGImages 图片
ImageSets的目录结构为:
├── Layout
├── Main
└── Segmentation
建立一个用于数据集转化的空文件夹test,包含下载好的数据集:
b、xml文件
从数据提供的XML文件中,提取出每张图片voc格式的xml文件:
在test下新建文档命名为:DETRAC_xmlParser.py
import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
import os
#import cv2//用于显示
import time
def ConvertVOCXml(file_path="", file_name=""):
    """Convert one UA-DETRAC sequence annotation file into per-frame VOC xml.

    Each <frame> element of the DETRAC annotation becomes one VOC-style
    annotation file named "<sequence>__img<NNNNN>.xml" inside *file_path*.
    Every target becomes a single "car" object (this pipeline detects only
    one class); box coordinates are clamped to the fixed 960x540 frame.

    :param file_path: output directory for the generated VOC xml files
    :param file_name: path of the DETRAC sequence annotation xml to parse
    :return: number of frames converted (one output file per frame)
    """
    tree = ET.parse(file_name)
    root = tree.getroot()
    num = 0  # converted-frame counter
    for child in root:
        if child.tag != "frame":
            continue
        # one VOC document per frame
        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)
        # frame numbers are zero-padded to five digits: img00001.jpg, ...
        pic_id = child.attrib["num"].zfill(5)
        output_file_name = root.attrib["name"] + "__img" + pic_id + ".xml"
        folder = doc.createElement("folder")
        folder.appendChild(doc.createTextNode("VOC2007"))
        annotation.appendChild(folder)
        filename = doc.createElement("filename")
        pic_name = "img" + pic_id + ".jpg"
        filename.appendChild(doc.createTextNode(pic_name))
        annotation.appendChild(filename)
        # DETRAC frames are all 960x540 RGB, so the <size> block is constant
        sizeimage = doc.createElement("size")
        imagewidth = doc.createElement("width")
        imageheight = doc.createElement("height")
        imagedepth = doc.createElement("depth")
        imagewidth.appendChild(doc.createTextNode("960"))
        imageheight.appendChild(doc.createTextNode("540"))
        imagedepth.appendChild(doc.createTextNode("3"))
        sizeimage.appendChild(imagedepth)
        sizeimage.appendChild(imagewidth)
        sizeimage.appendChild(imageheight)
        annotation.appendChild(sizeimage)
        # first child of <frame> is <target_list>
        # (child.getchildren() was removed in Python 3.9 -- index instead)
        target_list = child[0]
        for target in target_list:
            if target.tag != "target":
                continue
            obj = doc.createElement('object')  # renamed: `object` shadows the builtin
            bndbox = doc.createElement("bndbox")
            for target_child in target:
                if target_child.tag == "box":
                    xmin = doc.createElement("xmin")
                    ymin = doc.createElement("ymin")
                    xmax = doc.createElement("xmax")
                    ymax = doc.createElement("ymax")
                    xmin_value = int(float(target_child.attrib["left"]))
                    ymin_value = int(float(target_child.attrib["top"]))
                    box_width_value = int(float(target_child.attrib["width"]))
                    box_height_value = int(float(target_child.attrib["height"]))
                    xmin.appendChild(doc.createTextNode(str(xmin_value)))
                    ymin.appendChild(doc.createTextNode(str(ymin_value)))
                    # clamp the far corner to the image border
                    xmax.appendChild(doc.createTextNode(str(min(xmin_value + box_width_value, 960))))
                    ymax.appendChild(doc.createTextNode(str(min(ymin_value + box_height_value, 540))))
                    bndbox.appendChild(xmin)
                    bndbox.appendChild(ymin)
                    bndbox.appendChild(xmax)
                    bndbox.appendChild(ymax)
                if target_child.tag == "attribute":
                    # VOC bookkeeping fields; pose/truncated/difficult are
                    # fixed placeholders since DETRAC does not provide them
                    name = doc.createElement('name')
                    pose = doc.createElement('pose')
                    truncated = doc.createElement('truncated')
                    difficult = doc.createElement('difficult')
                    name.appendChild(doc.createTextNode("car"))
                    pose.appendChild(doc.createTextNode("Left"))
                    truncated.appendChild(doc.createTextNode("0"))
                    difficult.appendChild(doc.createTextNode("0"))
                    obj.appendChild(name)
                    obj.appendChild(pose)
                    obj.appendChild(truncated)
                    obj.appendChild(difficult)
            obj.appendChild(bndbox)
            annotation.appendChild(obj)
        file_path_out = os.path.join(file_path, output_file_name)
        # `with` closes the handle promptly (the original leaked it on error)
        with open(file_path_out, 'w') as f:
            f.write(doc.toprettyxml(indent=' ' * 4))
        num = num + 1
    return num
'''
画方框
'''
def bboxes_draw_on_img(img, bbox, color=[255, 0, 0], thickness=2):
    # Draw one bounding box onto *img* in place.
    # bbox: dict with "xmin"/"ymin"/"xmax"/"ymax" string values, as collected
    # from the generated VOC xml by visualization_image().
    # NOTE(review): requires cv2, whose import is commented out at the top of
    # this script -- uncomment `import cv2` before using the visualization helpers.
    # NOTE(review): mutable default `color=[255, 0, 0]` is shared across calls;
    # it is only read here, so harmless, but a tuple would be safer.
    print(bbox)
    p1 = (int(float(bbox["xmin"])), int(float(bbox["ymin"])))
    p2 = (int(float(bbox["xmax"])), int(float(bbox["ymax"])))
    cv2.rectangle(img, p1, p2, color, thickness)
def visualization_image(image_name,xml_file_name):
    """Parse a generated VOC xml file, print its fields, and draw every
    object's bounding box onto the matching image with OpenCV.

    NOTE(review): requires cv2, whose import is commented out at the top of
    this script -- uncomment `import cv2` before calling this helper.

    :param image_name: path of the frame image to display
    :param xml_file_name: path of the per-frame VOC annotation to visualize
    """
    tree = ET.parse(xml_file_name)
    root = tree.getroot()
    object_lists=[]
    for child in root:
        if(child.tag=="folder"):
            print(child.tag, child.text)
        elif (child.tag == "filename"):
            print(child.tag, child.text)
        elif (child.tag == "size"):  # parse the <size> block
            for size_child in child:
                if(size_child.tag=="width"):
                    print(size_child.tag,size_child.text)
                elif (size_child.tag == "height"):
                    print(size_child.tag, size_child.text)
                elif (size_child.tag == "depth"):
                    print(size_child.tag, size_child.text)
        elif (child.tag == "object"):  # collect one bbox dict per <object>
            singleObject={}
            for object_child in child:
                if (object_child.tag == "name"):
                    singleObject["name"] = object_child.text
                elif (object_child.tag == "bndbox"):
                    for bndbox_child in object_child:
                        if (bndbox_child.tag == "xmin"):
                            singleObject["xmin"] = bndbox_child.text
                        elif (bndbox_child.tag == "ymin"):
                            singleObject["ymin"] = bndbox_child.text
                        elif (bndbox_child.tag == "xmax"):
                            singleObject["xmax"] = bndbox_child.text
                        elif (bndbox_child.tag == "ymax"):
                            singleObject["ymax"] = bndbox_child.text
            object_length=len(singleObject)
            if(object_length>0):
                object_lists.append(singleObject)
    # draw all collected boxes and show the result in a blocking window
    img = cv2.imread(image_name)
    for object_coordinate in object_lists:
        bboxes_draw_on_img(img,object_coordinate)
    cv2.imshow("capture", img)
    cv2.waitKey (0)
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # Batch-convert every DETRAC sequence annotation xml under basePath into
    # per-frame VOC xml files under xml_test, logging per-sequence counts.
    basePath = "DETRAC-Train-Annotations-XML"  # downloaded DETRAC annotations
    totalxml = os.listdir(basePath)
    total_num = 0
    print("正在转换")
    saveBasePath = "xml_test"  # output directory
    os.makedirs(saveBasePath, exist_ok=True)  # create output dir if missing
    start = time.time()
    # xml_statistical.txt records "<sequence file> <frame count>" per sequence
    # so conversion problems can be traced afterwards; the original script
    # never closed this handle -- `with` guarantees it.
    with open("xml_statistical.txt", "w") as log:
        for xml in totalxml:
            file_name = os.path.join(basePath, xml)
            print(file_name)
            num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
            print(num)
            total_num = total_num + num
            log.write(file_name + " " + str(num) + "\n")
        end = time.time()
        seconds = end - start
        print("Time taken : {0} seconds".format(seconds))
        print(total_num)
        log.write(str(total_num) + "\n")
    # Sanity-check one converted frame by drawing its boxes (needs cv2).
    visualization_image("Insight-MVT_Annotation_Train/MVI_40212/img00396.jpg",
                        "xml_test/MVI_40212__img00396.xml")
test下运行该文件:
python3 DETRAC_xmlParser.py
把生成的xml_test文件中的所有文件移到Annotations目录下:
c、图片迁移
根据生成的XML文件,迁移相应的图片到目标目录中:
在test目录下建立空白文件命名为voc_data_migrate.py
import os
import random
import shutil
# Copy the frame images that have a generated xml annotation into one flat
# directory, renaming each to "<sequence>__imgNNNNN.jpg" so image and xml
# file names match one-to-one.

# directory of the generated xml files (output of DETRAC_xmlParser.py)
XmlPath = r'xml_test'
# root directory of the original DETRAC frame images
pictureBasePath = r"Insight-MVT_Annotation_Train"
# destination directory for the copied/renamed images
saveBasePath = r"picture_test"

total_xml = os.listdir(XmlPath)
num = len(total_xml)
os.makedirs(saveBasePath, exist_ok=True)  # create destination if missing
for xml in total_xml:
    # xml name pattern: "<sequence>__imgNNNNN.xml", e.g. "MVI_40212__img00396.xml"
    xml_temp = xml.split("__")
    folder = xml_temp[0]  # sequence folder, e.g. MVI_40212
    filename = xml_temp[1].split(".")[0] + ".jpg"  # original frame name imgNNNNN.jpg
    temp_pictureBasePath = os.path.join(pictureBasePath, folder)
    filePath = os.path.join(temp_pictureBasePath, filename)
    # copy under the xml's base name so image and xml names match 1:1
    newfile = xml.split(".")[0] + ".jpg"
    newfile_path = os.path.join(saveBasePath, newfile)
    print(newfile_path)
    shutil.copyfile(filePath, newfile_path)
print("xml file total number", num)
test下运行该文件:
python3 voc_data_migrate.py
把生成的picture_test文件中的所有文件移到JPEGImage目录下:
d、数据分类
产生trainval.txt,test.txt,train.txt,val.txt文件,这样就可以像VOC2007那样使用这个数据集了:
在VOC2007的上级目录下建立空白文件命名为ImageSets_Convert.py
import os
import random
import time
# Randomly split the VOC2007 annotation file names into trainval/test and,
# inside trainval, into train/val, writing the four index files expected
# under VOC2007/ImageSets/Main/.
xmlfilepath = r'./VOC2007/Annotations'
saveBasePath = r"./"
trainval_percent = 0.8  # fraction of all samples used for train+val
train_percent = 0.85    # fraction of trainval used for train

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
tv = int(num * trainval_percent)  # trainval size
tr = int(tv * train_percent)      # train size
trainval = random.sample(range(num), tv)
train = random.sample(trainval, tr)
# sets give O(1) membership tests in the loop below (lists are O(n) each)
trainval_set = set(trainval)
train_set = set(train)
print("train and val size", tv)
print("train size", tr)  # (fixed the original's typo "traub suze")
ftrainval = open(os.path.join(saveBasePath, 'VOC2007/ImageSets/Main/trainval.txt'), 'w')
ftest = open(os.path.join(saveBasePath, 'VOC2007/ImageSets/Main/test.txt'), 'w')
ftrain = open(os.path.join(saveBasePath, 'VOC2007/ImageSets/Main/train.txt'), 'w')
fval = open(os.path.join(saveBasePath, 'VOC2007/ImageSets/Main/val.txt'), 'w')
start = time.time()
for i in range(num):
    name = total_xml[i][:-4] + '\n'  # strip the ".xml" extension
    if i in trainval_set:
        ftrainval.write(name)
        if i in train_set:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)
end = time.time()
seconds = end - start
print("Time taken : {0} seconds".format(seconds))
ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
python3 ImageSets_Convert.py
然后可以看到生成的四个文件名目录:
这里的txt文件里只有文件名:
e、标记转换
将xml文件转换成YOLO3可以训练的txt格式:
在VOC2007文件夹下建立空白文档命名为voc2007.py
import os
import pickle
import xml.etree.ElementTree as ET
from os import listdir, getcwd
from os.path import join
# dataset splits for which list/label files are generated
sets = ['train', 'test','val']
classes = ["car"]  # single class -- this pipeline only detects vehicles
def convert(size, box):
    """Convert a VOC box (xmin, xmax, ymin, ymax) in an image of *size*
    (width, height) into normalized YOLO (x_center, y_center, w, h)."""
    dw = 1. / size[0]
    dh = 1. / size[1]
    xmin, xmax, ymin, ymax = box
    # center point, then scale everything into [0, 1]
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    return (x_center * dw, y_center * dh, (xmax - xmin) * dw, (ymax - ymin) * dh)
def convert_annotation(image_id):
    """Read Annotations/<image_id>.xml and write labels/<image_id>.txt with
    one "<class_id> <x> <y> <w> <h>" line per kept object.

    Objects whose class is not in the module-level `classes` list, or that
    are marked difficult, are skipped.

    :param image_id: base name (no extension) of the annotation/label pair
    """
    # `with` closes both handles (the original leaked them)
    with open('Annotations/%s.xml' % (image_id)) as in_file, \
            open('labels/%s.txt' % (image_id), 'w') as out_file:
        tree = ET.parse(in_file)
        root = tree.getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# Generate one list file per split (<set>.txt with absolute image paths) and
# the YOLO label files (labels/<id>.txt) for every image id in that split.
wd = getcwd()
print(wd)
for image_set in sets:
    os.makedirs('labels/', exist_ok=True)  # YOLO txt labels are written here
    # `with` closes the id file (the original leaked the handle)
    with open('ImageSets/Main/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    with open('%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            # NOTE(review): the absolute path is deliberate -- the author
            # reports darknet could not find images via relative paths;
            # adjust this prefix to your own directory tree.
            list_file.write('/home/ming/mproject/mdetection/YOLO3/darknet/VOCdevkit/VOC2007/JPEGImages/%s.jpg\n' % (image_id))
            convert_annotation(image_id)
python3 voc2007.py
生成图片绝对路径:(是否在上一步直接生成绝对路径就可以啦,没试过,用上一步的相对路径找不到图片)
替换到Main中:
同时,还生成了标签文件labels:
至此,数据集转化好啦!
三、YOLOV3相关代码的调整
1、car.cfg
在darknet目录下,进入cfg目录,复制yolov3.cfg文件重命名为car.cfg,并做如下更改:
[net]
# Testing
# batch=1
# subdivisions=1
# Training//训练模式
batch=64
subdivisions=32
# 根据自己GPU内存自行选择,我的是4G(唉)。注意:darknet 的 cfg 不识别 // 注释,必须用 # 单独成行
width=608
height=608
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
# 以下3个YOLO的更改,第一个(注释须用 # 单独成行,写在 [section] 行内会导致 darknet 解析失败)
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=1
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=0
# 显存太小,停止(random=0 关闭多尺度训练)
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
# 第二个
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=1
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=0
# 停止(关闭多尺度训练)
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
# 第三个
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=1
num=9
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=0
# 停止,其他未改动
2、car.data
在darknet目录下,进入cfg目录,新建car.data:
classes= 1
train = /home/ming/mproject/mdetection/YOLO3/darknet/VOCdevkit/VOC2007/ImageSets/Main/train.txt
valid = /home/ming/mproject/mdetection/YOLO3/darknet/VOCdevkit/VOC2007/ImageSets/Main/test.txt
names = /home/ming/mproject/mdetection/YOLO3/darknet/data/car.names
backup = /home/ming/mproject/mdetection/YOLO3/darknet/backup
更改成自己的路径
3、car.names
data目录下新建一个names文件car.names:
car
4、预训练model
darknet53.conv.74 放在darknet中
5、其他更改
打开example文件夹中的detector.c代码138行改为
if(i%100==0){
表明每100次输出一次权重文件在darknet,重新make一下就可以准备训练了
四、YOLOV3训练
开始训练:
./darknet detector train cfg/car.data cfg/car.cfg darknet53.conv.74 -gpu 0 | tee train_yolov3.log
中止Ctrl+C;
继续训练要:
./darknet detector train cfg/car.data cfg/car.cfg backup/car.backup -gpu 0 | tee train_yolov3.log
训练日志记录在train_yolov3.log中
可视化方法:
在同目录下建立train.py文件:
# -*- coding: utf-8 -*-
# @Time : 2018/12/30 16:26
# @Author : lazerliu
# @File : vis_yolov3_log.py
# @Func :yolov3 训练日志可视化,把该脚本和日志文件放在同一目录下运行。
import pandas as pd
import matplotlib.pyplot as plt
import os
# ================== likely the only thing you need to edit ============== #
g_log_path = "train_yolov3.log"  # file name of your darknet training log
# ======================================================================== #
def extract_log(log_file, new_log_file, key_word):
    """Filter the darknet training log down to the usable data lines.

    Lines from multi-GPU syncing ("Syncing") and lines containing "nan" are
    dropped so the downstream pandas parsing does not choke on them; of the
    rest, only lines containing *key_word* are kept.

    :param log_file: raw darknet training log
    :param new_log_file: destination file receiving the filtered lines
    :param key_word: substring a line must contain to be kept
    """
    # one combined `with` closes both files; the original also called
    # close() redundantly after its `with` blocks.
    with open(log_file, "r") as f, open(new_log_file, "w") as train_log:
        for line in f:
            # drop multi-gpu sync messages
            if "Syncing" in line:
                continue
            # drop lines containing nan losses
            if "nan" in line:
                continue
            if key_word in line:
                train_log.write(line)
def drawAvgLoss(loss_log_path):
    """Plot the average-loss curve from the extracted loss log.

    :param loss_log_path: file produced by extract_log(..., "images")
    """
    # count the lines so the first 500 iterations (warm-up noise) can be
    # skipped; the original opened the file in a loop without closing it,
    # and used mode "rU", which was removed in Python 3.11 ("r" is
    # equivalent on Python 3).
    with open(loss_log_path, "r") as f:
        line_cnt = sum(1 for _ in f)
    result = pd.read_csv(loss_log_path,
                         skiprows=[iter_num for iter_num in range(line_cnt) if iter_num < 500],
                         # NOTE(review): error_bad_lines was removed in pandas 2.0;
                         # use on_bad_lines="skip" there
                         error_bad_lines=False,
                         names=["loss", "avg", "rate", "seconds", "images"])
    # the "avg" column text looks like " 0.123 avg"; index 1 after splitting
    # on spaces is assumed to be the number -- TODO confirm on your log format
    result["avg"] = result["avg"].str.split(" ").str.get(1)
    result["avg"] = pd.to_numeric(result["avg"])
    fig = plt.figure(1, figsize=(6, 4))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(result["avg"].values, label="Avg Loss", color="#ff7043")
    ax.legend(loc="best")
    ax.set_title("Avg Loss Curve")
    ax.set_xlabel("Batches")
    ax.set_ylabel("Avg Loss")
def drawIOU(iou_log_path):
    """Plot the smoothed Region-Avg-IOU curve from the extracted IOU log.

    :param iou_log_path: file produced by extract_log(..., "IOU")
    """
    # "rU" mode was removed in Python 3.11 ("r" is equivalent on Python 3);
    # the original also never closed this handle.
    with open(iou_log_path, "r") as f:
        line_cnt = sum(1 for _ in f)
    # keep only every 39th line after the first 5000 to thin the plot.
    # BUGFIX: the original wrote `x % 39 != 0 | (x < 5000)`, which parses as
    # `x % 39 != (0 | (x < 5000))` because bitwise `|` binds tighter than
    # `!=`; the parenthesized `or` below is the intended filter.
    result = pd.read_csv(iou_log_path,
                         skiprows=[x for x in range(line_cnt) if (x % 39 != 0) or (x < 5000)],
                         # NOTE(review): error_bad_lines was removed in pandas 2.0;
                         # use on_bad_lines="skip" there
                         error_bad_lines=False,
                         names=["Region Avg IOU", "Class", "Obj", "No Obj", "Avg Recall", "count"])
    # column text looks like "Region Avg IOU: 0.5"; take the numeric part
    result["Region Avg IOU"] = result["Region Avg IOU"].str.split(": ").str.get(1)
    result["Region Avg IOU"] = pd.to_numeric(result["Region Avg IOU"])
    result_iou = result["Region Avg IOU"].values
    # smooth single-sample spikes larger than 0.2
    for i in range(len(result_iou) - 1):
        iou = result_iou[i]
        iou_next = result_iou[i + 1]
        if abs(iou - iou_next) > 0.2:
            result_iou[i] = (iou + iou_next) / 2
    fig = plt.figure(2, figsize=(6, 4))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(result_iou, label="Region Avg IOU", color="#ff7043")
    ax.legend(loc="best")
    ax.set_title("Avg IOU Curve")
    ax.set_xlabel("Batches")
    ax.set_ylabel("Avg IOU")
if __name__ == "__main__":
    # Derived log files; each is (re)generated from g_log_path only when absent,
    # so delete them to force a fresh extraction.
    loss_log_path = "train_log_loss.txt"
    iou_log_path = "train_log_iou.txt"
    # bail out when the raw training log is missing
    if not os.path.exists(g_log_path):
        exit(-1)
    if not os.path.exists(loss_log_path):
        extract_log(g_log_path, loss_log_path, "images")
    if not os.path.exists(iou_log_path):
        extract_log(g_log_path, iou_log_path, "IOU")
    drawAvgLoss(loss_log_path)
    drawIOU(iou_log_path)
    plt.show()
运行:
python train.py
千万注意:训练中止时保存一下log文件,因为重新开始的时候是新的训练文件了,数据被清除了!!!
五、测试
1、测试图片
./darknet detect cfg/car.cfg backup/car_1501.weights /home/ming/mproject/mdetection/YOLO3/darknet/VOCdevkit/VOC2007/JPEGImages/MVI_20051__img00299.jpg
更换自己的权重文件和测试图片即可
2、测试视频
./darknet detector demo cfg/car.data cfg/car.cfg backup/car_1501.weights /home/ming/mproject/mdetection/YOLO3/data/car1.mp4
完工!