模型介绍
相较于YOLOv4和YOLOv5,YOLOv7在保持高速实时性能的同时,显著提高了检测精度。YOLOv7的核心技术包括:
- 新型骨干网络
- 改进的特征金字塔
- 更强大的目标检测头
- 集成了多种数据增强和训练策略
环境
华为云ai1s
CPU:Intel® Xeon® Gold 6278C CPU @ 2.60GHz
内存:8G
NPU:Ascend 310
操作系统:Ubuntu 18.04.4 LTS
实例地址
https://gitee.com/ascend/samples/tree/master/inference/modelInference/sampleYOLOV7
样例介绍
以YOLOV7网络模型为例,使能Acllite对图片进行预处理,并通过模型转换使能静态AIPP功能,使能AIPP功能后,YUV420SP_U8格式图片转化为RGB,然后减均值和归一化操作,并将该信息固化到转换后的离线模型中,对YOLOV7网络执行推理,对图片进行物体检测和分类,并给出标定框和类别置信度。
样例输入:图片。
样例输出:图片物体检测,并且在图片上给出物体标注框,类别以及置信度。
实践记录
下载代码仓
git clone https://gitee.com/ascend/samples/
cd samples
cp -r inference/modelInference/sampleYOLOV7 ../
cd sampleYOLOV7
依赖安装
设定环境变量
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest
export NPU_HOST_LIB=$DDK_PATH/runtime/lib64/stub
export THIRDPART_PATH=${DDK_PATH}/thirdpart
export LD_LIBRARY_PATH=${THIRDPART_PATH}/lib:$LD_LIBRARY_PATH
创建THIRDPART_PATH路径
mkdir -p ${THIRDPART_PATH}
安装x264
#下载地址
https://www.videolan.org/developers/x264.html
tar -xvf x264-master.tar.bz2
cd x264-master
# 安装x264
./configure --enable-shared --disable-asm
make
sudo make install
sudo cp /usr/local/lib/libx264.so.164 /lib
安装ffmpeg
wget http://www.ffmpeg.org/releases/ffmpeg-4.1.3.tar.gz --no-check-certificate
tar -zxvf ffmpeg-4.1.3.tar.gz
cd ffmpeg-4.1.3
# 安装ffmpeg
./configure --enable-shared --enable-pic --enable-static --disable-x86asm --enable-libx264 --enable-gpl --prefix=${THIRDPART_PATH}
make -j8
sudo make install
安装acllite
cd ${HOME}/samples/inference/acllite/aclliteCPP
make
make install
样例运行
数据准备
cd sampleYOLOV7/data
wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/models/aclsample/dog1_1024_683.jpg
ATC模型转换
# 为了方便下载,在这里直接给出原始模型下载及模型转换命令,可以直接拷贝执行。
cd model
wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/003_Atc_Models/yolov7/yolov7x.onnx
wget https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/003_Atc_Models/yolov7/aipp.cfg
atc --model=yolov7x.onnx --framework=5 --output=yolov7x --input_shape="images:1,3,640,640" --soc_version=Ascend310 --insert_op_conf=aipp.cfg
样例编译
cd $HOME/work/sampleYOLOV7/scripts
bash sample_build.sh
运行
bash sample_run.sh
运行结果
sampleYOLOV7.cpp文件内容
#include <dirent.h>
#include <opencv2/opencv.hpp>
#include "AclLiteUtils.h"
#include "AclLiteImageProc.h"
#include "AclLiteResource.h"
#include "AclLiteError.h"
#include "AclLiteModel.h"
#include "label.h"
using namespace std;
using namespace cv;
// Generic status code returned by SampleYOLOV7 methods and main().
// A plain unscoped enum is the idiomatic C++ form — the C-style
// `typedef enum ... Result;` wrapper was redundant. Kept unscoped (not
// `enum class`) so existing comparisons against integer error codes
// elsewhere in the file keep compiling.
enum Result {
    SUCCESS = 0,  // operation completed
    FAILED = 1    // operation failed; details are logged by the caller
};
// Axis-aligned detection box in source-image pixel coordinates.
// (x, y) is the box CENTER (YOLO convention — GetResult derives the
// corners as x -/+ width / 2), not the top-left corner.
typedef struct BoundBox {
    float x;            // center x, pixels
    float y;            // center y, pixels
    float width;        // box width, pixels
    float height;       // box height, pixels
    float score;        // final confidence = objectness * class score
    size_t classIndex;  // index into the label table
    size_t index;       // index of the box in the raw model output
} BoundBox;

// Comparator for std::sort: orders boxes by descending score so NMS
// visits the highest-confidence box first.
// Takes const references instead of by-value copies: the original
// copied a 32-byte struct on every comparison
// (clang-tidy performance-unnecessary-value-param).
bool sortScore(const BoundBox &box1, const BoundBox &box2)
{
    return box1.score > box2.score;
}
// End-to-end YOLOv7 detection pipeline on Ascend: ACL/DVPP resource
// setup, JPEG decode + resize preprocessing, offline-model inference,
// and post-processing (confidence filter, NMS, drawing, save).
// Typical call order: InitResource() once, then per image
// ProcessInput() -> Inference() -> GetResult().
class SampleYOLOV7 {
public:
// modelPath is stored as a raw pointer (not copied) — the caller must
// keep the string alive at least until InitResource() loads the model.
SampleYOLOV7(const char *modelPath, const int32_t modelWidth, const int32_t modelHeight);
// Initializes the ACL runtime, the DVPP processor and loads the .om model.
Result InitResource();
// Reads a JPEG, decodes it to YUV via DVPP and resizes it to the model
// input size; the result is cached in resizedImage_.
Result ProcessInput(string testImgPath);
// Runs the model on resizedImage_; output buffers are returned in inferOutputs.
Result Inference(std::vector<InferenceOutput>& inferOutputs);
// Decodes/filters detections and writes the annotated image to disk.
Result GetResult(std::vector<InferenceOutput>& inferOutputs, string imagePath, size_t imageIndex, bool release);
~SampleYOLOV7();
private:
void ReleaseResource();
AclLiteResource aclResource_;   // ACL device/context lifetime
AclLiteImageProc imageProcess_; // DVPP decode/resize helper
AclLiteModel model_;            // offline (.om) model wrapper
aclrtRunMode runMode_;          // host/device run mode, set in InitResource()
ImageData resizedImage_;        // preprocessed input, filled by ProcessInput()
const char *modelPath_;         // not owned; see constructor note
int32_t modelWidth_;            // model input width in pixels
int32_t modelHeight_;           // model input height in pixels
};
// Stores configuration only; all heavyweight setup (ACL init, DVPP
// init, model load) is deferred to InitResource(). The modelPath
// pointer is kept as-is and must outlive that call.
SampleYOLOV7::SampleYOLOV7(const char *modelPath, const int32_t modelWidth, const int32_t modelHeight) :
modelPath_(modelPath), modelWidth_(modelWidth), modelHeight_(modelHeight)
{
}
// Tears down model, DVPP and ACL resources via ReleaseResource().
SampleYOLOV7::~SampleYOLOV7()
{
ReleaseResource();
}
// Initializes, in order: the ACL runtime, the run-mode query, the DVPP
// image processor, and the offline model. Returns FAILED at the first
// failing step; members initialized before the failure are released
// later by the destructor via ReleaseResource().
// NOTE(review): every check compares the AclLite/ACL return code with
// the local FAILED enumerator (value 1), so any failure code other
// than 1 would be treated as success. `ret != ACLLITE_OK` is likely
// the intended test — confirm against AclLiteError.h before changing.
Result SampleYOLOV7::InitResource()
{
// init acl resource
AclLiteError ret = aclResource_.Init();
if (ret == FAILED) {
ACLLITE_LOG_ERROR("resource init failed, errorCode is %d", ret);
return FAILED;
}
// query whether we run on host or directly on the device; ProcessInput
// uses this to pick the right memory-copy direction
ret = aclrtGetRunMode(&runMode_);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("get runMode failed, errorCode is %d", ret);
return FAILED;
}
// init dvpp resource
ret = imageProcess_.Init();
if (ret == FAILED) {
ACLLITE_LOG_ERROR("imageProcess init failed, errorCode is %d", ret);
return FAILED;
}
// load model from file
ret = model_.Init(modelPath_);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("model init failed, errorCode is %d", ret);
return FAILED;
}
return SUCCESS;
}
// Preprocesses one image for inference:
// JPEG file -> host memory -> DVPP memory -> YUV decode -> resize to
// modelWidth_ x modelHeight_. The resized image is cached in
// resizedImage_ for the subsequent Inference() call.
// NOTE(review): the failure checks compare against FAILED (== 1);
// error codes other than 1 would be missed — confirm the intended
// convention against AclLiteError.h.
Result SampleYOLOV7::ProcessInput(string testImgPath)
{
// read image from file
ImageData image;
AclLiteError ret = ReadJpeg(image, testImgPath);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("ReadJpeg failed, errorCode is %d", ret);
return FAILED;
}
// copy image from host to dvpp (copy direction depends on runMode_)
ImageData imageDevice;
ret = CopyImageToDevice(imageDevice, image, runMode_, MEMORY_DVPP);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("CopyImageToDevice failed, errorCode is %d", ret);
return FAILED;
}
// image decoded from JPEG format to YUV
ImageData yuvImage;
ret = imageProcess_.JpegD(yuvImage, imageDevice);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("Convert jpeg to yuv failed, errorCode is %d", ret);
return FAILED;
}
// zoom image to modelWidth_ * modelHeight_
ret = imageProcess_.Resize(resizedImage_, yuvImage, modelWidth_, modelHeight_);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("Resize image failed, errorCode is %d", ret);
return FAILED;
}
return SUCCESS;
}
// Wraps resizedImage_ (produced by ProcessInput) in a model input
// dataset and executes the model; output buffers are returned through
// inferOutputs.
// NOTE(review): the two checks use different conventions —
// `ret == FAILED` (== 1) for CreateInput but `ret != ACL_SUCCESS` for
// Execute. The latter catches every nonzero error code; the former
// only catches code 1. Consider unifying on the robust form.
Result SampleYOLOV7::Inference(std::vector<InferenceOutput>& inferOutputs)
{
// create input data set of model
AclLiteError ret = model_.CreateInput(static_cast<void *>(resizedImage_.data.get()), resizedImage_.size);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("CreateInput failed, errorCode is %d", ret);
return FAILED;
}
// inference
ret = model_.Execute(inferOutputs);
if (ret != ACL_SUCCESS) {
ACLLITE_LOG_ERROR("execute model failed, errorCode is %d", ret);
return FAILED;
}
return SUCCESS;
}
// Post-processes the raw model output for one image:
//  1. decode the 25200 candidate boxes and keep those whose objectness
//     passes the confidence threshold,
//  2. apply class-aware non-maximum suppression,
//  3. draw the surviving boxes + labels on the source image and save it
//     as out_<imageIndex>.jpg.
//
// inferOutputs : model outputs; buffer 0 holds the detection tensor
//                laid out as 25200 x (x, y, w, h, objectness, 80 class
//                scores) floats.
// imagePath    : path of the original JPEG, re-read with OpenCV so the
//                boxes can be drawn at native resolution.
// imageIndex   : used only to name the output file.
// release      : kept for interface compatibility but ignored. The
//                output buffer is owned by the InferenceOutput smart
//                pointer (inferOutputs[0].data), so the original code's
//                free(classBuff) was a double free / undefined behavior
//                waiting to happen. It was also dead code in practice:
//                main() never actually passes release == true.
Result SampleYOLOV7::GetResult(std::vector<InferenceOutput>& inferOutputs,
    string imagePath, size_t imageIndex, bool release)
{
    (void)release;  // intentionally unused, see note above
    uint32_t outputDataBufId = 0;
    float *classBuff = static_cast<float *>(inferOutputs[outputDataBufId].data.get());
    // objectness threshold used to pre-filter candidate boxes
    float confidenceThreshold = 0.25;
    // number of classes the model predicts
    size_t classNum = 80;
    // per-box prefix: x, y, width, height, objectness
    size_t offset = 5;
    // stride of one box record = prefix + one score per class
    size_t totalNumber = classNum + offset;
    // number of candidate boxes the model emits for a 640x640 input
    size_t modelOutputBoxNum = 25200;
    // indexes 0..4 are (x, y, width, height, objectness),
    // indexes 5..84 are the per-class scores
    size_t startIndex = 5;
    // read source image from file so drawing happens at native resolution
    cv::Mat srcImage = cv::imread(imagePath);
    int srcWidth = srcImage.cols;
    int srcHeight = srcImage.rows;

    // Step 1: decode boxes and filter by objectness threshold.
    vector<BoundBox> boxes;
    size_t yIndex = 1;
    size_t widthIndex = 2;
    size_t heightIndex = 3;
    size_t classConfidenceIndex = 4;
    for (size_t i = 0; i < modelOutputBoxNum; ++i) {
        // best class for this box: argmax over objectness * class score.
        // maxIndex is a size_t — the original used float and relied on
        // implicit float->integer conversions for indexing.
        float maxValue = 0;
        size_t maxIndex = 0;
        for (size_t j = startIndex; j < totalNumber; ++j) {
            float value = classBuff[i * totalNumber + j] * classBuff[i * totalNumber + classConfidenceIndex];
            if (value > maxValue) {
                // index of class
                maxIndex = j - startIndex;
                maxValue = value;
            }
        }
        float classConfidence = classBuff[i * totalNumber + classConfidenceIndex];
        if (classConfidence >= confidenceThreshold) {
            // position of the winning class's score in the flat buffer
            size_t index = i * totalNumber + maxIndex + startIndex;
            // finalConfidence = objectness * class score
            float finalConfidence = classConfidence * classBuff[index];
            BoundBox box;
            // scale from model input space back to source-image space
            box.x = classBuff[i * totalNumber] * srcWidth / modelWidth_;
            box.y = classBuff[i * totalNumber + yIndex] * srcHeight / modelHeight_;
            box.width = classBuff[i * totalNumber + widthIndex] * srcWidth / modelWidth_;
            box.height = classBuff[i * totalNumber + heightIndex] * srcHeight / modelHeight_;
            box.score = finalConfidence;
            box.classIndex = maxIndex;
            box.index = i;
            if (maxIndex < classNum) {
                boxes.push_back(box);
            }
        }
    }

    // Step 2: class-aware non-maximum suppression.
    vector<BoundBox> result;
    result.clear();
    float NMSThreshold = 0.45;
    int32_t maxLength = modelWidth_ > modelHeight_ ? modelWidth_ : modelHeight_;
    std::sort(boxes.begin(), boxes.end(), sortScore);
    BoundBox boxMax;
    BoundBox boxCompare;
    while (boxes.size() != 0) {
        size_t index = 1;
        result.push_back(boxes[0]);
        // boxes[0] is invariant for the whole inner loop (only elements at
        // index >= 1 are erased), so build boxMax once instead of on every
        // comparison as the original did.
        boxMax.score = boxes[0].score;
        boxMax.classIndex = boxes[0].classIndex;
        boxMax.index = boxes[0].index;
        // translate each box by maxLength * classIndex so boxes of
        // different classes can never overlap (per-class NMS)
        boxMax.x = boxes[0].x + maxLength * boxes[0].classIndex;
        boxMax.y = boxes[0].y + maxLength * boxes[0].classIndex;
        boxMax.width = boxes[0].width;
        boxMax.height = boxes[0].height;
        while (boxes.size() > index) {
            boxCompare.score = boxes[index].score;
            boxCompare.classIndex = boxes[index].classIndex;
            boxCompare.index = boxes[index].index;
            // same per-class translation as boxMax
            boxCompare.x = boxes[index].x + boxes[index].classIndex * maxLength;
            boxCompare.y = boxes[index].y + boxes[index].classIndex * maxLength;
            boxCompare.width = boxes[index].width;
            boxCompare.height = boxes[index].height;
            // intersection rectangle of the two (translated) boxes
            float xLeft = max(boxMax.x, boxCompare.x);
            float yTop = max(boxMax.y, boxCompare.y);
            float xRight = min(boxMax.x + boxMax.width, boxCompare.x + boxCompare.width);
            float yBottom = min(boxMax.y + boxMax.height, boxCompare.y + boxCompare.height);
            float interWidth = max(0.0f, xRight - xLeft);
            float interHeight = max(0.0f, yBottom - yTop);
            float area = interWidth * interHeight;
            float iou = area / (boxMax.width * boxMax.height + boxCompare.width * boxCompare.height - area);
            // drop the lower-scored box when it overlaps too much
            if (iou > NMSThreshold) {
                boxes.erase(boxes.begin() + index);
                continue;
            }
            ++index;
        }
        boxes.erase(boxes.begin());
    }

    // Step 3: draw boxes and labels, then save the annotated image.
    const double fountScale = 0.5;
    const uint32_t lineSolid = 2;
    const uint32_t labelOffset = 11;
    const cv::Scalar fountColor(0, 0, 255);
    const vector<cv::Scalar> colors{
        cv::Scalar(237, 149, 100), cv::Scalar(0, 215, 255),
        cv::Scalar(50, 205, 50), cv::Scalar(139, 85, 26)};
    int half = 2;
    for (size_t i = 0; i < result.size(); ++i) {
        // (x, y) is the box center, so corners are center -/+ half extent
        cv::Point leftUpPoint, rightBottomPoint;
        leftUpPoint.x = result[i].x - result[i].width / half;
        leftUpPoint.y = result[i].y - result[i].height / half;
        rightBottomPoint.x = result[i].x + result[i].width / half;
        rightBottomPoint.y = result[i].y + result[i].height / half;
        cv::rectangle(srcImage, leftUpPoint, rightBottomPoint, colors[i % colors.size()], lineSolid);
        string className = label[result[i].classIndex];
        string markString = to_string(result[i].score) + ":" + className;
        cv::putText(srcImage, markString, cv::Point(leftUpPoint.x, leftUpPoint.y + labelOffset),
            cv::FONT_HERSHEY_COMPLEX, fountScale, fountColor);
    }
    string savePath = "out_" + to_string(imageIndex) + ".jpg";
    cv::imwrite(savePath, srcImage);
    return SUCCESS;
}
// Releases resources in reverse order of acquisition: the model, then
// the DVPP image processor, then the ACL runtime. Called from the
// destructor; presumably each DestroyResource/Release call is a no-op
// when the member was never initialized — TODO confirm against the
// AclLite implementation.
void SampleYOLOV7::ReleaseResource()
{
model_.DestroyResource();
imageProcess_.DestroyResource();
aclResource_.Release();
}
// Entry point: runs YOLOv7 detection over every regular entry found in
// ../data, writing an annotated copy out_<i>.jpg per image.
int main()
{
const char* modelPath = "../model/yolov7x.om";
const string imagePath = "../data";
const int32_t modelWidth = 640;
const int32_t modelHeight = 640;
// collect all entries in the data directory (skipping ., .. and .keep)
DIR *dir = opendir(imagePath.c_str());
if (dir == nullptr)
{
// NOTE(review): "does no exist" is a typo for "does not exist" in this
// user-facing log message.
ACLLITE_LOG_ERROR("file folder does no exist, please create folder %s", imagePath.c_str());
return FAILED;
}
vector<string> allPath;
struct dirent *entry;
while ((entry = readdir(dir)) != nullptr)
{
if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0
|| strcmp(entry->d_name, ".keep") == 0)
{
continue;
}else{
string name = entry->d_name;
string imgDir = imagePath +"/"+ name;
allPath.push_back(imgDir);
}
}
closedir(dir);
if (allPath.size() == 0){
ACLLITE_LOG_ERROR("the directory is empty, please download image to %s", imagePath.c_str());
return FAILED;
}
// inference
string fileName;
bool release = false;
SampleYOLOV7 sampleYOLO(modelPath, modelWidth, modelHeight);
Result ret = sampleYOLO.InitResource();
if (ret == FAILED) {
ACLLITE_LOG_ERROR("InitResource failed, errorCode is %d", ret);
return FAILED;
}
// process each image: preprocess -> inference -> postprocess/draw
for (size_t i = 0; i < allPath.size(); i++)
{
// NOTE(review): this condition is never true inside the loop (the loop
// guard guarantees i < allPath.size()), so `release` stays false and
// GetResult never frees the output buffer. The apparent intent was
// `i == allPath.size() - 1`, but enabling that would make GetResult
// free() memory owned by a smart pointer — verify before changing.
if (allPath.size() == i){
release = true;
}
std::vector<InferenceOutput> inferOutputs;
fileName = allPath.at(i).c_str();
ret = sampleYOLO.ProcessInput(fileName);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("ProcessInput image failed, errorCode is %d", ret);
return FAILED;
}
ret = sampleYOLO.Inference(inferOutputs);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("Inference failed, errorCode is %d", ret);
return FAILED;
}
ret = sampleYOLO.GetResult(inferOutputs, fileName, i, release);
if (ret == FAILED) {
ACLLITE_LOG_ERROR("GetResult failed, errorCode is %d", ret);
return FAILED;
}
}
return SUCCESS;
}