版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sac761/article/details/75043857
一,不使用VEC-C直方图矢量优化,用纯C++写的参考源码,作用是求取一张图片的直方图:
#include <fstream>
#include <sstream>
#include <iostream>
#include <vec-c.h>
using namespace std;
unsigned char source1[512*512];
// Reference (scalar) histogram: reads one decimal pixel value per line from
// lena.txt into the global source1 buffer, counts how often each 8-bit
// intensity occurs in a 512x512 image, and writes the result as "bin,count"
// lines to lena_hist_nocv.txt.
// Returns 0 on success, 1 if either file cannot be opened.
int main()
{
    const int width = 512;   // image width in pixels
    const int height = 512;  // image height in pixels
    const int pixelCount = width * height;
    // Never write more pixels than the global buffer can hold.
    const int capacity = static_cast<int>(sizeof(source1) / sizeof(source1[0]));

    const char* txt = "E:\\test\\lena.txt";
    const char* histtxt = "E:\\test\\lena_hist_nocv.txt";

    std::ifstream inf(txt);
    if (!inf.is_open())
    {
        // Without this check a failed open never sets eofbit, so an
        // eof()-driven read loop would spin forever.
        std::cerr << "cannot open input file: " << txt << "\n";
        return 1;
    }

    std::ofstream ofh(histtxt);
    if (!ofh.is_open())
    {
        std::cerr << "cannot open output file: " << histtxt << "\n";
        return 1;
    }

    // Drive the loop with getline's own result instead of eof(): the
    // eof()-first form stores one phantom pixel after the last line, which
    // lands one element past the end of source1 for a full-size image.
    std::string line;
    int loaded = 0;
    while (loaded < capacity && std::getline(inf, line))
    {
        const int value = atoi(line.c_str());
        source1[loaded++] = static_cast<unsigned char>(value & 0xff);
    }
    if (loaded < pixelCount)
    {
        // Pixels that were not loaded keep the buffer's zero-initialised
        // value and are therefore counted in bin 0.
        std::cerr << "warning: read only " << loaded << " of " << pixelCount
                  << " pixels\n";
    }

    int hist[256] = {0};
    for (int h = 0; h < height; h++)
    {
        for (int w = 0; w < width; w++)
        {
            const int intensity = source1[h * width + w];
            hist[intensity] += 1;
        }
    }

    // Write every bin, including bin 0 (the earlier loop started at 1 and
    // silently dropped it).
    for (int bin = 0; bin < 256; bin++)
    {
        ofh << bin << "," << hist[bin] << "\n";
    }

    ofh.flush();
    ofh.close();
    inf.close();
    return 0;
}
二,修改为VEC-C矢量优化版:
#include <fstream>
#include <sstream>
#include <iostream>
#include <vec-c.h>
using namespace std;
unsigned char source[512*512];
int histv[256];
unsigned short p_u16DstB[256*16];
void sum_histogram2_nocv(unsigned short* p_u16SrcB0, unsigned int* p_u32Dst);
void histogram_nocv(unsigned char* p_u8Src, unsigned short* p_u16DstB, int s32N, int s32M);
// VEC-C histogram driver: reads one decimal pixel value per line from
// lena.txt into the global source buffer, builds the histogram with
// histogram_nocv + sum_histogram2_nocv, and writes "bin,count" lines to
// lena_hist_vc_nocv.txt.
// Returns 0 on success, 1 if either file cannot be opened.
int main()
{
    const int width = 512;   // image width in pixels
    const int height = 512;  // image height in pixels
    const int pixelCount = width * height;
    // Never write more pixels than the global buffer can hold.
    const int capacity = static_cast<int>(sizeof(source) / sizeof(source[0]));

    const char* txt = "E:\\test\\lena.txt";
    const char* histtxt = "E:\\test\\lena_hist_vc_nocv.txt";

    std::ifstream inf(txt);
    if (!inf.is_open())
    {
        // Without this check a failed open never sets eofbit, so an
        // eof()-driven read loop would spin forever.
        std::cerr << "cannot open input file: " << txt << "\n";
        return 1;
    }

    std::ofstream ofh(histtxt);
    if (!ofh.is_open())
    {
        std::cerr << "cannot open output file: " << histtxt << "\n";
        return 1;
    }

    // Drive the loop with getline's own result instead of eof(): the
    // eof()-first form stores one phantom pixel after the last line, which
    // lands one element past the end of source for a full-size image.
    std::string line;
    int loaded = 0;
    while (loaded < capacity && std::getline(inf, line))
    {
        const int value = atoi(line.c_str());
        source[loaded++] = static_cast<unsigned char>(value & 0xff);
    }
    if (loaded < pixelCount)
    {
        // Pixels that were not loaded keep the buffer's zero-initialised
        // value and are therefore counted in bin 0.
        std::cerr << "warning: read only " << loaded << " of " << pixelCount
                  << " pixels\n";
    }

    // Stage 1: accumulate into the intermediate p_u16DstB table
    // (256*16 unsigned shorts; presumably 16 partial counters per bin --
    // confirm against the vhist documentation).
    histogram_nocv(source, p_u16DstB, height, width);

    // Stage 2: reduce the intermediate table to the final 256 counts.
    sum_histogram2_nocv(p_u16DstB, reinterpret_cast<unsigned int*>(histv));

    // Write every bin, including bin 0 (the earlier loop started at 1 and
    // silently dropped it).
    for (int bin = 0; bin < 256; bin++)
    {
        ofh << bin << "," << histv[bin] << "\n";
    }

    ofh.flush();
    ofh.close();
    inf.close();
    return 0;
}
// Reduces the intermediate histogram table to 256 final bin counts.
//
// p_u16SrcB0: intermediate table; the loop below consumes 128 * 32 = 4096
//             unsigned shorts from it (32 per pair of bins).
// p_u32Dst:   output array; entries 0..255 are overwritten.
//
// NOTE(review): the comments on the intrinsics describe what their names and
// usage suggest (gather load, widen halves, horizontal sum) -- confirm against
// the VEC-C intrinsics reference.
void sum_histogram2_nocv(unsigned short* p_u16SrcB0, unsigned int* p_u32Dst)
{
short16 inN,inM;
// Even element offsets 0,2,...,30 within a 32-element group.
short in[16]={0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30};
inN=*(short16*)in;
// Presumably adds 1 to every lane, giving the odd offsets 1,3,...,31.
inM = vadd(inN, (short)1);
unsigned short* src=p_u16SrcB0;
// Each iteration produces two output bins (i and i+1) from one 32-element group.
for(int i=0;i<256;i+=2)
{
// Bin i: presumably gathers the 16 even-offset elements of the group.
short16 v0=(short16)vpld(src,inN);
short8 vec_lo = vunpack_lo(v0);
short8 vec_hi = vunpack_hi(v0);
// Presumably a horizontal sum of each half; the two halves are added together.
int sum=vintrasum((int8)vec_lo);
sum+=vintrasum((int8)vec_hi);
p_u32Dst[i]=sum;
// Bin i+1: same reduction over the 16 odd-offset elements of the group.
short16 v1=(short16)vpld(src,inM);
short8 vec_lo1 = vunpack_lo(v1);
short8 vec_hi1 = vunpack_hi(v1);
int sum1=vintrasum((int8)vec_lo1);
sum1+=vintrasum((int8)vec_hi1);
p_u32Dst[i+1]=sum1;
// Advance to the next 32-element group.
src+=32;
}
// Disabled debugging aids kept from the original listing.
//vst(vec_lo,(short*)tmp,(short)0xffff);
/*for(int i=0;i<256;i++)
{
if(p_u32Dst[i]!=0)
cout<<i<<":"<<p_u32Dst[i]<<",";
}*/
}
void histogram_nocv(unsigned char* p_u8Src, unsigned short* p_u16DstB, int s32N, int s32M)
{
unsigned char* src=p_u8Src;
unsigned short mask=0xffff;
for(int i=0;i<s32N*s32M;i+=16)
{
if(i+16>s32N*s32M)
{
mask = (1 << ((s32N*s32M-i) & 15)) - 1;
short16 v0=(short16)*(uchar16*)src;
vhist((short*)p_u16DstB,v0,mask);
}
else
{
short16 v0=(short16)*(uchar16*)src;
vhist((short*)p_u16DstB,v0);
src+=16;
}
}
}
这两版在cycle数上肯定是不一样的,VEC-C版本要快很多。对比它们生成的汇编代码,VEC-C版会有更多的双竖线(||),双竖线表示这些指令是并行执行的,也就是说VEC-C版把很多过程都并行起来了,这就是矢量优化技术。