在进行 SVM 训练时,有一个参数 svm_scaling scaling,用于对数据进行归一化(缩放)操作;《A Practical Guide to Support Vector Classification》一文(https://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf)对此有介绍。下面汇总了数据缩放相关的源码实现:生成缩放因子、加载缩放因子,以及对测试数据进行与训练数据相同的缩放。
//svm_wrapper.cpp
//生成缩放因子
void
pcl::SVMTrain::scaleFactors (std::vector<SVMData> training_set, svm_scaling &scaling)
{
int max = 0;
for (size_t i = 0; i < training_set.size() ; i++)
for (size_t j = 0; j < training_set[i].SV.size() ; j++)
if (training_set[i].SV[j].idx > max)
max = training_set[i].SV[j].idx; // max number of features
max += 1;
scaling.obj = Malloc (struct svm_node, max + 1);
scaling.max = max;
scaling.obj[max].index = -1; // last index is -1
for (int i = 0; i < max; i++) // Initialize values,scaling大小为最大特征向量
{
scaling.obj[i].index = 0;
scaling.obj[i].value = 0;
}
for (size_t i = 0; i < training_set.size(); i++)
for (size_t j = 0; j < training_set[i].SV.size(); j++)
// save scaling factor finding the maximum value,保存每个idx位置对应的最大特征值为缩放尺度因子
if (module (training_set[i].SV[j].value) > scaling.obj[ training_set[i].SV[j].idx ].value)
{
scaling.obj[ training_set[i].SV[j].idx ].index = 1;
scaling.obj[ training_set[i].SV[j].idx ].value = module (training_set[i].SV[j].value);
}
};
//svm.cpp
//加载缩放因子
// Handles the "scaling" entry: reads the rest of the current line, which is
// expected to hold "index:value" pairs separated by blanks or tabs, and
// stores them in model->scaling. The resulting array is subscripted by
// feature index: listed indices get index == 1 and the parsed value, skipped
// indices get index == 0 and value == 0, and the node after the last listed
// index gets index == -1 as terminator.
if (res > 0 && strcmp (cmd, "scaling") == 0)
{
  char *idx, *val, buff[10000];
  int ii = 0, pre_ii = 0;
  model->scaling = Malloc (struct svm_node, 1);
  // If fscanf matches nothing (e.g. an empty line) it leaves buff untouched,
  // so make sure strtok below always sees a valid, empty string.
  buff[0] = '\0';
  // The field width does not count the terminating NUL that fscanf appends,
  // so it has to be one less than the size of buff.
  res = fscanf (fp, "%9999[^\n]", buff);
  idx = strtok (buff, ":");
  while (idx != NULL)
  {
    val = strtok (NULL, " \t");
    // An index without a value (e.g. stray trailing whitespace) ends the list
    // instead of handing NULL to atof.
    if (val == NULL)
      break;
    const int parsed = atoi (idx);
    // Indices must not go backwards: a negative or already-covered index would
    // shrink the array and write outside of it, so such pairs are skipped.
    if (parsed >= ii)
    {
      pre_ii = ii;
      ii = parsed;
      model->scaling = Realloc (model->scaling, struct svm_node, ii + 2);
      // Zero every slot between the previous entry and this one. pre_ii is
      // already the first unwritten slot, so the fill starts there (this also
      // covers slot 0 when the first listed index is greater than 0).
      for (int j = pre_ii; j < ii; j++)
      {
        model->scaling[j].index = 0;
        model->scaling[j].value = 0;
      }
      model->scaling[ii].index = 1;
      model->scaling[ii].value = atof (val);
      ++ii;
    }
    idx = strtok (NULL, ":");
  }
  // ii is now the slot right after the last stored entry: mark the end.
  model->scaling[ii].index = -1;
}
//svm_wrapper.cpp
//对测试数据进行与训练数据同样的缩放
// Applies previously computed scaling factors to a problem, in place.
//
// input   : problem whose input.l rows (input.x[row]) are node arrays, each
//           terminated by a node with index == -1.
// scaling : table subscripted by feature index; scaling.max bounds the valid
//           subscripts and must be non-zero (asserted).
//
// A feature value is divided by scaling.obj[index].value only when its index
// is below scaling.max and the matching table slot is flagged with
// index == 1; every other feature is left unchanged.
void
pcl::SVMClassify::scaleProblem (svm_problem &input, svm_scaling scaling)
{
  assert (scaling.max != 0);

  for (int row = 0; row < input.l; ++row)
  {
    // Walk the row until the terminator node is reached.
    for (int col = 0; input.x[row][col].index != -1; ++col)
    {
      const int feature = input.x[row][col].index;

      // Skip features with no usable scaling factor.
      if (feature >= scaling.max || scaling.obj[feature].index != 1)
        continue;

      input.x[row][col].value /= scaling.obj[feature].value;
    }
  }
}