版权声明:转载需写明出处 https://blog.csdn.net/qq_23860475/article/details/80668323
原理、计算过程
MATLAB 代码
clc;clear
% Compute the information gain and the information gain ratio (the C4.5
% split criterion) of every feature column with respect to the target.
%
% Workspace variables produced:
%   e            entropy of the target column
%   ev(i)        conditional entropy of the target given feature i
%   in_gain(i)   information gain of feature i   (e - ev(i))
%   gain_ration(i) gain ratio of feature i       (in_gain(i) / split info)
%
% Each row of data is one sample. The last column is the target variable,
% every other column is a categorical feature variable.
data=[1 2 2 1 0 0
    2 2 2 1 1 0
    3 2 1 1 0 1
    5 0 0 0 0 1
    6 0 1 0 1 1
    1 1 2 1 0 0
    3 0 2 0 0 1
    4 1 0 0 0 1
    2 1 2 0 1 1
    3 1 1 1 1 1
    4 2 1 0 0 1
    5 1 2 1 1 0
    1 0 2 0 1 0
    5 1 2 1 0 1];
[r,c]=size(data);               % r samples, c columns (c-1 features + target)

% Distinct values taken by every column (features and target).
group_value=cell(1,c);
for i=1:c
    group_value{i}=unique(data(:,i));
end

% lable_value(:,1) = distinct target values, lable_value(:,2) = their counts
% over the whole data set.
n_lable=length(group_value{end});
lable_value=zeros(n_lable,2);
lable_value(:,1)=group_value{end};

% Entropy of the target column. Every value comes from unique(), so each
% count is at least 1 and log2 never sees 0.
e=0;
for i=1:n_lable
    lable_value(i,2)=sum(data(:,end)==lable_value(i,1));
    p=lable_value(i,2)/r;
    e=e-p*log2(p);
end

n_feature=c-1;
ev=zeros(n_feature,1);          % conditional entropy per feature
in_gain=zeros(n_feature,1);     % information gain per feature
gain_ration=zeros(n_feature,1); % information gain ratio per feature
for i=1:n_feature
    % Split information of feature i. It must accumulate over ALL values of
    % the feature, so it is initialised once per feature, not per value.
    info=0;
    for j=1:length(group_value{i})
        % Target labels of the samples whose feature i equals the j-th value.
        variable_value_lable=data(data(:,i)==group_value{i}(j),end);
        n_sub=length(variable_value_lable);

        % Entropy of the target inside this subset.
        ev_value=0;
        for k=1:n_lable
            % Local counter: do not overwrite the global counts kept in
            % lable_value(:,2).
            n_k=sum(variable_value_lable==lable_value(k,1));
            if n_k~=0           % skip empty classes, 0*log2(0) is taken as 0
                p=n_k/n_sub;
                ev_value=ev_value-p*log2(p);
            end
        end

        w=n_sub/r;              % fraction of samples falling in this subset
        ev(i)=ev(i)+w*ev_value;
        info=info-w*log2(w);
    end
    in_gain(i,1)=e-ev(i);
    if info>0
        gain_ration(i,1)=in_gain(i,1)/info;
    else
        % A feature with a single distinct value has zero split information
        % (and zero gain); report 0 instead of NaN from 0/0.
        gain_ration(i,1)=0;
    end
end
disp('各特征信息增益率为:')
disp(gain_ration)
运行结果
备注:承接模型、算法代码实现(支持 Python、MATLAB),有意请联系 QQ 947943645,非诚勿扰!