import sklearn
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigs
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import roc_auc_score
# np.set_printoptions(threshold='nan')
class Metric(object):
    """Evaluation metrics for multi-label / multi-class classification.

    Both matrices are expected to have shape (num_instances, num_labels):
      output -- predicted scores (or 0/1 predictions) per label
      label  -- ground-truth 0/1 indicator matrix

    NOTE(review): parameter names `threash` (sic) and `type` (shadows the
    builtin) are kept as-is for backward compatibility with existing callers.
    """

    def __init__(self, output, label):
        self.output = output  # prediction score/label matrix
        self.label = label    # true label matrix

    def accuracy_subset(self, threash=0.5):
        """Subset (exact-match) accuracy: a sample counts only if every
        label is predicted correctly after binarizing at `threash`."""
        y_pred = np.where(self.output > threash, 1, 0)
        return accuracy_score(self.label, y_pred)

    def accuracy_mean(self, threash=0.5):
        """Element-wise accuracy: fraction of individual (sample, label)
        entries predicted correctly after binarizing at `threash`."""
        y_pred = np.where(self.output > threash, 1, 0)
        return np.mean(np.equal(self.label, y_pred))

    def accuracy_multiclass(self):
        """Single-label accuracy using the argmax of each row.

        accuracy_score is symmetric, but arguments are passed in the
        documented (y_true, y_pred) order for clarity.
        """
        return accuracy_score(np.argmax(self.label, 1), np.argmax(self.output, 1))

    def micfscore(self, threash=0.5, type='micro'):
        """Micro-averaged F1 after binarizing predictions at `threash`.

        BUG FIX: f1_score expects (y_true, y_pred); the original passed
        them swapped, which silently exchanges precision and recall.
        """
        y_pred = np.where(self.output > threash, 1, 0)
        return f1_score(self.label, y_pred, average=type)

    def macfscore(self, threash=0.5, type='macro'):
        """Macro-averaged F1 after binarizing predictions at `threash`.

        BUG FIX: argument order corrected to (y_true, y_pred); macro
        averaging is NOT symmetric under swapping them.
        """
        y_pred = np.where(self.output > threash, 1, 0)
        return f1_score(self.label, y_pred, average=type)

    def hamming_distance(self, threash=0.5):
        """Hamming loss: fraction of wrongly predicted label entries."""
        y_pred = np.where(self.output > threash, 1, 0)
        return hamming_loss(self.label, y_pred)

    def fscore_class(self, type='micro'):
        """F1 on the argmax-reduced (single-label) predictions.

        BUG FIX: argument order corrected to (y_true, y_pred).
        """
        return f1_score(np.argmax(self.label, 1), np.argmax(self.output, 1), average=type)

    def auROC(self):
        """Per-class ROC-AUC (printed), returned as the macro average.

        BUG FIXES versus the original:
          * read self.label instead of the *global* `label` (NameError
            outside the demo script at the bottom of this file);
          * average correctly: the original's running `ROC` variable still
            held the last class's AUC before the summation loop, and the
            divisor was col+1 instead of col, so the returned value did not
            match MacroAUC even when the per-class AUCs agreed.
        """
        y_pred = self.output
        y_true = self.label
        _, col = y_true.shape
        per_class = []
        for i in range(col):
            roc = roc_auc_score(y_true[:, i], y_pred[:, i],
                                average='micro', sample_weight=None)
            print("%d th AUROC: %f" % (i, roc))
            per_class.append(roc)
        return float(np.mean(per_class))

    def MacroAUC(self):
        """Hand-rolled macro AUC via pairwise rank comparison.

        For each label, AUC = P(score of a random positive > score of a
        random negative), counting ties as 0.5. Labels on which every
        instance is positive (or every instance negative) have undefined
        AUC; they are recorded as 0 and excluded from the macro average.

        Returns:
            (macro_auc, auc) where auc has shape (num_class, 1).
        """
        y_pred = self.output  # (num_instance, num_label) scores
        y_true = self.label   # (num_instance, num_label) 0/1 targets
        num_instance, num_class = y_pred.shape
        auc = np.zeros((num_class, 1))  # per-label AUC
        # Renamed from the original's misleading `count_valid_label`:
        # it counts DEGENERATE (all-pos / all-neg) labels.
        num_degenerate = 0
        for i in range(num_class):
            # Only column i is ever used, so slice it directly instead of
            # allocating full (n, num_class) temp matrices as before.
            pos_scores = y_pred[y_true[:, i] == 1, i]
            neg_scores = y_pred[y_true[:, i] == 0, i]
            n_pos = len(pos_scores)
            n_neg = len(neg_scores)
            if n_pos == 0 or n_neg == 0:
                auc[i, 0] = 0  # undefined; excluded from the average below
                num_degenerate += 1
            else:
                count = 0.0  # number of correctly ordered (pos, neg) pairs
                for p in pos_scores:
                    for q in neg_scores:
                        if p > q:
                            count += 1
                        elif p == q:
                            count += 0.5  # ties count half
                auc[i, 0] = count / (n_pos * n_neg)
        macro_auc = sum(auc) / (num_class - num_degenerate)
        return float(macro_auc), auc
if __name__ == '__main__':
    # Toy data: 6 samples, 5 binary labels each.
    # NOTE: `output` and `label` stay module-level names on purpose --
    # the class's auROC() reads the global `label`.
    output = np.array([
        [1, 0, 0, 0, 1],
        [1, 1, 0, 1, 0],
        [0, 1, 0, 0, 1],
        [1, 0, 1, 0, 1],
        [1, 0, 1, 1, 1],
        [1, 1, 0, 0, 1],
    ])
    label = np.array([
        [1, 0, 1, 0, 1],
        [1, 1, 0, 1, 0],
        [0, 1, 0, 0, 1],
        [0, 1, 0, 0, 1],
        [0, 0, 1, 0, 1],
        [1, 1, 0, 0, 1],
    ])

    # Compare the sklearn-based average against the hand-rolled macro AUC.
    myMetic = Metric(output, label)
    AUROC = myMetic.auROC()
    AUROC1, auc = myMetic.MacroAUC()
    print("AUROC: ", (AUROC))
    print("MacroAUC: ", (AUROC1))
    print(": ", (auc))
# auROC 方法是基于 sklearn 自带函数写的, MacroAUC 是自己写的, 下面是输出的结果:
# 比较奇怪的是 2 个方法输出的每个类别的 AUC 值一样, 但合并(求平均)以后就不一样了。
# 原因在 auROC 的平均方式: 进入求和循环前, 变量 ROC 已经保存了最后一个类别的 AUC,
# 累加后又除以 col+1 而不是 col, 所以返回值偏离了真正的 macro 平均。
# (代码里给出了数据, 即每个样本的多个标签预测值和实际值矩阵, k 为样本数, c 为预测的总类别数, 可以复制代码运行试试)