机器学习算法-感知机
数据CSV:
颜色 大小 测试人员 测试动作 结果
0 黄色 小 成人 用手打 不爆炸
1 黄色 小 成人 用脚踩 爆炸
2 黄色 小 小孩 用手打 不爆炸
3 黄色 小 小孩 用脚踩 爆炸
4 黄色 小 小孩 用脚踩 爆炸
5 黄色 小 小孩 用脚踩 爆炸
6 黄色 大 成人 用手打 爆炸
7 黄色 大 成人 用脚踩 爆炸
8 黄色 大 小孩 用手打 不爆炸
9 紫色 小 成人 用手打 不爆炸
10 紫色 小 小孩 用手打 不爆炸
11 紫色 大 小孩 用手打 不爆炸
原始形式
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
# Load the sample table; the file is decoded as GBK.
dataset = pd.read_csv('test.csv', encoding='gbk')
# Every column except the last is treated as a feature; the last is the label.
featNames = dataset.columns[:-1]
labelNames = np.unique(dataset.values[:, -1])
feat, label = dataset.values[:, 0:-1], dataset.values[:, -1]
def codeXByLabelEncoder(feat):
    """Label-encode every column of a 2-D categorical feature matrix.

    Each column is encoded independently with scikit-learn's
    ``LabelEncoder``, so the codes of different columns are unrelated.

    Args:
        feat: 2-D array-like of categorical values (rows are samples).

    Returns:
        A new array of the same shape holding the integer codes; the
        input is not modified.
    """
    tmp = np.array(feat)  # always a copy, also accepts nested lists
    if tmp.dtype.kind in 'US':
        # A fixed-width string array would store the integer codes as
        # text ('0', '1', ...); switch to object so they stay numeric.
        tmp = tmp.astype(object)
    encoder = preprocessing.LabelEncoder()
    for i in range(tmp.shape[1]):
        # The encoder is re-fitted per column.
        tmp[:, i] = encoder.fit_transform(tmp[:, i])
    return tmp
# Integer-encode the raw feature matrix for training.
x = codeXByLabelEncoder(feat)
def codeYByLabelEncoder(label):
    """Map the textual outcome labels to perceptron targets.

    '不爆炸' becomes -1 and '爆炸' becomes 1; any other value is left
    unchanged.

    Args:
        label: 1-D array-like of label values.

    Returns:
        A new array with the mapped values; the input is not modified.
    """
    tmp = np.array(label)  # always a copy, also accepts plain lists
    if tmp.dtype.kind in 'US':
        # A fixed-width string array would store -1 / 1 as the strings
        # '-1' / '1'; use an object array so they stay integers.
        tmp = tmp.astype(object)
    # Build both masks before writing so the comparisons only ever see
    # the original label values.
    negative = tmp == '不爆炸'
    positive = tmp == '爆炸'
    tmp[negative] = -1
    tmp[positive] = 1
    return tmp
# Map the raw outcome labels to -1 / 1 targets.
y = codeYByLabelEncoder(label)
def trainModelByOrigin(x, y, lr=0.01, iter_time=1000):
    """Train a perceptron in its primal (original) form.

    Starting from zero weights, repeatedly pick one misclassified sample
    at random and move the hyperplane towards it, until every sample has
    a strictly positive margin or the iteration budget is used up.

    Args:
        x: 2-D array of numeric features, one row per sample.
        y: 1-D array of targets, expected to be -1 or 1.
        lr: learning rate applied to each update.
        iter_time: maximum number of update steps.

    Returns:
        Tuple ``(w, b)``: float weight vector and bias.
    """
    # Inputs may arrive as object-dtype arrays; convert once so the
    # arithmetic is native float math and w comes back as a float array.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.zeros(x.shape[1])
    b = 0.0
    for _ in range(iter_time):
        margins = y * (np.dot(x, w) + b)
        # A margin of exactly zero counts as misclassified.
        wrongIndex = np.where(margins <= 0)[0]
        if len(wrongIndex) == 0:
            break
        pick = np.random.permutation(wrongIndex)[0]
        w = w + lr * y[pick] * x[pick]
        b = b + lr * y[pick]
    return w, b
# Train with the primal-form algorithm.
w, b = trainModelByOrigin(x, y)
# Force a float dtype on the weight vector before it is used for prediction.
w = w.astype(float)
def predictByOrigin(w, b, testset):
    """Classify every row of ``testset`` and print per-row results.

    Args:
        w: weight vector with one entry per feature.
        b: bias term.
        testset: 2-D array whose last column is the true label and whose
            other columns are the features.

    Returns:
        The fraction of rows whose predicted sign equals the true label
        (0.0 for an empty test set).
    """
    num = testset.shape[0]
    if num == 0:
        # Nothing to score; avoid dividing by zero below.
        print('accuracy: ', 0.0)
        return 0.0
    count = 0
    for row in testset:
        # np.sign yields 0 for a sample lying exactly on the hyperplane,
        # which never matches a -1 / 1 label.
        predLabel = np.sign(np.dot(row[:-1], w) + b)
        print('pred:{} actual:{}'.format(int(predLabel), row[-1]))
        if predLabel == row[-1]:
            count += 1
    accuracy = count / num
    print('accuracy: ', accuracy)
    return accuracy
# Append the targets as a last column so training rows have the same
# layout as the test rows below (features first, label last).
trainset = np.hstack((x, y.reshape(len(y), 1)))

# Hand-written, already-encoded samples: four feature codes + label.
testset = np.array([
    [1, 1, 0, 1, 1],
    [0, 0, 1, 1, 1],
    [1, 0, 0, 0, 1],
    [0, 1, 0, 1, -1],
    [1, 1, 1, 1, 1],
])

print('------训练集------')
predictByOrigin(w, b, trainset)
print('------测试集------')
predictByOrigin(w, b, testset)
def predictFromModel(x, y, testset):
    """Fit scikit-learn's Perceptron and print its accuracy and parameters.

    Args:
        x: 2-D training feature array (cast to int before fitting).
        y: 1-D training target array (cast to int before fitting).
        testset: 2-D array with the features in all but the last column
            and the true label in the last column.
    """
    features = x.astype(int)
    targets = y.astype(int)
    clf = Perceptron(max_iter=1000, eta0=0.01, random_state=0)
    clf.fit(features, targets)
    train_pred = clf.predict(features)
    print('----------perceptron----------')
    print('accuracy in trainset: ', accuracy_score(targets, train_pred))
    test_pred = clf.predict(testset[:, 0:-1])
    print('accuracy in testset: ', accuracy_score(testset[:, -1], test_pred))
    print('w: ', clf.coef_[0])
    print('b: ', clf.intercept_[0])
# Fit scikit-learn's Perceptron on the same data and print its results.
predictFromModel(x,y,testset)
对偶形式
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import Perceptron
# Load the sample table; the file is decoded as GBK.
dataset = pd.read_csv('test.csv', encoding='gbk')
# Every column except the last is treated as a feature; the last is the label.
featNames = dataset.columns[:-1]
labelNames = np.unique(dataset.values[:, -1])
feat = dataset.values[:, 0:-1]
label = dataset.values[:, -1]
def codeXByLabelEncoder(feat):
    """Label-encode every column of a 2-D categorical feature matrix.

    Each column is encoded independently with scikit-learn's
    ``LabelEncoder``, so the codes of different columns are unrelated.

    Args:
        feat: 2-D array-like of categorical values (rows are samples).

    Returns:
        A new array of the same shape holding the integer codes; the
        input is not modified.
    """
    tmp = np.array(feat)  # always a copy, also accepts nested lists
    if tmp.dtype.kind in 'US':
        # A fixed-width string array would store the integer codes as
        # text ('0', '1', ...); switch to object so they stay numeric.
        tmp = tmp.astype(object)
    encoder = preprocessing.LabelEncoder()
    for i in range(tmp.shape[1]):
        # The encoder is re-fitted per column.
        tmp[:, i] = encoder.fit_transform(tmp[:, i])
    return tmp
# Integer-encode the raw feature matrix for training.
x = codeXByLabelEncoder(feat)
def codeYByLabelEncoder(label):
    """Map the textual outcome labels to perceptron targets.

    '不爆炸' becomes -1 and '爆炸' becomes 1; any other value is left
    unchanged.

    Args:
        label: 1-D array-like of label values.

    Returns:
        A new array with the mapped values; the input is not modified.
    """
    tmp = np.array(label)  # always a copy, also accepts plain lists
    if tmp.dtype.kind in 'US':
        # A fixed-width string array would store -1 / 1 as the strings
        # '-1' / '1'; use an object array so they stay integers.
        tmp = tmp.astype(object)
    # Build both masks before writing so the comparisons only ever see
    # the original label values.
    negative = tmp == '不爆炸'
    positive = tmp == '爆炸'
    tmp[negative] = -1
    tmp[positive] = 1
    return tmp
# Map the raw outcome labels to -1 / 1 targets.
y = codeYByLabelEncoder(label)
def trainModelByDual(x, y, lr=0.01, iter_times=1000):
    """Train a perceptron in its dual form.

    Instead of updating the weights directly, keep one coefficient
    ``alpha[i]`` per training sample and increase it by ``lr`` each time
    that sample is picked while misclassified. The weights and bias are
    recovered at the end as ``w = sum(alpha_i * y_i * x_i)`` and
    ``b = sum(alpha_i * y_i)``.

    Args:
        x: 2-D array of numeric features, one row per sample.
        y: 1-D array of targets, expected to be -1 or 1.
        lr: increment applied to a sample's coefficient per update.
        iter_times: maximum number of update steps.

    Returns:
        Tuple ``(w, b)``: float weight vector and bias.
    """
    # Inputs may arrive as object-dtype arrays; convert once so the
    # arithmetic below is native float math.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    alpha = np.zeros(x.shape[0])
    # Gram matrix of all pairwise inner products. Adding 1 folds the bias
    # in: sum_j alpha_j*y_j*(x_j.x_i + 1) == w.x_i + b. It does not change
    # between iterations, so it is computed once.
    kernel = np.dot(x, x.T) + 1
    for _ in range(iter_times):
        margins = y * np.dot(alpha * y, kernel)
        # A margin of exactly zero counts as misclassified.
        wrongIndex = np.where(margins <= 0)[0]
        if len(wrongIndex) == 0:
            break
        pick = np.random.permutation(wrongIndex)[0]
        alpha[pick] += lr
    w = np.dot(alpha * y, x)
    b = np.dot(alpha, y)
    return w, b
# Train with the dual-form algorithm.
w,b = trainModelByDual(x,y)
# Force a float dtype on the weight vector before it is used for prediction.
w = w.astype(float)
def predictByDual(w, b, testset):
    """Classify every row of ``testset`` and print per-row results.

    Args:
        w: weight vector with one entry per feature.
        b: bias term.
        testset: 2-D array whose last column is the true label and whose
            other columns are the features.

    Returns:
        The fraction of rows whose predicted sign equals the true label
        (0.0 for an empty test set).
    """
    num = testset.shape[0]
    if num == 0:
        # Nothing to score; avoid dividing by zero below.
        print('accuracy: ', 0.0)
        return 0.0
    count = 0
    for row in testset:
        # np.sign yields 0 for a sample lying exactly on the hyperplane,
        # which never matches a -1 / 1 label.
        predLabel = np.sign(np.dot(row[:-1], w) + b)
        print('pred:{} actual:{}'.format(int(predLabel), row[-1]))
        if predLabel == row[-1]:
            count += 1
    accuracy = count / num
    print('accuracy: ', accuracy)
    return accuracy
# Append the targets as a last column so training rows have the same
# layout as the test rows below (features first, label last).
trainset = np.hstack((x, y.reshape(len(y), 1)))

# Hand-written, already-encoded samples: four feature codes + label.
testset = np.array([
    [1, 1, 0, 1, 1],
    [0, 0, 1, 1, 1],
    [1, 0, 0, 0, 1],
    [0, 1, 0, 1, -1],
    [1, 1, 1, 1, 1],
])

print('------训练集------')
predictByDual(w, b, trainset)
print('------测试集------')
predictByDual(w, b, testset)
def predictFromModel(x, y, testset):
    """Fit scikit-learn's Perceptron and print its accuracy and parameters.

    Args:
        x: 2-D training feature array (cast to int before fitting).
        y: 1-D training target array (cast to int before fitting).
        testset: 2-D array with the features in all but the last column
            and the true label in the last column.
    """
    # The dual-form section's import list does not bring in
    # accuracy_score, so import it here to keep this function usable
    # when that section is run on its own.
    from sklearn.metrics import accuracy_score

    clf = Perceptron(max_iter=1000, eta0=0.01, random_state=0)
    x, y = x.astype(int), y.astype(int)
    clf.fit(x, y)
    train = clf.predict(x)
    print('----------perceptron----------')
    print('accuracy in trainset: ', accuracy_score(y, train))
    print('accuracy in testset: ', accuracy_score(testset[:, -1], clf.predict(testset[:, 0:-1])))
    print('w: ', clf.coef_[0])
    print('b: ', clf.intercept_[0])
# Fit scikit-learn's Perceptron on the same data and print its results.
predictFromModel(x,y,testset)