import math
import numpy as np
import matplotlib.pyplot as plt
def load_data_set():
    """Load the two-feature sample data set from ``testSet.txt``.

    Every non-blank line must contain three whitespace-separated fields:
    two feature values followed by an integer class label.

    Returns:
        tuple: ``(data_mat, label_mat)``. ``data_mat`` is a list of
        ``[1.0, x1, x2]`` rows (a constant 1.0 is prepended to each sample)
        and ``label_mat`` is the list of integer labels, in file order.

    Raises:
        OSError: if ``testSet.txt`` cannot be opened.
        ValueError: if a field cannot be converted to a number.
        IndexError: if a non-blank line has fewer than three fields.
    """
    data_mat = []
    label_mat = []
    # The context manager closes the handle even when parsing raises.
    with open('testSet.txt') as fr:
        for line in fr:
            line_arr = line.split()
            if not line_arr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data_mat.append([1.0, float(line_arr[0]), float(line_arr[1])])
            label_mat.append(int(line_arr[2]))
    return data_mat, label_mat
def sigmod(in_x):
    """Evaluate the logistic (sigmoid) function ``1 / (1 + exp(-in_x))``.

    ``numpy.exp`` is used instead of ``math.exp`` because callers pass the
    product of a NumPy data matrix and a weight matrix, which is itself a
    NumPy object; ``math.exp`` only handles plain Python numbers.

    Args:
        in_x: a scalar or a NumPy array/matrix.

    Returns:
        The element-wise sigmoid of ``in_x``.
    """
    denominator = 1 + np.exp(-in_x)
    return 1.0 / denominator
def grad_ascent(data_mat_in, class_labels, alpha=0.001, max_cycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        data_mat_in: 2-D array-like of shape (m, n), one sample per row.
        class_labels: sequence of m class labels.
        alpha: step size applied to every update (default 0.001).
        max_cycles: number of full-batch updates to run (default 500).

    Returns:
        numpy.matrix: an (n, 1) column of fitted weights.
    """
    # np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.
    data_matrix = np.asmatrix(data_mat_in)
    label_mat = np.asmatrix(class_labels).transpose()
    n = np.shape(data_matrix)[1]
    # Start as a matrix so the return type is np.matrix even for max_cycles=0.
    weights = np.asmatrix(np.ones((n, 1)))
    for _ in range(max_cycles):
        h = sigmod(data_matrix * weights)
        error = label_mat - h
        # Move along the gradient of the log-likelihood: X^T (y - h).
        weights = weights + alpha * data_matrix.transpose() * error
    return weights
def plot_best_fit(weights):
    """Plot the ``testSet.txt`` samples together with the fitted boundary.

    Samples are reloaded through ``load_data_set``. Rows labelled 1 are drawn
    as red squares, all other rows as green dots, and the line
    ``weights[0] + weights[1]*x1 + weights[2]*x2 = 0`` is drawn for x1 in
    [-3.0, 3.0). The figure is saved to ``plot_best_fit1.jpeg`` and then shown.

    Args:
        weights: indexable holding three coefficients (bias, x1, x2).
    """
    samples, labels = load_data_set()
    points = np.array(samples)

    # Split the rows by class; column 0 is the constant term and is not plotted.
    class_one = [row for row, label in zip(points, labels) if int(label) == 1]
    class_other = [row for row, label in zip(points, labels) if int(label) != 1]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter([row[1] for row in class_one], [row[2] for row in class_one],
               s=30, c='red', marker='s')
    ax.scatter([row[1] for row in class_other], [row[2] for row in class_other],
               s=20, c='green')

    # Solve w0 + w1*x + w2*y = 0 for y to get the boundary line.
    x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.savefig('plot_best_fit1.jpeg')
    plt.show()
def stoc_grad_ascent0(data_matrix, class_labels):
    """Fit logistic-regression weights with one stochastic pass over the data.

    The weights are updated after every sample, visiting the rows once in
    order with a fixed step size of 0.01.

    Args:
        data_matrix: 2-D array of shape (m, n), one sample per row.
        class_labels: sequence of m class labels.

    Returns:
        numpy.ndarray: 1-D array of n fitted weights.
    """
    num_samples, num_features = np.shape(data_matrix)
    step = 0.01
    weights = np.ones(num_features)
    for idx in range(num_samples):
        sample = data_matrix[idx]
        predicted = sigmod(sum(sample * weights))
        residual = class_labels[idx] - predicted
        weights = weights + step * residual * sample
    return weights
def stoc_grad_ascent1(data_matrix, class_labels, num_iter=150):
    """Fit logistic-regression weights with randomised stochastic gradient ascent.

    Each of the ``num_iter`` passes visits every sample exactly once in a
    random order. The step size decays as ``4 / (1 + j + i) + 0.01`` (pass
    ``j``, step ``i``), so it shrinks over time but never reaches zero.

    Args:
        data_matrix: 2-D array of shape (m, n), one sample per row.
        class_labels: sequence of m class labels.
        num_iter: number of passes over the data (default 150).

    Returns:
        numpy.ndarray: 1-D array of n fitted weights.
    """
    m, n = np.shape(data_matrix)
    weights = np.ones(n)
    for j in range(num_iter):
        # Indices of the samples not yet used in this pass.
        data_index = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01
            pick = int(np.random.uniform(0, len(data_index)))
            # `pick` is a position inside data_index, not a row number: map it
            # to the real row and remove it so no row repeats within a pass.
            sample_index = data_index.pop(pick)
            h = sigmod(sum(data_matrix[sample_index] * weights))
            error = class_labels[sample_index] - h
            weights = weights + alpha * error * data_matrix[sample_index]
    return weights
def classify_vector(in_x, weights):
    """Classify one feature vector with fitted logistic-regression weights.

    Args:
        in_x: 1-D array of feature values.
        weights: 1-D array of weights, same length as ``in_x``.

    Returns:
        float: 1.0 when the sigmoid of the weighted sum exceeds 0.5,
        otherwise 0.0.
    """
    activation = sum(in_x * weights)
    return 1.0 if sigmod(activation) > 0.5 else 0.0
def colic_test():
    """Train on the horse-colic training file and report the test error rate.

    Reads ``horseColicTraining.txt`` and ``horseColicTest.txt``; each non-blank
    line is parsed as 21 tab-separated feature values followed by a class
    label. Weights are fitted with ``stoc_grad_ascent1`` (500 passes) and
    every test row is classified with ``classify_vector``.

    Returns:
        float: fraction of test rows that were misclassified (also printed).

    Raises:
        OSError: if either data file cannot be opened.
        ValueError: if a field cannot be parsed as a number.
        ZeroDivisionError: if the test file contains no data rows.
    """
    # Open both files up front so a missing test file fails before training;
    # the context manager closes both handles on every exit path.
    with open('horseColicTraining.txt') as fr_train, \
            open('horseColicTest.txt') as fr_test:
        training_set = []
        training_labels = []
        for line in fr_train:
            parsed = _parse_colic_line(line)
            if parsed is None:
                continue
            features, label = parsed
            training_set.append(features)
            training_labels.append(label)
        train_weights = stoc_grad_ascent1(
            np.array(training_set), training_labels, 500)

        error_count = 0
        num_test_vec = 0.0
        for line in fr_test:
            parsed = _parse_colic_line(line)
            if parsed is None:
                continue
            features, label = parsed
            num_test_vec += 1.0
            predicted = classify_vector(np.array(features), train_weights)
            if int(predicted) != int(label):
                error_count += 1

    error_rate = error_count / num_test_vec
    print('the error rate of this test is : %f' % error_rate)
    return error_rate


def _parse_colic_line(line, num_features=21):
    """Split one tab-separated record into ``(features, label)``.

    Returns ``None`` for a blank line. The label is parsed with ``float`` so
    that both ``1`` and ``1.000000`` spellings are accepted.
    """
    stripped = line.strip()
    if not stripped:
        return None
    fields = stripped.split('\t')
    features = [float(fields[i]) for i in range(num_features)]
    return features, float(fields[num_features])
def multi_test(num_test=10):
    """Run ``colic_test`` several times and print the mean error rate.

    Training is randomised, so a single run is noisy; averaging over several
    runs gives a steadier estimate.

    Args:
        num_test: number of independent ``colic_test`` runs (default 10).

    Raises:
        ValueError: if ``num_test`` is not positive.
    """
    if num_test <= 0:
        raise ValueError('num_test must be a positive integer')
    error_sum = 0.0
    for _ in range(num_test):
        error_sum += colic_test()
    print('after %d iterations the average error rate is : %f '
          % (num_test, error_sum / float(num_test)))
if __name__ == '__main__':
    # Script entry point: load the sample set, then run the repeated
    # horse-colic evaluation.
    # The sample set is only consumed by the disabled demos below.
    data_arr, label_mat = load_data_set()

    # Disabled demo 1: batch gradient ascent, then plot the boundary.
    #     weights = grad_ascent(np.array(data_arr), label_mat)
    #     plot_best_fit(weights.getA())

    # Disabled demo 2: a single stochastic pass.
    #     weights = stoc_grad_ascent0(np.array(data_arr), label_mat)
    #     plot_best_fit(weights)

    # Disabled demo 3: randomised stochastic ascent, 2000 passes.
    #     weights = stoc_grad_ascent1(np.array(data_arr), label_mat, 2000)
    #     plot_best_fit(weights)

    multi_test()
机器学习实战代码_Python3.6_Logistic回归
猜你喜欢
转载自blog.csdn.net/liyuanshuo_nuc/article/details/82703979
今日推荐
周排行