吴恩达老师练习二,逻辑回归的python代码实现。作业pdf和数据集就不上传了,需要的伙伴可以留言。
一、 Logistic Regression
- ML_exe02.py 主类,加载数据集,调用得到最优解的函数,调用图像绘制函数,AC率检测
- costFunction.py 代价函数计算,导数计算
- sigmod.py sigmoid函数计算
- plotData.py 图像绘制
ML_exe02.py
import numpy as np
from plotData import *
import matplotlib.pyplot as plt
import scipy.optimize as op
from costFunction import *
from sigmod import *
# Load the data set: columns 0-1 are the two exam scores, column 2 is the 0/1 label.
data = np.loadtxt('ex2data1.txt',delimiter=',')
X = data[...,0:2]
y = data[...,2]
m = y.size
n = X.shape[1]
# Prepend the intercept column x0 = 1.
X =np.c_[np.ones(m),X]
# Initialize theta to all zeros.
initial_theta = np.zeros(n+1)
# Replaces MATLAB's fminunc: minimize the cost using the analytic gradient.
result = op.minimize(fun=costFunction,x0=initial_theta,args=(X,y),method='TNC',jac=gradient)
print('theta:',result.x)
print('cost:',result.fun)
# Visualization:
# scatter plot of the samples plus the fitted decision boundary.
plotData(X,y,result.x)
plt.show()
# Predict the admission probability for scores (60, 60); [1, 60, 60] carries the intercept.
z = np.dot([1,60,60],result.x)
print('Predict admission probability(60,60):',sigmod(z))
# Round the hypothesis: h > 0.5 classifies as 1, h <= 0.5 as 0.
p = np.round(sigmod(np.dot(X,result.x)))
# Training-set accuracy (percent of correct predictions).
acc = np.mean(p==y)*100
print('AC RATE: ',acc,'%')
costFunction.py
import numpy as np
from sigmod import *
#成本函数
def costFunction(theta, X, y):
    """Unregularized logistic-regression cost J(theta).

    theta : parameter vector (n,)
    X     : design matrix (m, n), intercept column included
    y     : labels (m,), values 0/1
    Returns the scalar mean cross-entropy cost.
    """
    m = X.shape[0]
    # Hypothesis: sigmoid of the linear scores (helper inlined).
    h = 1.0 / (1.0 + np.exp(-X.dot(theta)))
    pos_term = np.dot(y.T, np.log(h))
    neg_term = np.dot(1 - y.T, np.log(1 - h))
    return -(pos_term + neg_term) * (1 / m)
#导数
def gradient(theta, X, y):
    """Gradient of the unregularized logistic-regression cost.

    Returns (1/m) * X^T (h - y), shape (n,), matching scipy's `jac` contract.
    """
    m = X.shape[0]
    # Hypothesis: sigmoid of the linear scores (helper inlined).
    h = 1.0 / (1.0 + np.exp(-X.dot(theta)))
    return X.T.dot(h - y) * (1 / m)
sigmod.py
import numpy as np
def sigmod(z):
    """Sigmoid (logistic) function, 1 / (1 + e^-z).

    Implemented as exp(-logaddexp(0, -z)): mathematically identical to the
    naive form, but `np.exp(-z)` overflows (RuntimeWarning) for large
    negative z, while logaddexp stays finite for any input. Accepts scalars
    or arrays and returns the same shape.
    """
    return np.exp(-np.logaddexp(0, -z))
plotData.py
import matplotlib.pyplot as plt
import numpy as np
def plotData(X, y, theta):
    """Scatter the two exam scores colored by admission outcome and draw
    the linear decision boundary theta^T x = 0.

    X carries the intercept column, so the raw features are X[:, 1] and
    X[:, 2]; y holds 0/1 labels; theta is the fitted parameter vector.
    """
    # Boolean masks replace the explicit per-sample loop.
    admitted = y == 1
    rejected = ~admitted
    # Scatter each class with its own color.
    passscatter = plt.scatter(X[admitted, 1], X[admitted, 2],
                              c='black', edgecolors='none', s=10)
    notpassscatter = plt.scatter(X[rejected, 1], X[rejected, 2],
                                 c='red', edgecolors='none', s=10)
    # Title and axis labels.
    plt.title('Xuan Logistic Regression', fontsize=20)
    plt.xlabel('Score 1', fontsize=15)
    plt.ylabel('Score 2', fontsize=15)
    # Legend for the two classes.
    plt.legend((passscatter, notpassscatter), ('Admitted', 'Not Admitted'))
    # Decision boundary: theta0 + theta1*x + theta2*y = 0  =>  solve for y.
    plot_x = np.arange(100)
    plot_y = -(theta[0] + theta[1] * plot_x) / theta[2]
    plt.plot(plot_x, plot_y)
    # Axis limits: [x_min, x_max, y_min, y_max].
    plt.axis([30, 100, 30, 100])
二、Regularized logistic regression
- ML_exe02_normal.py 主类,加载数据集,调用得到最优解的函数,调用图像绘制函数,AC率检测
- normalCostFunction.py 代价函数计算,导数计算,mapFeature计算
- sigmod.py sigmoid函数计算
- normalPlotData.py 图像绘制
ML_exe02_normal.py
import numpy as np
from normalPlotData import *
import matplotlib.pyplot as plt
import scipy.optimize as op
# Fixed module name: the helpers live in normalCostFunction.py ('normal', not 'norma').
from normalCostFunction import *
from sigmod import *
# Load the data: columns 0-1 are the two features, column 2 is the 0/1 label.
data = np.loadtxt('ex2data2.txt', delimiter=',', dtype='float')
X = data[:, 0:2]
y = data[:, 2:3]  # keep a (m, 1) column vector; data[:, 2] would be 1-D
m = y.shape[0]
# Expand the two raw features into a degree-6 polynomial feature matrix.
X = mapFeature(X[:, 0], X[:, 1])
lamb = 1  # regularization strength lambda
initial_theta = np.zeros(X.shape[1])
# Replaces MATLAB's fminunc: minimize the regularized cost with its gradient.
result = op.minimize(fun=costFunction, x0=initial_theta, args=(X, y, lamb), method='TNC', jac=gradient)
cost = result.fun
theta = result.x
print("cost:", cost)
print("theta:", theta)
# Visualization:
# scatter plot of the samples plus the fitted decision boundary.
plotData(X, y, result.x)
plt.show()
# Training-set accuracy.
# Round the hypothesis: h > 0.5 classifies as 1, h <= 0.5 as 0.
p = np.round(sigmod(np.dot(X, result.x)))
# BUG FIX: p has shape (m,) while y is (m, 1); `p == y` would broadcast to an
# (m, m) matrix and report a bogus accuracy. Flatten y for an element-wise compare.
acc = np.mean(p == y.flatten()) * 100
print('AC RATE: ', acc, '%')
normalCostFunction.py
import numpy as np
from sigmod import *
#cost计算函数
def costFunction(theta, X, y, lamb):
    """Regularized logistic-regression cost.

    theta : parameter vector (n,), reshaped internally to a column
    X     : design matrix (m, n), intercept column included
    y     : labels as an (m, 1) column vector
    lamb  : regularization strength (theta_0 is not penalized)
    Returns the cost as a length-1 1-D array.
    """
    # Reshape the incoming vector into an (n, 1) column matrix.
    theta = np.array(theta).reshape(-1, 1)
    m = y.shape[0]  # number of samples
    # Hypothesis: sigmoid of the linear scores (helper inlined).
    h = 1.0 / (1.0 + np.exp(-np.dot(X, theta)))
    # The penalty skips theta_0.
    reg_theta = theta[1:, 0]
    data_term = -np.dot(y.T, np.log(h)) - np.dot(1 - y.T, np.log(1 - h))
    penalty = lamb / (2 * m) * np.dot(reg_theta, reg_theta)
    # Flatten the (1, 1) matrix back to a vector.
    return (1 / m * data_term + penalty).flatten()
#求梯度
def gradient(theta, X, y, lamb):
    """Gradient of the regularized logistic-regression cost.

    Returns (1/m) X^T (h - y) + (lamb/m) * theta with theta_0 excluded
    from the penalty, flattened to shape (n,) for scipy's `jac` contract.
    """
    # np.array copies, so zeroing theta[0] below never touches the caller's data.
    theta = np.array(theta).reshape(-1, 1)
    m = y.shape[0]  # number of samples
    # Hypothesis: sigmoid of the linear scores (helper inlined).
    h = 1.0 / (1.0 + np.exp(-np.dot(X, theta)))
    # theta_0 is not regularized.
    theta[0, 0] = 0
    grad = 1 / m * np.dot(X.T, h - y) + lamb / m * theta
    # Flatten the column matrix back to a vector.
    return grad.flatten()
#特征矩阵
def mapFeature(X1, X2):
    """Expand two features into every polynomial term up to degree 6.

    Output columns are the intercept (all ones) followed by
    X1^(i-j) * X2^j for i = 1..6, j = 0..i — 28 columns in total.
    Accepts scalars or equal-length 1-D arrays.
    """
    degree = 6
    # Collect the columns in a list, then stack once at the end
    # (instead of repeatedly calling np.insert on a growing matrix).
    columns = [np.ones(np.size(X1))]  # intercept column x0 = 1
    for total in range(1, degree + 1):
        for j in range(total + 1):
            columns.append(np.power(X1, total - j) * np.power(X2, j))
    return np.column_stack(columns)
normalPlotData.py
import matplotlib.pyplot as plt
import numpy as np
# Fixed module name: mapFeature lives in normalCostFunction.py ('normal', not 'norma').
from normalCostFunction import *
# Plotting
def plotData(X, y, theta):
    """Scatter the samples by class and draw the non-linear decision boundary.

    X     : design matrix from mapFeature (column 0 is the intercept;
            columns 1 and 2 are the raw features)
    y     : labels, (m, 1) column vector or (m,) array of 0/1
    theta : fitted parameter vector (length matches mapFeature's 28 columns)
    """
    # Flatten so masks work whether y is (m,) or (m, 1).
    labels = np.asarray(y).flatten()
    pos = labels == 1
    neg = ~pos
    # Scatter the raw feature columns, one color per class.
    passscatter = plt.scatter(X[pos, 1], X[pos, 2], c='black', edgecolors='none', s=10)
    notpassscatter = plt.scatter(X[neg, 1], X[neg, 2], c='red', edgecolors='none', s=10)
    # Title and axis labels.
    plt.title('Xuan Logistic Regression', fontsize=20)
    plt.xlabel('Feature 1', fontsize=15)
    plt.ylabel('Feature 2', fontsize=15)
    # Legend for the two classes.
    plt.legend((passscatter, notpassscatter), ('y=1', 'y=0'))
    # Decision boundary: evaluate theta^T mapFeature(u, v) on a grid and
    # draw its zero level-set as a contour.
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    z = np.zeros((np.size(u), np.size(v)))
    # Reshape the vector into a column matrix for the dot product.
    theta = np.array(theta).reshape((np.size(theta), 1))
    for i in range(0, np.size(u)):
        for j in range(0, np.size(v)):
            # [0, 0] extracts the scalar; assigning the (1, 1) dot result
            # directly into z[i, j] is deprecated in recent NumPy.
            z[i, j] = np.dot(mapFeature(u[i], v[j]), theta)[0, 0]
    plt.contour(u, v, z.T, [0])
sigmod.py
import numpy as np
def sigmod(z):
    """Sigmoid (logistic) function, 1 / (1 + e^-z).

    Implemented as exp(-logaddexp(0, -z)): mathematically identical to the
    naive form, but `np.exp(-z)` overflows (RuntimeWarning) for large
    negative z, while logaddexp stays finite for any input. Accepts scalars
    or arrays and returns the same shape.
    """
    return np.exp(-np.logaddexp(0, -z))