版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/ACBattle/article/details/80607355
1.公式推导
2.
3.算法步骤
import math
import copy
import numpy as np
import matplotlib.pyplot as plt
isdebug = False
# 指定k个高斯分布参数,这里k=2。2个高斯分布具有相同均方差Sigma,均值分别为Mu1,Mu2。
def ini_data_1(Sigma,Mu1,Mu2,k,N):
global X
global Mu
global Expectations
X = np.zeros((1,N))
Mu = np.random.random(2)
Expectations = np.zeros((N,k))
for i in range(0,N):
if np.random.random(1) > 0.5:
X[0,i] = np.random.normal() * Sigma + Mu1
else:
X[0,i] = np.random.normal() * Sigma + Mu2
print(X)
def trans(pix):
sum = 0
for i in range(len(pix)):
sum = sum * 10 + pix[i]
return sum
def ini_data_2(k,N):
global X
global Mu
global Expectations
Mu = np.random.random(2) # 初始化Mu
Expectations = np.zeros((N,k))
pixArr1 = []
num = 0
csvFile = open('data.txt','r')
mini = 1000000
maxn = 0
pix = 0
with csvFile as f:
for line in f.readlines():
pix = [int(i) for i in line.split('\t')[0:6]]
pixx = trans(pix)
# print(pixx)
if pixx <= mini:
mini = pixx
if pixx >= maxn:
maxn = pixx
pixArr1.append([pixx])
num+=1
X = np.zeros([1,N])
Y = np.array(pixArr1)
print(Y)
X = np.transpose(Y)
print(X)
def ini_data_3(k,N):
global X
global Mu
global Expectations
Mu = np.random.random(2) # 初始化Mu
Expectations = np.zeros((N,k))
X = np.zeros([1,N])
X = np.array([-67,-48,6,8,14,16,23,24,28,29,41,49,56,60,75])
# EM算法:步骤1,计算E[zij]
def e_step(Sigma,k,N):
global Expectations
global Mu
global X
for i in range(0,N):
Denom = 0
for j in range(0,k):
Denom += math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2)
for j in range(0,k):
Numer = math.exp((-1/(2*(float(Sigma**2))))*(float(X[0,i]-Mu[j]))**2)
Expectations[i,j] = Numer / Denom
# EM算法:M步,求最大化E[zij]的参数Mu
def m_step(k,N):
global Expectations
global X
for j in range(0,k):
Numer = 0
Denom = 0
for i in range(0,N):
Numer += Expectations[i,j]*X[0,i]
Denom += Expectations[i,j]
Mu[j] = Numer / Denom
# 算法迭代iter_num次,或达到精度Epsilon停止迭代
def run(Sigma,k,N,iter_num,Epsilon):
# ini_data_1(k,N)
# ini_data_2(k,N)
ini_data_3(k,N)
print(u"初始<u1,u2>:", Mu)
for i in range(iter_num):
Old_Mu = copy.deepcopy(Mu)
e_step(Sigma,k,N)
m_step(k,N)
print(i,Mu)
if sum(abs(Mu-Old_Mu)) < Epsilon: # 如果精度达到要求,那么停止迭代
break
if __name__ == '__main__':
run(10,2,15,1000,0.0001)
# run(6515,11722,34351,2,72276,69454,0.0001)
plt.hist(X[0,:],200)
plt.show()