1. Both k-means and mixture-of-Gaussians (MoG) models are prone to getting stuck in local optima, so the result depends on the initialization.
2. Formulas:
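For a mixture of K Gaussians, EM alternates between the following two steps; these are exactly the quantities computed by the code below, where r_{ik} are the responsibilities and N_k^{soft} the soft counts referred to in the comments.

E-step (responsibilities):
    r_{ik} = \frac{\hat{\pi}_k \, \mathcal{N}(x_i \mid \hat{\mu}_k, \hat{\Sigma}_k)}{\sum_{j=1}^{K} \hat{\pi}_j \, \mathcal{N}(x_i \mid \hat{\mu}_j, \hat{\Sigma}_j)}

M-step (with soft counts N_k^{soft} = \sum_{i=1}^{N} r_{ik}):
    \hat{\pi}_k = \frac{N_k^{soft}}{N}, \qquad
    \hat{\mu}_k = \frac{1}{N_k^{soft}} \sum_{i=1}^{N} r_{ik} \, x_i, \qquad
    \hat{\Sigma}_k = \frac{1}{N_k^{soft}} \sum_{i=1}^{N} r_{ik} \, (x_i - \hat{\mu}_k)(x_i - \hat{\mu}_k)^T

Convergence is monitored through the log-likelihood:
    \ell = \sum_{i=1}^{N} \log \sum_{k=1}^{K} \hat{\pi}_k \, \mathcal{N}(x_i \mid \hat{\mu}_k, \hat{\Sigma}_k)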
3. Implementing the EM algorithm from scratch
(1) Main function:
import numpy as np
from scipy.stats import multivariate_normal

def EM(data, init_means, init_covariances, init_weights, maxiter=1000, thresh=1e-4):
    # Make copies of initial parameters, which we will update during each iteration
    means = init_means[:]
    covariances = init_covariances[:]
    weights = init_weights[:]

    # Infer dimensions of dataset and the number of clusters
    num_data = len(data)
    num_dim = len(data[0])
    num_clusters = len(means)

    # Initialize some useful variables
    resp = np.zeros((num_data, num_clusters))
    ll = loglikelihood(data, weights, means, covariances)
    ll_trace = [ll]

    for it in range(maxiter):
        if it % 5 == 0:
            print("Iteration %s" % it)

        # E-step: compute responsibilities
        resp = compute_responsibilities(data, weights, means, covariances)

        # M-step
        # Compute the total responsibility assigned to each cluster, which will be useful when
        # implementing the M-step updates below. In the lectures this is called N^{soft}
        counts = compute_soft_counts(resp)

        # Update the weight for cluster k using the M-step update rule for the cluster weight, \hat{\pi}_k
        weights = compute_weights(counts)

        # Update means for cluster k using the M-step update rule for the mean variables.
        # This assigns means[k] to be our estimate for \hat{\mu}_k
        means = compute_means(data, resp, counts)

        # Update covariances for cluster k using the M-step update rule for covariance variables.
        # This assigns covariances[k] to be the estimate for \hat{\Sigma}_k
        covariances = compute_covariances(data, resp, counts, means)

        # Compute the loglikelihood at this iteration
        ll_latest = loglikelihood(data, weights, means, covariances)
        ll_trace.append(ll_latest)

        # Check for convergence in log-likelihood and store
        if (ll_latest - ll) < thresh and ll_latest > -np.inf:
            break
        ll = ll_latest

    if it % 5 != 0:
        print("Iteration %s" % it)

    out = {'weights': weights, 'means': means, 'covs': covariances,
           'loglik': ll_trace, 'resp': resp}
    return out
(2) Helper functions:
def log_sum_exp(Z):
    """Compute log(\sum_i exp(Z_i)) for some array Z."""
    return np.max(Z) + np.log(np.sum(np.exp(Z - np.max(Z))))

def loglikelihood(data, weights, means, covs):
    """Compute the loglikelihood of the data for a Gaussian mixture model with the given parameters."""
    num_clusters = len(means)
    num_dim = len(data[0])

    ll = 0
    for d in data:
        Z = np.zeros(num_clusters)
        for k in range(num_clusters):
            # Compute (x-mu)^T * Sigma^{-1} * (x-mu)
            delta = np.array(d) - means[k]
            exponent_term = np.dot(delta.T, np.dot(np.linalg.inv(covs[k]), delta))

            # Compute loglikelihood contribution for this data point and this cluster
            Z[k] += np.log(weights[k])
            Z[k] -= 1/2. * (num_dim * np.log(2*np.pi) + np.log(np.linalg.det(covs[k])) + exponent_term)

        # Increment loglikelihood contribution of this data point across all clusters
        ll += log_sum_exp(Z)

    return ll
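Subtracting the maximum before exponentiating keeps the largest term at exp(0) = 1, so the sum cannot overflow even when the log-scores are large. A quick sanity check (a minimal sketch; the values are chosen arbitrarily for illustration):

import numpy as np

Z = np.array([1000.0, 1001.0, 999.0])                         # arbitrary large log-scores
naive = np.log(np.sum(np.exp(Z)))                             # exp(1000) overflows to inf
stable = np.max(Z) + np.log(np.sum(np.exp(Z - np.max(Z))))    # same quantity, computed safely
print(naive, stable)                                          # naive prints inf, stable prints ~1001.41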
def compute_responsibilities(data, weights, means, covariances):
    """E-step: compute responsibilities, given the current parameters."""
    num_data = len(data)
    num_clusters = len(means)
    resp = np.zeros((num_data, num_clusters))

    # resp[i,k] is proportional to the weighted likelihood of data point i under cluster k.
    # multivariate_normal.pdf gives the Gaussian density of data[i] under cluster k.
    for i in range(num_data):
        for k in range(num_clusters):
            resp[i, k] = weights[k] * multivariate_normal.pdf(data[i], means[k], covariances[k])

    # Normalize each row so the responsibilities for each data point sum to 1
    row_sums = resp.sum(axis=1)[:, np.newaxis]
    resp = resp / row_sums

    return resp
def compute_soft_counts(resp):
    # Compute the total responsibility assigned to each cluster, which will be useful when
    # implementing the M-step updates below. In the lectures this is called N^{soft}
    counts = np.sum(resp, axis=0)
    return counts
def compute_weights(counts):
    num_clusters = len(counts)
    weights = [0.] * num_clusters

    for k in range(num_clusters):
        # Update the weight for cluster k using the M-step update rule for the cluster weight, \hat{\pi}_k.
        # The total number of data points is the sum of all soft counts.
        weights[k] = counts[k] / np.sum(counts)

    return weights
def compute_means(data, resp, counts):
    num_clusters = len(counts)
    num_data = len(data)
    means = [np.zeros(len(data[0]))] * num_clusters

    for k in range(num_clusters):
        # Update means for cluster k using the M-step update rule for the mean variables.
        # This assigns means[k] to be our estimate for \hat{\mu}_k.
        weighted_sum = 0.
        for i in range(num_data):
            weighted_sum += resp[i, k] * data[i]
        means[k] = weighted_sum / counts[k]

    return means
def compute_covariances(data, resp, counts, means):
    num_clusters = len(counts)
    num_dim = len(data[0])
    num_data = len(data)
    covariances = [np.zeros((num_dim, num_dim))] * num_clusters

    for k in range(num_clusters):
        # Update covariances for cluster k using the M-step update rule for covariance variables.
        # This assigns covariances[k] to be the estimate for \hat{\Sigma}_k.
        weighted_sum = np.zeros((num_dim, num_dim))
        for i in range(num_data):
            # np.outer builds the (x_i - mu_k)(x_i - mu_k)^T matrix for this data point
            weighted_sum += resp[i, k] * np.outer(data[i] - means[k], data[i] - means[k])
        covariances[k] = weighted_sum / counts[k]

    return covariances
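Before moving on to the image data, the implementation can be exercised on a small synthetic dataset. This is a minimal sketch; the two clusters and the initial guesses below are made up purely for illustration:

import numpy as np

np.random.seed(0)

# Two well-separated 2-D Gaussian clusters (synthetic data for illustration only)
data = [np.random.multivariate_normal([0, 0], np.eye(2)) for _ in range(100)] + \
       [np.random.multivariate_normal([5, 5], np.eye(2)) for _ in range(100)]

# Rough initial guesses: one point from each cluster, identity covariances, equal weights
init_means = [data[0], data[-1]]
init_covariances = [np.eye(2), np.eye(2)]
init_weights = [0.5, 0.5]

result = EM(data, init_means, init_covariances, init_weights)
print(result['weights'])   # should be close to [0.5, 0.5]
print(result['means'])     # should be close to [0, 0] and [5, 5]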
4. Training the model to obtain the Gaussian mixture parameters
import graphlab as gl
import array

images = gl.SFrame('images.sf')
gl.canvas.set_target('ipynb')
images['rgb'] = images.pack_columns(['red', 'green', 'blue'])['X4']
images.show()
np.random.seed(1)

# Initialize parameters
init_means = [images['rgb'][x] for x in np.random.choice(len(images), 4, replace=False)]
cov = np.diag([images['red'].var(), images['green'].var(), images['blue'].var()])
init_covariances = [cov, cov, cov, cov]
init_weights = [1/4., 1/4., 1/4., 1/4.]

# Convert rgb data to numpy arrays
img_data = [np.array(i) for i in images['rgb']]

# Run our EM algorithm on the image data using the above initializations.
# This should converge in about 125 iterations
out = EM(img_data, init_means, init_covariances, init_weights)
import matplotlib.pyplot as plt

ll = out['loglik']
plt.plot(range(len(ll)), ll, linewidth=4)
plt.xlabel('Iteration')
plt.ylabel('Log-likelihood')
plt.rcParams.update({'font.size': 16})
plt.tight_layout()
At this point we have obtained the parameters of the Gaussian mixture model.
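The fitted parameters can be inspected directly from the dictionary returned by EM, for example:

# Inspect the learned mixture parameters
print(out['weights'])                       # estimated cluster weights \hat{\pi}_k
for k in range(len(out['means'])):
    print('cluster %d mean:' % k, out['means'][k])
    print('cluster %d covariance:' % k)
    print(out['covs'][k])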
5. Assigning each image to a cluster
weights = out['weights']
means = out['means']
covariances = out['covs']
rgb = images['rgb']
N = len(images)   # number of images
K = len(means)    # number of clusters

assignments = [0] * N
probs = [0] * N

for i in range(N):
    # Compute the score of data point i under each Gaussian component:
    p = np.zeros(K)
    for k in range(K):
        p[k] = weights[k] * multivariate_normal.pdf(rgb[i], mean=means[k], cov=covariances[k])

    # Compute the assignment of each data point to a given cluster based on the above scores:
    assignments[i] = np.argmax(p)

    # For data point i, store the corresponding score under this cluster assignment:
    probs[i] = np.max(p)

assignments = gl.SFrame({'assignments': assignments, 'probs': probs, 'image': images['image']})
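To see which images are the most representative members of a cluster, the assignments SFrame can be filtered by cluster label and sorted by probs. A minimal sketch, assuming the SFrame topk method from GraphLab Create:

# Show the 5 highest-scoring images assigned to cluster 0
top_images = assignments[assignments['assignments'] == 0].topk('probs', k=5)
top_images['image'].show()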