function [weights,means,covars]= kmeans_init(data,K,numtries,permseed)
% KMEANS_INIT: [weights,means,covars]= kmeans_init(data,K,numtries,permseed)
%
% Use k-means to find a good starting point for EM for Gaussian
% mixture modeling.  k-means is run numtries times from different seeds;
% each resulting partition is turned into mixture parameters, scored by
% the log-likelihood of data under that mixture, and the best-scoring
% partition is returned.
%
% INPUT:
% -----
% data     - matrix with feature vectors as rows (n rows, d columns)
% K        - number of Gaussian components in the mixture
% numtries - number of different random starting points to try for kmeans
%            (must give at least one iteration of 1:numtries)
% permseed - random seed initializer; try i calls kmeans with permseed*i
%
% OUTPUT:
% ------
% weights  - mixing weights as returned by kmeans_params, indexed weights(k)
% means    - component means as returned by kmeans, one row per component
% covars   - covariances as returned by kmeans_params; component k is
%            read here as the column block covars(:,(k-1)*d+1:k*d)
%
% Depends on the project functions kmeans (3-output form), kmeans_params
% and pgauss_chol.
%
%-------------------------------------------------------------------------------

% size of data matrix
[n, d] = size(data);

% Fail early with a clear message instead of an "undefined variable"
% error at max(ll) when the loop below would not execute at all.
tries = 1:numtries;
if isempty(tries)
  error('kmeans_init:invalidNumtries', ...
        'numtries must be at least 1 so that k-means is run at least once.');
end

% Initialize
joints  = zeros(n,K);              % joints(:,k) = p(x|k) * weights(k)
epsilon = 0.0001;                  % passed through to kmeans_params
ll      = zeros(1,numel(tries));   % log-likelihood obtained by each try
% seeds is intentionally not preallocated so that it keeps the class of
% the seed value returned by kmeans.

%t0 = clock;
for i = tries

  % Initialize means and sigmas using k-means
  [labels,means,kseed] = kmeans(data, K, permseed*i);
  seeds(i) = kseed;
  [weights,covars] = kmeans_params(K,data,labels,n,d,epsilon);

  % Compute the joint p(x,k) = p(x|k) * weights(k) of every row of data
  % using the means and covariance matrices of the mixture we just found
  for k = 1:K
    covar_k = covars(:,((k-1)*d+1):(k*d));
    joints(:,k) = pgauss_chol(data, means(k,:), covar_k) * weights(k);
  end

  % Marginalise over components.  The dimension is given explicitly:
  % sum(joints') collapses to a single scalar when K == 1 (joints' is
  % then a row vector), which turned the score into log(sum(p)) instead
  % of sum(log(p)).
  f_x = sum(joints, 2);
  ll(i) = sum(log(f_x));
end
%fprintf('time for kmeans, K=%d: %f\n',K,etime(clock,t0));

% Pick the kmeans partition with the highest likelihood - a hack
% to try to avoid poor local optima.
% NOTE(review): this assumes that re-running kmeans with the seed it
% returned reproduces the same partition - confirm against kmeans.
[llmaxi, maxi] = max(ll);
[labels,means,kseed] = kmeans(data, K, seeds(maxi));
[weights,covars] = kmeans_params(K,data,labels,n,d,epsilon);