// // K-Means clustering algorithm // // (C) 2005, Konstantin Tretjakov // // // Function k_means // ------------------------------------------ // Parameters: // X - n x m data matrix (containing n training instances as rows) // k - number of clusters to find // // Return values: // X_means - n x k matrix, containing the centers of the found clusters as its rows. // function X_means=k_means(X, k) n = size(X, 'r'); m = size(X, 'c'); means_prev = zeros(k, m); means_new = rand(k, m); itercount = 0; // While we are making progress... while (sum((means_prev - means_new).*(means_prev - means_new)) > 0.01) means_prev = means_new; // Classify all points according to prevmeans c = zeros(n, k) // c(i,j)=1 if point i belongs to cluster j for i = 1:n c(i, k_means_classify(X(i, :)', means_prev)) = 1; end // Update means for j = 1:k k_pts = X(find(c(:,j) <> 0),:); // Points belonging to k-th cluster if (size(k_pts, 'r') == 0) then k_pts = rand(1, m); // If no points were in cluster k, put a random point there end means_new(j, :) = mean(k_pts,'r'); end; itercount = itercount + 1; end X_means = means_new; printf("K-means converged in %d iterations\n", itercount); // Auxiliary procedure // Returns the index of the closest mean function mean_idx = k_means_classify(point, means) // distance between the point and each mean pt_dists = means - ones(size(means,'r'), 1)*point'; pt_dists = sum(pt_dists .* pt_dists, 'c'); [mind, mind_indx] = min(pt_dists); mean_idx = mind_indx;