I have a set of clusters consisting of 3D points. I want to get the nearest two points from each two clusters.
For example: I have 5 clusters C1 to C5 consisting of a 3D points. For C1 and C2 there are two points Pc1 "point in C1" and Pc2 "point in C2" that are the closet two points between the two clusters C1 and C2, same between C1 and C3..C5 and same between C2 and C3..C5 and so on. After that I'll have 20 points representing the nearest points between the different clusters.
The second thing is that I want to connect this points together if the distance between each of them and the other is less than a certain distance "threshold".
So I'm asking if anyone could please advise me
Update:
Thanks Amro for your answer, I've updated it to CIDX=kmeans(X, K,'distance','cityblock', 'replicates',5); to solve the empty cluster error. But another error appeared "pdistmex Out of memory. Type HELP MEMORY for your options." So I've checked your answer here: Out of memory error while using clusterdata in MATLAB and updated your code as below but the problem now is that there is now an indexing error in this code mn = min(min(D(idx1,idx2)));
I'm asking if there is a workaround for this error?
Code used:
%function single_linkage(depth,clrr)
X = randn(5000,3);
%X=XX;
% clr = clrr;
K=7;
clr = jet(K);
%// cluster into K=4
K = 7;
%CIDX = kmeans(X,K);
%// pairwise distances
SUBSET_SIZE = 1000; %# subset size
ind = randperm(size(X,1));
data = X(ind(1:SUBSET_SIZE), :);
D = squareform(pdist(data));
subs = 1:size(D,1);
CIDX=kmeans(D, K,'distance','sqEuclidean', 'replicates',5);
centers = zeros(K, size(data,2));
for i=1:size(data,2)
centers(:,i) = accumarray(CIDX, data(:,i), [], @mean);
end
%# calculate distance of each instance to all cluster centers
D = zeros(size(X,1), K);
for k=1:K
D(:,k) = sum( bsxfun(@minus, X, centers(k,:)).^2, 2);
end
%D=squareform(D);
%# assign each instance to the closest cluster
[~,clustIDX] = min(D, [], 2);
%// for each pair of clusters
cpairs = nchoosek(1:K,2);
pairs = zeros(size(cpairs));
dists = zeros(size(cpairs,1),1);
for i=1:size(cpairs,1)
%// index of points assigned to each of the two cluster
idx1 = (clustIDX == cpairs(i,1));
idx2 = (clustIDX == cpairs(i,2));
%// shortest distance between the two clusters
mn = min(min(D(idx1,idx2)));
dists(i) = mn;
%// corresponding pair of points with the minimum distance
[r,c] = find(D(idx1,idx2)==mn);
s1 = subs(idx1); s2 = subs(idx2);
pairs(i,:) = [s1(r) s2(c)];
end
%// filter pairs by keeping only those whose distances is below a threshold
thresh = inf;
cpairs(dist>thresh,:) = [];
%// plot 3D points color-coded by clusters
figure('renderer','zbuffer')
%clr = lines(K);
h = zeros(1,K);
for i=1:K
h(i) = line(X(CIDX==i,1), X(CIDX==i,2), X(CIDX==i,3), ...
'Color',clr(i,:), 'LineStyle','none', 'Marker','.', 'MarkerSize',5);
end
legend(h, num2str((1:K)', 'C%d')) %'
view(3), axis vis3d, grid on
%// mark and connect nearest points between each pair of clusters
for i=1:size(pairs,1)
line(X(pairs(i,:),1), X(pairs(i,:),2), X(pairs(i,:),3), ...
'Color','k', 'LineStyle','-', 'LineWidth',3, ...
'Marker','o', 'MarkerSize',10);
end
pdist2
command. – Shai