MNE-CPP  0.1.9
A Framework for Electrophysiology
kmeans.cpp
Go to the documentation of this file.
1 //=============================================================================================================
37 //=============================================================================================================
38 // INCLUDES
39 //=============================================================================================================
40 
41 #include "kmeans.h"
42 
43 #include <math.h>
44 #include <iostream>
45 #include <algorithm>
46 #include <vector>
47 #include <time.h>
48 
49 //=============================================================================================================
50 // QT INCLUDES
51 //=============================================================================================================
52 
53 #include <QDebug>
54 
55 //=============================================================================================================
56 // USED NAMESPACES
57 //=============================================================================================================
58 
59 using namespace UTILSLIB;
60 using namespace Eigen;
61 
62 //=============================================================================================================
63 // DEFINE MEMBER METHODS
64 //=============================================================================================================
65 
66 KMeans::KMeans(QString distance,
67  QString start,
68  qint32 replicates,
69  QString emptyact,
70  bool online,
71  qint32 maxit)
72 : m_sDistance(distance.toStdString())
73 , m_sStart(start.toStdString())
74 , m_iReps(replicates)
75 , m_sEmptyact(emptyact.toStdString())
76 , m_iMaxit(maxit)
77 , m_bOnline(online)
78 , emptyErrCnt(0)
79 , iter(0)
80 , k(0)
81 , n(0)
82 , p(0)
83 , totsumD(0)
84 , prevtotsumD(0)
85 {
86  // Assume one replicate
87  if (m_iReps < 1)
88  m_iReps = 1;
89 }
90 
91 //=============================================================================================================
92 
93 //KMeans::KMeans(std::string distance,
94 // std::string start,
95 // qint32 replicates,
96 // std::string emptyact,
97 // bool online,
98 // qint32 maxit)
99 //: m_sDistance(distance)
100 //, m_sStart(start)
101 //, m_iReps(replicates)
102 //, m_sEmptyact(emptyact)
103 //, m_iMaxit(maxit)
104 //, m_bOnline(online)
105 //, emptyErrCnt(0)
106 //, iter(0)
107 //, k(0)
108 //, n(0)
109 //, p(0)
110 //, totsumD(0)
111 //, prevtotsumD(0)
112 //{
113 // // Assume one replicate
114 // if (m_iReps < 1)
115 // m_iReps = 1;
116 //}
117 
118 //=============================================================================================================
119 
120 bool KMeans::calculate(MatrixXd X,
121  qint32 kClusters,
122  VectorXi& idx,
123  MatrixXd& C,
124  VectorXd& sumD,
125  MatrixXd& D)
126 {
127  if (kClusters < 1)
128  return false;
129 
130  //Init random generator
131  srand ( time(NULL) );
132 
133 // n points in p dimensional space
134  k = kClusters;
135  n = X.rows();
136  p = X.cols();
137 
138  if(m_sDistance.compare("cosine") == 0)
139  {
140 // Xnorm = sqrt(sum(X.^2, 2));
141 // if any(min(Xnorm) <= eps(max(Xnorm)))
142 // error(['Some points have small relative magnitudes, making them ', ...
143 // 'effectively zero.\nEither remove those points, or choose a ', ...
144 // 'distance other than ''cosine''.']);
145 // end
146 // X = X ./ Xnorm(:,ones(1,p));
147  }
148  else if(m_sDistance.compare("correlation")==0)
149  {
150  X.array() -= (X.rowwise().sum().array() / (double)p).replicate(1,p); //X - X.rowwise().sum();//.repmat(mean(X,2),1,p);
151  MatrixXd Xnorm = (X.array().pow(2).rowwise().sum()).sqrt();//sqrt(sum(X.^2, 2));
152 // if any(min(Xnorm) <= eps(max(Xnorm)))
153 // error(['Some points have small relative standard deviations, making them ', ...
154 // 'effectively constant.\nEither remove those points, or choose a ', ...
155 // 'distance other than ''correlation''.']);
156 // end
157  X.array() /= Xnorm.replicate(1,p).array();
158  }
159 // else if(m_sDistance.compare('hamming')==0)
160 // {
161 // if ~all(ismember(X(:),[0 1]))
162 // error(message('NonbinaryDataForHamm'));
163 // end
164 // }
165 
166  // Start
167  RowVectorXd Xmins;
168  RowVectorXd Xmaxs;
169  if (m_sStart.compare("uniform") == 0)
170  {
171  if (m_sDistance.compare("hamming") == 0)
172  {
173  printf("Error: Uniform Start For Hamming\n");
174  return false;
175  }
176  Xmins = X.colwise().minCoeff();
177  Xmaxs = X.colwise().maxCoeff();
178  }
179 
180  //
181  // Done with input argument processing, begin clustering
182  //
183  if (m_bOnline)
184  {
185  Del = MatrixXd(n,k);
186  Del.fill(std::numeric_limits<double>::quiet_NaN());// reassignment criterion
187  }
188 
189  double totsumDBest = std::numeric_limits<double>::max();
190  emptyErrCnt = 0;
191 
192  VectorXi idxBest;
193  MatrixXd Cbest;
194  VectorXd sumDBest;
195  MatrixXd Dbest;
196 
197  for(qint32 rep = 0; rep < m_iReps; ++rep)
198  {
199  if (m_sStart.compare("uniform") == 0)
200  {
201  C = MatrixXd::Zero(k,p);
202  for(qint32 i = 0; i < k; ++i)
203  for(qint32 j = 0; j < p; ++j)
204  C(i,j) = unifrnd(Xmins[j], Xmaxs[j]);
205  // For 'cosine' and 'correlation', these are uniform inside a subset
206  // of the unit hypersphere. Still need to center them for
207  // 'correlation'. (Re)normalization for 'cosine'/'correlation' is
208  // done at each iteration.
209  if (m_sDistance.compare("correlation") == 0)
210  C.array() -= (C.array().rowwise().sum()/p).replicate(1, p).array();
211  }
212  else if (m_sStart.compare("sample") == 0)
213  {
214  C = MatrixXd::Zero(k,p);
215  for(qint32 i = 0; i < k; ++i)
216  C.block(i,0,1,p) = X.block(rand() % n, 0, 1, p);
217  // DEBUG
218 // C.block(0,0,1,p) = X.block(2, 0, 1, p);
219 // C.block(1,0,1,p) = X.block(7, 0, 1, p);
220 // C.block(2,0,1,p) = X.block(17, 0, 1, p);
221  }
222  // else if (start.compare("cluster") == 0)
223  // {
224  // Xsubset = X(randsample(n,floor(.1*n)),:);
225  // [dum, C] = kmeans(Xsubset, k, varargin{:}, 'start','sample', 'replicates',1);
226  // }
227  // else if (start.compare("numeric") == 0)
228  // {
229  // C = CC(:,:,rep);
230  // }
231 
232  // Compute the distance from every point to each cluster centroid and the
233  // initial assignment of points to clusters
234  D = distfun(X, C);//, 0);
235  idx = VectorXi::Zero(D.rows());
236  d = VectorXd::Zero(D.rows());
237 
238  for(qint32 i = 0; i < D.rows(); ++i)
239  d[i] = D.row(i).minCoeff(&idx[i]);
240 
241  m = VectorXi::Zero(k);
242  for(qint32 i = 0; i < k; ++i)
243  for (qint32 j = 0; j < idx.rows(); ++j)
244  if(idx[j] == i)
245  ++ m[i];
246 
247  try // catch empty cluster errors and move on to next rep
248  {
249  // Begin phase one: batch reassignments
250  bool converged = batchUpdate(X, C, idx);
251 
252  // Begin phase two: single reassignments
253  if (m_bOnline)
254  converged = onlineUpdate(X, C, idx);
255 
256  if (!converged)
257  printf("Failed To Converge during replicate %d\n", rep);
258 
259  // Calculate cluster-wise sums of distances
260  VectorXi nonempties = VectorXi::Zero(m.rows());
261  quint32 count = 0;
262  for(qint32 i = 0; i < m.rows(); ++i)
263  {
264  if(m[i] > 0)
265  {
266  nonempties[i] = 1;
267  ++count;
268  }
269  }
270  MatrixXd C_tmp(count,C.cols());
271  count = 0;
272  for(qint32 i = 0; i < nonempties.rows(); ++i)
273  {
274  if(nonempties[i])
275  {
276  C_tmp.row(count) = C.row(i);
277  ++count;
278  }
279  }
280 
281  MatrixXd D_tmp = distfun(X, C_tmp);//, iter);
282  count = 0;
283  for(qint32 i = 0; i < nonempties.rows(); ++i)
284  {
285  if(nonempties[i])
286  {
287  D.col(i) = D_tmp.col(count);
288  C.row(i) = C_tmp.row(count);
289  ++count;
290  }
291  }
292 
293  d = VectorXd::Zero(n);
294  for(qint32 i = 0; i < n; ++i)
295  d[i] += D.array()(idx[i]*n+i);//Colum Major
296 
297  sumD = VectorXd::Zero(k);
298  for(qint32 i = 0; i < k; ++i)
299  for (qint32 j = 0; j < idx.rows(); ++j)
300  if(idx[j] == i)
301  sumD[i] += d[j];
302 
303  totsumD = sumD.array().sum();
304 
305 // printf("%d iterations, total sum of distances = %f\n", iter, totsumD);
306 
307  // Save the best solution so far
308  if (totsumD < totsumDBest)
309  {
310  totsumDBest = totsumD;
311  idxBest = idx;
312  Cbest = C;
313  sumDBest = sumD;
314  Dbest = D;
315  }
316  }
317  catch (int e)
318  {
319  if(e == 0)
320  {
321  // If an empty cluster error occurred in one of multiple replicates, catch
322  // it, warn, and move on to next replicate. Error only when all replicates
323  // fail. Rethrow an other kind of error.
324  if (m_iReps == 1)
325  return false;
326  else
327  {
328  emptyErrCnt = emptyErrCnt + 1;
329 // printf("Replicate %d terminated: empty cluster created at iteration %d.\n", rep, iter);
330  if (emptyErrCnt == m_iReps)
331  {
332 // error(message('EmptyClusterAllReps'));
333  return false;
334  }
335 
336  }
337  }
338  } // catch
339 
340  } // replicates
341 
342  // Return the best solution
343  idx = idxBest;
344  C = Cbest;
345  sumD = sumDBest;
346  D = Dbest;
347 
348 //if hadNaNs
349 // idx = statinsertnan(wasnan, idx);
350 //end
351  return true;
352 }
353 
354 //=============================================================================================================
355 
356 bool KMeans::batchUpdate(const MatrixXd& X, MatrixXd& C, VectorXi& idx)
357 {
358  // Every point moved, every cluster will need an update
359  qint32 i = 0;
360  VectorXi moved(n);
361  for(i = 0; i < n; ++i)
362  moved[i] = i;
363 
364  VectorXi changed(k);
365  for(i = 0; i < k; ++i)
366  changed[i] = i;
367 
368  previdx = VectorXi::Zero(n);
369 
370  prevtotsumD = std::numeric_limits<double>::max();//max double
371 
372  MatrixXd D = MatrixXd::Zero(X.rows(), k);
373 
374  //
375  // Begin phase one: batch reassignments
376  //
377  iter = 0;
378  bool converged = false;
379  while(true)
380  {
381  ++iter;
382 
383  // Calculate the new cluster centroids and counts, and update the
384  // distance from every point to those new cluster centroids
385  MatrixXd C_new;
386  VectorXi m_new;
387  KMeans::gcentroids(X, idx, changed, C_new, m_new);
388  MatrixXd D_new = distfun(X, C_new);//, iter);
389 
390  for(qint32 i = 0; i < changed.rows(); ++i)
391  {
392  C.row(changed[i]) = C_new.row(i);
393  D.col(changed[i]) = D_new.col(i);
394  m[changed[i]] = m_new[i];
395  }
396 
397  // Deal with clusters that have just lost all their members
398  VectorXi empties = VectorXi::Zero(changed.rows());
399  for(qint32 i = 0; i < changed.rows(); ++i)
400  if(m(i) == 0)
401  empties[i] = 1;
402 
403  if (empties.sum() > 0)
404  {
405  if (m_sEmptyact.compare("error") == 0)
406  {
407  return converged;
408 // throw 0;
409  }
410  else if (m_sEmptyact.compare("drop") == 0)
411  {
412  // // Remove the empty cluster from any further processing
413  // D(:,empties) = NaN;
414  // changed = changed(m(changed) > 0);
415  // warning('Empty cluster created at iteration %d during replicate %d.',iter, rep,);
416  }
417  else if (m_sEmptyact.compare("singleton") == 0)
418  {
419  // warning('Empty cluster created at iteration %d during replicate %d.', iter, rep);
420 
421  // for i = empties
422  // d = D((idx-1)*n + (1:n)'); // use newly updated distances
423 
424  // % Find the point furthest away from its current cluster.
425  // % Take that point out of its cluster and use it to create
426  // % a new singleton cluster to replace the empty one.
427  // [dlarge, lonely] = max(d);
428  // from = idx(lonely); % taking from this cluster
429  // if m(from) < 2
430  // % In the very unusual event that the cluster had only
431  // % one member, pick any other non-singleton point.
432  // from = find(m>1,1,'first');
433  // lonely = find(idx==from,1,'first');
434  // end
435  // C(i,:) = X(lonely,:);
436  // m(i) = 1;
437  // idx(lonely) = i;
438  // D(:,i) = distfun(X, C(i,:), distance, iter);
439 
440  // % Update clusters from which points are taken
441  // [C(from,:), m(from)] = gcentroids(X, idx, from, distance);
442  // D(:,from) = distfun(X, C(from,:), distance, iter);
443  // changed = unique([changed from]);
444  // end
445  }
446  }
447 
448  // Compute the total sum of distances for the current configuration.
449  totsumD = 0;
450  for(qint32 i = 0; i < n; ++i)
451  totsumD += D.array()(idx[i]*n+i);//Colum Major
452  // Test for a cycle: if objective is not decreased, back out
453  // the last step and move on to the single update phase
454  if(prevtotsumD <= totsumD)
455  {
456  idx = previdx;
457  MatrixXd C_new;
458  VectorXi m_new;
459  gcentroids(X, idx, changed, C_new, m_new);
460  C.block(0,0,k,C.cols()) = C_new;
461  m.block(0,0,k,1) = m_new;
462  --iter;
463  break;
464  }
465 
466 // printf("%6d\t%6d\t%8d\t%12g\n",iter,1,moved.rows(),totsumD);
467  if (iter >= m_iMaxit)
468  break;
469 
470  // Determine closest cluster for each point and reassign points to clusters
471  previdx = idx;
472  prevtotsumD = totsumD;
473 
474  VectorXi nidx(D.rows());
475  for(qint32 i = 0; i < D.rows(); ++i)
476  d[i] = D.row(i).minCoeff(&nidx[i]);
477 
478  // Determine which points moved
479  VectorXi moved = VectorXi::Zero(nidx.rows());
480  qint32 count = 0;
481  for(qint32 i = 0; i < nidx.rows(); ++i)
482  {
483  if(nidx[i] != previdx[i])
484  {
485  moved[count] = i;
486  ++count;
487  }
488  }
489  moved.conservativeResize(count);
490 
491  if (moved.rows() > 0)
492  {
493  // Resolve ties in favor of not moving
494  VectorXi moved_new = VectorXi::Zero(moved.rows());
495  count = 0;
496  for(qint32 i = 0; i < moved.rows(); ++i)
497  {
498  if(D.array()(previdx[moved[i]] * n + moved[i]) > d[moved[i]])
499  {
500  moved_new[count] = moved[i];
501  ++count;
502  }
503  }
504  moved_new.conservativeResize(count);
505  moved = moved_new;
506  }
507 
508  if (moved.rows() == 0)
509  {
510  converged = true;
511  break;
512  }
513 
514  for(qint32 i = 0; i < moved.rows(); ++i)
515  if(moved[i] >= 0)
516  idx[ moved[i] ] = nidx[ moved[i] ];
517 
518  // Find clusters that gained or lost members
519  std::vector<int> tmp;
520  for(qint32 i = 0; i < moved.rows(); ++i)
521  tmp.push_back(idx[moved[i]]);
522  for(qint32 i = 0; i < moved.rows(); ++i)
523  tmp.push_back(previdx[moved[i]]);
524 
525  std::sort(tmp.begin(),tmp.end());
526 
527  std::vector<int>::iterator it;
528  it = std::unique(tmp.begin(),tmp.end());
529  tmp.resize( it - tmp.begin() );
530 
531  changed.conservativeResize(tmp.size());
532 
533  for(quint32 i = 0; i < tmp.size(); ++i)
534  changed[i] = tmp[i];
535  } // phase one
536  return converged;
537 } // nested function
538 
539 //=============================================================================================================
540 
541 bool KMeans::onlineUpdate(const MatrixXd& X, MatrixXd& C, VectorXi& idx)
542 {
543  // Initialize some cluster information prior to phase two
544  MatrixXd Xmid1;
545  MatrixXd Xmid2;
546  if (m_sDistance.compare("cityblock") == 0)
547  {
548  Xmid1 = MatrixXd::Zero(k,p);
549  Xmid2 = MatrixXd::Zero(k,p);
550  for(qint32 i = 0; i < k; ++i)
551  {
552  if (m[i] > 0)
553  {
554  // Separate out sorted coords for points in i'th cluster,
555  // and save values above and below median, component-wise
556  MatrixXd Xsorted(m[i],p);
557  qint32 c = 0;
558  for(qint32 j = 0; j < idx.rows(); ++j)
559  {
560  if(idx[j] == i)
561  {
562  Xsorted.row(c) = X.row(j);
563  ++c;
564  }
565  }
566  for(qint32 j = 0; j < Xsorted.cols(); ++j)
567  std::sort(Xsorted.col(j).data(),Xsorted.col(j).data()+Xsorted.rows());
568 
569  qint32 nn = floor(0.5*m[i])-1;
570  if ((m[i] % 2) == 0)
571  {
572  Xmid1.row(i) = Xsorted.row(nn);
573  Xmid2.row(i) = Xsorted.row(nn+1);
574  }
575  else if (m[i] > 1)
576  {
577  Xmid1.row(i) = Xsorted.row(nn);
578  Xmid2.row(i) = Xsorted.row(nn+2);
579  }
580  else
581  {
582  Xmid1.row(i) = Xsorted.row(0);
583  Xmid2.row(i) = Xsorted.row(0);
584  }
585  }
586  }
587  }
588  else if (m_sDistance.compare("hamming") == 0)
589  {
590 // Xsum = zeros(k,p);
591 // for i = 1:k
592 // if m(i) > 0
593 // % Sum coords for points in i'th cluster, component-wise
594 // Xsum(i,:) = sum(X(idx==i,:), 1);
595 // end
596 // end
597  }
598 
599  //
600  // Begin phase two: single reassignments
601  //
602  VectorXi changed = VectorXi(m.rows());
603  qint32 count = 0;
604  for(qint32 i = 0; i < m.rows(); ++i)
605  {
606  if(m[i] > 0)
607  {
608  changed[count] = i;
609  ++count;
610  }
611  }
612  changed.conservativeResize(count);
613 
614  qint32 lastmoved = 0;
615  qint32 nummoved = 0;
616  qint32 iter1 = iter;
617  bool converged = false;
618  while (iter < m_iMaxit)
619  {
620  // Calculate distances to each cluster from each point, and the
621  // potential change in total sum of errors for adding or removing
622  // each point from each cluster. Clusters that have not changed
623  // membership need not be updated.
624  //
625  // Singleton clusters are a special case for the sum of dists
626  // calculation. Removing their only point is never best, so the
627  // reassignment criterion had better guarantee that a singleton
628  // point will stay in its own cluster. Happily, we get
629  // Del(i,idx(i)) == 0 automatically for them.
630 
631  if (m_sDistance.compare("sqeuclidean") == 0)
632  {
633  for(qint32 j = 0; j < changed.rows(); ++j)
634  {
635  qint32 i = changed[j];
636  VectorXi mbrs = VectorXi::Zero(idx.rows());
637  for(qint32 l = 0; l < idx.rows(); ++l)
638  if(idx[l] == i)
639  mbrs[l] = 1;
640 
641  VectorXi sgn = 1 - 2 * mbrs.array(); // -1 for members, 1 for nonmembers
642 
643  if (m[i] == 1)
644  for(qint32 l = 0; l < mbrs.rows(); ++l)
645  if(mbrs[l])
646  sgn[l] = 0; // prevent divide-by-zero for singleton mbrs
647 
648  Del.col(i) = ((double)m[i] / ((double)m[i] + sgn.cast<double>().array()));
649 
650  Del.col(i).array() *= (X - C.row(i).replicate(n,1)).array().pow(2).rowwise().sum().array();
651  }
652  }
653  else if (m_sDistance.compare("cityblock") == 0)
654  {
655  for(qint32 j = 0; j < changed.rows(); ++j)
656  {
657  qint32 i = changed[j];
658  if (m(i) % 2 == 0) // this will never catch singleton clusters
659  {
660  MatrixXd ldist = Xmid1.row(i).replicate(n,1) - X;
661  MatrixXd rdist = X - Xmid2.row(i).replicate(n,1);
662  VectorXd mbrs = VectorXd::Zero(idx.rows());
663 
664  for(qint32 l = 0; l < idx.rows(); ++l)
665  if(idx[l] == i)
666  mbrs[l] = 1;
667  MatrixXd sgn = ((-2*mbrs).array() + 1).replicate(1, p); // -1 for members, 1 for nonmembers
668  rdist = sgn.array()*rdist.array(); ldist = sgn.array()*ldist.array();
669 
670  for(qint32 l = 0; l < idx.rows(); ++l)
671  {
672  double sum = 0;
673  for(qint32 h = 0; h < rdist.cols(); ++h)
674  sum += rdist(l,h) > ldist(l,h) ? rdist(l,h) < 0 ? 0 : rdist(l,h) : ldist(l,h) < 0 ? 0 : ldist(l,h);
675  Del(l,i) = sum;
676  }
677  }
678  else
679  Del.col(i) = ((X - C.row(i).replicate(n,1)).array().abs()).rowwise().sum();
680  }
681  }
682  else if (m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
683  {
684  // The points are normalized, centroids are not, so normalize them
685  MatrixXd normC = C.array().pow(2).rowwise().sum().sqrt();
686 // if any(normC < eps(class(normC))) % small relative to unit-length data points
687 // error('Zero cluster centroid created at iteration %d during replicate %d.',iter, rep);
688 // end
689  // This can be done without a loop, but the loop saves memory allocations
690  MatrixXd XCi;
691  qint32 i;
692  for(qint32 j = 0; j < changed.rows(); ++j)
693  {
694  i = changed[j];
695  XCi = X * C.row(i).transpose();
696 
697  VectorXi mbrs = VectorXi::Zero(idx.rows());
698  for(qint32 l = 0; l < idx.rows(); ++l)
699  if(idx[l] == i)
700  mbrs[l] = 1;
701 
702  VectorXi sgn = 1 - 2 * mbrs.array(); // -1 for members, 1 for nonmembers
703 
704  double A = (double)m[i] * normC(i,0);
705  double B = pow(((double)m[i] * normC(i,0)),2);
706 
707  Del.col(i) = 1 + sgn.cast<double>().array()*
708  (A - (B + 2 * sgn.cast<double>().array() * m[i] * XCi.array() + 1).sqrt());
709 
710  std::cout << "Del.col(i)\n" << Del.col(i) << std::endl;
711 
712 // Del(:,i) = 1 + sgn .*...
713 // (m(i).*normC(i) - sqrt((m(i).*normC(i)).^2 + 2.*sgn.*m(i).*XCi + 1));
714  }
715  }
716  else if (m_sDistance.compare("hamming") == 0)
717  {
718 // for i = changed
719 // if mod(m(i),2) == 0 % this will never catch singleton clusters
720 // % coords with an unequal number of 0s and 1s have a
721 // % different contribution than coords with an equal
722 // % number
723 // unequal01 = find(2*Xsum(i,:) ~= m(i));
724 // numequal01 = p - length(unequal01);
725 // mbrs = (idx == i);
726 // Di = abs(X(:,unequal01) - C(repmat(i,n,1),unequal01));
727 // Del(:,i) = (sum(Di, 2) + mbrs*numequal01) / p;
728 // else
729 // Del(:,i) = sum(abs(X - C(repmat(i,n,1),:)), 2) / p;
730 // end
731 // end
732  }
733 
734  // Determine best possible move, if any, for each point. Next we
735  // will pick one from those that actually did move.
736  previdx = idx;
737  prevtotsumD = totsumD;
738 
739  VectorXi nidx = VectorXi::Zero(Del.rows());
740  VectorXd minDel = VectorXd::Zero(Del.rows());
741 
742  for(qint32 i = 0; i < Del.rows(); ++i)
743  minDel[i] = Del.row(i).minCoeff(&nidx[i]);
744 
745  VectorXi moved = VectorXi::Zero(previdx.rows());
746  qint32 count = 0;
747  for(qint32 i = 0; i < moved.rows(); ++i)
748  {
749  if(previdx[i] != nidx[i])
750  {
751  moved[count] = i;
752  ++count;
753  }
754  }
755  moved.conservativeResize(count);
756 
757  if (moved.sum() > 0)
758  {
759  // Resolve ties in favor of not moving
760  VectorXi moved_new = VectorXi::Zero(moved.rows());
761  count = 0;
762  for(qint32 i = 0; i < moved.rows(); ++i)
763  {
764  if ( Del.array()(previdx[moved(i)]*n + moved(i)) > minDel(moved(i)))
765  {
766  moved_new[count] = moved[i];
767  ++count;
768  }
769  }
770  moved_new.conservativeResize(count);
771  moved = moved_new;
772  }
773 
774  if (moved.rows() <= 0)
775  {
776  // Count an iteration if phase 2 did nothing at all, or if we're
777  // in the middle of a pass through all the points
778  if ((iter == iter1) || nummoved > 0)
779  {
780  ++iter;
781 
782 // printf("%6d\t%6d\t%8d\t%12g\n",iter,2,nummoved,totsumD);
783  }
784  converged = true;
785  break;
786  }
787 
788  // Pick the next move in cyclic order
789  VectorXi moved_new(moved.rows());
790  for(qint32 i = 0; i < moved.rows(); ++i)
791  moved_new[i] = ((moved[i] - lastmoved) % n) + lastmoved;
792 
793  moved[0] = moved_new.minCoeff() % n;//+1
794  moved.conservativeResize(1);
795 
796  // If we've gone once through all the points, that's an iteration
797  if (moved[0] <= lastmoved)
798  {
799  ++iter;
800 // printf("%6d\t%6d\t%8d\t%12g\n",iter,2,nummoved,totsumD);
801  if(iter >= m_iMaxit)
802  break;
803  nummoved = 0;
804  }
805  ++nummoved;
806  lastmoved = moved[0];
807 
808  qint32 oidx = idx(moved[0]);
809  nidx[0] = nidx(moved[0]);
810  nidx.conservativeResize(1);
811  totsumD += Del(moved[0],nidx[0]) - Del(moved[0],oidx);
812 
813  // Update the cluster index vector, and the old and new cluster
814  // counts and centroids
815  idx[ moved[0] ] = nidx[0];
816  m( nidx[0] ) = m( nidx[0] ) + 1;
817  m( oidx ) = m( oidx ) - 1;
818 
819  if (m_sDistance.compare("sqeuclidean") == 0)
820  {
821  C.row(nidx[0]) = C.row(nidx[0]).array() + (X.row(moved[0]) - C.row(nidx[0])).array() / m[nidx[0]];
822  C.row(oidx) = C.row(oidx).array() - (X.row(moved[0]) - C.row(oidx)).array() / m[oidx];
823  }
824  else if (m_sDistance.compare("cityblock") == 0)
825  {
826  VectorXi onidx(2);
827  onidx << oidx, nidx[0];//ToDo always right?
828 
829  qint32 i;
830  for(qint32 h = 0; h < 2; ++h)
831  {
832  i = onidx[h];
833  // Separate out sorted coords for points in each cluster.
834  // New centroid is the coord median, save values above and
835  // below median. All done component-wise.
836  MatrixXd Xsorted(m[i],p);
837  qint32 c = 0;
838  for(qint32 j = 0; j < idx.rows(); ++j)
839  {
840  if(idx[j] == i)
841  {
842  Xsorted.row(c) = X.row(j);
843  ++c;
844  }
845  }
846  for(qint32 j = 0; j < Xsorted.cols(); ++j)
847  std::sort(Xsorted.col(j).data(),Xsorted.col(j).data()+Xsorted.rows());
848 
849  qint32 nn = floor(0.5*m[i])-1;
850  if ((m[i] % 2) == 0)
851  {
852  C.row(i) = 0.5 * (Xsorted.row(nn) + Xsorted.row(nn+1));
853  Xmid1.row(i) = Xsorted.row(nn);
854  Xmid2.row(i) = Xsorted.row(nn+1);
855  }
856  else
857  {
858  C.row(i) = Xsorted.row(nn+1);
859  if (m(i) > 1)
860  {
861  Xmid1.row(i) = Xsorted.row(nn);
862  Xmid2.row(i) = Xsorted.row(nn+2);
863  }
864  else
865  {
866  Xmid1.row(i) = Xsorted.row(0);
867  Xmid2.row(i) = Xsorted.row(0);
868  }
869  }
870  }
871  }
872  else if (m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
873  {
874  C.row(nidx[0]).array() += (X.row(moved[0]) - C.row(nidx[0])).array() / m[nidx[0]];
875  C.row(oidx).array() += (X.row(moved[0]) - C.row(oidx)).array() / m[oidx];
876  }
877  else if (m_sDistance.compare("hamming") == 0)
878  {
879 // % Update summed coords for points in each cluster. New
880 // % centroid is the coord median. All done component-wise.
881 // Xsum(nidx,:) = Xsum(nidx,:) + X(moved,:);
882 // Xsum(oidx,:) = Xsum(oidx,:) - X(moved,:);
883 // C(nidx,:) = .5*sign(2*Xsum(nidx,:) - m(nidx)) + .5;
884 // C(oidx,:) = .5*sign(2*Xsum(oidx,:) - m(oidx)) + .5;
885  }
886 
887  VectorXi sorted_onidx(1+nidx.rows());
888  sorted_onidx << oidx, nidx;
889  std::sort(sorted_onidx.data(), sorted_onidx.data()+sorted_onidx.rows());
890  changed = sorted_onidx;
891  } // phase two
892 
893  return converged;
894 } // nested function
895 
896 //=============================================================================================================
897 //DISTFUN Calculate point to cluster centroid distances.
898 MatrixXd KMeans::distfun(const MatrixXd& X, MatrixXd& C)//, qint32 iter)
899 {
900  MatrixXd D = MatrixXd::Zero(n,C.rows());
901  qint32 nclusts = C.rows();
902 
903  if (m_sDistance.compare("sqeuclidean") == 0)
904  {
905  for(qint32 i = 0; i < nclusts; ++i)
906  {
907  D.col(i) = (X.col(0).array() - C(i,0)).pow(2);
908 
909  for(qint32 j = 1; j < p; ++j)
910  D.col(i) = D.col(i).array() + (X.col(j).array() - C(i,j)).pow(2);
911  }
912  }
913  else if (m_sDistance.compare("cityblock") == 0)
914  {
915  for(qint32 i = 0; i < nclusts; ++i)
916  {
917  D.col(i) = (X.col(0).array() - C(i,0)).array().abs();
918  for(qint32 j = 1; j < p; ++j)
919  {
920  D.col(i).array() += (X.col(j).array() - C(i,j)).array().abs();
921  }
922  }
923  }
924  else if (m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
925  {
926  // The points are normalized, centroids are not, so normalize them
927  MatrixXd normC = C.array().pow(2).rowwise().sum().sqrt();
928 // if any(normC < eps(class(normC))) % small relative to unit-length data points
929 // error('Zero cluster centroid created at iteration %d.',iter);
930  for (qint32 i = 0; i < nclusts; ++i)
931  {
932  MatrixXd C_tmp = (C.row(i).array() / normC(i,0)).transpose();
933  D.col(i) = X * C_tmp;//max(1 - X * (C(i,:)./normC(i))', 0);
934  for(qint32 j = 0; j < D.rows(); ++j)
935  if(D(j,i) < 0)
936  D(j,i) = 0;
937  }
938  }
939 //case 'hamming'
940 // for i = 1:nclusts
941 // D(:,i) = abs(X(:,1) - C(i,1));
942 // for j = 2:p
943 // D(:,i) = D(:,i) + abs(X(:,j) - C(i,j));
944 // end
945 // D(:,i) = D(:,i) / p;
946 // % D(:,i) = sum(abs(X - C(repmat(i,n,1),:)), 2) / p;
947 // end
948 //end
949  return D;
950 } // function
951 
952 //=============================================================================================================
953 //GCENTROIDS Centroids and counts stratified by group.
954 void KMeans::gcentroids(const MatrixXd& X, const VectorXi& index, const VectorXi& clusts,
955  MatrixXd& centroids, VectorXi& counts)
956 {
957  qint32 num = clusts.rows();
958  centroids = MatrixXd::Zero(num,p);
959  centroids.fill(std::numeric_limits<double>::quiet_NaN());
960  counts = VectorXi::Zero(num);
961 
962  VectorXi members;
963 
964  qint32 c;
965 
966  for(qint32 i = 0; i < num; ++i)
967  {
968  c = 0;
969  members = VectorXi::Zero(index.rows());
970  for(qint32 j = 0; j < index.rows(); ++j)
971  {
972  if(index[j] == clusts[i])
973  {
974  members[c] = j;
975  ++c;
976  }
977  }
978  members.conservativeResize(c);
979  if (c > 0)
980  {
981  counts[i] = c;
982  if(m_sDistance.compare("sqeuclidean") == 0)
983  {
984  //Initialize
985  if(members.rows() > 0)
986  centroids.row(i) = RowVectorXd::Zero(centroids.cols());
987 
988  for(qint32 j = 0; j < members.rows(); ++j)
989  centroids.row(i).array() += X.row(members[j]).array() / counts[i];
990  }
991  else if(m_sDistance.compare("cityblock") == 0)
992  {
993  // Separate out sorted coords for points in i'th cluster,
994  // and use to compute a fast median, component-wise
995  MatrixXd Xsorted(counts[i],p);
996  c = 0;
997 
998  for(qint32 j = 0; j < index.rows(); ++j)
999  {
1000  if(index[j] == clusts[i])
1001  {
1002  Xsorted.row(c) = X.row(j);
1003  ++c;
1004  }
1005  }
1006 
1007  for(qint32 j = 0; j < Xsorted.cols(); ++j)
1008  std::sort(Xsorted.col(j).data(),Xsorted.col(j).data()+Xsorted.rows());
1009 
1010  qint32 nn = floor(0.5*(counts(i)))-1;
1011  if (counts[i] % 2 == 0)
1012  centroids.row(i) = .5 * (Xsorted.row(nn) + Xsorted.row(nn+1));
1013  else
1014  centroids.row(i) = Xsorted.row(nn+1);
1015  }
1016  else if(m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
1017  {
1018  for(qint32 j = 0; j < members.rows(); ++j)
1019  centroids.row(i).array() += X.row(members[j]).array() / counts[i]; // unnormalized
1020  }
1021 // else if(m_sDistance.compare("hamming") == 0)
1022 // {
1023 // % Compute a fast median for binary data, component-wise
1024 // centroids(i,:) = .5*sign(2*sum(X(members,:), 1) - counts(i)) + .5;
1025 // }
1026  }
1027  }
1028 }// function
1029 
1030 //=============================================================================================================
1031 
1032 double KMeans::unifrnd(double a, double b)
1033 {
1034  if (a > b)
1035  return std::numeric_limits<double>::quiet_NaN();
1036 
1037  double a2 = a/2.0;
1038  double b2 = b/2.0;
1039  double mu = a2+b2;
1040  double sig = b2-a2;
1041 
1042  double r = mu + sig * (2.0* (rand() % 1000)/1000 -1.0);
1043 
1044  return r;
1045 }
KMeans class declaration.
bool calculate(Eigen::MatrixXd X, qint32 kClusters, Eigen::VectorXi &idx, Eigen::MatrixXd &C, Eigen::VectorXd &sumD, Eigen::MatrixXd &D)
Definition: kmeans.cpp:120
KMeans(QString distance=QString("sqeuclidean"), QString start=QString("sample"), qint32 replicates=1, QString emptyact=QString("error"), bool online=true, qint32 maxit=100)
Definition: kmeans.cpp:66