MNE-CPP 0.1.9
A Framework for Electrophysiology
Loading...
Searching...
No Matches
kmeans.cpp
Go to the documentation of this file.
1//=============================================================================================================
37//=============================================================================================================
38// INCLUDES
39//=============================================================================================================
40
41#include "kmeans.h"
42
43#include <math.h>
44#include <iostream>
45#include <algorithm>
46#include <vector>
47#include <time.h>
48
49//=============================================================================================================
50// QT INCLUDES
51//=============================================================================================================
52
53#include <QDebug>
54
55//=============================================================================================================
56// USED NAMESPACES
57//=============================================================================================================
58
59using namespace UTILSLIB;
60using namespace Eigen;
61
62//=============================================================================================================
63// DEFINE MEMBER METHODS
64//=============================================================================================================
65
66KMeans::KMeans(QString distance,
67 QString start,
68 qint32 replicates,
69 QString emptyact,
70 bool online,
71 qint32 maxit)
72: m_sDistance(distance.toStdString())
73, m_sStart(start.toStdString())
74, m_iReps(replicates)
75, m_sEmptyact(emptyact.toStdString())
76, m_iMaxit(maxit)
77, m_bOnline(online)
78, emptyErrCnt(0)
79, iter(0)
80, k(0)
81, n(0)
82, p(0)
83, totsumD(0)
84, prevtotsumD(0)
85{
86 // Assume one replicate
87 if (m_iReps < 1)
88 m_iReps = 1;
89}
90
91//=============================================================================================================
92
93//KMeans::KMeans(std::string distance,
94// std::string start,
95// qint32 replicates,
96// std::string emptyact,
97// bool online,
98// qint32 maxit)
99//: m_sDistance(distance)
100//, m_sStart(start)
101//, m_iReps(replicates)
102//, m_sEmptyact(emptyact)
103//, m_iMaxit(maxit)
104//, m_bOnline(online)
105//, emptyErrCnt(0)
106//, iter(0)
107//, k(0)
108//, n(0)
109//, p(0)
110//, totsumD(0)
111//, prevtotsumD(0)
112//{
113// // Assume one replicate
114// if (m_iReps < 1)
115// m_iReps = 1;
116//}
117
118//=============================================================================================================
119
120bool KMeans::calculate(MatrixXd X,
121 qint32 kClusters,
122 VectorXi& idx,
123 MatrixXd& C,
124 VectorXd& sumD,
125 MatrixXd& D)
126{
127 if (kClusters < 1)
128 return false;
129
130 //Init random generator
131 srand ( time(NULL) );
132
133// n points in p dimensional space
134 k = kClusters;
135 n = X.rows();
136 p = X.cols();
137
138 if(m_sDistance.compare("cosine") == 0)
139 {
140// Xnorm = sqrt(sum(X.^2, 2));
141// if any(min(Xnorm) <= eps(max(Xnorm)))
142// error(['Some points have small relative magnitudes, making them ', ...
143// 'effectively zero.\nEither remove those points, or choose a ', ...
144// 'distance other than ''cosine''.']);
145// end
146// X = X ./ Xnorm(:,ones(1,p));
147 }
148 else if(m_sDistance.compare("correlation")==0)
149 {
150 X.array() -= (X.rowwise().sum().array() / (double)p).replicate(1,p); //X - X.rowwise().sum();//.repmat(mean(X,2),1,p);
151 MatrixXd Xnorm = (X.array().pow(2).rowwise().sum()).sqrt();//sqrt(sum(X.^2, 2));
152// if any(min(Xnorm) <= eps(max(Xnorm)))
153// error(['Some points have small relative standard deviations, making them ', ...
154// 'effectively constant.\nEither remove those points, or choose a ', ...
155// 'distance other than ''correlation''.']);
156// end
157 X.array() /= Xnorm.replicate(1,p).array();
158 }
159// else if(m_sDistance.compare('hamming')==0)
160// {
161// if ~all(ismember(X(:),[0 1]))
162// error(message('NonbinaryDataForHamm'));
163// end
164// }
165
166 // Start
167 RowVectorXd Xmins;
168 RowVectorXd Xmaxs;
169 if (m_sStart.compare("uniform") == 0)
170 {
171 if (m_sDistance.compare("hamming") == 0)
172 {
173 printf("Error: Uniform Start For Hamming\n");
174 return false;
175 }
176 Xmins = X.colwise().minCoeff();
177 Xmaxs = X.colwise().maxCoeff();
178 }
179
180 //
181 // Done with input argument processing, begin clustering
182 //
183 if (m_bOnline)
184 {
185 Del = MatrixXd(n,k);
186 Del.fill(std::numeric_limits<double>::quiet_NaN());// reassignment criterion
187 }
188
189 double totsumDBest = std::numeric_limits<double>::max();
190 emptyErrCnt = 0;
191
192 VectorXi idxBest;
193 MatrixXd Cbest;
194 VectorXd sumDBest;
195 MatrixXd Dbest;
196
197 for(qint32 rep = 0; rep < m_iReps; ++rep)
198 {
199 if (m_sStart.compare("uniform") == 0)
200 {
201 C = MatrixXd::Zero(k,p);
202 for(qint32 i = 0; i < k; ++i)
203 for(qint32 j = 0; j < p; ++j)
204 C(i,j) = unifrnd(Xmins[j], Xmaxs[j]);
205 // For 'cosine' and 'correlation', these are uniform inside a subset
206 // of the unit hypersphere. Still need to center them for
207 // 'correlation'. (Re)normalization for 'cosine'/'correlation' is
208 // done at each iteration.
209 if (m_sDistance.compare("correlation") == 0)
210 C.array() -= (C.array().rowwise().sum()/p).replicate(1, p).array();
211 }
212 else if (m_sStart.compare("sample") == 0)
213 {
214 C = MatrixXd::Zero(k,p);
215 for(qint32 i = 0; i < k; ++i)
216 C.block(i,0,1,p) = X.block(rand() % n, 0, 1, p);
217 // DEBUG
218// C.block(0,0,1,p) = X.block(2, 0, 1, p);
219// C.block(1,0,1,p) = X.block(7, 0, 1, p);
220// C.block(2,0,1,p) = X.block(17, 0, 1, p);
221 }
222 // else if (start.compare("cluster") == 0)
223 // {
224 // Xsubset = X(randsample(n,floor(.1*n)),:);
225 // [dum, C] = kmeans(Xsubset, k, varargin{:}, 'start','sample', 'replicates',1);
226 // }
227 // else if (start.compare("numeric") == 0)
228 // {
229 // C = CC(:,:,rep);
230 // }
231
232 // Compute the distance from every point to each cluster centroid and the
233 // initial assignment of points to clusters
234 D = distfun(X, C);//, 0);
235 idx = VectorXi::Zero(D.rows());
236 d = VectorXd::Zero(D.rows());
237
238 for(qint32 i = 0; i < D.rows(); ++i)
239 d[i] = D.row(i).minCoeff(&idx[i]);
240
241 m = VectorXi::Zero(k);
242 for(qint32 i = 0; i < k; ++i)
243 for (qint32 j = 0; j < idx.rows(); ++j)
244 if(idx[j] == i)
245 ++ m[i];
246
247 try // catch empty cluster errors and move on to next rep
248 {
249 // Begin phase one: batch reassignments
250 bool converged = batchUpdate(X, C, idx);
251
252 // Begin phase two: single reassignments
253 if (m_bOnline)
254 converged = onlineUpdate(X, C, idx);
255
256 if (!converged)
257 printf("Failed To Converge during replicate %d\n", rep);
258
259 // Calculate cluster-wise sums of distances
260 VectorXi nonempties = VectorXi::Zero(m.rows());
261 quint32 count = 0;
262 for(qint32 i = 0; i < m.rows(); ++i)
263 {
264 if(m[i] > 0)
265 {
266 nonempties[i] = 1;
267 ++count;
268 }
269 }
270 MatrixXd C_tmp(count,C.cols());
271 count = 0;
272 for(qint32 i = 0; i < nonempties.rows(); ++i)
273 {
274 if(nonempties[i])
275 {
276 C_tmp.row(count) = C.row(i);
277 ++count;
278 }
279 }
280
281 MatrixXd D_tmp = distfun(X, C_tmp);//, iter);
282 count = 0;
283 for(qint32 i = 0; i < nonempties.rows(); ++i)
284 {
285 if(nonempties[i])
286 {
287 D.col(i) = D_tmp.col(count);
288 C.row(i) = C_tmp.row(count);
289 ++count;
290 }
291 }
292
293 d = VectorXd::Zero(n);
294 for(qint32 i = 0; i < n; ++i)
295 d[i] += D.array()(idx[i]*n+i);//Colum Major
296
297 sumD = VectorXd::Zero(k);
298 for(qint32 i = 0; i < k; ++i)
299 for (qint32 j = 0; j < idx.rows(); ++j)
300 if(idx[j] == i)
301 sumD[i] += d[j];
302
303 totsumD = sumD.array().sum();
304
305// printf("%d iterations, total sum of distances = %f\n", iter, totsumD);
306
307 // Save the best solution so far
308 if (totsumD < totsumDBest)
309 {
310 totsumDBest = totsumD;
311 idxBest = idx;
312 Cbest = C;
313 sumDBest = sumD;
314 Dbest = D;
315 }
316 }
317 catch (int e)
318 {
319 if(e == 0)
320 {
321 // If an empty cluster error occurred in one of multiple replicates, catch
322 // it, warn, and move on to next replicate. Error only when all replicates
323 // fail. Rethrow an other kind of error.
324 if (m_iReps == 1)
325 return false;
326 else
327 {
328 emptyErrCnt = emptyErrCnt + 1;
329// printf("Replicate %d terminated: empty cluster created at iteration %d.\n", rep, iter);
330 if (emptyErrCnt == m_iReps)
331 {
332// error(message('EmptyClusterAllReps'));
333 return false;
334 }
335
336 }
337 }
338 } // catch
339
340 } // replicates
341
342 // Return the best solution
343 idx = idxBest;
344 C = Cbest;
345 sumD = sumDBest;
346 D = Dbest;
347
348//if hadNaNs
349// idx = statinsertnan(wasnan, idx);
350//end
351 return true;
352}
353
354//=============================================================================================================
355
356bool KMeans::batchUpdate(const MatrixXd& X, MatrixXd& C, VectorXi& idx)
357{
358 // Every point moved, every cluster will need an update
359 qint32 i = 0;
360 VectorXi moved(n);
361 for(i = 0; i < n; ++i)
362 moved[i] = i;
363
364 VectorXi changed(k);
365 for(i = 0; i < k; ++i)
366 changed[i] = i;
367
368 previdx = VectorXi::Zero(n);
369
370 prevtotsumD = std::numeric_limits<double>::max();//max double
371
372 MatrixXd D = MatrixXd::Zero(X.rows(), k);
373
374 //
375 // Begin phase one: batch reassignments
376 //
377 iter = 0;
378 bool converged = false;
379 while(true)
380 {
381 ++iter;
382
383 // Calculate the new cluster centroids and counts, and update the
384 // distance from every point to those new cluster centroids
385 MatrixXd C_new;
386 VectorXi m_new;
387 KMeans::gcentroids(X, idx, changed, C_new, m_new);
388 MatrixXd D_new = distfun(X, C_new);//, iter);
389
390 for(qint32 i = 0; i < changed.rows(); ++i)
391 {
392 C.row(changed[i]) = C_new.row(i);
393 D.col(changed[i]) = D_new.col(i);
394 m[changed[i]] = m_new[i];
395 }
396
397 // Deal with clusters that have just lost all their members
398 VectorXi empties = VectorXi::Zero(changed.rows());
399 for(qint32 i = 0; i < changed.rows(); ++i)
400 if(m(i) == 0)
401 empties[i] = 1;
402
403 if (empties.sum() > 0)
404 {
405 if (m_sEmptyact.compare("error") == 0)
406 {
407 return converged;
408// throw 0;
409 }
410 else if (m_sEmptyact.compare("drop") == 0)
411 {
412 // // Remove the empty cluster from any further processing
413 // D(:,empties) = NaN;
414 // changed = changed(m(changed) > 0);
415 // warning('Empty cluster created at iteration %d during replicate %d.',iter, rep,);
416 }
417 else if (m_sEmptyact.compare("singleton") == 0)
418 {
419 // warning('Empty cluster created at iteration %d during replicate %d.', iter, rep);
420
421 // for i = empties
422 // d = D((idx-1)*n + (1:n)'); // use newly updated distances
423
424 // % Find the point furthest away from its current cluster.
425 // % Take that point out of its cluster and use it to create
426 // % a new singleton cluster to replace the empty one.
427 // [dlarge, lonely] = max(d);
428 // from = idx(lonely); % taking from this cluster
429 // if m(from) < 2
430 // % In the very unusual event that the cluster had only
431 // % one member, pick any other non-singleton point.
432 // from = find(m>1,1,'first');
433 // lonely = find(idx==from,1,'first');
434 // end
435 // C(i,:) = X(lonely,:);
436 // m(i) = 1;
437 // idx(lonely) = i;
438 // D(:,i) = distfun(X, C(i,:), distance, iter);
439
440 // % Update clusters from which points are taken
441 // [C(from,:), m(from)] = gcentroids(X, idx, from, distance);
442 // D(:,from) = distfun(X, C(from,:), distance, iter);
443 // changed = unique([changed from]);
444 // end
445 }
446 }
447
448 // Compute the total sum of distances for the current configuration.
449 totsumD = 0;
450 for(qint32 i = 0; i < n; ++i)
451 totsumD += D.array()(idx[i]*n+i);//Colum Major
452 // Test for a cycle: if objective is not decreased, back out
453 // the last step and move on to the single update phase
454 if(prevtotsumD <= totsumD)
455 {
456 idx = previdx;
457 MatrixXd C_new;
458 VectorXi m_new;
459 gcentroids(X, idx, changed, C_new, m_new);
460 C.block(0,0,k,C.cols()) = C_new;
461 m.block(0,0,k,1) = m_new;
462 --iter;
463 break;
464 }
465
466// printf("%6d\t%6d\t%8d\t%12g\n",iter,1,moved.rows(),totsumD);
467 if (iter >= m_iMaxit)
468 break;
469
470 // Determine closest cluster for each point and reassign points to clusters
471 previdx = idx;
472 prevtotsumD = totsumD;
473
474 VectorXi nidx(D.rows());
475 for(qint32 i = 0; i < D.rows(); ++i)
476 d[i] = D.row(i).minCoeff(&nidx[i]);
477
478 // Determine which points moved
479 VectorXi moved = VectorXi::Zero(nidx.rows());
480 qint32 count = 0;
481 for(qint32 i = 0; i < nidx.rows(); ++i)
482 {
483 if(nidx[i] != previdx[i])
484 {
485 moved[count] = i;
486 ++count;
487 }
488 }
489 moved.conservativeResize(count);
490
491 if (moved.rows() > 0)
492 {
493 // Resolve ties in favor of not moving
494 VectorXi moved_new = VectorXi::Zero(moved.rows());
495 count = 0;
496 for(qint32 i = 0; i < moved.rows(); ++i)
497 {
498 if(D.array()(previdx[moved[i]] * n + moved[i]) > d[moved[i]])
499 {
500 moved_new[count] = moved[i];
501 ++count;
502 }
503 }
504 moved_new.conservativeResize(count);
505 moved = moved_new;
506 }
507
508 if (moved.rows() == 0)
509 {
510 converged = true;
511 break;
512 }
513
514 for(qint32 i = 0; i < moved.rows(); ++i)
515 if(moved[i] >= 0)
516 idx[ moved[i] ] = nidx[ moved[i] ];
517
518 // Find clusters that gained or lost members
519 std::vector<int> tmp;
520 for(qint32 i = 0; i < moved.rows(); ++i)
521 tmp.push_back(idx[moved[i]]);
522 for(qint32 i = 0; i < moved.rows(); ++i)
523 tmp.push_back(previdx[moved[i]]);
524
525 std::sort(tmp.begin(),tmp.end());
526
527 std::vector<int>::iterator it;
528 it = std::unique(tmp.begin(),tmp.end());
529 tmp.resize( it - tmp.begin() );
530
531 changed.conservativeResize(tmp.size());
532
533 for(quint32 i = 0; i < tmp.size(); ++i)
534 changed[i] = tmp[i];
535 } // phase one
536 return converged;
537} // nested function
538
539//=============================================================================================================
540
541bool KMeans::onlineUpdate(const MatrixXd& X, MatrixXd& C, VectorXi& idx)
542{
543 // Initialize some cluster information prior to phase two
544 MatrixXd Xmid1;
545 MatrixXd Xmid2;
546 if (m_sDistance.compare("cityblock") == 0)
547 {
548 Xmid1 = MatrixXd::Zero(k,p);
549 Xmid2 = MatrixXd::Zero(k,p);
550 for(qint32 i = 0; i < k; ++i)
551 {
552 if (m[i] > 0)
553 {
554 // Separate out sorted coords for points in i'th cluster,
555 // and save values above and below median, component-wise
556 MatrixXd Xsorted(m[i],p);
557 qint32 c = 0;
558 for(qint32 j = 0; j < idx.rows(); ++j)
559 {
560 if(idx[j] == i)
561 {
562 Xsorted.row(c) = X.row(j);
563 ++c;
564 }
565 }
566 for(qint32 j = 0; j < Xsorted.cols(); ++j)
567 std::sort(Xsorted.col(j).data(),Xsorted.col(j).data()+Xsorted.rows());
568
569 qint32 nn = floor(0.5*m[i])-1;
570 if ((m[i] % 2) == 0)
571 {
572 Xmid1.row(i) = Xsorted.row(nn);
573 Xmid2.row(i) = Xsorted.row(nn+1);
574 }
575 else if (m[i] > 1)
576 {
577 Xmid1.row(i) = Xsorted.row(nn);
578 Xmid2.row(i) = Xsorted.row(nn+2);
579 }
580 else
581 {
582 Xmid1.row(i) = Xsorted.row(0);
583 Xmid2.row(i) = Xsorted.row(0);
584 }
585 }
586 }
587 }
588 else if (m_sDistance.compare("hamming") == 0)
589 {
590// Xsum = zeros(k,p);
591// for i = 1:k
592// if m(i) > 0
593// % Sum coords for points in i'th cluster, component-wise
594// Xsum(i,:) = sum(X(idx==i,:), 1);
595// end
596// end
597 }
598
599 //
600 // Begin phase two: single reassignments
601 //
602 VectorXi changed = VectorXi(m.rows());
603 qint32 count = 0;
604 for(qint32 i = 0; i < m.rows(); ++i)
605 {
606 if(m[i] > 0)
607 {
608 changed[count] = i;
609 ++count;
610 }
611 }
612 changed.conservativeResize(count);
613
614 qint32 lastmoved = 0;
615 qint32 nummoved = 0;
616 qint32 iter1 = iter;
617 bool converged = false;
618 while (iter < m_iMaxit)
619 {
620 // Calculate distances to each cluster from each point, and the
621 // potential change in total sum of errors for adding or removing
622 // each point from each cluster. Clusters that have not changed
623 // membership need not be updated.
624 //
625 // Singleton clusters are a special case for the sum of dists
626 // calculation. Removing their only point is never best, so the
627 // reassignment criterion had better guarantee that a singleton
628 // point will stay in its own cluster. Happily, we get
629 // Del(i,idx(i)) == 0 automatically for them.
630
631 if (m_sDistance.compare("sqeuclidean") == 0)
632 {
633 for(qint32 j = 0; j < changed.rows(); ++j)
634 {
635 qint32 i = changed[j];
636 VectorXi mbrs = VectorXi::Zero(idx.rows());
637 for(qint32 l = 0; l < idx.rows(); ++l)
638 if(idx[l] == i)
639 mbrs[l] = 1;
640
641 VectorXi sgn = 1 - 2 * mbrs.array(); // -1 for members, 1 for nonmembers
642
643 if (m[i] == 1)
644 for(qint32 l = 0; l < mbrs.rows(); ++l)
645 if(mbrs[l])
646 sgn[l] = 0; // prevent divide-by-zero for singleton mbrs
647
648 Del.col(i) = ((double)m[i] / ((double)m[i] + sgn.cast<double>().array()));
649
650 Del.col(i).array() *= (X - C.row(i).replicate(n,1)).array().pow(2).rowwise().sum().array();
651 }
652 }
653 else if (m_sDistance.compare("cityblock") == 0)
654 {
655 for(qint32 j = 0; j < changed.rows(); ++j)
656 {
657 qint32 i = changed[j];
658 if (m(i) % 2 == 0) // this will never catch singleton clusters
659 {
660 MatrixXd ldist = Xmid1.row(i).replicate(n,1) - X;
661 MatrixXd rdist = X - Xmid2.row(i).replicate(n,1);
662 VectorXd mbrs = VectorXd::Zero(idx.rows());
663
664 for(qint32 l = 0; l < idx.rows(); ++l)
665 if(idx[l] == i)
666 mbrs[l] = 1;
667 MatrixXd sgn = ((-2*mbrs).array() + 1).replicate(1, p); // -1 for members, 1 for nonmembers
668 rdist = sgn.array()*rdist.array(); ldist = sgn.array()*ldist.array();
669
670 for(qint32 l = 0; l < idx.rows(); ++l)
671 {
672 double sum = 0;
673 for(qint32 h = 0; h < rdist.cols(); ++h)
674 sum += rdist(l,h) > ldist(l,h) ? rdist(l,h) < 0 ? 0 : rdist(l,h) : ldist(l,h) < 0 ? 0 : ldist(l,h);
675 Del(l,i) = sum;
676 }
677 }
678 else
679 Del.col(i) = ((X - C.row(i).replicate(n,1)).array().abs()).rowwise().sum();
680 }
681 }
682 else if (m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
683 {
684 // The points are normalized, centroids are not, so normalize them
685 MatrixXd normC = C.array().pow(2).rowwise().sum().sqrt();
686// if any(normC < eps(class(normC))) % small relative to unit-length data points
687// error('Zero cluster centroid created at iteration %d during replicate %d.',iter, rep);
688// end
689 // This can be done without a loop, but the loop saves memory allocations
690 MatrixXd XCi;
691 qint32 i;
692 for(qint32 j = 0; j < changed.rows(); ++j)
693 {
694 i = changed[j];
695 XCi = X * C.row(i).transpose();
696
697 VectorXi mbrs = VectorXi::Zero(idx.rows());
698 for(qint32 l = 0; l < idx.rows(); ++l)
699 if(idx[l] == i)
700 mbrs[l] = 1;
701
702 VectorXi sgn = 1 - 2 * mbrs.array(); // -1 for members, 1 for nonmembers
703
704 double A = (double)m[i] * normC(i,0);
705 double B = pow(((double)m[i] * normC(i,0)),2);
706
707 Del.col(i) = 1 + sgn.cast<double>().array()*
708 (A - (B + 2 * sgn.cast<double>().array() * m[i] * XCi.array() + 1).sqrt());
709
710 std::cout << "Del.col(i)\n" << Del.col(i) << std::endl;
711
712// Del(:,i) = 1 + sgn .*...
713// (m(i).*normC(i) - sqrt((m(i).*normC(i)).^2 + 2.*sgn.*m(i).*XCi + 1));
714 }
715 }
716 else if (m_sDistance.compare("hamming") == 0)
717 {
718// for i = changed
719// if mod(m(i),2) == 0 % this will never catch singleton clusters
720// % coords with an unequal number of 0s and 1s have a
721// % different contribution than coords with an equal
722// % number
723// unequal01 = find(2*Xsum(i,:) ~= m(i));
724// numequal01 = p - length(unequal01);
725// mbrs = (idx == i);
726// Di = abs(X(:,unequal01) - C(repmat(i,n,1),unequal01));
727// Del(:,i) = (sum(Di, 2) + mbrs*numequal01) / p;
728// else
729// Del(:,i) = sum(abs(X - C(repmat(i,n,1),:)), 2) / p;
730// end
731// end
732 }
733
734 // Determine best possible move, if any, for each point. Next we
735 // will pick one from those that actually did move.
736 previdx = idx;
737 prevtotsumD = totsumD;
738
739 VectorXi nidx = VectorXi::Zero(Del.rows());
740 VectorXd minDel = VectorXd::Zero(Del.rows());
741
742 for(qint32 i = 0; i < Del.rows(); ++i)
743 minDel[i] = Del.row(i).minCoeff(&nidx[i]);
744
745 VectorXi moved = VectorXi::Zero(previdx.rows());
746 qint32 count = 0;
747 for(qint32 i = 0; i < moved.rows(); ++i)
748 {
749 if(previdx[i] != nidx[i])
750 {
751 moved[count] = i;
752 ++count;
753 }
754 }
755 moved.conservativeResize(count);
756
757 if (moved.sum() > 0)
758 {
759 // Resolve ties in favor of not moving
760 VectorXi moved_new = VectorXi::Zero(moved.rows());
761 count = 0;
762 for(qint32 i = 0; i < moved.rows(); ++i)
763 {
764 if ( Del.array()(previdx[moved(i)]*n + moved(i)) > minDel(moved(i)))
765 {
766 moved_new[count] = moved[i];
767 ++count;
768 }
769 }
770 moved_new.conservativeResize(count);
771 moved = moved_new;
772 }
773
774 if (moved.rows() <= 0)
775 {
776 // Count an iteration if phase 2 did nothing at all, or if we're
777 // in the middle of a pass through all the points
778 if ((iter == iter1) || nummoved > 0)
779 {
780 ++iter;
781
782// printf("%6d\t%6d\t%8d\t%12g\n",iter,2,nummoved,totsumD);
783 }
784 converged = true;
785 break;
786 }
787
788 // Pick the next move in cyclic order
789 VectorXi moved_new(moved.rows());
790 for(qint32 i = 0; i < moved.rows(); ++i)
791 moved_new[i] = ((moved[i] - lastmoved) % n) + lastmoved;
792
793 moved[0] = moved_new.minCoeff() % n;//+1
794 moved.conservativeResize(1);
795
796 // If we've gone once through all the points, that's an iteration
797 if (moved[0] <= lastmoved)
798 {
799 ++iter;
800// printf("%6d\t%6d\t%8d\t%12g\n",iter,2,nummoved,totsumD);
801 if(iter >= m_iMaxit)
802 break;
803 nummoved = 0;
804 }
805 ++nummoved;
806 lastmoved = moved[0];
807
808 qint32 oidx = idx(moved[0]);
809 nidx[0] = nidx(moved[0]);
810 nidx.conservativeResize(1);
811 totsumD += Del(moved[0],nidx[0]) - Del(moved[0],oidx);
812
813 // Update the cluster index vector, and the old and new cluster
814 // counts and centroids
815 idx[ moved[0] ] = nidx[0];
816 m( nidx[0] ) = m( nidx[0] ) + 1;
817 m( oidx ) = m( oidx ) - 1;
818
819 if (m_sDistance.compare("sqeuclidean") == 0)
820 {
821 C.row(nidx[0]) = C.row(nidx[0]).array() + (X.row(moved[0]) - C.row(nidx[0])).array() / m[nidx[0]];
822 C.row(oidx) = C.row(oidx).array() - (X.row(moved[0]) - C.row(oidx)).array() / m[oidx];
823 }
824 else if (m_sDistance.compare("cityblock") == 0)
825 {
826 VectorXi onidx(2);
827 onidx << oidx, nidx[0];//ToDo always right?
828
829 qint32 i;
830 for(qint32 h = 0; h < 2; ++h)
831 {
832 i = onidx[h];
833 // Separate out sorted coords for points in each cluster.
834 // New centroid is the coord median, save values above and
835 // below median. All done component-wise.
836 MatrixXd Xsorted(m[i],p);
837 qint32 c = 0;
838 for(qint32 j = 0; j < idx.rows(); ++j)
839 {
840 if(idx[j] == i)
841 {
842 Xsorted.row(c) = X.row(j);
843 ++c;
844 }
845 }
846 for(qint32 j = 0; j < Xsorted.cols(); ++j)
847 std::sort(Xsorted.col(j).data(),Xsorted.col(j).data()+Xsorted.rows());
848
849 qint32 nn = floor(0.5*m[i])-1;
850 if ((m[i] % 2) == 0)
851 {
852 C.row(i) = 0.5 * (Xsorted.row(nn) + Xsorted.row(nn+1));
853 Xmid1.row(i) = Xsorted.row(nn);
854 Xmid2.row(i) = Xsorted.row(nn+1);
855 }
856 else
857 {
858 C.row(i) = Xsorted.row(nn+1);
859 if (m(i) > 1)
860 {
861 Xmid1.row(i) = Xsorted.row(nn);
862 Xmid2.row(i) = Xsorted.row(nn+2);
863 }
864 else
865 {
866 Xmid1.row(i) = Xsorted.row(0);
867 Xmid2.row(i) = Xsorted.row(0);
868 }
869 }
870 }
871 }
872 else if (m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
873 {
874 C.row(nidx[0]).array() += (X.row(moved[0]) - C.row(nidx[0])).array() / m[nidx[0]];
875 C.row(oidx).array() += (X.row(moved[0]) - C.row(oidx)).array() / m[oidx];
876 }
877 else if (m_sDistance.compare("hamming") == 0)
878 {
879// % Update summed coords for points in each cluster. New
880// % centroid is the coord median. All done component-wise.
881// Xsum(nidx,:) = Xsum(nidx,:) + X(moved,:);
882// Xsum(oidx,:) = Xsum(oidx,:) - X(moved,:);
883// C(nidx,:) = .5*sign(2*Xsum(nidx,:) - m(nidx)) + .5;
884// C(oidx,:) = .5*sign(2*Xsum(oidx,:) - m(oidx)) + .5;
885 }
886
887 VectorXi sorted_onidx(1+nidx.rows());
888 sorted_onidx << oidx, nidx;
889 std::sort(sorted_onidx.data(), sorted_onidx.data()+sorted_onidx.rows());
890 changed = sorted_onidx;
891 } // phase two
892
893 return converged;
894} // nested function
895
896//=============================================================================================================
897//DISTFUN Calculate point to cluster centroid distances.
898MatrixXd KMeans::distfun(const MatrixXd& X, MatrixXd& C)//, qint32 iter)
899{
900 MatrixXd D = MatrixXd::Zero(n,C.rows());
901 qint32 nclusts = C.rows();
902
903 if (m_sDistance.compare("sqeuclidean") == 0)
904 {
905 for(qint32 i = 0; i < nclusts; ++i)
906 {
907 D.col(i) = (X.col(0).array() - C(i,0)).pow(2);
908
909 for(qint32 j = 1; j < p; ++j)
910 D.col(i) = D.col(i).array() + (X.col(j).array() - C(i,j)).pow(2);
911 }
912 }
913 else if (m_sDistance.compare("cityblock") == 0)
914 {
915 for(qint32 i = 0; i < nclusts; ++i)
916 {
917 D.col(i) = (X.col(0).array() - C(i,0)).array().abs();
918 for(qint32 j = 1; j < p; ++j)
919 {
920 D.col(i).array() += (X.col(j).array() - C(i,j)).array().abs();
921 }
922 }
923 }
924 else if (m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
925 {
926 // The points are normalized, centroids are not, so normalize them
927 MatrixXd normC = C.array().pow(2).rowwise().sum().sqrt();
928// if any(normC < eps(class(normC))) % small relative to unit-length data points
929// error('Zero cluster centroid created at iteration %d.',iter);
930 for (qint32 i = 0; i < nclusts; ++i)
931 {
932 MatrixXd C_tmp = (C.row(i).array() / normC(i,0)).transpose();
933 D.col(i) = X * C_tmp;//max(1 - X * (C(i,:)./normC(i))', 0);
934 for(qint32 j = 0; j < D.rows(); ++j)
935 if(D(j,i) < 0)
936 D(j,i) = 0;
937 }
938 }
939//case 'hamming'
940// for i = 1:nclusts
941// D(:,i) = abs(X(:,1) - C(i,1));
942// for j = 2:p
943// D(:,i) = D(:,i) + abs(X(:,j) - C(i,j));
944// end
945// D(:,i) = D(:,i) / p;
946// % D(:,i) = sum(abs(X - C(repmat(i,n,1),:)), 2) / p;
947// end
948//end
949 return D;
950} // function
951
952//=============================================================================================================
953//GCENTROIDS Centroids and counts stratified by group.
954void KMeans::gcentroids(const MatrixXd& X, const VectorXi& index, const VectorXi& clusts,
955 MatrixXd& centroids, VectorXi& counts)
956{
957 qint32 num = clusts.rows();
958 centroids = MatrixXd::Zero(num,p);
959 centroids.fill(std::numeric_limits<double>::quiet_NaN());
960 counts = VectorXi::Zero(num);
961
962 VectorXi members;
963
964 qint32 c;
965
966 for(qint32 i = 0; i < num; ++i)
967 {
968 c = 0;
969 members = VectorXi::Zero(index.rows());
970 for(qint32 j = 0; j < index.rows(); ++j)
971 {
972 if(index[j] == clusts[i])
973 {
974 members[c] = j;
975 ++c;
976 }
977 }
978 members.conservativeResize(c);
979 if (c > 0)
980 {
981 counts[i] = c;
982 if(m_sDistance.compare("sqeuclidean") == 0)
983 {
984 //Initialize
985 if(members.rows() > 0)
986 centroids.row(i) = RowVectorXd::Zero(centroids.cols());
987
988 for(qint32 j = 0; j < members.rows(); ++j)
989 centroids.row(i).array() += X.row(members[j]).array() / counts[i];
990 }
991 else if(m_sDistance.compare("cityblock") == 0)
992 {
993 // Separate out sorted coords for points in i'th cluster,
994 // and use to compute a fast median, component-wise
995 MatrixXd Xsorted(counts[i],p);
996 c = 0;
997
998 for(qint32 j = 0; j < index.rows(); ++j)
999 {
1000 if(index[j] == clusts[i])
1001 {
1002 Xsorted.row(c) = X.row(j);
1003 ++c;
1004 }
1005 }
1006
1007 for(qint32 j = 0; j < Xsorted.cols(); ++j)
1008 std::sort(Xsorted.col(j).data(),Xsorted.col(j).data()+Xsorted.rows());
1009
1010 qint32 nn = floor(0.5*(counts(i)))-1;
1011 if (counts[i] % 2 == 0)
1012 centroids.row(i) = .5 * (Xsorted.row(nn) + Xsorted.row(nn+1));
1013 else
1014 centroids.row(i) = Xsorted.row(nn+1);
1015 }
1016 else if(m_sDistance.compare("cosine") == 0 || m_sDistance.compare("correlation") == 0)
1017 {
1018 for(qint32 j = 0; j < members.rows(); ++j)
1019 centroids.row(i).array() += X.row(members[j]).array() / counts[i]; // unnormalized
1020 }
1021// else if(m_sDistance.compare("hamming") == 0)
1022// {
1023// % Compute a fast median for binary data, component-wise
1024// centroids(i,:) = .5*sign(2*sum(X(members,:), 1) - counts(i)) + .5;
1025// }
1026 }
1027 }
1028}// function
1029
1030//=============================================================================================================
1031
1032double KMeans::unifrnd(double a, double b)
1033{
1034 if (a > b)
1035 return std::numeric_limits<double>::quiet_NaN();
1036
1037 double a2 = a/2.0;
1038 double b2 = b/2.0;
1039 double mu = a2+b2;
1040 double sig = b2-a2;
1041
1042 double r = mu + sig * (2.0* (rand() % 1000)/1000 -1.0);
1043
1044 return r;
1045}
int k
Definition fiff_tag.cpp:324
KMeans class declaration.
bool calculate(Eigen::MatrixXd X, qint32 kClusters, Eigen::VectorXi &idx, Eigen::MatrixXd &C, Eigen::VectorXd &sumD, Eigen::MatrixXd &D)
Definition kmeans.cpp:120
KMeans(QString distance=QString("sqeuclidean"), QString start=QString("sample"), qint32 replicates=1, QString emptyact=QString("error"), bool online=true, qint32 maxit=100)
Definition kmeans.cpp:66