71: m_distance(distanceFromString(distance.toStdString()))
72, m_start(startFromString(start.toStdString()))
73, m_emptyact(emptyactFromString(emptyact.toStdString()))
74, m_iReps(std::max(replicates, qint32(1)))
77, m_rng(std::random_device{}())
99, m_iReps(std::max(replicates, qint32(1)))
102, m_rng(std::random_device{}())
124KMeansStart KMeans::startFromString(
const std::string& name)
160 VectorXd Xnorm =
X.array().pow(2).rowwise().sum().sqrt();
161 for (qint32 i = 0; i < n; ++i)
164 X.row(i) /= Xnorm(i);
170 X.array() -= (
X.rowwise().sum().array() /
static_cast<double>(p)).replicate(1, p);
171 VectorXd Xnorm =
X.array().pow(2).rowwise().sum().sqrt();
172 for (qint32 i = 0; i < n; ++i)
175 X.row(i) /= Xnorm(i);
180 RowVectorXd Xmins, Xmaxs;
185 qWarning(
"KMeans: Uniform initialization is not supported for Hamming distance.");
188 Xmins =
X.colwise().minCoeff();
189 Xmaxs =
X.colwise().maxCoeff();
195 Del = MatrixXd::Constant(n, k, std::numeric_limits<double>::quiet_NaN());
198 double totsumDBest = std::numeric_limits<double>::max();
206 std::uniform_int_distribution<qint32> sampleDist(0, n - 1);
208 for (qint32 rep = 0; rep < m_iReps; ++rep)
213 C = MatrixXd::Zero(k, p);
214 for (qint32 i = 0; i < k; ++i)
216 for (qint32 j = 0; j < p; ++j)
218 std::uniform_real_distribution<double> dist(Xmins[j], Xmaxs[j]);
219 C(i, j) = dist(m_rng);
223 C.array() -= (C.array().rowwise().sum() / p).replicate(1, p).array();
227 C = MatrixXd::Zero(k, p);
228 for (qint32 i = 0; i < k; ++i)
229 C.row(i) =
X.row(sampleDist(m_rng));
234 idx = VectorXi::Zero(n);
235 d = VectorXd::Zero(n);
237 for (qint32 i = 0; i < n; ++i)
238 d[i] = D.row(i).minCoeff(&idx[i]);
240 m = VectorXi::Zero(k);
241 for (qint32 i = 0; i < n; ++i)
247 bool converged = batchUpdate(
X, C, idx);
251 converged = onlineUpdate(
X, C, idx);
254 qWarning(
"KMeans: Failed to converge during replicate %d.", rep);
257 VectorXi nonempties = (m.array() > 0).cast<
int>();
258 qint32 count = nonempties.sum();
260 MatrixXd C_tmp(count, C.cols());
262 for (qint32 i = 0; i < k; ++i)
264 C_tmp.row(ci++) = C.row(i);
266 MatrixXd D_tmp = distfun(
X, C_tmp);
268 for (qint32 i = 0; i < k; ++i)
272 D.col(i) = D_tmp.col(ci);
273 C.row(i) = C_tmp.row(ci);
279 d = VectorXd::Zero(n);
280 for (qint32 i = 0; i < n; ++i)
284 sumD = VectorXd::Zero(k);
285 for (qint32 i = 0; i < n; ++i)
286 sumD[idx[i]] += d[i];
288 totsumD = sumD.sum();
291 if (totsumD < totsumDBest)
293 totsumDBest = totsumD;
306 if (emptyErrCnt == m_iReps)
321bool KMeans::batchUpdate(
const MatrixXd&
X, MatrixXd& C, VectorXi& idx)
326 for (i = 0; i < n; ++i)
330 for (i = 0; i < k; ++i)
333 previdx = VectorXi::Zero(n);
334 prevtotsumD = std::numeric_limits<double>::max();
336 MatrixXd D = MatrixXd::Zero(n, k);
339 bool converged =
false;
347 gcentroids(
X, idx, changed, C_new, m_new);
348 MatrixXd D_new = distfun(
X, C_new);
350 for (qint32 i = 0; i < changed.rows(); ++i)
352 C.row(changed[i]) = C_new.row(i);
353 D.col(changed[i]) = D_new.col(i);
354 m[changed[i]] = m_new[i];
358 VectorXi empties = VectorXi::Zero(changed.rows());
359 for (qint32 i = 0; i < changed.rows(); ++i)
363 if (empties.sum() > 0)
374 for (qint32 i = 0; i < n; ++i)
375 totsumD += D(i, idx[i]);
378 if (prevtotsumD <= totsumD)
383 gcentroids(
X, idx, changed, C_rev, m_rev);
384 C.block(0, 0, k, C.cols()) = C_rev;
385 m.block(0, 0, k, 1) = m_rev;
390 if (iter >= m_iMaxit)
395 prevtotsumD = totsumD;
398 for (qint32 i = 0; i < n; ++i)
399 d[i] = D.row(i).minCoeff(&nidx[i]);
402 std::vector<int> movedVec;
404 for (qint32 i = 0; i < n; ++i)
406 if (nidx[i] != previdx[i])
407 movedVec.push_back(i);
411 std::vector<int> movedFinal;
412 movedFinal.reserve(movedVec.size());
413 for (
int mi : movedVec)
415 if (D(mi, previdx[mi]) > d[mi])
416 movedFinal.push_back(mi);
419 if (movedFinal.empty())
425 for (
int mi : movedFinal)
429 std::vector<int> tmp;
430 tmp.reserve(2 * movedFinal.size());
431 for (
int mi : movedFinal)
433 tmp.push_back(idx[mi]);
434 tmp.push_back(previdx[mi]);
436 std::sort(tmp.begin(), tmp.end());
437 tmp.erase(std::unique(tmp.begin(), tmp.end()), tmp.end());
439 changed.resize(tmp.size());
440 for (
size_t i = 0; i < tmp.size(); ++i)
448bool KMeans::onlineUpdate(
const MatrixXd&
X, MatrixXd& C, VectorXi& idx)
451 MatrixXd Xmid1, Xmid2;
454 Xmid1 = MatrixXd::Zero(k, p);
455 Xmid2 = MatrixXd::Zero(k, p);
456 for (qint32 i = 0; i < k; ++i)
460 MatrixXd Xsorted(m[i], p);
462 for (qint32 j = 0; j < n; ++j)
464 Xsorted.row(c++) =
X.row(j);
466 for (qint32 j = 0; j < p; ++j)
467 std::sort(Xsorted.col(j).data(), Xsorted.col(j).data() + Xsorted.rows());
469 qint32 nn =
static_cast<qint32
>(std::floor(0.5 * m[i])) - 1;
472 Xmid1.row(i) = Xsorted.row(nn);
473 Xmid2.row(i) = Xsorted.row(nn + 1);
477 Xmid1.row(i) = Xsorted.row(nn);
478 Xmid2.row(i) = Xsorted.row(nn + 2);
482 Xmid1.row(i) = Xsorted.row(0);
483 Xmid2.row(i) = Xsorted.row(0);
490 VectorXi changed(m.rows());
492 for (qint32 i = 0; i < m.rows(); ++i)
494 changed[count++] = i;
495 changed.conservativeResize(count);
497 qint32 lastmoved = 0;
500 bool converged =
false;
502 while (iter < m_iMaxit)
507 for (qint32 j = 0; j < changed.rows(); ++j)
509 qint32 i = changed[j];
510 VectorXi mbrs = VectorXi::Zero(n);
511 for (qint32 l = 0; l < n; ++l)
515 VectorXi sgn = 1 - 2 * mbrs.array();
517 for (qint32 l = 0; l < n; ++l)
521 Del.col(i) = (
static_cast<double>(m[i]) / (
static_cast<double>(m[i]) + sgn.cast<
double>().array()));
522 Del.col(i).array() *= (
X.rowwise() - C.row(i)).array().pow(2).rowwise().sum().array();
527 for (qint32 j = 0; j < changed.rows(); ++j)
529 qint32 i = changed[j];
532 MatrixXd ldist = Xmid1.row(i).replicate(n, 1) -
X;
533 MatrixXd rdist =
X - Xmid2.row(i).replicate(n, 1);
534 VectorXd mbrs = VectorXd::Zero(n);
535 for (qint32 l = 0; l < n; ++l)
538 MatrixXd sgn = ((-2 * mbrs).array() + 1).replicate(1, p);
539 rdist = sgn.array() * rdist.array();
540 ldist = sgn.array() * ldist.array();
542 for (qint32 l = 0; l < n; ++l)
545 for (qint32 h = 0; h < p; ++h)
546 sum += std::max(0.0, std::max(rdist(l, h), ldist(l, h)));
552 Del.col(i) = (
X.rowwise() - C.row(i)).array().abs().rowwise().sum();
558 MatrixXd normC = C.array().pow(2).rowwise().sum().sqrt();
559 for (qint32 j = 0; j < changed.rows(); ++j)
561 qint32 i = changed[j];
562 MatrixXd XCi =
X * C.row(i).transpose();
564 VectorXi mbrs = VectorXi::Zero(n);
565 for (qint32 l = 0; l < n; ++l)
569 VectorXi sgn = 1 - 2 * mbrs.array();
570 double A =
static_cast<double>(m[i]) * normC(i, 0);
573 Del.col(i) = 1 + sgn.cast<
double>().array() *
574 (A - (B + 2 * sgn.cast<
double>().array() * m[i] * XCi.array() + 1).sqrt());
581 prevtotsumD = totsumD;
583 VectorXi nidx = VectorXi::Zero(n);
584 VectorXd minDel = VectorXd::Zero(n);
585 for (qint32 i = 0; i < n; ++i)
586 minDel[i] = Del.row(i).minCoeff(&nidx[i]);
589 std::vector<int> movedVec;
591 for (qint32 i = 0; i < n; ++i)
592 if (previdx[i] != nidx[i])
593 movedVec.push_back(i);
596 std::vector<int> movedFinal;
597 movedFinal.reserve(movedVec.size());
598 for (
int mi : movedVec)
599 if (Del(mi, previdx[mi]) > minDel(mi))
600 movedFinal.push_back(mi);
602 if (movedFinal.empty())
604 if ((iter == iter1) || nummoved > 0)
611 int bestMoved = movedFinal[0];
612 int bestDist = ((movedFinal[0] - lastmoved) % n + n) % n;
613 for (
size_t i = 1; i < movedFinal.size(); ++i)
615 int d_i = ((movedFinal[i] - lastmoved) % n + n) % n;
619 bestMoved = movedFinal[i];
622 int movedPt = bestMoved;
624 if (movedPt <= lastmoved)
627 if (iter >= m_iMaxit)
634 qint32 oidx = idx[movedPt];
635 qint32 nidx_pt = nidx[movedPt];
636 totsumD += Del(movedPt, nidx_pt) - Del(movedPt, oidx);
638 idx[movedPt] = nidx_pt;
645 C.row(nidx_pt) += (
X.row(movedPt) - C.row(nidx_pt)) / m[nidx_pt];
646 C.row(oidx) -= (
X.row(movedPt) - C.row(oidx)) / m[oidx];
651 onidx << oidx, nidx_pt;
653 for (qint32 h = 0; h < 2; ++h)
655 qint32 ci = onidx[h];
656 MatrixXd Xsorted(m[ci], p);
658 for (qint32 j = 0; j < n; ++j)
660 Xsorted.row(c++) =
X.row(j);
662 for (qint32 j = 0; j < p; ++j)
663 std::sort(Xsorted.col(j).data(), Xsorted.col(j).data() + Xsorted.rows());
665 qint32 nn =
static_cast<qint32
>(std::floor(0.5 * m[ci])) - 1;
666 if ((m[ci] % 2) == 0)
668 C.row(ci) = 0.5 * (Xsorted.row(nn) + Xsorted.row(nn + 1));
669 Xmid1.row(ci) = Xsorted.row(nn);
670 Xmid2.row(ci) = Xsorted.row(nn + 1);
674 C.row(ci) = Xsorted.row(nn + 1);
677 Xmid1.row(ci) = Xsorted.row(nn);
678 Xmid2.row(ci) = Xsorted.row(nn + 2);
682 Xmid1.row(ci) = Xsorted.row(0);
683 Xmid2.row(ci) = Xsorted.row(0);
690 C.row(nidx_pt).array() += (
X.row(movedPt) - C.row(nidx_pt)).array() / m[nidx_pt];
691 C.row(oidx).array() += (
X.row(movedPt) - C.row(oidx)).array() / m[oidx];
694 VectorXi sorted_onidx(2);
695 sorted_onidx << oidx, nidx_pt;
696 std::sort(sorted_onidx.data(), sorted_onidx.data() + sorted_onidx.rows());
697 changed = sorted_onidx;
705MatrixXd KMeans::distfun(
const MatrixXd&
X,
const MatrixXd& C)
707 const qint32 nclusts = C.rows();
708 MatrixXd D = MatrixXd::Zero(n, nclusts);
713 for (qint32 i = 0; i < nclusts; ++i)
714 D.col(i) = (
X.rowwise() - C.row(i)).rowwise().squaredNorm();
718 for (qint32 i = 0; i < nclusts; ++i)
719 D.col(i) = (
X.rowwise() - C.row(i)).cwiseAbs().rowwise().sum();
725 VectorXd normC = C.rowwise().norm();
726 for (qint32 i = 0; i < nclusts; ++i)
728 RowVectorXd C_normed = C.row(i) / normC(i);
729 D.col(i) = (1.0 - (
X * C_normed.transpose()).array()).cwiseMax(0.0);
735 for (qint32 i = 0; i < nclusts; ++i)
736 D.col(i) = (
X.rowwise() - C.row(i)).cwiseAbs().rowwise().sum() / p;
745void KMeans::gcentroids(
const MatrixXd&
X,
const VectorXi& index,
const VectorXi& clusts,
746 MatrixXd& centroids, VectorXi& counts)
748 const qint32 num = clusts.rows();
749 centroids = MatrixXd::Constant(num, p, std::numeric_limits<double>::quiet_NaN());
750 counts = VectorXi::Zero(num);
752 for (qint32 i = 0; i < num; ++i)
755 std::vector<int> members;
757 for (qint32 j = 0; j < index.rows(); ++j)
758 if (index[j] == clusts[i])
759 members.push_back(j);
761 counts[i] =
static_cast<qint32
>(members.size());
771 centroids.row(i) = RowVectorXd::Zero(p);
772 for (
int j : members)
773 centroids.row(i) +=
X.row(j);
774 centroids.row(i) /= counts[i];
780 MatrixXd Xsorted(counts[i], p);
782 for (
int j : members)
783 Xsorted.row(c++) =
X.row(j);
785 for (qint32 j = 0; j < p; ++j)
786 std::sort(Xsorted.col(j).data(), Xsorted.col(j).data() + Xsorted.rows());
788 qint32 nn =
static_cast<qint32
>(std::floor(0.5 * counts[i])) - 1;
789 if (counts[i] % 2 == 0)
790 centroids.row(i) = 0.5 * (Xsorted.row(nn) + Xsorted.row(nn + 1));
792 centroids.row(i) = Xsorted.row(nn + 1);
KMeans class declaration.
Shared utilities (I/O helpers, spectral analysis, layout management, warp algorithms).
KMeansDistance
Distance metric for K-Means clustering.
KMeansEmptyAction
Action to take when a K-Means cluster becomes empty.
KMeansStart
Initialization strategy for K-Means clustering.
bool calculate(const Eigen::MatrixXd &X, qint32 kClusters, Eigen::VectorXi &idx, Eigen::MatrixXd &C, Eigen::VectorXd &sumD, Eigen::MatrixXd &D)
KMeans(QString distance=QString("sqeuclidean"), QString start=QString("sample"), qint32 replicates=1, QString emptyact=QString("error"), bool online=true, qint32 maxit=100)