71: m_distance(distanceFromString(distance.toStdString()))
72, m_start(startFromString(start.toStdString()))
73, m_emptyact(emptyactFromString(emptyact.toStdString()))
74, m_iReps(std::max(replicates, qint32(1)))
77, m_rng(std::random_device{}())
99, m_iReps(std::max(replicates, qint32(1)))
102, m_rng(std::random_device{}())
124KMeansStart KMeans::startFromString(
const std::string& name)
159 X.array() -= (
X.rowwise().sum().array() /
static_cast<double>(p)).replicate(1, p);
160 MatrixXd Xnorm =
X.array().pow(2).rowwise().sum().sqrt();
161 X.array() /= Xnorm.replicate(1, p).array();
165 RowVectorXd Xmins, Xmaxs;
170 qWarning(
"KMeans: Uniform initialization is not supported for Hamming distance.");
173 Xmins =
X.colwise().minCoeff();
174 Xmaxs =
X.colwise().maxCoeff();
180 Del = MatrixXd::Constant(n, k, std::numeric_limits<double>::quiet_NaN());
183 double totsumDBest = std::numeric_limits<double>::max();
191 std::uniform_int_distribution<qint32> sampleDist(0, n - 1);
193 for (qint32 rep = 0; rep < m_iReps; ++rep)
198 C = MatrixXd::Zero(k, p);
199 for (qint32 i = 0; i < k; ++i)
201 for (qint32 j = 0; j < p; ++j)
203 std::uniform_real_distribution<double> dist(Xmins[j], Xmaxs[j]);
204 C(i, j) = dist(m_rng);
208 C.array() -= (C.array().rowwise().sum() / p).replicate(1, p).array();
212 C = MatrixXd::Zero(k, p);
213 for (qint32 i = 0; i < k; ++i)
214 C.row(i) =
X.row(sampleDist(m_rng));
219 idx = VectorXi::Zero(n);
220 d = VectorXd::Zero(n);
222 for (qint32 i = 0; i < n; ++i)
223 d[i] = D.row(i).minCoeff(&idx[i]);
225 m = VectorXi::Zero(k);
226 for (qint32 i = 0; i < n; ++i)
232 bool converged = batchUpdate(
X, C, idx);
236 converged = onlineUpdate(
X, C, idx);
239 qWarning(
"KMeans: Failed to converge during replicate %d.", rep);
242 VectorXi nonempties = (m.array() > 0).cast<
int>();
243 qint32 count = nonempties.sum();
245 MatrixXd C_tmp(count, C.cols());
247 for (qint32 i = 0; i < k; ++i)
249 C_tmp.row(ci++) = C.row(i);
251 MatrixXd D_tmp = distfun(
X, C_tmp);
253 for (qint32 i = 0; i < k; ++i)
257 D.col(i) = D_tmp.col(ci);
258 C.row(i) = C_tmp.row(ci);
264 d = VectorXd::Zero(n);
265 for (qint32 i = 0; i < n; ++i)
269 sumD = VectorXd::Zero(k);
270 for (qint32 i = 0; i < n; ++i)
271 sumD[idx[i]] += d[i];
273 totsumD = sumD.sum();
276 if (totsumD < totsumDBest)
278 totsumDBest = totsumD;
291 if (emptyErrCnt == m_iReps)
306bool KMeans::batchUpdate(
const MatrixXd&
X, MatrixXd& C, VectorXi& idx)
311 for (i = 0; i < n; ++i)
315 for (i = 0; i < k; ++i)
318 previdx = VectorXi::Zero(n);
319 prevtotsumD = std::numeric_limits<double>::max();
321 MatrixXd D = MatrixXd::Zero(n, k);
324 bool converged =
false;
332 gcentroids(
X, idx, changed, C_new, m_new);
333 MatrixXd D_new = distfun(
X, C_new);
335 for (qint32 i = 0; i < changed.rows(); ++i)
337 C.row(changed[i]) = C_new.row(i);
338 D.col(changed[i]) = D_new.col(i);
339 m[changed[i]] = m_new[i];
343 VectorXi empties = VectorXi::Zero(changed.rows());
344 for (qint32 i = 0; i < changed.rows(); ++i)
348 if (empties.sum() > 0)
359 for (qint32 i = 0; i < n; ++i)
360 totsumD += D(i, idx[i]);
363 if (prevtotsumD <= totsumD)
368 gcentroids(
X, idx, changed, C_rev, m_rev);
369 C.block(0, 0, k, C.cols()) = C_rev;
370 m.block(0, 0, k, 1) = m_rev;
375 if (iter >= m_iMaxit)
380 prevtotsumD = totsumD;
383 for (qint32 i = 0; i < n; ++i)
384 d[i] = D.row(i).minCoeff(&nidx[i]);
387 std::vector<int> movedVec;
389 for (qint32 i = 0; i < n; ++i)
391 if (nidx[i] != previdx[i])
392 movedVec.push_back(i);
396 std::vector<int> movedFinal;
397 movedFinal.reserve(movedVec.size());
398 for (
int mi : movedVec)
400 if (D(mi, previdx[mi]) > d[mi])
401 movedFinal.push_back(mi);
404 if (movedFinal.empty())
410 for (
int mi : movedFinal)
414 std::vector<int> tmp;
415 tmp.reserve(2 * movedFinal.size());
416 for (
int mi : movedFinal)
418 tmp.push_back(idx[mi]);
419 tmp.push_back(previdx[mi]);
421 std::sort(tmp.begin(), tmp.end());
422 tmp.erase(std::unique(tmp.begin(), tmp.end()), tmp.end());
424 changed.resize(tmp.size());
425 for (
size_t i = 0; i < tmp.size(); ++i)
433bool KMeans::onlineUpdate(
const MatrixXd&
X, MatrixXd& C, VectorXi& idx)
436 MatrixXd Xmid1, Xmid2;
439 Xmid1 = MatrixXd::Zero(k, p);
440 Xmid2 = MatrixXd::Zero(k, p);
441 for (qint32 i = 0; i < k; ++i)
445 MatrixXd Xsorted(m[i], p);
447 for (qint32 j = 0; j < n; ++j)
449 Xsorted.row(c++) =
X.row(j);
451 for (qint32 j = 0; j < p; ++j)
452 std::sort(Xsorted.col(j).data(), Xsorted.col(j).data() + Xsorted.rows());
454 qint32 nn =
static_cast<qint32
>(std::floor(0.5 * m[i])) - 1;
457 Xmid1.row(i) = Xsorted.row(nn);
458 Xmid2.row(i) = Xsorted.row(nn + 1);
462 Xmid1.row(i) = Xsorted.row(nn);
463 Xmid2.row(i) = Xsorted.row(nn + 2);
467 Xmid1.row(i) = Xsorted.row(0);
468 Xmid2.row(i) = Xsorted.row(0);
475 VectorXi changed(m.rows());
477 for (qint32 i = 0; i < m.rows(); ++i)
479 changed[count++] = i;
480 changed.conservativeResize(count);
482 qint32 lastmoved = 0;
485 bool converged =
false;
487 while (iter < m_iMaxit)
492 for (qint32 j = 0; j < changed.rows(); ++j)
494 qint32 i = changed[j];
495 VectorXi mbrs = VectorXi::Zero(n);
496 for (qint32 l = 0; l < n; ++l)
500 VectorXi sgn = 1 - 2 * mbrs.array();
502 for (qint32 l = 0; l < n; ++l)
506 Del.col(i) = (
static_cast<double>(m[i]) / (
static_cast<double>(m[i]) + sgn.cast<
double>().array()));
507 Del.col(i).array() *= (
X.rowwise() - C.row(i)).array().pow(2).rowwise().sum().array();
512 for (qint32 j = 0; j < changed.rows(); ++j)
514 qint32 i = changed[j];
517 MatrixXd ldist = Xmid1.row(i).replicate(n, 1) -
X;
518 MatrixXd rdist =
X - Xmid2.row(i).replicate(n, 1);
519 VectorXd mbrs = VectorXd::Zero(n);
520 for (qint32 l = 0; l < n; ++l)
523 MatrixXd sgn = ((-2 * mbrs).array() + 1).replicate(1, p);
524 rdist = sgn.array() * rdist.array();
525 ldist = sgn.array() * ldist.array();
527 for (qint32 l = 0; l < n; ++l)
530 for (qint32 h = 0; h < p; ++h)
531 sum += std::max(0.0, std::max(rdist(l, h), ldist(l, h)));
537 Del.col(i) = (
X.rowwise() - C.row(i)).array().abs().rowwise().sum();
543 MatrixXd normC = C.array().pow(2).rowwise().sum().sqrt();
544 for (qint32 j = 0; j < changed.rows(); ++j)
546 qint32 i = changed[j];
547 MatrixXd XCi =
X * C.row(i).transpose();
549 VectorXi mbrs = VectorXi::Zero(n);
550 for (qint32 l = 0; l < n; ++l)
554 VectorXi sgn = 1 - 2 * mbrs.array();
555 double A =
static_cast<double>(m[i]) * normC(i, 0);
558 Del.col(i) = 1 + sgn.cast<
double>().array() *
559 (A - (B + 2 * sgn.cast<
double>().array() * m[i] * XCi.array() + 1).sqrt());
566 prevtotsumD = totsumD;
568 VectorXi nidx = VectorXi::Zero(n);
569 VectorXd minDel = VectorXd::Zero(n);
570 for (qint32 i = 0; i < n; ++i)
571 minDel[i] = Del.row(i).minCoeff(&nidx[i]);
574 std::vector<int> movedVec;
576 for (qint32 i = 0; i < n; ++i)
577 if (previdx[i] != nidx[i])
578 movedVec.push_back(i);
581 std::vector<int> movedFinal;
582 movedFinal.reserve(movedVec.size());
583 for (
int mi : movedVec)
584 if (Del(mi, previdx[mi]) > minDel(mi))
585 movedFinal.push_back(mi);
587 if (movedFinal.empty())
589 if ((iter == iter1) || nummoved > 0)
596 int bestMoved = movedFinal[0];
597 int bestDist = ((movedFinal[0] - lastmoved) % n + n) % n;
598 for (
size_t i = 1; i < movedFinal.size(); ++i)
600 int d_i = ((movedFinal[i] - lastmoved) % n + n) % n;
604 bestMoved = movedFinal[i];
607 int movedPt = bestMoved;
609 if (movedPt <= lastmoved)
612 if (iter >= m_iMaxit)
619 qint32 oidx = idx[movedPt];
620 qint32 nidx_pt = nidx[movedPt];
621 totsumD += Del(movedPt, nidx_pt) - Del(movedPt, oidx);
623 idx[movedPt] = nidx_pt;
630 C.row(nidx_pt) += (
X.row(movedPt) - C.row(nidx_pt)) / m[nidx_pt];
631 C.row(oidx) -= (
X.row(movedPt) - C.row(oidx)) / m[oidx];
636 onidx << oidx, nidx_pt;
638 for (qint32 h = 0; h < 2; ++h)
640 qint32 ci = onidx[h];
641 MatrixXd Xsorted(m[ci], p);
643 for (qint32 j = 0; j < n; ++j)
645 Xsorted.row(c++) =
X.row(j);
647 for (qint32 j = 0; j < p; ++j)
648 std::sort(Xsorted.col(j).data(), Xsorted.col(j).data() + Xsorted.rows());
650 qint32 nn =
static_cast<qint32
>(std::floor(0.5 * m[ci])) - 1;
651 if ((m[ci] % 2) == 0)
653 C.row(ci) = 0.5 * (Xsorted.row(nn) + Xsorted.row(nn + 1));
654 Xmid1.row(ci) = Xsorted.row(nn);
655 Xmid2.row(ci) = Xsorted.row(nn + 1);
659 C.row(ci) = Xsorted.row(nn + 1);
662 Xmid1.row(ci) = Xsorted.row(nn);
663 Xmid2.row(ci) = Xsorted.row(nn + 2);
667 Xmid1.row(ci) = Xsorted.row(0);
668 Xmid2.row(ci) = Xsorted.row(0);
675 C.row(nidx_pt).array() += (
X.row(movedPt) - C.row(nidx_pt)).array() / m[nidx_pt];
676 C.row(oidx).array() += (
X.row(movedPt) - C.row(oidx)).array() / m[oidx];
679 VectorXi sorted_onidx(2);
680 sorted_onidx << oidx, nidx_pt;
681 std::sort(sorted_onidx.data(), sorted_onidx.data() + sorted_onidx.rows());
682 changed = sorted_onidx;
690MatrixXd KMeans::distfun(
const MatrixXd&
X,
const MatrixXd& C)
692 const qint32 nclusts = C.rows();
693 MatrixXd D = MatrixXd::Zero(n, nclusts);
698 for (qint32 i = 0; i < nclusts; ++i)
699 D.col(i) = (
X.rowwise() - C.row(i)).rowwise().squaredNorm();
703 for (qint32 i = 0; i < nclusts; ++i)
704 D.col(i) = (
X.rowwise() - C.row(i)).cwiseAbs().rowwise().sum();
710 VectorXd normC = C.rowwise().norm();
711 for (qint32 i = 0; i < nclusts; ++i)
713 RowVectorXd C_normed = C.row(i) / normC(i);
714 D.col(i) = (1.0 - (
X * C_normed.transpose()).array()).cwiseMax(0.0);
720 for (qint32 i = 0; i < nclusts; ++i)
721 D.col(i) = (
X.rowwise() - C.row(i)).cwiseAbs().rowwise().sum() / p;
730void KMeans::gcentroids(
const MatrixXd&
X,
const VectorXi& index,
const VectorXi& clusts,
731 MatrixXd& centroids, VectorXi& counts)
733 const qint32 num = clusts.rows();
734 centroids = MatrixXd::Constant(num, p, std::numeric_limits<double>::quiet_NaN());
735 counts = VectorXi::Zero(num);
737 for (qint32 i = 0; i < num; ++i)
740 std::vector<int> members;
742 for (qint32 j = 0; j < index.rows(); ++j)
743 if (index[j] == clusts[i])
744 members.push_back(j);
746 counts[i] =
static_cast<qint32
>(members.size());
756 centroids.row(i) = RowVectorXd::Zero(p);
757 for (
int j : members)
758 centroids.row(i) +=
X.row(j);
759 centroids.row(i) /= counts[i];
765 MatrixXd Xsorted(counts[i], p);
767 for (
int j : members)
768 Xsorted.row(c++) =
X.row(j);
770 for (qint32 j = 0; j < p; ++j)
771 std::sort(Xsorted.col(j).data(), Xsorted.col(j).data() + Xsorted.rows());
773 qint32 nn =
static_cast<qint32
>(std::floor(0.5 * counts[i])) - 1;
774 if (counts[i] % 2 == 0)
775 centroids.row(i) = 0.5 * (Xsorted.row(nn) + Xsorted.row(nn + 1));
777 centroids.row(i) = Xsorted.row(nn + 1);
KMeans class declaration.
Shared utilities (I/O helpers, spectral analysis, layout management, warp algorithms).
KMeansDistance
Distance metric for K-Means clustering.
KMeansEmptyAction
Action to take when a K-Means cluster becomes empty.
KMeansStart
Initialization strategy for K-Means clustering.
bool calculate(const Eigen::MatrixXd &X, qint32 kClusters, Eigen::VectorXi &idx, Eigen::MatrixXd &C, Eigen::VectorXd &sumD, Eigen::MatrixXd &D)
KMeans(QString distance=QString("sqeuclidean"), QString start=QString("sample"), qint32 replicates=1, QString emptyact=QString("error"), bool online=true, qint32 maxit=100)