Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions expui/KMeans.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,28 @@ namespace MSSA
//
cen.clear();

if (s>0) { // Seed centers by stride
if (s>0) {
// Seed centers by stride
for (int i=0; i<classes.size(); i+=s) {
if (cen.size()>=k) break;
cen.push_back(classes.at(i)->x);
}
k = cen.size();
} else { // obtain a seed from the system clock
} else {
// Obtain a seed from the system clock
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();

// Make a random generator
std::mt19937 gen(seed);
// Seed centers randomly

// Randomly shuffle a list of indexes
std::vector<int> indx;
for (int i=0; i<classes.size(); i++) indx.push_back(i);
std::shuffle(indx.begin(), indx.end(), gen);

// Seed centers randomly from the initial point list
for (int i=0; i<k; ++i) {
cen.push_back(classes.at(gen() % classes.size())->x);
cen.push_back(classes.at(indx[i])->x);
}
}

Expand Down
28 changes: 27 additions & 1 deletion expui/expMSSA.H
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,39 @@ namespace MSSA
//! Create wcorrlation matricies and output PNG
void wcorrPNG();

//@{
/**
Kmean analysis of the trajectories by PC with fixed cluster size
*/

/** Perform Kmeans analysis for a given number of clusters and
print the results

@param clusters is the maximum number of clusters considered
@param stride is the seed strategy. If positive, it is used to
select initial cluster centers by stride from the PC list. If
it is zero, centers are selected randomly from the PC list
@param toTerm write to stdout if true
@param toFile write to file if true
*/
void kmeans(int clusters, bool toTerm=true, bool toFile=false);
void kmeansPrint(int clusters, int stride,
bool toTerm=true, bool toFile=false);

/** Get Kmeans analysis per channel

@param clusters is the number of clusters to seed
@param key is the channel id vector<int>
*/
std::tuple<std::vector<int>, std::vector<double>, double>
kmeansChannel(Key key, int clusters, int stride);

/** Get Kmeans analysis for all channels

@param clusters is the number of clusters to seed
@param stride is the seeded strategy for initial centers
*/
std::tuple<std::vector<int>, std::vector<double>, double>
kmeans(int clusters, int stride);

//! Save current MSSA state to an HDF5 file with the given prefix
void saveState(const std::string& prefix);
Expand Down
201 changes: 142 additions & 59 deletions expui/expMSSA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1063,10 +1063,10 @@ namespace MSSA {
#endif
}

void expMSSA::kmeans(int clusters, bool toTerm, bool toFile)
void expMSSA::kmeansPrint(int clusters, int stride, bool toTerm, bool toFile)
{
if (clusters==0) {
std::cout << "expMSSA::kmeans: you need clusters>0" << std::endl;
std::cout << "expMSSA::kmeansPrint: you need clusters>0" << std::endl;
return;
}

Expand All @@ -1087,41 +1087,22 @@ namespace MSSA {
std::cerr << "Error opening file <" << filename << ">" << std::endl;
}

// W-correlation-based distance functor
//
KMeans::WcorrDistance dist(numT, numW);

for (auto u : mean) {
// Pack point array
//
std::vector<KMeans::Ptr> data;
for (int j=0; j<ncomp; j++) {
data.push_back(std::make_shared<KMeans::Point>(numT));
for (int i=0; i<numT; i++) data.back()->x[i] = RC[u.first](i, j);
}

// Initialize k-means routine
//
KMeans::kMeansClustering kMeans(data);

// Run 100 iterations
//
kMeans.iterate(dist, 100, clusters, 2, false);

// Retrieve cluster associations
//
auto results = kMeans.get_results();
auto [id, dd, tol] = kmeansChannel(u.first, clusters, stride);

// Write to file
//
if (out) {
out << std::string(60, '-') << std::endl
<< " *** n=" << u.first << std::endl
<< " *** tol=" << tol << std::endl
<< std::string(60, '-') << std::endl;

for (int j=0; j<results.size(); j++) {
for (int j=0; j<id.size(); j++) {
out << std::setw(6) << j
<< std::setw(12) << std::get<1>(results[j])
<< std::setw(12) << id[j]
<< std::setw(16) << dd[j]
<< std::endl;
}
}
Expand All @@ -1133,51 +1114,31 @@ namespace MSSA {
<< " *** n=" << u.first << std::endl
<< std::string(60, '-') << std::endl;

for (int j=0; j<results.size(); j++) {
std::cout << std::setw(6) << j
<< std::setw(12) << std::get<1>(results[j])
for (int j=0; j<id.size(); j++) {
std::cout << std::setw( 6) << j
<< std::setw( 9) << id[j]
<< std::setw(16) << dd[j]
<< std::endl;
}
}
}

if (params["allchan"]) {

// Pack point array
//
std::vector<KMeans::Ptr> data;
int sz = mean.size();
for (int j=0; j<ncomp; j++) {
data.push_back(std::make_shared<KMeans::Point>(numT*sz));
int c = 0;
for (auto u : mean) {
for (int i=0; i<numT; i++) data.back()->x[c++] = RC[u.first](i, j);
}
}

// Initialize k-means routine
//
KMeans::kMeansClustering kMeans(data);

// Run 100 iterations
//
KMeans::WcorrDistMulti dist2(numT, numW, sz);
kMeans.iterate(dist2, 100, clusters, 2, false);

// Retrieve cluster associations
//
auto results = kMeans.get_results();
auto [id, dd, tol] = kmeans(clusters, stride);

// Write to file
//
if (out) {
out << std::string(60, '-') << std::endl
<< " *** total" << std::endl
<< " *** tol=" << tol << std::endl
<< std::string(60, '-') << std::endl;

for (int j=0; j<results.size(); j++) {
out << std::setw(6) << j
<< std::setw(9) << std::get<1>(results[j])
for (int j=0; j<id.size(); j++) {
out << std::setw( 6) << j
<< std::setw( 9) << id[j]
<< std::setw(16) << dd[j]
<< std::endl;
}
}
Expand All @@ -1189,9 +1150,10 @@ namespace MSSA {
<< " *** total" << std::endl
<< std::string(60, '-') << std::endl;

for (int j=0; j<results.size(); j++) {
std::cout << std::setw(6) << j
<< std::setw(9) << std::get<1>(results[j])
for (int j=0; j<id.size(); j++) {
std::cout << std::setw( 6) << j
<< std::setw( 9) << id[j]
<< std::setw(16) << dd[j]
<< std::endl;
}
}
Expand All @@ -1203,7 +1165,128 @@ namespace MSSA {
std::cout << "Bad output stream for <" << filename << ">" << std::endl;
}
out.close();
}


std::tuple<std::vector<int>, std::vector<double>, double>
expMSSA::kmeansChannel(Key key, int clusters, int stride)
{
if (clusters==0) {
throw std::invalid_argument("expMSSA::kmeansChannel: clusters==0");
}

if (stride<0) {
throw std::invalid_argument("expMSSA::kmeansChannel: stride must be >= 0");
}

if (mean.find(key) == mean.end()) {
std::ostringstream sout;
sout << "expMSSA::kmeansKey: key <" << key << "> not found";
throw std::invalid_argument(sout.str());
}

KMeans::WcorrDistance dist(numT, numW);

// Pack point array
//
std::vector<KMeans::Ptr> data;
for (int j=0; j<ncomp; j++) {
data.push_back(std::make_shared<KMeans::Point>(numT));
for (int i=0; i<numT; i++) data.back()->x[i] = RC[key](i, j);
}

// Initialize k-means routine
//
KMeans::kMeansClustering kMeans(data);

// Run 100 iterations
//
kMeans.iterate(dist, 1000, clusters, stride);

// Retrieve cluster associations
//
auto results = kMeans.get_results();
auto centers = kMeans.get_cen();

// Compute inertia
//
auto inertia = [&](int j, int id) -> double {
auto & cen = centers[id];
double d = 0.0;
for (int i=0; i<cen.size(); i++)
d += (cen[i] - data[j]->x[i])*(cen[i] - data[j]->x[i]);
return sqrt(d);
};

// Pack return vector
//
std::vector<int> retI;
std::vector<double> retD;
for (int j=0; j<results.size(); j++) {
retI.push_back(std::get<1>(results[j]));
retD.push_back(inertia(j, std::get<1>(results[j])));
}

return {retI, retD, kMeans.getTol()};
}

std::tuple<std::vector<int>, std::vector<double>, double>
expMSSA::kmeans(int clusters, int stride)
{
if (clusters==0) {
throw std::invalid_argument("expMSSA::kmeans: you need clusters>0");
}

if (stride<0) {
throw std::invalid_argument("expMSSA::kmeans: stride must be >= 0");
}

// Pack point array
//
std::vector<KMeans::Ptr> data;
int sz = mean.size();
for (int j=0; j<ncomp; j++) {
data.push_back(std::make_shared<KMeans::Point>(numT*sz));
int c = 0;
for (auto u : mean) {
for (int i=0; i<numT; i++) data.back()->x[c++] = RC[u.first](i, j);
}
}

// Initialize k-means routine
//
KMeans::kMeansClustering kMeans(data);

// Run 100 iterations
//
KMeans::WcorrDistMulti dist(numT, numW, sz);
kMeans.iterate(dist, 1000, clusters, stride);

// Retrieve cluster associations
//
auto results = kMeans.get_results();
auto centers = kMeans.get_cen();

// Compute inertia
//
auto inertia = [&](int j, int id) -> double {
auto & cen = centers[id];
double d = 0.0;
for (int i=0; i<cen.size(); i++)
d += (cen[i] - data[j]->x[i])*(cen[i] - data[j]->x[i]);
return sqrt(d);
};

// Pack return vector
//
std::vector<int> retI;
std::vector<double> retD;
for (int j=0; j<results.size(); j++) {
retI.push_back(std::get<1>(results[j]));
retD.push_back(inertia(j, std::get<1>(results[j])));
}

return {retI, retD, kMeans.getTol()};
}

std::map<std::string, CoefClasses::CoefsPtr> expMSSA::getReconstructed(bool reconstructmean)
Expand Down
Loading