diff --git a/README.md b/README.md index 4bcac2c0fc..b2f55e0c39 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ CPPTRAJ see the following publication: Disclaimer and Copyright ======================== -CPPTRAJ is Copyright (c) 2010-2021 Daniel R. Roe. +CPPTRAJ is Copyright (c) 2010-2022 Daniel R. Roe. The terms for using, copying, modifying, and distributing CPPTRAJ are specified in the file LICENSE. @@ -202,6 +202,9 @@ Fixes and improvements to nucleic acid dihedral angle definitions (DihedralSearc * David S. Cerutti (Rutgers University, Piscataway, NJ, USA) Original code for the 'xtalsymm' Action. +* Franz Waibl & Klaus R. Liedl (Department of General, Inorganic, and Theoretical Chemistry, University of Innsbruck) +Improvements and enhancements for GIST. + #### Various Contributions * David A. Case (Rutgers University, Piscataway, NJ, USA) * Hai Nguyen (Rutgers University, Piscataway, NJ, USA) diff --git a/devtools/benchmark/cluster/cluster.sh b/devtools/benchmark/cluster/cluster.sh index d310814bb3..25e01accf3 100755 --- a/devtools/benchmark/cluster/cluster.sh +++ b/devtools/benchmark/cluster/cluster.sh @@ -1,8 +1,10 @@ #!/bin/bash -CPPTRAJ=`which cpptraj` -#export OMP_NUM_THREADS=12 -#CPPTRAJ=`which cpptraj.OMP` +if [ -z "$CPPTRAJ" ] ; then + #CPPTRAJ=`which cpptraj` + export OMP_NUM_THREADS=4 + CPPTRAJ=`which cpptraj.OMP` +fi TOP=~/Cpptraj/Cpptraj-ExtendedTests/ChainA_1-268_NAD_TCL-gaff.tip3p.parm7 TRJ=~/Cpptraj/Cpptraj-ExtendedTests/run9.nc # 2000 frames @@ -52,10 +54,11 @@ EOF ((SPLIT3 = $NFRAMES * 3)) echo "Total $TOTAL, Each section $NFRAMES splits $SPLIT1 $SPLIT2 $SPLIT3" +# kmeans clusters 5 randompoint kseed 42 \ cat >> cpptraj.in <[DBI] Hold final Davies-Bouldin index. +\end_layout + +\begin_layout Description +[PSF] Hold final pseudo-F value. +\end_layout + +\begin_layout Description +[SSRSST] Hold final SSR/SST value. +\end_layout + \begin_layout Description [NCVT] ( \series bold @@ -41861,7 +41873,7 @@ The K-dist plot will be named ..dat, with the default prefix being The K-dist plot usually looks like a curve with an initially steep slope that gradually decreases. Around where the initial part of the curve starts to flatten out (indicating - an increas in density) is around where epsilon should be set; minpoints + an increase in density) is around where epsilon should be set; minpoints is set to whatever was. It has been suggested that the shape of the K-dist curve doesn't change too much after Kdist=4, but users are encouraged to experiment. diff --git a/src/Cluster/Algorithm_HierAgglo.cpp b/src/Cluster/Algorithm_HierAgglo.cpp index 39defa273f..3e86d1597e 100644 --- a/src/Cluster/Algorithm_HierAgglo.cpp +++ b/src/Cluster/Algorithm_HierAgglo.cpp @@ -69,9 +69,9 @@ void Cpptraj::Cluster::Algorithm_HierAgglo::Results(CpptrajFile& outfile) const /** Print clustering timing. */ void Cpptraj::Cluster::Algorithm_HierAgglo::Timing(double total) const { # ifdef TIMER - time_findMin_.WriteTiming(2, "Find min distance", total); - time_mergeFrames_.WriteTiming(2, "Merge cluster frames", total); - time_calcLinkage_.WriteTiming(2, "Calculate new linkage", total); + time_findMin_.WriteTiming(2, "Find min distance :", total); + time_mergeFrames_.WriteTiming(2, "Merge cluster frames :", total); + time_calcLinkage_.WriteTiming(2, "Calculate new linkage :", total); # endif } @@ -132,6 +132,11 @@ int Cpptraj::Cluster::Algorithm_HierAgglo::DoClustering(List& clusters, ClusterDistances_.SetCdist( C1_it->Num(), C2_it->Num(), dval ); } } + // DEBUG print closest indices + if (debug_ > 0) { + if (debug_ > 1) ClusterDistances_.PrintElementsSquare(); + ClusterDistances_.PrintClosest(); + } //InitializeClusterDistances(); // DEBUG - print initial clusters if (debug_ > 1) @@ -149,6 +154,11 @@ int Cpptraj::Cluster::Algorithm_HierAgglo::DoClustering(List& clusters, } if (clusters.Nclusters() == 1) clusteringComplete = true; // Sanity check cluster_progress.Update( iterations++ ); + // DEBUG print closest indices + if (debug_ > 0) { + if (debug_ > 1) ClusterDistances_.PrintElementsSquare(); + ClusterDistances_.PrintClosest(); + } } mprintf("\tCompleted after %i iterations, %u clusters.\n",iterations, clusters.Nclusters()); diff --git a/src/Cluster/CMakeLists.txt b/src/Cluster/CMakeLists.txt index 75edebfd73..5d84ddfba3 100644 --- a/src/Cluster/CMakeLists.txt +++ b/src/Cluster/CMakeLists.txt @@ -12,6 +12,7 @@ set(CLUSTER_SOURCES Cmatrix_Binary.cpp Cmatrix_NC.cpp Control.cpp + DBI.cpp DrawGraph.cpp DynamicMatrix.cpp List.cpp @@ -23,6 +24,7 @@ set(CLUSTER_SOURCES Metric_Torsion.cpp Node.cpp Output.cpp + PseudoF.cpp Results_Coords.cpp Sieve.cpp Silhouette.cpp diff --git a/src/Cluster/Control.cpp b/src/Cluster/Control.cpp index d17003aef4..a2957c7eb2 100644 --- a/src/Cluster/Control.cpp +++ b/src/Cluster/Control.cpp @@ -1,6 +1,8 @@ #include #include "Control.h" +#include "DBI.h" #include "Output.h" +#include "PseudoF.h" #include "../ArgList.h" #include "../BufferedLine.h" // For loading info file #include "../CpptrajStdio.h" @@ -44,7 +46,13 @@ Cpptraj::Cluster::Control::Control() : draw_tol_(0), draw_maxit_(0), debug_(0), - metricContribFile_(0) + metricContribFile_(0), + DBITotal_(0), + pseudoF_(0), + SSRSST_(0), + dbi_set_(0), + psf_set_(0), + ssrsst_set_(0) {} /** DESTRUCTOR */ @@ -498,6 +506,14 @@ int Cpptraj::Cluster::Control::SetupClustering(DataSetList const& setsToCluster, clustersvtimefile->AddDataSet( clustersVtime_ ); } + // DBI and pSF data sets + dbi_set_ = DSL.AddSet(DataSet::DOUBLE, MetaData(dsname_, "DBI")); + if (dbi_set_ == 0) return 1; + psf_set_ = DSL.AddSet(DataSet::DOUBLE, MetaData(dsname_, "PSF")); + if (psf_set_ == 0) return 1; + ssrsst_set_ = DSL.AddSet(DataSet::DOUBLE, MetaData(dsname_, "SSRSST")); + if (ssrsst_set_ == 0) return 1; + return 0; } @@ -803,7 +819,17 @@ int Cpptraj::Cluster::Control::Run() { clusters_.PrintClusters(); } + // Clustering metrics. Centroids should be up to date. + DBITotal_ = ComputeDBI(clusters_, averageDist_, metrics_); + dbi_set_->Add(0, &DBITotal_); + if (clusters_.Nclusters() > 1) { + pseudoF_ = ComputePseudoF(clusters_, SSRSST_, metrics_, debug_); + psf_set_->Add(0, &pseudoF_); + ssrsst_set_->Add(0, &SSRSST_); + } + // TODO assign reference names + timer_post_.Stop(); } timer_run_.Stop(); return 0; @@ -822,12 +848,14 @@ int Cpptraj::Cluster::Control::Output(DataSetList& DSL) { } // Info + if (!suppressInfo_) { CpptrajFile outfile; if (outfile.OpenWrite( clusterinfo_ )) return 1; timer_output_info_.Start(); Output::PrintClustersToFile(outfile, clusters_, *algorithm_, metrics_, - frameSieve_.SieveValue(), frameSieve_.FramesToCluster()); + frameSieve_.SieveValue(), frameSieve_.FramesToCluster(), + DBITotal_, averageDist_, pseudoF_, SSRSST_); timer_output_info_.Stop(); outfile.CloseFile(); } diff --git a/src/Cluster/Control.h b/src/Cluster/Control.h index dc364f5475..557da5a7e1 100644 --- a/src/Cluster/Control.h +++ b/src/Cluster/Control.h @@ -113,6 +113,15 @@ class Control { CpptrajFile* metricContribFile_; ///< If not null, determine how much each metric contributes to total distance + // Various metrics of clustering "goodness" + double DBITotal_; ///< Davies-Bouldin index (DBI) + std::vector averageDist_; ///< Average distance of each cluster to each other cluster (DBI) + double pseudoF_; ///< Pseudo-F (pSF) + double SSRSST_; ///< Sum of squares regression over total sum of squares. + DataSet* dbi_set_; ///< DataSet to store DBI. + DataSet* psf_set_; ///< DataSet to store pseudo-F. + DataSet* ssrsst_set_; ///< DataSet to store SSR/SST. + // Timers Timer timer_setup_; ///< Run - metric, frames to cluster setup Timer timer_pairwise_; ///< Run - pairwise caching diff --git a/src/Cluster/DBI.cpp b/src/Cluster/DBI.cpp new file mode 100644 index 0000000000..e935f7231f --- /dev/null +++ b/src/Cluster/DBI.cpp @@ -0,0 +1,46 @@ +#include +#include +#include +#include "DBI.h" +#include "List.h" +#include "Node.h" +#include "MetricArray.h" + +/** The Davies-Bouldin Index (DBI) measures the average similarity between each + * cluster and its most similar one; the smaller the DBI, the better. The DBI + * is defined as the average, for all clusters X, of fred, where fred(X) = max, + * across other clusters Y, of (Cx + Cy)/dXY. Here Cx is the average distance + * from points in X to the centroid, similarly Cy, and dXY is the distance + * between cluster centroids. + * NOTE: To use this, cluster centroids should be fully up-to-date. + */ +double Cpptraj::Cluster::ComputeDBI(List const& clusters, std::vector& averageDist, MetricArray& metricIn) +{ + averageDist.clear(); + averageDist.reserve( clusters.Nclusters() ); + for (List::cluster_iterator C1 = clusters.begincluster(); C1 != clusters.endcluster(); ++C1) { + // Calculate average distance to centroid for this cluster + averageDist.push_back( C1->CalcAvgToCentroid( metricIn ) ); + //if (outfile.IsOpen()) + // outfile.Printf("#Cluster %i has average-distance-to-centroid %f\n", + // C1->Num(), averageDist.back()); + } + double DBITotal = 0.0; + unsigned int nc1 = 0; + for (List::cluster_iterator c1 = clusters.begincluster(); c1 != clusters.endcluster(); ++c1, ++nc1) { + double MaxFred = 0; + unsigned int nc2 = 0; + for (List::cluster_iterator c2 = clusters.begincluster(); c2 != clusters.endcluster(); ++c2, ++nc2) { + if (c1 != c2) { + double Fred = averageDist[nc1] + averageDist[nc2]; + Fred /= metricIn.CentroidDist( c1->Cent(), c2->Cent() ); + if (Fred > MaxFred) + MaxFred = Fred; + } + } + DBITotal += MaxFred; + } + DBITotal /= (double)clusters.Nclusters(); + //if (outfile.IsOpen()) outfile.Printf("#DBI: %f\n", DBITotal); + return DBITotal; +} diff --git a/src/Cluster/DBI.h b/src/Cluster/DBI.h new file mode 100644 index 0000000000..72539fd541 --- /dev/null +++ b/src/Cluster/DBI.h @@ -0,0 +1,11 @@ +#ifndef INC_CLUSTER_DBI_H +#define INC_CLUSTER_DBI_H +namespace Cpptraj { +namespace Cluster { +class List; +class MetricArray; +/// Calculate the Davies-Bouldin index and average distance to centroid for each cluster +double ComputeDBI(List const&, std::vector&, MetricArray&); +} +} +#endif diff --git a/src/Cluster/DynamicMatrix.cpp b/src/Cluster/DynamicMatrix.cpp index 49245ca751..6de54041bb 100644 --- a/src/Cluster/DynamicMatrix.cpp +++ b/src/Cluster/DynamicMatrix.cpp @@ -1,70 +1,37 @@ #include // float max -#ifdef _OPENMP -#include -#endif #include "DynamicMatrix.h" #include "../CpptrajStdio.h" // DynamicMatrix::FindMin() /** Find the minimum not being ignored; set corresponding row and column. */ -#ifdef _OPENMP -double Cpptraj::Cluster::DynamicMatrix::FindMin(int& iOut, int& jOut) { - int row, mythread; - int nrows = (int)Mat_.Nrows(); -# pragma omp parallel private(row, mythread) +double Cpptraj::Cluster::DynamicMatrix::FindMin(int& iOut, int& jOut) const { + float currentMin = std::numeric_limits::max(); + int minRow = -1; + int minCol = -1; + for (unsigned int col = 0; col != closestIdx_.size(); col++) { - mythread = omp_get_thread_num(); - - minVal_[mythread] = std::numeric_limits::max(); -# pragma omp for schedule(dynamic) - for (row = 0; row < nrows; row++) { - if (!ignore_[row]) { - int col = row + 1; - unsigned int idx = Mat_.CalcIndex(col, row); /// idx is start of this row - for (; col != nrows; col++, idx++) { - if (!ignore_[col]) { - if (Mat_[idx] < minVal_[mythread]) { - minVal_[mythread] = Mat_[idx]; - minRow_[mythread] = row; - minCol_[mythread] = col; - } - } + int row = closestIdx_[col]; + if (!ignore_[col] && !ignore_[row]) { + float mval = Mat_.element(col, row); + if (mval < currentMin) { + currentMin = mval; + minRow = (int)row; + minCol = (int)col; } } } - } /* END pragma omp parallel */ - float min = minVal_[0]; - iOut = minRow_[0]; - jOut = minCol_[0]; - for (unsigned int idx = 1; idx != minVal_.size(); idx++) { - if (minVal_[idx] < min) { - min = minVal_[idx]; - iOut = minRow_[idx]; - jOut = minCol_[idx]; - } + // Ensure iOut < jOut + if (minRow < minCol) { + iOut = minRow; + jOut = minCol; + } else { + iOut = minCol; + jOut = minRow; } - return (double)min; + return (double)currentMin; } -#else -double Cpptraj::Cluster::DynamicMatrix::FindMin(int& iOut, int& jOut) const { - float min = std::numeric_limits::max(); - for (unsigned int row = 0; row != Mat_.Nrows(); row++) { - if (!ignore_[row]) { - unsigned int col = row + 1; - unsigned int idx = Mat_.CalcIndex(col, row); // idx is start of this row - for (; col != Mat_.Ncols(); col++, idx++) { - if (!ignore_[col] && Mat_[idx] < min) { - min = Mat_[idx]; - iOut = (int)row; - jOut = (int)col; - } - } - } - } - return (double)min; -} -#endif +/** For DEBUG. Print all non-ignored matrix elements. */ void Cpptraj::Cluster::DynamicMatrix::PrintElements() const { unsigned int iVal = 0; unsigned int jVal = 1; @@ -80,20 +47,67 @@ void Cpptraj::Cluster::DynamicMatrix::PrintElements() const { } } +/** For DEBUG. Print elements like a square. */ +void Cpptraj::Cluster::DynamicMatrix::PrintElementsSquare() const { +/* mprintf("%4s", ""); + for (unsigned int col = 0; col < Mat_.Ncols(); col++) + mprintf(" %12u", col); + mprintf("\n"); + for (unsigned int row = 0; row < Mat_.Nrows(); row++) { + mprintf("%4u", row); + for (unsigned int col = 0; col < Mat_.Ncols(); col++) { + if (row <= col) + mprintf(" %12s", "X"); + else if (ignore_[row] || ignore_[col]) + mprintf(" %12s", "i"); + else + mprintf(" %12.4f", Mat_.element(col, row)); + } + mprintf("\n"); + }*/ + for (unsigned int row = 0; row < Mat_.Nrows(); row++) { + if (!ignore_[row]) { + mprintf("%4u :", row); + for (unsigned int col = 0; col < Mat_.Ncols(); col++) { + if (col != row && !ignore_[col]) + mprintf(" %4u=%8.3f", col, Mat_.element(col, row)); + } + mprintf("\n"); + } + } +} + +/** For DEBUG. Print closest indices to STDOUT. */ +void Cpptraj::Cluster::DynamicMatrix::PrintClosest() const { + for (unsigned int idx = 0; idx != closestIdx_.size(); idx++) { + if (!ignore_[idx]) { + mprintf("DEBUG: Closest cluster to %u is %i (%f)\n", idx, closestIdx_[idx], Mat_.element(idx, closestIdx_[idx])); + // Check that this is actually the case + int cidx = -1; + float cdist = 0; + for (unsigned int jdx = 0; jdx != closestIdx_.size(); jdx++) { + if (idx != jdx && !ignore_[jdx]) { + float fdist = Mat_.element(idx, jdx); + if (cidx == -1) { + cidx = jdx; + cdist = fdist; + } else if (fdist < cdist) { + cidx = jdx; + cdist = fdist; + } + } + } + if (cidx != closestIdx_[idx]) { + mprintf("Error: ACTUAL closest idx to %u is %i (%f)\n", idx, cidx, cdist); + } + } + } +} + // DynamicMatrix::SetupMatrix() int Cpptraj::Cluster::DynamicMatrix::SetupMatrix(size_t sizeIn) { if (Mat_.resize( 0L, sizeIn )) return 1; ignore_.assign( sizeIn, false ); -# ifdef _OPENMP - int n_threads = 0; -# pragma omp parallel - { - if (omp_get_thread_num() == 0) - n_threads = omp_get_num_threads(); - } - minRow_.resize( n_threads ); - minCol_.resize( n_threads ); - minVal_.resize( n_threads ); -# endif + closestIdx_.assign( sizeIn, -1 ); return 0; } diff --git a/src/Cluster/DynamicMatrix.h b/src/Cluster/DynamicMatrix.h index bcfee8be0c..7f495ad63e 100644 --- a/src/Cluster/DynamicMatrix.h +++ b/src/Cluster/DynamicMatrix.h @@ -2,6 +2,7 @@ #define INC_CLUSTER_DYNAMICMATRIX_H #include #include "../Matrix.h" +//#incl ude "../CpptrajStdio.h" // DEBUG namespace Cpptraj { namespace Cluster { @@ -10,37 +11,120 @@ class DynamicMatrix { public: DynamicMatrix() {} /// Indicate given row/col should be ignored. - void Ignore(int row) { ignore_[row] = true; } + inline void Ignore(int); /// \return true if given row/col has been ignored. bool IgnoringRow(int row) const { return ignore_[row]; } /// \return Original number of rows in matrix size_t Nrows() const { return Mat_.Nrows(); } /// Set the row and column of the smallest element not being ignored. -# ifdef _OPENMP - double FindMin(int&, int&); -# else double FindMin(int&, int&) const; -# endif /// \return an element. inline double GetCdist(int c, int r) const { return Mat_.element(c,r); } /// Print all matrix elements to STDOUT void PrintElements() const; - /// Add given element to matrix. - int AddCdist(double d) { return Mat_.addElement((float)d); } + /// Print matrix elements to STDOUT as a square + void PrintElementsSquare() const; + /// Print closest index to each col to STDOUT + void PrintClosest() const; /// Set element at column/row to given value - void SetCdist(int col, int row, double val) { Mat_.setElement(col, row, val); } + inline void SetCdist(int, int, float); /// Set up matrix for given number of rows int SetupMatrix(size_t); private: - Matrix Mat_; ///< Upper-triangle matrix holding cluster distances. - std::vector ignore_; ///< If true, ignore the row/col when printing/searching etc. -# ifdef _OPENMP - std::vector minRow_; - std::vector minCol_; - std::vector minVal_; -# endif + /// Find the closest cluster to the given cluster + inline void updateClosestIdx(unsigned int); + + Matrix Mat_; ///< Upper-triangle matrix holding cluster distances. + std::vector ignore_; ///< If true, ignore the row/col when printing/searching etc. + std::vector closestIdx_; ///< For each cluster, index of the cluster closest to it }; +/** Update closest to given idx */ +void DynamicMatrix::updateClosestIdx(unsigned int idx) { + closestIdx_[idx] = -1; + float current_closest = 0; + for (unsigned int jdx = 0; jdx != closestIdx_.size(); jdx++) { + if (!ignore_[jdx] && idx != jdx) { + if (closestIdx_[idx] == -1) { + closestIdx_[idx] = jdx; + current_closest = Mat_.element(idx, jdx); + } else { + float fdist = Mat_.element(idx, jdx); + if (fdist < current_closest) { + closestIdx_[idx] = jdx; + current_closest = fdist; + } + } + } + } // END loop jdx +//mprintf("DEBUG: New closest to %u is %i\n", idx, closestIdx_[idx]); +} + +/** Set cluster distance. Record closest distance for each col. */ +void DynamicMatrix::SetCdist(int col, int row, float val) { +// mprintf("DEBUG: Setting cluster %i to cluster %i distance= %f\n", col, row, val); + + int update_col = -1; + int update_row = -1; + + // Check col->row distance + if (closestIdx_[col] < 0) { +// mprintf("DEBUG: This is the initial distance for col.\n"); + closestIdx_[col] = row; + } else { + float closest_dist_to_col = Mat_.element(col, closestIdx_[col]); +// mprintf("DEBUG: Current closest to %i is %i (%f)\n", col, closestIdx_[col], closest_dist_to_col); + if (val < closest_dist_to_col) { +// mprintf("DEBUG: Updated col.\n"); + closestIdx_[col] = row; + } else if (row == closestIdx_[col] && val > closest_dist_to_col) { + // We are increasing what was previously the closest distance. May need to find new closest distance to col. + update_col = col; + } + } + + // Check row->col distance + if (closestIdx_[row] < 0) { +// mprintf("DEBUG: This is the initial distance for row.\n"); + closestIdx_[row] = col; + } else { + float closest_dist_to_row = Mat_.element(row, closestIdx_[row]); +// mprintf("DEBUG: Current closest to %i is %i (%f)\n", row, closestIdx_[row], closest_dist_to_row); + if (val < closest_dist_to_row) { +// mprintf("DEBUG: Updated row.\n"); + closestIdx_[row] = col; + } else if (col == closestIdx_[row] && val > closest_dist_to_row) { + // We are increasing what was previously the closest distance. May need to find new closest distance to row. + update_row = row; + } + } + + // Actually update the element + Mat_.setElement(col, row, val); + + // Update closest if necessary + if (update_col != -1) { +// mprintf("DEBUG: Closest to column %i increased. Need to update.\n", update_col); + updateClosestIdx(update_col); + } + if (update_row != -1) { +// mprintf("DEBUG: Closest to row %i increased. Need to update.\n", update_row); + updateClosestIdx(update_row); + } +} + +/** Specify given row should be ignored. Update closest distances if necessary. */ +void DynamicMatrix::Ignore(int row) { + ignore_[row] = true; + // If any remaining indices have the ignored row that needs to be updated + for (unsigned int idx = 0; idx != closestIdx_.size(); idx++) { + if (!ignore_[idx] && closestIdx_[idx] == row) { +// mprintf("DEBUG: closest to %u was just ignored; must be updated.\n", idx); + updateClosestIdx(idx); + } + } // END loop idx +} + } } #endif diff --git a/src/Cluster/List.cpp b/src/Cluster/List.cpp index 0a199a6eba..fd511af22d 100644 --- a/src/Cluster/List.cpp +++ b/src/Cluster/List.cpp @@ -6,7 +6,6 @@ #include "../CpptrajStdio.h" #include "../DataSet_integer.h" #include "../ProgressBar.h" -#include "../Constants.h" // SMALL TODO use limits? #ifdef _OPENMP # include #endif @@ -294,118 +293,3 @@ void Cpptraj::Cluster::List::AddFramesByCentroid(Cframes const& framesIn, Metric mprintf("\t%i of %i sieved frames were discarded as noise.\n", n_sieved_noise, Nsieved); } - -// ----------------------------------------------------------------------------- -/** The Davies-Bouldin Index (DBI) measures the average similarity between each - * cluster and its most similar one; the smaller the DBI, the better. The DBI - * is defined as the average, for all clusters X, of fred, where fred(X) = max, - * across other clusters Y, of (Cx + Cy)/dXY. Here Cx is the average distance - * from points in X to the centroid, similarly Cy, and dXY is the distance - * between cluster centroids. - * NOTE: To use this, cluster centroids should be fully up-to-date. - */ -double Cpptraj::Cluster::List::ComputeDBI(std::vector& averageDist, MetricArray& metricIn) -const -{ - averageDist.clear(); - averageDist.reserve( clusters_.size() ); - for (cluster_iterator C1 = begincluster(); C1 != endcluster(); ++C1) { - // Calculate average distance to centroid for this cluster - averageDist.push_back( C1->CalcAvgToCentroid( metricIn ) ); - //if (outfile.IsOpen()) - // outfile.Printf("#Cluster %i has average-distance-to-centroid %f\n", - // C1->Num(), averageDist.back()); - } - double DBITotal = 0.0; - unsigned int nc1 = 0; - for (cluster_iterator c1 = begincluster(); c1 != endcluster(); ++c1, ++nc1) { - double MaxFred = 0; - unsigned int nc2 = 0; - for (cluster_iterator c2 = begincluster(); c2 != endcluster(); ++c2, ++nc2) { - if (c1 != c2) { - double Fred = averageDist[nc1] + averageDist[nc2]; - Fred /= metricIn.CentroidDist( c1->Cent(), c2->Cent() ); - if (Fred > MaxFred) - MaxFred = Fred; - } - } - DBITotal += MaxFred; - } - DBITotal /= (double)clusters_.size(); - //if (outfile.IsOpen()) outfile.Printf("#DBI: %f\n", DBITotal); - return DBITotal; -} - -/** The pseudo-F statistic is another measure of clustering goodness. It is - * intended to capture the 'tightness' of clusters, and is in essence a ratio - * of the mean sum of squares between groups to the mean sum of squares within - * group (Lattin et al., 2003: 291) High values are good. Generally, one - * selects a cluster-count that gives a peak in the pseudo-f statistic (or - * pSF, for short). - * Formula: A/B, where A = (T - P)/(G-1), and B = P / (n-G). Here n is the - * number of points, G is the number of clusters, T is the total distance from - * the all-data centroid, and P is the sum (for all clusters) of the distances - * from the cluster centroid. - * NOTE: To use this, cluster centroids should be fully up-to-date. - * NOTE: This calc differs slightly from PTRAJ in that real centroids are used - * instead of representative structures. - */ -double Cpptraj::Cluster::List::ComputePseudoF(double& SSRSST, MetricArray& metricIn) const -{ - // Calculation makes no sense with fewer than 2 clusters. - if (Nclusters() < 2) { - mprintf("Warning: Fewer than 2 clusters. Not calculating pseudo-F.\n"); - return 0.0; - } - - // Form a cluster with all points to get a centroid. Use only frames that - // are in clusters, i.e. ignore noise. Assumes all cluster centroids are - // up to date. - Node c_all; - for (cluster_iterator C1 = begincluster(); C1 != endcluster(); ++C1) - { - for (Node::frame_iterator f1 = C1->beginframe(); f1 != C1->endframe(); ++f1) - c_all.AddFrameToCluster( *f1 ); - } - // Pseudo-F makes no sense if # clusters == # frames - if (Nclusters() == c_all.Nframes()) { - mprintf("Warning: Each frame is in a separate cluster. Not calculating pseudo-F.\n"); - return 0.0; - } - c_all.SortFrameList(); - c_all.CalculateCentroid( metricIn ); - - // Loop over all clusters - double gss = 0.0; // between-group sum of squares - double wss = 0.0; // within-group sum of squares - for (cluster_iterator C1 = begincluster(); C1 != endcluster(); ++C1) - { - for (Node::frame_iterator f1 = C1->beginframe(); f1 != C1->endframe(); ++f1) - { - double dist = metricIn.FrameCentroidDist(*f1, c_all.Cent()); - gss += (dist * dist); - dist = metricIn.FrameCentroidDist(*f1, C1->Cent()); - wss += (dist * dist); - } - } - double d_nclusters = (double)Nclusters(); - double d_ntotal = (double)c_all.Nframes(); - double num = (gss - wss) / (d_nclusters - 1.0); - double den = wss / (d_ntotal - d_nclusters); - if (den < Constants::SMALL) - den = Constants::SMALL; - double pseudof = num / den; - if (debug_ > 0) - mprintf("Pseudo-f: Total distance to centroid is %.4f\n" - "Pseudo-f: Cluster distance to centroid is %.4f\n" - "Pseudo-f: Numerator %.4f over denominator %.4f gives %.4f\n", - gss, wss, num, den, pseudof); - //if (outfile.IsOpen()) { - // outfile.Printf("#pSF: %f\n", pseudof); - // This calculation taken directly from ptraj - SSRSST = pseudof*(d_nclusters-1)/(d_ntotal-d_nclusters+pseudof*(d_nclusters-1)); - // outfile.Printf("#SSR/SST: %f\n", SSRSST); - //} - - return pseudof; -} diff --git a/src/Cluster/List.h b/src/Cluster/List.h index 31018e1ee4..dfc1d4e376 100644 --- a/src/Cluster/List.h +++ b/src/Cluster/List.h @@ -1,6 +1,5 @@ #ifndef INC_CLUSTER_LIST_H #define INC_CLUSTER_LIST_H -//#include #include "Cframes.h" class DataSet_integer; namespace Cpptraj { @@ -66,11 +65,6 @@ class List { int CreateCnumVsTime(DataSet_integer&, unsigned int, int, int) const; /// Generate number unique clusters vs time data set int NclustersObserved(DataSet_integer&, unsigned int, int) const; - - /// Calculate the Davies-Bouldin index. - double ComputeDBI(std::vector&, MetricArray&) const; - /// Calculate pseudo-F - double ComputePseudoF(double&, MetricArray&) const; private: typedef std::list Narray; Narray clusters_; ///< Hold all clusters. diff --git a/src/Cluster/Output.cpp b/src/Cluster/Output.cpp index b084204f30..8d9dc46755 100644 --- a/src/Cluster/Output.cpp +++ b/src/Cluster/Output.cpp @@ -30,7 +30,9 @@ void Cpptraj::Cluster::Output::PrintClustersToFile(CpptrajFile& outfile, List const& clusters, Algorithm const& algorithmIn, MetricArray& metricIn, int sieve, - Cframes const& sievedFrames) + Cframes const& sievedFrames, + double DBITotal, std::vector const& averageDist, + double pseudof, double SSRSST) { //CpptrajFile outfile; std::string buffer; @@ -43,8 +45,8 @@ void Cpptraj::Cluster::Output::PrintClustersToFile(CpptrajFile& outfile, outfile.Printf("#Clustering: %i clusters %u frames\n", clusters.Nclusters(), metricIn.Ntotal()); // DBI - std::vector averageDist; - double DBITotal = clusters.ComputeDBI( averageDist, metricIn ); + //std::vector averageDist; + //double DBITotal = clusters.ComputeDBI( averageDist, metricIn ); std::vector::const_iterator avgd = averageDist.begin(); for (List::cluster_iterator C = clusters.begincluster(); C != clusters.endcluster(); @@ -53,8 +55,8 @@ void Cpptraj::Cluster::Output::PrintClustersToFile(CpptrajFile& outfile, outfile.Printf("#DBI: %f\n", DBITotal); // Pseudo-F if (clusters.Nclusters() > 1) { - double SSRSST = 0.0; - double pseudof = clusters.ComputePseudoF( SSRSST, metricIn ); + //double SSRSST = 0.0; + //double pseudof = clusters.ComputePseudoF( SSRSST, metricIn ); outfile.Printf("#pSF: %f\n", pseudof); outfile.Printf("#SSR/SST: %f\n", SSRSST); } else diff --git a/src/Cluster/Output.h b/src/Cluster/Output.h index 033e212f53..0ec1c91702 100644 --- a/src/Cluster/Output.h +++ b/src/Cluster/Output.h @@ -12,7 +12,8 @@ class BestReps; class Output { public: static void PrintClustersToFile(CpptrajFile&, List const&, Algorithm const&, MetricArray&, - int, Cframes const&); + int, Cframes const&, double, std::vector const&, + double, double); static int Summary(CpptrajFile&, List const&, Algorithm const&, MetricArray&, bool, bool, std::vector const&); static void Summary_Part(CpptrajFile&, unsigned int, Cframes const&, List const&, diff --git a/src/Cluster/PseudoF.cpp b/src/Cluster/PseudoF.cpp new file mode 100644 index 0000000000..abc91c7628 --- /dev/null +++ b/src/Cluster/PseudoF.cpp @@ -0,0 +1,89 @@ +#include +#include "PseudoF.h" +#include "List.h" +#include "MetricArray.h" +#include "Node.h" +#include "../Constants.h" +#include "../CpptrajStdio.h" + +/** The pseudo-F statistic is another measure of clustering goodness. It is + * intended to capture the 'tightness' of clusters, and is in essence a ratio + * of the mean sum of squares between groups to the mean sum of squares within + * group (Lattin et al., 2003: 291) High values are good. Generally, one + * selects a cluster-count that gives a peak in the pseudo-f statistic (or + * pSF, for short). + * Formula: A/B, where A = (T - P)/(G-1), and B = P / (n-G). Here n is the + * number of points, G is the number of clusters, T is the total distance from + * the all-data centroid, and P is the sum (for all clusters) of the distances + * from the cluster centroid. + * This will also calculate SSR (sum of squares regression, between sum of + * squares) over the total sum of squares (SST). The SSR/SST ratio value lies + * between 0 and 1 and gives the percentage of explained variance by the data, + * and is similar to the R 2 value in regression analysis. As the ratio + * inherently rises with cluster count, one looks for an “elbow” in the curve + * where adding another cluster does not add much new information. + * NOTE: To use this, cluster centroids should be fully up-to-date. + * NOTE: This calc differs slightly from PTRAJ in that real centroids are used + * instead of representative structures. + * \param clusters List of clusters + * \param SSRSST sum of squares regression (SSR or between sum of squares) over total sum of squares (SST). + */ +double Cpptraj::Cluster::ComputePseudoF(List const& clusters, double& SSRSST, MetricArray& metricIn, int debugIn) +{ + // Calculation makes no sense with fewer than 2 clusters. + if (clusters.Nclusters() < 2) { + mprintf("Warning: Fewer than 2 clusters. Not calculating pseudo-F.\n"); + return 0.0; + } + + // Form a cluster with all points to get a centroid. Use only frames that + // are in clusters, i.e. ignore noise. Assumes all cluster centroids are + // up to date. + Node c_all; + for (List::cluster_iterator C1 = clusters.begincluster(); C1 != clusters.endcluster(); ++C1) + { + for (Node::frame_iterator f1 = C1->beginframe(); f1 != C1->endframe(); ++f1) + c_all.AddFrameToCluster( *f1 ); + } + // Pseudo-F makes no sense if # clusters == # frames + if (clusters.Nclusters() == c_all.Nframes()) { + mprintf("Warning: Each frame is in a separate cluster. Not calculating pseudo-F.\n"); + return 0.0; + } + c_all.SortFrameList(); + c_all.CalculateCentroid( metricIn ); + + // Loop over all clusters + double gss = 0.0; // between-group sum of squares + double wss = 0.0; // within-group sum of squares + for (List::cluster_iterator C1 = clusters.begincluster(); C1 != clusters.endcluster(); ++C1) + { + for (Node::frame_iterator f1 = C1->beginframe(); f1 != C1->endframe(); ++f1) + { + double dist = metricIn.FrameCentroidDist(*f1, c_all.Cent()); + gss += (dist * dist); + dist = metricIn.FrameCentroidDist(*f1, C1->Cent()); + wss += (dist * dist); + } + } + double d_nclusters = (double)clusters.Nclusters(); + double d_ntotal = (double)c_all.Nframes(); + double num = (gss - wss) / (d_nclusters - 1.0); + double den = wss / (d_ntotal - d_nclusters); + if (den < Constants::SMALL) + den = Constants::SMALL; + double pseudof = num / den; + if (debugIn > 0) + mprintf("Pseudo-f: Total distance to centroid is %.4f\n" + "Pseudo-f: Cluster distance to centroid is %.4f\n" + "Pseudo-f: Numerator %.4f over denominator %.4f gives %.4f\n", + gss, wss, num, den, pseudof); + //if (outfile.IsOpen()) { + // outfile.Printf("#pSF: %f\n", pseudof); + // This calculation taken directly from ptraj + SSRSST = pseudof*(d_nclusters-1)/(d_ntotal-d_nclusters+pseudof*(d_nclusters-1)); + // outfile.Printf("#SSR/SST: %f\n", SSRSST); + //} + + return pseudof; +} diff --git a/src/Cluster/PseudoF.h b/src/Cluster/PseudoF.h new file mode 100644 index 0000000000..d7249f1a3d --- /dev/null +++ b/src/Cluster/PseudoF.h @@ -0,0 +1,11 @@ +#ifndef INC_CLUSTER_PSEUDOF_H +#define INC_CLUSTER_PSEUDOF_H +namespace Cpptraj { +namespace Cluster { +class List; +class MetricArray; +/// Calculate pseudo-F +double ComputePseudoF(List const&, double&, MetricArray&, int); +} +} +#endif diff --git a/src/Cluster/clusterdepend b/src/Cluster/clusterdepend index 909eb6cec8..1fafe4d710 100644 --- a/src/Cluster/clusterdepend +++ b/src/Cluster/clusterdepend @@ -9,10 +9,11 @@ Centroid_Coord.o : Centroid_Coord.cpp ../Atom.h ../AtomMask.h ../Box.h ../Coordi Cframes.o : Cframes.cpp Cframes.h Cmatrix_Binary.o : Cmatrix_Binary.cpp ../CpptrajFile.h ../CpptrajStdio.h ../FileIO.h ../FileName.h ../Parallel.h Cframes.h Cmatrix_Binary.h Cmatrix_NC.o : Cmatrix_NC.cpp ../CpptrajStdio.h ../FileName.h ../NC_Routines.h Cframes.h Cmatrix_NC.h -Control.o : Control.cpp ../ArgList.h ../ArrayIterator.h ../AssociatedData.h ../Atom.h ../AtomMask.h ../AtomType.h ../BaseIOtype.h ../Box.h ../BufferedLine.h ../Cluster/Cframes.h ../Constants.h ../CoordinateInfo.h ../CpptrajFile.h ../CpptrajStdio.h ../DataFile.h ../DataFileList.h ../DataSet.h ../DataSetList.h ../DataSet_1D.h ../DataSet_Coords.h ../DataSet_Coords_REF.h ../DataSet_PairwiseCache.h ../DataSet_float.h ../DataSet_integer.h ../Dimension.h ../FileIO.h ../FileName.h ../FileTypes.h ../Frame.h ../MaskToken.h ../Matrix.h ../Matrix_3x3.h ../MetaData.h ../Molecule.h ../NameType.h ../Parallel.h ../ParameterHolders.h ../ParameterSet.h ../ParameterTypes.h ../Random.h ../Range.h ../ReferenceFrame.h ../ReplicaDimArray.h ../Residue.h ../Segment.h ../SymbolExporting.h ../TextFormat.h ../Timer.h ../Topology.h ../TrajectoryFile.h ../TypeNameHolder.h ../Unit.h ../Vec3.h Algorithm.h Algorithm_DBscan.h Algorithm_DPeaks.h Algorithm_HierAgglo.h Algorithm_Kmeans.h BestReps.h CentroidArray.h Cframes.h Control.h DrawGraph.h DynamicMatrix.h List.h Metric.h MetricArray.h Node.h Output.h Results.h Results_Coords.h Sieve.h Silhouette.h +Control.o : Control.cpp ../ArgList.h ../ArrayIterator.h ../AssociatedData.h ../Atom.h ../AtomMask.h ../AtomType.h ../BaseIOtype.h ../Box.h ../BufferedLine.h ../Cluster/Cframes.h ../Constants.h ../CoordinateInfo.h ../CpptrajFile.h ../CpptrajStdio.h ../DataFile.h ../DataFileList.h ../DataSet.h ../DataSetList.h ../DataSet_1D.h ../DataSet_Coords.h ../DataSet_Coords_REF.h ../DataSet_PairwiseCache.h ../DataSet_float.h ../DataSet_integer.h ../Dimension.h ../FileIO.h ../FileName.h ../FileTypes.h ../Frame.h ../MaskToken.h ../Matrix.h ../Matrix_3x3.h ../MetaData.h ../Molecule.h ../NameType.h ../Parallel.h ../ParameterHolders.h ../ParameterSet.h ../ParameterTypes.h ../Random.h ../Range.h ../ReferenceFrame.h ../ReplicaDimArray.h ../Residue.h ../Segment.h ../SymbolExporting.h ../TextFormat.h ../Timer.h ../Topology.h ../TrajectoryFile.h ../TypeNameHolder.h ../Unit.h ../Vec3.h Algorithm.h Algorithm_DBscan.h Algorithm_DPeaks.h Algorithm_HierAgglo.h Algorithm_Kmeans.h BestReps.h CentroidArray.h Cframes.h Control.h DBI.h DrawGraph.h DynamicMatrix.h List.h Metric.h MetricArray.h Node.h Output.h PseudoF.h Results.h Results_Coords.h Sieve.h Silhouette.h +DBI.o : DBI.cpp CentroidArray.h Cframes.h DBI.h List.h Metric.h MetricArray.h Node.h DrawGraph.o : DrawGraph.cpp ../AssociatedData.h ../Atom.h ../Constants.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSet_1D.h ../Dimension.h ../FileIO.h ../FileName.h ../MetaData.h ../NameType.h ../PDBfile.h ../Parallel.h ../Range.h ../Residue.h ../SymbolExporting.h ../TextFormat.h ../Vec3.h Cframes.h DrawGraph.h Metric.h MetricArray.h DynamicMatrix.o : DynamicMatrix.cpp ../ArrayIterator.h ../CpptrajStdio.h ../Matrix.h DynamicMatrix.h -List.o : List.cpp ../AssociatedData.h ../Constants.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSet_1D.h ../DataSet_integer.h ../Dimension.h ../FileIO.h ../FileName.h ../MetaData.h ../Parallel.h ../ProgressBar.h ../Range.h ../TextFormat.h CentroidArray.h Cframes.h List.h Metric.h MetricArray.h Node.h +List.o : List.cpp ../AssociatedData.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSet_1D.h ../DataSet_integer.h ../Dimension.h ../FileIO.h ../FileName.h ../MetaData.h ../Parallel.h ../ProgressBar.h ../Range.h ../TextFormat.h CentroidArray.h Cframes.h List.h Metric.h MetricArray.h Node.h MetricArray.o : MetricArray.cpp ../ArgList.h ../ArrayIterator.h ../AssociatedData.h ../Atom.h ../AtomMap.h ../AtomMask.h ../AtomType.h ../BaseIOtype.h ../Box.h ../Cluster/Cframes.h ../Constants.h ../CoordinateInfo.h ../CpptrajFile.h ../CpptrajStdio.h ../DataFile.h ../DataFileList.h ../DataSet.h ../DataSetList.h ../DataSet_Coords.h ../DataSet_Coords_REF.h ../DataSet_PairwiseCache.h ../Dimension.h ../FileIO.h ../FileName.h ../FileTypes.h ../Frame.h ../Hungarian.h ../MapAtom.h ../MaskToken.h ../Matrix.h ../Matrix_3x3.h ../MetaData.h ../Molecule.h ../NameType.h ../OnlineVarT.h ../Parallel.h ../ParameterHolders.h ../ParameterSet.h ../ParameterTypes.h ../ProgressBar.h ../Range.h ../ReferenceFrame.h ../ReplicaDimArray.h ../Residue.h ../Segment.h ../StringRoutines.h ../SymbolExporting.h ../SymmetricRmsdCalc.h ../TextFormat.h ../Timer.h ../Topology.h ../TypeNameHolder.h ../Unit.h ../Vec3.h CentroidArray.h Metric.h MetricArray.h Metric_DME.h Metric_RMS.h Metric_SRMSD.h Metric_Scalar.h Metric_Torsion.h Metric_DME.o : Metric_DME.cpp ../AssociatedData.h ../Atom.h ../AtomMask.h ../AtomType.h ../Box.h ../Constants.h ../CoordinateInfo.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSet_Coords.h ../Dimension.h ../FileIO.h ../FileName.h ../Frame.h ../MaskToken.h ../Matrix_3x3.h ../MetaData.h ../Molecule.h ../NameType.h ../Parallel.h ../ParameterHolders.h ../ParameterSet.h ../ParameterTypes.h ../Range.h ../ReplicaDimArray.h ../Residue.h ../Segment.h ../SymbolExporting.h ../TextFormat.h ../Topology.h ../TypeNameHolder.h ../Unit.h ../Vec3.h Centroid.h Centroid_Coord.h Cframes.h Metric.h Metric_DME.h Metric_RMS.o : Metric_RMS.cpp ../AssociatedData.h ../Atom.h ../AtomMask.h ../AtomType.h ../Box.h ../Constants.h ../CoordinateInfo.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSet_Coords.h ../Dimension.h ../FileIO.h ../FileName.h ../Frame.h ../MaskToken.h ../Matrix_3x3.h ../MetaData.h ../Molecule.h ../NameType.h ../Parallel.h ../ParameterHolders.h ../ParameterSet.h ../ParameterTypes.h ../Range.h ../ReplicaDimArray.h ../Residue.h ../Segment.h ../SymbolExporting.h ../TextFormat.h ../Topology.h ../TypeNameHolder.h ../Unit.h ../Vec3.h Centroid.h Centroid_Coord.h Cframes.h Metric.h Metric_RMS.h @@ -21,6 +22,7 @@ Metric_Scalar.o : Metric_Scalar.cpp ../AssociatedData.h ../CpptrajFile.h ../Cppt Metric_Torsion.o : Metric_Torsion.cpp ../AssociatedData.h ../Constants.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSet_1D.h ../Dimension.h ../FileIO.h ../FileName.h ../MetaData.h ../Parallel.h ../Range.h ../TextFormat.h Centroid.h Centroid_Num.h Cframes.h Metric.h Metric_Torsion.h Node.o : Node.cpp ../AssociatedData.h ../CpptrajFile.h ../DataSet.h ../DataSet_1D.h ../DataSet_float.h ../DataSet_integer.h ../Dimension.h ../FileIO.h ../FileName.h ../MetaData.h ../Parallel.h ../Range.h ../TextFormat.h CentroidArray.h Cframes.h Metric.h MetricArray.h Node.h Output.o : Output.cpp ../ArrayIterator.h ../CpptrajFile.h ../CpptrajStdio.h ../FileIO.h ../FileName.h ../Matrix.h ../Parallel.h ../Timer.h Algorithm.h BestReps.h CentroidArray.h Cframes.h List.h Metric.h MetricArray.h Node.h Output.h +PseudoF.o : PseudoF.cpp ../Constants.h ../CpptrajStdio.h CentroidArray.h Cframes.h List.h Metric.h MetricArray.h Node.h PseudoF.h Results_Coords.o : Results_Coords.cpp ../ActionFrameCounter.h ../ArgList.h ../ArrayIterator.h ../AssociatedData.h ../Atom.h ../AtomMap.h ../AtomMask.h ../AtomType.h ../BaseIOtype.h ../Box.h ../Constants.h ../CoordinateInfo.h ../CpptrajFile.h ../CpptrajStdio.h ../DataSet.h ../DataSetList.h ../DataSet_Coords.h ../DataSet_Coords_REF.h ../Dimension.h ../FileIO.h ../FileName.h ../FileTypes.h ../Frame.h ../Hungarian.h ../MapAtom.h ../MaskToken.h ../Matrix.h ../Matrix_3x3.h ../MetaData.h ../Molecule.h ../NameType.h ../OutputTrajCommon.h ../Parallel.h ../ParameterHolders.h ../ParameterSet.h ../ParameterTypes.h ../Range.h ../ReferenceFrame.h ../ReplicaDimArray.h ../Residue.h ../Segment.h ../StringRoutines.h ../SymbolExporting.h ../SymmetricRmsdCalc.h ../TextFormat.h ../Timer.h ../Topology.h ../TrajectoryFile.h ../Trajout_Single.h ../TypeNameHolder.h ../Unit.h ../Vec3.h CentroidArray.h Cframes.h List.h Metric.h MetricArray.h Metric_DME.h Metric_RMS.h Metric_SRMSD.h Node.h Results.h Results_Coords.h Sieve.o : Sieve.cpp ../AssociatedData.h ../Cluster/Cframes.h ../CpptrajFile.h ../DataSet.h ../DataSet_PairwiseCache.h ../Dimension.h ../FileIO.h ../FileName.h ../MetaData.h ../Parallel.h ../Random.h ../Range.h ../TextFormat.h Cframes.h Sieve.h Silhouette.o : Silhouette.cpp ../CpptrajFile.h ../CpptrajStdio.h ../FileIO.h ../FileName.h ../Parallel.h CentroidArray.h Cframes.h List.h Metric.h MetricArray.h Node.h Silhouette.h diff --git a/src/Cluster/clusterfiles b/src/Cluster/clusterfiles index c1f870778a..293bad8a4c 100644 --- a/src/Cluster/clusterfiles +++ b/src/Cluster/clusterfiles @@ -12,6 +12,7 @@ CLUSTER_SOURCES= \ Cmatrix_Binary.cpp \ Cmatrix_NC.cpp \ Control.cpp \ + DBI.cpp \ DrawGraph.cpp \ DynamicMatrix.cpp \ List.cpp \ @@ -23,6 +24,7 @@ CLUSTER_SOURCES= \ Metric_Torsion.cpp \ Node.cpp \ Output.cpp \ + PseudoF.cpp \ Results_Coords.cpp \ Sieve.cpp \ Silhouette.cpp diff --git a/src/Command.cpp b/src/Command.cpp index 4854391fc5..96306da432 100644 --- a/src/Command.cpp +++ b/src/Command.cpp @@ -37,6 +37,7 @@ #include "Exec_Set.h" #include "Exec_Show.h" #include "Exec_Random.h" +#include "Exec_CompareClusters.h" // ----- SYSTEM ---------------------------------------------------------------- #include "Exec_System.h" // ----- COORDS ---------------------------------------------------------------- @@ -213,6 +214,7 @@ void Command::Init() { Command::AddCmd( new Exec_Calc(), Cmd::EXE, 1, "calc" ); Command::AddCmd( new Exec_Clear(), Cmd::EXE, 1, "clear" ); Command::AddCmd( new Exec_ClusterMap(), Cmd::EXE, 1, "clustermap" ); // hidden + Command::AddCmd( new Exec_CompareClusters(), Cmd::EXE, 1, "compareclusters" ); //hidden Command::AddCmd( new Exec_CreateDataFile(), Cmd::EXE, 1, "create" ); Command::AddCmd( new Exec_CreateSet(), Cmd::EXE, 1, "createset" ); Command::AddCmd( new Exec_DataFileCmd(), Cmd::EXE, 1, "datafile" ); diff --git a/src/DataSetList.cpp b/src/DataSetList.cpp index af0b2f162f..9486970992 100644 --- a/src/DataSetList.cpp +++ b/src/DataSetList.cpp @@ -435,7 +435,9 @@ DataSet* DataSetList::AddSet( DataSet::DataType inType, MetaData const& metaIn, * \return pointer to successfully set-up DataSet or 0 if error. */ DataSet* DataSetList::AllocateSet(DataSet::DataType inType, MetaData const& metaIn) +#ifndef TIMER const +#endif { // TODO Always generate default name if empty? # ifdef TIMER time_total_.Start(); diff --git a/src/DataSetList.h b/src/DataSetList.h index 8fb262f986..c13d63fe66 100644 --- a/src/DataSetList.h +++ b/src/DataSetList.h @@ -108,8 +108,13 @@ class DataSetList { DataSet* AddSet_NoCheck(DataSet::DataType, MetaData const&); /// Add an already set up DataSet to list; memory for DataSet will be freed. int AddSet( DataSet* ); - /// Allocate DataSet but do not add to the list +# ifdef TIMER + /// Allocate DataSet but do not add to the list. Not const so that Timer can be used. + DataSet* AllocateSet(DataSet::DataType, MetaData const&); +# else + /// Allocate DataSet but do not add to the list. DataSet* AllocateSet(DataSet::DataType, MetaData const&) const; +# endif /// Add new sets or append to existing ones. int AddOrAppendSets(std::string const&, Darray const&, DataListType const&); /// Add a copy of the DataSet to the list; memory for DataSet will not be freed. diff --git a/src/Exec_CompareClusters.cpp b/src/Exec_CompareClusters.cpp new file mode 100644 index 0000000000..8377cf8938 --- /dev/null +++ b/src/Exec_CompareClusters.cpp @@ -0,0 +1,134 @@ +#include "Exec_CompareClusters.h" +#include "CpptrajStdio.h" +#include "DataSet_1D.h" + +/** CONSTRUCTOR. */ +Exec_CompareClusters::Exec_CompareClusters() : + Exec(GENERAL) +{ + SetHidden( true ); +} + +/** Get cluster number vs time data set. */ +DataSet* Exec_CompareClusters::getClusterSet(std::string const& dsname, DataSetList const& DSL) { + DataSet* ds = DSL.GetDataSet( dsname ); + if (ds == 0) { + mprinterr("Error: '%s' does not correspond to a data set.\n", dsname.c_str()); + return 0; + } + if (ds->Group() != DataSet::SCALAR_1D) { + mprinterr("Error: '%s' is not a 1D scalar data set.\n", dsname.c_str()); + return 0; + } + return ds; +} + +/** For tracking same/different frames in clusters. */ +class ClusterStat { + public: + ClusterStat() : n_in_common_(0) {} + /// Both sets had frame assigned to same cluster + void FrameInCommon() { n_in_common_++; } + /// Increment counter for specified set only + void IncrementSet(unsigned int i) { + if (i >= n_in_set_.size()) + n_in_set_.resize(i + 1, 0); + n_in_set_[i]++; + } + /// Header to stdout + static void Header() { + mprintf("%-12s %12s %12s %12s\n", "#Cluster", "Common", "Set0", "Set1"); + } + /// Print to stdout + void Print(unsigned int cnum) { + if (n_in_set_.size() < 2) n_in_set_.resize(2, 0); + mprintf("%12i %12u %12u %12u\n", cnum, n_in_common_, n_in_set_[0], n_in_set_[1]); + } + private: + typedef std::vector Uarray; + + unsigned int n_in_common_; ///< Number of frames in common + Uarray n_in_set_; ///< Number of frames just in specified set +}; + +/** Compare cluster # vs time sets. */ +int Exec_CompareClusters::CompareClusters(DataSet* c0, DataSet* c1) const { + std::vector clusterStats; + + DataSet_1D const& set0 = static_cast( *c0 ); + DataSet_1D const& set1 = static_cast( *c1 ); + + unsigned int n0 = set0.Size(); + unsigned int n1 = set1.Size(); + + if (n0 != n1) { + mprintf("Warning: Size of '%s' (%u) != size of '%s' (%u)\n", + c0->legend(), n0, c1->legend(), n1); + } + + unsigned int noise0 = 0; + unsigned int noise1 = 0; + unsigned int idx0 = 0; + unsigned int idx1 = 0; + while (idx0 < n0 && idx1 < n1) + { + // TODO check for fractional component + int cnum0 = (int)set0.Dval(idx0); + int cnum1 = (int)set1.Dval(idx1); + + if (cnum0 == -1) noise0++; + if (cnum1 == -1) noise1++; + + if (cnum0 != -1 && cnum0 == cnum1) { + // This frame was assigned the same cluster in both sets + if ((unsigned int)cnum0 >= clusterStats.size()) + clusterStats.resize(cnum0+1); + clusterStats[cnum0].FrameInCommon(); + } else { + // This frame was assigned different clusters + if (cnum0 != -1) { + if ((unsigned int)cnum0 >= clusterStats.size()) + clusterStats.resize(cnum0+1); + clusterStats[cnum0].IncrementSet( 0 ); + } + if (cnum1 != -1) { + if ((unsigned int)cnum1 >= clusterStats.size()) + clusterStats.resize(cnum1+1); + clusterStats[cnum1].IncrementSet( 1 ); + } + } + + idx0++; + idx1++; + } + + mprintf("\tSet 0: %u noise frames\n", noise0); + mprintf("\tSet 1: %u noise frames\n", noise1); + + ClusterStat::Header(); + for (std::vector::iterator it = clusterStats.begin(); + it != clusterStats.end(); ++it) + it->Print(it - clusterStats.begin()); + return 0; +} + +// Exec_CompareClusters::Help() +void Exec_CompareClusters::Help() const +{ + mprintf("\tset set \n"); +} + +// Exec_CompareClusters::Execute() +Exec::RetType Exec_CompareClusters::Execute(CpptrajState& State, ArgList& argIn) +{ + DataSet* c0 = getClusterSet( argIn.GetStringKey("set"), State.DSL() ); + if (c0 == 0) return CpptrajState::ERR; + DataSet* c1 = getClusterSet( argIn.GetStringKey("set"), State.DSL() ); + if (c1 == 0) return CpptrajState::ERR; + mprintf("\tCluster set 0: '%s'\n", c0->legend()); + mprintf("\tCluster set 1: '%s'\n", c1->legend()); + + if (CompareClusters(c0, c1)) return CpptrajState::ERR; + + return CpptrajState::OK; +} diff --git a/src/Exec_CompareClusters.h b/src/Exec_CompareClusters.h new file mode 100644 index 0000000000..7662aca9ee --- /dev/null +++ b/src/Exec_CompareClusters.h @@ -0,0 +1,16 @@ +#ifndef INC_EXEC_COMPARECLUSTERS_H +#define INC_EXEC_COMPARECLUSTERS_H +#include "Exec.h" +/// Compare two cluster number vs time sets +class Exec_CompareClusters : public Exec { + public: + Exec_CompareClusters(); + void Help() const; + DispatchObject* Alloc() const { return (DispatchObject*)new Exec_CompareClusters(); } + RetType Execute(CpptrajState&, ArgList&); + private: + static DataSet* getClusterSet(std::string const&, DataSetList const&); + + int CompareClusters(DataSet*, DataSet*) const; +}; +#endif diff --git a/src/RNG.cpp b/src/RNG.cpp index 601b005b69..46b0d3755a 100644 --- a/src/RNG.cpp +++ b/src/RNG.cpp @@ -11,7 +11,13 @@ int Cpptraj::RNG::Set_Seed(int iseedIn) { if (iseedIn <= 0 ) { clock_t wallclock = clock(); iseed_ = (int)wallclock; - mprintf("Random_Number: seed is <= 0, using wallclock time as seed (%i)\n", iseed_); + //time_t currentTime = time(0); + //iseed_ = (int)currentTime; + // Since time_t/clock_t are typically something like unsigned long, + // need to protect against overflows. + if (iseed_ < 0) iseed_ = -iseed_; + mprintf("Random_Number: seed is <= 0, using wallclock time as seed ( %i )\n", iseed_); + //mprintf("Random_Number: seed is <= 0, using time as seed (%i)\n", iseed_); } else iseed_ = iseedIn; // Set up specific to RNG diff --git a/src/Version.h b/src/Version.h index 7ca66d77d1..e7e284e3bc 100644 --- a/src/Version.h +++ b/src/Version.h @@ -12,7 +12,7 @@ * Whenever a number that precedes is incremented, all subsequent * numbers should be reset to 0. */ -#define CPPTRAJ_INTERNAL_VERSION "V6.2.2" +#define CPPTRAJ_INTERNAL_VERSION "V6.2.3" /// PYTRAJ relies on this #define CPPTRAJ_VERSION_STRING CPPTRAJ_INTERNAL_VERSION #endif diff --git a/src/cpptrajdepend b/src/cpptrajdepend index f32546bd69..17b07b6590 100644 --- a/src/cpptrajdepend +++ b/src/cpptrajdepend @@ -92,7 +92,7 @@ AnalysisList.o : AnalysisList.cpp ActionState.h Analysis.h AnalysisList.h Analys Analysis_AmdBias.o : Analysis_AmdBias.cpp ActionState.h Analysis.h AnalysisState.h Analysis_AmdBias.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_double.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Analysis_AutoCorr.o : Analysis_AutoCorr.cpp ActionState.h Analysis.h AnalysisState.h Analysis_AutoCorr.h ArgList.h ArrayIterator.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h ComplexArray.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_Vector.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Analysis_Average.o : Analysis_Average.cpp ActionState.h Analysis.h AnalysisState.h Analysis_Average.h ArgList.h Array1D.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h -Analysis_Clustering.o : Analysis_Clustering.cpp ActionState.h Analysis.h AnalysisState.h Analysis_Clustering.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Cluster/Algorithm.h Cluster/BestReps.h Cluster/CentroidArray.h Cluster/Cframes.h Cluster/Control.h Cluster/DrawGraph.h Cluster/List.h Cluster/Metric.h Cluster/MetricArray.h Cluster/Node.h Cluster/Sieve.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h +Analysis_Clustering.o : Analysis_Clustering.cpp ActionState.h Analysis.h AnalysisState.h Analysis_Clustering.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Cluster/Algorithm.h Cluster/BestReps.h Cluster/CentroidArray.h Cluster/Cframes.h Cluster/Control.h Cluster/DrawGraph.h Cluster/List.h Cluster/Metric.h Cluster/MetricArray.h Cluster/Node.h Cluster/Sieve.h Cluster/Silhouette.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Analysis_ConstantPHStats.o : Analysis_ConstantPHStats.cpp ActionState.h Analysis.h AnalysisState.h Analysis_ConstantPHStats.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h Cph.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_Mesh.h DataSet_pH.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h Spline.h StringRoutines.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Analysis_Corr.o : Analysis_Corr.cpp ActionState.h Analysis.h AnalysisState.h Analysis_Corr.h ArgList.h ArrayIterator.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h ComplexArray.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_Vector.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h Analysis_CrankShaft.o : Analysis_CrankShaft.cpp ActionState.h Analysis.h AnalysisState.h Analysis_CrankShaft.h Analysis_Statistics.h ArgList.h Array1D.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_float.h DataSet_integer.h DataSet_string.h Dimension.h DispatchObject.h FileIO.h FileName.h FileTypes.h Frame.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TypeNameHolder.h Unit.h Vec3.h @@ -150,7 +150,7 @@ ClusterMap.o : ClusterMap.cpp AssociatedData.h ClusterMap.h Constants.h CpptrajF Cmd.o : Cmd.cpp Cmd.h DispatchObject.h CmdInput.o : CmdInput.cpp CmdInput.h StringRoutines.h CmdList.o : CmdList.cpp Cmd.h CmdList.h DispatchObject.h -Command.o : Command.cpp Action.h ActionFrameCounter.h ActionList.h ActionState.h ActionTopWriter.h Action_Align.h Action_Angle.h Action_AreaPerMol.h Action_AtomMap.h Action_AtomicCorr.h Action_AtomicFluct.h Action_AutoImage.h Action_Average.h Action_Bounds.h Action_Box.h Action_Center.h Action_Channel.h Action_CheckChirality.h Action_CheckStructure.h Action_Closest.h Action_ClusterDihedral.h Action_Contacts.h Action_CreateCrd.h Action_CreateReservoir.h Action_DNAionTracker.h Action_DSSP.h Action_Density.h Action_Diffusion.h Action_Dihedral.h Action_DihedralRMS.h Action_Dipole.h Action_DistRmsd.h Action_Distance.h Action_Energy.h Action_Esander.h Action_FilterByData.h Action_FixAtomOrder.h Action_FixImagedBonds.h Action_GIST.h Action_Grid.h Action_GridFreeEnergy.h Action_HydrogenBond.h Action_Image.h Action_InfraredSpectrum.h Action_Jcoupling.h Action_LESsplit.h Action_LIE.h Action_LipidOrder.h Action_MakeStructure.h Action_Mask.h Action_Matrix.h Action_MinImage.h Action_Molsurf.h Action_MultiDihedral.h Action_MultiPucker.h Action_MultiVector.h Action_NAstruct.h Action_NMRrst.h Action_NativeContacts.h Action_OrderParameter.h Action_Outtraj.h Action_PairDist.h Action_Pairwise.h Action_Principal.h Action_Projection.h Action_Pucker.h Action_Radgyr.h Action_Radial.h Action_RandomizeIons.h Action_Remap.h Action_ReplicateCell.h Action_Rmsd.h Action_Rotate.h Action_RunningAvg.h Action_STFC_Diffusion.h Action_Scale.h Action_SetVelocity.h Action_Spam.h Action_Strip.h Action_Surf.h Action_SymmetricRmsd.h Action_Temperature.h Action_Time.h Action_Translate.h Action_Unstrip.h Action_Unwrap.h Action_Vector.h Action_VelocityAutoCorr.h Action_Volmap.h Action_Volume.h Action_Watershell.h Action_XtalSymm.h Analysis.h AnalysisList.h AnalysisState.h Analysis_AmdBias.h Analysis_AutoCorr.h Analysis_Average.h Analysis_Clustering.h Analysis_ConstantPHStats.h Analysis_Corr.h Analysis_CrankShaft.h Analysis_CrdFluct.h Analysis_CrossCorr.h Analysis_CurveFit.h Analysis_Divergence.h Analysis_EvalPlateau.h Analysis_FFT.h Analysis_HausdorffDistance.h Analysis_Hist.h Analysis_IRED.h Analysis_Integrate.h Analysis_KDE.h Analysis_Lifetime.h Analysis_LowestCurve.h Analysis_Matrix.h Analysis_MeltCurve.h Analysis_Modes.h Analysis_MultiHist.h Analysis_Multicurve.h Analysis_Overlap.h Analysis_PhiPsi.h Analysis_Regression.h Analysis_RemLog.h Analysis_Rms2d.h Analysis_RmsAvgCorr.h Analysis_Rotdif.h Analysis_RunningAvg.h Analysis_Slope.h Analysis_Spline.h Analysis_State.h Analysis_Statistics.h Analysis_TI.h Analysis_Timecorr.h Analysis_VectorMath.h Analysis_Wavelet.h ArgList.h Array1D.h ArrayIterator.h AssociatedData.h Atom.h AtomMap.h AtomMask.h AtomType.h AxisType.h BaseIOtype.h Box.h BoxArgs.h BufferedLine.h CharMask.h Cluster/Algorithm.h Cluster/BestReps.h Cluster/CentroidArray.h Cluster/Cframes.h Cluster/Control.h Cluster/DrawGraph.h Cluster/List.h Cluster/Metric.h Cluster/MetricArray.h Cluster/Node.h Cluster/Sieve.h ClusterMap.h Cmd.h CmdInput.h CmdList.h Command.h CompactFrameArray.h ComplexArray.h Constants.h Constraints.h ControlBlock.h ControlBlock_For.h CoordinateInfo.h Corr.h Cph.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataFilter.h DataSet.h DataSetList.h DataSet_1D.h DataSet_2D.h DataSet_3D.h DataSet_Coords.h DataSet_Coords_CRD.h DataSet_Coords_REF.h DataSet_GridFlt.h DataSet_Mat3x3.h DataSet_MatrixDbl.h DataSet_MatrixFlt.h DataSet_Mesh.h DataSet_Modes.h DataSet_RemLog.h DataSet_Vector.h DataSet_double.h DataSet_float.h DataSet_integer.h DataSet_integer_mem.h DataSet_pH.h DataSet_string.h Deprecated.h DihedralSearch.h Dimension.h DispatchObject.h Energy.h Energy_Sander.h EnsembleIn.h EnsembleOutList.h Ewald.h EwaldOptions.h Ewald_ParticleMesh.h ExclusionArray.h Exec.h Exec_AddMissingRes.h Exec_Analyze.h Exec_Calc.h Exec_CatCrd.h Exec_Change.h Exec_ClusterMap.h Exec_CombineCoords.h Exec_Commands.h Exec_CompareTop.h Exec_CrdAction.h Exec_CrdOut.h Exec_CreateSet.h Exec_DataFile.h Exec_DataFilter.h Exec_DataSetCmd.h Exec_Emin.h Exec_Flatten.h Exec_GenerateAmberRst.h Exec_Graft.h Exec_Help.h Exec_LoadCrd.h Exec_LoadTraj.h Exec_ParallelAnalysis.h Exec_ParmBox.h Exec_ParmSolvent.h Exec_ParmStrip.h Exec_ParmWrite.h Exec_PermuteDihedrals.h Exec_Precision.h Exec_PrepareForLeap.h Exec_PrintData.h Exec_Random.h Exec_ReadData.h Exec_ReadEnsembleData.h Exec_ReadInput.h Exec_RotateDihedral.h Exec_RunAnalysis.h Exec_ScaleDihedralK.h Exec_SequenceAlign.h Exec_Set.h Exec_Show.h Exec_SortEnsembleData.h Exec_SplitCoords.h Exec_System.h Exec_Top.h Exec_Traj.h Exec_UpdateParameters.h Exec_ViewRst.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h GIST_PME.h Grid.h GridAction.h GridBin.h HistBin.h Hungarian.h ImageOption.h ImageTypes.h InputTrajCommon.h MapAtom.h MaskArray.h MaskToken.h Matrix.h Matrix_3x3.h MetaData.h Molecule.h NameType.h NetcdfFile.h OnlineVarT.h OutputTrajCommon.h PDBfile.h PairList.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h PubFFT.h Pucker.h Pucker_PuckerMask.h Pucker_PuckerSearch.h Pucker_PuckerToken.h RPNcalc.h Random.h Range.h ReferenceAction.h ReferenceFrame.h RemdReservoirNC.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h Spline.h SplineFxnTable.h StructureCheck.h SymbolExporting.h SymmetricRmsdCalc.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h TrajectoryFile.h Trajin.h TrajinList.h TrajoutList.h Trajout_Single.h TypeNameHolder.h Unit.h Vec3.h cuda_kernels/GistCudaSetup.cuh helpme_standalone.h molsurf.h +Command.o : Command.cpp Action.h ActionFrameCounter.h ActionList.h ActionState.h ActionTopWriter.h Action_Align.h Action_Angle.h Action_AreaPerMol.h Action_AtomMap.h Action_AtomicCorr.h Action_AtomicFluct.h Action_AutoImage.h Action_Average.h Action_Bounds.h Action_Box.h Action_Center.h Action_Channel.h Action_CheckChirality.h Action_CheckStructure.h Action_Closest.h Action_ClusterDihedral.h Action_Contacts.h Action_CreateCrd.h Action_CreateReservoir.h Action_DNAionTracker.h Action_DSSP.h Action_Density.h Action_Diffusion.h Action_Dihedral.h Action_DihedralRMS.h Action_Dipole.h Action_DistRmsd.h Action_Distance.h Action_Energy.h Action_Esander.h Action_FilterByData.h Action_FixAtomOrder.h Action_FixImagedBonds.h Action_GIST.h Action_Grid.h Action_GridFreeEnergy.h Action_HydrogenBond.h Action_Image.h Action_InfraredSpectrum.h Action_Jcoupling.h Action_LESsplit.h Action_LIE.h Action_LipidOrder.h Action_MakeStructure.h Action_Mask.h Action_Matrix.h Action_MinImage.h Action_Molsurf.h Action_MultiDihedral.h Action_MultiPucker.h Action_MultiVector.h Action_NAstruct.h Action_NMRrst.h Action_NativeContacts.h Action_OrderParameter.h Action_Outtraj.h Action_PairDist.h Action_Pairwise.h Action_Principal.h Action_Projection.h Action_Pucker.h Action_Radgyr.h Action_Radial.h Action_RandomizeIons.h Action_Remap.h Action_ReplicateCell.h Action_Rmsd.h Action_Rotate.h Action_RunningAvg.h Action_STFC_Diffusion.h Action_Scale.h Action_SetVelocity.h Action_Spam.h Action_Strip.h Action_Surf.h Action_SymmetricRmsd.h Action_Temperature.h Action_Time.h Action_Translate.h Action_Unstrip.h Action_Unwrap.h Action_Vector.h Action_VelocityAutoCorr.h Action_Volmap.h Action_Volume.h Action_Watershell.h Action_XtalSymm.h Analysis.h AnalysisList.h AnalysisState.h Analysis_AmdBias.h Analysis_AutoCorr.h Analysis_Average.h Analysis_Clustering.h Analysis_ConstantPHStats.h Analysis_Corr.h Analysis_CrankShaft.h Analysis_CrdFluct.h Analysis_CrossCorr.h Analysis_CurveFit.h Analysis_Divergence.h Analysis_EvalPlateau.h Analysis_FFT.h Analysis_HausdorffDistance.h Analysis_Hist.h Analysis_IRED.h Analysis_Integrate.h Analysis_KDE.h Analysis_Lifetime.h Analysis_LowestCurve.h Analysis_Matrix.h Analysis_MeltCurve.h Analysis_Modes.h Analysis_MultiHist.h Analysis_Multicurve.h Analysis_Overlap.h Analysis_PhiPsi.h Analysis_Regression.h Analysis_RemLog.h Analysis_Rms2d.h Analysis_RmsAvgCorr.h Analysis_Rotdif.h Analysis_RunningAvg.h Analysis_Slope.h Analysis_Spline.h Analysis_State.h Analysis_Statistics.h Analysis_TI.h Analysis_Timecorr.h Analysis_VectorMath.h Analysis_Wavelet.h ArgList.h Array1D.h ArrayIterator.h AssociatedData.h Atom.h AtomMap.h AtomMask.h AtomType.h AxisType.h BaseIOtype.h Box.h BoxArgs.h BufferedLine.h CharMask.h Cluster/Algorithm.h Cluster/BestReps.h Cluster/CentroidArray.h Cluster/Cframes.h Cluster/Control.h Cluster/DrawGraph.h Cluster/List.h Cluster/Metric.h Cluster/MetricArray.h Cluster/Node.h Cluster/Sieve.h Cluster/Silhouette.h ClusterMap.h Cmd.h CmdInput.h CmdList.h Command.h CompactFrameArray.h ComplexArray.h Constants.h Constraints.h ControlBlock.h ControlBlock_For.h CoordinateInfo.h Corr.h Cph.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataFilter.h DataSet.h DataSetList.h DataSet_1D.h DataSet_2D.h DataSet_3D.h DataSet_Coords.h DataSet_Coords_CRD.h DataSet_Coords_REF.h DataSet_GridFlt.h DataSet_Mat3x3.h DataSet_MatrixDbl.h DataSet_MatrixFlt.h DataSet_Mesh.h DataSet_Modes.h DataSet_RemLog.h DataSet_Vector.h DataSet_double.h DataSet_float.h DataSet_integer.h DataSet_integer_mem.h DataSet_pH.h DataSet_string.h Deprecated.h DihedralSearch.h Dimension.h DispatchObject.h Energy.h Energy_Sander.h EnsembleIn.h EnsembleOutList.h Ewald.h EwaldOptions.h Ewald_ParticleMesh.h ExclusionArray.h Exec.h Exec_AddMissingRes.h Exec_Analyze.h Exec_Calc.h Exec_CatCrd.h Exec_Change.h Exec_ClusterMap.h Exec_CombineCoords.h Exec_Commands.h Exec_CompareClusters.h Exec_CompareTop.h Exec_CrdAction.h Exec_CrdOut.h Exec_CreateSet.h Exec_DataFile.h Exec_DataFilter.h Exec_DataSetCmd.h Exec_Emin.h Exec_Flatten.h Exec_GenerateAmberRst.h Exec_Graft.h Exec_Help.h Exec_LoadCrd.h Exec_LoadTraj.h Exec_ParallelAnalysis.h Exec_ParmBox.h Exec_ParmSolvent.h Exec_ParmStrip.h Exec_ParmWrite.h Exec_PermuteDihedrals.h Exec_Precision.h Exec_PrepareForLeap.h Exec_PrintData.h Exec_Random.h Exec_ReadData.h Exec_ReadEnsembleData.h Exec_ReadInput.h Exec_RotateDihedral.h Exec_RunAnalysis.h Exec_ScaleDihedralK.h Exec_SequenceAlign.h Exec_Set.h Exec_Show.h Exec_SortEnsembleData.h Exec_SplitCoords.h Exec_System.h Exec_Top.h Exec_Traj.h Exec_UpdateParameters.h Exec_ViewRst.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h GIST_PME.h Grid.h GridAction.h GridBin.h HistBin.h Hungarian.h ImageOption.h ImageTypes.h InputTrajCommon.h MapAtom.h MaskArray.h MaskToken.h Matrix.h Matrix_3x3.h MetaData.h Molecule.h NameType.h NetcdfFile.h OnlineVarT.h OutputTrajCommon.h PDBfile.h PairList.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h PubFFT.h Pucker.h Pucker_PuckerMask.h Pucker_PuckerSearch.h Pucker_PuckerToken.h RPNcalc.h Random.h Range.h ReferenceAction.h ReferenceFrame.h RemdReservoirNC.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h Spline.h SplineFxnTable.h StructureCheck.h SymbolExporting.h SymmetricRmsdCalc.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h TrajectoryFile.h Trajin.h TrajinList.h TrajoutList.h Trajout_Single.h TypeNameHolder.h Unit.h Vec3.h cuda_kernels/GistCudaSetup.cuh helpme_standalone.h molsurf.h CompactFrameArray.o : CompactFrameArray.cpp Box.h CompactFrameArray.h CoordinateInfo.h CpptrajStdio.h Matrix_3x3.h Parallel.h ReplicaDimArray.h Vec3.h ComplexArray.o : ComplexArray.cpp ArrayIterator.h ComplexArray.h Constraints.o : Constraints.cpp ArgList.h Atom.h AtomMask.h AtomType.h Box.h CharMask.h Constants.h Constraints.h CoordinateInfo.h CpptrajStdio.h FileName.h Frame.h MaskToken.h Matrix_3x3.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReplicaDimArray.h Residue.h Segment.h SymbolExporting.h Topology.h TypeNameHolder.h Unit.h Vec3.h @@ -247,6 +247,7 @@ Exec_Change.o : Exec_Change.cpp Action.h ActionList.h ActionState.h Analysis.h A Exec_ClusterMap.o : Exec_ClusterMap.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h ArrayIterator.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h ClusterMap.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_2D.h DataSet_Coords.h DataSet_Coords_REF.h DataSet_MatrixFlt.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_ClusterMap.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Exec_CombineCoords.o : Exec_CombineCoords.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_CombineCoords.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Exec_Commands.o : Exec_Commands.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_Commands.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h +Exec_CompareClusters.o : Exec_CompareClusters.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_1D.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_CompareClusters.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Exec_CompareTop.o : Exec_CompareTop.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_CompareTop.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Exec_CrdAction.o : Exec_CrdAction.cpp Action.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Cmd.h CmdList.h Command.h CompactFrameArray.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_CRD.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_CrdAction.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h ProgressBar.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h Trajin.h TrajinList.h TrajoutList.h TypeNameHolder.h Unit.h Vec3.h Exec_CrdOut.o : Exec_CrdOut.cpp Action.h ActionFrameCounter.h ActionList.h ActionState.h Analysis.h AnalysisList.h AnalysisState.h ArgList.h AssociatedData.h Atom.h AtomMask.h AtomType.h BaseIOtype.h Box.h Constants.h CoordinateInfo.h CpptrajFile.h CpptrajState.h CpptrajStdio.h DataFile.h DataFileList.h DataSet.h DataSetList.h DataSet_Coords.h DataSet_Coords_REF.h Dimension.h DispatchObject.h EnsembleIn.h EnsembleOutList.h Exec.h Exec_CrdOut.h FileIO.h FileName.h FileTypes.h Frame.h FramePtrArray.h InputTrajCommon.h MaskToken.h Matrix_3x3.h MetaData.h Molecule.h NameType.h OutputTrajCommon.h Parallel.h ParameterHolders.h ParameterSet.h ParameterTypes.h ProgressBar.h Range.h ReferenceFrame.h ReplicaDimArray.h ReplicaInfo.h Residue.h Segment.h SymbolExporting.h TextFormat.h Timer.h Topology.h TrajFrameCounter.h TrajectoryFile.h Trajin.h TrajinList.h TrajoutList.h Trajout_Single.h TypeNameHolder.h Unit.h Vec3.h diff --git a/src/cpptrajfiles b/src/cpptrajfiles index e3befea6c9..fa16387fa7 100644 --- a/src/cpptrajfiles +++ b/src/cpptrajfiles @@ -247,6 +247,7 @@ COMMON_SOURCES= \ Exec_ClusterMap.cpp \ Exec_CombineCoords.cpp \ Exec_Commands.cpp \ + Exec_CompareClusters.cpp \ Exec_CompareTop.cpp \ Exec_CrdAction.cpp \ Exec_CrdOut.cpp \ diff --git a/test/Test_Cluster/RunTest.sh b/test/Test_Cluster/RunTest.sh index d422b8cf34..bf6ea5bfd5 100755 --- a/test/Test_Cluster/RunTest.sh +++ b/test/Test_Cluster/RunTest.sh @@ -7,7 +7,8 @@ CleanFiles cluster.in cnumvtime.dat avg.summary.dat summary.dat CpptrajPairDist \ cpop.agr summary2.dat Cmatrix.nccmatrix Cmatrix.cmatrix summary3.dat \ normpop.agr normframe.agr cascii.dat.save cascii.dat pw.dat \ - cinfo.dat mysil.cluster.dat mysil.frame.dat cascii?.info + cinfo.dat mysil.cluster.dat mysil.frame.dat cascii?.info \ + c1.cmetrics.dat TESTNAME='Hierarchical agglomerative clustering tests' #Requires netcdf @@ -19,6 +20,8 @@ parm ../tz2.parm7 trajin ../tz2.crd cluster C1 :2-10 clusters 3 epsilon 4.0 out cnumvtime.dat info cinfo.dat sil mysil summary avg.summary.dat nofit savepairdist cpopvtime cpop.agr pairdist Cmatrix.cmatrix cluster crd1 :2-10 clusters 3 epsilon 4.0 summary summary.dat complete nofit loadpairdist pairdist Cmatrix.cmatrix +run +writedata c1.cmetrics.dat C1[DBI] C1[PSF] C1[SSRSST] EOF RunCpptraj "Cluster command test, in-memory pairwise distances." DoTest cnumvtime.dat.save cnumvtime.dat @@ -26,6 +29,7 @@ DoTest cinfo.dat.save cinfo.dat DoTest mysil.cluster.dat.save mysil.cluster.dat DoTest mysil.frame.dat.save mysil.frame.dat DoTest avg.summary.dat.save avg.summary.dat +DoTest c1.cmetrics.dat.save c1.cmetrics.dat DoTest summary.dat.save summary.dat DoTest cpop.agr.save cpop.agr diff --git a/test/Test_Cluster/c1.cmetrics.dat.save b/test/Test_Cluster/c1.cmetrics.dat.save new file mode 100644 index 0000000000..7b25ab2b68 --- /dev/null +++ b/test/Test_Cluster/c1.cmetrics.dat.save @@ -0,0 +1,2 @@ +#Frame C1[DBI] C1[PSF] C1[SSRSST] + 1 1.0542 31.6034 0.7332