From 223b68b656591fa48fe951c727010e8624e25187 Mon Sep 17 00:00:00 2001
From: "Martin D. Weinberg" <weinberg@astro.umass.edu>
Date: Tue, 25 Feb 2025 15:31:35 -0500
Subject: [PATCH 1/3] An updated k-means analysis that separates printing from
 analysis.  The primary routine is now the full channel analysis, not the
 per-channel analysis in pyEXP.

---
 expui/expMSSA.H       |  10 ++-
 expui/expMSSA.cc      | 194 +++++++++++++++++++++++++++++-------------
 pyEXP/MSSAWrappers.cc |  51 ++++++++---
 3 files changed, 185 insertions(+), 70 deletions(-)
diff --git a/expui/expMSSA.H b/expui/expMSSA.H
index 3c1d180c6..60fe5e6b2 100644
--- a/expui/expMSSA.H
+++ b/expui/expMSSA.H
@@ -249,7 +249,15 @@ namespace MSSA
        @param toTerm write to stdout if true
        @param toFile write to file if true
     */
-    void kmeans(int clusters, bool toTerm=true, bool toFile=false);
+    void kmeansPrint(int clusters, bool toTerm=true, bool toFile=false);
+
+    //! Get Kmeans analysis per channel
+    std::tuple<std::vector<int>, std::vector<double>, double>
+    kmeansChannel(int clusters, Key key);
+
+    //! Get Kmeans analysis for all channels
+    std::tuple<std::vector<int>, std::vector<double>, double>
+    kmeans(int clusters);
 
     //! Save current MSSA state to an HDF5 file with the given prefix
     void saveState(const std::string& prefix);
diff --git a/expui/expMSSA.cc b/expui/expMSSA.cc
index 2ff5079dd..d4e23b8ab 100644
--- a/expui/expMSSA.cc
+++ b/expui/expMSSA.cc
@@ -1063,10 +1063,10 @@ namespace MSSA {
 #endif
   }
 
-  void expMSSA::kmeans(int clusters, bool toTerm, bool toFile)
+  void expMSSA::kmeansPrint(int clusters, bool toTerm, bool toFile)
   {
     if (clusters==0) {
-      std::cout << "expMSSA::kmeans: you need clusters>0" << std::endl;
+      std::cout << "expMSSA::kmeansPrint: you need clusters>0" << std::endl;
       return;
     }
 
@@ -1087,41 +1087,22 @@ namespace MSSA {
 	std::cerr << "Error opening file <" << filename << ">" << std::endl;
     }
 
-    // W-correlation-based distance functor
-    //
-    KMeans::WcorrDistance dist(numT, numW);
-
     for (auto u : mean) {
-      // Pack point array
-      //
-      std::vector<KMeans::Ptr> data;
-      for (int j=0; j<ncomp; j++) {
-	data.push_back(std::make_shared<KMeans::Point>(numT));
-	for (int i=0; i<numT; i++) data.back()->x[i] = RC[u.first](i, j);
-      }
-
-      // Initialize k-means routine
-      //
-      KMeans::kMeansClustering kMeans(data);
-
-      // Run 100 iterations
-      //
-      kMeans.iterate(dist, 100, clusters, 2, false);
 
-      // Retrieve cluster associations
-      //
-      auto results = kMeans.get_results();
+      auto [id, dd, tol] = kmeansChannel(clusters, u.first);
 
       // Write to file
       //
       if (out) {
 	out << std::string(60, '-') << std::endl
 	    << " *** n=" << u.first << std::endl
+	    << " *** tol=" << tol   << std::endl
 	    << std::string(60, '-') << std::endl;
 
-	for (int j=0; j<results.size(); j++) {
+	for (int j=0; j<id.size(); j++) {
 	  out << std::setw(6)  << j
-	      << std::setw(12) << std::get<1>(results[j])
+	      << std::setw(12) << id[j]
+	      << std::setw(16) << dd[j]
 	      << std::endl;
 	}
       }
@@ -1133,9 +1114,10 @@ namespace MSSA {
 		  << " *** n=" << u.first << std::endl
 		  << std::string(60, '-') << std::endl;
 
-	for (int j=0; j<results.size(); j++) {
-	  std::cout << std::setw(6)  << j
-		    << std::setw(12) << std::get<1>(results[j])
+	for (int j=0; j<id.size(); j++) {
+	  std::cout << std::setw( 6) << j
+		    << std::setw( 9) << id[j]
+		    << std::setw(16) << dd[j]
 		    << std::endl;
 	}
       }
@@ -1143,41 +1125,20 @@ namespace MSSA {
 
     if (params["allchan"]) {
 
-      // Pack point array
-      //
-      std::vector<KMeans::Ptr> data;
-      int sz = mean.size();
-      for (int j=0; j<ncomp; j++) {
-	data.push_back(std::make_shared<KMeans::Point>(numT*sz));
-	int c = 0;
-	for (auto u : mean) {
-	  for (int i=0; i<numT; i++) data.back()->x[c++] = RC[u.first](i, j);
-	}
-      }
-
-      // Initialize k-means routine
-      //
-      KMeans::kMeansClustering kMeans(data);
-
-      // Run 100 iterations
-      //
-      KMeans::WcorrDistMulti dist2(numT, numW, sz);
-      kMeans.iterate(dist2, 100, clusters, 2, false);
-
-      // Retrieve cluster associations
-      //
-      auto results = kMeans.get_results();
+      auto [id, dd, tol] = kmeans(clusters);
 
       // Write to file
       //
       if (out) {
 	out << std::string(60, '-') << std::endl
 	    << " *** total"         << std::endl
+	    << " *** tol=" << tol   << std::endl
 	    << std::string(60, '-') << std::endl;
 
-	for (int j=0; j<results.size(); j++) {
-	  out << std::setw(6) << j
-	      << std::setw(9) << std::get<1>(results[j])
+	for (int j=0; j<id.size(); j++) {
+	  out << std::setw( 6) << j
+	      << std::setw( 9) << id[j]
+	      << std::setw(16) << dd[j]
 	      << std::endl;
 	}
       }
@@ -1189,9 +1150,10 @@ namespace MSSA {
 		  << " *** total"         << std::endl
 		  << std::string(60, '-') << std::endl;
 
-	for (int j=0; j<results.size(); j++) {
-	  std::cout << std::setw(6) << j
-		    << std::setw(9) << std::get<1>(results[j])
+	for (int j=0; j<id.size(); j++) {
+	  std::cout << std::setw( 6) << j
+		    << std::setw( 9) << id[j]
+		    << std::setw(16) << dd[j]
 		    << std::endl;
 	}
       }
@@ -1206,6 +1168,120 @@ namespace MSSA {
 
   }
 
+  
+  std::tuple<std::vector<int>, std::vector<double>, double>
+  expMSSA::kmeansChannel(int clusters, Key key)
+  {
+    if (clusters==0) {
+      throw std::invalid_argument("expMSSA::kmeansChannel: clusters==0");
+    }
+
+    if (mean.find(key) == mean.end()) {
+      std::ostringstream sout;
+      sout << "expMSSA::kmeansKey: key <" << key << "> not found";
+      throw std::invalid_argument(sout.str());
+    }
+
+    KMeans::WcorrDistance dist(numT, numW);
+
+    // Pack point array
+    //
+    std::vector<KMeans::Ptr> data;
+    for (int j=0; j<ncomp; j++) {
+      data.push_back(std::make_shared<KMeans::Point>(numT));
+      for (int i=0; i<numT; i++) data.back()->x[i] = RC[key](i, j);
+    }
+
+    // Initialize k-means routine
+    //
+    KMeans::kMeansClustering kMeans(data);
+
+    // Run 100 iterations
+    //
+    kMeans.iterate(dist, 1000, clusters, 1);
+
+    // Retrieve cluster associations
+    //
+    auto results = kMeans.get_results();
+    auto centers = kMeans.get_cen();
+
+    // Compute inertia
+    //
+    auto inertia = [&](int j, int id) -> double {
+      auto & cen = centers[id];
+      double d = 0.0;
+      for (int i=0; i<cen.size(); i++)
+	d += (cen[i] - data[j]->x[i])*(cen[i] - data[j]->x[i]);
+      return sqrt(d);
+    };
+
+    // Pack return vector
+    //
+    std::vector<int> retI;
+    std::vector<double> retD;
+    for (int j=0; j<results.size(); j++) {
+      retI.push_back(std::get<1>(results[j]));
+      retD.push_back(inertia(j, std::get<1>(results[j])));
+    }
+
+    return {retI, retD, kMeans.getTol()};
+  }
+
+  std::tuple<std::vector<int>, std::vector<double>, double>
+  expMSSA::kmeans(int clusters)
+  {
+    if (clusters==0) {
+      throw std::invalid_argument("expMSSA::kmeans: you need clusters>0");
+    }
+
+    // Pack point array
+    //
+    std::vector<KMeans::Ptr> data;
+    int sz = mean.size();
+    for (int j=0; j<ncomp; j++) {
+      data.push_back(std::make_shared<KMeans::Point>(numT*sz));
+      int c = 0;
+      for (auto u : mean) {
+	for (int i=0; i<numT; i++) data.back()->x[c++] = RC[u.first](i, j);
+      }
+    }
+
+    // Initialize k-means routine
+    //
+    KMeans::kMeansClustering kMeans(data);
+    
+    // Run 100 iterations
+    //
+    KMeans::WcorrDistMulti dist(numT, numW, sz);
+    kMeans.iterate(dist, 1000, clusters, 1);
+
+    // Retrieve cluster associations
+    //
+    auto results = kMeans.get_results();
+    auto centers = kMeans.get_cen();
+
+    // Compute inertia
+    //
+    auto inertia = [&](int j, int id) -> double {
+      auto & cen = centers[id];
+      double d = 0.0;
+      for (int i=0; i<cen.size(); i++)
+	d += (cen[i] - data[j]->x[i])*(cen[i] - data[j]->x[i]);
+      return sqrt(d);
+    };
+
+    // Pack return vector
+    //
+    std::vector<int> retI;
+    std::vector<double> retD;
+    for (int j=0; j<results.size(); j++) {
+      retI.push_back(std::get<1>(results[j]));
+      retD.push_back(inertia(j, std::get<1>(results[j])));
+    }
+
+    return {retI, retD, kMeans.getTol()};
+  }
+
   std::map<std::string, CoefClasses::CoefsPtr> expMSSA::getReconstructed(bool reconstructmean)
   {
     if (not reconstructed) {
diff --git a/pyEXP/MSSAWrappers.cc b/pyEXP/MSSAWrappers.cc
index aa5ef5c57..082bd9270 100644
--- a/pyEXP/MSSAWrappers.cc
+++ b/pyEXP/MSSAWrappers.cc
@@ -436,28 +436,59 @@ void MSSAtoolkitClasses(py::module &m) {
 
   f.def("kmeans", &expMSSA::kmeans,
 	py::arg("clusters") = 4,
-	py::arg("toTerm") = true,
-	py::arg("toFile") = false,
 	R"(
-        Do a k-means analysis on the reconstructed trajectory matrices to provide grouping insight
+        Do a k-means analysis on the reconstructed trajectory matrices for a single channel (specified key value) to
+        provide grouping insight.  A vector of channel indices that identify clusters is return in a vector ordered by PC
+        index.
 
         Parameters
         ----------
         clusters : int, default=4
             number of clusters for the k-means analysis
-        toTerm  : bool, default=True
-            flag indicating whether to output to the terminal (standard output)
-        toFile : bool
-            flag indicating whether to write the output to a file
 
         Returns
         -------
-        None
+        tuple : (numpy.niarray, numpy.ndarray, double)
+            The PC indices of the k-means clusters, distance from the centroid, and the
+            final update error
+
+        Notes
+        -----
+        The k-means partitions n vector observations into k clusters in which each observation belongs to the cluster with
+        the nearest centers while minimizing the variance within each cluster.  In this case, the vectors are the full
+        trajectory matrices and the distance is the distance between the trajectory matricies reconstructed from each
+        eigentriple from mSSA.  The distance used here is the Frobenius distance or matrix norm distance: the square root
+        of the sum of squares of all elements in the difference between two matrices.
+
+        This version does the analysis for all channels together, the most useful for estimating groups.  For individual
+        contributions by channel, use kmeansChannel.
+        )");
+
+  f.def("kmeansChannel", &expMSSA::kmeansChannel,
+	py::arg("clusters") = 4,
+	py::arg("key"),
+	R"(
+        Do a k-means analysis on the reconstructed trajectory matrices for a single channel (specified key value) to
+        provide grouping insight.  In most cases, you will want to use the kmeans() version which analyzes all channels
+        together.
+
+        Parameters
+        ----------
+        clusters : int, default=4
+            number of clusters for the k-means analysis
+        key : list(int)
+            identifier indices of the selected data channel
+
+        Returns
+        -------
+        tuple : (numpy.niarray, numpy.ndarray, double)
+            The PC indices of the k-means clusters, distance from the centroid, and the
+            final update error
 
         Notes
         -----
-        By default, results are written to the standard output. Set `toFile=True` to write 
-        the output to a file. The file name will be derived from the 'output' parameter.
+        This version does the analysis channel-by-channel.  You may wish to see all channels together using
+        kmeansTotal.  See kemans() for more details.
         )");
 
   f.def("contrib", &expMSSA::contributions,

From 3074340788b8086ab08193d5d9b2d9a7a1f30852 Mon Sep 17 00:00:00 2001
From: "Martin D. Weinberg" <weinberg@astro.umass.edu>
Date: Tue, 25 Feb 2025 18:26:45 -0500
Subject: [PATCH 2/3] Make the random shuffle seed for KMeans the default in
 expMSSA

---
 expui/KMeans.cc       | 18 ++++++++++++++----
 expui/expMSSA.H       | 16 ++++++++++++++--
 expui/expMSSA.cc      |  6 ++++--
 pyEXP/MSSAWrappers.cc |  6 ++++--
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/expui/KMeans.cc b/expui/KMeans.cc
index 9e220e10f..9a1a4f65e 100644
--- a/expui/KMeans.cc
+++ b/expui/KMeans.cc
@@ -13,18 +13,28 @@ namespace MSSA
     //
     cen.clear();
     
-    if (s>0) {			// Seed centers by stride
+    if (s>0) {
+      // Seed centers by stride
       for (int i=0; i<classes.size(); i+=s) {
 	if (cen.size()>=k) break;
 	cen.push_back(classes.at(i)->x);
       }
       k = cen.size();
-    } else {		       // obtain a seed from the system clock
+    } else {
+      // Obtain a seed from the system clock
       unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+      
+      // Make a random generator
       std::mt19937 gen(seed);
-      // Seed centers randomly
+
+      // Randomly shuffle a list of indexes
+      std::vector<int> indx;
+      for (int i=0; i<classes.size(); i++) indx.push_back(i);
+      std::shuffle(indx.begin(), indx.end(), gen);
+
+      // Seed centers randomly from the initial point list
       for (int i=0; i<k; ++i) {
-	cen.push_back(classes.at(gen() % classes.size())->x);
+	cen.push_back(classes.at(indx[i])->x);
       }
     }
     
diff --git a/expui/expMSSA.H b/expui/expMSSA.H
index 60fe5e6b2..7857d6146 100644
--- a/expui/expMSSA.H
+++ b/expui/expMSSA.H
@@ -243,19 +243,31 @@ namespace MSSA
     //! Create wcorrlation matricies and output PNG
     void wcorrPNG();
 
+    //@{
     /**
        Kmean analysis of the trajectories by PC with fixed cluster size
+    */
+
+    /** Perform Kmeans analysis for a given number of clusters and
+       print the results
 
        @param toTerm write to stdout if true
        @param toFile write to file if true
     */
     void kmeansPrint(int clusters, bool toTerm=true, bool toFile=false);
 
-    //! Get Kmeans analysis per channel
+    /** Get Kmeans analysis per channel
+
+	@param clusters is the number of clusters to seed
+	@param key is the channel id vector<int>
+    */
     std::tuple<std::vector<int>, std::vector<double>, double>
     kmeansChannel(int clusters, Key key);
 
-    //! Get Kmeans analysis for all channels
+    /** Get Kmeans analysis for all channels
+
+	@param clusters is the number of clusters to seed
+    */
     std::tuple<std::vector<int>, std::vector<double>, double>
     kmeans(int clusters);
 
diff --git a/expui/expMSSA.cc b/expui/expMSSA.cc
index d4e23b8ab..c6341470b 100644
--- a/expui/expMSSA.cc
+++ b/expui/expMSSA.cc
@@ -1198,7 +1198,8 @@ namespace MSSA {
 
     // Run 100 iterations
     //
-    kMeans.iterate(dist, 1000, clusters, 1);
+    // kMeans.iterate(dist, 1000, clusters, 1);
+    kMeans.iterate(dist, 1000, clusters);
 
     // Retrieve cluster associations
     //
@@ -1253,7 +1254,8 @@ namespace MSSA {
     // Run 100 iterations
     //
     KMeans::WcorrDistMulti dist(numT, numW, sz);
-    kMeans.iterate(dist, 1000, clusters, 1);
+    // kMeans.iterate(dist, 1000, clusters, 1);
+    kMeans.iterate(dist, 1000, clusters);
 
     // Retrieve cluster associations
     //
diff --git a/pyEXP/MSSAWrappers.cc b/pyEXP/MSSAWrappers.cc
index 082bd9270..2eb5e46ab 100644
--- a/pyEXP/MSSAWrappers.cc
+++ b/pyEXP/MSSAWrappers.cc
@@ -450,7 +450,8 @@ void MSSAtoolkitClasses(py::module &m) {
         -------
         tuple : (numpy.niarray, numpy.ndarray, double)
             The PC indices of the k-means clusters, distance from the centroid, and the
-            final update error
+            final update error.  A zero update error implies that the k-means algorithm
+            converged.
 
         Notes
         -----
@@ -483,7 +484,8 @@ void MSSAtoolkitClasses(py::module &m) {
         -------
         tuple : (numpy.niarray, numpy.ndarray, double)
             The PC indices of the k-means clusters, distance from the centroid, and the
-            final update error
+            final update error.  A zero update error implies that the k-means algorithm
+            converged.
 
         Notes
         -----

From 4f40e7c2ff4e1b29b9403cc9e424728cc387b833 Mon Sep 17 00:00:00 2001
From: "Martin D. Weinberg" <weinberg@astro.umass.edu>
Date: Tue, 25 Feb 2025 18:44:43 -0500
Subject: [PATCH 3/3] Allow the user to specify the seeding strategy for
 k-means.  2-strided by default, a good choice for quasi-periodic systems

---
 expui/expMSSA.H       | 12 +++++++++---
 expui/expMSSA.cc      | 25 +++++++++++++++----------
 pyEXP/MSSAWrappers.cc |  7 ++++++-
 3 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/expui/expMSSA.H b/expui/expMSSA.H
index 7857d6146..2fe4d0c3a 100644
--- a/expui/expMSSA.H
+++ b/expui/expMSSA.H
@@ -251,10 +251,15 @@ namespace MSSA
     /** Perform Kmeans analysis for a given number of clusters and
        print the results
 
+       @param clusters is the maximum number of clusters considered
+       @param stride is the seed strategy.  If positive, it is used to
+       select initial cluster centers by stride from the PC list.  If
+       it is zero, centers are selected randomly from the PC list
        @param toTerm write to stdout if true
        @param toFile write to file if true
     */
-    void kmeansPrint(int clusters, bool toTerm=true, bool toFile=false);
+    void kmeansPrint(int clusters, int stride,
+		     bool toTerm=true, bool toFile=false);
 
     /** Get Kmeans analysis per channel
 
@@ -262,14 +267,15 @@ namespace MSSA
 	@param key is the channel id vector<int>
     */
     std::tuple<std::vector<int>, std::vector<double>, double>
-    kmeansChannel(int clusters, Key key);
+    kmeansChannel(Key key, int clusters, int stride);
 
     /** Get Kmeans analysis for all channels
 
 	@param clusters is the number of clusters to seed
+	@param stride is the seeded strategy for initial centers
     */
     std::tuple<std::vector<int>, std::vector<double>, double>
-    kmeans(int clusters);
+    kmeans(int clusters, int stride);
 
     //! Save current MSSA state to an HDF5 file with the given prefix
     void saveState(const std::string& prefix);
diff --git a/expui/expMSSA.cc b/expui/expMSSA.cc
index c6341470b..106857ee2 100644
--- a/expui/expMSSA.cc
+++ b/expui/expMSSA.cc
@@ -1063,7 +1063,7 @@ namespace MSSA {
 #endif
   }
 
-  void expMSSA::kmeansPrint(int clusters, bool toTerm, bool toFile)
+  void expMSSA::kmeansPrint(int clusters, int stride, bool toTerm, bool toFile)
   {
     if (clusters==0) {
       std::cout << "expMSSA::kmeansPrint: you need clusters>0" << std::endl;
@@ -1089,7 +1089,7 @@ namespace MSSA {
 
     for (auto u : mean) {
 
-      auto [id, dd, tol] = kmeansChannel(clusters, u.first);
+      auto [id, dd, tol] = kmeansChannel(u.first, clusters, stride);
 
       // Write to file
       //
@@ -1125,7 +1125,7 @@ namespace MSSA {
 
     if (params["allchan"]) {
 
-      auto [id, dd, tol] = kmeans(clusters);
+      auto [id, dd, tol] = kmeans(clusters, stride);
 
       // Write to file
       //
@@ -1165,17 +1165,20 @@ namespace MSSA {
 	std::cout << "Bad output stream for <" << filename << ">" << std::endl;
     }
     out.close();
-
   }
 
   
   std::tuple<std::vector<int>, std::vector<double>, double>
-  expMSSA::kmeansChannel(int clusters, Key key)
+  expMSSA::kmeansChannel(Key key, int clusters, int stride)
   {
     if (clusters==0) {
       throw std::invalid_argument("expMSSA::kmeansChannel: clusters==0");
     }
 
+    if (stride<0) {
+      throw std::invalid_argument("expMSSA::kmeansChannel: stride must be >= 0");
+    }
+
     if (mean.find(key) == mean.end()) {
       std::ostringstream sout;
       sout << "expMSSA::kmeansKey: key <" << key << "> not found";
@@ -1198,8 +1201,7 @@ namespace MSSA {
 
     // Run 100 iterations
     //
-    // kMeans.iterate(dist, 1000, clusters, 1);
-    kMeans.iterate(dist, 1000, clusters);
+    kMeans.iterate(dist, 1000, clusters, stride);
 
     // Retrieve cluster associations
     //
@@ -1229,12 +1231,16 @@ namespace MSSA {
   }
 
   std::tuple<std::vector<int>, std::vector<double>, double>
-  expMSSA::kmeans(int clusters)
+  expMSSA::kmeans(int clusters, int stride)
   {
     if (clusters==0) {
       throw std::invalid_argument("expMSSA::kmeans: you need clusters>0");
     }
 
+    if (stride<0) {
+      throw std::invalid_argument("expMSSA::kmeans: stride must be >= 0");
+    }
+
     // Pack point array
     //
     std::vector<KMeans::Ptr> data;
@@ -1254,8 +1260,7 @@ namespace MSSA {
     // Run 100 iterations
     //
     KMeans::WcorrDistMulti dist(numT, numW, sz);
-    // kMeans.iterate(dist, 1000, clusters, 1);
-    kMeans.iterate(dist, 1000, clusters);
+    kMeans.iterate(dist, 1000, clusters, stride);
 
     // Retrieve cluster associations
     //
diff --git a/pyEXP/MSSAWrappers.cc b/pyEXP/MSSAWrappers.cc
index 2eb5e46ab..b6e556993 100644
--- a/pyEXP/MSSAWrappers.cc
+++ b/pyEXP/MSSAWrappers.cc
@@ -436,6 +436,7 @@ void MSSAtoolkitClasses(py::module &m) {
 
   f.def("kmeans", &expMSSA::kmeans,
 	py::arg("clusters") = 4,
+	py::arg("stride") = 2,
 	R"(
         Do a k-means analysis on the reconstructed trajectory matrices for a single channel (specified key value) to
         provide grouping insight.  A vector of channel indices that identify clusters is return in a vector ordered by PC
@@ -445,6 +446,9 @@ void MSSAtoolkitClasses(py::module &m) {
         ----------
         clusters : int, default=4
             number of clusters for the k-means analysis
+        stride : int, default=2
+            if positive, the initial cluster centers are stride selected from the PC list.  If zero, the centers are
+            selected randomly from the PC list
 
         Returns
         -------
@@ -466,8 +470,9 @@ void MSSAtoolkitClasses(py::module &m) {
         )");
 
   f.def("kmeansChannel", &expMSSA::kmeansChannel,
-	py::arg("clusters") = 4,
 	py::arg("key"),
+	py::arg("clusters") = 4,
+	py::arg("stride") = 2,
 	R"(
         Do a k-means analysis on the reconstructed trajectory matrices for a single channel (specified key value) to
         provide grouping insight.  In most cases, you will want to use the kmeans() version which analyzes all channels